Convert character and factor columns to dummy variables

get_dummies.(
  .df,
  cols = where(~is.character(.x) | is.factor(.x)),
  prefix = TRUE,
  prefix_sep = "_",
  drop_first = FALSE,
  dummify_na = TRUE
)

Arguments

.df

A data.frame or data.table

cols

A single column or a vector of unquoted columns to dummify. Defaults to all character & factor columns using where(~is.character(.x) | is.factor(.x)). tidyselect compatible.

prefix

TRUE/FALSE - If TRUE, the original column name will be added as a prefix to the new column names (e.g. chr_a)

prefix_sep

Separator placed between the prefix and the value in new column names (e.g. "_" gives chr_a)

drop_first

TRUE/FALSE - If TRUE, the first dummy column of each dummified column will be dropped

dummify_na

TRUE/FALSE - If TRUE, NAs will also get dummy columns

Examples

df <- tidytable(
  chr = c("a", "b", NA),
  fct = as.factor(c("a", NA, "c")),
  num = 1:3
)

# Automatically does all character/factor columns
df %>%
  get_dummies()
#> # A tidytable: 3 × 9
#>   chr   fct     num chr_a chr_b chr_NA fct_a fct_c fct_NA
#>   <chr> <fct> <int> <int> <int>  <int> <int> <int>  <int>
#> 1 a     a         1     1     0      0     1     0      0
#> 2 b     NA        2     0     1      0     0     0      1
#> 3 NA    c         3     0     0      1     0     1      0

df %>%
  get_dummies(cols = chr)
#> # A tidytable: 3 × 6
#>   chr   fct     num chr_a chr_b chr_NA
#>   <chr> <fct> <int> <int> <int>  <int>
#> 1 a     a         1     1     0      0
#> 2 b     NA        2     0     1      0
#> 3 NA    c         3     0     0      1

df %>%
  get_dummies(cols = c(chr, fct), drop_first = TRUE)
#> # A tidytable: 3 × 7
#>   chr   fct     num chr_b chr_NA fct_c fct_NA
#>   <chr> <fct> <int> <int>  <int> <int>  <int>
#> 1 a     a         1     0      0     0      0
#> 2 b     NA        2     1      0     0      1
#> 3 NA    c         3     0      1     1      0

df %>%
  get_dummies(prefix_sep = ".", dummify_na = FALSE)
#> # A tidytable: 3 × 7
#>   chr   fct     num chr.a chr.b fct.a fct.c
#>   <chr> <fct> <int> <int> <int> <int> <int>
#> 1 a     a         1     1     0     1     0
#> 2 b     NA        2     0     1     0     0
#> 3 NA    c         3     0     0     0     1