An all-in-one missingness report

na_summary(
  df,
  grouping_cols = NULL,
  sort_by = NULL,
  descending = FALSE,
  exclude_cols = NULL,
  pattern = NULL,
  pattern_type = NULL,
  regex_kind = "exclusion",
  round_to = NULL,
  reset_rownames = FALSE
)

Arguments

df

A valid R `object` for which the percentage of missing values is required.

grouping_cols

A character vector. If supplied, one can provide the columns by which to group the data.

sort_by

The name of the output column to sort the results by, for example "variable", "percent_missing" or "percent_complete" (see Examples). Defaults to NULL.

descending

Logical. Should the results be sorted in decreasing order, i.e. largest to smallest? Defaults to FALSE.

exclude_cols

A character vector indicating columns to exclude when returning results.

pattern

Pattern to use as the column exclusion or inclusion criterion.

pattern_type

A regular expression type. One of "starts_with", "contains", or "regex". Defaults to NULL. Only use for selective inclusion or exclusion of columns via `pattern`.

regex_kind

One of "inclusion" or "exclusion". Defaults to "exclusion", which excludes columns matching the regular expression.

round_to

Number of decimal places to round to. Defaults to the user's digits option.

reset_rownames

Logical. Should the row names be reset in the output? Defaults to FALSE.

Examples

na_summary(airquality)
#>   variable missing complete percent_complete percent_missing
#> 1      Day       0      153        100.00000        0.000000
#> 2    Month       0      153        100.00000        0.000000
#> 3    Ozone      37      116         75.81699       24.183007
#> 4  Solar.R       7      146         95.42484        4.575163
#> 5     Temp       0      153        100.00000        0.000000
#> 6     Wind       0      153        100.00000        0.000000
# grouping
test2 <- data.frame(ID= c("A","A","B","A","B"),Vals = c(rep(NA,4),"No"),
ID2 = c("E","E","D","E","D"))
df <- data.frame(A=1:5,B=c(NA,NA,25,24,53), C=c(NA,1,2,3,4))

na_summary(test2,grouping_cols = c("ID","ID2"))
#> # A tibble: 2 x 7
#>   ID    ID2   variable missing complete percent_complete percent_missing
#>   <chr> <chr> <chr>      <dbl>    <dbl>            <dbl>           <dbl>
#> 1 B     D     Vals           1        1               50              50
#> 2 A     E     Vals           3        0                0             100
# sort summary
na_summary(airquality,sort_by = "percent_missing",descending = TRUE)
#>   variable missing complete percent_complete percent_missing
#> 3    Ozone      37      116         75.81699       24.183007
#> 4  Solar.R       7      146         95.42484        4.575163
#> 1      Day       0      153        100.00000        0.000000
#> 2    Month       0      153        100.00000        0.000000
#> 5     Temp       0      153        100.00000        0.000000
#> 6     Wind       0      153        100.00000        0.000000
na_summary(airquality,sort_by = "percent_complete")
#>   variable missing complete percent_complete percent_missing
#> 3    Ozone      37      116         75.81699       24.183007
#> 4  Solar.R       7      146         95.42484        4.575163
#> 1      Day       0      153        100.00000        0.000000
#> 2    Month       0      153        100.00000        0.000000
#> 5     Temp       0      153        100.00000        0.000000
#> 6     Wind       0      153        100.00000        0.000000
# Include only via a regular expression
na_summary(mtcars, pattern_type = "contains",
pattern = "mpg|disp|wt", regex_kind = "inclusion")
#>   variable missing complete percent_complete percent_missing
#> 1     disp       0       32              100               0
#> 2      mpg       0       32              100               0
#> 3       wt       0       32              100               0
na_summary(airquality, pattern_type = "starts_with",
pattern = "ozone", regex_kind = "inclusion")
#>   variable missing complete percent_complete percent_missing
#> 1    Ozone      37      116         75.81699        24.18301
# exclusion via a regex
na_summary(airquality, pattern_type = "starts_with",
pattern = "oz|Sol", regex_kind = "exclusion")
#>   variable missing complete percent_complete percent_missing
#> 1      Day       0      153              100               0
#> 2    Month       0      153              100               0
#> 3     Temp       0      153              100               0
#> 4     Wind       0      153              100               0
# reset rownames when sorting by variable
na_summary(df,sort_by="variable",descending=TRUE, reset_rownames = TRUE)
#>   variable missing complete percent_complete percent_missing
#> 1        C       1        4               80              20
#> 2        B       2        3               60              40
#> 3        A       0        5              100               0