An all-in-one missingness report

na_summary(
  df,
  grouping_cols = NULL,
  sort_by = NULL,
  descending = FALSE,
  exclude_cols = NULL,
  pattern = NULL,
  pattern_type = NULL,
  regex_kind = "exclusion",
  round_to = NULL,
  reset_rownames = FALSE
)

Arguments

df: A valid R `object` for which the percentage of missing values is required.
grouping_cols: A character vector of column names by which to group the data. Defaults to NULL.
sort_by: The name of an output column by which to sort the results, for example "variable", "percent_missing", or "percent_complete" (see Examples). Defaults to NULL.
descending: Logical. Should missing values be sorted in decreasing order, i.e. largest to smallest? Defaults to FALSE.
exclude_cols: A character vector indicating columns to exclude when returning results.
pattern: Pattern to use as the column exclusion or inclusion criterion.
pattern_type: A regular expression type. One of "starts_with", "contains", or "regex". Defaults to NULL. Use together with `pattern` to selectively include or exclude columns.
regex_kind: One of "inclusion" or "exclusion". Defaults to "exclusion", which excludes the columns matched by the regular expression.
round_to: Number of decimal places to round to. Defaults to the user's digits option.
reset_rownames: Logical. Should the row names be reset in the output? Defaults to FALSE.

Examples

na_summary(airquality)
#>   variable missing complete percent_complete percent_missing
#> 1      Day       0      153        100.00000        0.000000
#> 2    Month       0      153        100.00000        0.000000
#> 3    Ozone      37      116         75.81699       24.183007
#> 4  Solar.R       7      146         95.42484        4.575163
#> 5     Temp       0      153        100.00000        0.000000
#> 6     Wind       0      153        100.00000        0.000000
# grouping
test2 <- data.frame(ID= c("A","A","B","A","B"),Vals = c(rep(NA,4),"No"),
ID2 = c("E","E","D","E","D"))
df <- data.frame(A=1:5,B=c(NA,NA,25,24,53), C=c(NA,1,2,3,4))

na_summary(test2,grouping_cols = c("ID","ID2"))
#> # A tibble: 2 x 7
#>   ID    ID2   variable missing complete percent_complete percent_missing
#>   <chr> <chr> <chr>      <dbl>    <dbl>            <dbl>           <dbl>
#> 1 B     D     Vals           1        1               50              50
#> 2 A     E     Vals           3        0                0             100
# sort summary
na_summary(airquality,sort_by = "percent_missing",descending = TRUE)
#>   variable missing complete percent_complete percent_missing
#> 3    Ozone      37      116         75.81699       24.183007
#> 4  Solar.R       7      146         95.42484        4.575163
#> 1      Day       0      153        100.00000        0.000000
#> 2    Month       0      153        100.00000        0.000000
#> 5     Temp       0      153        100.00000        0.000000
#> 6     Wind       0      153        100.00000        0.000000
na_summary(airquality,sort_by = "percent_complete")
#>   variable missing complete percent_complete percent_missing
#> 3    Ozone      37      116         75.81699       24.183007
#> 4  Solar.R       7      146         95.42484        4.575163
#> 1      Day       0      153        100.00000        0.000000
#> 2    Month       0      153        100.00000        0.000000
#> 5     Temp       0      153        100.00000        0.000000
#> 6     Wind       0      153        100.00000        0.000000
# Include only via a regular expression
na_summary(mtcars, pattern_type = "contains",
pattern = "mpg|disp|wt", regex_kind = "inclusion")
#>   variable missing complete percent_complete percent_missing
#> 1     disp       0       32              100               0
#> 2      mpg       0       32              100               0
#> 3       wt       0       32              100               0
na_summary(airquality, pattern_type = "starts_with",
pattern = "ozone", regex_kind = "inclusion")
#>   variable missing complete percent_complete percent_missing
#> 1    Ozone      37      116         75.81699        24.18301
# exclusion via a regex
na_summary(airquality, pattern_type = "starts_with",
pattern = "oz|Sol", regex_kind = "exclusion")
#>   variable missing complete percent_complete percent_missing
#> 1      Day       0      153              100               0
#> 2    Month       0      153              100               0
#> 3     Temp       0      153              100               0
#> 4     Wind       0      153              100               0
# reset rownames when sorting by variable
na_summary(df,sort_by="variable",descending=TRUE, reset_rownames = TRUE)
#>   variable missing complete percent_complete percent_missing
#> 1        C       1        4               80              20
#> 2        B       2        3               60              40
#> 3        A       0        5              100               0