Skip to content

Commit 787fa93

Browse files
* standalone-helpers updated
1 parent 69def8d commit 787fa93

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

58 files changed

+1430
-87
lines changed

DESCRIPTION

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ Suggests:
2727
clock,
2828
dplyr,
2929
fipio,
30-
fixtuRes,
3130
fontawesome,
3231
forcats,
3332
fs (>= 1.6.5),
@@ -54,7 +53,6 @@ Suggests:
5453
tidyr,
5554
timeplyr,
5655
usethis,
57-
wakefield,
5856
zipcodeR
5957
Config/roxyglobals/filename: generated-globals.R
6058
Config/roxyglobals/unique: TRUE

NAMESPACE

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@ export("%or%")
66
export(add_counties)
77
export(age_days)
88
export(arrows)
9+
export(as_chr)
10+
export(as_date)
11+
export(as_int)
12+
export(as_num)
913
export(bracks)
1014
export(change)
1115
export(change_lagged)
@@ -29,18 +33,25 @@ export(count_prop_multi)
2933
export(count_wide)
3034
export(create_vec)
3135
export(data_types)
36+
export(delist)
3237
export(delister)
3338
export(density)
3439
export(describe)
40+
export(describe2)
3541
export(describe_unique)
42+
export(desplit)
3643
export(display_long)
3744
export(duration_vec)
45+
export(empty)
3846
export(expand_date_range)
47+
export(false)
3948
export(fancy_ts)
4049
export(find_common_order)
4150
export(gchop)
51+
export(gelm)
4252
export(geomean)
4353
export(get_pin)
54+
export(getelem)
4455
export(gg_theme)
4556
export(gh_raw)
4657
export(glue_chr)
@@ -49,35 +60,68 @@ export(gluestick)
4960
export(gt_marks)
5061
export(histo)
5162
export(histogram)
63+
export(if_empty_null)
64+
export(iif_else)
5265
export(initialize_package)
5366
export(interpolate)
67+
export(invert_named)
5468
export(is_valid_npi)
5569
export(is_valid_npi2)
5670
export(list_pins)
5771
export(make_interval)
72+
export(max_vlen)
5873
export(mock_forager)
5974
export(mock_provider)
6075
export(mount_board)
76+
export(na)
77+
export(na_if)
6178
export(na_if_common)
6279
export(named_group_split)
6380
export(new_value)
81+
export(not_na)
82+
export(not_null)
83+
export(null)
6484
export(pad_number)
6585
export(parens)
6686
export(percentage_calculator)
6787
export(percentage_change)
6888
export(percentage_difference)
6989
export(print_ls)
7090
export(random_npi_generator)
91+
export(random_string)
7192
export(rate_of_return)
93+
export(remove_all_na)
7294
export(remove_quiet)
95+
export(remove_quotes)
7396
export(rename_seq)
97+
export(roundup)
7498
export(search_for)
99+
export(search_in)
100+
export(sf_at)
101+
export(sf_c)
102+
export(sf_chars)
103+
export(sf_conv)
104+
export(sf_detect)
105+
export(sf_extract)
106+
export(sf_ndetect)
107+
export(sf_nextract)
108+
export(sf_remove)
109+
export(sf_replace)
110+
export(sf_smush)
111+
export(sf_strsplit)
112+
export(sf_sub)
75113
export(single_line_string)
76114
export(sorted_bars)
77115
export(splitter)
116+
export(strsort)
78117
export(summary_stats)
118+
export(true)
79119
export(ttimestamp)
120+
export(uniq)
121+
export(uniq_narm)
122+
export(uniq_vlen)
80123
export(update_personal_packages)
124+
export(vlen)
81125
export(years_df)
82126
export(years_floor)
83127
export(years_vec)

R/describe.R

Lines changed: 81 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,75 @@
1+
#' Describe 2
2+
#'
3+
#' @param df `<data.frame>` desc
4+
#'
5+
#' @param ... `<dots>` tidyselect columns
6+
#'
7+
#' @returns `<tibble>` of summary statistics
8+
#'
9+
#' @examples
10+
#' describe2(mock_provider(2000:2020))
11+
#'
12+
#' describe2(mock_forager(200))
13+
#'
14+
#' @autoglobal
15+
#'
16+
#' @export
17+
describe2 <- function(df, ...) {
18+
19+
get_type <- \(x) {
20+
cheapr::enframe_(
21+
purrr::map_vec(x, function(x)
22+
glue_chr("<{pillar::type_sum(x)}>")),
23+
name = "column",
24+
value = "type")
25+
}
26+
27+
fiqr <- \(x) diff(collapse::.quantile(as.numeric(x), c(0.25, 0.75)))
28+
29+
if (nargs() > 1) df <- dplyr::select(df, ...)
30+
31+
dates <- dplyr::select(df, dplyr::where(\(x) inherits(x, "Date")))
32+
df <- dplyr::select(df, dplyr::where(\(x) !inherits(x, "Date")))
33+
34+
sums <- df |>
35+
dplyr::mutate(
36+
dplyr::across(dplyr::where(is.character), stringr::str_length),
37+
dplyr::across(dplyr::where(\(x) is.factor(x) | is.logical(x)), as.numeric)) |>
38+
tidyr::pivot_longer(dplyr::everything(), names_to = "column") |>
39+
dplyr::mutate(n = 1 - cheapr::is_na(value)) |>
40+
dplyr::reframe(
41+
n = collapse::fsum(value, nthreads = 4L),
42+
min = collapse::fmin(value),
43+
mean = collapse::fmean(value, nthreads = 4L),
44+
iqr = fiqr(value),
45+
max = collapse::fmax(value),
46+
med = collapse::fmedian(value),
47+
sd = collapse::fsd(value),
48+
mad = mad(value, na.rm = TRUE),
49+
distribution = histo(value),
50+
.by = column) |>
51+
dplyr::left_join(get_type(df), by = dplyr::join_by(column))
52+
53+
topn <- \(x, limit = 10) {
54+
dplyr::tibble(
55+
column = names(x),
56+
uniq = collapse::fnunique(collapse::na_rm(x)),
57+
top = collapse::fcount(collapse::na_rm(x), name = "n") |>
58+
dplyr::arrange(dplyr::desc(n)) |>
59+
dplyr::slice(seq(1, limit)) |>
60+
dplyr::pull(x) |>
61+
stringr::str_flatten_comma())
62+
}
63+
64+
tops <- purrr::map(df, topn) |>
65+
purrr::list_rbind(names_to = "column") |>
66+
dplyr::filter(uniq != nrow(df))
67+
68+
dplyr::left_join(sums, tops, by = dplyr::join_by(column)) |>
69+
dplyr::arrange(dplyr::desc(type)) |>
70+
dplyr::select(column, type, n, min, mean, med, max, iqr, sd, mad, distribution, uniq, top)
71+
}
72+
173
#' Describe a dataset
274
#'
375
#' @param df `<data.frame>` desc
@@ -140,9 +212,13 @@ histo <- function(x, width = 10) {
140212
#'
141213
#' @examples
142214
#'
143-
#' describe_unique(mock_forager(), ins_class, payer)
215+
#' describe_unique(mock_forager(), class, payer)
144216
#'
145-
#' # describe_unique(mock_forager(200), names(df)[2:3])
217+
#' describe_unique(
218+
#' mock_forager(50),
219+
#' names(mock_forager())[c(2:3, 6:9)]) |>
220+
#' dplyr::filter(n > 2) |>
221+
#' print(n = 30)
146222
#'
147223
#' @autoglobal
148224
#'
@@ -157,7 +233,9 @@ describe_unique <- function(df,
157233

158234
df <- dplyr::select(df, ...)
159235

160-
.set_names <- if (is.null(.set_names)) names(df) else .set_names
236+
.set_names <- if (null(.set_names)) names(df) else .set_names
237+
238+
df <- dplyr::mutate(df, dplyr::across(!dplyr::where(is.character), as.character))
161239

162240
df <- columns_to_character(df) |>
163241
names() |>

R/generated-globals.R

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ utils::globalVariables(c(
99
# <rate_of_return>
1010
# <combine>
1111
":=",
12+
# <describe2>
13+
"column",
1214
# <combine>
1315
"combined",
1416
# <rate_of_return>
@@ -23,35 +25,46 @@ utils::globalVariables(c(
2325
"date_of_service",
2426
# <mock_forager>
2527
"date_of_submission",
28+
# <describe2>
29+
"distribution",
2630
# <id_runs>
2731
"group",
2832
# <id_runs>
2933
"group_size",
3034
# <make_interval>
3135
"interval",
36+
# <describe2>
3237
# <describe>
3338
"iqr",
3439
# <id_runs>
3540
"key",
3641
# <rate_of_return>
3742
"lg",
43+
# <describe2>
3844
# <describe>
3945
"mad",
46+
# <describe2>
4047
# <describe>
4148
"med",
49+
# <describe2>
4250
# <describe>
4351
# <count_prop>
4452
# <count_prop_multi>
4553
# <count_wide>
4654
"n",
4755
# <describe>
4856
"nuniq",
57+
# <describe2>
4958
# <describe>
5059
"sd",
60+
# <describe2>
61+
"top",
5162
# <describe>
5263
"top_n",
64+
# <describe2>
5365
# <describe>
5466
"type",
67+
# <describe2>
5568
# <describe>
5669
# <id_runs>
5770
"value",

R/mock.R

Lines changed: 38 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ mock_provider <- \(years) {
2929
#'
3030
#' @param rows `<int>` number of rows to generate; default is 10
3131
#'
32-
#' @param nest `<lgl>` whether to nest the dates column; default is `TRUE`
32+
#' @param nest `<lgl>` whether to nest the dates column; default is `FALSE`
3333
#'
3434
#' @returns A [tibble][tibble::tibble-package]
3535
#'
@@ -41,23 +41,47 @@ mock_provider <- \(years) {
4141
#' @family mock
4242
#'
4343
#' @export
44-
mock_forager <- function(rows = 10, nest = TRUE){
44+
mock_forager <- function(rows = 10, nest = FALSE){
4545

46-
payer_names <- c("Medicare", "Medicaid", "Cigna", "Humana", "UHC", "Anthem", "BCBS", "Centene")
46+
payers <- sample(
47+
x = factor(
48+
x = c("Medicare",
49+
"Medicaid",
50+
"Cigna",
51+
"Humana",
52+
"UHC",
53+
"Anthem",
54+
"BCBS",
55+
"Centene",
56+
"MAO")),
57+
size = rows,
58+
replace = TRUE)
59+
60+
classes <- sample(
61+
x = ordered(
62+
c("Primary",
63+
"Secondary")),
64+
size = rows,
65+
replace = TRUE)
4766

4867
x <- dplyr::tibble(
49-
claim_id = as.character(wakefield::id(n = rows)),
50-
date_of_service = wakefield::dob(n = rows, start = Sys.Date() - 730, random = TRUE, k = 12, by = "-1 months"),
51-
payer = fixtuRes::set_vector(rows, set = payer_names),
52-
ins_class = fixtuRes::set_vector(rows, set = c("Primary", "Secondary")),
53-
balance = as.double(wakefield::income(n = rows, digits = 2) / 300),
54-
date_of_release = date_of_service + round(abs(stats::rnorm(length(date_of_service), 11, 4))),
55-
date_of_submission = date_of_release + round(abs(stats::rnorm(length(date_of_release), 2, 2))),
56-
date_of_acceptance = date_of_submission + round(abs(stats::rnorm(length(date_of_submission), 3, 2))),
57-
date_of_adjudication = date_of_acceptance + round(abs(stats::rnorm(length(date_of_acceptance), 30, 3))))
68+
id = sprintf(paste0("%0", nchar(rows) + 3, "d"), seq_len(rows)),
69+
payer = payers,
70+
class = classes,
71+
balance = roundup(stats::rgamma(n = rows, 2) * 20000) / 300,
72+
date_of_service = sample(x = seq.Date(from = Sys.Date(), by = "-1 months", length.out = 12), size = rows, replace = TRUE),
73+
date_of_release = date_of_service + roundup(abs(stats::rnorm(rows, 11, 4))),
74+
date_of_submission = date_of_release + roundup(abs(stats::rnorm(rows, 2, 2))),
75+
date_of_acceptance = date_of_submission + roundup(abs(stats::rnorm(rows, 3, 2))),
76+
date_of_adjudication = date_of_acceptance + roundup(abs(stats::rnorm(rows, 30, 3)))
77+
)
5878

5979
if (nest)
60-
return(tidyr::nest(x, dates = tidyr::contains("date")))
61-
80+
return(
81+
tidyr::nest(
82+
x,
83+
dates = tidyr::contains("date")
84+
)
85+
)
6286
x
6387
}

0 commit comments

Comments
 (0)