# Provenance: git patch 11771b9ab9c65c2e41d535d01b5ffa8cb11ac449
#   From: chainsawriot, Thu, 16 Nov 2023 16:41:34 +0100
#   Subject: [PATCH] dfm2 ref #20
#   Target: R/get_dist.R (26 lines appended after dfm.tokens_with_proximity())

#' Build a proximity-weighted document-feature matrix
#'
#' Every token occurrence contributes `weight_function(proximity)` to the
#' cell of its document (row) and type (column). Occurrences of the same
#' type within one document are summed, because `Matrix::sparseMatrix()`
#' adds up the values of duplicated (row, column) pairs.
#'
#' @param x A tokens object carrying a "proximity" docvar. The docvar is
#'   flattened with `unlist()`, so it is expected to hold one value per
#'   token, in token order.
#' @param remove_docvars_proximity If `TRUE` (default), the "proximity"
#'   docvar is dropped from the docvars attached to the result.
#' @param weight_function Function mapping the flattened proximity vector to
#'   the values stored in the matrix. Defaults to the inverse, `1 / x`.
#' @return The object returned by `quanteda::as.dfm()` for the weighted
#'   sparse matrix, with the "meta" attribute and the docvars of `x`.
#' @noRd
dfm2 <- function(x, remove_docvars_proximity = TRUE,
                 weight_function = function(x) {
                   1 / x
                 }) {
  x_attrs <- attributes(x)
  x_docvars <- quanteda::docvars(x)
  type <- quanteda::types(x)

  # Flatten the per-document type ids and the matching proximities so that
  # both vectors run over all tokens in document order.
  index <- unlist(unclass(x), use.names = FALSE)
  val <- weight_function(
    unlist(quanteda::docvars(x, "proximity"), use.names = FALSE)
  )

  # NOTE(review): quanteda appears to encode padding as type id 0, which is
  # not a valid 1-based column index for sparseMatrix(). Shift the ids and
  # prepend an empty-string type so pads get their own "" column -- confirm
  # this matches how the package wants padding represented.
  if (any(index == 0L)) {
    type <- c("", type)
    index <- index + 1L
  }

  # Compressed layout: `p` holds the 0-based offset at which each document's
  # run of tokens starts inside `index`, so only column ids are needed.
  temp <- Matrix::sparseMatrix(
    j = index,
    p = cumsum(c(1L, lengths(x))) - 1L,
    x = val,
    dims = c(length(x), length(type)),
    dimnames = list(quanteda::docnames(x), type)
  )

  output <- quanteda::as.dfm(temp)
  attributes(output)[["meta"]] <- x_attrs[["meta"]]
  if (remove_docvars_proximity) {
    x_docvars$proximity <- NULL
  }
  quanteda::docvars(output) <- x_docvars
  output
}