Skip to content
This repository has been archived by the owner on Feb 11, 2024. It is now read-only.

Commit

Permalink
dfm2 ref #20
Browse files Browse the repository at this point in the history
  • Loading branch information
chainsawriot committed Nov 16, 2023
1 parent 3306e25 commit 11771b9
Showing 1 changed file with 26 additions and 0 deletions.
26 changes: 26 additions & 0 deletions R/get_dist.R
Original file line number Diff line number Diff line change
Expand Up @@ -196,3 +196,29 @@ dfm.tokens_with_proximity <- function(x, tolower = TRUE, remove_padding = FALSE,
quanteda::docvars(output) <- x_docvars
return(output)
}

# Build a proximity-weighted document-feature matrix from a tokens object
# that carries a "proximity" docvar.
#
# Arguments:
#   x                         A tokens object with a "proximity" docvar
#                             (assumed to hold one proximity value per token,
#                             in token order -- TODO confirm against the
#                             constructor of the object).
#   remove_docvars_proximity  If TRUE, the "proximity" docvar is dropped from
#                             the docvars attached to the result.
#   weight_function           Function applied to the flattened proximity
#                             values to obtain cell weights; defaults to the
#                             inverse, 1 / proximity.
#
# Returns the object produced by quanteda::as.dfm() with one row per document
# and one column per type of `x`, carrying the "meta" attribute and the
# docvars of `x`.
dfm2 <- function(x, remove_docvars_proximity = TRUE,
                 weight_function = function(x) {
                     1 / x
                 }) {
    x_attrs <- attributes(x)
    x_docvars <- quanteda::docvars(x)
    type <- quanteda::types(x)
    # Flattened token values of all documents; they are used directly as
    # 1-based column indices into `type`.
    # NOTE(review): a padding token would presumably appear here as index 0,
    # which sparseMatrix() does not accept as a 1-based `j` -- confirm that
    # padding is removed before this function is called.
    index <- unlist(unclass(x), use.names = FALSE)
    # One weight per token, in the same flattened order as `index`.
    proximity <- unlist(quanteda::docvars(x, "proximity"), use.names = FALSE)
    val <- weight_function(proximity)
    # `i` is omitted, so `p` supplies the zero-based row pointers: document k
    # owns the entries between p[k] and p[k + 1].
    row_pointer <- cumsum(c(1L, lengths(x))) - 1L
    temp <- Matrix::sparseMatrix(j = index,
                                 p = row_pointer,
                                 x = val,
                                 dims = c(length(x), length(type)),
                                 dimnames = list(quanteda::docnames(x), type))
    output <- quanteda::as.dfm(temp)
    # Carry over the metadata of the input object.
    attributes(output)[["meta"]] <- x_attrs[["meta"]]
    if (remove_docvars_proximity) {
        x_docvars$proximity <- NULL
    }
    quanteda::docvars(output) <- x_docvars
    return(output)
}

0 comments on commit 11771b9

Please sign in to comment.