Add Embedding Coherence Test ref #8 (#9)

* Initial implementation of ect * Make query, plot_es work for ect * Add test cases for ect * Update the README for ect * Update ect's test case for emitting P * Test also `plot_ect`
gesistsa · Nov 24, 2021 · 3970b4b · 3970b4b
1 parent b38cafb
commit 3970b4b
Show file tree

Hide file tree

Showing 15 changed files with 391 additions and 29 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,10 +1,13 @@
 # Generated by roxygen2: do not edit by hand
 
 export(calculate_es)
+export(ect)
+export(ect_es)
 export(mac)
 export(mac_es)
 export(nas)
 export(plot_bias)
+export(plot_ect)
 export(query)
 export(read_word2vec)
 export(rnd)
@@ -17,7 +20,9 @@ export(weat_es)
 export(weat_exact)
 export(weat_resampling)
 importFrom(Rcpp,sourceCpp)
+importFrom(graphics,abline)
 importFrom(graphics,dotchart)
+importFrom(graphics,text)
 importFrom(stats,dist)
 importFrom(stats,predict)
 importFrom(stats,sd)

diff --git a/R/ect.R b/R/ect.R
@@ -0,0 +1,95 @@
+#' Embedding Coherence Test
+#'
+#' This function estimate the Embedding Coherence Test (ECT) of word embeddings (Dev & Philips, 2019).
+#'
+#' @inheritParams weat
+#' @return A list with class \code{"ect"} containing the following components:
+#' \describe{
+#' \item{\code{$A_words}}{the input A_words}
+#' \item{\code{$B_words}}{the input B_words}
+#' \item{\code{$S_words}}{the input S_words}
+#' \item{\code{$u_a}}{Cosine similarity between each word vector of S_words and average vector of A_words}
+#' \item{\code{$u_b}}{Cosine similarity between each word vector of S_words and average vector of B_words}
+#' }
+#' @seealso
+#' [ect_es()] can be used to obtain the effect size of the test.
+#' [plot_ect()] can be used to visualize the result.
+#' @examples
+#' data(googlenews)
+#' S1 <- c("janitor", "statistician", "midwife", "bailiff", "auctioneer",
+#' "photographer", "geologist", "shoemaker", "athlete", "cashier", "dancer",
+#' "housekeeper", "accountant", "physicist", "gardener", "dentist", "weaver",
+#' "blacksmith", "psychologist", "supervisor", "mathematician", "surveyor",
+#' "tailor", "designer", "economist", "mechanic", "laborer", "postmaster",
+#' "broker", "chemist", "librarian", "attendant", "clerical", "musician",
+#' "porter", "scientist", "carpenter", "sailor", "instructor", "sheriff",
+#' "pilot", "inspector", "mason", "baker", "administrator", "architect",
+#' "collector", "operator", "surgeon", "driver", "painter", "conductor",
+#' "nurse", "cook", "engineer", "retired", "sales", "lawyer", "clergy",
+#' "physician", "farmer", "clerk", "manager", "guard", "artist", "smith",
+#' "official", "police", "doctor", "professor", "student", "judge",
+#' "teacher", "author", "secretary", "soldier")
+#' A1 <- c("he", "son", "his", "him", "father", "man", "boy", "himself",
+#' "male", "brother", "sons", "fathers", "men", "boys", "males", "brothers",
+#' "uncle", "uncles", "nephew", "nephews")
+#' B1 <- c("she", "daughter", "hers", "her", "mother", "woman", "girl",
+#' "herself", "female", "sister", "daughters", "mothers", "women", "girls",
+#' "females", "sisters", "aunt", "aunts", "niece", "nieces")
+#' garg_f1 <- ect(googlenews, S1, A1, B1)
+#' plot_ect(garg_f1)
+#' @author Chung-hong Chan
+#' @references
+#' Dev, S., & Phillips, J. (2019, April). Attenuating bias in word vectors. In The 22nd International Conference on Artificial Intelligence and Statistics (pp. 879-887). PMLR.
+#' @export
+ect <- function(w, S_words, A_words, B_words, verbose = FALSE) {
+    w_lab <- rownames(w)
+    A_cleaned <- .clean(A_words, w_lab, verbose = verbose)
+    B_cleaned <- .clean(B_words, w_lab, verbose = verbose)
+    S_cleaned <- .clean(S_words, w_lab, verbose = verbose)
+    vec_a <- matrix(apply(w[A_cleaned,], 2, mean), 1, byrow = TRUE)
+    vec_b <- matrix(apply(w[B_cleaned,], 2, mean), 1, byrow = TRUE)
+    sim_res_a <- as.vector(proxy::simil(y = vec_a, x = w[S_cleaned, , drop = FALSE], method = "cosine"))
+    names(sim_res_a) <- S_cleaned
+    sim_res_b <- as.vector(proxy::simil(y = vec_b, x = w[S_cleaned, , drop = FALSE], method = "cosine"))
+    names(sim_res_b) <- S_cleaned
+    P <- matrix(c(sim_res_a, sim_res_b), nrow = 2, ncol = length(S_cleaned), byrow = TRUE)
+    colnames(P) <- S_cleaned
+    rownames(P) <- c("A_words", "B_words")
+    res <- list(P = P, u_a = sim_res_a, u_b = sim_res_b, S_words = S_cleaned, A_words = A_cleaned, B_words = B_cleaned)
+    class(res) <- append(class(res), c("ect", "sweater"))
+    return(res)
+}
+
+#' Calculate the Spearman Coefficient of an ECT result
+#'
+#' This functions calculates the Spearman Coefficient of an Embedding Coherence Test. The value ranges from -1 to +1 and a larger value indicates less bias.
+#' @param x an ect object from the [ect()] function.
+#' @return Spearman Coefficient
+#' @author Chung-hong Chan
+#' @references
+#' Dev, S., & Phillips, J. (2019, April). Attenuating bias in word vectors. In The 22nd International Conference on Artificial Intelligence and Statistics (pp. 879-887). PMLR.
+#' @export
+ect_es <- function(x) {
+    if (!"ect" %in% class(x)) {
+        stop("x is not created with ect().", call. = FALSE)
+    }
+    res <- stats::cor(x$u_a, x$u_b, method = "spearman")
+    res
+}
+
+#' Plot an ECT result on a two-dimensional plane
+#'
+#' This functions plot the words in `S_words` on a 2D plane according to their association with the average vectors of `A_words` and `B_words`. A equality line is also added. Words along the equality line have less bias. Words located on the upper side of the equality line have a stronger association with `A_words` and vice versa.
+#' @param x an ect object from the \link{ect} function.
+#' @param ... additional parameters to the underlying [plot()] function
+#' @return a plot
+#' @author Chung-hong Chan
+#' @export
+plot_ect <- function(x, ...) {
+    if (!"ect" %in% class(x)) {
+        stop("x is not created with ect().", call. = FALSE)
+    }
+    plot(x = rank(x$u_a), y = rank(x$u_b), type = "n", xlab = "Association Ranking: A_words", ylab = "Association ranking: B_words")
+    text(x = rank(x$u_a), rank(x$u_b), labels = x$S_words)
+    abline(a = 0, b = 1, lty = 2, col = "grey")
+}
diff --git a/R/misc.R b/R/misc.R
@@ -1,6 +1,6 @@
 #' @importFrom stats dist predict sd var
 #' @importFrom utils glob2rx head
-#' @importFrom graphics dotchart
+#' @importFrom graphics dotchart abline text
 NULL
 
 #' A subset of the pretrained word2vec word vectors
@@ -57,17 +57,20 @@ read_word2vec <- function(x) {
 
 #' Visualize the bias of words in S
 #'
-#' This function plots the bias of words in `S` as a Cleveland Dot Plot.
+#' For `ect`, this function calls [plot_ect()]. For `mac`, `rnd`, and `semaxis`, this function plots the bias of words in `S` as a Cleveland Dot Plot.
 #' @param x an S3 object returned from mac, rnd, semaxis, nas or rnsb
 #' @return a plot
 #' @author Chung-hong Chan
 #' @export
 plot_bias <- function(x) {
-    if (is.null(x$P)) {
+    if ("ect" %in% class(x)) {
+        plot_ect(x)
+    } else if (is.null(x$P)) {
         stop("No P slot in the input object x.")
+    } else {
+        sortedx <- sort(x$P)
+        graphics::dotchart(sortedx, labels = names(sortedx))
     }
-    sortedx <- sort(x$P)
-    graphics::dotchart(sortedx, labels = names(sortedx))
 }
 
 .purify_class <- function(x) {
@@ -81,7 +84,7 @@ plot_bias <- function(x) {
 #' @param ... additional parameters for the effect size functions
 #' @return the effect size
 #' @author Chung-hong Chan
-#' @seealso [weat_es()], [mac_es()], [rnd_es()], [rnsb_es()] 
+#' @seealso [weat_es()], [mac_es()], [rnd_es()], [rnsb_es()], [ect_es()]
 #' @export
 calculate_es <- function(x, ...) {
     if (!"sweater" %in% class(x)) {
@@ -93,5 +96,6 @@ calculate_es <- function(x, ...) {
            "mac" = mac_es(x),
            "rnd" = rnd_es(x),
            "rnsb" = rnsb_es(x),
+           "ect" = ect_es(x),
            stop("No effect size can be calculated for this query."))
 }
diff --git a/R/query.R b/R/query.R
@@ -5,8 +5,8 @@
     if (missing(S_words) | missing(A_words)) {
         stop("S_words and A_words must be provided.")
     }
-    if (!method %in% c("guess", "weat", "mac", "nas", "semaxis", "rnsb", "rnd")) {
-        stop("Unkonwn method. Available methods are: guess, weat, mac, nas, semaxis, rnsb, rnd.")
+    if (!method %in% c("guess", "weat", "mac", "nas", "semaxis", "rnsb", "rnd", "ect")) {
+        stop("Unkonwn method. Available methods are: guess, weat, mac, nas, semaxis, rnsb, rnd, ect.")
     }
     if (method == "guess") {
         if (missing(T_words) & missing(B_words)) {
@@ -27,10 +27,10 @@
 #'
 #' This function makes a query based on the supplied parameters.
 #' @param ... additional parameters for the underlying function
-#' @param method string, the method to be used to make the query. Available options are: `weat`, `mac`, `nas`, `semaxis`, `rnsb`, `rnd`, `nas` and `guess`. If `guess`, the function selects the best option for you. 
+#' @param method string, the method to be used to make the query. Available options are: `weat`, `mac`, `nas`, `semaxis`, `rnsb`, `rnd`, `nas`, `ect` and `guess`. If `guess`, the function selects the best option for you. 
 #' @inheritParams weat
 #' @return a sweater S3 object
-#' @seealso [weat()], [mac()], [nas()], [semaxis()], [rnsb()], [rnd()], [nas()]
+#' @seealso [weat()], [mac()], [nas()], [semaxis()], [rnsb()], [rnd()], [nas()], [ect()]
 #' @author Chung-hong Chan
 #' @examples
 #' data(googlenews)
@@ -66,6 +66,7 @@ query <- function(w, S_words, T_words, A_words, B_words, method = "guess", verbo
            "nas" = nas(w = w, S_words = S_words, A_words = A_words, B_words = B_words, verbose = verbose),
            "semaxis" = semaxis(w = w, S_words = S_words, A_words = A_words, B_words = B_words, verbose = verbose, ...),
            "rnsb" = rnsb(w = w, S_words = S_words, A_words = A_words, B_words = B_words, verbose = verbose, ...),
-           "rnd" = rnd(w = w, S_words = S_words, A_words = A_words, B_words = B_words, verbose = verbose)
+           "rnd" = rnd(w = w, S_words = S_words, A_words = A_words, B_words = B_words, verbose = verbose),
+           "ect" = ect(w = w, S_words = S_words, A_words = A_words, B_words = B_words, verbose = verbose)
            )
 }
diff --git a/README.Rmd b/README.Rmd
@@ -12,6 +12,7 @@ knitr::opts_chunk$set(
   out.width = "100%"
   )
 set.seed(46709394)
+devtools::load_all()
 ```
 
 # sweater <img src="man/figures/sweater_logo.svg" align="right" height="200" />
@@ -26,7 +27,7 @@ The goal of sweater (**S**peedy **W**ord **E**mbedding **A**ssociation **T**est
 
 The package provides functions that are speedy. They are either implemented in C++, or are speedy but accurate approximation of the original implementation proposed by Caliskan et al (2017).
 
-This package provides extra methods such as Relative Norm Distance, SemAxis and Relative Negative Sentiment Bias.
+This package provides extra methods such as Relative Norm Distance, Embedding Coherence Test, SemAxis and Relative Negative Sentiment Bias.
 
 If your goal is to reproduce the analysis in Caliskan et al (2017), please consider using the [original Java program](https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/DX4VWP&version=2.0) or the R package [cbn](https://github.com/conjugateprior/cbn) by Lowe. To reproduce the analysis in Garg et al (2018), please consider using the [original Python program](https://github.com/nikhgarg/EmbeddingDynamicStereotypes). To reproduce the analysis in Mazini et al (2019), please consider using the [original Python program](https://github.com/TManzini/DebiasMulticlassWordEmbedding/).
 
@@ -63,6 +64,7 @@ It is recommended to use the function `query()` to make a query and `calculate_e
 | S_words          | A_words          | Mean Average Cosine Similarity (Mazini et al. 2019)         | mac(), mac_es()                                    |
 | S_words          | A_words, B_words | Relative Norm Distance (Garg et al. 2018)                   | rnd(), rnd_es()                                    |
 | S_words          | A_words, B_words | Relative Negative Sentiment Bias (Sweeney & Najafian. 2019) | rnsb(), rnsb_es()                                  |
+| S_words          | A_words, B_words | Embedding Coherence Test (Dev & Phillips. 2019)             | ect(), ect_es(), plot_ect()                        |
 | S_words          | A_words, B_words | SemAxis (An et al. 2018)                                    | semaxis()                                          |
 | S_words          | A_words, B_words | Normalized Association Score (Caliskan et al. 2017)         | nas()                                              |
 | S_words, T_words | A_words, B_words | Word Embedding Association Test (Caliskan et al. 2017)      | weat(), weat_es(), weat_resampling(), weat_exact() |
@@ -72,9 +74,11 @@ It is recommended to use the function `query()` to make a query and `calculate_e
 
 The simplest form of bias detection is Mean Average Cosine Similarity (Mazini et al. 2019). The same method is used also in Kroon et al. (2020).
 
-```{r mac_neg}
+```{r, eval = FALSE}
 require(sweater)
+```
 
+```{r mac_neg}
 S1 <- c("janitor", "statistician", "midwife", "bailiff", "auctioneer", 
 "photographer", "geologist", "shoemaker", "athlete", "cashier", 
 "dancer", "housekeeper", "accountant", "physicist", "gardener", 
@@ -137,6 +141,25 @@ res <- query(small_reddit, S_words = S2, A_words = A2, B_words = B2, method = "s
 plot_bias(res)
 ```
 
+## Example: Embedding Coherence Test
+
+Embedding Coherence Test (Dev & Phillips, 2019) is similar to SemAxis. The only significant different is that no "SemAxis" is calculated (the difference between the average word vectors of `A_words` and `B_words`). Instead, it calculates two separate axes for `A_words` and `B_words`. Then it calculates the proximity of each word in `S_words` with the two axes. It is like doing two separate `mac`, but `ect` averages the word vectors of `A_words` / `B_words` first.
+
+It is important to note that `P` is a 2-D matrix. Hence, the plot is 2-dimensional. Words above the equality line are more associated with `B_words` and vice versa.
+
+```{r ectplot}
+res <- query(googlenews, S_words = S1, A_words = A1, B_words = B1, method = "ect")
+res$P
+plot_bias(res)
+```
+
+Effect size can also be calculated. It is the Spearman Correlation Coefficient of the two rows in `P`. Higher value indicates more "coherent", i.e. less bias.
+
+```{r}
+calculate_es(res)
+```
+
+
 ## Example: Relative Negative Sentiment Bias
 
 This analysis attempts to reproduce the analysis in Sweeney & Najafian (2019).
@@ -311,11 +334,12 @@ Please note that the sweater project is released with a [Contributor Code of Con
 3. Brunet, M. E., Alkalay-Houlihan, C., Anderson, A., & Zemel, R. (2019, May). Understanding the origins of bias in word embeddings. In International Conference on Machine Learning (pp. 803-811). PMLR.
 4. Caliskan, Aylin, Joanna J. Bryson, and Arvind Narayanan. "Semantics derived automatically from language corpora contain human-like biases." Science 356.6334 (2017): 183-186.
 5. Cohen, J. (1988), Statistical Power Analysis for the Behavioral Sciences, 2nd Edition. Hillsdale: Lawrence Erlbaum.
-6. Garg, N., Schiebinger, L., Jurafsky, D., & Zou, J. (2018). Word embeddings quantify 100 years of gender and ethnic stereotypes. Proceedings of the National Academy of Sciences, 115(16), E3635-E3644.
-7. Manzini, T., Lim, Y. C., Tsvetkov, Y., & Black, A. W. (2019). Black is to criminal as caucasian is to police: Detecting and removing multiclass bias in word embeddings. arXiv preprint arXiv:1904.04047.
-8. McGrath, R. E., & Meyer, G. J. (2006). When effect sizes disagree: the case of r and d. Psychological methods, 11(4), 386.
-9. Rosenthal, R. (1991), Meta-Analytic Procedures for Social Research. Newbury Park: Sage
-10. Sweeney, C., & Najafian, M. (2019, July). A transparent framework for evaluating unintended demographic bias in word embeddings. In Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics (pp. 1662-1667).
-11. Watanabe, K. (2018). Newsmap: A semi-supervised approach to geographical news classification. Digital Journalism, 6(3), 294-309.
+6. Dev, S., & Phillips, J. (2019, April). Attenuating bias in word vectors. In The 22nd International Conference on Artificial Intelligence and Statistics (pp. 879-887). PMLR.
+7. Garg, N., Schiebinger, L., Jurafsky, D., & Zou, J. (2018). Word embeddings quantify 100 years of gender and ethnic stereotypes. Proceedings of the National Academy of Sciences, 115(16), E3635-E3644.
+8. Manzini, T., Lim, Y. C., Tsvetkov, Y., & Black, A. W. (2019). Black is to criminal as caucasian is to police: Detecting and removing multiclass bias in word embeddings. arXiv preprint arXiv:1904.04047.
+9. McGrath, R. E., & Meyer, G. J. (2006). When effect sizes disagree: the case of r and d. Psychological methods, 11(4), 386.
+10. Rosenthal, R. (1991), Meta-Analytic Procedures for Social Research. Newbury Park: Sage
+11. Sweeney, C., & Najafian, M. (2019, July). A transparent framework for evaluating unintended demographic bias in word embeddings. In Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics (pp. 1662-1667).
+12. Watanabe, K. (2018). Newsmap: A semi-supervised approach to geographical news classification. Digital Journalism, 6(3), 294-309.
 
 ---