Skip to content

Commit

Permalink
Add Embedding Coherence Test ref #8 (#9)
Browse files Browse the repository at this point in the history
* Initial implementation of ect

* Make query, plot_es work for ect

* Add test cases for ect

* Update the README for ect

* Update ect's test case for emitting P

* Test also `plot_ect`
  • Loading branch information
chainsawriot authored Nov 24, 2021
1 parent b38cafb commit 3970b4b
Show file tree
Hide file tree
Showing 15 changed files with 391 additions and 29 deletions.
5 changes: 5 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
# Generated by roxygen2: do not edit by hand

export(calculate_es)
export(ect)
export(ect_es)
export(mac)
export(mac_es)
export(nas)
export(plot_bias)
export(plot_ect)
export(query)
export(read_word2vec)
export(rnd)
Expand All @@ -17,7 +20,9 @@ export(weat_es)
export(weat_exact)
export(weat_resampling)
importFrom(Rcpp,sourceCpp)
importFrom(graphics,abline)
importFrom(graphics,dotchart)
importFrom(graphics,text)
importFrom(stats,dist)
importFrom(stats,predict)
importFrom(stats,sd)
Expand Down
95 changes: 95 additions & 0 deletions R/ect.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
#' Embedding Coherence Test
#'
#' This function estimate the Embedding Coherence Test (ECT) of word embeddings (Dev & Philips, 2019).
#'
#' @inheritParams weat
#' @return A list with class \code{"ect"} containing the following components:
#' \describe{
#' \item{\code{$A_words}}{the input A_words}
#' \item{\code{$B_words}}{the input B_words}
#' \item{\code{$S_words}}{the input S_words}
#' \item{\code{$u_a}}{Cosine similarity between each word vector of S_words and average vector of A_words}
#' \item{\code{$u_b}}{Cosine similarity between each word vector of S_words and average vector of B_words}
#' }
#' @seealso
#' [ect_es()] can be used to obtain the effect size of the test.
#' [plot_ect()] can be used to visualize the result.
#' @examples
#' data(googlenews)
#' S1 <- c("janitor", "statistician", "midwife", "bailiff", "auctioneer",
#' "photographer", "geologist", "shoemaker", "athlete", "cashier", "dancer",
#' "housekeeper", "accountant", "physicist", "gardener", "dentist", "weaver",
#' "blacksmith", "psychologist", "supervisor", "mathematician", "surveyor",
#' "tailor", "designer", "economist", "mechanic", "laborer", "postmaster",
#' "broker", "chemist", "librarian", "attendant", "clerical", "musician",
#' "porter", "scientist", "carpenter", "sailor", "instructor", "sheriff",
#' "pilot", "inspector", "mason", "baker", "administrator", "architect",
#' "collector", "operator", "surgeon", "driver", "painter", "conductor",
#' "nurse", "cook", "engineer", "retired", "sales", "lawyer", "clergy",
#' "physician", "farmer", "clerk", "manager", "guard", "artist", "smith",
#' "official", "police", "doctor", "professor", "student", "judge",
#' "teacher", "author", "secretary", "soldier")
#' A1 <- c("he", "son", "his", "him", "father", "man", "boy", "himself",
#' "male", "brother", "sons", "fathers", "men", "boys", "males", "brothers",
#' "uncle", "uncles", "nephew", "nephews")
#' B1 <- c("she", "daughter", "hers", "her", "mother", "woman", "girl",
#' "herself", "female", "sister", "daughters", "mothers", "women", "girls",
#' "females", "sisters", "aunt", "aunts", "niece", "nieces")
#' garg_f1 <- ect(googlenews, S1, A1, B1)
#' plot_ect(garg_f1)
#' @author Chung-hong Chan
#' @references
#' Dev, S., & Phillips, J. (2019, April). Attenuating bias in word vectors. In The 22nd International Conference on Artificial Intelligence and Statistics (pp. 879-887). PMLR.
#' @export
ect <- function(w, S_words, A_words, B_words, verbose = FALSE) {
w_lab <- rownames(w)
A_cleaned <- .clean(A_words, w_lab, verbose = verbose)
B_cleaned <- .clean(B_words, w_lab, verbose = verbose)
S_cleaned <- .clean(S_words, w_lab, verbose = verbose)
vec_a <- matrix(apply(w[A_cleaned,], 2, mean), 1, byrow = TRUE)
vec_b <- matrix(apply(w[B_cleaned,], 2, mean), 1, byrow = TRUE)
sim_res_a <- as.vector(proxy::simil(y = vec_a, x = w[S_cleaned, , drop = FALSE], method = "cosine"))
names(sim_res_a) <- S_cleaned
sim_res_b <- as.vector(proxy::simil(y = vec_b, x = w[S_cleaned, , drop = FALSE], method = "cosine"))
names(sim_res_b) <- S_cleaned
P <- matrix(c(sim_res_a, sim_res_b), nrow = 2, ncol = length(S_cleaned), byrow = TRUE)
colnames(P) <- S_cleaned
rownames(P) <- c("A_words", "B_words")
res <- list(P = P, u_a = sim_res_a, u_b = sim_res_b, S_words = S_cleaned, A_words = A_cleaned, B_words = B_cleaned)
class(res) <- append(class(res), c("ect", "sweater"))
return(res)
}

#' Calculate the Spearman Coefficient of an ECT result
#'
#' This functions calculates the Spearman Coefficient of an Embedding Coherence Test. The value ranges from -1 to +1 and a larger value indicates less bias.
#' @param x an ect object from the [ect()] function.
#' @return Spearman Coefficient
#' @author Chung-hong Chan
#' @references
#' Dev, S., & Phillips, J. (2019, April). Attenuating bias in word vectors. In The 22nd International Conference on Artificial Intelligence and Statistics (pp. 879-887). PMLR.
#' @export
ect_es <- function(x) {
if (!"ect" %in% class(x)) {
stop("x is not created with ect().", call. = FALSE)
}
res <- stats::cor(x$u_a, x$u_b, method = "spearman")
res
}

#' Plot an ECT result on a two-dimensional plane
#'
#' This functions plot the words in `S_words` on a 2D plane according to their association with the average vectors of `A_words` and `B_words`. A equality line is also added. Words along the equality line have less bias. Words located on the upper side of the equality line have a stronger association with `A_words` and vice versa.
#' @param x an ect object from the \link{ect} function.
#' @param ... additional parameters to the underlying [plot()] function
#' @return a plot
#' @author Chung-hong Chan
#' @export
plot_ect <- function(x, ...) {
if (!"ect" %in% class(x)) {
stop("x is not created with ect().", call. = FALSE)
}
plot(x = rank(x$u_a), y = rank(x$u_b), type = "n", xlab = "Association Ranking: A_words", ylab = "Association ranking: B_words")
text(x = rank(x$u_a), rank(x$u_b), labels = x$S_words)
abline(a = 0, b = 1, lty = 2, col = "grey")
}
16 changes: 10 additions & 6 deletions R/misc.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#' @importFrom stats dist predict sd var
#' @importFrom utils glob2rx head
#' @importFrom graphics dotchart
#' @importFrom graphics dotchart abline text
NULL

#' A subset of the pretrained word2vec word vectors
Expand Down Expand Up @@ -57,17 +57,20 @@ read_word2vec <- function(x) {

#' Visualize the bias of words in S
#'
#' This function plots the bias of words in `S` as a Cleveland Dot Plot.
#' For `ect`, this function calls [plot_ect()]. For `mac`, `rnd`, and `semaxis`, this function plots the bias of words in `S` as a Cleveland Dot Plot.
#' @param x an S3 object returned from mac, rnd, semaxis, nas or rnsb
#' @return a plot
#' @author Chung-hong Chan
#' @export
plot_bias <- function(x) {
if (is.null(x$P)) {
if ("ect" %in% class(x)) {
plot_ect(x)
} else if (is.null(x$P)) {
stop("No P slot in the input object x.")
} else {
sortedx <- sort(x$P)
graphics::dotchart(sortedx, labels = names(sortedx))
}
sortedx <- sort(x$P)
graphics::dotchart(sortedx, labels = names(sortedx))
}

.purify_class <- function(x) {
Expand All @@ -81,7 +84,7 @@ plot_bias <- function(x) {
#' @param ... additional parameters for the effect size functions
#' @return the effect size
#' @author Chung-hong Chan
#' @seealso [weat_es()], [mac_es()], [rnd_es()], [rnsb_es()]
#' @seealso [weat_es()], [mac_es()], [rnd_es()], [rnsb_es()], [ect_es()]
#' @export
calculate_es <- function(x, ...) {
if (!"sweater" %in% class(x)) {
Expand All @@ -93,5 +96,6 @@ calculate_es <- function(x, ...) {
"mac" = mac_es(x),
"rnd" = rnd_es(x),
"rnsb" = rnsb_es(x),
"ect" = ect_es(x),
stop("No effect size can be calculated for this query."))
}
11 changes: 6 additions & 5 deletions R/query.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
if (missing(S_words) | missing(A_words)) {
stop("S_words and A_words must be provided.")
}
if (!method %in% c("guess", "weat", "mac", "nas", "semaxis", "rnsb", "rnd")) {
stop("Unkonwn method. Available methods are: guess, weat, mac, nas, semaxis, rnsb, rnd.")
if (!method %in% c("guess", "weat", "mac", "nas", "semaxis", "rnsb", "rnd", "ect")) {
stop("Unkonwn method. Available methods are: guess, weat, mac, nas, semaxis, rnsb, rnd, ect.")
}
if (method == "guess") {
if (missing(T_words) & missing(B_words)) {
Expand All @@ -27,10 +27,10 @@
#'
#' This function makes a query based on the supplied parameters.
#' @param ... additional parameters for the underlying function
#' @param method string, the method to be used to make the query. Available options are: `weat`, `mac`, `nas`, `semaxis`, `rnsb`, `rnd`, `nas` and `guess`. If `guess`, the function selects the best option for you.
#' @param method string, the method to be used to make the query. Available options are: `weat`, `mac`, `nas`, `semaxis`, `rnsb`, `rnd`, `nas`, `ect` and `guess`. If `guess`, the function selects the best option for you.
#' @inheritParams weat
#' @return a sweater S3 object
#' @seealso [weat()], [mac()], [nas()], [semaxis()], [rnsb()], [rnd()], [nas()]
#' @seealso [weat()], [mac()], [nas()], [semaxis()], [rnsb()], [rnd()], [nas()], [ect()]
#' @author Chung-hong Chan
#' @examples
#' data(googlenews)
Expand Down Expand Up @@ -66,6 +66,7 @@ query <- function(w, S_words, T_words, A_words, B_words, method = "guess", verbo
"nas" = nas(w = w, S_words = S_words, A_words = A_words, B_words = B_words, verbose = verbose),
"semaxis" = semaxis(w = w, S_words = S_words, A_words = A_words, B_words = B_words, verbose = verbose, ...),
"rnsb" = rnsb(w = w, S_words = S_words, A_words = A_words, B_words = B_words, verbose = verbose, ...),
"rnd" = rnd(w = w, S_words = S_words, A_words = A_words, B_words = B_words, verbose = verbose)
"rnd" = rnd(w = w, S_words = S_words, A_words = A_words, B_words = B_words, verbose = verbose),
"ect" = ect(w = w, S_words = S_words, A_words = A_words, B_words = B_words, verbose = verbose)
)
}
40 changes: 32 additions & 8 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ knitr::opts_chunk$set(
out.width = "100%"
)
set.seed(46709394)
devtools::load_all()
```

# sweater <img src="man/figures/sweater_logo.svg" align="right" height="200" />
Expand All @@ -26,7 +27,7 @@ The goal of sweater (**S**peedy **W**ord **E**mbedding **A**ssociation **T**est

The package provides functions that are speedy. They are either implemented in C++, or are speedy but accurate approximation of the original implementation proposed by Caliskan et al (2017).

This package provides extra methods such as Relative Norm Distance, SemAxis and Relative Negative Sentiment Bias.
This package provides extra methods such as Relative Norm Distance, Embedding Coherence Test, SemAxis and Relative Negative Sentiment Bias.

If your goal is to reproduce the analysis in Caliskan et al (2017), please consider using the [original Java program](https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/DX4VWP&version=2.0) or the R package [cbn](https://github.com/conjugateprior/cbn) by Lowe. To reproduce the analysis in Garg et al (2018), please consider using the [original Python program](https://github.com/nikhgarg/EmbeddingDynamicStereotypes). To reproduce the analysis in Mazini et al (2019), please consider using the [original Python program](https://github.com/TManzini/DebiasMulticlassWordEmbedding/).

Expand Down Expand Up @@ -63,6 +64,7 @@ It is recommended to use the function `query()` to make a query and `calculate_e
| S_words | A_words | Mean Average Cosine Similarity (Mazini et al. 2019) | mac(), mac_es() |
| S_words | A_words, B_words | Relative Norm Distance (Garg et al. 2018) | rnd(), rnd_es() |
| S_words | A_words, B_words | Relative Negative Sentiment Bias (Sweeney & Najafian. 2019) | rnsb(), rnsb_es() |
| S_words | A_words, B_words | Embedding Coherence Test (Dev & Phillips. 2019) | ect(), ect_es(), plot_ect() |
| S_words | A_words, B_words | SemAxis (An et al. 2018) | semaxis() |
| S_words | A_words, B_words | Normalized Association Score (Caliskan et al. 2017) | nas() |
| S_words, T_words | A_words, B_words | Word Embedding Association Test (Caliskan et al. 2017) | weat(), weat_es(), weat_resampling(), weat_exact() |
Expand All @@ -72,9 +74,11 @@ It is recommended to use the function `query()` to make a query and `calculate_e

The simplest form of bias detection is Mean Average Cosine Similarity (Mazini et al. 2019). The same method is used also in Kroon et al. (2020).

```{r mac_neg}
```{r, eval = FALSE}
require(sweater)
```

```{r mac_neg}
S1 <- c("janitor", "statistician", "midwife", "bailiff", "auctioneer",
"photographer", "geologist", "shoemaker", "athlete", "cashier",
"dancer", "housekeeper", "accountant", "physicist", "gardener",
Expand Down Expand Up @@ -137,6 +141,25 @@ res <- query(small_reddit, S_words = S2, A_words = A2, B_words = B2, method = "s
plot_bias(res)
```

## Example: Embedding Coherence Test

Embedding Coherence Test (Dev & Phillips, 2019) is similar to SemAxis. The only significant different is that no "SemAxis" is calculated (the difference between the average word vectors of `A_words` and `B_words`). Instead, it calculates two separate axes for `A_words` and `B_words`. Then it calculates the proximity of each word in `S_words` with the two axes. It is like doing two separate `mac`, but `ect` averages the word vectors of `A_words` / `B_words` first.

It is important to note that `P` is a 2-D matrix. Hence, the plot is 2-dimensional. Words above the equality line are more associated with `B_words` and vice versa.

```{r ectplot}
res <- query(googlenews, S_words = S1, A_words = A1, B_words = B1, method = "ect")
res$P
plot_bias(res)
```

Effect size can also be calculated. It is the Spearman Correlation Coefficient of the two rows in `P`. Higher value indicates more "coherent", i.e. less bias.

```{r}
calculate_es(res)
```


## Example: Relative Negative Sentiment Bias

This analysis attempts to reproduce the analysis in Sweeney & Najafian (2019).
Expand Down Expand Up @@ -311,11 +334,12 @@ Please note that the sweater project is released with a [Contributor Code of Con
3. Brunet, M. E., Alkalay-Houlihan, C., Anderson, A., & Zemel, R. (2019, May). Understanding the origins of bias in word embeddings. In International Conference on Machine Learning (pp. 803-811). PMLR.
4. Caliskan, Aylin, Joanna J. Bryson, and Arvind Narayanan. "Semantics derived automatically from language corpora contain human-like biases." Science 356.6334 (2017): 183-186.
5. Cohen, J. (1988), Statistical Power Analysis for the Behavioral Sciences, 2nd Edition. Hillsdale: Lawrence Erlbaum.
6. Garg, N., Schiebinger, L., Jurafsky, D., & Zou, J. (2018). Word embeddings quantify 100 years of gender and ethnic stereotypes. Proceedings of the National Academy of Sciences, 115(16), E3635-E3644.
7. Manzini, T., Lim, Y. C., Tsvetkov, Y., & Black, A. W. (2019). Black is to criminal as caucasian is to police: Detecting and removing multiclass bias in word embeddings. arXiv preprint arXiv:1904.04047.
8. McGrath, R. E., & Meyer, G. J. (2006). When effect sizes disagree: the case of r and d. Psychological methods, 11(4), 386.
9. Rosenthal, R. (1991), Meta-Analytic Procedures for Social Research. Newbury Park: Sage
10. Sweeney, C., & Najafian, M. (2019, July). A transparent framework for evaluating unintended demographic bias in word embeddings. In Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics (pp. 1662-1667).
11. Watanabe, K. (2018). Newsmap: A semi-supervised approach to geographical news classification. Digital Journalism, 6(3), 294-309.
6. Dev, S., & Phillips, J. (2019, April). Attenuating bias in word vectors. In The 22nd International Conference on Artificial Intelligence and Statistics (pp. 879-887). PMLR.
7. Garg, N., Schiebinger, L., Jurafsky, D., & Zou, J. (2018). Word embeddings quantify 100 years of gender and ethnic stereotypes. Proceedings of the National Academy of Sciences, 115(16), E3635-E3644.
8. Manzini, T., Lim, Y. C., Tsvetkov, Y., & Black, A. W. (2019). Black is to criminal as caucasian is to police: Detecting and removing multiclass bias in word embeddings. arXiv preprint arXiv:1904.04047.
9. McGrath, R. E., & Meyer, G. J. (2006). When effect sizes disagree: the case of r and d. Psychological methods, 11(4), 386.
10. Rosenthal, R. (1991), Meta-Analytic Procedures for Social Research. Newbury Park: Sage
11. Sweeney, C., & Najafian, M. (2019, July). A transparent framework for evaluating unintended demographic bias in word embeddings. In Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics (pp. 1662-1667).
12. Watanabe, K. (2018). Newsmap: A semi-supervised approach to geographical news classification. Digital Journalism, 6(3), 294-309.

---
Loading

0 comments on commit 3970b4b

Please sign in to comment.