From 4836aaa6bb8b0ba8acbd39f80e6a452e09ab5a5a Mon Sep 17 00:00:00 2001 From: Tristan Miller Date: Tue, 8 Sep 2015 18:32:26 +0200 Subject: [PATCH] Fix issue #19. KrippendorffAlphaAgreement now throws InsufficientDataException on encountering invariant values. --- .../coding/KrippendorffAlphaAgreement.java | 79 ++++++++++++------- 1 file changed, 50 insertions(+), 29 deletions(-) diff --git a/dkpro-statistics-agreement/src/main/java/org/dkpro/statistics/agreement/coding/KrippendorffAlphaAgreement.java b/dkpro-statistics-agreement/src/main/java/org/dkpro/statistics/agreement/coding/KrippendorffAlphaAgreement.java index 1e8b17a..8f3bc44 100644 --- a/dkpro-statistics-agreement/src/main/java/org/dkpro/statistics/agreement/coding/KrippendorffAlphaAgreement.java +++ b/dkpro-statistics-agreement/src/main/java/org/dkpro/statistics/agreement/coding/KrippendorffAlphaAgreement.java @@ -25,6 +25,7 @@ import org.dkpro.statistics.agreement.IAnnotationUnit; import org.dkpro.statistics.agreement.ICategorySpecificAgreement; import org.dkpro.statistics.agreement.IChanceCorrectedDisagreement; +import org.dkpro.statistics.agreement.InsufficientDataException; import org.dkpro.statistics.agreement.distance.IDistanceFunction; /** @@ -61,16 +62,18 @@ public KrippendorffAlphaAgreement(final ICodingAnnotationStudy study, @Override public double calculateObservedDisagreement() { ensureDistanceFunction(); - if (coincidenceMatrix == null) - coincidenceMatrix = CodingAnnotationStudy.countCategoryCoincidence(study); + if (coincidenceMatrix == null) { + coincidenceMatrix = CodingAnnotationStudy.countCategoryCoincidence(study); + } double n = 0.0; double result = 0.0; - for (Entry<Object, Map<Object, Double>> cat1 : coincidenceMatrix.entrySet()) - for (Entry<Object, Double> cat2 : cat1.getValue().entrySet()) { + for (Entry<Object, Map<Object, Double>> cat1 : coincidenceMatrix.entrySet()) { + for (Entry<Object, Double> cat2 : cat1.getValue().entrySet()) { result += cat2.getValue() * distanceFunction.measureDistance(study, cat1.getKey(), cat2.getKey()); n += cat2.getValue(); } + } result 
/= n; return result; } @@ -83,50 +86,62 @@ public double calculateObservedDisagreement() { @Override public double calculateExpectedDisagreement() { ensureDistanceFunction(); - if (coincidenceMatrix == null) - coincidenceMatrix = CodingAnnotationStudy.countCategoryCoincidence(study); + if (coincidenceMatrix == null) { + coincidenceMatrix = CodingAnnotationStudy.countCategoryCoincidence(study); + } - double n = 0.0; + if (study.getCategoryCount() <= 1) { + throw new InsufficientDataException("An annotation study needs at least two different categories; otherwise there is no decision for the raters to agree on."); + } + + double n = 0.0; Map<Object, Double> marginals = new HashMap<Object, Double>(); for (Entry<Object, Map<Object, Double>> cat1 : coincidenceMatrix.entrySet()) { double n_c = 0.0; - for (Entry<Object, Double> cat2 : cat1.getValue().entrySet()) - n_c += cat2.getValue(); + for (Entry<Object, Double> cat2 : cat1.getValue().entrySet()) { + n_c += cat2.getValue(); + } marginals.put(cat1.getKey(), n_c); n += n_c; } double result = 0.0; - for (Entry<Object, Double> cat1 : marginals.entrySet()) - for (Entry<Object, Double> cat2 : marginals.entrySet()) - result += cat1.getValue() * cat2.getValue() + for (Entry<Object, Double> cat1 : marginals.entrySet()) { + for (Entry<Object, Double> cat2 : marginals.entrySet()) { + result += cat1.getValue() * cat2.getValue() * distanceFunction.measureDistance(study, cat1.getKey(), cat2.getKey()); + } + } result /= n * (n - 1.0); return result; } - public double calculateItemAgreement(final ICodingAnnotationItem item) { + @Override + public double calculateItemAgreement(final ICodingAnnotationItem item) { ensureDistanceFunction(); Map<Object, Map<Object, Double>> itemMatrix = CodingAnnotationStudy.countCategoryCoincidence(item); double n = 0.0; double D_O = 0.0; - for (Entry<Object, Map<Object, Double>> cat1 : itemMatrix.entrySet()) - for (Entry<Object, Double> cat2 : cat1.getValue().entrySet()) { + for (Entry<Object, Map<Object, Double>> cat1 : itemMatrix.entrySet()) { + for (Entry<Object, Double> cat2 : cat1.getValue().entrySet()) { D_O += cat2.getValue() * distanceFunction.measureDistance(study, cat1.getKey(), cat2.getKey()); n += cat2.getValue(); } + } D_O /= n; - if (coincidenceMatrix == 
null) - coincidenceMatrix = CodingAnnotationStudy.countCategoryCoincidence(study); + if (coincidenceMatrix == null) { + coincidenceMatrix = CodingAnnotationStudy.countCategoryCoincidence(study); + } n = 0.0; Map<Object, Double> marginals = new TreeMap<Object, Double>(); for (Entry<Object, Map<Object, Double>> cat1 : coincidenceMatrix.entrySet()) { double n_c = 0.0; - for (Entry<Object, Double> cat2 : cat1.getValue().entrySet()) - n_c += cat2.getValue(); + for (Entry<Object, Double> cat2 : cat1.getValue().entrySet()) { + n_c += cat2.getValue(); + } marginals.put(cat1.getKey(), n_c); n += n_c; } @@ -138,13 +153,16 @@ public double calculateItemAgreement(final ICodingAnnotationItem item) { * distanceFunction.measureDistance(study, cat1.getKey(), cat2.getKey()); D_E /= n * (n - 1.0);*/ double D_E = calculateExpectedDisagreement(); - if (D_E == 0.0) - return 1.0; - else - return 1.0 - (D_O / D_E); + if (D_E == 0.0) { + return 1.0; + } + else { + return 1.0 - (D_O / D_E); + } } - public double calculateCategoryAgreement(final Object category) { + @Override + public double calculateCategoryAgreement(final Object category) { ensureDistanceFunction(); final Object NULL_CATEGORY = new Object(); @@ -154,11 +172,14 @@ public double calculateCategoryAgreement(final Object category) { for (ICodingAnnotationItem item : study.getItems()) { int nKeepCategory = 0; int nNullCategory = 0; - for (IAnnotationUnit annotation : item.getUnits()) - if (category.equals(annotation.getCategory())) - nKeepCategory++; - else - nNullCategory++; + for (IAnnotationUnit annotation : item.getUnits()) { + if (category.equals(annotation.getCategory())) { + nKeepCategory++; + } + else { + nNullCategory++; + } + } observedDisagreement += nKeepCategory * nKeepCategory * distanceFunction.measureDistance(study, category, category) + nKeepCategory * nNullCategory * distanceFunction.measureDistance(study, category, NULL_CATEGORY)