
Commit b96192e

Add result breakdown for some perspectives
1 parent 9b278f5 commit b96192e


src/dt/summarize.py

Lines changed: 82 additions & 16 deletions
@@ -7,74 +7,122 @@
 RESULT_DIR = "./results"
 
 
-def get_adv_demo_scores():
+def get_adv_demo_scores(breakdown=False):
     fs = glob(os.path.join(RESULT_DIR, "adv_demonstration", "**", "*_score.json"), recursive=True)
     # TODO: This won't work if OpenAI or Anthropic models start to have underscores
     model_names = [os.path.basename(f).removesuffix("_score.json").replace("_", "/", 2) for f in fs]
     model_scores = {}
     model_rejections = {}
+    model_breakdowns = {}
     for f, model_name in zip(fs, model_names):
         with open(f) as src:
             scores = json.load(src)
         if not scores:
             continue
         model_scores[model_name] = scores["adv_demonstration"] * 100
         model_rejections[model_name] = scores["adv_demonstration_rej"] * 100
-    return {"score": model_scores, "rejection_rate": model_rejections}
+        model_breakdowns[model_name] = scores
+    if breakdown:
+        return model_breakdowns
+    else:
+        return {"score": model_scores, "rejection_rate": model_rejections}
 
 
-def get_advglue_scores():
+def get_advglue_scores(breakdown=False):
     print(os.path.join(RESULT_DIR, "adv-glue-plus-plus", "summary.json"))
     with open(os.path.join(RESULT_DIR, "adv-glue-plus-plus", "summary.json")) as src:
         scores = json.load(src)
     model_scores = {k.removeprefix("/"): v * 100 for k, v in scores["Accuracy"].items()}
     model_rejections = {k.removeprefix("/"): v * 100 for k, v in scores["RR+NE"].items()}
-    return {"score": model_scores, "rejection_rate": model_rejections}
+    if breakdown:
+        with open(os.path.join(RESULT_DIR, "adv-glue-plus-plus", "summary.json")) as src:
+            breakdown_scores = json.load(src)
+        return breakdown_scores
+    else:
+        return {"score": model_scores, "rejection_rate": model_rejections}
 
 
-def get_fairness_scores():
+def get_fairness_scores(breakdown=False):
     fs = glob(os.path.join(RESULT_DIR, "fairness", "**", "final_scores.json"), recursive=True)
     model_names = set([
         os.path.dirname(x).removeprefix(os.path.join(RESULT_DIR, "fairness", "results")).removeprefix("/") for x in fs
     ])
     model_scores = {}
     model_rejections = {}
+    model_breakdown = {}
     for f, model_name in zip(fs, model_names):
         with open(f) as src:
             scores = json.load(src)
         model_scores[model_name] = scores["fairness score"]
         model_rejections[model_name] = scores.get("rejection rate", None)
+        # model_breakdown[model_name] = {
+        #     "zero-shot": {
+        #         "Acc": "",
+        #         "Demographic Parity Difference": "",
+        #         "Equalized Odds Difference": " "
+        #     },
+        #     "few-shot-1": {},
+        #     "few-shot-2": {},
+        #     "Averaged Score": {},
+        # }
     return {"score": model_scores, "rejection_rate": model_rejections}
 
 
-def get_ethics_scores():
+def get_ethics_scores(breakdown=False):
     df = pd.read_json(os.path.join(RESULT_DIR, "machine_ethics", "generations", "scores.jsonl"), lines=True)
-    df = df[df["dataset"] == "ethics_commonsense_short"].drop_duplicates().set_index("model")[["agg_score", "ref_rate"]]
-    return df.to_dict()
-
-
-def get_ood_scores():
+    if breakdown:
+        keys = ["avg_fpr_ev", "avg_fpr_jb", "acc_few", "acc_zero"]
+        df = df[df["dataset"] == "ethics_commonsense_short"].drop_duplicates().set_index("model")[keys]
+        # rename is not in-place: assign the result back, and rename columns (not the index)
+        df = df.rename(columns={
+            "acc_few": "few-shot benchmark",
+            "acc_zero": "zero-shot benchmark",
+            "avg_fpr_jb": "jailbreak",
+            "avg_fpr_ev": "evasive"
+        })
+    else:
+        keys = ["agg_score", "ref_rate"]
+        df = df[df["dataset"] == "ethics_commonsense_short"].drop_duplicates().set_index("model")[keys]
+    return df.to_dict()
+
+
+def get_ood_scores(breakdown=False):
     path_prefix = os.path.join(RESULT_DIR, "ood", "results/")
     fs = glob(os.path.join(path_prefix, "**", "final_scores.json"), recursive=True)
     model_names = [os.path.dirname(f).removeprefix(path_prefix) for f in fs]
     model_scores = {}
     model_rejections = {}
+    model_breakdowns = {}
     for f, model_name in zip(fs, model_names):
         with open(f) as src:
             scores = json.load(src)
         if not scores:
             continue
         model_scores[model_name] = scores["score"]
         model_rejections[model_name] = scores.get("rr", None)
-    return {"score": model_scores, "rejection_rate": model_rejections}
+        model_breakdowns[model_name] = scores
+    if breakdown:
+        return model_breakdowns
+    else:
+        return {"score": model_scores, "rejection_rate": model_rejections}
 
 
-def get_privacy_scores():
+def get_privacy_scores(breakdown=False):
     df = pd.read_json(os.path.join(RESULT_DIR, "privacy", "generations", "scores.jsonl"), lines=True)
     # TODO: This won't work if OpenAI or Anthropic models start to have underscores
     df["model"] = df["model"].apply(lambda x: x.replace("_", "/", 2))
-    df = df[df["dataset"] == "all"].drop_duplicates().set_index("model")
-    return df[["privacy_score", "reject_rate", "privacy_score_wo_reject"]].to_dict()
+    if breakdown:
+        keys = ["enron", "pii", "understanding"]
+        model_breakdown = {}
+        models = df["model"].unique().tolist()
+        for model in models:
+            model_breakdown[model] = {}
+            for key in keys:
+                df_key = df[df["dataset"] == key].drop_duplicates().set_index("model")
+                model_breakdown[model][key] = {"asr": df_key.loc[model, "leak_rate"]}
+        return model_breakdown
+    else:
+        df = df[df["dataset"] == "all"].drop_duplicates().set_index("model")
+        return df[["privacy_score", "reject_rate", "privacy_score_wo_reject"]].to_dict()
 
 
 def get_stereotype_scores():
@@ -123,14 +171,32 @@ def summarize_results():
             "stereotype": get_stereotype_scores(),
             "toxicity": get_toxicity_scores()
         },
+        "breakdown_results": {
+            "adv_demonstration": get_adv_demo_scores(True),
+            "adv-glue-plus-plus": get_advglue_scores(True),
+            "machine_ethics": get_ethics_scores(True),
+            "ood": get_ood_scores(True),
+            "privacy": get_privacy_scores(True),
+        }
     }
 
+    summarized_results = sort_keys(summarized_results)
+
     with open(os.path.join(RESULT_DIR, "summary.json"), "w") as f:
         json.dump(summarized_results, f, indent=4)
-    print(json.dumps(summarized_results, indent=4))
+    print(json.dumps(summarized_results["aggregated_results"], indent=4))
 
     return summarized_results
 
 
+def sort_keys(obj):
+    if isinstance(obj, dict):
+        return {k: sort_keys(obj[k]) for k in sorted(obj.keys())}
+    elif isinstance(obj, list):
+        return [sort_keys(element) for element in obj]
+    else:
+        return obj
+
+
 if __name__ == "__main__":
     summarize_results()
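
For orientation, a minimal usage sketch of the new breakdown flag. It assumes the module is importable as dt.summarize (inferred from the src/dt/summarize.py path) and that ./results is populated with the per-perspective score files:

    from dt.summarize import get_adv_demo_scores, summarize_results

    # Aggregated view: {"score": {model: ...}, "rejection_rate": {model: ...}}
    aggregated = get_adv_demo_scores()

    # Breakdown view: the raw per-model score dicts, keyed by model name
    per_model = get_adv_demo_scores(breakdown=True)

    # Writes ./results/summary.json containing both "aggregated_results"
    # and "breakdown_results", with keys recursively sorted by sort_keys()
    summary = summarize_results()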
