RESULT_DIR = "./results"


-def get_adv_demo_scores():
+def get_adv_demo_scores(breakdown=False):
    fs = glob(os.path.join(RESULT_DIR, "adv_demonstration", "**", "*_score.json"), recursive=True)
    # TODO: This won't work if OpenAI or Anthropic models start to have underscores
    model_names = [os.path.basename(f).removesuffix("_score.json").replace("_", "/", 2) for f in fs]
    model_scores = {}
    model_rejections = {}
+    model_breakdowns = {}
    for f, model_name in zip(fs, model_names):
        with open(f) as src:
            scores = json.load(src)
        if not scores:
            continue
        model_scores[model_name] = scores["adv_demonstration"] * 100
        model_rejections[model_name] = scores["adv_demonstration_rej"] * 100
-    return {"score": model_scores, "rejection_rate": model_rejections}
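+        # keep the full per-scenario score dict for the breakdown view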
+        model_breakdowns[model_name] = scores
+    if breakdown:
+        return model_breakdowns
+    else:
+        return {"score": model_scores, "rejection_rate": model_rejections}


-def get_advglue_scores():
+def get_advglue_scores(breakdown=False):
    print(os.path.join(RESULT_DIR, "adv-glue-plus-plus", "summary.json"))
    with open(os.path.join(RESULT_DIR, "adv-glue-plus-plus", "summary.json")) as src:
        scores = json.load(src)
    model_scores = {k.removeprefix("/"): v * 100 for k, v in scores["Accuracy"].items()}
    model_rejections = {k.removeprefix("/"): v * 100 for k, v in scores["RR+NE"].items()}
-    return {"score": model_scores, "rejection_rate": model_rejections}
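+    # summary.json is already loaded above; the full dict doubles as the breakdown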
+    if breakdown:
+        return scores
+    else:
+        return {"score": model_scores, "rejection_rate": model_rejections}


-def get_fairness_scores():
+def get_fairness_scores(breakdown=False):
    fs = glob(os.path.join(RESULT_DIR, "fairness", "**", "final_scores.json"), recursive=True)
    model_names = set([
        os.path.dirname(x).removeprefix(os.path.join(RESULT_DIR, "fairness", "results")).removeprefix("/") for x in fs
    ])
    model_scores = {}
    model_rejections = {}
+    model_breakdown = {}
    for f, model_name in zip(fs, model_names):
        with open(f) as src:
            scores = json.load(src)
        model_scores[model_name] = scores["fairness score"]
        model_rejections[model_name] = scores.get("rejection rate", None)
+        # model_breakdown[model_name] = {
+        #     "zero-shot": {
+        #         "Acc": "",
+        #         "Demographic Parity Difference": "",
+        #         "Equalized Odds Difference": ""
+        #     },
+        #     "few-shot-1": {},
+        #     "few-shot-2": {},
+        #     "Averaged Score": {},
+        # }
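+        # TODO: populate model_breakdown above and return it when breakdown=True (parameter currently unused)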
    return {"score": model_scores, "rejection_rate": model_rejections}


-def get_ethics_scores():
+def get_ethics_scores(breakdown=False):
    df = pd.read_json(os.path.join(RESULT_DIR, "machine_ethics", "generations", "scores.jsonl"), lines=True)
-    df = df[df["dataset"] == "ethics_commonsense_short"].drop_duplicates().set_index("model")[["agg_score", "ref_rate"]]
-    return df.to_dict()
-
-
-def get_ood_scores():
+    if breakdown:
+        keys = ["avg_fpr_ev", "avg_fpr_jb", "acc_few", "acc_zero"]
+        df = df[df["dataset"] == "ethics_commonsense_short"].drop_duplicates().set_index("model")[keys]
+        # rename() returns a copy, so assign it back; map raw keys to readable labels
+        df = df.rename(columns={
+            "acc_few": "few-shot benchmark",
+            "acc_zero": "zero-shot benchmark",
+            "avg_fpr_jb": "jailbreak",
+            "avg_fpr_ev": "evasive"
+        })
+    else:
+        keys = ["agg_score", "ref_rate"]
+        df = df[df["dataset"] == "ethics_commonsense_short"].drop_duplicates().set_index("model")[keys]
+    return df.to_dict()
+
+
+def get_ood_scores(breakdown=False):
    path_prefix = os.path.join(RESULT_DIR, "ood", "results/")
    fs = glob(os.path.join(path_prefix, "**", "final_scores.json"), recursive=True)
    model_names = [os.path.dirname(f).removeprefix(path_prefix) for f in fs]
    model_scores = {}
    model_rejections = {}
+    model_breakdowns = {}
    for f, model_name in zip(fs, model_names):
        with open(f) as src:
            scores = json.load(src)
        if not scores:
            continue
        model_scores[model_name] = scores["score"]
        model_rejections[model_name] = scores.get("rr", None)
-    return {"score": model_scores, "rejection_rate": model_rejections}
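+        # keep the full score dict per model for the breakdown view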
+        model_breakdowns[model_name] = scores
+    if breakdown:
+        return model_breakdowns
+    else:
+        return {"score": model_scores, "rejection_rate": model_rejections}


-def get_privacy_scores():
+def get_privacy_scores(breakdown=False):
    df = pd.read_json(os.path.join(RESULT_DIR, "privacy", "generations", "scores.jsonl"), lines=True)
    # TODO: This won't work if OpenAI or Anthropic models start to have underscores
    df["model"] = df["model"].apply(lambda x: x.replace("_", "/", 2))
-    df = df[df["dataset"] == "all"].drop_duplicates().set_index("model")
-    return df[["privacy_score", "reject_rate", "privacy_score_wo_reject"]].to_dict()
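+    # breakdown: per-dataset leak_rate, stored under "asr" (presumably attack success rate)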
+    if breakdown:
+        keys = ["enron", "pii", "understanding"]
+        model_breakdown = {}
+        models = df["model"].unique().tolist()
+        for model in models:
+            model_breakdown[model] = {}
+            for key in keys:
+                df_key = df[df["dataset"] == key].drop_duplicates().set_index("model")
+                model_breakdown[model][key] = {"asr": df_key.loc[model, "leak_rate"]}
+        return model_breakdown
+    else:
+        df = df[df["dataset"] == "all"].drop_duplicates().set_index("model")
+        return df[["privacy_score", "reject_rate", "privacy_score_wo_reject"]].to_dict()


def get_stereotype_scores():
@@ -123,14 +171,32 @@ def summarize_results():
123 | 171 | "stereotype": get_stereotype_scores(),
|
124 | 172 | "toxicity": get_toxicity_scores()
|
125 | 173 | },
|
| 174 | + "breakdown_results": { |
| 175 | + "adv_demonstration": get_adv_demo_scores(True), |
| 176 | + "adv-glue-plus-plus": get_advglue_scores(True), |
| 177 | + "machine_ethics": get_ethics_scores(True), |
| 178 | + "ood": get_ood_scores(True), |
| 179 | + "privacy": get_privacy_scores(True), |
| 180 | + } |
    }

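+    # normalize key order at every level so the emitted JSON is deterministic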
+    summarized_results = sort_keys(summarized_results)
+
    with open(os.path.join(RESULT_DIR, "summary.json"), "w") as f:
        json.dump(summarized_results, f, indent=4)
-    print(json.dumps(summarized_results, indent=4))
+    print(json.dumps(summarized_results["aggregated_results"], indent=4))

    return summarized_results


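+# Recursively sort dict keys (descending into lists) so the JSON summary has a stable order.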
+def sort_keys(obj):
+    if isinstance(obj, dict):
+        return {k: sort_keys(obj[k]) for k in sorted(obj.keys())}
+    elif isinstance(obj, list):
+        return [sort_keys(element) for element in obj]
+    else:
+        return obj
+
+
if __name__ == "__main__":
    summarize_results()