@@ -35,7 +35,7 @@ def get_advglue_scores(breakdown=False):
35
35
model_scores = {k .removeprefix ("/" ): v * 100 for k , v in scores ["Accuracy" ].items ()}
36
36
model_rejections = {k .removeprefix ("/" ): v * 100 for k , v in scores ["RR+NE" ].items ()}
37
37
if breakdown :
38
- with open (os .path .join (RESULT_DIR , "adv-glue-plus-plus" , "summary .json" )) as src :
38
+ with open (os .path .join (RESULT_DIR , "adv-glue-plus-plus" , "breakdown .json" )) as src :
39
39
breakdown_scores = json .load (src )
40
40
return breakdown_scores
41
41
else :
@@ -72,13 +72,36 @@ def get_ethics_scores(breakdown=False):
72
72
df = pd .read_json (os .path .join (RESULT_DIR , "machine_ethics" , "generations" , "scores.jsonl" ), lines = True )
73
73
if breakdown :
74
74
keys = ["avg_fpr_ev" , "avg_fpr_jb" , "acc_few" , "acc_zero" ]
75
- df = df [df ["dataset" ] == "ethics_commonsense_short" ].drop_duplicates ().set_index ("model" )[keys ]
76
- df .rename ({
75
+ df = df [df ["dataset" ] == "ethics_commonsense_short" ].drop_duplicates ()
76
+ df = df [["model" ] + keys ]
77
+ df = df .rename ({
77
78
"acc_few" : "few-shot benchmark" ,
78
- "acc_zero" : "few -shot benchmark" ,
79
+ "acc_zero" : "zero -shot benchmark" ,
79
80
"avg_fpr_jb" : "jailbreak" ,
80
81
"avg_fpr_ev" : "evasive"
81
- })
82
+ }, axis = 1 )
83
+
84
+ model_breakdown = {}
85
+ for record in df .to_dict (orient = "records" ):
86
+ model_breakdown ["model" ] = {
87
+ "few-shot benchmark" : record ["few-shot benchmark" ],
88
+ "zero-shot benchmark" : record ["zero-shot benchmark" ],
89
+ "jailbreak" : record ["jailbreak" ],
90
+ "evasive" : record ["evasive" ]
91
+ }
92
+ # "jailbreak": {
93
+ # "brittleness": 1.0
94
+ # },
95
+ # "evasive": {
96
+ # "brittleness": 1.0
97
+ # },
98
+ # "zero-shot benchmark": {
99
+ # "performance": 0.533902323376007
100
+ # },
101
+ # "few-shot benchmark": {
102
+ # "performance": 0.683262209577999
103
+ # }
104
+ return model_breakdown
82
105
else :
83
106
keys = ["agg_score" , "ref_rate" ]
84
107
df = df [df ["dataset" ] == "ethics_commonsense_short" ].drop_duplicates ().set_index ("model" )[keys ]
@@ -101,9 +124,9 @@ def get_ood_scores(breakdown=False):
101
124
model_rejections [model_name ] = scores .get ("rr" , None )
102
125
model_breakdowns [model_name ] = scores
103
126
if breakdown :
104
- return {"score" : model_scores , "rejection_rate" : model_rejections }
105
- else :
106
127
return model_breakdowns
128
+ else :
129
+ return {"score" : model_scores , "rejection_rate" : model_rejections }
107
130
108
131
109
132
def get_privacy_scores (breakdown = False ):
0 commit comments