1
- # python generate_vectors.py --layers $(seq 0 31) --save_activations --model_size "7b"
2
- # python generate_vectors.py --layers $(seq 0 35) --model_size "13b"
3
- # python generate_vectors.py --layers $(seq 0 31) --model_size "7b" --use_base_model
1
+ python generate_vectors.py --layers $(seq 0 31) --save_activations --model_size "7b"
2
+ python generate_vectors.py --layers $(seq 0 35) --model_size "13b"
3
+ python generate_vectors.py --layers $(seq 0 31) --model_size "7b" --use_base_model
4
4
5
- # python normalize_vectors.py
5
+ python normalize_vectors.py
6
6
7
- # python plot_activations.py --layers $(seq 0 31) --model_size "7b"
8
- # python analyze_vectors.py
7
+ python plot_activations.py --layers $(seq 0 31) --model_size "7b"
8
+ python analyze_vectors.py
9
9
10
- # python prompting_with_steering.py --layers $(seq 0 31) --multipliers -1 0 1 --type ab
11
- # python prompting_with_steering.py --layers $(seq 0 35) --multipliers -1 0 1 --type ab --model_size "13b"
12
- # python prompting_with_steering.py --layers $(seq 0 31) --multipliers -1 0 1 --type ab --override_vector_model Llama-2-7b-hf
13
- # python prompting_with_steering.py --layers $(seq 0 31) --multipliers -1 0 1 --type ab --override_vector 13
10
+ python prompting_with_steering.py --layers $(seq 0 31) --multipliers -1 0 1 --type ab
11
+ python prompting_with_steering.py --layers $(seq 0 35) --multipliers -1 0 1 --type ab --model_size "13b"
12
+ python prompting_with_steering.py --layers $(seq 0 31) --multipliers -1 0 1 --type ab --override_vector_model Llama-2-7b-hf
13
+ python prompting_with_steering.py --layers $(seq 0 31) --multipliers -1 0 1 --type ab --override_vector 13
14
14
15
- # python prompting_with_steering.py --layers 13 --multipliers -1 -0.5 0 0.5 1 --type ab
16
- # python prompting_with_steering.py --layers 14 --multipliers -1 -0.5 0 0.5 1 --type ab --model_size "13b"
15
+ python prompting_with_steering.py --layers 13 --multipliers -1 -0.5 0 0.5 1 --type ab
16
+ python prompting_with_steering.py --layers 14 --multipliers -1 -0.5 0 0.5 1 --type ab --model_size "13b"
17
17
18
- # python prompting_with_steering.py --layers 13 --multipliers -1 -0.5 0 0.5 1 --type ab --system_prompt pos
19
- # python prompting_with_steering.py --layers 14 --multipliers -1 -0.5 0 0.5 1 --type ab --model_size "13b" --system_prompt pos
18
+ python prompting_with_steering.py --layers 13 --multipliers -1 -0.5 0 0.5 1 --type ab --system_prompt pos
19
+ python prompting_with_steering.py --layers 14 --multipliers -1 -0.5 0 0.5 1 --type ab --model_size "13b" --system_prompt pos
20
20
21
- # python prompting_with_steering.py --layers 13 --multipliers -1 -0.5 0 0.5 1 --type ab --system_prompt neg
22
- # python prompting_with_steering.py --layers 14 --multipliers -1 -0.5 0 0.5 1 --type ab --model_size "13b" --system_prompt neg
21
+ python prompting_with_steering.py --layers 13 --multipliers -1 -0.5 0 0.5 1 --type ab --system_prompt neg
22
+ python prompting_with_steering.py --layers 14 --multipliers -1 -0.5 0 0.5 1 --type ab --model_size "13b" --system_prompt neg
23
23
24
- # python prompting_with_steering.py --layers 13 --multipliers -2.0 -1.5 -1 0 1 1.5 2.0 --type open_ended
25
- # python prompting_with_steering.py --layers 14 --multipliers -2.0 -1.5 -1 0 1 1.5 2.0 --type open_ended --model_size "13b"
24
+ python prompting_with_steering.py --layers 13 --multipliers -2.0 -1.5 -1 0 1 1.5 2.0 --type open_ended
25
+ python prompting_with_steering.py --layers 14 --multipliers -2.0 -1.5 -1 0 1 1.5 2.0 --type open_ended --model_size "13b"
26
26
27
- # python prompting_with_steering.py --layers 13 --multipliers -2 -1 0 1 2 --type mmlu
28
- # python prompting_with_steering.py --layers 14 --multipliers -2 -1 0 1 2 --type mmlu --model_size "13b"
27
+ python prompting_with_steering.py --layers 13 --multipliers -2 -1 0 1 2 --type mmlu
28
+ python prompting_with_steering.py --layers 14 --multipliers -2 -1 0 1 2 --type mmlu --model_size "13b"
29
29
30
- # python prompting_with_steering.py --layers 13 --multipliers -2 -1 0 1 2 --type truthful_qa --behaviors sycophancy
31
- # python prompting_with_steering.py --layers 14 --multipliers -2 -1 0 1 2 --type truthful_qa --behaviors sycophancy --model_size "13b"
30
+ python prompting_with_steering.py --layers 13 --multipliers -2 -1 0 1 2 --type truthful_qa --behaviors sycophancy
31
+ python prompting_with_steering.py --layers 14 --multipliers -2 -1 0 1 2 --type truthful_qa --behaviors sycophancy --model_size "13b"
32
32
33
- # python plot_results.py --layers $(seq 0 31) --multipliers -1 1 --type ab
34
- # python plot_results.py --layers $(seq 0 35) --multipliers -1 1 --type ab --model_size "13b"
35
- # python plot_results.py --layers $(seq 0 31) --multipliers -1 1 --type ab --override_vector_model Llama-2-7b-hf --title "CAA transfer from base to chat model"
36
- # python plot_results.py --layers $(seq 0 31) --multipliers -1 1 --type ab --override_vector 13 --title "CAA transfer from layer 13 vector to other layers"
33
+ python plot_results.py --layers $(seq 0 31) --multipliers -1 1 --type ab
34
+ python plot_results.py --layers $(seq 0 35) --multipliers -1 1 --type ab --model_size "13b"
35
+ python plot_results.py --layers $(seq 0 31) --multipliers -1 1 --type ab --override_vector_model Llama-2-7b-hf --title "CAA transfer from base to chat model"
36
+ python plot_results.py --layers $(seq 0 31) --multipliers -1 1 --type ab --override_vector 13 --title "CAA transfer from layer 13 vector to other layers"
37
37
38
- # python plot_results.py --layers 13 --multipliers -1 -0.5 0 0.5 1 --type ab --title "Layer 13 - Llama 2 7B Chat"
39
- # python plot_results.py --layers 14 --multipliers -1 -0.5 0 0.5 1 --type ab --model_size "13b" --title "Layer 14 - Llama 2 13B Chat"
38
+ python plot_results.py --layers 13 --multipliers -1 -0.5 0 0.5 1 --type ab --title "Layer 13 - Llama 2 7B Chat"
39
+ python plot_results.py --layers 14 --multipliers -1 -0.5 0 0.5 1 --type ab --model_size "13b" --title "Layer 14 - Llama 2 13B Chat"
40
40
41
- # python plot_results.py --layers 13 --multipliers -2 -1 0 1 2 --type mmlu
42
- # python plot_results.py --layers 14 --multipliers -2 -1 0 1 2 --type mmlu --model_size "13b"
41
+ python plot_results.py --layers 13 --multipliers -2 -1 0 1 2 --type mmlu
42
+ python plot_results.py --layers 14 --multipliers -2 -1 0 1 2 --type mmlu --model_size "13b"
43
43
44
- # python plot_results.py --layers 13 --multipliers -2 -1 0 1 2 --type truthful_qa --behaviors sycophancy
45
- # python plot_results.py --layers 14 --multipliers -2 -1 0 1 2 --type truthful_qa --behaviors sycophancy --model_size "13b"
46
-
47
- # python scoring.py
48
-
49
- # python plot_results.py --layers 13 --multipliers -1.5 -1 0 1 1.5 --type open_ended --title "Layer 13 - Llama 2 7B Chat"
50
- # python plot_results.py --layers 14 --multipliers -1.5 -1 0 1 1.5 --type open_ended --model_size "13b" --title "Layer 14 - Llama 2 13B Chat"
51
-
52
- # Post finetune
53
-
54
- python prompting_with_steering.py --layers 13 --multipliers -1 0 1 --type open_ended --override_model_weights_path finetuned_models/hallucination_pos_finetune_all.pt --behaviors hallucination
55
- python prompting_with_steering.py --layers 13 --multipliers -1 0 1 --type open_ended --override_model_weights_path finetuned_models/hallucination_neg_finetune_all.pt --behaviors hallucination
56
- python prompting_with_steering.py --layers 13 --multipliers -1 0 1 --type open_ended --override_model_weights_path finetuned_models/myopic-reward_pos_finetune_all.pt --behaviors myopic-reward
57
- python prompting_with_steering.py --layers 13 --multipliers -1 0 1 --type open_ended --override_model_weights_path finetuned_models/myopic-reward_neg_finetune_all.pt --behaviors myopic-reward
44
+ python plot_results.py --layers 13 --multipliers -2 -1 0 1 2 --type truthful_qa --behaviors sycophancy
45
+ python plot_results.py --layers 14 --multipliers -2 -1 0 1 2 --type truthful_qa --behaviors sycophancy --model_size "13b"
58
46
59
47
python scoring.py
60
48
61
- python plot_results.py --layers 13 --multipliers -1 0 1 --type open_ended --override_weights finetuned_models/hallucination_pos_finetune_all.pt finetuned_models/hallucination_neg_finetune_all.pt --behaviors hallucination
62
- python plot_results.py --layers 13 --multipliers -1 0 1 --type open_ended --override_weights finetuned_models/myopic-reward_pos_finetune_all.pt finetuned_models/myopic-reward_neg_finetune_all.pt --behaviors myopic-reward
49
+ python plot_results.py --layers 13 --multipliers -1.5 -1 0 1 1.5 --type open_ended --title "Layer 13 - Llama 2 7B Chat"
50
+ python plot_results.py --layers 14 --multipliers -1.5 -1 0 1 1.5 --type open_ended --model_size "13b" --title "Layer 14 - Llama 2 13B Chat"
0 commit comments