1
- from typing import List
2
1
from abc import ABC , abstractmethod
3
2
4
3
from langchain_core .prompts import PromptTemplate
8
7
9
8
10
9
class RedTeamTask (ABC ):
10
+ """
11
+ Abstract base class for defining red team tasks in a conversation.
12
+ """
13
+
11
14
def __init__ (
12
15
self ,
13
16
* ,
@@ -16,6 +19,15 @@ def __init__(
16
19
input_messages_key = "input" ,
17
20
history_messages_key = "chat_history" ,
18
21
) -> None :
22
+ """
23
+ Initialize a RedTeamTask instance.
24
+
25
+ Args:
26
+ objective (str): The objective of the task.
27
+ system_template (PromptTemplate): The system prompt template.
28
+ input_messages_key (str): The key for input messages.
29
+ history_messages_key (str): The key for chat history messages.
30
+ """
19
31
if len (objective ) == 0 :
20
32
raise ValueError ("Objective cannot be empty." )
21
33
@@ -25,6 +37,12 @@ def __init__(
25
37
26
38
@property
27
39
def prompt (self ) -> ChatPromptTemplate :
40
+ """
41
+ Get the chat prompt template.
42
+
43
+ Returns:
44
+ ChatPromptTemplate: The chat prompt template.
45
+ """
28
46
return ChatPromptTemplate .from_messages (
29
47
[
30
48
("system" , self .system_prompt ),
@@ -68,6 +86,10 @@ def evaluate_task_completion(
68
86
69
87
70
88
class RedTeamEndTokenTask (RedTeamTask ):
89
+ """
90
+ Red team task with an end token to mark task completion.
91
+ """
92
+
71
93
def __init__ (
72
94
self ,
73
95
* ,
@@ -77,16 +99,33 @@ def __init__(
77
99
history_messages_key = "chat_history" ,
78
100
end_token = RED_TEAM_END_TOKEN ,
79
101
) -> None :
102
+ """
103
+ Initialize a RedTeamEndTokenTask instance.
104
+
105
+ Args:
106
+ objective (str): The objective of the task.
107
+ system_template (PromptTemplate): The system prompt template.
108
+ input_messages_key (str): The key for input messages.
109
+ history_messages_key (str): The key for chat history messages.
110
+ end_token (str): The token to mark task completion.
111
+ """
80
112
super ().__init__ (
81
113
objective = objective ,
82
114
system_template = system_template .partial (end_token = end_token ),
83
115
input_messages_key = input_messages_key ,
84
116
history_messages_key = history_messages_key ,
85
117
)
118
+
86
119
self ._end_token = end_token
87
120
88
121
@property
def end_token(self) -> str:
    """
    Expose the task's end token as a read-only attribute.

    Returns:
        str: The value held in ``self._end_token``.
    """
    token = self._end_token
    return token
91
130
92
131
def evaluate_task_completion (
@@ -137,6 +176,10 @@ def evaluate_task_completion(
137
176
138
177
139
178
class RedTeamClassifierTask (RedTeamTask ):
179
+ """
180
+ Red team task using a classifier to evaluate completion.
181
+ """
182
+
140
183
def __init__ (
141
184
self ,
142
185
* ,
@@ -146,6 +189,16 @@ def __init__(
146
189
input_messages_key = "input" ,
147
190
history_messages_key = "chat_history" ,
148
191
) -> None :
192
+ """
193
+ Initialize a RedTeamClassifierTask instance.
194
+
195
+ Args:
196
+ objective (str): The objective of the task.
197
+ classifier (BaseClassifier): The classifier used to evaluate completion.
198
+ system_template (PromptTemplate): The system prompt template.
199
+ input_messages_key (str): The key for input messages.
200
+ history_messages_key (str): The key for chat history messages.
201
+ """
149
202
super ().__init__ (
150
203
objective = objective ,
151
204
system_template = system_template ,
0 commit comments