Skip to content

Commit 5c9e7d9

Browse files
committed
Add unicode tags converter
1 parent 9cce92d commit 5c9e7d9

File tree

4 files changed

+71
-2
lines changed

4 files changed

+71
-2
lines changed

aisploit/converters/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from .stemming import StemmingConverter
1414
from .translation import TranslationConverter
1515
from .unicode_confusable import UnicodeConfusableConverter
16+
from .unicode_tags import UnicodeTagsConverter
1617

1718
__all__ = [
1819
"Base64Converter",
@@ -30,4 +31,5 @@
3031
"StemmingConverter",
3132
"TranslationConverter",
3233
"UnicodeConfusableConverter",
34+
"UnicodeTagsConverter",
3335
]

aisploit/converters/unicode_tags.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
from dataclasses import dataclass
2+
3+
from ..core import BaseConverter
4+
5+
6+
@dataclass
class UnicodeTagsConverter(BaseConverter):
    """Re-encode a prompt by shifting each character into the Unicode Tags block.

    Every character of the prompt is replaced by the code point
    ``0xE0000 + ord(ch)``. For ASCII input this lands in the Unicode Tags
    block (U+E0000..U+E007F), whose characters are normally rendered
    invisibly. ``prefix`` and ``suffix`` are added verbatim (not shifted)
    around the encoded text.

    Attributes:
        prefix: Plain text placed in front of the encoded prompt.
        suffix: Plain text placed after the encoded prompt.
        add_sequence_markers: When true, wrap the encoded prompt in
            U+E0001 (start marker) and U+E007F (end marker).
    """

    prefix: str = ""
    suffix: str = ""
    add_sequence_markers: bool = False

    def _convert(self, prompt: str) -> str:
        """Return ``prompt`` shifted by ``0xE0000``, with optional markers.

        Args:
            prompt: The text to encode.

        Returns:
            ``prefix`` + [U+E0001] + shifted characters + [U+E007F] + ``suffix``,
            where the bracketed markers are present only when
            ``add_sequence_markers`` is set.
        """
        # NOTE(review): only code points below 0x80 map into the Tags block;
        # other characters are shifted by the same offset and fall outside it
        # (and chr() raises ValueError past U+10FFFF) -- confirm callers only
        # pass ASCII prompts.
        encoded = "".join(chr(0xE0000 + ord(ch)) for ch in prompt)

        if self.add_sequence_markers:
            # Wrap after encoding so the start marker is not overwritten by
            # the payload assignment (previously only the end marker survived).
            encoded = chr(0xE0001) + encoded + chr(0xE007F)

        return self.prefix + encoded + self.suffix

aisploit/scanner/plugins/prompt_injection.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@ class PromptInjectionPlugin(SendPromptsPlugin):
3131

3232
def create_prompts(self) -> Sequence[str | Prompt]:
3333
"""Create prompts using templates from the specified dataset."""
34-
3534
prompts = []
3635
for data in self.dataset:
3736
prompt_template = PromptTemplate.from_template(data.template)

examples/converter.ipynb

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
}
2525
],
2626
"source": [
27-
"from IPython.display import display, Markdown\n",
27+
"from IPython.display import display, Markdown, HTML\n",
2828
"import os\n",
2929
"import textwrap\n",
3030
"from dotenv import load_dotenv\n",
@@ -41,6 +41,7 @@
4141
" StemmingConverter,\n",
4242
" UnicodeConfusableConverter,\n",
4343
" TranslationConverter,\n",
44+
" UnicodeTagsConverter,\n",
4445
")\n",
4546
"from aisploit.models import ChatOpenAI\n",
4647
"\n",
@@ -58,6 +59,50 @@
5859
")"
5960
]
6061
},
62+
{
63+
"cell_type": "markdown",
64+
"metadata": {},
65+
"source": [
66+
"## UnicodeTagsConverter"
67+
]
68+
},
69+
{
70+
"cell_type": "code",
71+
"execution_count": 4,
72+
"metadata": {},
73+
"outputs": [
74+
{
75+
"data": {
76+
"text/html": [
77+
"<textarea rows=\"1\" cols=\"30\">START󠁗󠁨󠁡󠁴󠀠󠁩󠁳󠀠󠁴󠁨󠁥󠀠󠁣󠁡󠁰󠁩󠁴󠁡󠁬󠀠󠁯󠁦󠀠󠁆󠁲󠁡󠁮󠁣󠁥󠀿END</textarea>"
78+
],
79+
"text/plain": [
80+
"<IPython.core.display.HTML object>"
81+
]
82+
},
83+
"metadata": {},
84+
"output_type": "display_data"
85+
},
86+
{
87+
"data": {
88+
"text/plain": [
89+
"AIMessage(content=\"It seems like you've made a mistake in your input. Could you please provide more context or details?\", response_metadata={'token_usage': {'completion_tokens': 21, 'prompt_tokens': 9, 'total_tokens': 30}, 'model_name': 'gpt-4', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-17e2154f-9504-45c4-ac36-f8b1b4f7ce01-0')"
90+
]
91+
},
92+
"execution_count": 4,
93+
"metadata": {},
94+
"output_type": "execute_result"
95+
}
96+
],
97+
"source": [
98+
"converter = UnicodeTagsConverter(prefix=\"START\", suffix=\"END\")\n",
99+
"prompt = converter.convert(\"What is the capital of France?\")\n",
100+
"\n",
101+
"display(HTML(f'<textarea rows=\"1\" cols=\"30\">{prompt.to_string()}</textarea>'))\n",
102+
"\n",
103+
"chat_model.invoke(prompt)"
104+
]
105+
},
61106
{
62107
"cell_type": "markdown",
63108
"metadata": {},

0 commit comments

Comments
 (0)