@@ -589,6 +589,7 @@ def chat_completion_handler(
589589 logit_bias : Optional [Dict [str , float ]] = None ,
590590 logprobs : Optional [bool ] = None ,
591591 top_logprobs : Optional [int ] = None ,
592+ special : bool = False ,
592593 ** kwargs , # type: ignore
593594 ) -> Union [
594595 llama_types .CreateChatCompletionResponse ,
@@ -691,6 +692,7 @@ def chat_completion_handler(
691692 stopping_criteria = stopping_criteria ,
692693 grammar = grammar ,
693694 logit_bias = logit_bias ,
695+ special = special ,
694696 )
695697 if tool is not None :
696698 tool_name = tool ["function" ]["name" ]
@@ -1426,6 +1428,7 @@ def functionary_chat_handler(
14261428 model : Optional [str ] = None ,
14271429 logits_processor : Optional [llama .LogitsProcessorList ] = None ,
14281430 grammar : Optional [llama .LlamaGrammar ] = None ,
1431+ special : bool = False ,
14291432 ** kwargs , # type: ignore
14301433) -> Union [llama_types .ChatCompletion , Iterator [llama_types .ChatCompletionChunk ]]:
14311434 SYSTEM_MESSAGE = """A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. The assistant calls functions with appropriate input when necessary"""
@@ -1632,6 +1635,7 @@ def message_to_str(msg: llama_types.ChatCompletionRequestMessage):
16321635 model = model ,
16331636 logits_processor = logits_processor ,
16341637 grammar = grammar ,
1638+ special = special ,
16351639 )
16361640 return _convert_completion_to_chat (completion_or_completion_chunks , stream = stream ) # type: ignore
16371641
@@ -1712,6 +1716,7 @@ def message_to_str(msg: llama_types.ChatCompletionRequestMessage):
17121716 mirostat_eta = mirostat_eta ,
17131717 model = model ,
17141718 logits_processor = logits_processor ,
1719+ special = special ,
17151720 ) # type: ignore
17161721
17171722 assert "usage" in completion
@@ -1785,6 +1790,7 @@ def functionary_v1_v2_chat_handler(
17851790 model : Optional [str ] = None ,
17861791 logits_processor : Optional [llama .LogitsProcessorList ] = None ,
17871792 grammar : Optional [llama .LlamaGrammar ] = None ,
1793+ special : bool = False ,
17881794 ** kwargs , # type: ignore
17891795) -> Union [llama_types .ChatCompletion , Iterator [llama_types .ChatCompletionChunk ]]:
17901796 SYSTEM_MESSAGE = """A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. The assistant calls functions with appropriate input when necessary"""
@@ -2001,6 +2007,7 @@ def prepare_messages_for_inference(
20012007 model = model ,
20022008 logits_processor = logits_processor ,
20032009 grammar = grammar ,
2010+ special = special ,
20042011 )
20052012 if stream is False :
20062013 completion_or_completion_chunks ["choices" ][0 ]["text" ] = (
@@ -2064,6 +2071,7 @@ def create_completion(prompt, stop, grammar):
20642071 model = model ,
20652072 logits_processor = logits_processor ,
20662073 grammar = grammar ,
2074+ special = special ,
20672075 ),
20682076 )
20692077
@@ -3582,6 +3590,7 @@ def chatml_function_calling(
35823590 grammar : Optional [llama .LlamaGrammar ] = None ,
35833591 logprobs : Optional [bool ] = None ,
35843592 top_logprobs : Optional [int ] = None ,
3593+ special : bool = False ,
35853594 ** kwargs , # type: ignore
35863595) -> Union [
35873596 llama_types .CreateChatCompletionResponse ,
@@ -3712,6 +3721,7 @@ def chatml_function_calling(
37123721 logits_processor = logits_processor ,
37133722 grammar = grammar ,
37143723 logprobs = top_logprobs if logprobs else None ,
3724+ special = special ,
37153725 ),
37163726 stream = stream ,
37173727 )
@@ -3764,6 +3774,7 @@ def chatml_function_calling(
37643774 model = model ,
37653775 logits_processor = logits_processor ,
37663776 grammar = grammar ,
3777+ special = special ,
37673778 )
37683779 return _convert_completion_to_chat_function (
37693780 tool_name , completion_or_chunks , stream
@@ -3810,6 +3821,7 @@ def chatml_function_calling(
38103821 grammar = llama_grammar .LlamaGrammar .from_string (
38113822 initial_gbnf_tool_grammar , verbose = llama .verbose
38123823 ),
3824+ special = special ,
38133825 )
38143826 completion : llama_types .CreateCompletionResponse = completion_or_chunks # type: ignore
38153827 text = completion ["choices" ][0 ]["text" ]
@@ -3838,6 +3850,7 @@ def chatml_function_calling(
38383850 grammar = llama_grammar .LlamaGrammar .from_string (
38393851 follow_up_gbnf_tool_grammar , verbose = llama .verbose
38403852 ),
3853+ special = special ,
38413854 ),
38423855 stream = stream ,
38433856 )
@@ -3883,6 +3896,7 @@ def chatml_function_calling(
38833896 model = model ,
38843897 logits_processor = logits_processor ,
38853898 grammar = grammar ,
3899+ special = special ,
38863900 )
38873901 completion_or_chunks = cast (
38883902 llama_types .CreateCompletionResponse , completion_or_chunks
@@ -3914,6 +3928,7 @@ def chatml_function_calling(
39143928 grammar = llama_grammar .LlamaGrammar .from_string (
39153929 follow_up_gbnf_tool_grammar , verbose = llama .verbose
39163930 ),
3931+ special = special ,
39173932 )
39183933 response = cast (llama_types .CreateCompletionResponse , response )
39193934
0 commit comments