Commit ca94990

remove prompt caching headers as support for them has been removed

1 parent 7ba14a3 · commit ca94990

6 files changed (+81 / −15 lines changed)

docs/my-website/docs/providers/anthropic.md

Lines changed: 3 additions & 1 deletion
@@ -444,7 +444,7 @@ Here's what a sample Raw Request from LiteLLM for Anthropic Context Caching look
  POST Request Sent from LiteLLM:
  curl -X POST \
  https://api.anthropic.com/v1/messages \
- -H 'accept: application/json' -H 'anthropic-version: 2023-06-01' -H 'content-type: application/json' -H 'x-api-key: sk-...' -H 'anthropic-beta: prompt-caching-2024-07-31' \
+ -H 'accept: application/json' -H 'anthropic-version: 2023-06-01' -H 'content-type: application/json' -H 'x-api-key: sk-...' \
  -d '{'model': 'claude-3-5-sonnet-20240620', [
  {
  "role": "user",

@@ -472,6 +472,8 @@ https://api.anthropic.com/v1/messages \
  "max_tokens": 10
  }'
  ```
+
+ **Note:** Anthropic no longer requires the `anthropic-beta: prompt-caching-2024-07-31` header. Prompt caching now works automatically when you use `cache_control` in your messages.
  :::

  ### Caching - Large Context Caching
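For context, a minimal sketch of what this change means for callers: prompt caching is triggered purely by `cache_control` in the message content, with no extra header needed. This mirrors the pattern documented in the anthropic.md page above; the model name and prompt text are illustrative.

```python
import litellm

# No "anthropic-beta: prompt-caching-2024-07-31" header required anymore --
# marking a content block with cache_control is enough.
response = litellm.completion(
    model="anthropic/claude-3-5-sonnet-20240620",
    messages=[
        {
            "role": "system",
            "content": [
                {
                    "type": "text",
                    "text": "Long, reusable system prompt goes here...",
                    "cache_control": {"type": "ephemeral"},
                }
            ],
        },
        {"role": "user", "content": "Summarize the cached context."},
    ],
    max_tokens=10,
)
```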

litellm/llms/anthropic/chat/transformation.py

Lines changed: 7 additions & 2 deletions
@@ -54,7 +54,10 @@
      CompletionTokensDetailsWrapper,
  )
  from litellm.types.utils import Message as LitellmMessage
- from litellm.types.utils import PromptTokensDetailsWrapper, ServerToolUse
+ from litellm.types.utils import (
+     PromptTokensDetailsWrapper,
+     ServerToolUse,
+ )
  from litellm.utils import (
      ModelResponse,
      Usage,

@@ -204,9 +207,11 @@ def get_json_schema_from_pydantic_object(
      )  # Relevant issue: https://github.com/BerriAI/litellm/issues/7755

  def get_cache_control_headers(self) -> dict:
+     # Anthropic no longer requires the prompt-caching beta header
+     # Prompt caching now works automatically when cache_control is used in messages
+     # Reference: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
      return {
          "anthropic-version": "2023-06-01",
-         "anthropic-beta": "prompt-caching-2024-07-31",
      }

  def _map_tool_choice(
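The practical effect: the header dict for cache-control requests now carries only the API version. A quick sanity check, assuming the method lives on `AnthropicConfig` as in this file:

```python
from litellm.llms.anthropic.chat.transformation import AnthropicConfig

headers = AnthropicConfig().get_cache_control_headers()
# The prompt-caching beta header is gone; only the API version remains.
assert headers == {"anthropic-version": "2023-06-01"}
```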

litellm/llms/anthropic/common_utils.py

Lines changed: 11 additions & 5 deletions
@@ -12,7 +12,11 @@
  )
  from litellm.llms.base_llm.base_utils import BaseLLMModelInfo, BaseTokenCounter
  from litellm.llms.base_llm.chat.transformation import BaseLLMException
- from litellm.types.llms.anthropic import AllAnthropicToolsValues, AnthropicMcpServerTool, ANTHROPIC_HOSTED_TOOLS
+ from litellm.types.llms.anthropic import (
+     ANTHROPIC_HOSTED_TOOLS,
+     AllAnthropicToolsValues,
+     AnthropicMcpServerTool,
+ )
  from litellm.types.llms.openai import AllMessageValues
  from litellm.types.utils import TokenCountResponse

@@ -273,8 +277,9 @@ def get_anthropic_beta_list(
          beta_header = self.get_computer_tool_beta_header(computer_tool_used)
          betas.append(beta_header)

-     if prompt_caching_set:
-         betas.append("prompt-caching-2024-07-31")
+     # Anthropic no longer requires the prompt-caching beta header
+     # Prompt caching now works automatically when cache_control is used in messages
+     # Reference: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching

      if file_id_used:
          betas.append("files-api-2025-04-14")

@@ -305,8 +310,9 @@ def get_anthropic_headers(
      container_with_skills_used: bool = False,
  ) -> dict:
      betas = set()
-     if prompt_caching_set:
-         betas.add("prompt-caching-2024-07-31")
+     # Anthropic no longer requires the prompt-caching beta header
+     # Prompt caching now works automatically when cache_control is used in messages
+     # Reference: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
      if computer_tool_used:
          beta_header = self.get_computer_tool_beta_header(computer_tool_used)
          betas.add(beta_header)
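In other words, `prompt_caching_set` is now a no-op for header construction; only the remaining feature flags contribute betas. An illustrative reduction of the logic, not the real method (parameter names taken from the diff; the computer-use beta value is an assumption, since the real code derives it from `get_computer_tool_beta_header()`):

```python
def build_betas(prompt_caching_set: bool, computer_tool_used: bool, file_id_used: bool) -> set:
    betas: set = set()
    # prompt_caching_set is intentionally ignored -- caching is automatic now
    if computer_tool_used:
        betas.add("computer-use-2024-10-22")  # assumed beta value for illustration
    if file_id_used:
        betas.add("files-api-2025-04-14")
    return betas

# Caching alone no longer produces any beta header:
assert build_betas(prompt_caching_set=True, computer_tool_used=False, file_id_used=False) == set()
```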

litellm/model_prices_and_context_window_backup.json

Lines changed: 58 additions & 0 deletions
@@ -249,6 +249,30 @@
          "/v1/images/generations"
      ]
  },
+ "aiml/google/imagen-4.0-ultra-generate-001": {
+     "litellm_provider": "aiml",
+     "metadata": {
+         "notes": "Imagen 4.0 Ultra Generate API - Photorealistic image generation with precise text rendering"
+     },
+     "mode": "image_generation",
+     "output_cost_per_image": 0.063,
+     "source": "https://docs.aimlapi.com/api-references/image-models/google/imagen-4-ultra-generate",
+     "supported_endpoints": [
+         "/v1/images/generations"
+     ]
+ },
+ "aiml/google/nano-banana-pro": {
+     "litellm_provider": "aiml",
+     "metadata": {
+         "notes": "Gemini 3 Pro Image (Nano Banana Pro) - Advanced text-to-image generation with reasoning and 4K resolution support"
+     },
+     "mode": "image_generation",
+     "output_cost_per_image": 0.1575,
+     "source": "https://docs.aimlapi.com/api-references/image-models/google/gemini-3-pro-image-preview",
+     "supported_endpoints": [
+         "/v1/images/generations"
+     ]
+ },
  "amazon.nova-canvas-v1:0": {
      "litellm_provider": "bedrock",
      "max_input_tokens": 2600,

@@ -3508,6 +3532,40 @@
      "supports_service_tier": true,
      "supports_vision": true
  },
+ "azure/gpt-5.2-chat": {
+     "cache_read_input_token_cost": 1.75e-07,
+     "cache_read_input_token_cost_priority": 3.5e-07,
+     "input_cost_per_token": 1.75e-06,
+     "input_cost_per_token_priority": 3.5e-06,
+     "litellm_provider": "azure",
+     "max_input_tokens": 128000,
+     "max_output_tokens": 16384,
+     "max_tokens": 16384,
+     "mode": "chat",
+     "output_cost_per_token": 1.4e-05,
+     "output_cost_per_token_priority": 2.8e-05,
+     "supported_endpoints": [
+         "/v1/chat/completions",
+         "/v1/responses"
+     ],
+     "supported_modalities": [
+         "text",
+         "image"
+     ],
+     "supported_output_modalities": [
+         "text"
+     ],
+     "supports_function_calling": true,
+     "supports_native_streaming": true,
+     "supports_parallel_function_calling": true,
+     "supports_pdf_input": true,
+     "supports_prompt_caching": true,
+     "supports_reasoning": true,
+     "supports_response_schema": true,
+     "supports_system_messages": true,
+     "supports_tool_choice": true,
+     "supports_vision": true
+ },
  "azure/gpt-5.2-chat-2025-12-11": {
      "cache_read_input_token_cost": 1.75e-07,
      "cache_read_input_token_cost_priority": 3.5e-07,

tests/local_testing/test_anthropic_prompt_caching.py

Lines changed: 2 additions & 6 deletions
@@ -104,19 +104,19 @@ def return_val():
      ],
      extra_headers={
          "anthropic-version": "2023-06-01",
-         "anthropic-beta": "prompt-caching-2024-07-31",
      },
  )

  # Print what was called on the mock
  print("call args=", mock_post.call_args)

  expected_url = "https://api.anthropic.com/v1/messages"
+ # Note: anthropic-beta header for prompt-caching is no longer required
+ # Anthropic now supports prompt caching automatically when cache_control is used
  expected_headers = {
      "accept": "application/json",
      "content-type": "application/json",
      "anthropic-version": "2023-06-01",
-     "anthropic-beta": "prompt-caching-2024-07-31",
      "x-api-key": "mock_api_key",
  }

@@ -285,7 +285,6 @@ async def test_anthropic_api_prompt_caching_basic():
      max_tokens=10,
      extra_headers={
          "anthropic-version": "2023-06-01",
-         "anthropic-beta": "prompt-caching-2024-07-31",
      },
  )

@@ -356,7 +355,6 @@ async def test_anthropic_api_prompt_caching_basic_with_cache_creation():
      max_tokens=10,
      extra_headers={
          "anthropic-version": "2023-06-01",
-         "anthropic-beta": "prompt-caching-2024-07-31",
      },
  )

@@ -645,7 +643,6 @@ def return_val():
      ],
      extra_headers={
          "anthropic-version": "2023-06-01",
-         "anthropic-beta": "prompt-caching-2024-07-31",
      },
  )

@@ -657,7 +654,6 @@ def return_val():
      "accept": "application/json",
      "content-type": "application/json",
      "anthropic-version": "2023-06-01",
-     "anthropic-beta": "prompt-caching-2024-07-31",
      "x-api-key": "mock_api_key",
  }

tests/old_proxy_tests/tests/test_anthropic_context_caching.py

Lines changed: 0 additions & 1 deletion
@@ -30,7 +30,6 @@
      ],
      extra_headers={
          "anthropic-version": "2023-06-01",
-         "anthropic-beta": "prompt-caching-2024-07-31",
      },
  )
