Commit ca94990

remove prompt caching headers as support for them has been removed

1 parent 7ba14a3 · commit ca94990

6 files changed (+81 / −15 lines changed)

docs/my-website/docs/providers/anthropic.md

Lines changed: 3 additions & 1 deletion
@@ -444,7 +444,7 @@ Here's what a sample Raw Request from LiteLLM for Anthropic Context Caching look
  POST Request Sent from LiteLLM:
  curl -X POST \
  https://api.anthropic.com/v1/messages \
- -H 'accept: application/json' -H 'anthropic-version: 2023-06-01' -H 'content-type: application/json' -H 'x-api-key: sk-...' -H 'anthropic-beta: prompt-caching-2024-07-31' \
+ -H 'accept: application/json' -H 'anthropic-version: 2023-06-01' -H 'content-type: application/json' -H 'x-api-key: sk-...' \
  -d '{'model': 'claude-3-5-sonnet-20240620', [
  {
  "role": "user",

@@ -472,6 +472,8 @@ https://api.anthropic.com/v1/messages \
  "max_tokens": 10
  }'
  ```
+
+ **Note:** Anthropic no longer requires the `anthropic-beta: prompt-caching-2024-07-31` header. Prompt caching now works automatically when you use `cache_control` in your messages.
  :::

  ### Caching - Large Context Caching
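For context, a minimal sketch of what this change means for callers: prompt caching is triggered purely by `cache_control` in the message content, with no extra header needed. This mirrors the pattern documented in the anthropic.md page above; the model name and prompt text are illustrative.

```python
import litellm

# No "anthropic-beta: prompt-caching-2024-07-31" header required anymore --
# marking a content block with cache_control is enough.
response = litellm.completion(
    model="anthropic/claude-3-5-sonnet-20240620",
    messages=[
        {
            "role": "system",
            "content": [
                {
                    "type": "text",
                    "text": "Long, reusable system prompt goes here...",
                    "cache_control": {"type": "ephemeral"},
                }
            ],
        },
        {"role": "user", "content": "Summarize the cached context."},
    ],
    max_tokens=10,
)
```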

litellm/llms/anthropic/chat/transformation.py

Lines changed: 7 additions & 2 deletions
@@ -54,7 +54,10 @@
      CompletionTokensDetailsWrapper,
  )
  from litellm.types.utils import Message as LitellmMessage
- from litellm.types.utils import PromptTokensDetailsWrapper, ServerToolUse
+ from litellm.types.utils import (
+     PromptTokensDetailsWrapper,
+     ServerToolUse,
+ )
  from litellm.utils import (
      ModelResponse,
      Usage,

@@ -204,9 +207,11 @@ def get_json_schema_from_pydantic_object(
      )  # Relevant issue: https://github.com/BerriAI/litellm/issues/7755

  def get_cache_control_headers(self) -> dict:
+     # Anthropic no longer requires the prompt-caching beta header
+     # Prompt caching now works automatically when cache_control is used in messages
+     # Reference: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
      return {
          "anthropic-version": "2023-06-01",
-         "anthropic-beta": "prompt-caching-2024-07-31",
      }

  def _map_tool_choice(
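The practical effect: the header dict for cache-control requests now carries only the API version. A quick sanity check, assuming the method lives on `AnthropicConfig` as in this file:

```python
from litellm.llms.anthropic.chat.transformation import AnthropicConfig

headers = AnthropicConfig().get_cache_control_headers()
# The prompt-caching beta header is gone; only the API version remains.
assert headers == {"anthropic-version": "2023-06-01"}
```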

litellm/llms/anthropic/common_utils.py

Lines changed: 11 additions & 5 deletions
@@ -12,7 +12,11 @@
  )
  from litellm.llms.base_llm.base_utils import BaseLLMModelInfo, BaseTokenCounter
  from litellm.llms.base_llm.chat.transformation import BaseLLMException
- from litellm.types.llms.anthropic import AllAnthropicToolsValues, AnthropicMcpServerTool, ANTHROPIC_HOSTED_TOOLS
+ from litellm.types.llms.anthropic import (
+     ANTHROPIC_HOSTED_TOOLS,
+     AllAnthropicToolsValues,
+     AnthropicMcpServerTool,
+ )
  from litellm.types.llms.openai import AllMessageValues
  from litellm.types.utils import TokenCountResponse

@@ -273,8 +277,9 @@ def get_anthropic_beta_list(
          beta_header = self.get_computer_tool_beta_header(computer_tool_used)
          betas.append(beta_header)

-     if prompt_caching_set:
-         betas.append("prompt-caching-2024-07-31")
+     # Anthropic no longer requires the prompt-caching beta header
+     # Prompt caching now works automatically when cache_control is used in messages
+     # Reference: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching

      if file_id_used:
          betas.append("files-api-2025-04-14")

@@ -305,8 +310,9 @@ def get_anthropic_headers(
      container_with_skills_used: bool = False,
  ) -> dict:
      betas = set()
-     if prompt_caching_set:
-         betas.add("prompt-caching-2024-07-31")
+     # Anthropic no longer requires the prompt-caching beta header
+     # Prompt caching now works automatically when cache_control is used in messages
+     # Reference: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
      if computer_tool_used:
          beta_header = self.get_computer_tool_beta_header(computer_tool_used)
          betas.add(beta_header)
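In other words, `prompt_caching_set` is now a no-op for header construction; only the remaining feature flags contribute betas. An illustrative reduction of the logic, not the real method (parameter names taken from the diff; the computer-use beta value is an assumption, since the real code derives it from `get_computer_tool_beta_header()`):

```python
def build_betas(prompt_caching_set: bool, computer_tool_used: bool, file_id_used: bool) -> set:
    betas: set = set()
    # prompt_caching_set is intentionally ignored -- caching is automatic now
    if computer_tool_used:
        betas.add("computer-use-2024-10-22")  # assumed beta value for illustration
    if file_id_used:
        betas.add("files-api-2025-04-14")
    return betas

# Caching alone no longer produces any beta header:
assert build_betas(prompt_caching_set=True, computer_tool_used=False, file_id_used=False) == set()
```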

litellm/model_prices_and_context_window_backup.json

Lines changed: 58 additions & 0 deletions
@@ -249,6 +249,30 @@
          "/v1/images/generations"
      ]
  },
+ "aiml/google/imagen-4.0-ultra-generate-001": {
+     "litellm_provider": "aiml",
+     "metadata": {
+         "notes": "Imagen 4.0 Ultra Generate API - Photorealistic image generation with precise text rendering"
+     },
+     "mode": "image_generation",
+     "output_cost_per_image": 0.063,
+     "source": "https://docs.aimlapi.com/api-references/image-models/google/imagen-4-ultra-generate",
+     "supported_endpoints": [
+         "/v1/images/generations"
+     ]
+ },
+ "aiml/google/nano-banana-pro": {
+     "litellm_provider": "aiml",
+     "metadata": {
+         "notes": "Gemini 3 Pro Image (Nano Banana Pro) - Advanced text-to-image generation with reasoning and 4K resolution support"
+     },
+     "mode": "image_generation",
+     "output_cost_per_image": 0.1575,
+     "source": "https://docs.aimlapi.com/api-references/image-models/google/gemini-3-pro-image-preview",
+     "supported_endpoints": [
+         "/v1/images/generations"
+     ]
+ },
  "amazon.nova-canvas-v1:0": {
      "litellm_provider": "bedrock",
      "max_input_tokens": 2600,

@@ -3508,6 +3532,40 @@
      "supports_service_tier": true,
      "supports_vision": true
  },
+ "azure/gpt-5.2-chat": {
+     "cache_read_input_token_cost": 1.75e-07,
+     "cache_read_input_token_cost_priority": 3.5e-07,
+     "input_cost_per_token": 1.75e-06,
+     "input_cost_per_token_priority": 3.5e-06,
+     "litellm_provider": "azure",
+     "max_input_tokens": 128000,
+     "max_output_tokens": 16384,
+     "max_tokens": 16384,
+     "mode": "chat",
+     "output_cost_per_token": 1.4e-05,
+     "output_cost_per_token_priority": 2.8e-05,
+     "supported_endpoints": [
+         "/v1/chat/completions",
+         "/v1/responses"
+     ],
+     "supported_modalities": [
+         "text",
+         "image"
+     ],
+     "supported_output_modalities": [
+         "text"
+     ],
+     "supports_function_calling": true,
+     "supports_native_streaming": true,
+     "supports_parallel_function_calling": true,
+     "supports_pdf_input": true,
+     "supports_prompt_caching": true,
+     "supports_reasoning": true,
+     "supports_response_schema": true,
+     "supports_system_messages": true,
+     "supports_tool_choice": true,
+     "supports_vision": true
+ },
  "azure/gpt-5.2-chat-2025-12-11": {
      "cache_read_input_token_cost": 1.75e-07,
      "cache_read_input_token_cost_priority": 3.5e-07,

tests/local_testing/test_anthropic_prompt_caching.py

Lines changed: 2 additions & 6 deletions
@@ -104,19 +104,19 @@ def return_val():
      ],
      extra_headers={
          "anthropic-version": "2023-06-01",
-         "anthropic-beta": "prompt-caching-2024-07-31",
      },
  )

  # Print what was called on the mock
  print("call args=", mock_post.call_args)

  expected_url = "https://api.anthropic.com/v1/messages"
+ # Note: anthropic-beta header for prompt-caching is no longer required
+ # Anthropic now supports prompt caching automatically when cache_control is used
  expected_headers = {
      "accept": "application/json",
      "content-type": "application/json",
      "anthropic-version": "2023-06-01",
-     "anthropic-beta": "prompt-caching-2024-07-31",
      "x-api-key": "mock_api_key",
  }

@@ -285,7 +285,6 @@ async def test_anthropic_api_prompt_caching_basic():
      max_tokens=10,
      extra_headers={
          "anthropic-version": "2023-06-01",
-         "anthropic-beta": "prompt-caching-2024-07-31",
      },
  )

@@ -356,7 +355,6 @@ async def test_anthropic_api_prompt_caching_basic_with_cache_creation():
      max_tokens=10,
      extra_headers={
          "anthropic-version": "2023-06-01",
-         "anthropic-beta": "prompt-caching-2024-07-31",
      },
  )

@@ -645,7 +643,6 @@ def return_val():
      ],
      extra_headers={
          "anthropic-version": "2023-06-01",
-         "anthropic-beta": "prompt-caching-2024-07-31",
      },
  )

@@ -657,7 +654,6 @@ def return_val():
      "accept": "application/json",
      "content-type": "application/json",
      "anthropic-version": "2023-06-01",
-     "anthropic-beta": "prompt-caching-2024-07-31",
      "x-api-key": "mock_api_key",
  }

tests/old_proxy_tests/tests/test_anthropic_context_caching.py

Lines changed: 0 additions & 1 deletion
@@ -30,7 +30,6 @@
      ],
      extra_headers={
          "anthropic-version": "2023-06-01",
-         "anthropic-beta": "prompt-caching-2024-07-31",
      },
  )
