From 00535211092699273644e4adfa7c1b8f4cbd8249 Mon Sep 17 00:00:00 2001
From: longkeyy
Date: Fri, 6 Dec 2024 13:09:35 +0800
Subject: [PATCH 1/2] feat: update qwen model and price

---
Review note: whitespace runs in this series were collapsed before review.
Line breaks were restored from the diff markers and hunk headers. Column
padding was rebuilt per gofmt only for the Ali/SparkDesk/Tencent section of
ModelRatio (fully visible in this hunk); everywhere else single spaces are
kept because the enclosing aligned section is not visible. Apply with
`git am --ignore-whitespace` and run gofmt on the touched files afterwards.

 relay/adaptor/ali/constants.go |  20 +++++-
 relay/billing/ratio/model.go   | 111 ++++++++++++++++++++++++++-------
 2 files changed, 106 insertions(+), 25 deletions(-)

diff --git a/relay/adaptor/ali/constants.go b/relay/adaptor/ali/constants.go
index 3f24ce2e14..f3d9952000 100644
--- a/relay/adaptor/ali/constants.go
+++ b/relay/adaptor/ali/constants.go
@@ -1,7 +1,23 @@
 package ali
 
 var ModelList = []string{
-	"qwen-turbo", "qwen-plus", "qwen-max", "qwen-max-longcontext",
-	"text-embedding-v1",
+	"qwen-turbo", "qwen-turbo-latest",
+	"qwen-plus", "qwen-plus-latest",
+	"qwen-max", "qwen-max-latest",
+	"qwen-max-longcontext",
+	"qwen-vl-max", "qwen-vl-max-latest", "qwen-vl-plus", "qwen-vl-plus-latest",
+	"qwen-vl-ocr", "qwen-vl-ocr-latest",
+	"qwen-audio-turbo",
+	"qwen-math-plus", "qwen-math-plus-latest", "qwen-math-turbo", "qwen-math-turbo-latest",
+	"qwen-coder-plus", "qwen-coder-plus-latest", "qwen-coder-turbo", "qwen-coder-turbo-latest",
+	"qwq-32b-preview", "qwen2.5-72b-instruct", "qwen2.5-32b-instruct", "qwen2.5-14b-instruct", "qwen2.5-7b-instruct", "qwen2.5-3b-instruct", "qwen2.5-1.5b-instruct", "qwen2.5-0.5b-instruct",
+	"qwen2-72b-instruct", "qwen2-57b-a14b-instruct", "qwen2-7b-instruct", "qwen2-1.5b-instruct", "qwen2-0.5b-instruct",
+	"qwen1.5-110b-chat", "qwen1.5-72b-chat", "qwen1.5-32b-chat", "qwen1.5-14b-chat", "qwen1.5-7b-chat", "qwen1.5-1.8b-chat", "qwen1.5-0.5b-chat",
+	"qwen-72b-chat", "qwen-14b-chat", "qwen-7b-chat", "qwen-1.8b-chat", "qwen-1.8b-longcontext-chat",
+	"qwen2-vl-7b-instruct", "qwen2-vl-2b-instruct", "qwen-vl-v1", "qwen-vl-chat-v1",
+	"qwen2-audio-instruct", "qwen-audio-chat",
+	"qwen2.5-math-72b-instruct", "qwen2.5-math-7b-instruct", "qwen2.5-math-1.5b-instruct", "qwen2-math-72b-instruct", "qwen2-math-7b-instruct", "qwen2-math-1.5b-instruct",
+	"qwen2.5-coder-32b-instruct", "qwen2.5-coder-14b-instruct", "qwen2.5-coder-7b-instruct", "qwen2.5-coder-3b-instruct", "qwen2.5-coder-1.5b-instruct", "qwen2.5-coder-0.5b-instruct",
+	"text-embedding-v1", "text-embedding-v3", "text-embedding-v2", "text-embedding-async-v2", "text-embedding-async-v1",
 	"ali-stable-diffusion-xl", "ali-stable-diffusion-v1.5", "wanx-v1",
 }
diff --git a/relay/billing/ratio/model.go b/relay/billing/ratio/model.go
index 1b58ec0902..95ec6b4e63 100644
--- a/relay/billing/ratio/model.go
+++ b/relay/billing/ratio/model.go
@@ -118,29 +118,94 @@ var ModelRatio = map[string]float64{
 	"chatglm_lite": 0.1429, // ¥0.002 / 1k tokens
 	"cogview-3": 0.25 * RMB,
 	// https://help.aliyun.com/zh/dashscope/developer-reference/tongyi-thousand-questions-metering-and-billing
-	"qwen-turbo":                0.5715, // ¥0.008 / 1k tokens
-	"qwen-plus":                 1.4286, // ¥0.02 / 1k tokens
-	"qwen-max":                  1.4286, // ¥0.02 / 1k tokens
-	"qwen-max-longcontext":      1.4286, // ¥0.02 / 1k tokens
-	"text-embedding-v1":         0.05,   // ¥0.0007 / 1k tokens
-	"ali-stable-diffusion-xl":   8,
-	"ali-stable-diffusion-v1.5": 8,
-	"wanx-v1":                   8,
-	"SparkDesk":                 1.2858, // ¥0.018 / 1k tokens
-	"SparkDesk-v1.1":            1.2858, // ¥0.018 / 1k tokens
-	"SparkDesk-v2.1":            1.2858, // ¥0.018 / 1k tokens
-	"SparkDesk-v3.1":            1.2858, // ¥0.018 / 1k tokens
-	"SparkDesk-v3.1-128K":       1.2858, // ¥0.018 / 1k tokens
-	"SparkDesk-v3.5":            1.2858, // ¥0.018 / 1k tokens
-	"SparkDesk-v3.5-32K":        1.2858, // ¥0.018 / 1k tokens
-	"SparkDesk-v4.0":            1.2858, // ¥0.018 / 1k tokens
-	"360GPT_S2_V9":              0.8572, // ¥0.012 / 1k tokens
-	"embedding-bert-512-v1":     0.0715, // ¥0.001 / 1k tokens
-	"embedding_s1_v1":           0.0715, // ¥0.001 / 1k tokens
-	"semantic_similarity_s1_v1": 0.0715, // ¥0.001 / 1k tokens
-	"hunyuan":                   7.143,  // ¥0.1 / 1k tokens // https://cloud.tencent.com/document/product/1729/97731#e0e6be58-60c8-469f-bdeb-6c264ce3b4d0
-	"ChatStd":                   0.01 * RMB,
-	"ChatPro":                   0.1 * RMB,
+	"qwen-turbo":                  1.4286, // ¥0.02 / 1k tokens
+	"qwen-turbo-latest":           1.4286,
+	"qwen-plus":                   1.4286,
+	"qwen-plus-latest":            1.4286,
+	"qwen-max":                    1.4286,
+	"qwen-max-latest":             1.4286,
+	"qwen-max-longcontext":        1.4286,
+	"qwen-vl-max":                 1.4286,
+	"qwen-vl-max-latest":          1.4286,
+	"qwen-vl-plus":                1.4286,
+	"qwen-vl-plus-latest":         1.4286,
+	"qwen-vl-ocr":                 1.4286,
+	"qwen-vl-ocr-latest":          1.4286,
+	"qwen-audio-turbo":            1.4286,
+	"qwen-math-plus":              1.4286,
+	"qwen-math-plus-latest":       1.4286,
+	"qwen-math-turbo":             1.4286,
+	"qwen-math-turbo-latest":      1.4286,
+	"qwen-coder-plus":             1.4286,
+	"qwen-coder-plus-latest":      1.4286,
+	"qwen-coder-turbo":            1.4286,
+	"qwen-coder-turbo-latest":     1.4286,
+	"qwq-32b-preview":             1.4286,
+	"qwen2.5-72b-instruct":        1.4286,
+	"qwen2.5-32b-instruct":        1.4286,
+	"qwen2.5-14b-instruct":        1.4286,
+	"qwen2.5-7b-instruct":         1.4286,
+	"qwen2.5-3b-instruct":         1.4286,
+	"qwen2.5-1.5b-instruct":       1.4286,
+	"qwen2.5-0.5b-instruct":       1.4286,
+	"qwen2-72b-instruct":          1.4286,
+	"qwen2-57b-a14b-instruct":     1.4286,
+	"qwen2-7b-instruct":           1.4286,
+	"qwen2-1.5b-instruct":         1.4286,
+	"qwen2-0.5b-instruct":         1.4286,
+	"qwen1.5-110b-chat":           1.4286,
+	"qwen1.5-72b-chat":            1.4286,
+	"qwen1.5-32b-chat":            1.4286,
+	"qwen1.5-14b-chat":            1.4286,
+	"qwen1.5-7b-chat":             1.4286,
+	"qwen1.5-1.8b-chat":           1.4286,
+	"qwen1.5-0.5b-chat":           1.4286,
+	"qwen-72b-chat":               1.4286,
+	"qwen-14b-chat":               1.4286,
+	"qwen-7b-chat":                1.4286,
+	"qwen-1.8b-chat":              1.4286,
+	"qwen-1.8b-longcontext-chat":  1.4286,
+	"qwen2-vl-7b-instruct":        1.4286,
+	"qwen2-vl-2b-instruct":        1.4286,
+	"qwen-vl-v1":                  1.4286,
+	"qwen-vl-chat-v1":             1.4286,
+	"qwen2-audio-instruct":        1.4286,
+	"qwen-audio-chat":             1.4286,
+	"qwen2.5-math-72b-instruct":   1.4286,
+	"qwen2.5-math-7b-instruct":    1.4286,
+	"qwen2.5-math-1.5b-instruct":  1.4286,
+	"qwen2-math-72b-instruct":     1.4286,
+	"qwen2-math-7b-instruct":      1.4286,
+	"qwen2-math-1.5b-instruct":    1.4286,
+	"qwen2.5-coder-32b-instruct":  1.4286,
+	"qwen2.5-coder-14b-instruct":  1.4286,
+	"qwen2.5-coder-7b-instruct":   1.4286,
+	"qwen2.5-coder-3b-instruct":   1.4286,
+	"qwen2.5-coder-1.5b-instruct": 1.4286,
+	"qwen2.5-coder-0.5b-instruct": 1.4286,
+	"text-embedding-v1":           0.05, // ¥0.0007 / 1k tokens
+	"text-embedding-v3":           0.05,
+	"text-embedding-v2":           0.05,
+	"text-embedding-async-v2":     0.05,
+	"text-embedding-async-v1":     0.05,
+	"ali-stable-diffusion-xl":     8.00,
+	"ali-stable-diffusion-v1.5":   8.00,
+	"wanx-v1":                     8.00,
+	"SparkDesk":                   1.2858, // ¥0.018 / 1k tokens
+	"SparkDesk-v1.1":              1.2858, // ¥0.018 / 1k tokens
+	"SparkDesk-v2.1":              1.2858, // ¥0.018 / 1k tokens
+	"SparkDesk-v3.1":              1.2858, // ¥0.018 / 1k tokens
+	"SparkDesk-v3.1-128K":         1.2858, // ¥0.018 / 1k tokens
+	"SparkDesk-v3.5":              1.2858, // ¥0.018 / 1k tokens
+	"SparkDesk-v3.5-32K":          1.2858, // ¥0.018 / 1k tokens
+	"SparkDesk-v4.0":              1.2858, // ¥0.018 / 1k tokens
+	"360GPT_S2_V9":                0.8572, // ¥0.012 / 1k tokens
+	"embedding-bert-512-v1":       0.0715, // ¥0.001 / 1k tokens
+	"embedding_s1_v1":             0.0715, // ¥0.001 / 1k tokens
+	"semantic_similarity_s1_v1":   0.0715, // ¥0.001 / 1k tokens
+	"hunyuan":                     7.143,  // ¥0.1 / 1k tokens // https://cloud.tencent.com/document/product/1729/97731#e0e6be58-60c8-469f-bdeb-6c264ce3b4d0
+	"ChatStd":                     0.01 * RMB,
+	"ChatPro":                     0.1 * RMB,
 	// https://platform.moonshot.cn/pricing
 	"moonshot-v1-8k": 0.012 * RMB,
 	"moonshot-v1-32k": 0.024 * RMB,

From 33af45d4a9b21772b5cf6f6b2e208759438c0ade Mon Sep 17 00:00:00 2001
From: longkeyy
Date: Fri, 6 Dec 2024 13:35:27 +0800
Subject: [PATCH 2/2] feat: update openai model and price

---
Review notes:
* "gpt-4o-2024-08-06" had been appended to the gpt-4o line of ModelList even
  though the unchanged line right below already lists it; the duplicate
  addition is dropped (hunk line counts are unchanged).
* o1-preview / o1-mini ratios now follow the convention of the neighbouring
  entries (gpt-4o-2024-08-06: $0.0025 / 1K tokens -> 1.25, i.e. one unit per
  $0.002 / 1K): $0.015 / 1K -> 7.5 and $0.003 / 1K -> 1.5. The submitted
  values 15 and 3 were the per-1M-token dollar prices and would bill 2x.
* Whitespace in this patch is single-spaced (the aligned sections it touches
  are not fully visible); apply with `git am --ignore-whitespace`, then gofmt.

 relay/adaptor/openai/constants.go | 4 +++-
 relay/billing/ratio/model.go      | 5 +++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/relay/adaptor/openai/constants.go b/relay/adaptor/openai/constants.go
index aacdba1ad3..52446f65eb 100644
--- a/relay/adaptor/openai/constants.go
+++ b/relay/adaptor/openai/constants.go
@@ -7,7 +7,9 @@ var ModelList = []string{
 	"gpt-4", "gpt-4-0314", "gpt-4-0613", "gpt-4-1106-preview", "gpt-4-0125-preview",
 	"gpt-4-32k", "gpt-4-32k-0314", "gpt-4-32k-0613",
 	"gpt-4-turbo-preview", "gpt-4-turbo", "gpt-4-turbo-2024-04-09",
-	"gpt-4o", "gpt-4o-2024-05-13",
+	"gpt-4o", "gpt-4o-2024-05-13", "gpt-4o-2024-11-20",
+	"o1-preview", "o1-preview-2024-09-12",
+	"o1-mini", "o1-mini-2024-09-12",
 	"gpt-4o-2024-08-06",
 	"chatgpt-4o-latest",
 	"gpt-4o-mini", "gpt-4o-mini-2024-07-18",
diff --git a/relay/billing/ratio/model.go b/relay/billing/ratio/model.go
index 95ec6b4e63..e3e6aa663c 100644
--- a/relay/billing/ratio/model.go
+++ b/relay/billing/ratio/model.go
@@ -22,6 +22,10 @@ const (
 // 1 === ¥0.014 / 1k tokens
 var ModelRatio = map[string]float64{
 	// https://openai.com/pricing
+	"o1-preview": 7.5, // $0.015 / 1K tokens
+	"o1-preview-2024-09-12": 7.5, // $0.015 / 1K tokens
+	"o1-mini": 1.5, // $0.003 / 1K tokens
+	"o1-mini-2024-09-12": 1.5, // $0.003 / 1K tokens
 	"gpt-4": 15,
 	"gpt-4-0314": 15,
 	"gpt-4-0613": 15,
@@ -37,6 +41,7 @@ var ModelRatio = map[string]float64{
 	"chatgpt-4o-latest": 2.5, // $0.005 / 1K tokens
 	"gpt-4o-2024-05-13": 2.5, // $0.005 / 1K tokens
 	"gpt-4o-2024-08-06": 1.25, // $0.0025 / 1K tokens
+	"gpt-4o-2024-11-20": 1.25, // $0.0025 / 1K tokens
 	"gpt-4o-mini": 0.075, // $0.00015 / 1K tokens
 	"gpt-4o-mini-2024-07-18": 0.075, // $0.00015 / 1K tokens
 	"gpt-4-vision-preview": 5, // $0.01 / 1K tokens