Skip to content

Prefer chat_template.jinja #205

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jun 2, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 16 additions & 5 deletions Sources/Hub/Hub.swift
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ public class LanguageModelConfigurationFromHub {
revision: String,
hubApi: HubApi = .shared
) async throws -> Configurations {
let filesToDownload = ["config.json", "tokenizer_config.json", "chat_template.json", "tokenizer.json"]
let filesToDownload = ["config.json", "tokenizer_config.json", "chat_template.jinja", "chat_template.json", "tokenizer.json"]
let repo = Hub.Repo(id: modelName)

do {
Expand Down Expand Up @@ -195,11 +195,22 @@ public class LanguageModelConfigurationFromHub {
}

// Check for chat template and merge if available
let chatTemplateURL = modelFolder.appending(path: "chat_template.json")
if FileManager.default.fileExists(atPath: chatTemplateURL.path),
let chatTemplateConfig = try? hubApi.configuration(fileURL: chatTemplateURL),
let chatTemplate = chatTemplateConfig.chatTemplate.string()
// Prefer the plain-text .jinja template over the .json template. The .json
// template is used when the .jinja file is missing OR when it exists but
// cannot be read as UTF-8, so an unreadable .jinja file never hides a usable
// .json template.
let chatTemplateJinjaURL = modelFolder.appending(path: "chat_template.jinja")
let chatTemplateJsonURL = modelFolder.appending(path: "chat_template.json")

var chatTemplate: String? = nil
if FileManager.default.fileExists(atPath: chatTemplateJinjaURL.path) {
    // The .jinja file is loaded verbatim as the template text (not parsed as JSON).
    chatTemplate = try? String(contentsOf: chatTemplateJinjaURL, encoding: .utf8)
}
if chatTemplate == nil,
   FileManager.default.fileExists(atPath: chatTemplateJsonURL.path),
   let chatTemplateConfig = try? hubApi.configuration(fileURL: chatTemplateJsonURL)
{
    // Fall back to the .json template, read through the parsed configuration's
    // `chatTemplate` entry; stays nil if that entry is not a string.
    chatTemplate = chatTemplateConfig.chatTemplate.string()
}

if let chatTemplate {
// Create or update tokenizer config with chat template
if var configDict = tokenizerConfig?.dictionary() {
configDict["chat_template"] = .init(chatTemplate)
Expand Down
13 changes: 13 additions & 0 deletions Tests/TokenizersTests/ChatTemplateTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,19 @@ class ChatTemplateTests: XCTestCase {
XCTAssertEqual(decoded, decodedTarget)
}

/// Checks that a chat template shipped only as a `.jinja` file is applied.
/// Background: https://github.com/huggingface/transformers/pull/33957 introduced .jinja template files.
func testJinjaOnlyTemplate() async throws {
    // Expected results for the shared `messages` fixture.
    let expectedTokens = [151643, 151669, 74785, 279, 23670, 15473, 4128, 13, 151670]
    let expectedText = "<|begin▁of▁sentence|><|User|>Describe the Swift programming language.<|Assistant|>"

    // Repo only contains .jinja file, no chat_template.json
    let tokenizer = try await AutoTokenizer.from(pretrained: "FL33TW00D-HF/jinja-test")
    let tokens = try tokenizer.applyChatTemplate(messages: messages)
    let roundTripped = tokenizer.decode(tokens: tokens)

    XCTAssertEqual(tokens, expectedTokens)
    XCTAssertEqual(roundTripped, expectedText)
}

func testQwen2_5WithTools() async throws {
let tokenizer = try await AutoTokenizer.from(pretrained: "mlx-community/Qwen2.5-7B-Instruct-4bit")

Expand Down
Loading