Skip to content

Commit

Permalink
convert : allow conversion of Mistral HF models (ggerganov#6144)
Browse files Browse the repository at this point in the history
* Allow conversion of Mistral HF models

* Homogenize Llama, Mistral, Mixtral under the same entry.

* Fix tokenizer, permute tensors

* Use sentencepiece tokenizer, or fall back to hfft.

* convert-hf : small fix for mypy

* convert-hf : fix duplicated block_count

* convert-hf : add vocab size to metadata

---------

Co-authored-by: Jared Van Bortel <[email protected]>
  • Loading branch information
2 people authored and hodlen committed Apr 3, 2024
1 parent c07a7b4 commit b3d70bd
Showing 1 changed file with 64 additions and 4 deletions.
68 changes: 64 additions & 4 deletions convert-hf-to-gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
import gguf

from convert import LlamaHfVocab
from convert import LlamaHfVocab, permute


###### MODEL DEFINITIONS ######
Expand Down Expand Up @@ -1052,12 +1052,72 @@ def set_gguf_parameters(self):
self.gguf_writer.add_layer_norm_eps(self.find_hparam(["layer_norm_eps", "norm_eps"]))


@Model.register("MixtralForCausalLM")
class MixtralModel(Model):
@Model.register("LlamaForCausalLM", "MistralForCausalLM", "MixtralForCausalLM")
class LlamaModel(Model):
model_arch = gguf.MODEL_ARCH.LLAMA

def set_vocab(self):
self._set_vocab_sentencepiece()
try:
self. _set_vocab_sentencepiece()
except FileNotFoundError:
self._set_vocab_llama_hf()

def set_gguf_parameters(self):
super().set_gguf_parameters()
hparams = self.hparams
self.gguf_writer.add_vocab_size(hparams["vocab_size"])
self.gguf_writer.add_rope_dimension_count(hparams["hidden_size"] // hparams["num_attention_heads"])

# Same as super class, but permuting q_proj, k_proj
def write_tensors(self):
block_count = self.hparams.get("n_layers", self.hparams.get("num_hidden_layers", self.hparams.get("n_layer")))
tensor_map = gguf.get_tensor_name_map(self.model_arch, block_count)
n_head = self.hparams.get("num_attention_heads")
n_kv_head = self.hparams.get("num_key_value_heads")
for name, data_torch in self.get_tensors():
# we don't need these
if name.endswith((".attention.masked_bias", ".attention.bias", ".attention.rotary_emb.inv_freq")):
continue

old_dtype = data_torch.dtype

# convert any unsupported data types to float32
if data_torch.dtype not in (torch.float16, torch.float32):
data_torch = data_torch.to(torch.float32)

data = data_torch.numpy()

if name.endswith("q_proj.weight"):
data = permute(data, n_head, n_head)
if name.endswith("k_proj.weight"):
data = permute(data, n_head, n_kv_head)

data = data.squeeze()

# map tensor names
new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
if new_name is None:
print(f"Can not map tensor {name!r}")
sys.exit()

n_dims = len(data.shape)
data_dtype = data.dtype

# if f32 desired, convert any float16 to float32
if self.ftype == 0 and data_dtype == np.float16:
data = data.astype(np.float32)

# TODO: Why cant we use these float16 as-is? There should be not reason to store float16 as float32
if self.ftype == 1 and data_dtype == np.float16 and n_dims == 1:
data = data.astype(np.float32)

# if f16 desired, convert any float32 2-dim weight tensors to float16
if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
data = data.astype(np.float16)

print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}")

self.gguf_writer.add_tensor(new_name, data)


@Model.register("GrokForCausalLM")
Expand Down

0 comments on commit b3d70bd

Please sign in to comment.