Description
After saving the model and then loading it back, loading the processor fails with the error below. The transformers version is 4.40.1, torch is 2.0.1+cu118, and the device is a laptop RTX 4080. Is this a transformers version problem? If so, which version should I install?
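For context, here is a minimal sketch of the save/reload flow that triggers this. The directory name is taken from the traceback; the source checkpoint is a hypothetical stand-in (my actual processor pairs a LLaVA-NeXT image processor with a Qwen1.5-0.5B-Chat tokenizer), and the `save_pretrained` call is my assumption about how the processor was written to disk:

```python
from transformers import LlavaNextProcessor

# Hypothetical stand-in for the original checkpoint; the real processor
# was assembled for a custom LLaVA-NeXT + Qwen1.5-0.5B-Chat model.
processor = LlavaNextProcessor.from_pretrained("llava-hf/llava-v1.6-mistral-7b-hf")

# Assumed save step (directory name taken from the traceback).
processor.save_pretrained("LlavaNext1.6-Qwen1.5-0.5B-Chat/model001")

# Reload step: this is the call that raises "TypeError: not a string".
llava_processor = LlavaNextProcessor.from_pretrained(
    "LlavaNext1.6-Qwen1.5-0.5B-Chat/model001"
)
```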
```
{
"name": "TypeError",
"message": "not a string",
"stack": "---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[2], line 1
----> 1 llava_processor = LlavaNextProcessor.from_pretrained(
2 "LlavaNext1.6-Qwen1.5-0.5B-Chat/model001"
3 )
File c:\Users\zhiyuanUSYD\miniconda3\envs\autoGluon\lib\site-packages\transformers\processing_utils.py:465, in ProcessorMixin.from_pretrained(cls, pretrained_model_name_or_path, cache_dir, force_download, local_files_only, token, revision, **kwargs)
462 if token is not None:
463 kwargs["token"] = token
--> 465 args = cls._get_arguments_from_pretrained(pretrained_model_name_or_path, **kwargs)
466 processor_dict, kwargs = cls.get_processor_dict(pretrained_model_name_or_path, **kwargs)
468 return cls.from_args_and_dict(args, processor_dict, **kwargs)
File c:\Users\zhiyuanUSYD\miniconda3\envs\autoGluon\lib\site-packages\transformers\processing_utils.py:511, in ProcessorMixin._get_arguments_from_pretrained(cls, pretrained_model_name_or_path, **kwargs)
508 else:
509 attribute_class = getattr(transformers_module, class_name)
--> 511 args.append(attribute_class.from_pretrained(pretrained_model_name_or_path, **kwargs))
512 return args
File c:\Users\zhiyuanUSYD\miniconda3\envs\autoGluon\lib\site-packages\transformers\tokenization_utils_base.py:2089, in PreTrainedTokenizerBase.from_pretrained(cls, pretrained_model_name_or_path, cache_dir, force_download, local_files_only, token, revision, trust_remote_code, *init_inputs, **kwargs)
2086 else:
2087 logger.info(f"loading file {file_path} from cache at {resolved_vocab_files[file_id]}")
-> 2089 return cls._from_pretrained(
2090 resolved_vocab_files,
2091 pretrained_model_name_or_path,
2092 init_configuration,
2093 *init_inputs,
2094 token=token,
2095 cache_dir=cache_dir,
2096 local_files_only=local_files_only,
2097 _commit_hash=commit_hash,
2098 _is_local=is_local,
2099 trust_remote_code=trust_remote_code,
2100 **kwargs,
2101 )
File c:\Users\zhiyuanUSYD\miniconda3\envs\autoGluon\lib\site-packages\transformers\tokenization_utils_base.py:2311, in PreTrainedTokenizerBase._from_pretrained(cls, resolved_vocab_files, pretrained_model_name_or_path, init_configuration, token, cache_dir, local_files_only, _commit_hash, _is_local, trust_remote_code, *init_inputs, **kwargs)
2309 # Instantiate the tokenizer.
2310 try:
-> 2311 tokenizer = cls(*init_inputs, **init_kwargs)
2312 except OSError:
2313 raise OSError(
2314 "Unable to load vocabulary from file. "
2315 "Please check that the provided vocabulary is accessible and not corrupted."
2316 )
File c:\Users\zhiyuanUSYD\miniconda3\envs\autoGluon\lib\site-packages\transformers\models\llama\tokenization_llama_fast.py:124, in LlamaTokenizerFast.__init__(self, vocab_file, tokenizer_file, clean_up_tokenization_spaces, unk_token, bos_token, eos_token, add_bos_token, add_eos_token, use_default_system_prompt, add_prefix_space, **kwargs)
119 logger.warning_once(
120 "You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers"
121 )
122 kwargs["from_slow"] = True
--> 124 super().__init__(
125 vocab_file=vocab_file,
126 tokenizer_file=tokenizer_file,
127 clean_up_tokenization_spaces=clean_up_tokenization_spaces,
128 unk_token=unk_token,
129 bos_token=bos_token,
130 eos_token=eos_token,
131 add_bos_token=add_bos_token,
132 add_eos_token=add_eos_token,
133 use_default_system_prompt=use_default_system_prompt,
134 **kwargs,
135 )
136 self._add_bos_token = add_bos_token
137 self._add_eos_token = add_eos_token
File c:\Users\zhiyuanUSYD\miniconda3\envs\autoGluon\lib\site-packages\transformers\tokenization_utils_fast.py:117, in PreTrainedTokenizerFast.__init__(self, *args, **kwargs)
114 fast_tokenizer = convert_slow_tokenizer(slow_tokenizer)
115 elif self.slow_tokenizer_class is not None:
116 # We need to create and convert a slow tokenizer to build the backend
--> 117 slow_tokenizer = self.slow_tokenizer_class(*args, **kwargs)
118 fast_tokenizer = convert_slow_tokenizer(slow_tokenizer)
119 else:
File c:\Users\zhiyuanUSYD\miniconda3\envs\autoGluon\lib\site-packages\transformers\models\llama\tokenization_llama.py:169, in LlamaTokenizer.__init__(self, vocab_file, unk_token, bos_token, eos_token, pad_token, sp_model_kwargs, add_bos_token, add_eos_token, clean_up_tokenization_spaces, use_default_system_prompt, spaces_between_special_tokens, legacy, add_prefix_space, **kwargs)
167 self.add_eos_token = add_eos_token
168 self.use_default_system_prompt = use_default_system_prompt
--> 169 self.sp_model = self.get_spm_processor(kwargs.pop("from_slow", False))
170 self.add_prefix_space = add_prefix_space
172 super().__init__(
173 bos_token=bos_token,
174 eos_token=eos_token,
(...)
185 **kwargs,
186 )
File c:\Users\zhiyuanUSYD\miniconda3\envs\autoGluon\lib\site-packages\transformers\models\llama\tokenization_llama.py:196, in LlamaTokenizer.get_spm_processor(self, from_slow)
194 tokenizer = spm.SentencePieceProcessor(**self.sp_model_kwargs)
195 if self.legacy or from_slow: # no dependency on protobuf
--> 196 tokenizer.Load(self.vocab_file)
197 return tokenizer
199 with open(self.vocab_file, "rb") as f:
File c:\Users\zhiyuanUSYD\miniconda3\envs\autoGluon\lib\site-packages\sentencepiece\__init__.py:961, in SentencePieceProcessor.Load(self, model_file, model_proto)
959 if model_proto:
960 return self.LoadFromSerializedProto(model_proto)
--> 961 return self.LoadFromFile(model_file)
File c:\Users\zhiyuanUSYD\miniconda3\envs\autoGluon\lib\site-packages\sentencepiece\__init__.py:316, in SentencePieceProcessor.LoadFromFile(self, arg)
315 def LoadFromFile(self, arg):
--> 316 return _sentencepiece.SentencePieceProcessor_LoadFromFile(self, arg)
TypeError: not a string"
}
```
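For what it's worth, the bottom of the stack shows `tokenizer.Load(self.vocab_file)` handing a non-string (presumably `None`) to SentencePiece, which suggests the saved directory is missing the tokenizer files the fast tokenizer expects. A quick check I would run (the file names are the standard transformers tokenizer artifacts, not something confirmed from my directory):

```python
import os

save_dir = "LlavaNext1.6-Qwen1.5-0.5B-Chat/model001"

# A fast tokenizer normally reloads from tokenizer.json; if that file is
# missing, transformers rebuilds it from the slow tokenizer, which needs
# a SentencePiece model (tokenizer.model). If neither exists, vocab_file
# stays None and SentencePiece raises "TypeError: not a string".
print(sorted(os.listdir(save_dir)))
for name in ("tokenizer.json", "tokenizer.model", "tokenizer_config.json"):
    print(name, "->", os.path.exists(os.path.join(save_dir, name)))
```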