按照文档“https://maxkb.cn/docs/dev_manual/dev_environment/”搭建了开发环境,但索引一直失败,local_model 服务报错。我增加了一点日志,最终输出如下:
ERROR:root:--------------------------------
ERROR:root:model_name: /opt/maxkb/model/embedding/shibing624_text2vec-base-chinese
ERROR:root:model_credential: {'cache_folder': '/opt/maxkb/model/'}
ERROR:root:model_kwargs: {'model_id': '42f63a3d-427e-11ef-b3ec-a8a1595801ab', 'use_local': True, 'streaming': True}
ERROR:root:--------------------------------
2025-03-07 16:42:09 [handle_exception ERROR] stat: path should be string, bytes, os.PathLike or integer, not NoneType:Traceback (most recent call last):
File "/var/gitrepo/MaxKB/venv/lib/python3.11/site-packages/rest_framework/views.py", line 506, in dispatch
response = handler(request, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/var/gitrepo/MaxKB/apps/setting/views/model_apply.py", line 29, in post
ModelApplySerializers(data={'model_id': model_id}).embed_documents(request.data))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/var/gitrepo/MaxKB/apps/setting/serializers/model_apply_serializers.py", line 58, in embed_documents
model = get_embedding_model(self.data.get('model_id'))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/var/gitrepo/MaxKB/apps/setting/serializers/model_apply_serializers.py", line 24, in get_embedding_model
embedding_model = ModelManage.get_model(model_id,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/var/gitrepo/MaxKB/apps/common/config/embedding_config.py", line 28, in get_model
model_instance = get_model(_id)
^^^^^^^^^^^^^^
File "/var/gitrepo/MaxKB/apps/setting/serializers/model_apply_serializers.py", line 25, in <lambda>
lambda _id: get_model(model, use_local=True))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/var/gitrepo/MaxKB/apps/setting/models_provider/__init__.py", line 42, in get_model
return get_model_(model.provider, model.model_type, model.model_name, model.credential, str(model.id), **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/var/gitrepo/MaxKB/apps/setting/models_provider/__init__.py", line 27, in get_model_
model = get_provider(provider).get_model(model_type, model_name,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/var/gitrepo/MaxKB/apps/setting/models_provider/base_model_provider.py", line 83, in get_model
return model_info.model_class.new_instance(model_type, model_name, model_credential, **model_kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/var/gitrepo/MaxKB/apps/setting/models_provider/impl/local_model_provider/model/embedding.py", line 61, in new_instance
return LocalEmbedding(model_name=model_name, cache_folder='/opt/maxkb/model/embedding',
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/var/gitrepo/MaxKB/venv/lib/python3.11/site-packages/langchain_huggingface/embeddings/huggingface.py", line 61, in __init__
self.client = sentence_transformers.SentenceTransformer(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/var/gitrepo/MaxKB/venv/lib/python3.11/site-packages/sentence_transformers/SentenceTransformer.py", line 197, in __init__
modules = self._load_sbert_model(
^^^^^^^^^^^^^^^^^^^^^^^
File "/var/gitrepo/MaxKB/venv/lib/python3.11/site-packages/sentence_transformers/SentenceTransformer.py", line 1296, in _load_sbert_model
module = Transformer(model_name_or_path, cache_dir=cache_folder, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/var/gitrepo/MaxKB/venv/lib/python3.11/site-packages/sentence_transformers/models/Transformer.py", line 38, in __init__
self.tokenizer = AutoTokenizer.from_pretrained(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/var/gitrepo/MaxKB/venv/lib/python3.11/site-packages/transformers/models/auto/tokenization_auto.py", line 944, in from_pretrained
return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/var/gitrepo/MaxKB/venv/lib/python3.11/site-packages/transformers/tokenization_utils_base.py", line 2052, in from_pretrained
return cls._from_pretrained(
^^^^^^^^^^^^^^^^^^^^^
File "/var/gitrepo/MaxKB/venv/lib/python3.11/site-packages/transformers/tokenization_utils_base.py", line 2090, in _from_pretrained
slow_tokenizer = (cls.slow_tokenizer_class)._from_pretrained(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/var/gitrepo/MaxKB/venv/lib/python3.11/site-packages/transformers/tokenization_utils_base.py", line 2292, in _from_pretrained
tokenizer = cls(*init_inputs, **init_kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/var/gitrepo/MaxKB/venv/lib/python3.11/site-packages/transformers/models/bert/tokenization_bert.py", line 114, in __init__
if not os.path.isfile(vocab_file):
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "<frozen genericpath>", line 30, in isfile
TypeError: stat: path should be string, bytes, os.PathLike or integer, not NoneType
2025-03-07 16:42:09 [basehttp INFO] "POST /api/model/42f63a3d-427e-11ef-b3ec-a8a1595801ab/embed_documents HTTP/1.1" 200 114
ERROR:root:--------------------------------
ERROR:root:model_name: /opt/maxkb/model/embedding/shibing624_text2vec-base-chinese
ERROR:root:model_credential: {'cache_folder': '/opt/maxkb/model/'}
ERROR:root:model_kwargs: {'model_id': '42f63a3d-427e-11ef-b3ec-a8a1595801ab', 'use_local': True, 'streaming': True}
ERROR:root:--------------------------------
2025-03-07 16:42:10 [handle_exception ERROR] stat: path should be string, bytes, os.PathLike or integer, not NoneType:Traceback (most recent call last):
File "/var/gitrepo/MaxKB/venv/lib/python3.11/site-packages/rest_framework/views.py", line 506, in dispatch
response = handler(request, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/var/gitrepo/MaxKB/apps/setting/views/model_apply.py", line 29, in post
ModelApplySerializers(data={'model_id': model_id}).embed_documents(request.data))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/var/gitrepo/MaxKB/apps/setting/serializers/model_apply_serializers.py", line 58, in embed_documents
model = get_embedding_model(self.data.get('model_id'))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/var/gitrepo/MaxKB/apps/setting/serializers/model_apply_serializers.py", line 24, in get_embedding_model
embedding_model = ModelManage.get_model(model_id,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/var/gitrepo/MaxKB/apps/common/config/embedding_config.py", line 28, in get_model
model_instance = get_model(_id)
^^^^^^^^^^^^^^
File "/var/gitrepo/MaxKB/apps/setting/serializers/model_apply_serializers.py", line 25, in <lambda>
lambda _id: get_model(model, use_local=True))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/var/gitrepo/MaxKB/apps/setting/models_provider/__init__.py", line 42, in get_model
return get_model_(model.provider, model.model_type, model.model_name, model.credential, str(model.id), **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/var/gitrepo/MaxKB/apps/setting/models_provider/__init__.py", line 27, in get_model_
model = get_provider(provider).get_model(model_type, model_name,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/var/gitrepo/MaxKB/apps/setting/models_provider/base_model_provider.py", line 83, in get_model
return model_info.model_class.new_instance(model_type, model_name, model_credential, **model_kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/var/gitrepo/MaxKB/apps/setting/models_provider/impl/local_model_provider/model/embedding.py", line 61, in new_instance
return LocalEmbedding(model_name=model_name, cache_folder='/opt/maxkb/model/embedding',
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/var/gitrepo/MaxKB/venv/lib/python3.11/site-packages/langchain_huggingface/embeddings/huggingface.py", line 61, in __init__
self.client = sentence_transformers.SentenceTransformer(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/var/gitrepo/MaxKB/venv/lib/python3.11/site-packages/sentence_transformers/SentenceTransformer.py", line 197, in __init__
modules = self._load_sbert_model(
^^^^^^^^^^^^^^^^^^^^^^^
File "/var/gitrepo/MaxKB/venv/lib/python3.11/site-packages/sentence_transformers/SentenceTransformer.py", line 1296, in _load_sbert_model
module = Transformer(model_name_or_path, cache_dir=cache_folder, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/var/gitrepo/MaxKB/venv/lib/python3.11/site-packages/sentence_transformers/models/Transformer.py", line 38, in __init__
self.tokenizer = AutoTokenizer.from_pretrained(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/var/gitrepo/MaxKB/venv/lib/python3.11/site-packages/transformers/models/auto/tokenization_auto.py", line 944, in from_pretrained
return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/var/gitrepo/MaxKB/venv/lib/python3.11/site-packages/transformers/tokenization_utils_base.py", line 2052, in from_pretrained
return cls._from_pretrained(
^^^^^^^^^^^^^^^^^^^^^
File "/var/gitrepo/MaxKB/venv/lib/python3.11/site-packages/transformers/tokenization_utils_base.py", line 2090, in _from_pretrained
slow_tokenizer = (cls.slow_tokenizer_class)._from_pretrained(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/var/gitrepo/MaxKB/venv/lib/python3.11/site-packages/transformers/tokenization_utils_base.py", line 2292, in _from_pretrained
tokenizer = cls(*init_inputs, **init_kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/var/gitrepo/MaxKB/venv/lib/python3.11/site-packages/transformers/models/bert/tokenization_bert.py", line 114, in __init__
if not os.path.isfile(vocab_file):
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "<frozen genericpath>", line 30, in isfile
TypeError: stat: path should be string, bytes, os.PathLike or integer, not NoneType
很奇怪的是,明明传了数据,为什么提示说 path 是 None 呢?
另外,我在第一次启动服务之前,/opt/maxkb/conf 下面的配置文件是这样配的:
EMBEDDING_MODEL_PATH: /opt/maxkb/model/embedding
EMBEDDING_MODEL_NAME: shibing624_text2vec-base-chinese
因为我看文档里写的是:
# 模型名称:如果模型名称是路径,则会加载目录下的模型,如果是模型名称,则会在https://huggingface.co/下载模型 模型的下载位置为EMBEDDING_MODEL_PATH
我原以为模型名称不配成路径就会自动下载模型,但实际上并没有下载。后来我调用 installer/install_model.py 下载了模型,目前 /opt/maxkb 下面的目录结构如下:
.
├── conf
│ └── config_example.yml
└── model
├── base
│ └── hub
└── embedding
└── shibing624_text2vec-base-chinese
然后 maxkb 模型的配置是这样的:
感觉一切都对,但索引就是出错,保存模型配置也会报错:
Verification failed, please check whether the parameters are correct: stat: path should be string, bytes, os.PathLike or integer, not NoneType