根据官方文档,添加对英文版的bge embedding的指示模板 (#1585)

Co-authored-by: zR <2448370773@qq.com>
This commit is contained in:
WilliamChen-luckbob 2023-09-28 19:18:31 +08:00 committed by GitHub
parent b3c7f8b072
commit 8fa99026c8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 12 additions and 3 deletions

View File

@ -124,9 +124,18 @@ class EmbeddingsPool(CachePool):
if model == "text-embedding-ada-002": # openai text-embedding-ada-002
embeddings = OpenAIEmbeddings(openai_api_key=get_model_path(model), chunk_size=CHUNK_SIZE)
elif 'bge-' in model:
embeddings = HuggingFaceBgeEmbeddings(model_name=get_model_path(model),
model_kwargs={'device': device},
query_instruction="为这个句子生成表示以用于检索相关文章:")
if 'zh' in model:
# for chinese model
query_instruction = "为这个句子生成表示以用于检索相关文章:"
elif 'en' in model:
# for english model
query_instruction = "Represent this sentence for searching relevant passages:"
else:
# maybe ReRanker or else, just use empty string instead
query_instruction = ""
embeddings = HuggingFaceBgeEmbeddings(model_name=embedding_model_dict[model],
model_kwargs={'device': device},
query_instruction=query_instruction)
if model == "bge-large-zh-noinstruct": # bge large -noinstruct embedding
embeddings.query_instruction = ""
else: