From 28a2295f73a844ae34e232fe743297df07f2794f Mon Sep 17 00:00:00 2001
From: Wufisher <1105889424@qq.com>
Date: Thu, 10 Aug 2023 21:59:47 +0800
Subject: [PATCH] =?UTF-8?q?=E7=BA=BF=E6=80=A7=E7=AE=80=E5=8D=95=E5=AE=9E?=
 =?UTF-8?q?=E7=8E=B0=E4=BA=86=EF=BC=9Avecorstores/MyFAISS.py=20TODO:=20?=
 =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E6=98=AF=E5=90=A6=E5=B1=9E=E4=BA=8E=E5=90=8C?=
 =?UTF-8?q?=E4=B8=80=E6=96=87=E6=A1=A3=E7=9A=84=E5=88=A4=E6=96=AD=20(#945)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 vectorstores/MyFAISS.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/vectorstores/MyFAISS.py b/vectorstores/MyFAISS.py
index 835799b..77b40d5 100644
--- a/vectorstores/MyFAISS.py
+++ b/vectorstores/MyFAISS.py
@@ -29,15 +29,17 @@ class MyFAISS(FAISS, VectorStore):
         self.chunk_conent = False
 
     def seperate_list(self, ls: List[int]) -> List[List[int]]:
-        # TODO: 增加是否属于同一文档的判断
+
         lists = []
         ls1 = [ls[0]]
+        source1 = self.index_to_docstore_source(ls[0])
         for i in range(1, len(ls)):
-            if ls[i - 1] + 1 == ls[i]:
+            if ls[i - 1] + 1 == ls[i] and self.index_to_docstore_source(ls[i]) == source1:
                 ls1.append(ls[i])
             else:
                 lists.append(ls1)
                 ls1 = [ls[i]]
+                source1 = self.index_to_docstore_source(ls[i])
         lists.append(ls1)
         return lists
 
@@ -162,3 +164,8 @@ class MyFAISS(FAISS, VectorStore):
 
     def list_docs(self):
         return list(set(v.metadata["source"] for v in self.docstore._dict.values()))
+
+    def index_to_docstore_source(self,i:int):
+        _id = self.index_to_docstore_id[i]
+        doc = self.docstore.search(_id)
+        return doc.metadata["source"]