线性简单实现了:vectorstores/MyFAISS.py TODO: 增加是否属于同一文档的判断 (#945)
This commit is contained in:
parent
62047c880e
commit
28a2295f73
|
|
@ -29,15 +29,17 @@ class MyFAISS(FAISS, VectorStore):
|
||||||
self.chunk_conent = False
|
self.chunk_conent = False
|
||||||
|
|
||||||
def seperate_list(self, ls: List[int]) -> List[List[int]]:
    """Split ``ls`` into runs of consecutive indices that share one source.

    A new run starts whenever the next index is not exactly one greater
    than the previous index, or whenever its document source (as reported
    by ``self.index_to_docstore_source``) differs from the source of the
    run's first element.

    Args:
        ls: Index values to group, in the order they should be scanned.

    Returns:
        A list of runs, each run being a list of indices. An empty ``ls``
        yields an empty list.
    """
    # Guard the empty case: indexing ls[0] below would raise IndexError.
    if not ls:
        return []

    lists = []
    run = [ls[0]]
    run_source = self.index_to_docstore_source(ls[0])
    for prev, cur in zip(ls, ls[1:]):
        # Look the source up once per element; it is needed both for the
        # comparison and, when a new run starts, as that run's source.
        cur_source = self.index_to_docstore_source(cur)
        if prev + 1 == cur and cur_source == run_source:
            run.append(cur)
        else:
            lists.append(run)
            run = [cur]
            run_source = cur_source
    lists.append(run)
    return lists
|
||||||
|
|
||||||
|
|
@ -162,3 +164,8 @@ class MyFAISS(FAISS, VectorStore):
|
||||||
|
|
||||||
def list_docs(self):
    """Return the distinct ``source`` metadata values held in the docstore.

    Reads every document in ``self.docstore._dict`` and collects its
    ``metadata["source"]`` entry.

    Returns:
        A list with each source value appearing once, in the order it is
        first encountered while iterating the docstore.

    Raises:
        KeyError: If a stored document has no ``"source"`` metadata entry.
    """
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # result no longer depends on per-process string hash randomization.
    sources = (doc.metadata["source"] for doc in self.docstore._dict.values())
    return list(dict.fromkeys(sources))
|
||||||
|
|
||||||
|
def index_to_docstore_source(self, i: int):
    """Return the ``source`` metadata of the document at index position ``i``.

    Args:
        i: Key into ``self.index_to_docstore_id``.

    Returns:
        The ``metadata["source"]`` value of the document that
        ``self.docstore.search`` returns for the mapped docstore id.
    """
    _id = self.index_to_docstore_id[i]
    # NOTE(review): if docstore.search can return something other than a
    # document for an unknown id, the attribute access below will fail;
    # confirm against the docstore implementation in use.
    doc = self.docstore.search(_id)
    return doc.metadata["source"]
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue