From 2240ed1ec2806af7c1b4c42f89b4ce967cd09c98 Mon Sep 17 00:00:00 2001
From: imClumsyPanda
Date: Sun, 9 Apr 2023 23:30:15 +0800
Subject: [PATCH] update requirements.txt

---
 chatglm_llm.py             | 50 +++++++++++++++++++++++++-------------
 knowledge_based_chatglm.py | 18 ++++++--------
 2 files changed, 41 insertions(+), 27 deletions(-)

diff --git a/chatglm_llm.py b/chatglm_llm.py
index a2e8f6f..810e98a 100644
--- a/chatglm_llm.py
+++ b/chatglm_llm.py
@@ -16,13 +16,28 @@ def torch_gc():
             torch.cuda.ipc_collect()
 
 
+tokenizer = AutoTokenizer.from_pretrained(
+    "/Users/liuqian/Downloads/ChatGLM-6B/chatglm_hf_model",
+    # "THUDM/chatglm-6b",
+    trust_remote_code=True
+)
+model = (
+    AutoModel.from_pretrained(
+        "/Users/liuqian/Downloads/ChatGLM-6B/chatglm_hf_model",
+        # "THUDM/chatglm-6b",
+        trust_remote_code=True)
+    .float()
+    .to("mps")
+    # .half()
+    # .cuda()
+)
+
+
 class ChatGLM(LLM):
     max_token: int = 10000
     temperature: float = 0.1
     top_p = 0.9
     history = []
-    tokenizer: object = None
-    model: object = None
 
     def __init__(self):
         super().__init__()
@@ -34,8 +49,8 @@ class ChatGLM(LLM):
     def _call(self,
               prompt: str,
              stop: Optional[List[str]] = None) -> str:
-        response, updated_history = self.model.chat(
-            self.tokenizer,
+        response, updated_history = model.chat(
+            tokenizer,
             prompt,
             history=self.history,
             max_length=self.max_token,
@@ -48,16 +63,17 @@
         self.history = updated_history
         return response
 
-    def load_model(self,
-                   model_name_or_path: str = "THUDM/chatglm-6b"):
-        self.tokenizer = AutoTokenizer.from_pretrained(
-            model_name_or_path,
-            trust_remote_code=True
-        )
-        self.model = (
-            AutoModel.from_pretrained(
-                model_name_or_path,
-                trust_remote_code=True)
-            .half()
-            .cuda()
-        )
+    def get_num_tokens(self, text: str) -> int:
+        tokenized_text = tokenizer.tokenize(text)
+        return len(tokenized_text)
+
+
+if __name__ == "__main__":
+    history = []
+    while True:
+        query = input("Input your question 请输入问题:")
+        resp, history = model.chat(tokenizer,
+                                   query,
+                                   history=history,
+                                   temperature=0.01,
+                                   max_length=100000)
+        print(resp)
\ No newline at end of file

diff --git a/knowledge_based_chatglm.py b/knowledge_based_chatglm.py
index 07fbac1..ae5d456 100644
--- a/knowledge_based_chatglm.py
+++ b/knowledge_based_chatglm.py
@@ -1,5 +1,5 @@
 from langchain.prompts.prompt import PromptTemplate
-from langchain.chains import ChatVectorDBChain
+from langchain.chains import ChatVectorDBChain, ConversationalRetrievalChain
 from langchain.prompts.chat import (
     ChatPromptTemplate,
     SystemMessagePromptTemplate,
@@ -13,16 +13,13 @@ from chatglm_llm import ChatGLM
 embedding_model_dict = {
     "ernie-tiny": "nghuyong/ernie-3.0-nano-zh",
     "ernie-base": "nghuyong/ernie-3.0-base-zh",
-    "text2vec": "GanymedeNil/text2vec-large-chinese"
+    "text2vec": "/Users/liuqian/Downloads/ChatGLM-6B/chatglm_embedding"#"GanymedeNil/text2vec-large-chinese"
 }
 
-llm_model_dict = {
-    "chatglm-6b": "THUDM/chatglm-6b",
-    "chatglm-6b-int4": "THUDM/chatglm-6b-int4"
-}
+
 
 chatglm = ChatGLM()
-chatglm.load_model(model_name_or_path=llm_model_dict["chatglm-6b"])
+
 
 def init_knowledge_vector_store(filepath):
     embeddings = HuggingFaceEmbeddings(model_name=embedding_model_dict["text2vec"], )
@@ -56,15 +53,16 @@ def get_knowledge_based_answer(query, vector_store, chat_history=[]):
    改写后的独立、完整的问题："""
     new_question_prompt = PromptTemplate.from_template(condese_propmt_template)
     chatglm.history = chat_history
-    knowledge_chain = ChatVectorDBChain.from_llm(
+    knowledge_chain = ConversationalRetrievalChain.from_llm(
         llm=chatglm,
-        vectorstore=vector_store,
+        retriever=vector_store.as_retriever(),
         qa_prompt=prompt,
         condense_question_prompt=new_question_prompt,
     )
     knowledge_chain.return_source_documents = True
-    knowledge_chain.top_k_docs_for_context = 10
+    # knowledge_chain.top_k_docs_for_context = 10
+    knowledge_chain.max_tokens_limit = 10000
     result = knowledge_chain({"question": query,
                               "chat_history": chat_history})
     return result, chatglm.history
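
Note, not part of the patch: a minimal usage sketch of the updated knowledge_based_chatglm.py entry points after the switch from ChatVectorDBChain to ConversationalRetrievalChain. The input file path and query are placeholders, init_knowledge_vector_store is assumed to return a LangChain vector store (its use with as_retriever() above suggests this), and the "answer"/"source_documents" keys follow LangChain's usual ConversationalRetrievalChain output when return_source_documents is True.

    # Usage sketch only; path, query, and result keys are assumptions.
    from knowledge_based_chatglm import init_knowledge_vector_store, get_knowledge_based_answer

    # Build a vector store from a local document (hypothetical path).
    vector_store = init_knowledge_vector_store("docs/sample.txt")

    history = []
    result, history = get_knowledge_based_answer(
        "What does this document cover?",
        vector_store,
        chat_history=history,
    )
    print(result["answer"])
    print(result["source_documents"])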