From bbfb59941216721c429a9a881a28c5a6a4f3377a Mon Sep 17 00:00:00 2001
From: imClumsyPanda
Date: Sat, 12 Aug 2023 16:46:42 +0800
Subject: [PATCH] update master

---
 models/baichuan_llm.py   |  71 -------------------
 models/chatglmcpp_llm.py | 143 ---------------------------------------
 2 files changed, 214 deletions(-)
 delete mode 100644 models/baichuan_llm.py
 delete mode 100644 models/chatglmcpp_llm.py

diff --git a/models/baichuan_llm.py b/models/baichuan_llm.py
deleted file mode 100644
index 1a8596c..0000000
--- a/models/baichuan_llm.py
+++ /dev/null
@@ -1,71 +0,0 @@
-from abc import ABC
-from langchain.llms.base import LLM
-from typing import Optional, List
-from models.loader import LoaderCheckPoint
-from models.base import (BaseAnswer,
-                         AnswerResult)
-
-
-class BaichuanLLMChain(BaseAnswer, LLM, ABC):
-    max_token: int = 10000
-    temperature: float = 0.01
-    top_p = 0.9
-    checkPoint: LoaderCheckPoint = None
-    # history = []
-    history_len: int = 10
-
-    def __init__(self, checkPoint: LoaderCheckPoint = None):
-        super().__init__()
-        self.checkPoint = checkPoint
-
-    @property
-    def _llm_type(self) -> str:
-        return "BaichuanLLMChain"
-
-    @property
-    def _check_point(self) -> LoaderCheckPoint:
-        return self.checkPoint
-
-    @property
-    def _history_len(self) -> int:
-        return self.history_len
-
-    def set_history_len(self, history_len: int = 10) -> None:
-        self.history_len = history_len
-
-    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
-        print(f"__call:{prompt}")
-        response, _ = self.checkPoint.model.chat(
-            self.checkPoint.tokenizer,
-            prompt,
-            # history=[],
-            # max_length=self.max_token,
-            # temperature=self.temperature
-        )
-        print(f"response:{response}")
-        print(f"+++++++++++++++++++++++++++++++++++")
-        return response
-
-    def _generate_answer(self, prompt: str,
-                         history: List[List[str]] = [],
-                         streaming: bool = False):
-        messages = []
-        messages.append({"role": "user", "content": prompt})
-        if streaming:
-            for inum, stream_resp in enumerate(self.checkPoint.model.chat(
-                    self.checkPoint.tokenizer,
-                    messages,
-                    stream=True
-            )):
-                self.checkPoint.clear_torch_cache()
-                answer_result = AnswerResult()
-                answer_result.llm_output = {"answer": stream_resp}
-                yield answer_result
-        else:
-            response = self.checkPoint.model.chat(
-                self.checkPoint.tokenizer,
-                messages
-            )
-            self.checkPoint.clear_torch_cache()
-            answer_result = AnswerResult()
-            answer_result.llm_output = {"answer": response}
-            yield answer_result
\ No newline at end of file
diff --git a/models/chatglmcpp_llm.py b/models/chatglmcpp_llm.py
deleted file mode 100644
index 71477da..0000000
--- a/models/chatglmcpp_llm.py
+++ /dev/null
@@ -1,143 +0,0 @@
-
-from abc import ABC
-from typing import Any, Dict, Generator, List, Optional, Union
-
-import torch
-import transformers
-from langchain.callbacks.manager import CallbackManagerForChainRun
-from langchain.chains.base import Chain
-from transformers.generation.logits_process import LogitsProcessor
-from transformers.generation.utils import (LogitsProcessorList,
-                                            StoppingCriteriaList)
-
-from models.base import (AnswerResult,
-                         AnswerResultStream, BaseAnswer)
-from models.loader import LoaderCheckPoint
-
-
-class ChatGLMCppLLMChain(BaseAnswer, Chain, ABC):
-    checkPoint: LoaderCheckPoint = None
-    streaming_key: str = "streaming"  #: :meta private:
-    history_key: str = "history"  #: :meta private:
-    prompt_key: str = "prompt"  #: :meta private:
-    output_key: str = "answer_result_stream"  #: :meta private:
-
-    max_length = 2048
-    max_context_length = 512
-    do_sample = True
-    top_k = 0
-    top_p = 0.7
-    temperature = 0.95
-    num_threads = 0
-
-    def __init__(self, checkPoint: LoaderCheckPoint = None):
-        super().__init__()
-        self.checkPoint = checkPoint
-
-    @property
-    def _chain_type(self) -> str:
-        return "ChatglmCppLLMChain"
-
-    @property
-    def input_keys(self) -> List[str]:
-        """Will be whatever keys the prompt expects.
-
-        :meta private:
-        """
-        return [self.prompt_key]
-
-    @property
-    def output_keys(self) -> List[str]:
-        """Will always return text key.
-
-        :meta private:
-        """
-        return [self.output_key]
-
-    @property
-    def _check_point(self) -> LoaderCheckPoint:
-        return self.checkPoint
-
-    def encode(self, prompt, truncation_length=None):
-        input_ids = self.checkPoint.tokenizer.encode(str(prompt))
-        return input_ids
-
-    def decode(self, output_ids):
-        reply = self.checkPoint.tokenizer.decode(output_ids)
-        return reply
-
-    def _call(
-            self,
-            inputs: Dict[str, Any],
-            run_manager: Optional[CallbackManagerForChainRun] = None,
-    ) -> Dict[str, Generator]:
-        generator = self.generatorAnswer(inputs=inputs, run_manager=run_manager)
-        return {self.output_key: generator}
-
-    def _generate_answer(self,
-                         inputs: Dict[str, Any],
-                         run_manager: Optional[CallbackManagerForChainRun] = None,
-                         generate_with_callback: AnswerResultStream = None) -> None:
-
-        history = inputs[self.history_key]
-        streaming = inputs[self.streaming_key]
-        prompt = inputs[self.prompt_key]
-        print(f"__call:{prompt}")
-
-        if prompt == "clear":
-            history = []
-
-        local_history = []
-
-        if not history:
-            history = []
-
-        for k, v in history:
-            if k:
-                local_history.append(k)
-            local_history.append(v)
-
-        local_history.append(prompt)
-
-        if streaming:
-            history += [[]]
-            pieces = []
-            print(f"++++++++++++++Stream++++++++++++++++++++")
-            for piece in self.checkPoint.model.stream_chat(
-                    local_history,
-                    max_length=self.max_length,
-                    max_context_length=self.max_context_length,
-                    do_sample=self.temperature > 0,
-                    top_k=self.top_k,
-                    top_p=self.top_p,
-                    temperature=self.temperature,
-            ):
-                pieces.append(piece)
-                reply = ''.join(pieces)
-                print(f"{piece}", end='')
-
-                answer_result = AnswerResult()
-                history[-1] = [prompt, reply]
-                answer_result.history = history
-                answer_result.llm_output = {"answer": reply}
-                generate_with_callback(answer_result)
-            print("")
-        else:
-            reply = self.checkPoint.model.chat(
-                local_history,
-                max_length=self.max_length,
-                max_context_length=self.max_context_length,
-                do_sample=self.temperature > 0,
-                top_k=self.top_k,
-                top_p=self.top_p,
-                temperature=self.temperature,
-            )
-
-            print(f"response:{reply}")
-            print(f"+++++++++++++++++++++++++++++++++++")
-
-            answer_result = AnswerResult()
-            history.append([prompt, reply])
-            answer_result.history = history
-            answer_result.llm_output = {"answer": reply}
-            generate_with_callback(answer_result)