From bbfb59941216721c429a9a881a28c5a6a4f3377a Mon Sep 17 00:00:00 2001
From: imClumsyPanda
Date: Sat, 12 Aug 2023 16:46:42 +0800
Subject: [PATCH] update master

---
 models/baichuan_llm.py   |  71 -------------------
 models/chatglmcpp_llm.py | 143 ---------------------------------------
 2 files changed, 214 deletions(-)
 delete mode 100644 models/baichuan_llm.py
 delete mode 100644 models/chatglmcpp_llm.py

diff --git a/models/baichuan_llm.py b/models/baichuan_llm.py
deleted file mode 100644
index 1a8596c..0000000
--- a/models/baichuan_llm.py
+++ /dev/null
@@ -1,71 +0,0 @@
-from abc import ABC
-from langchain.llms.base import LLM
-from typing import Optional, List
-from models.loader import LoaderCheckPoint
-from models.base import (BaseAnswer,
-                         AnswerResult)
-
-
-class BaichuanLLMChain(BaseAnswer, LLM, ABC):
-    max_token: int = 10000
-    temperature: float = 0.01
-    top_p = 0.9
-    checkPoint: LoaderCheckPoint = None
-    # history = []
-    history_len: int = 10
-
-    def __init__(self, checkPoint: LoaderCheckPoint = None):
-        super().__init__()
-        self.checkPoint = checkPoint
-
-    @property
-    def _llm_type(self) -> str:
-        return "BaichuanLLMChain"
-
-    @property
-    def _check_point(self) -> LoaderCheckPoint:
-        return self.checkPoint
-
-    @property
-    def _history_len(self) -> int:
-        return self.history_len
-
-    def set_history_len(self, history_len: int = 10) -> None:
-        self.history_len = history_len
-
-    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
-        print(f"__call:{prompt}")
-        response, _ = self.checkPoint.model.chat(
-            self.checkPoint.tokenizer,
-            prompt,
-            # history=[],
-            # max_length=self.max_token,
-            # temperature=self.temperature
-        )
-        print(f"response:{response}")
-        print(f"+++++++++++++++++++++++++++++++++++")
-        return response
-
-    def _generate_answer(self, prompt: str,
-                         history: List[List[str]] = [],
-                         streaming: bool = False):
-        messages = []
-        messages.append({"role": "user", "content": prompt})
-        if streaming:
-            for inum, stream_resp in enumerate(self.checkPoint.model.chat(
-                    self.checkPoint.tokenizer,
-                    messages,
-                    stream=True
-            )):
-                self.checkPoint.clear_torch_cache()
-                answer_result = AnswerResult()
-                answer_result.llm_output = {"answer": stream_resp}
-                yield answer_result
-        else:
-            response = self.checkPoint.model.chat(
-                self.checkPoint.tokenizer,
-                messages
-            )
-            self.checkPoint.clear_torch_cache()
-            answer_result = AnswerResult()
-            answer_result.llm_output = {"answer": response}
-            yield answer_result
\ No newline at end of file
diff --git a/models/chatglmcpp_llm.py b/models/chatglmcpp_llm.py
deleted file mode 100644
index 71477da..0000000
--- a/models/chatglmcpp_llm.py
+++ /dev/null
@@ -1,143 +0,0 @@
-
-from abc import ABC
-from typing import Any, Dict, Generator, List, Optional, Union
-
-import torch
-import transformers
-from langchain.callbacks.manager import CallbackManagerForChainRun
-from langchain.chains.base import Chain
-from transformers.generation.logits_process import LogitsProcessor
-from transformers.generation.utils import (LogitsProcessorList,
-                                            StoppingCriteriaList)
-
-from models.base import (AnswerResult,
-                         AnswerResultStream, BaseAnswer)
-from models.loader import LoaderCheckPoint
-
-
-class ChatGLMCppLLMChain(BaseAnswer, Chain, ABC):
-    checkPoint: LoaderCheckPoint = None
-    streaming_key: str = "streaming"  #: :meta private:
-    history_key: str = "history"  #: :meta private:
-    prompt_key: str = "prompt"  #: :meta private:
-    output_key: str = "answer_result_stream"  #: :meta private:
-
-    max_length = 2048
-    max_context_length = 512
-    do_sample = True
-    top_k = 0
-    top_p = 0.7
-    temperature = 0.95
-    num_threads = 0
-
-    def __init__(self, checkPoint: LoaderCheckPoint = None):
-        super().__init__()
-        self.checkPoint = checkPoint
-
-    @property
-    def _chain_type(self) -> str:
-        return "ChatglmCppLLMChain"
-
-    @property
-    def input_keys(self) -> List[str]:
-        """Will be whatever keys the prompt expects.
-
-        :meta private:
-        """
-        return [self.prompt_key]
-
-    @property
-    def output_keys(self) -> List[str]:
-        """Will always return text key.
-
-        :meta private:
-        """
-        return [self.output_key]
-
-    @property
-    def _check_point(self) -> LoaderCheckPoint:
-        return self.checkPoint
-
-    def encode(self, prompt, truncation_length=None):
-        input_ids = self.checkPoint.tokenizer.encode(str(prompt))
-        return input_ids
-
-    def decode(self, output_ids):
-        reply = self.checkPoint.tokenizer.decode(output_ids)
-        return reply
-
-    def _call(
-            self,
-            inputs: Dict[str, Any],
-            run_manager: Optional[CallbackManagerForChainRun] = None,
-    ) -> Dict[str, Generator]:
-        generator = self.generatorAnswer(inputs=inputs, run_manager=run_manager)
-        return {self.output_key: generator}
-
-    def _generate_answer(self,
-                         inputs: Dict[str, Any],
-                         run_manager: Optional[CallbackManagerForChainRun] = None,
-                         generate_with_callback: AnswerResultStream = None) -> None:
-
-        history = inputs[self.history_key]
-        streaming = inputs[self.streaming_key]
-        prompt = inputs[self.prompt_key]
-        print(f"__call:{prompt}")
-
-        if prompt == "clear":
-            history = []
-
-        local_history = []
-
-        if not history:
-            history = []
-
-        for k, v in history:
-            if k:
-                local_history.append(k)
-            local_history.append(v)
-
-        local_history.append(prompt)
-
-        if streaming:
-            history += [[]]
-            pieces = []
-            print(f"++++++++++++++Stream++++++++++++++++++++")
-            for piece in self.checkPoint.model.stream_chat(
-                    local_history,
-                    max_length=self.max_length,
-                    max_context_length=self.max_context_length,
-                    do_sample=self.temperature > 0,
-                    top_k=self.top_k,
-                    top_p=self.top_p,
-                    temperature=self.temperature,
-            ):
-                pieces.append(piece)
-                reply = ''.join(pieces)
-                print(f"{piece}", end='')
-
-                answer_result = AnswerResult()
-                history[-1] = [prompt, reply]
-                answer_result.history = history
-                answer_result.llm_output = {"answer": reply}
-                generate_with_callback(answer_result)
-            print("")
-        else:
-            reply = self.checkPoint.model.chat(
-                local_history,
-                max_length=self.max_length,
-                max_context_length=self.max_context_length,
-                do_sample=self.temperature > 0,
-                top_k=self.top_k,
-                top_p=self.top_p,
-                temperature=self.temperature,
-            )
-
-            print(f"response:{reply}")
-            print(f"+++++++++++++++++++++++++++++++++++")
-
-            answer_result = AnswerResult()
-            history.append([prompt, reply])
-            answer_result.history = history
-            answer_result.llm_output = {"answer": reply}
-            generate_with_callback(answer_result)