diff --git a/models/moss_llm.py b/models/moss_llm.py index b0c6cdd..be9711b 100644 --- a/models/moss_llm.py +++ b/models/moss_llm.py @@ -1,5 +1,4 @@ from abc import ABC -import transformers from langchain.llms.base import LLM from typing import Optional, List from models.loader import LoaderCheckPoint @@ -58,11 +57,6 @@ class MOSSLLM(BaseAnswer, LLM, ABC): history: List[List[str]] = [], streaming: bool = False, generate_with_callback: AnswerResultStream = None) -> None: - # Create the StoppingCriteriaList with the stopping strings - stopping_criteria_list = transformers.StoppingCriteriaList() - # 定义模型stopping_criteria 队列,在每次响应时将 torch.LongTensor, torch.FloatTensor同步到AnswerResult - listenerQueue = AnswerResultQueueSentinelTokenListenerQueue() - stopping_criteria_list.append(listenerQueue) if len(history) > 0: history = history[-self.history_len:-1] if self.history_len > 0 else [] prompt_w_history = str(history) @@ -91,8 +85,6 @@ class MOSSLLM(BaseAnswer, LLM, ABC): answer_result = AnswerResult() answer_result.history = history answer_result.llm_output = {"answer": response} - if listenerQueue.listenerQueue.__len__() > 0: - answer_result.listenerToken = listenerQueue.listenerQueue.pop() generate_with_callback(answer_result)