diff --git a/chatglm_llm.py b/chatglm_llm.py index b7251bb..e592e00 100644 --- a/chatglm_llm.py +++ b/chatglm_llm.py @@ -15,6 +15,8 @@ model = ( .cuda() ) +tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True) +model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda() class ChatGLM(LLM): max_token: int = 10000