43 lines
1.7 KiB
Python
43 lines
1.7 KiB
Python
import paddle
|
|
import numpy as np
|
|
from paddlenlp.transformers import ErnieForSequenceClassification, ErnieTokenizer
|
|
import paddle.nn.functional as F
|
|
|
|
|
|
class IntentRecognition:
    """Single-query intent classifier built on an ERNIE sequence-classification model.

    Loads the model and its tokenizer from ``model_path`` and maps the
    predicted class index onto the caller-supplied ``labels``.
    """

    def __init__(self, model_path: str, labels: list):
        """Load the model and tokenizer and remember the label names.

        :param model_path: name or directory passed to ``from_pretrained``
            for both the model and the tokenizer
        :param labels: class names, indexed by the class id the model predicts
        """
        self.model = ErnieForSequenceClassification.from_pretrained(model_path)
        # Paddle layers start in training mode; switch to evaluation mode so
        # dropout is disabled and repeated predictions are deterministic.
        self.model.eval()
        self.tokenizer = ErnieTokenizer.from_pretrained(model_path)
        self.labels = labels

    def predict(self, query: str) -> tuple:
        """Classify the intent of ``query``.

        :param query: text to classify
        :return: ``(predicted_label, predicted_probability, predicted_id)`` --
            the entry of ``labels`` for the most probable class, that class's
            softmax probability as a ``float``, and its index as an ``int``
        """
        # Tokenize the query, truncating/padding it to a fixed length of 256.
        inputs = self.tokenizer(
            query,
            max_length=256,
            truncation=True,
            padding='max_length',
            return_tensors="pd",
        )

        # Make sure the token ids are a paddle tensor before the forward pass.
        input_ids = paddle.to_tensor(inputs["input_ids"])

        # Inference only: skip building the autograd graph.
        with paddle.no_grad():
            logits = self.model(input_ids)
            # Softmax over the class axis turns logits into probabilities.
            probabilities = F.softmax(logits, axis=-1)

        # Convert to numpy once; row 0 is the single query in the batch.
        query_probs = probabilities.numpy()[0]

        predicted_id = int(np.argmax(query_probs))
        predicted_probability = float(query_probs[predicted_id])
        # Map the predicted class index to its label name.
        predicted_label = self.labels[predicted_id]

        return predicted_label, predicted_probability, predicted_id
|