# Intention/ernie/test_model.py

import functools
import json

import numpy as np
import paddle
import yaml
from paddle.io import DataLoader
from paddlenlp.data import DataCollatorWithPadding
from paddlenlp.datasets import load_dataset
from paddlenlp.transformers import ErnieTokenizer
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# Load configuration
with open("data.yaml", "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)
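# Note: data.yaml is assumed to provide at least the two keys used below:
#   labels: list of class names (their order defines the label ids)
#   val:    path to the evaluation JSON file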

# Load the model and tokenizer
model = paddle.jit.load("trained_model_static")  # load the saved static-graph model
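# paddle.jit.load reads the trained_model_static.pdmodel / trained_model_static.pdiparams
# files written by paddle.jit.save; they are expected to sit in the working directory.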
tokenizer = ErnieTokenizer.from_pretrained("E:/workingSpace/PycharmProjects/Intention/models/ernie-3.0-tiny-base-v2-zh")

# Function that reads the local dataset
def read_local_dataset(path, label2id=None, is_test=True):
    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)  # read the JSON data
    for item in data:
        if "text" in item:
            # Map the label to its id; mark it as -1 if the label is missing or unknown
            yield {"text": item["text"], "label": label2id.get(item.get("label"), -1)}
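
# The JSON file referenced by config["val"] is assumed to be a list of objects shaped
# roughly like the following (field values are purely illustrative):
#   [{"text": "some input sentence", "label": "some_class_name"}, ...]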

# Build label2id and id2label mappings
label_id = {label: idx for idx, label in enumerate(config["labels"])}
id_label = {idx: label for label, idx in label_id.items()}
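# For example (hypothetical label names), labels: ["greeting", "weather"] would give
# label_id = {"greeting": 0, "weather": 1} and id_label = {0: "greeting", 1: "weather"}.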

# Data preprocessing function
def preprocess_function(examples, tokenizer, max_length, is_test=False):
    result = tokenizer(examples["text"], max_length=max_length, truncation=True, padding="max_length")
    if not is_test:
        result["labels"] = np.array([examples["label"]], dtype="int64")
    return result
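
# Note: by default the PaddleNLP ErnieTokenizer returns "input_ids" and "token_type_ids"
# only; an "attention_mask" is not included unless explicitly requested, so those two
# keys (plus "labels") are what each batch below contains.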

# Load the dataset
test_ds = load_dataset(read_local_dataset, path=config["val"], label2id=label_id, lazy=False)

# Transform the dataset
trans_func = functools.partial(preprocess_function, tokenizer=tokenizer, max_length=256)
test_ds = test_ds.map(trans_func)

# Use DataCollatorWithPadding to batch and pad the examples
data_collator = DataCollatorWithPadding(tokenizer)
test_dataloader = DataLoader(test_ds, batch_size=16, shuffle=False, collate_fn=data_collator)

# Evaluate the model
model.eval()  # switch to evaluation mode
all_preds = []
all_labels = []

# Iterate over the dataset and run inference
with paddle.no_grad():
    for batch in test_dataloader:
        input_ids = batch["input_ids"]            # token ids for the model input
        token_type_ids = batch["token_type_ids"]  # segment ids produced by the tokenizer
        labels = batch["labels"]                  # ground-truth labels added in preprocess_function

        # Run the model; the exported static-graph model is assumed to take
        # (input_ids, token_type_ids) as positional inputs
        logits = model(input_ids, token_type_ids)
        pred_labels = np.argmax(logits.numpy(), axis=1)  # pick the highest-scoring class

        # Collect predictions and ground-truth labels
        all_preds.extend(pred_labels)
        all_labels.extend(labels.numpy().reshape(-1))

# Compute evaluation metrics
accuracy = accuracy_score(all_labels, all_preds)
precision, recall, f1, _ = precision_recall_fscore_support(all_labels, all_preds, average='weighted')

# Print the evaluation results
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")