Done. Grounding Fine-tune
This commit is contained in:
parent 7d0144ee0a
commit cdf8b0a92b
@@ -31,4 +31,6 @@ web_demo_streaming
 evaluation
 requirements_web_demo.txt
 web_demo_mm.py
+output
+output2
@@ -0,0 +1,44 @@
import json
from datasets import Dataset


def load_and_convert_data(file_path):
    """Load a JSONL file and convert it into a list of dataset dicts."""
    loaded_data = []
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            loaded_data.append(json.loads(line))

    # Convert loaded_data into a format suitable for Dataset
    dataset_dicts = []
    for item in loaded_data:
        user_content = item[0]['content']
        assistant_content = item[1]['content']

        # Extract the image and text parts of the user turn
        image_info = next((x for x in user_content if x['type'] == 'image'), None)
        text_info = next((x for x in user_content if x['type'] == 'text'), None)

        # Build the new record
        dataset_entry = {
            'role': 'user',
            'image_path': image_info['image'] if image_info else None,
            'question': text_info['text'] if text_info else None,
            'assistant_answer': assistant_content
        }

        dataset_dicts.append(dataset_entry)

    return dataset_dicts


# Load the test and val datasets separately
test_data_path = 'data_test.jsonl'
val_data_path = 'data_val.jsonl'

test_dataset_dicts = load_and_convert_data(test_data_path)
val_dataset_dicts = load_and_convert_data(val_data_path)

# Create Dataset objects
test_dataset = Dataset.from_list(test_dataset_dicts)
val_dataset = Dataset.from_list(val_dataset_dicts)

print("Test and Val Datasets have been created.")
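For reference, load_and_convert_data assumes each JSONL line is a two-element conversation: a user turn whose content mixes an image part and a text part, followed by an assistant turn. A hypothetical line of data_test.jsonl (the path and answer format below are illustrative, not taken from the commit):

[{"role": "user", "content": [{"type": "image", "image": "images/0001.jpg"}, {"type": "text", "text": "Please provide the bounding box for the following description: the dog on the left"}]}, {"role": "assistant", "content": "(52,118),(301,405)"}]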
@@ -0,0 +1,29 @@
{
    "fp16": {
        "enabled": "auto",
        "loss_scale": 0,
        "loss_scale_window": 1000,
        "initial_scale_power": 16,
        "hysteresis": 2,
        "min_loss_scale": 1
    },
    "bf16": {
        "enabled": "auto"
    },
    "zero_optimization": {
        "stage": 2,
        "allgather_partitions": true,
        "allgather_bucket_size": 5e8,
        "overlap_comm": true,
        "reduce_scatter": true,
        "reduce_bucket_size": 5e8,
        "contiguous_gradients": true
    },

    "gradient_accumulation_steps": "auto",
    "gradient_clipping": "auto",
    "steps_per_print": 2000,
    "train_batch_size": "auto",
    "train_micro_batch_size_per_gpu": "auto",
    "wall_clock_breakdown": false
}
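This is presumably the ds_zero2_no_offload.json that the training script below points at. The "auto" entries are resolved by the Hugging Face Trainer's DeepSpeed integration from TrainingArguments, so with fp16=True set below, the fp16 block becomes active and bf16 stays off. A typical launch, assuming the deepspeed launcher and that config filename:

deepspeed --num_gpus=3 train.py

(the GPU count here is only a guess, taken from the commented-out CUDA_VISIBLE_DEVICES = "0,1,2" line in the script)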
73 train.py
@@ -106,33 +106,80 @@ def predict(messages, model):
     return output_text[0]
 
+
+def load_and_convert_data(file_path):
+    """Load a JSONL file and convert it into a list of dataset dicts."""
+    loaded_data = []
+    with open(file_path, 'r', encoding='utf-8') as file:
+        for line in file:
+            loaded_data.append(json.loads(line))
+
+    # Convert loaded_data into a format suitable for Dataset
+    dataset_dicts = []
+    for item in loaded_data:
+        user_content = item[0]['content']
+        assistant_content = item[1]['content']
+
+        # Extract the image and text parts of the user turn
+        image_info = next((x for x in user_content if x['type'] == 'image'), None)
+        text_info = next((x for x in user_content if x['type'] == 'text'), None)
+
+        # Build the new record
+        dataset_entry = {
+            'role': 'user',
+            'image_path': image_info['image'] if image_info else None,
+            'question': text_info['text'] if text_info else None,
+            'assistant_answer': assistant_content
+        }
+
+        dataset_dicts.append(dataset_entry)
+
+    return dataset_dicts
+
+
 # Download the Qwen2-VL model from ModelScope into a local directory
 # model_dir = snapshot_download("Qwen/Qwen2-VL-2B-Instruct", cache_dir="./", revision="master")
+min_pixel = 256*28*28
+max_pixel = 1280*28*28
 # Load the model weights with Transformers
 tokenizer = AutoTokenizer.from_pretrained("/home/gyk/models/Qwen2.5-VL-7B-Instruct/", use_fast=False, trust_remote_code=True)
 processor = AutoProcessor.from_pretrained("/home/gyk/models/Qwen2.5-VL-7B-Instruct/")
 
 model = Qwen2_5_VLForConditionalGeneration.from_pretrained("/home/gyk/models/Qwen2.5-VL-7B-Instruct/", device_map="auto", torch_dtype=torch.bfloat16, trust_remote_code=True,)
 model.enable_input_require_grads()  # Required when gradient checkpointing is enabled
 
 # Process the dataset: read the json file
 # Split it into train and test sets, saved as data_vl_train.json and data_vl_test.json
-train_json_path = "data_vl.json"
-with open(train_json_path, 'r') as f:
-    data = json.load(f)
-train_data = data[:-4]
-test_data = data[-4:]
+if True:
+    train_json_path = "data_vl.json"
+    with open(train_json_path, 'r') as f:
+        data = json.load(f)
+    train_data = data[:-4]
+    test_data = data[-4:]
 
-with open("data_vl_train.json", "w") as f:
-    json.dump(train_data, f)
+    with open("data_vl_train.json", "w") as f:
+        json.dump(train_data, f)
 
-with open("data_vl_test.json", "w") as f:
-    json.dump(test_data, f)
+    with open("data_vl_test.json", "w") as f:
+        json.dump(test_data, f)
 
-train_ds = Dataset.from_json("data_vl_train.json")
-train_dataset = train_ds.map(process_func)  # type: ignore
+    train_ds = Dataset.from_json("data_vl_train.json")
+    train_dataset = train_ds.map(process_func)  # type: ignore
+else:
+    # Load the test and val datasets separately
+    test_data_path = 'data_test.jsonl'
+    val_data_path = 'data_val.jsonl'
+
+    test_dataset_dicts = load_and_convert_data(test_data_path)
+    val_dataset_dicts = load_and_convert_data(val_data_path)
+
+    # Create Dataset objects
+    test_tmp_dataset = Dataset.from_list(test_dataset_dicts)
+    val_tmp_dataset = Dataset.from_list(val_dataset_dicts)
+
+    test_tmp_dataset = test_tmp_dataset.select(list(range(1000)))
+    val_tmp_dataset = val_tmp_dataset.select(list(range(50)))
+
+    test_dataset = test_tmp_dataset.map(process_func, batched=True, batch_size=4)
+    val_dataset = val_tmp_dataset.map(process_func, batched=True, batch_size=4)
 
 # Configure LoRA
 config = LoraConfig(
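In effect, the `if True:` toggle keeps the original data_vl.json pipeline active and parks the new JSONL grounding pipeline in the dead `else:` branch; flipping the condition switches the script over to the new grounding data.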
@@ -0,0 +1,371 @@
import os
import sys
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
# os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2"
import torch, gc
gc.collect()
torch.cuda.empty_cache()
import deepspeed
DS_CONFIG = "ds_zero2_no_offload.json"
from datasets import Dataset
from modelscope import snapshot_download, AutoTokenizer
from swanlab.integration.transformers import SwanLabCallback
from qwen_vl_utils import process_vision_info
from peft import LoraConfig, TaskType, get_peft_model, PeftModel, get_peft_model_state_dict
from transformers import (
    TrainingArguments,  # type: ignore
    Trainer,  # type: ignore
    Qwen2_5_VLForConditionalGeneration,
    AutoProcessor,
)
from transformers.data.data_collator import DataCollatorForSeq2Seq
import swanlab
import json

# device = torch.device("cuda:2" if torch.cuda.is_available() else "cpu")  # with multiple GPUs, a specific starting device can be picked
# Check whether CUDA is available
if torch.cuda.is_available():
    # Read local_rank from the environment; DeepSpeed/PyTorch DDP sets it automatically.
    # If the variable is absent (e.g. in non-distributed runs), default to 0.
    local_rank = int(os.environ.get("LOCAL_RANK", "0"))

    # Build the device object from local_rank
    device = torch.device(f"cuda:{local_rank}")

    torch.cuda.set_device(device)

    print(f"Process with local_rank {local_rank} is using device: {device}")
else:
    device = torch.device("cpu")

def load_and_convert_data(file_path):
    """Load a JSONL file and convert it into a list of dataset dicts."""
    loaded_data = []
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            loaded_data.append(json.loads(line))

    # Convert loaded_data into a format suitable for Dataset
    dataset_dicts = []
    for item in loaded_data:
        user_content = item[0]['content']
        assistant_content = item[1]['content']

        # Extract the image and text parts of the user turn
        image_info = next((x for x in user_content if x['type'] == 'image'), None)
        text_info = next((x for x in user_content if x['type'] == 'text'), None)

        # Build the new record
        dataset_entry = {
            'role': 'user',
            'image_path': image_info['image'] if image_info else None,
            'question': text_info['text'] if text_info else None,
            'assistant_answer': assistant_content
        }

        dataset_dicts.append(dataset_entry)

    return dataset_dicts

def process_func_batch(examples):
    MAX_LENGTH = 2048
    input_ids, attention_mask, labels, pixel_values, image_grid_thw = [], [], [], [], []

    for example in zip(examples["question"], examples["assistant_answer"], examples["image_path"]):
        input_content, output_content, file_path = example

        messages = [
            {
                "role": "user",
                "content": [
                    {
                        "type": "image",
                        "image": f"{file_path}",
                        "resized_height": 280,
                        "resized_width": 280,
                    },
                    {"type": "text", "text": input_content},
                ],
            }
        ]

        text = processor.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        image_inputs, video_inputs = process_vision_info(messages)
        inputs = processor(
            text=[text],
            images=image_inputs,
            videos=video_inputs,
            padding=False,  # do not pad yet
            return_tensors="pt",
        )

        inputs_dict = {key: value.tolist() for key, value in inputs.items()}
        instruction_input_ids = inputs_dict['input_ids'][0]
        instruction_attention_mask = inputs_dict['attention_mask'][0]

        response = tokenizer(f"{output_content}", add_special_tokens=False)
        response_input_ids = response['input_ids']
        response_attention_mask = response['attention_mask']

        # Work out how much room is left for the response
        remaining_length = MAX_LENGTH - len(instruction_input_ids) - 1  # reserve space for one PAD token

        if remaining_length < 0:
            # The instruction alone exceeds the maximum length, so truncate it
            truncation_length = len(instruction_input_ids) + remaining_length
            instruction_input_ids = instruction_input_ids[:truncation_length]
            instruction_attention_mask = instruction_attention_mask[:truncation_length]
            remaining_length = 0

        # Truncate the response to fit the remaining space
        current_input_ids = (
            instruction_input_ids + response_input_ids[:remaining_length] + [tokenizer.pad_token_id]
        )

        current_attention_mask = (
            instruction_attention_mask + response_attention_mask[:remaining_length] + [1]
        )
        current_labels = (
            [-100] * len(instruction_input_ids) +
            response_input_ids[:remaining_length] +
            [tokenizer.pad_token_id]
        )

        # Pad out to MAX_LENGTH
        if len(current_input_ids) < MAX_LENGTH:
            current_input_ids += [tokenizer.pad_token_id] * (MAX_LENGTH - len(current_input_ids))
            current_attention_mask += [0] * (MAX_LENGTH - len(current_attention_mask))
            current_labels += [-100] * (MAX_LENGTH - len(current_labels))

        input_ids.append(current_input_ids)
        attention_mask.append(current_attention_mask)
        labels.append(current_labels)
        pixel_values.append(inputs_dict['pixel_values'])
        image_grid_thw.append(torch.tensor(inputs_dict['image_grid_thw']).squeeze(0))

    return {
        "input_ids": torch.tensor(input_ids),
        "attention_mask": torch.tensor(attention_mask),
        "labels": torch.tensor(labels),
        "pixel_values": torch.tensor(pixel_values),
        "image_grid_thw": torch.stack(image_grid_thw)
    }

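# A worked example of the truncation arithmetic above (illustrative numbers,
# not from the commit): with MAX_LENGTH = 2048 and a 1900-token instruction,
# remaining_length = 2048 - 1900 - 1 = 147, so the response keeps its first
# 147 tokens and one pad token is appended: 1900 + 147 + 1 = 2048 exactly.
# If the instruction alone were 2100 tokens, remaining_length would be -53;
# the instruction is then cut to 2047 tokens and the response is dropped,
# leaving only masked (-100) labels plus the trailing pad token.
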
def predict(messages, model):
    # Prepare the inputs for inference
    text = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    image_inputs, video_inputs = process_vision_info(messages)

    inputs = processor(
        text=[text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    )

    device = next(model.parameters()).device
    # Move every tensor to the model's device
    for key, value in inputs.items():
        inputs[key] = value.to(device)

    # Generate the output
    generated_ids = model.generate(**inputs, max_new_tokens=128)
    generated_ids_trimmed = [
        out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]
    output_text = processor.batch_decode(
        generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
    )
    del inputs

    return output_text[0]

# Download the Qwen2-VL model from ModelScope into a local directory
# model_dir = snapshot_download("Qwen/Qwen2-VL-2B-Instruct", cache_dir="./", revision="master")

# Load the model weights with Transformers
tokenizer = AutoTokenizer.from_pretrained("/home/gyk/models/Qwen2.5-VL-7B-Instruct/", use_fast=True)
min_pixels = 256*28*28
max_pixels = 1280*28*28
processor = AutoProcessor.from_pretrained("/home/gyk/models/Qwen2.5-VL-7B-Instruct/", min_pixels=min_pixels, max_pixels=max_pixels, use_fast=True)
device_map = {"": int(os.environ.get("LOCAL_RANK") or 0)}
model = Qwen2_5_VLForConditionalGeneration.from_pretrained("/home/gyk/models/Qwen2.5-VL-7B-Instruct/", device_map=device_map, torch_dtype=torch.bfloat16)

model.enable_input_require_grads()  # Required when gradient checkpointing is enabled

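# Note (editorial): device_map = {"": LOCAL_RANK} pins a full copy of the model
# to each process's own GPU. That is the layout ZeRO stage 2 expects: optimizer
# states and gradients are sharded across ranks, while the parameters
# themselves stay replicated.
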
# Process the dataset: read the json files
# Load the test and val datasets separately
test_data_path = 'data_test.jsonl'
val_data_path = 'data_val.jsonl'

test_dataset_dicts = load_and_convert_data(test_data_path)
val_dataset_dicts = load_and_convert_data(val_data_path)

# Create Dataset objects
test_tmp_dataset = Dataset.from_list(test_dataset_dicts)
val_tmp_dataset = Dataset.from_list(val_dataset_dicts)

indices = list(range(1000))
test_tmp_dataset = test_tmp_dataset.select(indices)
indices = list(range(50))
val_tmp_dataset = val_tmp_dataset.select(indices)

test_dataset = test_tmp_dataset.map(process_func_batch, batched=True, batch_size=4)
val_dataset = val_tmp_dataset.map(process_func_batch, batched=True, batch_size=4)

print("Test and Val Datasets have been created.")

# Configure LoRA
config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    inference_mode=False,  # training mode
    r=64,  # LoRA rank
    lora_alpha=16,  # LoRA alpha; see the LoRA paper for what it does
    lora_dropout=0.05,  # dropout rate
    bias="none",
)

# Wrap the base model with the LoRA adapter
peft_model = get_peft_model(model, config)
peft_model.config.use_cache = False  # type: ignore

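# Optional sanity check (not in the original commit): peft can report how
# small the trainable LoRA update is relative to the frozen 7B base model.
# peft_model.print_trainable_parameters()
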
# Configure the training arguments
args = TrainingArguments(
    output_dir="./output2/Qwen2.5-VL-7B",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,
    logging_steps=10,
    logging_first_step=True,
    num_train_epochs=4,
    save_steps=50,
    learning_rate=1e-4,
    save_on_each_node=True,
    gradient_checkpointing=True,
    report_to="none",
    # bf16=True,
    fp16=True,
    max_grad_norm=1.0,
    deepspeed=DS_CONFIG
)

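# Effective batch size per optimizer step is per_device_train_batch_size (2)
# x gradient_accumulation_steps (8) x GPU count, e.g. 2 x 8 x 3 = 48 on the
# three GPUs suggested by the commented CUDA_VISIBLE_DEVICES = "0,1,2" line
# above (the script itself does not fix the GPU count).
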
# Set up the SwanLab callback
swanlab_callback = SwanLabCallback(
    project="Qwen2.5-VL-finetune",
    experiment_name="qwen2.5-vl-refcocog",
    config={
        "model": "https://modelscope.cn/models/Qwen/Qwen2.5-VL-7B-Instruct",
        "dataset": "https://huggingface.co/datasets/Kangheng/refcocog",
        "github": "https://github.com/datawhalechina/self-llm",
        "prompt": "Please provide the bounding box for the following description: ",
        "train_data_number": len(test_dataset),
        "lora_rank": 64,
        "lora_alpha": 16,
        "lora_dropout": 0.05,  # kept in sync with the LoraConfig above (the original logged 0.1)
    },
)

# Configure the Trainer
trainer = Trainer(
    model=peft_model,
    args=args,
    train_dataset=test_dataset,  # note: the split named test_dataset is the one trained on
    eval_dataset=val_dataset,
    data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer, padding=True),
    callbacks=[swanlab_callback],
)


# Start training
trainer.train()
trainer.save_model('./output2/Qwen2.5-VL-7B')
trainer.save_state()


# ==================== Test mode ====================
# Configure the test-time LoRA settings
val_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    inference_mode=True,  # inference mode
    r=64,  # LoRA rank
    lora_alpha=16,  # LoRA alpha; see the LoRA paper for what it does
    lora_dropout=0.05,  # dropout rate
    bias="none",
)


# Load the fine-tuned adapter for evaluation
val_peft_model = PeftModel.from_pretrained(model, model_id="./output2/Qwen2.5-VL-7B/", config=val_config)

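# Optional (not part of the commit): for deployment the adapter can be folded
# back into the base weights, removing the LoRA indirection at inference time:
# merged_model = val_peft_model.merge_and_unload()
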
# A list for collecting everything we want to save
results_to_save = []

# Also build test_image_list for SwanLab logging
test_image_list = []


for item in val_dataset:
    if not isinstance(item, dict):
        print("failed to parse item")
        sys.exit()
    # Prepare the input messages
    messages = [{
        "role": "user",
        "content": [
            {
                "type": "image",
                "image": item['image_path']
            },
            {
                "type": "text",
                "text": item['question']
            }
        ]}]

    # Get the model's prediction
    response = predict(messages, val_peft_model)
    messages.append({"role": "assistant", "content": f"{response}"})

    # Print/log the prediction
    print(messages[-1])

    # Record the prediction, the reference answer, and the image path
    results_to_save.append({
        'image_path': item['image_path'],
        'question': item['question'],
        'original_answer': item['assistant_answer'],
        'predicted_answer': response,
    })

    # Also add to test_image_list for SwanLab logging
    test_image_list.append(swanlab.Image(item['image_path'], caption=response))

# Path of the output file
output_file_path = './predictions_results.json'

# Write the results to a JSON file
with open(output_file_path, 'w', encoding='utf-8') as file:
    json.dump(results_to_save, file, ensure_ascii=False, indent=4)

print(f"Results have been saved to {output_file_path}")
swanlab.init()
# Log the predictions with SwanLab
swanlab.log({"Prediction": test_image_list})

# When running in a Jupyter Notebook, call swanlab.finish() to stop the SwanLab run
swanlab.finish()