# Polynomial fitting
import sys
|
||
|
||
import torch
|
||
import argparse
|
||
|
||
from torch.utils.data import TensorDataset, DataLoader
|
||
# Make the project-local FC_ML_* packages importable.
# A raw string is used because "\l", "\p" and "\T" are not valid escape
# sequences: a plain literal only works by accident and triggers a
# SyntaxWarning on recent Python versions. The resulting path is unchanged.
sys.path.append(r"D:\liyong\project\TVS_ML")  # replace with the actual project path
|
||
from FC_ML_Data.FC_ML_Data_Load.Data_Load_Excel import get_data_from_excel_xy
|
||
from FC_ML_Data.FC_ML_Data_Output.Data_Output_Pytorch import export_model
|
||
from FC_ML_Loss_Function.Loss_Function_Selector import LossFunctionSelector
|
||
from FC_ML_Optim_Function.Optimizer_Selector import OptimizerSelector
|
||
|
||
# 生成训练数据
|
||
def make_features(x, degree):
    """Build a polynomial feature matrix from ``x``.

    Stacks ``x**1, x**2, ..., x**degree`` along dimension 1, so the number
    of stacked powers equals ``degree``.

    Args:
        x: Input tensor of sample values.
        degree: Highest power to include (inclusive); must be at least 1.

    Returns:
        A tensor holding the powers of ``x`` stacked along dim 1. For a 1-D
        ``x`` of length N the result has shape ``(N, degree)``.

    Raises:
        ValueError: If ``degree`` is smaller than 1.
    """
    if degree < 1:
        raise ValueError(f"degree must be >= 1, got {degree}")
    # range() excludes its upper bound, so degree + 1 is required for the
    # highest power x**degree to be part of the feature matrix.
    return torch.stack([x ** power for power in range(1, degree + 1)], dim=1)
|
||
|
||
class PolyModel(torch.nn.Module):
    """Single linear layer applied to precomputed polynomial features.

    Args:
        input_size: Number of feature columns fed into the linear layer
            (e.g. 3 when the features are x, x^2 and x^3).
    """

    def __init__(self, input_size):
        super(PolyModel, self).__init__()
        # One affine map: input_size features in, one predicted value out.
        self.linear = torch.nn.Linear(in_features=input_size, out_features=1)

    def forward(self, x):
        """Return the linear layer's output for the feature tensor ``x``."""
        prediction = self.linear(x)
        return prediction
|
||
|
||
# Default compute device: CUDA when PyTorch reports it as available, CPU otherwise.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
|
||
|
||
if __name__ == "__main__":
    # Script entry point: parse the CLI options, load (x, y) samples from an
    # Excel sheet, fit a linear model on the polynomial features
    # x, x^2, ..., x^degree, report losses periodically and export the model.

    # Per-epoch average losses, collected for later inspection.
    train_losses = []
    test_losses = []

    # ---- Command-line configuration ----
    parser = argparse.ArgumentParser(description='模型训练参数配置')
    parser.add_argument('--data_dir', default='D:/liyong/project/TVS_ML/Test_Data/multi_poly/output.xlsx', help='数据集路径')
    parser.add_argument('--model_dir', default='D:/liyong/project/TVS_ML/Test_Data/multi_poly/', help='模型导出路径')
    parser.add_argument('--name', default='model', help='导出模型名称')
    parser.add_argument('--model_format', default='pt', help='模型格式')  # pt / onnx / bin
    parser.add_argument('--epochs', type=int, default=1000, help='训练轮次')
    parser.add_argument('--epochs_output', type=int, default=10, help='训练轮次损失打印')
    parser.add_argument('--degree', type=int, default=3, help='多项式拟合阶数')
    parser.add_argument('--lr', type=float, default=0.001, help='学习率')  # typical range 0.1 - 0.0001
    # Larger batches use more memory but load/compute faster.
    parser.add_argument('--batch_size', type=int, default=32, help='批量加载大小')
    # Loss names listed in the original notes ("TVS" marker kept as written):
    #   'mse': mean squared error (TVS)
    #   'l1': mean absolute error (TVS)
    #   'cross_entropy': cross entropy
    #   'bce': binary cross entropy
    #   'smooth_l1': smooth L1
    #   'kl_div': KL divergence
    #   'hinge': hinge loss
    #   'triplet': triplet loss
    parser.add_argument('--loss', default='mse', help='损失函数')
    # Optimizer names listed in the original notes ("TVS" marker kept as written):
    #   'sgd': stochastic gradient descent (TVS)
    #   'adam': adaptive moment estimation (TVS)
    #   'rmsprop': root mean square propagation
    #   'adagrad': adaptive gradient
    #   'adamw': Adam with decoupled weight decay
    parser.add_argument('--optim', default='sgd', help='优化函数')
    # Fraction of the data used for training, e.g. 0.8 = 80 %; range [0, 1].
    parser.add_argument('--percent', type=float, default=0.8, help='训练集比例')
    parser.add_argument('--sheet', default='Sheet1', help='数据表单名')  # not exposed to end users
    # When set, --normalization_type takes effect.
    parser.add_argument('--normalization', action='store_true', help='是否开启数据预处理')
    # Normalization methods noted by the original author:
    #   Min-Max: interval scaling.
    #   Z-score: variance scaling for standardization; suited to unknown
    #     distributions, outliers, and gradient-descent based models.
    #   Decimal scaling: compared with min-max it keeps the shape of the
    #     original distribution; range is roughly [-1, 1].
    parser.add_argument('--normalization_type', default='minmax', help='数据处理方式')
    # NOTE: store_false means shuffling is ON by default and passing
    # --shuffle turns it OFF (intended for strongly time-ordered data).
    parser.add_argument('--shuffle', action='store_false', help='数据乱序')
    # 0 by default; more workers speed up data loading. Not exposed to end users.
    parser.add_argument('--num_workers', type=int, default=0, help='加速线程数量')
    # NOTE: store_true means the GPU is used only when --gpu is passed (and
    # CUDA is available); without the flag training runs on the CPU.
    parser.add_argument('--gpu', action='store_true', help='启用GPU加速')
    args = parser.parse_args()

    # Reject split ratios outside the documented [0, 1] interval up front;
    # a negative value would otherwise silently produce a nonsensical slice.
    if not 0.0 <= args.percent <= 1.0:
        parser.error(f"--percent must be within [0, 1], got {args.percent}")

    print(f"训练数据源: {args.data_dir},模型导出路径:{args.model_dir},"
          f"模型名称:{args.name} ,模型导出格式:{args.model_format},"
          f"训练轮次: {args.epochs}, 多项式阶数:{args.degree},"
          f"学习率:{args.lr},损失函数:{args.loss},优化函数:{args.optim},"
          f"数据表单名:{args.sheet},是否开启数据预处理:{args.normalization},"
          f"数据处理方式:{args.normalization_type},gpu加速:{args.gpu},"
          f"批量加载:{args.batch_size},数据乱序:{args.shuffle},"
          f"加速线程数量:{args.num_workers},训练集比例:{args.percent},训练轮次损失打印:{args.epochs_output}")

    # DEVICE defaults to CUDA when available; force the CPU unless --gpu was given.
    if not args.gpu:
        DEVICE = torch.device("cpu")

    # ---- Data loading ----
    # x_ori / y_ori / normalization are only needed by the optional
    # visualization code kept (commented out) at the end of the file.
    x_ori, y_ori, x, y, normalization = get_data_from_excel_xy(
        args.data_dir, args.sheet, args.normalization, args.normalization_type)

    # Split sequentially: the first `split` samples train, the remainder tests.
    split = int(args.percent * len(x))
    if split <= 0:
        # An empty training set would only fail later with an opaque
        # ZeroDivisionError when averaging the loss.
        raise ValueError(
            f"training split is empty (percent={args.percent}, samples={len(x)})")
    train_dataset = TensorDataset(x[:split], y[:split])
    test_dataset = TensorDataset(x[split:], y[split:])
    print(train_dataset, test_dataset)

    train_loader = DataLoader(
        train_dataset,
        batch_size=args.batch_size,    # mini-batch size
        shuffle=args.shuffle,          # shuffle the training samples
        num_workers=args.num_workers,  # parallel loader workers
    )
    test_loader = DataLoader(
        test_dataset,
        batch_size=args.batch_size,    # mini-batch size
        shuffle=False,                 # the evaluation set is never shuffled
        num_workers=args.num_workers,  # parallel loader workers
    )
    num_train = len(train_loader.dataset)
    num_test = len(test_loader.dataset)

    # ---- Model, loss and optimizer ----
    model = PolyModel(input_size=args.degree).to(DEVICE)
    loss_selector = LossFunctionSelector()
    criterion = loss_selector.get_loss(args.loss)
    optim_selector = OptimizerSelector()
    optimizer = optim_selector.get_optimizer(model.parameters(), args.optim, lr=args.lr)

    # Exponents 1..degree as a (1, degree) row. This is loop-invariant, so it
    # is built once here instead of once per batch.
    # Broadcasting below assumes each feature batch is a (B, 1) column —
    # TODO confirm against get_data_from_excel_xy.
    powers = torch.arange(1, args.degree + 1, dtype=x.dtype).view(1, -1)

    for epoch in range(args.epochs):
        # ---- Training pass ----
        epoch_train_loss = 0
        model.train()
        for features, labels in train_loader:
            x_poly = features ** powers
            x_poly, labels = x_poly.to(DEVICE), labels.to(DEVICE)
            pred = model(x_poly)
            # Match the prediction to the label shape explicitly. squeeze()
            # would collapse a final batch of size 1 to a scalar and would
            # broadcast incorrectly if labels were shaped (B, 1).
            # Assumes one label value per sample — TODO confirm.
            loss = criterion(pred.reshape(labels.shape), labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Weight by batch size so the epoch average is per sample
            # (presumes the criterion returns a batch mean — verify).
            epoch_train_loss += loss.item() * x_poly.size(0)

        avg_train_loss = epoch_train_loss / num_train
        train_losses.append(avg_train_loss)

        # ---- Evaluation pass ----
        model.eval()
        test_loss = 0
        with torch.no_grad():  # no gradient tracking during evaluation
            for features, labels in test_loader:
                x_poly = features ** powers
                x_poly, labels = x_poly.to(DEVICE), labels.to(DEVICE)
                preds = model(x_poly)
                test_loss += criterion(preds.reshape(labels.shape), labels).item() * x_poly.size(0)

        # With --percent 1.0 there is no held-out data; report NaN instead of
        # dividing by zero.
        avg_test_loss = test_loss / num_test if num_test else float("nan")
        test_losses.append(avg_test_loss)

        # Print the losses every `epochs_output` epochs (a non-positive
        # interval disables the periodic report instead of crashing).
        if args.epochs_output > 0 and epoch % args.epochs_output == 0:
            # The train/test ratio is undefined when the test loss is exactly 0.
            loss_ratio = avg_train_loss / avg_test_loss if avg_test_loss else float("nan")
            print(
                f"Epoch {epoch} | Train Loss: {avg_train_loss:.4f} | Test Loss: {avg_test_loss:.4f} | 损失比: {loss_ratio:.2f}:1")

    # ---- Export the trained model ----
    export_model(model, args.model_dir, args.name, args.model_format)
|
||
|
||
# 可视化
|
||
# import matplotlib.pyplot as plt
|
||
# plt.scatter(x_ori, y_ori, label='ori')
|
||
# powers = torch.arange(1, args.degree + 1, dtype=x.dtype)
|
||
# x_input = x ** powers.view(1, -1)
|
||
# x_input.to(DEVICE)
|
||
# model.to(DEVICE)
|
||
# y_output = model(x_input).detach().numpy()
|
||
# y_output = torch.tensor(y_output)
|
||
# y_real = y_output
|
||
# if args.normalization:
|
||
# y_real = normalization.inverse_transform(y_output)
|
||
# plt.plot(x_ori, y_real.squeeze(), 'r', label='fit')
|
||
# plt.legend()
|
||
# plt.show() |