# Polynomial fitting: train a single linear layer on polynomial features of x with PyTorch.
|
|||
|
|
import sys
|
|||
|
|
|
|||
|
|
import torch
|
|||
|
|
import argparse
|
|||
|
|
|
|||
|
|
from torch.utils.data import TensorDataset, DataLoader
|
|||
|
|
# Extend the import search path with the project root (replace with the actual
# path on your machine) -- presumably where the FC_ML_* packages imported
# below live.
# A raw string is used so the backslashes of the Windows path are never
# interpreted as (invalid) escape sequences such as "\l" or "\p".
sys.path.append(r"D:\liyong\project\TVS_ML")
|
|||
|
|
from FC_ML_Data.FC_ML_Data_Load.Data_Load_Excel import get_data_from_excel_xy
|
|||
|
|
from FC_ML_Data.FC_ML_Data_Output.Data_Output_Pytorch import export_model
|
|||
|
|
from FC_ML_Loss_Function.Loss_Function_Selector import LossFunctionSelector
|
|||
|
|
from FC_ML_Optim_Function.Optimizer_Selector import OptimizerSelector
|
|||
|
|
|
|||
|
|
# Build the training features
def make_features(x, degree):
    """Build the polynomial feature matrix [x, x**2, ..., x**degree].

    Args:
        x: tensor of input samples. For a 1-D tensor of length N the result
            has shape (N, degree).
        degree: highest power to include; must be at least 1.

    Returns:
        A tensor whose slices along dim 1 are x**1, x**2, ..., x**degree.

    Raises:
        ValueError: if degree is smaller than 1.
    """
    if degree < 1:
        raise ValueError(f"degree must be >= 1, got {degree}")
    # The upper bound is inclusive (degree + 1) so that exactly `degree`
    # feature columns are produced, matching PolyModel(input_size=degree).
    return torch.stack([x ** power for power in range(1, degree + 1)], dim=1)
|
|||
|
|
|
|||
|
|
class PolyModel(torch.nn.Module):
    """Linear regression over polynomial features.

    The module owns a single fully connected layer that maps `input_size`
    feature columns (e.g. x, x**2, x**3 for input_size=3) to one output value.
    """

    def __init__(self, input_size):
        """Create the layer mapping `input_size` features to one output."""
        super().__init__()
        # The attribute name `linear` is part of the state_dict keys.
        self.linear = torch.nn.Linear(in_features=input_size, out_features=1)

    def forward(self, x):
        """Apply the linear layer to `x` and return its output."""
        prediction = self.linear(x)
        return prediction
|
|||
|
|
|
|||
|
|
# Default compute device: CUDA when PyTorch reports it as available, else CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
|
|||
|
|
|
|||
|
|
if __name__ == "__main__":
|
|||
|
|
# 在训练循环前初始化损失记录列表
|
|||
|
|
train_losses = []
|
|||
|
|
test_losses = []
|
|||
|
|
#加载外部参数
|
|||
|
|
parser = argparse.ArgumentParser(description='模型训练参数配置')
|
|||
|
|
parser.add_argument('--data_dir',default='D:/liyong/project/TVS_ML/Test_Data/multi_poly/output.xlsx', help='数据集路径')
|
|||
|
|
parser.add_argument('--model_dir',default='D:/liyong/project/TVS_ML/Test_Data/multi_poly/', help='模型导出路径')
|
|||
|
|
parser.add_argument('--name', default='model', help='导出模型名称')
|
|||
|
|
parser.add_argument('--model_format', default='pt', help='模型格式') ##pt onnx bin
|
|||
|
|
parser.add_argument('--epochs', type=int, default=1000, help='训练轮次')
|
|||
|
|
parser.add_argument('--epochs_output', type=int, default=10, help='训练轮次损失打印')
|
|||
|
|
parser.add_argument('--degree', type=int, default=3, help='多项式拟合阶数')
|
|||
|
|
parser.add_argument('--lr', type=float, default=0.001, help='学习率')# 0.1 - 0.0001
|
|||
|
|
parser.add_argument('--batch_size', type=int, default=32, help='批量加载大小')# 越大内存消耗越大,计算数据加载速度越快
|
|||
|
|
# 'mse': '均方误差', TVS
|
|||
|
|
# 'l1': '平均绝对误差', TVS
|
|||
|
|
# 'cross_entropy': '交叉熵',
|
|||
|
|
# 'bce': '二分类交叉熵',
|
|||
|
|
# 'smooth_l1': '平滑L1',
|
|||
|
|
# 'kl_div': 'KL散度',
|
|||
|
|
# 'hinge': '合页损失',
|
|||
|
|
# 'triplet': '三元组损失'
|
|||
|
|
parser.add_argument('--loss', default='mse', help='损失函数')
|
|||
|
|
# 'sgd': '随机梯度下降', TVS
|
|||
|
|
# 'adam': '自适应矩估计', TVS
|
|||
|
|
# 'rmsprop': '均方根传播',
|
|||
|
|
# 'adagrad': '自适应梯度',
|
|||
|
|
# 'adamw': 'Adam权重衰减版'
|
|||
|
|
parser.add_argument('--optim', default='sgd', help='优化函数')
|
|||
|
|
parser.add_argument('--percent', type=float, default=0.8, help='训练集比例') #0.8表示训练集合占总数据集比例80%,区间[0,1]
|
|||
|
|
parser.add_argument('--sheet', default='Sheet1', help='数据表单名')#不放出来
|
|||
|
|
parser.add_argument('--normalization',action='store_true', help='是否开启数据预处理')#如果开启normalization_type会生效
|
|||
|
|
# Min - Max等区间缩放法
|
|||
|
|
# Z-score等方差缩放法,用于数据标准化,数据特征:数据分布未知、存在异常值、模型依赖梯度下降
|
|||
|
|
# 小数定标标准化法,与min-max比,保持原始数据分布形态,区间≈[-1,1]
|
|||
|
|
parser.add_argument('--normalization_type', default='minmax', help='数据处理方式')
|
|||
|
|
parser.add_argument('--shuffle', action='store_false', help='数据乱序')#默认开启,强时序数据不开启
|
|||
|
|
parser.add_argument('--num_workers',type=int, default=0, help='加速线程数量')#默认为0,增加线程会提速数据加载 #不开放
|
|||
|
|
parser.add_argument('--gpu', action='store_true', help='启用GPU加速')#默认采用GPU加速,如果没有则CPU计算 #不开放
|
|||
|
|
args = parser.parse_args()
|
|||
|
|
print(f"训练数据源: {args.data_dir},模型导出路径:{args.model_dir},"
|
|||
|
|
f"模型名称:{args.name} ,模型导出格式:{args.model_format},"
|
|||
|
|
f"训练轮次: {args.epochs}, 多项式阶数:{args.degree},"
|
|||
|
|
f"学习率:{args.lr},损失函数:{args.loss},优化函数:{args.optim},"
|
|||
|
|
f"数据表单名:{args.sheet},是否开启数据预处理:{args.normalization},"
|
|||
|
|
f"数据处理方式:{args.normalization_type},gpu加速:{args.gpu},"
|
|||
|
|
f"批量加载:{args.batch_size},数据乱序:{args.shuffle},"
|
|||
|
|
f"加速线程数量:{args.num_workers},训练集比例:{args.percent},训练轮次损失打印:{args.epochs_output}")
|
|||
|
|
#默认开启GPU加速
|
|||
|
|
if not args.gpu:
|
|||
|
|
DEVICE = torch.device("cpu")
|
|||
|
|
|
|||
|
|
#加载训练数据
|
|||
|
|
x_ori,y_ori,x,y,normalization = get_data_from_excel_xy(args.data_dir,args.sheet,args.normalization,args.normalization_type)
|
|||
|
|
#拆分测试集和训练集
|
|||
|
|
aa = len(x)
|
|||
|
|
split = int(args.percent * len(x))
|
|||
|
|
train_dataset = TensorDataset(x[:split], y[:split])
|
|||
|
|
test_dataset = TensorDataset(x[split:], y[split:])
|
|||
|
|
print(train_dataset,test_dataset)
|
|||
|
|
train_loader = DataLoader(
|
|||
|
|
train_dataset,
|
|||
|
|
batch_size=args.batch_size, #批量加载数据
|
|||
|
|
shuffle=args.shuffle, #数据打乱
|
|||
|
|
num_workers=args.num_workers #多线程加速
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
test_loader = DataLoader(
|
|||
|
|
test_dataset,
|
|||
|
|
batch_size=args.batch_size, #批量加载数据
|
|||
|
|
shuffle=False, #验证集默认不打乱
|
|||
|
|
num_workers=args.num_workers #多线程加速
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
#初始化模型
|
|||
|
|
model = PolyModel(input_size = args.degree).to(DEVICE)
|
|||
|
|
#初始化损失函数
|
|||
|
|
loss_selector = LossFunctionSelector()
|
|||
|
|
criterion = loss_selector.get_loss(args.loss)
|
|||
|
|
#初始化优化器
|
|||
|
|
optim_selector = OptimizerSelector();
|
|||
|
|
|
|||
|
|
optimizer = optim_selector.get_optimizer(model.parameters(),args.optim, lr=args.lr)
|
|||
|
|
for epoch in range(args.epochs):
|
|||
|
|
#模型启用训练模式
|
|||
|
|
epoch_train_loss = 0
|
|||
|
|
model.train()
|
|||
|
|
for features,labels in train_loader:
|
|||
|
|
powers = torch.arange(1, args.degree + 1, dtype=x.dtype)
|
|||
|
|
x_poly = features ** powers.view(1, -1)
|
|||
|
|
x_poly,labels= x_poly.to(DEVICE),labels.to(DEVICE)
|
|||
|
|
pred = model(x_poly)
|
|||
|
|
loss = criterion(pred.squeeze(), labels)
|
|||
|
|
optimizer.zero_grad()
|
|||
|
|
loss.backward()
|
|||
|
|
optimizer.step()
|
|||
|
|
#计算损失
|
|||
|
|
epoch_train_loss += loss.item() * x_poly.size(0)
|
|||
|
|
|
|||
|
|
# 计算平均训练损失并记录
|
|||
|
|
avg_train_loss = epoch_train_loss / len(train_loader.dataset)
|
|||
|
|
train_losses.append(avg_train_loss)
|
|||
|
|
|
|||
|
|
#模型启用评估模式
|
|||
|
|
model.eval()
|
|||
|
|
test_loss = 0
|
|||
|
|
with torch.no_grad():#关闭梯度下降
|
|||
|
|
for features, labels in test_loader:
|
|||
|
|
powers = torch.arange(1, args.degree + 1, dtype=x.dtype)
|
|||
|
|
x_poly = features ** powers.view(1, -1)
|
|||
|
|
x_poly, labels = x_poly.to(DEVICE), labels.to(DEVICE)
|
|||
|
|
preds = model(x_poly)
|
|||
|
|
test_loss += criterion(preds.squeeze(), labels).item() * x_poly.size(0)
|
|||
|
|
|
|||
|
|
avg_test_loss = test_loss / len(test_loader.dataset)
|
|||
|
|
test_losses.append(avg_test_loss)
|
|||
|
|
|
|||
|
|
|
|||
|
|
#每100次迭代输出一次损失数值
|
|||
|
|
if epoch % args.epochs_output == 0:
|
|||
|
|
print(
|
|||
|
|
f"Epoch {epoch} | Train Loss: {avg_train_loss:.4f} | Test Loss: {avg_test_loss:.4f} | 损失比: {avg_train_loss / avg_test_loss:.2f}:1")
|
|||
|
|
|
|||
|
|
#导出训练后的模型
|
|||
|
|
export_model(model,args.model_dir,args.name,args.model_format)
|
|||
|
|
|
|||
|
|
# 可视化
|
|||
|
|
# import matplotlib.pyplot as plt
|
|||
|
|
# plt.scatter(x_ori, y_ori, label='ori')
|
|||
|
|
# powers = torch.arange(1, args.degree + 1, dtype=x.dtype)
|
|||
|
|
# x_input = x ** powers.view(1, -1)
|
|||
|
|
# x_input.to(DEVICE)
|
|||
|
|
# model.to(DEVICE)
|
|||
|
|
# y_output = model(x_input).detach().numpy()
|
|||
|
|
# y_output = torch.tensor(y_output)
|
|||
|
|
# y_real = y_output
|
|||
|
|
# if args.normalization:
|
|||
|
|
# y_real = normalization.inverse_transform(y_output)
|
|||
|
|
# plt.plot(x_ori, y_real.squeeze(), 'r', label='fit')
|
|||
|
|
# plt.legend()
|
|||
|
|
# plt.show()
|