261 lines
7.0 KiB
Plaintext
261 lines
7.0 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 94,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# 引入库\n",
|
||
"import numpy as np\n",
|
||
"import matplotlib.pyplot as plt\n",
|
||
"import os\n",
|
||
"from sklearn.preprocessing import StandardScaler"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# 检查os位置\n",
|
||
"print(os.getcwd())"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"生成数据方式\n",
|
||
"$$ x_1 = x, x_2 = x^2, x_3 = \\sqrt{x} $$\n",
|
||
"$$ y = 1.35x_1 + 0.75x_2 + 2.1x_3 + 2.89 $$"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 96,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# 生成数据\n",
|
||
"def generate_data():\n",
|
||
" w = np.array([1.35, 0.75, 2.1]) # 权重\n",
|
||
" b = 2.89 # 偏置\n",
|
||
" x_min = 1\n",
|
||
" x_max = 9\n",
|
||
" x = np.linspace(x_min, x_max, 10) # 均匀分布\n",
|
||
" X = np.array([x, x**2, np.sqrt(x)]) # 特征矩阵3x10\n",
|
||
" y = np.dot(w, X) + b # 1x10 一维向量不区分行向量和列向量\n",
|
||
" y += np.random.normal(scale=0.5, size=y.shape)\n",
|
||
" data = np.column_stack((X.T, y)) # 10x4\n",
|
||
" return data\n",
|
||
"\n",
|
||
"# 保存数据\n",
|
||
"def save_data(filename, data):\n",
|
||
" np.savetxt(filename, data, delimiter=',')\n",
|
||
" print(f\"{filename} 已成功创建并写入数据。\")\n",
|
||
"\n",
|
||
"# 生成并保存数据\n",
|
||
"data = generate_data()\n",
|
||
"#save_data('./1_data.txt', data)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 97,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# 读取数据\n",
|
||
"#points = np.genfromtxt(\"./1_data.txt\", delimiter=',')\n",
|
||
"\n",
|
||
"scaler = StandardScaler()\n",
|
||
"points = data\n",
|
||
"\n",
|
||
"m = len(points[:,0])\n",
|
||
"x = points[:, :3] # 10x3\n",
|
||
"y = points[:,3] # 1x10"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"原函数:\n",
|
||
"$$\n",
|
||
"\\vec{w} = {\\begin{bmatrix} w_1 & w_2 & w_3 & \\cdots & w_n \\end{bmatrix}}^T\n",
|
||
"$$\n",
|
||
"\n",
|
||
"$$\n",
|
||
"\\vec{x} = \\begin{bmatrix} x_1 & x_2 & x_3 & \\cdots & x_n \\end{bmatrix}\n",
|
||
"$$\n",
|
||
"\n",
|
||
"$$\n",
|
||
"f_{\\vec{w} \\cdot,b}(\\vec{x}) = \\vec{w} \\cdot \\vec{x} + b\n",
|
||
"$$\n",
|
||
"\n",
|
||
"损失函数: \n",
|
||
"\n",
|
||
"$$\n",
|
||
"\\text{MSE} = \\frac{1}{2m} \\sum_{i=1}^{m} \\left( y^{(i)} - \\hat{y}^{(i)} \\right)^2\n",
|
||
"$$\n",
|
||
"\n",
|
||
"梯度下降:\n",
|
||
"\n",
|
||
"分别对每个w和b求偏导数,然后更新w和b\n",
|
||
" "
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 98,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"\n",
|
||
"\n",
|
||
"# 定义损失函数\n",
|
||
"def compute_loss(w, b):\n",
|
||
" return np.sum((y - (np.dot(w, x.T) + b)) ** 2) / (2 * m) # w 1x3 x.T 3x10 y 1x10 y-np.dot(w, x.T) 1x10 sum=number\n",
|
||
"\n",
|
||
"# 定义梯度下降\n",
|
||
"def gradient_descent(w, b, alpha, num_iter):\n",
|
||
" loss_history = []\n",
|
||
" for _ in range(num_iter):\n",
|
||
" error = y - np.dot(w, x.T) - b # 1x10\n",
|
||
" # 计算梯度\n",
|
||
" dw = -np.dot(x.T , error) / m # dw 1x3 \n",
|
||
" db = -np.sum(error) / m # db 1x1\n",
|
||
" # 更新w和b\n",
|
||
" w -= alpha * dw\n",
|
||
" b -= alpha * db\n",
|
||
" loss_history.append(compute_loss(w, b))\n",
|
||
" return w, b, loss_history"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"if __name__ == \"__main__\":\n",
|
||
" # 初始化w和b\n",
|
||
" w = np.zeros(x.shape[1])\n",
|
||
" b = 0.0\n",
|
||
" # 设置学习率\n",
|
||
" alpha = 0.01\n",
|
||
" # 设置迭代次数\n",
|
||
" num_iter = 10000\n",
|
||
" x = scaler.fit_transform(x) # 特征缩放\n",
|
||
" # 进行梯度下降\n",
|
||
" w, b, loss_history = gradient_descent(w, b, alpha, num_iter)\n",
|
||
" print(\"w:\", w)\n",
|
||
" print(\"b:\", b)\n",
|
||
" \n",
|
||
" # 计算损失\n",
|
||
" loss = compute_loss(w, b)\n",
|
||
" print(\"loss:\", loss)\n",
|
||
"\n",
|
||
" # 绘制\n",
|
||
" plt.figure(dpi=600)\n",
|
||
" plt.plot(range(num_iter), loss_history)\n",
|
||
" plt.xlabel('Iteration')\n",
|
||
" plt.ylabel('Loss')\n",
|
||
" plt.title('Loss History')\n",
|
||
" # 显示学习率\n",
|
||
" plt.text(num_iter * 0.6, max(loss_history) * 0.7, f'Learning Rate: {alpha}', fontsize=12, color='blue')\n",
|
||
"\n",
|
||
" plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# 预测\n",
|
||
"def predict(x, w, b):\n",
|
||
" x_scaled = scaler.transform(x)\n",
|
||
" y = np.dot(w, x_scaled.T) + b\n",
|
||
" return y\n",
|
||
"\n",
|
||
"def original_predict(x):\n",
|
||
" w = np.array([1.35, 0.75, 2.1])\n",
|
||
" b = 2.89\n",
|
||
" return np.dot(w, x.T) + b\n",
|
||
"\n",
|
||
"x_new = np.array([\n",
|
||
" [2, 4, np.sqrt(2)],\n",
|
||
" [3, 9, np.sqrt(3)],\n",
|
||
" [4, 16, np.sqrt(4)]\n",
|
||
"])\n",
|
||
"y_pred = predict(x_new, w, b)\n",
|
||
"print(\"预测值:\", y_pred)\n",
|
||
"y_pred_original = original_predict(x_new)\n",
|
||
"print(\"原始值:\", y_pred_original)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# 编码中遇到的错误\n",
|
||
"\n",
|
||
"梯度下降算法中,把x.T和error相乘了,正确应使用矩阵乘法。\n",
|
||
"\n",
|
||
"在特征缩放前,学习率大之后会overflow,导致模型不收敛。\n",
|
||
"\n",
|
||
"特征缩放后,由于代码写错,把结果也缩放了,导致后面预测时,结果不正确。"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"from sklearn.linear_model import LinearRegression\n",
|
||
"from sklearn.pipeline import make_pipeline\n",
|
||
"from sklearn.metrics import mean_squared_error\n",
|
||
"\n",
|
||
"model = make_pipeline(LinearRegression())\n",
|
||
"# 拟合模型\n",
|
||
"model.fit(x, y)\n",
|
||
"\n",
|
||
"# 预测\n",
|
||
"y_pred_sklearn = model.predict(scaler.transform(x_new))\n",
|
||
"print(\"使用sklearn预测值:\", y_pred_sklearn)\n",
|
||
"y_pred_original = original_predict(x_new)\n",
|
||
"print(\"原始值:\", y_pred_original)\n",
|
||
"# 计算并打印训练数据的损失(MSE)\n",
|
||
"train_loss = mean_squared_error(y_pred_original, y_pred_sklearn)\n",
|
||
"print(\"训练数据的损失 (MSE):\", train_loss)\n"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "pt",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.10.14"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 2
|
||
}
|