{ "cells": [ { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import matplotlib.pyplot as plt\n", "from sklearn.preprocessing import PolynomialFeatures\n", "from sklearn.linear_model import LinearRegression\n", "from sklearn.linear_model import Ridge\n", "from sklearn.pipeline import make_pipeline\n", "from sklearn.metrics import mean_squared_error\n", "from sklearn.preprocessing import StandardScaler" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$$\n", "f_{w,b}(x) = 2x + 3x^2 + 4x^3 + 5\n", "$$" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def generateData():\n", " # 生成数据集\n", " x = np.linspace(-10, 10, 40)\n", " # 使用噪声构建y\n", " y = 2 * x + 3 * x**2 + 4 * x**3 + 5 + np.random.normal(scale=200, size=x.shape)\n", "\n", " data = np.column_stack((x, y))\n", "\n", " # 绘制数据集\n", " plt.figure(dpi=600)\n", " plt.scatter(x, y)\n", " plt.show()\n", " \n", " return data\n", "\n", "data = generateData()" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "# 保存数据\n", "def save_data(filename, data):\n", " np.savetxt(filename, data, delimiter=',')\n", " print(f\"{filename} 已成功创建并写入数据。\")\n", "\n", "# save_data('data.csv', data)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def polynomial_normal_regression(data, degree=3):\n", " # 生成多项式特征并进行线性回归\n", " model = make_pipeline(PolynomialFeatures(degree), LinearRegression())\n", " model.fit(data[:, 0].reshape(-1, 1), data[:, 1])\n", " y_pred = model.predict(data[:, 0].reshape(-1, 1))\n", " \n", " # 输出拟合后的多项式\n", " coef = model.named_steps['linearregression'].coef_\n", " intercept = model.named_steps['linearregression'].intercept_\n", " poly_features = model.named_steps['polynomialfeatures']\n", " feature_names = poly_features.get_feature_names_out(['x'])\n", " \n", " polynomial = \" + \".join(f\"{coef[i]:.2f}*{feature_names[i]}\" for i in range(len(coef)))\n", " polynomial = f\"{intercept:.2f} + \" + polynomial\n", " print(f\"拟合后的多项式: {polynomial}\")\n", " \n", " # 计算并输出损失\n", " loss = mean_squared_error(data[:, 1], y_pred)\n", " print(f\"损失: {loss:.2f}\")\n", "\n", " # 绘制拟合结果\n", " plt.figure(dpi=600)\n", " plt.scatter(data[:, 0], data[:, 1], label='original data')\n", " plt.plot(data[:, 0], y_pred, label='fit curve', color='red')\n", " plt.legend()\n", " plt.show()\n", "\n", "polynomial_normal_regression(data)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def polynomial_ridge_regression(data, degree=3):\n", " # 生成多项式特征并进行线性回归\n", " model = make_pipeline(PolynomialFeatures(degree), Ridge(alpha=0.5))\n", " model.fit(data[:, 0].reshape(-1, 1), data[:, 1])\n", " y_pred = model.predict(data[:, 0].reshape(-1, 1))\n", " \n", " # 输出拟合后的多项式\n", " coef = model.named_steps['ridge'].coef_\n", " intercept = model.named_steps['ridge'].intercept_\n", " poly_features = model.named_steps['polynomialfeatures']\n", " feature_names = poly_features.get_feature_names_out(['x'])\n", " \n", " polynomial = \" + \".join(f\"{coef[i]:.2f}*{feature_names[i]}\" for i in range(len(coef)))\n", " polynomial = f\"{intercept:.2f} + \" + polynomial\n", " print(f\"拟合后的多项式: {polynomial}\")\n", " \n", " # 计算并输出损失\n", " loss = mean_squared_error(data[:, 1], y_pred)\n", " print(f\"损失: {loss:.2f}\")\n", "\n", " # 绘制拟合结果\n", " plt.figure(dpi=600)\n", " plt.scatter(data[:, 0], data[:, 1], label='original data')\n", " plt.plot(data[:, 0], y_pred, label='fit curve', color='red')\n", " plt.legend()\n", " plt.show()\n", "\n", "polynomial_ridge_regression(data)" ] } ], "metadata": { "kernelspec": { "display_name": "pt", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.14" } }, "nbformat": 4, "nbformat_minor": 2 }