{ "cells": [ { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "# 导入必要的库\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "from sklearn.datasets import make_classification\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.metrics import accuracy_score" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def generate_and_plot_dataset():\n", " # 生成一个逻辑回归的数据集\n", " x, y = make_classification(n_samples=100, n_features=2, \n", " n_informative=2, n_redundant=0, \n", " n_clusters_per_class=1, random_state=42)\n", " # 可视化数据集\n", " plt.scatter(x[:, 0], x[:, 1], c=y, cmap='viridis')\n", " plt.xlabel('Feature 1')\n", " plt.ylabel('Feature 2')\n", " plt.title('Logistic Regression Dataset')\n", " plt.show()\n", " return x,y\n", "x,y = generate_and_plot_dataset()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "\n", "def logistic_regression_analysis(x, y):\n", " # 创建逻辑回归模型\n", " model = LogisticRegression()\n", "\n", " # 拟合模型\n", " model.fit(x, y)\n", "\n", " # 预测\n", " y_pred = model.predict(x)\n", "\n", " # 计算准确率\n", " accuracy = accuracy_score(y, y_pred)\n", " print(f'模型准确率: {accuracy:.2f}')\n", "\n", " # 绘制决策边界\n", " # 创建网格以绘制决策边界\n", " xx, yy = np.meshgrid(np.arange(x[:, 0].min() - 1, x[:, 0].max() + 1, 0.01),\n", " np.arange(x[:, 1].min() - 1, x[:, 1].max() + 1, 0.01))\n", "\n", " # 预测网格点的类别\n", " Z = model.predict(np.c_[xx.ravel(), yy.ravel()])\n", " Z = Z.reshape(xx.shape)\n", "\n", " # 绘制决策边界\n", " plt.contourf(xx, yy, Z, alpha=0.8, cmap='viridis')\n", " plt.scatter(x[:, 0], x[:, 1], c=y, edgecolors='k', marker='o', cmap='viridis')\n", " plt.xlabel('Feature 1')\n", " plt.ylabel('Feature 2')\n", " plt.title('Logistic Regression Decision Boundary')\n", " plt.show()\n", "\n", "# 调用函数\n", "logistic_regression_analysis(x, y)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 手动实现\n", "$$\n", "f_{\\vec{w},b}(x) = g(z) = \\frac{1}{1 + e^{-(\\vec{w} \\cdot \\vec{x} + b)}}\n", "$$\n", "\n", "$$\n", "J(\\vec{w},b) = -\\frac{1}{m} \\sum_{i=1}^{m} \\left( y^{(i)}\\ln(f_{\\vec{w},b}(x^{(i)})) + (1-y^{(i)})\\ln(1-f_{\\vec{w},b}(x^{(i)})) \\right)\n", "$$\n", "\n", "$$\n", "w_j = w_j - \\alpha \\frac{\\partial J(\\vec{w},b)}{\\partial w_j}\n", "$$\n", "\n", "$$\n", "b = b - \\alpha \\frac{\\partial J(\\vec{w},b)}{\\partial b}\n", "$$\n", "\n", "$$\n", "\\frac{\\partial J(\\vec{w},b)}{\\partial w_j} = \\frac{1}{m} \\sum_{i=1}^{m} (f_{\\vec{w},b}(x^{(i)}) - y^{(i)})x_j^{(i)}\n", "$$\n", "\n", "$$\n", "\\frac{\\partial J(\\vec{w},b)}{\\partial b} = \\frac{1}{m} \\sum_{i=1}^{m} (f_{\\vec{w},b}(x^{(i)}) - y^{(i)})\n", "$$" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "def compute_cost_vectorized(w, b):\n", " m = x.shape[0]\n", " z = np.dot(x, w) + b\n", " f_wb = sigmoid(z)\n", " cost = (-1/m) * np.sum(y * np.log(f_wb) + (1 - y) * np.log(1 - f_wb))\n", " return cost\n", " \n", "def sigmoid(z):\n", " return 1/(1+np.exp(-z))\n", " \n", "def gradient_descent(w, b, alpha, num_iterations):\n", " m = len(x)\n", " for i in range(num_iterations):\n", " z = np.dot(w, x.T) + b\n", " f_wb = sigmoid(z)\n", " w -= alpha * 1/m * np.dot(x.T, (f_wb-y))\n", " b -= alpha * 1/m * np.sum(f_wb-y)\n", " return w, b" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "if __name__ == \"__main__\":\n", " w = np.zeros(x.shape[1])\n", " b = 0\n", " alpha = 0.04\n", " num_iterations = 50000\n", " w, b = gradient_descent(w, b, alpha, num_iterations)\n", " print(w, b)\n", " loss = compute_cost_vectorized(w, b)\n", " print(loss)\n", " \n", " plt.figure(dpi=600)\n", " # 绘制数据点\n", " plt.scatter(x[:, 0], x[:, 1], c=y, cmap='viridis', edgecolors='k')\n", "\n", " # 计算决策边界\n", " x_min, x_max = x[:, 0].min() - 1, x[:, 0].max() + 1\n", " y_min, y_max = x[:, 1].min() - 1, x[:, 1].max() + 1\n", " xx = np.linspace(x_min, x_max, 100)\n", " # 计算对应的y值: w1*x + w2*y + b = 0 => y = -(w1*x + b)/w2\n", " if w[1] != 0:\n", " yy = -(w[0] * xx + b) / w[1]\n", " plt.plot(xx, yy, color='red', label='Decision Boundary')\n", " else:\n", " # 当w2=0时,决策边界为垂直线x = -b/w1\n", " x_boundary = -b / w[0]\n", " plt.axvline(x=x_boundary, color='red', label='Decision Boundary')\n", "\n", " plt.xlabel('Feature 1')\n", " plt.ylabel('Feature 2')\n", " plt.title('Logistic Regression Decision Boundary')\n", " plt.legend()\n", " plt.show()" ] } ], "metadata": { "kernelspec": { "display_name": "pt", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.14" } }, "nbformat": 4, "nbformat_minor": 2 }