Python 演算法 Day 2 - 理论基础 线性代数

Chap.I 理论基础

Part 1:线性代数

1. Getting Started with Equations

1-1. eval(),算数

# eval() evaluates an expression held in a string; the expression can read the
# variables defined here. Only pass trusted strings to eval().
x = 6
y = 10
result = eval('100 * x +300 +y +9')
print(result)
# >>  919

1-2. exec(),把 () 中的字串视作 Python 程式码并执行

# exec() runs a string as Python statements (here a whole for-loop).
# Only pass trusted strings to exec().
loop_source = '''for i in range(5):
    print (f"iter time: {i}" )'''
exec(loop_source)
# >>  iter time: 0
#     iter time: 1
#     iter time: 2
#     iter time: 3
#     iter time: 4

1-3. Symbol Python(sympy)

A. 解方程式 (预设右侧为0)

from sympy import Symbol
from sympy.solvers import solve

# solve() treats the expression as "expression = 0" and returns its roots.
x = Symbol('x')
print(solve(x**2 - 1, x))
# >>  [-1, 1]

B. 产生上标 (jupyter 有效 QQ)

from sympy import Symbol
from sympy.solvers import solve

x = Symbol('x')
# Left as a bare expression on purpose: as the last expression of a Jupyter
# cell it is displayed with typeset exponents.
x**4 + 2*x**2 + 3

http://img2.58codes.com/2024/20138527atIlBNkJ9G.png

C. 直接解方程式

from sympy.core import sympify
from sympy.solvers import solve  # imported here so the snippet runs on its own

# Solve the system  x + y + 2 = 0,  3x + 2y = 0  (each expression is taken as "= 0").
x, y = sympify('x, y')
print(solve([x + y + 2, 3*x + 2*y], dict=True))
# >>  [{x: 4, y: -6}]

2. Linear Equations 线性方程式

2-1. Intercept (截距)

import pandas as pd
import matplotlib.pyplot as plt

# y for each of the 21 integer x values in [-10, 10] on the line y = (3x - 4) / 2
df = pd.DataFrame({'x': range(-10, 11)})
df['y'] = (3*df['x'] - 4) / 2

# Plot the line together with both axes.
plt.plot(df.x, df.y, color="0.5")
plt.xlabel('x')
plt.ylabel('y')
plt.grid()
plt.axhline()
plt.axvline()

x_intercept = 4 / 3   # y = 0  ->  3x - 4 = 0
y_intercept = -2      # x = 0  ->  y = -4 / 2
plt.annotate('x-intercept', (x_intercept, 0))
plt.annotate('y-intercept', (0, y_intercept))
plt.plot([x_intercept, 0], [0, 0], color="r")   # segment from the origin to the x-intercept
plt.plot([0, 0], [0, y_intercept], color="y")   # segment from the origin to the y-intercept
plt.show()

http://img2.58codes.com/2024/20138527qjpcgEqBLY.png

2-2. slope

# Plot the same line again (reuses `df` and `plt` from the intercept example).
plt.plot(df.x, df.y, color="0.5")
plt.xlabel('x')
plt.ylabel('y')
plt.grid()
plt.axhline()
plt.axvline()

# the slope: moving 1 unit right from the y-intercept (0, -2) raises y by `slope`
slope = 1.5
x_slope = [0, 1]
y_slope = [-2, -2 + slope]
plt.plot(x_slope, y_slope, color='red', lw=5)
plt.savefig('pic 03.png')
plt.show()

http://img2.58codes.com/2024/20138527e6CGvUnMmB.png

2-3. Regression

import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LinearRegression as LR

# World population forecast.
# NOTE: the article elides the values between 1954 and 2100; replace the
# `...` placeholders with the full series before running.
year = [1950, 1951, 1952, 1953, 1954, ..., 2100]
pop = [2.53, 2.57, 2.62, 2.67, 2.71, ..., 10.85]

# 1. NumPy regression: fit a degree-1 polynomial and evaluate it on a dense grid.
x1 = np.linspace(1950, 2101, 2000)
fit = np.polyfit(year, pop, 1)
y1 = np.poly1d(fit)(x1)

plt.figure(num=None, figsize=(18, 10), dpi=80, facecolor='w', edgecolor='k')
plt.plot(year, pop, 'b-o', x1, y1, 'r--')
plt.show()

# 2. scikit-learn LinearRegression (expects X as a 2-D column, hence the reshape).
X = np.array(year).reshape(len(year), 1)
y = np.array(pop)
clf = LR()
clf.fit(X, y)    # linear regression
print(f'y = {clf.coef_[0]:.2f} * x + {clf.intercept_:.2f}')
# >> y = 0.06 * x + -116.36

plt.figure(num=None, figsize=(18, 10), dpi=80, facecolor='w', edgecolor='k')
x1 = np.arange(1950, 2101)
# The fitted line is coef * x + intercept (the intercept must be added, matching
# the equation printed above).
y1 = clf.coef_[0] * x1 + clf.intercept_
plt.plot(year, pop, 'b-o', x1, y1, 'r--')
plt.show()

http://img2.58codes.com/2024/201385271QdA5rLxzx.png

2-4. 用 make_regression 产生乱数资料

import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import make_regression as mr
from sklearn.linear_model import LinearRegression as LR

# n_features= : number of x variables
# noise=      : amount of noise
# bias=       : intercept
X, y = mr(n_samples=1000, n_features=1, noise=10, bias=50)

# Fit with scikit-learn LinearRegression.
clf = LR()
clf.fit(X, y)
coe = clf.coef_[0]
ic = clf.intercept_
# With a second feature the equation would gain a term: + {clf.coef_[1]:.2f} * x2
print(f' y = {coe:.2f} * x + {ic:.2f}')
# >> y = 89.69 * x + 52.88   (the data is random, so the numbers differ per run)

plt.figure(num=None, figsize=(18, 10), dpi=80, facecolor='w', edgecolor='k')
plt.scatter(X, y, c='b')         # the generated (X, y) points
# Span the regression line over the actual data range instead of a fixed interval.
X1 = np.linspace(X.min(), X.max(), 100)
plt.plot(X1, coe*X1 + ic, 'r')   # fitted regression line in red
plt.show()

http://img2.58codes.com/2024/20138527St0kGnLB1D.png

3. Systems of Equations 联立方程式

Numpy 线性代数解联立方程式
EX1. 4x - 5y = -13
-2x + 3y = 9

import numpy as np

# Coefficient matrix and right-hand side of
#    4x - 5y = -13
#   -2x + 3y =   9
a = np.array([[4, -5], [-2, 3]])
b = np.array([-13, 9])
solution = np.linalg.solve(a, b)
print(solution)
# >>  [3. 5.]

EX2. x + 2y = 5
y - 3z = 5
3x - z = 4

import numpy as np

# Coefficient matrix and right-hand side of
#    x + 2y      = 5
#        y - 3z  = 5
#   3x      -  z = 4
a = np.array([[1, 2, 0], [0, 1, -3], [3, 0, -1]])
b = np.array([5, 5, 4])
# Solve directly rather than forming the explicit inverse (inv(a) @ b); this
# also matches the approach used in EX1.
solution = np.linalg.solve(a, b)
print(solution)
# >>  [ 1.  2. -1.]

4. Exponentials, Radicals, and Logs 指数、根号和对数

4-1. Basic

import math

# Exponent to which 4 must be raised to give 16.
log4_of_16 = math.log(16, 4)
print(log4_of_16)
# >>  2.0

# Exponent to which 10 must be raised to give 100.
log10_of_100 = math.log10(100)
print(log10_of_100)
# >>  2.0

# e & pi
print(math.e, math.pi)  # same values as np.e, np.pi
# >>  2.718281828459045 3.141592653589793

# Exponent to which e must be raised to give 100 (math.log defaults to base e).
ln_of_100 = math.log(100)
print(ln_of_100)
# >>  4.605170185988092

4-2. Solving Equations with Exponentials

複利计算

import pandas as pd
from matplotlib import pyplot as plt

# Compound interest at 2% per year: balance = principal * 1.02 ** years.
m = int(input('请输入你想存多少 (钱): '))   # principal
i = int(input('请输入你想存多久 (年): '))   # number of years
total_money = m*(1.02**i)

# Balance curve for years 1..49, extended when the requested horizon is longer
# so the highlighted point always lies on the curve.
last_year = max(49, i)
df = pd.DataFrame({'Year': range(1, last_year + 1)})
df['Balance'] = m*(1.02**df['Year'])

plt.plot(df.Year, df.Balance, 'b--', i, total_money, 'ro')
plt.xlabel('Year')
plt.ylabel('Balance')
plt.grid()
plt.title(f'Start with: {m}$, save for {i} years.')
plt.annotate(f'Your money: {total_money:.2f}', (i+1, total_money))
plt.show()

http://img2.58codes.com/2024/20138527GkmDkgUXg9.png

5. Polynomials 多项式

Adding Polynomials
Subtracting Polynomials
Multiplying Polynomials
Dividing Polynomials

6. Factorization 因式分解

Greatest Common Factor (最大公因数)

def GCD(x, y):
    """Return the greatest common divisor of the integers x and y.

    Uses the Euclidean algorithm: gcd(x, y) == gcd(y, x % y) until the
    remainder is 0. The result is non-negative; GCD(0, 0) is 0.
    (Equivalent to the standard library's math.gcd.)
    """
    while y:
        x, y = y, x % y
    return abs(x)


if __name__ == "__main__":
    a = int(input('数字1: '))
    b = int(input('数字2: '))
    print(GCD(a, b))

7. Quadratic Equations 二次方程式

7-1. Parabola 抛物线

import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

# An upward-opening parabola and its mirror image about the x-axis.
df = pd.DataFrame({'x': range(-9, 9)})
df['y1'] = 2*df['x']**2 + 2*df['x'] - 4
df['y2'] = -(2*df['x']**2 + 2*df['x'] - 4)

# Plot both curves with the axes drawn.
plt.plot(df.x, df.y1, 'b', df.x, df.y2, 'r')
plt.xlabel('x')
plt.ylabel('y')
plt.grid()
plt.axhline()
plt.axvline()
plt.show()

http://img2.58codes.com/2024/20138527TAwmMFx5rt.png

7-2. Parabola Vertex and Line of Symmetry 抛物线顶点与对称线

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


def plot_parabola(a, b, c):
    """Plot y = a*x**2 + b*x + c on [-10, 10] with its vertex and line of symmetry.

    The figure is saved to 'pic 10.png' and then shown.

    Args:
        a, b, c: coefficients of the quadratic; `a` must be non-zero
            (the vertex formula divides by 2*a).
    """
    # The slope is 0 at the extremum: f'(x) = 2ax + b = 0  ->  x = -b / (2a)
    vx = (-1*b)/(2*a)
    # Substitute back into f(x) to get the vertex's y coordinate.
    vy = a*vx**2 + b*vx + c

    df = pd.DataFrame({'x': np.linspace(-10, 10, 100)})
    df['y'] = a*df['x']**2 + b*df['x'] + c

    plt.xlabel('x')
    plt.ylabel('y')
    plt.grid()
    plt.axhline(c='b')  # horizontal reference line
    plt.axvline(c='b')  # vertical reference line

    # Line of symmetry: a vertical segment at x = vx spanning the plotted y range.
    xp = [vx, vx]
    yp = [df.y.min(), df.y.max()]
    plt.plot(df.x, df.y, '0.5', xp, yp, 'm--')
    plt.scatter(vx, vy, c='r')  # the vertex
    # Label placed one unit left of the vertex at y = (vy + 5) * sign(a).
    plt.annotate('vertex', (vx, vy), xytext=(vx - 1, (vy + 5)* np.sign(a)))
    plt.savefig('pic 10.png')
    plt.show()


plot_parabola(2, 2, -4)

http://img2.58codes.com/2024/20138527e4YbsY3Yen.png

8. Function 函数

8-1. Basic

import numpy as np
import matplotlib.pyplot as plt


def f(x):
    """Return x squared plus 2."""
    return x**2 + 2

8-2. Bounds of a Function

import numpy as np
import matplotlib.pyplot as plt


def g(x):
    """Return (12 / (2x))**2, or None at x == 0 where the function is undefined."""
    if x != 0:
        return (12/(2*x))**2
    return None


x = range(-100, 101)
y = [g(a) for a in x]

plt.xlabel('x')
plt.ylabel('g(x)')
plt.grid()
plt.plot(x, y, color='purple')
# Hollow marker at x = 0, with its height taken from g just to the right of 0.
plt.plot(0, g(0.0000001), c='purple', marker='o', markerfacecolor='w', markersize=8)
plt.show()

http://img2.58codes.com/2024/20138527Q1khcKrKcI.png
.
.
.
.
.

Homework Ans:

1. 红酒分类:

import pandas as pd
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split as tts

# Load the wine data set: features as a DataFrame, class labels as the target.
ds = datasets.load_wine()
X = pd.DataFrame(ds.data, columns=ds.feature_names)
y = ds.target

# Hold out 10% of the rows for testing. No random_state is set, so the split
# (and the scores computed from it) changes from run to run.
X_train, X_test, y_train, y_test = tts(X, y, test_size=0.1)

A. 使用 KNN 演算法:

from sklearn.neighbors import KNeighborsClassifier as KNN

# k-nearest-neighbours classifier with k = 3, scored on the held-out rows
# (uses X_train / X_test / y_train / y_test from the split above).
clf = KNN(n_neighbors=3)
clf.fit(X_train, y_train)
clf.score(X_test, y_test)
# >> 0.78

B. 使用 LogisticRegression 演算法:

from sklearn.linear_model import LogisticRegression as lr

# Logistic regression with the liblinear solver, scored on the same held-out rows.
clf2 = lr(solver='liblinear')
clf2.fit(X_train, y_train)
clf2.score(X_test, y_test)
# >>  1.0

以上两种模型比较后,选择第二种模型。

此步骤是在做机器学习第八步:Evaluate Model

2. 糖尿病迴归:

import pandas as pd
import numpy as np
from sklearn import datasets
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split as tts

ds = datasets.load_diabetes()
# The feature values are already mean-centered and scaled, which is why
# quantities such as age show up as negative numbers.
# NOTE(review): the article describes this as (X - m) / sigma (mean 0, std 1);
# scikit-learn's dataset description states a different scale factor - confirm.
X = pd.DataFrame(ds.data, columns=ds.feature_names)
y = ds.target

# Hold out 20% of the rows for testing (no random_state, so results vary per run).
X_train, X_test, y_train, y_test = tts(X, y, test_size=0.2)

clf = LinearRegression()
clf.fit(X_train, y_train)
clf.score(X_test, y_test)
# >>  0.46

额外补充:求 MSE & Coefficients

from sklearn.metrics import mean_squared_error, r2_score

# Predictions for the held-out rows (uses `clf`, X_test, y_test from the fit above).
y_pred = clf.predict(X_test)

# Coefficients of the first-degree terms:
# y = w1*x1 + w2*x2 + w3*x3 ... w10*x10 + b
print('Coefficients: ', clf.coef_)
# >>  Coefficients:  [ -42.4665896  -278.41905956  519.81553297  346.25936576 -836.62271952
#                      494.18394438  135.91708785  164.44984594  795.02484868   69.8608995 ]
print('Intercept: ', clf.intercept_)
# >>  Intercept:  152.2877890433722

# MSE (mean squared error): 1/n * sum((y_pred - y_test)**2)
print(f'MSE: {mean_squared_error(y_test, y_pred)}')
# >>  MSE: 3161.582266519054

# Coefficient of determination (R^2): the closer to 1 the better
print(f'Coefficient of determination: {r2_score(y_test, y_pred)}')
# >>  Coefficient of determination: 0.4556662437750665

3. 小费迴归:

import pandas as pd
import numpy as np
from sklearn import datasets

df = pd.read_csv('tips.csv')
print(df.head())

X = df.drop('tip', axis=1)   # every column except 'tip' is a feature
y = df['tip']                # 'tip' is the value to predict

# Show the distinct values of the 'sex' column.
print(X['sex'].unique())
# >>  ['Female' 'Male']

此时若直接跑演算法,会发现评分过低,推断为 X['day']转化为数字时编码问题。

解决方式如下:

import matplotlib.pyplot as plt
import seaborn as sns

# Mean tip per weekday.
gb = df.groupby(['day'])['tip'].mean()
print(gb)
# >>  Fri    2.73
#     Sat    2.99
#     Sun    3.26
#     Thur   2.77

# x / y are passed by keyword: current seaborn releases no longer accept them
# positionally.
sns.barplot(x=gb.index, y=gb.values)
plt.show()

http://img2.58codes.com/2024/20138527fC6VtRq2i6.png

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split as tts

# Every categorical column must be numeric before fitting. The mapped column is
# assigned back (instead of X[col].replace(..., inplace=True)) because in-place
# modification through a column selection is not reliable in pandas.
X['sex'] = X['sex'].map({'Female': 0, 'Male': 1})
X['smoker'] = X['smoker'].map({'Yes': 0, 'No': 1})
# Days are coded in ascending order of mean tip (Fri < Thur < Sat < Sun in the
# groupby output); the article mapped both Thur and Fri to 0 and skipped 1.
X['day'] = X['day'].map({'Fri': 0, 'Thur': 1, 'Sat': 2, 'Sun': 3})
X['time'] = X['time'].map({'Lunch': 0, 'Dinner': 1})

# Hold out 20% of the rows for testing (no random_state, so the score varies per run).
X_train, X_test, y_train, y_test = tts(X, y, test_size=0.2)

clf = LinearRegression()
clf.fit(X_train, y_train)
clf.score(X_test, y_test)
# >>  0.4   (recorded in the article with its original day coding)

关于作者: 网站小编

码农网专注IT技术教程资源分享平台,学习资源下载网站,58码农网包含计算机技术、网站程序源码下载、编程技术论坛、互联网资源下载等产品服务,提供原创、优质、完整内容的专业码农交流分享平台。

热门文章