MLR codes

4 minute read

MLR (Multiple Linear Regression)

  • Using sklearn
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import matplotlib.pylab as plt
import pandas as pd
import numpy as np
import os
os.listdir()
['train_dataset.csv',
 'test_dataset.csv',
 '.ipynb_checkpoints',
 '2017315014_박장훈_과제.ipynb']
# Load the labelled training set and the unlabelled test set from the working directory
train, test = (pd.read_csv(csv_name) for csv_name in ('train_dataset.csv', 'test_dataset.csv'))
train.columns
Index(['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX',
       'PTRATIO', 'B', 'LSTAT', 'MEDV'],
      dtype='object')
test.columns
Index(['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX',
       'PTRATIO', 'B', 'LSTAT'],
      dtype='object')
# Predictors: the 13 feature columns; target: MEDV, kept as a one-column DataFrame
x = train[[
    'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
    'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT',
]]
y = train[['MEDV']]

# Hold out 20% of the labelled rows for validation; no random_state is given,
# so the split (and therefore the score) changes from run to run
x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=0.8, test_size=0.2
)

# Fit an ordinary least-squares model on the 80% training portion
mlr = LinearRegression()
mlr.fit(x_train, y_train)
LinearRegression()
# R^2 (coefficient of determination) of the MLR model on the held-out split
print(mlr.score(x_test,y_test))
0.7484362024320568

Bayesian Ridge Regression

  • Using sklearn
from sklearn import linear_model

# Reuse the train/validation split already created for the MLR model instead of
# calling train_test_split again: a fresh random split would overwrite
# x_test / y_test, so the later MLR-vs-REG comparison would score `mlr` on rows
# it was trained on and the two models would not be compared on equal terms.
reg = linear_model.BayesianRidge()

# BayesianRidge expects a 1-D target of shape (n_samples,). y_train is a
# one-column DataFrame, so flatten it to avoid sklearn's DataConversionWarning.
reg.fit(x_train, y_train.values.ravel())
/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/utils/validation.py:63: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  return f(*args, **kwargs)





BayesianRidge()
# R^2 (coefficient of determination) of the Bayesian ridge model on the held-out split
reg.score(x_test, y_test)
0.6995045762623353

Result

  • Compare the MLR and Bayesian Ridge (REG) scores
# Report both models' R^2 on the current x_test / y_test split
mlr_r2 = mlr.score(x_test, y_test)
reg_r2 = reg.score(x_test, y_test)
print(f"MLR score :  {mlr_r2}")
print(f"REG score :  {reg_r2}")
MLR score :  0.7363458569534354
REG score :  0.6995045762623353
# Predict MEDV for the unlabelled test set with the fitted MLR model
test_sources = test.loc[:, [
    'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
    'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT',
]]
# Wrap the predicted values in a DataFrame (a single column labelled 0)
predict = pd.DataFrame(mlr.predict(test_sources))
predict
0
0 17.815181
1 14.491018
2 16.933044
3 16.949283
4 18.641706
5 20.103195
6 23.044692
7 22.417605
8 25.579195
9 16.565801
10 16.099082
11 20.506135
12 11.615682
13 19.293890
14 22.116128
15 23.472798
16 27.154651
17 28.587563
18 20.963850
19 19.285144
20 22.019483
21 19.517722
22 21.344110
23 12.172033
24 8.441316
25 3.772396
26 14.005412
27 16.217609
28 21.021840
29 20.911871
30 16.996598
31 14.154909
32 19.357906
33 21.641787
34 18.822164
35 20.782827
36 23.992039
37 22.970550
38 28.249288
39 26.703454
40 22.945017
# Attach the predictions to the test DataFrame as a new 'Pred_MEDV' column
# (`predict` is a one-column DataFrame sharing test's default 0..n-1 row index)
test['Pred_MEDV'] = predict
test
CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX PTRATIO B LSTAT Pred_MEDV
0 3.16360 0 18.10 0 0.655 5.759 48.2 3.0665 24 666 20.2 334.40 14.13 17.815181
1 3.77498 0 18.10 0 0.655 5.952 84.7 2.8715 24 666 20.2 22.01 17.15 14.491018
2 4.42228 0 18.10 0 0.584 6.003 94.5 2.5403 24 666 20.2 331.29 21.32 16.933044
3 15.57570 0 18.10 0 0.580 5.926 71.0 2.9084 24 666 20.2 368.74 18.13 16.949283
4 13.07510 0 18.10 0 0.580 5.713 56.7 2.8237 24 666 20.2 396.90 14.76 18.641706
5 4.34879 0 18.10 0 0.580 6.167 84.0 3.0334 24 666 20.2 396.90 16.29 20.103195
6 4.03841 0 18.10 0 0.532 6.229 90.7 3.0993 24 666 20.2 395.33 12.87 23.044692
7 3.56868 0 18.10 0 0.580 6.437 75.0 2.8965 24 666 20.2 393.37 14.36 22.417605
8 4.64689 0 18.10 0 0.614 6.980 67.6 2.5329 24 666 20.2 374.68 11.66 25.579195
9 8.05579 0 18.10 0 0.584 5.427 95.4 2.4298 24 666 20.2 352.58 18.14 16.565801
10 6.39312 0 18.10 0 0.584 6.162 97.4 2.2060 24 666 20.2 302.76 24.10 16.099082
11 4.87141 0 18.10 0 0.614 6.484 93.6 2.3053 24 666 20.2 396.21 18.68 20.506135
12 15.02340 0 18.10 0 0.614 5.304 97.3 2.1007 24 666 20.2 349.48 24.91 11.615682
13 10.23300 0 18.10 0 0.614 6.185 96.7 2.1705 24 666 20.2 379.70 18.03 19.293890
14 14.33370 0 18.10 0 0.614 6.229 88.0 1.9512 24 666 20.2 383.32 13.11 22.116128
15 5.82401 0 18.10 0 0.532 6.242 64.7 3.4242 24 666 20.2 396.90 10.74 23.472798
16 5.70818 0 18.10 0 0.532 6.750 74.9 3.3317 24 666 20.2 393.07 7.74 27.154651
17 5.73116 0 18.10 0 0.532 7.061 77.0 3.4106 24 666 20.2 395.28 7.01 28.587563
18 2.81838 0 18.10 0 0.532 5.762 40.3 4.0983 24 666 20.2 392.92 10.42 20.963850
19 2.37857 0 18.10 0 0.583 5.871 41.9 3.7240 24 666 20.2 370.73 13.34 19.285144
20 3.67367 0 18.10 0 0.583 6.312 51.9 3.9917 24 666 20.2 388.62 10.58 22.019483
21 5.69175 0 18.10 0 0.583 6.114 79.8 3.5459 24 666 20.2 392.68 14.98 19.517722
22 4.83567 0 18.10 0 0.583 5.905 53.2 3.1523 24 666 20.2 388.22 11.45 21.344110
23 0.15086 0 27.74 0 0.609 5.454 92.7 1.8209 4 711 20.1 395.09 18.06 12.172033
24 0.18337 0 27.74 0 0.609 5.414 98.3 1.7554 4 711 20.1 344.05 23.97 8.441316
25 0.20746 0 27.74 0 0.609 5.093 98.0 1.8226 4 711 20.1 318.43 29.68 3.772396
26 0.10574 0 27.74 0 0.609 5.983 98.8 1.8681 4 711 20.1 390.11 18.07 14.005412
27 0.11132 0 27.74 0 0.609 5.983 83.5 2.1099 4 711 20.1 396.90 13.35 16.217609
28 0.17331 0 9.69 0 0.585 5.707 54.0 2.3817 6 391 19.2 396.90 12.01 21.021840
29 0.27957 0 9.69 0 0.585 5.926 42.6 2.3817 6 391 19.2 396.90 13.59 20.911871
30 0.17899 0 9.69 0 0.585 5.670 28.8 2.7986 6 391 19.2 393.29 17.60 16.996598
31 0.28960 0 9.69 0 0.585 5.390 72.9 2.7986 6 391 19.2 396.90 21.14 14.154909
32 0.26838 0 9.69 0 0.585 5.794 70.6 2.8927 6 391 19.2 396.90 14.10 19.357906
33 0.23912 0 9.69 0 0.585 6.019 65.3 2.4091 6 391 19.2 396.90 12.92 21.641787
34 0.17783 0 9.69 0 0.585 5.569 73.5 2.3999 6 391 19.2 395.77 15.10 18.822164
35 0.22438 0 9.69 0 0.585 6.027 79.7 2.4982 6 391 19.2 396.90 14.33 20.782827
36 0.06263 0 11.93 0 0.573 6.593 69.1 2.4786 1 273 21.0 391.99 9.67 23.992039
37 0.04527 0 11.93 0 0.573 6.120 76.7 2.2875 1 273 21.0 396.90 9.08 22.970550
38 0.06076 0 11.93 0 0.573 6.976 91.0 2.1675 1 273 21.0 396.90 5.64 28.249288
39 0.10959 0 11.93 0 0.573 6.794 89.3 2.3889 1 273 21.0 393.45 6.48 26.703454
40 0.04741 0 11.93 0 0.573 6.030 80.8 2.5050 1 273 21.0 396.90 7.88 22.945017
# Save the test data plus predictions as CSV
# (to_csv also writes the row index as an unnamed first column by default)
test.to_csv("result.csv")

Updated: