In Scikit-Learn (sklearn) we can implement and use Linear Regression in two ways:
1. SGDRegressor: an iterative model fitted with stochastic gradient descent
2. LinearRegression: the closed-form solution based on the normal equation
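For reference (a standard result, not derived in this notebook), the normal equation gives the least-squares parameters in closed form: w = (XᵀX)⁻¹ Xᵀ y, where X is the design matrix with a column of ones appended for the intercept b and y is the vector of targets. SGDRegressor approaches the same minimizer iteratively, which is why its coefficients below differ slightly from LinearRegression's.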
import numpy as np
np.set_printoptions(precision=2)
import matplotlib.pyplot as plt
from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import StandardScaler
dataset_path = "../dataset/houses.txt"
dataset = np.loadtxt(dataset_path, delimiter=",")
print(f"dataset.shape: {dataset.shape}")
x_train = dataset[:,:-1]
y_train = dataset[:, -1]
print(f"x_train.shape: {x_train.shape}, y_train.shape: {y_train.shape}")
x_features = ['size(sqft)','bedrooms','floors','age']
dataset.shape: (100, 5)
x_train.shape: (100, 4), y_train.shape: (100,)
# Scale/normalize the training data
scaler = StandardScaler()
x_norm = scaler.fit_transform(x_train)
x_train_ptp = np.ptp(x_train, axis=0)
x_norm_ptp = np.ptp(x_norm, axis=0)
print(f"Peak-to-peak range of x_train: {x_train_ptp}")
print(f"Peak-to-peak range of x_norm: {x_norm_ptp}")
Peak-to-peak range of x_train: [2.41e+03 4.00e+00 1.00e+00 9.50e+01]
Peak-to-peak range of x_norm: [5.84 6.13 2.06 3.68]
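StandardScaler applies z-score normalization, i.e. each column is transformed as (x - mean) / std. A minimal sketch to verify that by hand (the variable name x_manual is mine, not from the notebook):
# Reproduce StandardScaler's transform manually: subtract each column's mean, divide by its std
x_manual = (x_train - x_train.mean(axis=0)) / x_train.std(axis=0)
print(f"Manual z-score matches StandardScaler: {np.allclose(x_manual, x_norm)}")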
# Create and fit the regression model
sgd = SGDRegressor(max_iter=1000)
sgd.fit(x_norm, y_train)
SGDRegressor()
print(f"# of iterations completed: {sgd.n_iter_}, # of weight updates: {sgd.t_}")
b_norm = sgd.intercept_
w_norm = sgd.coef_
# View parameters
print(f"b_norm: {b_norm}\nw_norm: {w_norm}")
# of iterations completed: 125, # of weight updates: 12501.0
b_norm: [362.23]
w_norm: [110.19 -21.25 -32.4 -37.87]
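These parameters are expressed in the z-scored feature space. If you need coefficients in the original units, they can be recovered from the scaler's stored statistics; a minimal sketch, assuming the fitted scaler from above (w_orig and b_orig are my names, not from the notebook):
# Map parameters learned on z-scored features back to the original feature scale
w_orig = w_norm / scaler.scale_
b_orig = b_norm - np.dot(scaler.mean_ / scaler.scale_, w_norm)
print(f"w_orig: {w_orig}, b_orig: {b_orig}")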
# Make predictions
# Predict the targets of the training data, using both the predict routine and a manual computation with w and b.
y_pred_sgd = sgd.predict(x_norm)
# Manual Prediction
y_pred = np.dot(x_norm, w_norm) + b_norm
print(f"Check whether all manual predictions and sklearn predictions are equal: {(y_pred_sgd == y_pred).all()}")
print(f"Prediction on training set:\n{y_pred[:4]}" )
print(f"Target values \n{y_train[:4]}")
Check whether all manual predictions and sklearn predictions are equal: True
Prediction on training set:
[248.59 295.57 485.76 389.7 ]
Target values
[271.5 300. 509.8 394. ]
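The two vectors happen to match exactly here because sgd.predict performs effectively the same dot product, but exact floating-point equality is fragile in general; a tolerance-based check is the safer pattern (this line is an addition, not from the original notebook):
# Prefer a tolerance-based comparison for floating-point predictions
print(f"Predictions match within tolerance: {np.allclose(y_pred_sgd, y_pred)}")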
# plot predictions and targets vs original features
fig, ax = plt.subplots(1, 4, figsize=(16, 4), sharey=True)
for i in range(len(ax)):
    ax[i].scatter(x_train[:, i], y_train, label='target')
    ax[i].set_xlabel(x_features[i])
    ax[i].scatter(x_train[:, i], y_pred, color='orange', label='predict')
ax[0].set_ylabel("Price"); ax[0].legend()
fig.suptitle("target versus prediction using z-score normalized model")
plt.show()
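To predict the price of a new house, the same fitted scaler must be applied before calling predict. A minimal sketch with a made-up example (the feature values and the name x_new are hypothetical):
# A hypothetical new house: [size(sqft), bedrooms, floors, age]
x_new = np.array([[1200, 3, 1, 40]])
x_new_norm = scaler.transform(x_new)  # reuse the scaler fitted on the training data
print(f"Predicted price (in $1000s): {sgd.predict(x_new_norm)}")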
The second approach uses LinearRegression, which computes the closed-form (normal equation) solution directly rather than iterating.
from sklearn.linear_model import LinearRegression
dataset_path = "../dataset/houses.txt"
dataset = np.loadtxt(dataset_path, delimiter=",")
print(f"dataset.shape: {dataset.shape}")
x_train = dataset[:,:-1]
y_train = dataset[:, -1]
print(f"x_train.shape: {x_train.shape}, y_train.shape: {y_train.shape}")
x_features = ['size(sqft)','bedrooms','floors','age']
dataset.shape: (100, 5)
x_train.shape: (100, 4), y_train.shape: (100,)
# Scale/normalize the training data
scaler = StandardScaler()
x_norm = scaler.fit_transform(x_train)
x_train_ptp = np.ptp(x_train, axis=0)
x_norm_ptp = np.ptp(x_norm, axis=0)
print(f"Peak-to-peak range of x_train: {x_train_ptp}")
print(f"Peak-to-peak range of x_norm: {x_norm_ptp}")
Peak-to-peak range of x_train: [2.41e+03 4.00e+00 1.00e+00 9.50e+01]
Peak-to-peak range of x_norm: [5.84 6.13 2.06 3.68]
lr = LinearRegression()
# lr.fit(x_train, y_train)  # fitting on the raw features would also work; OLS does not require scaling
lr.fit(x_norm, y_train)
LinearRegression()
b_norm = lr.intercept_
w_norm = lr.coef_
# View parameters
print(f"b_norm: {b_norm}\nw_norm: {w_norm}")
b_norm: 362.23952
w_norm: [110.61 -21.47 -32.66 -37.78]
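Up to floating-point error, these are the values the normal equation produces. A minimal sketch that recomputes them with plain NumPy, using np.linalg.lstsq (equivalent to the normal equation but numerically safer); X_aug and theta are my names, not from the notebook:
# Append a column of ones so the intercept is estimated together with the weights
X_aug = np.hstack([x_norm, np.ones((x_norm.shape[0], 1))])
theta, *_ = np.linalg.lstsq(X_aug, y_train, rcond=None)  # least-squares solution
print(f"weights: {theta[:-1]}, intercept: {theta[-1]}")  # should match lr.coef_ and lr.intercept_ up to float error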
# Make predictions
# Predict the targets of the training data, using both the predict routine and a manual computation with w and b.
y_pred_lr = lr.predict(x_norm)
# Manual Prediction
y_pred = np.dot(x_norm, w_norm) + b_norm
print(f"Check whether all manual predictions and sklearn predictions are equal: {(y_pred_lr == y_pred).all()}")
print(f"Prediction on training set:\n{y_pred[:4]}" )
print(f"Target values \n{y_train[:4]}")
Check whether all manual predictions and sklearn predictions are equal: True
Prediction on training set:
[248.66 295.58 485.82 389.61]
Target values
[271.5 300. 509.8 394. ]
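Since both models were fitted on the same z-scored features, their parameters can be compared directly: SGD only approximates the closed-form solution, so the coefficients are close but not identical (a quick check added here, not from the original notebook):
# Compare the iterative (SGD) and closed-form (OLS) parameters
print(f"SGD coefficients: {sgd.coef_}, intercept: {sgd.intercept_}")
print(f"OLS coefficients: {lr.coef_}, intercept: {lr.intercept_}")
print(f"Coefficients agree within 1.0: {np.allclose(sgd.coef_, lr.coef_, atol=1.0)}")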
fig, ax = plt.subplots(1, 4, figsize=(16, 4), sharey=True)
for i in range(len(x_features)):
    ax[i].scatter(x_train[:, i], y_train, c="blue", label="Target")
    ax[i].scatter(x_train[:, i], y_pred, c="orange", label="Predicted")
    ax[i].set_xlabel(x_features[i])
ax[0].set_ylabel("House Price in $1000")
ax[0].legend()
plt.show()
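As a final sanity check (an addition, not part of the original notebook), every sklearn regressor exposes a score method that returns R² on the given data, which makes it easy to compare the two fitted models on the training set:
# R^2 on the training data for both models (1.0 would be a perfect fit)
print(f"SGDRegressor R^2: {sgd.score(x_norm, y_train):.4f}")
print(f"LinearRegression R^2: {lr.score(x_norm, y_train):.4f}")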