# In Python, we use the NumPy library to create vectors and perform different vector operations.
import numpy as np
a = np.zeros((4,))
print(f"a: {a}; \nshape: {a.shape}, dtype: {a.dtype}; dimension: {a.ndim}")
a = np.zeros((4,5), dtype=int)       # dtype given as a Python type
a = np.zeros((4,5), dtype='int64')   # equivalently, dtype given as a string
print(f"a: {a}; \nshape: {a.shape}, dtype: {a.dtype}; dimension: {a.ndim}")
a: [0. 0. 0. 0.];
shape: (4,), dtype: float64; dimension: 1
a: [[0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]];
shape: (4, 5), dtype: int64; dimension: 2
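Besides np.zeros, NumPy provides several other routines for creating vectors. A minimal sketch for illustration (the names v1 and v2 are not part of the lab):
v1 = np.arange(4.)        # evenly spaced values: [0. 1. 2. 3.]
v2 = np.random.rand(4)    # 4 random samples drawn from [0, 1)
print(f"v1: {v1}; shape: {v1.shape}, dtype: {v1.dtype}")
print(f"v2: {v2}; shape: {v2.shape}, dtype: {v2.dtype}")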
# Numpy Operations
# create two 2x2 arrays
a = np.arange(4)
a = a.reshape((2,-1)) # -1 tells NumPy to infer the size of the 2nd dimension from the array's length
print(f"a: {a}; \nshape: {a.shape}, dtype: {a.dtype}; dimension: {a.ndim}")
b = np.ones((2,2))
print(f"b: {b}; \nshape: {b.shape}, dtype: {b.dtype}; dimension: {b.ndim}")
print("\n***** Element-wise Operations *****")
print(f"Addition a+b: {a+b}")
b = b*2 # multiply array b by the scalar 2
print(f"\nScalar Multiplication b*2: {b}; \nshape: {b.shape}, dtype: {b.dtype}; dimension: {b.ndim}")
b[0][0] = 3; b[1][0] = 4
print(f"new_b: {b}")
c = a*b
print(f"Multiplication c = a*b: {c}")
print(f"\nSlicing c[:,0]: {c[:,0]}")
a: [[0 1]
 [2 3]];
shape: (2, 2), dtype: int32; dimension: 2
b: [[1. 1.]
 [1. 1.]];
shape: (2, 2), dtype: float64; dimension: 2
***** Element-wise Operations *****
Addition a+b: [[1. 2.]
 [3. 4.]]
Scalar Multiplication b*2: [[2. 2.]
 [2. 2.]];
shape: (2, 2), dtype: float64; dimension: 2
new_b: [[3. 2.]
 [4. 2.]]
Multiplication c = a*b: [[0. 2.]
 [8. 6.]]
Slicing c[:,0]: [0. 8.]
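A few more indexing and slicing patterns on the same 2x2 array c, sketched here for illustration:
print(f"c[0,:] (first row): {c[0,:]}")
print(f"c[:,1] (second column): {c[:,1]}")
print(f"c[-1,-1] (last element): {c[-1,-1]}")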
# Dot Product (2D)
c = np.dot(a,b)
print(f"Dot Product c = a.b: {c}; \nshape: {c.shape}, dtype: {c.dtype}; dimension: {c.ndim}")
Dot Product c = a.b: [[ 4. 2.]
 [18. 10.]];
shape: (2, 2), dtype: float64; dimension: 2
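For 2-D arrays, np.dot performs matrix multiplication rather than an element-wise product, so it matches the @ operator (np.matmul) for these shapes; a quick sketch to confirm with the a and b defined above:
print(np.allclose(np.dot(a, b), a @ b))   # expected: True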
Let's implement our own version of the dot product.
Using a for loop, implement a function which returns the dot product of two vectors. Given inputs $a$ and $b$, the function should return:
$$ x = \sum_{i=0}^{n-1} a_i b_i $$
Assume both a and b are the same shape.
def my_dot_product(a, b):
    dot_product = np.array(0)   # 0-d array accumulator, so the result has a .shape
    for i in range(len(a)):
        dot_product += a[i] * b[i]
    return dot_product
# test 1-D
a2 = np.array([1, 2, 3, 4])
b2 = np.array([-1, 4, 3, 2])
print(f"a:{a2} \nb:{b2}")
c = my_dot_product(a2, b2)
print(f"NumPy 1-D c = a.b = {c}, c.shape = {c.shape} ")
a:[1 2 3 4]
b:[-1 4 3 2]
my_dot_product 1-D c = a.b = 24, c.shape = ()
a2 = np.array([1, 2, 3, 4])
b2 = np.array([-1, 4, 3, 2])
print(f"a:{a2} \nb:{b2}")
c = np.dot(a2, b2)
print(f"NumPy 1-D np.dot(a, b) = {c}, np.dot(a, b).shape = {c.shape} ")
c = np.dot(b2, a2)
print(f"NumPy 1-D np.dot(b, a) = {c}, np.dot(a, b).shape = {c.shape}, dtype: {c.dtype}; dimension: {c.ndim}")
a:[1 2 3 4]
b:[-1 4 3 2]
NumPy 1-D np.dot(a, b) = 24, np.dot(a, b).shape = ()
NumPy 1-D np.dot(b, a) = 24, np.dot(b, a).shape = (), dtype: int32; dimension: 0
Going forward, we will use np.dot to vectorize the implementations for speed and simplicity.
import math
import numpy as np
import matplotlib.pyplot as plt
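The loop-based my_dot_product and np.dot return the same value, but the vectorized routine is much faster on large vectors. Below is a rough timing sketch; the array size, integer test data, and perf_counter-based measurement are illustrative choices, not part of the lab:
import time
big_a = np.random.randint(0, 10, size=1_000_000)
big_b = np.random.randint(0, 10, size=1_000_000)
tic = time.perf_counter()
c_loop = my_dot_product(big_a, big_b)   # explicit Python loop
toc = time.perf_counter()
print(f"my_dot_product: c = {c_loop}, {1000*(toc-tic):0.2f} ms")
tic = time.perf_counter()
c_vec = np.dot(big_a, big_b)            # vectorized NumPy call
toc = time.perf_counter()
print(f"np.dot:         c = {c_vec}, {1000*(toc-tic):0.2f} ms")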
Here is a summary of some of the notations you will encounter, updated for multiple features.
General Notation | Description | Python (if applicable)
---|---|---
$a$ | scalar, non bold |
$\mathbf{a}$ | vector, bold |
$\mathbf{A}$ | matrix, bold capital |
Regression | |
$\mathbf{X}$ | training example matrix | X_train
$\mathbf{y}$ | training example targets | y_train
$\mathbf{x}^{(i)}$, $y^{(i)}$ | $i$-th training example | X[i], y[i]
$m$ | number of training examples | m
$n$ | number of features in each example | n
$\mathbf{w}$ | parameter: weight | w
$b$ | parameter: bias | b
$f_{\mathbf{w},b}(\mathbf{x}^{(i)})$ | the result of the model evaluation at $\mathbf{x}^{(i)}$ parameterized by $\mathbf{w},b$: $f_{\mathbf{w},b}(\mathbf{x}^{(i)}) = \mathbf{w} \cdot \mathbf{x}^{(i)}+b$ | f_wb
You will use the motivating example of housing price prediction. The training dataset contains three examples with four features (size, bedrooms, floors, and age) shown in the table below. Note that, unlike the earlier labs, size is in sqft rather than 1000 sqft. This causes an issue, which you will solve in the next lab!
Size (sqft) | Number of Bedrooms | Number of Floors | Age of Home | Price (1000s dollars)
---|---|---|---|---
2104 | 5 | 1 | 45 | 460
1416 | 3 | 2 | 40 | 232
852 | 2 | 1 | 35 | 178
You will build a linear regression model using these values so you can then predict the price of other houses, for example, a house with 1200 sqft, 3 bedrooms, 1 floor, that is 40 years old.
Please run the following code cell to create your X_train and y_train variables.
x_train = np.array([
[2104, 5, 1, 45],
[1416, 3, 2, 40],
[852, 2, 1, 35]
])
# Or equivalently, declared on a single line (note the capitalized X_train, which is also used later):
X_train = np.array([[2104, 5, 1, 45], [1416, 3, 2, 40], [852, 2, 1, 35]])
y_train = np.array([460, 232, 178])
print(f"x_train: {x_train}; \nshape: {x_train.shape}, dtype: {x_train.dtype}; dimension: {x_train.ndim}")
print(f"y_train: {y_train}; \nshape: {y_train.shape}, dtype: {y_train.dtype}; dimension: {y_train.ndim}")
x_train: [[2104 5 1 45]
 [1416 3 2 40]
 [ 852 2 1 35]];
shape: (3, 4), dtype: int32; dimension: 2
y_train: [460 232 178];
shape: (3,), dtype: int32; dimension: 1
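Tying this back to the notation table above, $\mathbf{x}^{(i)}$ and $y^{(i)}$ are simply row i of these arrays; a quick illustrative check:
i = 0
print(f"x^({i}): {x_train[i]}, y^({i}): {y_train[i]}")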
For demonstration, $\mathbf{w}$ and $b$ will be loaded with some initial selected values that are near the optimal. $\mathbf{w}$ is a 1-D NumPy vector.
b_init = 785.1811367994083
w_init = np.array([ 0.39133535, 18.75376741, -53.36032453, -26.42131618])
print(f"w_init shape: {w_init.shape}, b_init type: {type(b_init)}")
w_init shape: (4,), b_init type: <class 'float'>
# Predict Element by Element
def predict_elem_by_elem(w, x, b):
    """
    Single prediction using linear regression, computed element by element.
    Args:
      w (ndarray): Shape (n,) model parameters
      x (ndarray): Shape (n,) example with multiple features
      b (scalar):  model parameter
    Returns:
      f_wb (scalar): model prediction
    """
    f_wb = 0
    for i in range(x.shape[0]):
        f_wb += w[i] * x[i]   # weighted sum: w_0*x_0 + w_1*x_1 + ...
    f_wb = f_wb + b
    return f_wb
x_vec = x_train[0,:]
print(f"x_vec: {x_vec}, shape: {x_vec.shape}, type: {type(x_vec)}")
print(f"w_init: {w_init}, shape: {w_init.shape}, type: {type(w_init)}")
print(f"b_init: {b_init}, type: {type(b_init)}")
x_vec: [2104 5 1 45], shape: (4,), type: <class 'numpy.ndarray'>
w_init: [ 0.39133535 18.75376741 -53.36032453 -26.42131618], shape: (4,), type: <class 'numpy.ndarray'>
b_init: 785.1811367994083, type: <class 'float'>
f_wb = predict_elem_by_elem(w_init, x_vec, b_init)
print("w_init:{}, \nx_vec:{}, \nb_init:{} >> f_wb:{}".format(w_init, x_vec, b_init, f_wb))
w_init:[ 0.39133535 18.75376741 -53.36032453 -26.42131618],
x_vec:[2104 5 1 45],
b_init:785.1811367994083 >> f_wb:459.9999976194083
def predict(w, x, b):
    # vectorized prediction: f_wb = w . x + b
    f_wb = np.dot(w, x) + b
    return f_wb
f_wb = predict(w_init, x_vec, b_init)
print("w_init:{}, \nx_vec:{}, \nb_init:{} >> f_wb:{}".format(w_init, x_vec, b_init, f_wb))
w_init:[ 0.39133535 18.75376741 -53.36032453 -26.42131618],
x_vec:[2104 5 1 45],
b_init:785.1811367994083 >> f_wb:459.9999976194083
The equation for the cost function with multiple variables $J(\mathbf{w},b)$ is: $$J(\mathbf{w},b) = \frac{1}{2m} \sum\limits_{i = 0}^{m-1} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)})^2 \tag{3}$$ where: $$ f_{\mathbf{w},b}(\mathbf{x}^{(i)}) = \mathbf{w} \cdot \mathbf{x}^{(i)} + b \tag{4} $$
In contrast to previous labs, $\mathbf{w}$ and $\mathbf{x}^{(i)}$ are vectors rather than scalars, supporting multiple features.
Below is an implementation of equations (3) and (4). Note that this uses a standard pattern for this course, where a for loop over all m examples is used.
def compute_cost(x, y, w, b):
    """Compute the cost J(w,b) over all m examples (Eq. 3)."""
    m = x.shape[0]
    cost = 0
    for i in range(m):
        # f_wb = predict(w, x[i], b)   # equivalently, use the predict() helper
        f_wb = np.dot(w, x[i]) + b     # prediction for example i (Eq. 4)
        cost += (f_wb - y[i])**2
    mse = cost / (2*m)
    return mse
cost = compute_cost(x_train, y_train, w_init, b_init)
print(cost)
1.5578904428966628e-12
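The same cost can also be computed without the explicit loop over examples by letting NumPy evaluate all m predictions at once. A minimal vectorized sketch (the name compute_cost_vectorized is chosen here for illustration):
def compute_cost_vectorized(x, y, w, b):
    m = x.shape[0]
    f_wb = x @ w + b                      # (m,n) @ (n,) -> (m,) predictions, Eq. (4)
    return np.sum((f_wb - y)**2) / (2*m)  # Eq. (3)

print(np.isclose(compute_cost_vectorized(x_train, y_train, w_init, b_init), cost))   # expected: True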
Gradient descent for multiple variables:
$$\begin{align*} \text{repeat}&\text{ until convergence:} \; \lbrace \newline\; & w_j = w_j - \alpha \frac{\partial J(\mathbf{w},b)}{\partial w_j} \tag{5} \; & \text{for j = 0..n-1}\newline &b\ \ = b - \alpha \frac{\partial J(\mathbf{w},b)}{\partial b} \newline \rbrace \end{align*}$$
where $n$ is the number of features, the parameters $w_j$, $b$ are updated simultaneously, and where
$$ \begin{align} \frac{\partial J(\mathbf{w},b)}{\partial w_j} &= \frac{1}{m} \sum\limits_{i = 0}^{m-1} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)})x_{j}^{(i)} \tag{6} \\ \frac{\partial J(\mathbf{w},b)}{\partial b} &= \frac{1}{m} \sum\limits_{i = 0}^{m-1} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)}) \tag{7} \end{align} $$
# compute_gradient for multiple variables/features/predictors
def compute_gradient(x, y, w, b):
    m, n = x.shape                      # (number of examples, number of features)
    dj_dw = np.zeros((n,))
    dj_db = 0
    for i in range(m):
        f_wb = np.dot(w, x[i]) + b      # prediction for example i
        dj_db += f_wb - y[i]            # Eq. (7), summed over i
        for j in range(n):
            dj_dw[j] += (f_wb - y[i]) * x[i, j]   # Eq. (6), summed over i
    dj_db = dj_db / m
    dj_dw = dj_dw / m
    return dj_dw, dj_db
tmp_dj_dw, tmp_dj_db = compute_gradient(x_train, y_train, w_init, b_init)
print(f'dj_db at initial w,b: {tmp_dj_db}')
print(f'dj_dw at initial w,b: \n {tmp_dj_dw}')
dj_db at initial w,b: -1.6739251501955248e-06
dj_dw at initial w,b:
 [-2.72623577e-03 -6.27197263e-06 -2.21745578e-06 -6.92403391e-05]
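The gradient can likewise be vectorized by computing the residuals for all m examples at once and using a matrix-vector product in place of the nested loops. A minimal sketch (the name compute_gradient_vectorized is illustrative):
def compute_gradient_vectorized(x, y, w, b):
    m = x.shape[0]
    err = x @ w + b - y           # (m,) residuals f_wb(x^(i)) - y^(i)
    dj_dw = x.T @ err / m         # (n,) gradient w.r.t. w, Eq. (6)
    dj_db = np.sum(err) / m       # scalar gradient w.r.t. b, Eq. (7)
    return dj_dw, dj_db

vec_dj_dw, vec_dj_db = compute_gradient_vectorized(x_train, y_train, w_init, b_init)
print(np.allclose(vec_dj_dw, tmp_dj_dw), np.isclose(vec_dj_db, tmp_dj_db))   # expected: True True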
def gradient_descent(x, y, w, b, lr, iterations):
    """
    Performs batch gradient descent to learn w and b. Updates w and b by taking
    gradient steps with learning rate lr for the given number of iterations.
    Args:
      x (ndarray (m,n)) : data, m examples with n features
      y (ndarray (m,))  : target values
      w (ndarray (n,))  : initial model parameters
      b (scalar)        : initial model parameter
      lr (float)        : learning rate (alpha)
      iterations (int)  : number of iterations to run gradient descent
    Returns:
      w (ndarray (n,))  : updated values of parameters
      b (scalar)        : updated value of parameter
      j_history (list)  : cost at each iteration, for graphing
    """
    steps = iterations // 10            # print progress 10 times over the run
    j_history, p_history = [], []
    for i in range(iterations):
        cost = compute_cost(x, y, w, b)
        j_history.append(cost)
        p_history.append([w, b])
        dj_dw, dj_db = compute_gradient(x, y, w, b)
        temp_w = w - lr * dj_dw         # compute both updates first, then assign,
        temp_b = b - lr * dj_db         # so w and b are updated simultaneously
        w, b = temp_w, temp_b
        # print(i, end=",")
        if (i % steps) == 0:
            print(f"iteration: {i}, cost: {cost}, w:{w}, b:{b}")
    return w, b, j_history              # return final w, b and J history for graphing
print(f"x_train: {x_train}; \nshape: {x_train.shape}, dtype: {x_train.dtype}; dimension: {x_train.ndim}")
print(f"y_train: {y_train}; \nshape: {y_train.shape}, dtype: {y_train.dtype}; dimension: {y_train.ndim}")
print("w_init:{}, \nb_init:{}".format(w_init, b_init))
w_init = np.zeros_like(w_init)
b_init = 0.
print("w_init:{}, \nb_init:{}".format(w_init, b_init))
x_train: [[2104 5 1 45]
 [1416 3 2 40]
 [ 852 2 1 35]];
shape: (3, 4), dtype: int32; dimension: 2
y_train: [460 232 178];
shape: (3,), dtype: int32; dimension: 1
w_init:[0. 0. 0. 0.],
b_init:0.0
w_init:[0. 0. 0. 0.],
b_init:0.0
iterations = 1000
# learning_rate = 0.001
learning_rate = 5.0e-7
# run gradient descent
w_final, b_final, J_hist = gradient_descent(x_train, y_train, w_init, b_init, learning_rate, iterations)
print(f"b,w found by gradient descent: {b_final:0.2f},{w_final} ")
iteration: 0, cost: 49518.0, w:[2.41334667e-01 5.58666667e-04 1.83666667e-04 6.03500000e-03], b:0.000145
iteration: 100, cost: 696.0010595124644, w:[ 0.20235171 0.00079796 -0.00099658 -0.00219736], b:-0.0001198596187768893
iteration: 200, cost: 694.9313476914762, w:[ 0.20253446 0.00112715 -0.00214349 -0.00940619], b:-0.00035965781839536286
iteration: 300, cost: 693.8709864577195, w:[ 0.2027164 0.00145611 -0.00328876 -0.01658286], b:-0.0005983240279392168
iteration: 400, cost: 692.8198930237817, w:[ 0.20289753 0.00178484 -0.00443238 -0.02372751], b:-0.0008358632706869382
iteration: 500, cost: 691.7779853352548, w:[ 0.20307785 0.00211335 -0.00557437 -0.03084027], b:-0.0010722805476294612
iteration: 600, cost: 690.7451820642369, w:[ 0.20325736 0.00244162 -0.00671473 -0.0379213 ], b:-0.0013075808375690545
iteration: 700, cost: 689.7214026029069, w:[ 0.20343608 0.00276967 -0.00785347 -0.04497072], b:-0.0015417690972177696
iteration: 800, cost: 688.706567057147, w:[ 0.20361399 0.00309749 -0.00899059 -0.05198869], b:-0.001774850261295446
iteration: 900, cost: 687.7005962402227, w:[ 0.20379112 0.00342509 -0.01012611 -0.05897533], b:-0.0020068292426272975
b,w found by gradient descent: -0.00,[ 0.20396569 0.00374919 -0.0112487 -0.0658614 ]
m,_ = x_train.shape
for i in range(m):
print(f"prediction: {np.dot(X_train[i], w_final) + b_final:0.2f}, target value: {y_train[i]}")
prediction: 426.19, target value: 460
prediction: 286.17, target value: 232
prediction: 171.47, target value: 178
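Finally, returning to the motivating example from the start of the lab, the learned parameters can be used in the same way to estimate the price of a 1200 sqft, 3 bedroom, 1 floor, 40 year old house. A sketch (x_house is an illustrative name; with only 1000 iterations the parameters, and hence this estimate, are still rough):
x_house = np.array([1200, 3, 1, 40])
price_1000s = np.dot(x_house, w_final) + b_final   # model output is in 1000s of dollars
print(f"predicted price: {price_1000s:0.2f} (1000s of dollars)")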
# plot cost versus iteration
fig, (ax1, ax2) = plt.subplots(1, 2, constrained_layout=True, figsize=(12, 4))
ax1.plot(J_hist)
ax2.plot(100 + np.arange(len(J_hist[100:])), J_hist[100:])
ax1.set_title("Cost vs. iteration"); ax2.set_title("Cost vs. iteration (tail)")
ax1.set_ylabel('Cost') ; ax2.set_ylabel('Cost')
ax1.set_xlabel('iteration step') ; ax2.set_xlabel('iteration step')
plt.show()