Advanced Matrix Algebra and Applications - Python Module

1. An overview of Python 101

Fundamental Mathematical Operations

In [193]:
"""Importing the required modules"""

import numpy as np #module in which most of the operations of our interest are defined
import matplotlib.pyplot as plt #for plotting
from math import * #module for exp,log,power, trig functions etc...
from numpy.linalg import * #module with linear algebraic functions
import scipy.linalg as sp
import matplotlib.image as mpimg 
from sklearn.decomposition import *
In [18]:
a = 2.5
b = 3e01
c = sqrt(2)+1.55555j 
In [19]:
d = a + 2*b - np.conj(c) #linear combination of complex numbers
print(d) #plain print statement
print("Result : {}".format(np.round(d,3))) #formatted print statement
(61.08578643762691+1.55555j)
Result : (61.086+1.556j)

$\ $

Lists

A list is one of the prominent data types in Python. It's a finite sequence of python objects (number/strings/matrices/other lists etc...) which is very used to define matrices.

In [59]:
a = [1,2,3]
b = [1,'vector',3]
c = [[1,0],[0,1]]
print(a+b+c) #prints the concatenated list
print(type(a+b+c)) #outputs the data type
print("Number of elements in a : {}".format(len(a)))
[1, 2, 3, 1, 'vector', 3, [1, 0], [0, 1]]
<class 'list'>
Number of elements in a : 3
In [62]:
#a[i] returns the i+1^th element of the list a
print(a[1])
print(b[1][0])
print(c[1][0])
2
v
0
In [25]:
a + 1 #cannot perform element-wise addition in lists directly
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-25-ca42ed42e993> in <module>()
----> 1 a + 1

TypeError: can only concatenate list (not "int") to list
In [27]:
np.add(a,1) #This serves the purpose. However, it returns a different data type namely a numpy array which will be described later
Out[27]:
array([2, 3, 4])
In [60]:
np.add(a,list(range(3))) #range()
Out[60]:
array([1, 3, 5])
In [61]:
np.multiply(a,a[1]) #element-wise multiplication 
Out[61]:
array([2, 4, 6])
In [58]:
np.multiply(a,[2,3,1/4])
Out[58]:
array([2.  , 6.  , 0.75])
In [63]:
"""List methods"""

a = list(range(3,10))
print(np.sum(a))
print(np.mean(a))
print(sorted(a,reverse=True))
print(a.index(5)) #returns the index of the specified element in the list
a.append(100)  #appends the list
print(a)
print(a.pop()) #removes the last element
print(a)
print(a.pop(a[2])) #removes the element of the specified index
a
42
6.0
[9, 8, 7, 6, 5, 4, 3]
2
[3, 4, 5, 6, 7, 8, 9, 100]
100
[3, 4, 5, 6, 7, 8, 9]
8
Out[63]:
[3, 4, 5, 6, 7, 9]

Arrays(Matrices)

Arrays are the most useful data type for one who works with matrices. A matrix is defined as a 2-dimensional array in python.

In [64]:
A = np.array([[1,0],[0,1],[0,0]])
print(A)

print('\n',A.shape) #dimension of the array
[[1 0]
 [0 1]
 [0 0]]

 (3, 2)
In [65]:
A.reshape(2,3) #reshapes the array provided the new dim is compatible; does not alter the original array
Out[65]:
array([[1, 0, 0],
       [1, 0, 0]])
In [66]:
A.reshape(2,1,3)
Out[66]:
array([[[1, 0, 0]],

       [[1, 0, 0]]])
In [67]:
A.tolist()
Out[67]:
[[1, 0], [0, 1], [0, 0]]
In [68]:
B = 2*A #scalar multiplication
B
Out[68]:
array([[2, 0],
       [0, 2],
       [0, 0]])
In [69]:
C = A + B #matrix addition
print(C)
[[3 0]
 [0 3]
 [0 0]]
In [70]:
C[1][0] = 2
C**2 #element-wise operation
Out[70]:
array([[9, 0],
       [4, 9],
       [0, 0]], dtype=int32)
In [71]:
C@C.T #Matrix multiplication
Out[71]:
array([[ 9,  6,  0],
       [ 6, 13,  0],
       [ 0,  0,  0]])
In [72]:
D = np.array([1,2]).reshape(2,1)
C@D
Out[72]:
array([[3],
       [8],
       [0]])
In [73]:
print('Outer product : {}'.format(D@D.T))
print('Inner product : {}'.format(D.T@D))
Outer product : [[1 2]
 [2 4]]
Inner product : [[5]]
In [76]:
"""Numpy methods for linear algebraic computation"""

print('| I_3 | = {}'.format(det(np.identity(3))))
| I_3 | = 1.0
In [77]:
print('rank(A) = {}'.format(matrix_rank(A)))
rank(A) = 2
In [78]:
print('eigenvalues of CC^T: {}'.format(eig(C@C.T)[0]))
eigenvalues of CC^T: [ 4.67544468 17.32455532  0.        ]
In [79]:
P = eig(C@C.T)[1]
print('eigenvectors of CC^T corresponding to lambda = {}: {}'.format(P[0][0],P[:,0]))
print('eigenvectors of CC^T corresponding to lambda = {}: {}'.format(P[0][1],P[:,1]))
print('eigenvectors of CC^T corresponding to lambda = {}: {}'.format(P[0][2],P[:,2]))
eigenvectors of CC^T corresponding to lambda = -0.8112421851755608: [-0.81124219  0.58471028  0.        ]
eigenvectors of CC^T corresponding to lambda = -0.584710284663765: [-0.58471028 -0.81124219  0.        ]
eigenvectors of CC^T corresponding to lambda = 0.0: [0. 0. 1.]

Loops, Logical Operators and Conditionals

In [107]:
"""Logical operations"""

a,b = 2,3
print('\n',a > b and a == b,'\n', a > b or a != b ,'\n',not(a==b))
 False 
 True 
 True
In [110]:
"""if - else statements"""

if a < b: 
    print ('a < b')
    
elif a == b:
    print('a = b')
    
else:
    print('a > b')
    
#Such a conditional executes the 'first' set of statements for which the condition returns 'True'.
#Note that the indentation is necessary for the compiler to understand the hierarchy.

if a <= b: 
    print ('a <= b')
    
elif a < b:
    print('a < b')
    
else:
    print('a > b') #The first two conditions return 'True', but only the first statement is executed.
a < b
a <= b
In [114]:
"""for loop""" #the statements are executed until the loop exhausts the list of elements in the iterable object

for j in 'Python':
    print (j)

print('\n')

for [k,l] in [[1,2],[2,3]]:
    print (k+l)
P
y
t
h
o
n


3
5
In [115]:
"""while loop"""
n = [1,2,3]
while(len(n)):
    print(n.pop())
3
2
1

User defined functions

In [118]:
def IP(a,b): #computes the euclidean inner product of two 1D arrays
    
    if len(a)==len(b):
        return np.sum([np.conj(a[k])*b[k] for k in range(len(a))])
    else:
        return("Error: The vectors should be of the same dimension")
    
IP(np.array([0+1j,2,3]),np.array([1,1+2j,1]))
Out[118]:
(5+3j)

2D Plots

In [95]:
fig, axs = plt.subplots(1, 2, figsize=(16, 6))

ax = axs[0]

x = np.random.normal(size=5000)
ax.hist(x,density=True,bins=50)

ax = axs[1]

n = np.arange(100)
f = 1/n


ax.plot(n,f,'r-',label='sequence')
ax.set_xlabel('n')
ax.set_ylabel('f(n)')
ax.hlines(y=0,xmin=0,xmax=100,colors='g',label='limit')
ax.set_title('$ f(n) = 1/n $')
ax.legend()

plt.show()
C:\Users\Jaiprakash R\Anaconda3\lib\site-packages\ipykernel_launcher.py:12: RuntimeWarning: divide by zero encountered in true_divide
  if sys.path[0] == '':

2. Applications of Advanced Linear Algebra

Polynomial Regression (Least Squares Problem)

In [89]:
x = np.random.normal(5,2,size = 1000)
y = np.array([0.5 + 3*x[i] - 0.5*x[i]**2 + np.random.normal(0,1,size=1) for i in range(len(x))]) #data with noise
#true parameters b0= 0.5, b1 = 3 + b2 = -0.5, noise variance = 1

plt.plot(x,y,'ro')
Out[89]:
[<matplotlib.lines.Line2D at 0x1335d2b0>]
In [108]:
x = x.reshape(-1,1)
ones = np.ones(len(y)).reshape(-1,1)
X = np.concatenate((ones,x,x**2),axis=1)



B = []

for i in range(1000):
    y = np.array([0.5 + 3*x[i] - 0.5*x[i]**2 + np.random.normal(0,1,size=1) for i in range(len(x))])
    
    b = inv((X.T@X))@X.T@y
    B.append(b)
In [109]:
B = np.array(B)
In [110]:
B.shape
Out[110]:
(1000, 3, 1)
In [115]:
fig, axs = plt.subplots(1, 3, figsize=(16, 6))

axs[0].hist(B[:,0],density = True,bins=100)
axs[0].set_title('$b_0$')

axs[1].hist(B[:,1],density = True,bins=100)
axs[1].set_title('$b_1$')

axs[2].hist(B[:,2],density = True,bins=100)
axs[2].set_title('$b_2$')

#Note the large variance in data however
Out[115]:
Text(0.5,1,'$b_2$')

SVD to recover from Multicollinearity

In [184]:
B = []
U,s,Vh=sp.svd(X)

#r=2
Uhat = U[:,0:2]
s_hat = np.diag(s[0:2])
Vhat = Vh[0:2].T
Vhat.shape

for i in range(1000):
    
    
    y = np.array([0.5 + 3*x[i] - 0.5*x[i]**2 + np.random.normal(0,1,size=1) for i in range(len(x))])
    
    
    b = Vhat@inv(s_hat)@Uhat.T@y
    B.append(b)
    
In [186]:
B = np.array(B)

fig, axs = plt.subplots(1, 3, figsize=(16, 6))

axs[0].hist(B[:,0],density = True,bins=100)
axs[0].set_title('$b_0$')

axs[1].hist(B[:,1],density = True,bins=100)
axs[1].set_title('$b_1$')

axs[2].hist(B[:,2],density = True,bins=100)
axs[2].set_title('$b_2$')


#Notice the reduction in variance. Although, the estimators are not centred at the true values anymore.
Out[186]:
Text(0.5,1,'$b_2$')

Image Compression - SVD

In [190]:
fig = plt.figure(figsize=(16,6))
img = mpimg.imread('12437-large.jpeg')  
print (img.shape) 
plt.axis('off') 
plt.imshow(img) 
(720, 1280, 3)
Out[190]:
<matplotlib.image.AxesImage at 0x16de5128>
In [191]:
img_r = np.reshape(img, (720, 1280*3))
print (img_r.shape )
(720, 3840)
In [194]:
pca = PCA(n_components=256)
ipca = pca.fit(img_r) 
img_c = pca.transform(img_r) 
print (img_c.shape) 
print (np.sum(ipca.explained_variance_ratio_))
(720, 256)
0.9995461532614928
In [199]:
fig = plt.figure(figsize=(16,6))
temp = pca.inverse_transform(img_c) 
print (temp.shape)  
temp = np.reshape(temp, (720,1280,3)) 
print (temp.shape) 
plt.axis('off') 
plt.imshow(temp/255)
plt.show()
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
(720, 3840)
(720, 1280, 3)
In [201]:
pca = PCA(n_components=64)
ipca = pca.fit(img_r)
img_c = pca.transform(img_r)
print (img_c.shape)
print (np.sum(ipca.explained_variance_ratio_))
(720, 64)
0.9807895029617795
In [202]:
fig = plt.figure(figsize=(16,6))
temp = pca.inverse_transform(img_c) 
print (temp.shape)  
temp = np.reshape(temp, (720,1280,3)) 
print (temp.shape) 
plt.axis('off') 
plt.imshow(temp/255)
plt.show()
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
(720, 3840)
(720, 1280, 3)
In [203]:
pca = PCA(n_components=16)
ipca = pca.fit(img_r)
img_c = pca.transform(img_r)
print (img_c.shape)
print (np.sum(ipca.explained_variance_ratio_))
(720, 16)
0.8891535421517809
In [204]:
fig = plt.figure(figsize=(16,6))
temp = pca.inverse_transform(img_c) 
print (temp.shape)  
temp = np.reshape(temp, (720,1280,3)) 
print (temp.shape) 
plt.axis('off') 
plt.imshow(temp/255)
plt.show()
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
(720, 3840)
(720, 1280, 3)