Introduction¶
In [2]:
import numpy as np
print(np.__version__)
1.21.1
In [43]:
a = np.array([1, 3, 6, 9], dtype='int16')
print(a)
[1 3 6 9]
In [3]:
b = np.array([[3.0,9.0,5.0], [1.,79.0,4.9]])
print(b)
[[ 3. 9. 5. ] [ 1. 79. 4.9]]
In [6]:
#Get Dimension
a.ndim
Out[6]:
1
In [4]:
b.ndim
Out[4]:
2
In [7]:
#Get Shape
b.shape
Out[7]:
(2, 3)
In [15]:
a.shape
Out[15]:
(4,)
In [13]:
#Get Type
a.dtype
Out[13]:
dtype('int16')
In [17]:
b.dtype
Out[17]:
dtype('float64')
In [45]:
#Get Size (total number of elements in the array)
a.size
Out[45]:
4
In [46]:
b.size
Out[46]:
6
In [20]:
#Get Total Size in bytes (number of elements * bytes per element)
a.nbytes
Out[20]:
8
In [22]:
b.nbytes
Out[22]:
48
Accessing/Changing specific elements, rows, columns, etc.¶
2-D Array¶
In [70]:
a = np.array([[1,2,3,4,5,6,7], [8,9,10,11,12,13,14]])
print(a)
[[ 1 2 3 4 5 6 7] [ 8 9 10 11 12 13 14]]
In [24]:
#Get a specific element [r, c]
a[1,5]
#or a[1,-2]
Out[24]:
13
In [28]:
#Get a specific row
a[0, :]
Out[28]:
array([1, 2, 3, 4, 5, 6, 7])
In [30]:
#Get a specific column
a[:, 4]
Out[30]:
array([ 5, 12])
In [34]:
#Doing more specific elements [startindex:endindex:stepsize]
a[0,1:6:2]
Out[34]:
array([2, 4, 6])
In [38]:
#Re-assigning values in the array
a[1,5] = 20
print(a)
[[ 1 2 3 4 5 6 7] [ 8 9 10 11 12 20 14]]
In [39]:
a[:,2] = 5
print(a)
[[ 1 2 5 4 5 6 7] [ 8 9 5 11 12 20 14]]
In [40]:
a[:,2] = [3, 10]
print(a)
[[ 1 2 3 4 5 6 7] [ 8 9 10 11 12 20 14]]
In [9]:
#Get elements divisible by 2
a[a%2==0]
Out[9]:
array([ 2, 4, 6, 8, 10, 12, 20, 14])
In [11]:
#Boolean mask that is True wherever an element is 5 or greater
#(use a[a >= 5] to pull out the matching elements themselves)
print(a >= 5)
[[False False False False True True True] [ True True True True True True True]]
3-D Array¶
In [50]:
b = np.array([[[1,2], [3,4]], [[5,6], [7,8]]])
print(b)
[[[1 2] [3 4]] [[5 6] [7 8]]]
In [52]:
#Get specific element (work outside in)
b[0,1,:]
Out[52]:
array([3, 4])
In [53]:
b[:,1,:]
Out[53]:
array([[3, 4],
[7, 8]])
In [55]:
b[:,1,1]
Out[55]:
array([4, 8])
In [56]:
b[0,:,1]
Out[56]:
array([2, 4])
In [12]:
c = np.array([[1,4,1], [2,7,2], [3,9,3]])
c
Out[12]:
array([[1, 4, 1],
[2, 7, 2],
[3, 9, 3]])
In [13]:
#Reversing the order of the rows by subsetting (step of -1 on the first axis)
c[:: -1, ]
Out[13]:
array([[3, 9, 3],
[2, 7, 2],
[1, 4, 1]])
In [16]:
#Reversing the order of rows & columns by subsetting
#(every row of c reads the same in both directions, so only the row reversal is visible in the output)
c[:: - 1, :: -1]
Out[16]:
array([[3, 9, 3],
[2, 7, 2],
[1, 4, 1]])
In [57]:
#Replacing elements in the array
b[:,1,:] = [[78,34], [22,56]]
print(b)
[[[ 1 2] [78 34]] [[ 5 6] [22 56]]]
How to add a new axis to an array¶
In [3]:
a = np.array([1, 2, 3, 4, 5, 6])
a.shape
Out[3]:
(6,)
In [6]:
a2 = a[np.newaxis, :] #row_vector
a2.shape
Out[6]:
(1, 6)
In [8]:
a3 = a[:, np.newaxis] #col_vector
a3.shape
Out[8]:
(6, 1)
Initialize Different Types of Arrays¶
In [58]:
#All 0s matrix
np.zeros(7)
Out[58]:
array([0., 0., 0., 0., 0., 0., 0.])
In [61]:
np.zeros((2,2))
Out[61]:
array([[0., 0.],
[0., 0.]])
In [64]:
np.zeros((3,6,3))
Out[64]:
array([[[0., 0., 0.],
[0., 0., 0.],
[0., 0., 0.],
[0., 0., 0.],
[0., 0., 0.],
[0., 0., 0.]],
[[0., 0., 0.],
[0., 0., 0.],
[0., 0., 0.],
[0., 0., 0.],
[0., 0., 0.],
[0., 0., 0.]],
[[0., 0., 0.],
[0., 0., 0.],
[0., 0., 0.],
[0., 0., 0.],
[0., 0., 0.],
[0., 0., 0.]]])
In [68]:
#All 1s matrix
np.ones((4, 5, 3), dtype = 'int16')
Out[68]:
array([[[1, 1, 1],
[1, 1, 1],
[1, 1, 1],
[1, 1, 1],
[1, 1, 1]],
[[1, 1, 1],
[1, 1, 1],
[1, 1, 1],
[1, 1, 1],
[1, 1, 1]],
[[1, 1, 1],
[1, 1, 1],
[1, 1, 1],
[1, 1, 1],
[1, 1, 1]],
[[1, 1, 1],
[1, 1, 1],
[1, 1, 1],
[1, 1, 1],
[1, 1, 1]]], dtype=int16)
In [69]:
#Any other number
np.full((3,5), 77)
Out[69]:
array([[77, 77, 77, 77, 77],
[77, 77, 77, 77, 77],
[77, 77, 77, 77, 77]])
In [73]:
#Any other number (full_like)
np.full_like(a, 4)
Out[73]:
array([[4, 4, 4, 4, 4, 4, 4],
[4, 4, 4, 4, 4, 4, 4]])
Generating Random Numbers¶
In [73]:
#Random float numbers (uniformly distributed over [0, 1))
np.random.rand(4, 3, 2)
Out[73]:
array([[[0.17264182, 0.62904951],
[0.57135584, 0.94190107],
[0.08369627, 0.59921877]],
[[0.92672639, 0.47279508],
[0.3824544 , 0.47744123],
[0.14927045, 0.61844486]],
[[0.57606845, 0.69176239],
[0.66544527, 0.24365116],
[0.27317977, 0.86767478]],
[[0.17626309, 0.21091273],
[0.36600274, 0.95390974],
[0.68874675, 0.80438325]]])
In [74]:
#Random float numbers drawn from the standard normal distribution (mean 0, standard deviation 1)
np.random.randn(4, 3, 2)
Out[74]:
array([[[-1.75793781e-01, -9.32617860e-01],
[ 2.94349299e+00, 4.57776720e-01],
[ 1.77346713e-01, -1.19284227e-01]],
[[ 2.04774781e-03, -3.80101667e-01],
[-9.62869237e-01, -1.79875640e-01],
[-1.59578007e+00, -1.86062939e+00]],
[[ 1.79637479e+00, 1.36169348e+00],
[ 2.41698665e-01, -1.84841139e+00],
[-6.19981314e-01, -6.64723441e-01]],
[[-5.94095569e-01, 1.74623586e+00],
[ 1.53226211e+00, -5.99715275e-02],
[ 9.36623463e-01, -2.58938942e-01]]])
In [86]:
np.random.random_sample(a.shape)
Out[86]:
array([[0.23252098, 0.46116439, 0.27726669, 0.5987351 , 0.17023959,
0.22901113, 0.5696084 ],
[0.99991291, 0.16844318, 0.73811295, 0.00275439, 0.15681662,
0.65347567, 0.24129701]])
In [84]:
#Random integer values
np.random.randint(-4, 9, size=(3,3))
Out[84]:
array([[ 1, -4, -4],
[ 2, 7, 3],
[-4, 1, 5]])
In [82]:
#Seeding the random generator so that the same "random" values are produced on every run
np.random.seed(1)
np.random.randint(-4, 9, size=(3,3))
Out[82]:
array([[ 1, 7, 8],
[ 4, 5, 7],
[ 1, -4, -4]])
In [35]:
#generating a matrix with random integers between 0 & 4
rng = np.random.default_rng(0)
rng.integers(5, size=(2, 4))
Out[35]:
array([[4, 3, 2, 1],
[1, 0, 0, 0]], dtype=int64)
In [62]:
#generating an array from a specific sequence (integers from 1 up to, but not including, 8)
np.arange(1,8, dtype = 'int')
Out[62]:
array([1, 2, 3, 4, 5, 6, 7])
In [61]:
#generating an array with odd numbers b/w 1 and 50
np.arange(1,50,2)
Out[61]:
array([ 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33,
35, 37, 39, 41, 43, 45, 47, 49])
In [66]:
#generating a chosen number of evenly spaced values (here 30 values from 1 to 50, both ends included)
np.linspace(1,50,30)
Out[66]:
array([ 1. , 2.68965517, 4.37931034, 6.06896552, 7.75862069,
9.44827586, 11.13793103, 12.82758621, 14.51724138, 16.20689655,
17.89655172, 19.5862069 , 21.27586207, 22.96551724, 24.65517241,
26.34482759, 28.03448276, 29.72413793, 31.4137931 , 33.10344828,
34.79310345, 36.48275862, 38.17241379, 39.86206897, 41.55172414,
43.24137931, 44.93103448, 46.62068966, 48.31034483, 50. ])
In [65]:
#generating values equally spaced on a log scale (here 10 values from 10**1 to 10**50)
np.logspace(1,50,10)
Out[65]:
array([1.00000000e+01, 2.78255940e+06, 7.74263683e+11, 2.15443469e+17,
5.99484250e+22, 1.66810054e+28, 4.64158883e+33, 1.29154967e+39,
3.59381366e+44, 1.00000000e+50])
In [88]:
#the identity matrix
np.identity(5)
Out[88]:
array([[1., 0., 0., 0., 0.],
[0., 1., 0., 0., 0.],
[0., 0., 1., 0., 0.],
[0., 0., 0., 1., 0.],
[0., 0., 0., 0., 1.]])
In [68]:
#repeating an array
arr = np.array([[1,2,3]])
r1 = np.repeat(arr,3, axis = 0)
print(r1)
[[1 2 3] [1 2 3] [1 2 3]]
In [69]:
#repeating an array with tile()
np.tile(arr, 4)
Out[69]:
array([[1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3]])
In [38]:
#get unique items (sorted, duplicates removed)
a = np.array([11, 11, 12, 13, 14, 15, 16, 17, 12, 13, 11, 14, 18, 19, 20])
uni = np.unique(a)
print(uni)
[11 12 13 14 15 16 17 18 19 20]
In [40]:
#Get the indices of unique values
uni, ind = np.unique(a ,return_index=True)
print(ind)
[ 0 2 3 4 5 6 7 12 13 14]
In [41]:
#Get the count of unique values
uni, count = np.unique(a, return_counts=True)
print(count)
[3 2 2 2 1 1 1 1 1 1]
In [45]:
a_2d = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [1, 2, 3, 4]])
uni = np.unique(a_2d, axis=0) #row
print(uni)
[[ 1 2 3 4] [ 5 6 7 8] [ 9 10 11 12]]
In [46]:
uni = np.unique(a_2d, axis=1) #col
print(uni)
[[ 1 2 3 4] [ 5 6 7 8] [ 9 10 11 12] [ 1 2 3 4]]
Problem 1 : Creating a Custom Matrix¶
In [100]:
#Creating a custom matrix: a 5x5 frame of ones around a 3x3 zero block with a 9 at its centre
output = np.ones(shape=(5, 5))
print(output)

#3x3 block of zeros whose middle element is set to 9
z = np.zeros(shape=(3, 3))
z[1][1] = 9
print(z)

#Paste the block over everything except the outermost rows and columns
output[1:-1, 1:-1] = z
print(output)
[[1. 1. 1. 1. 1.] [1. 1. 1. 1. 1.] [1. 1. 1. 1. 1.] [1. 1. 1. 1. 1.] [1. 1. 1. 1. 1.]] [[0. 0. 0.] [0. 9. 0.] [0. 0. 0.]] [[1. 1. 1. 1. 1.] [1. 0. 0. 0. 1.] [1. 0. 9. 0. 1.] [1. 0. 0. 0. 1.] [1. 1. 1. 1. 1.]]
- Be careful while copying arrays.
In [111]:
a = np.array([1,2,3])
b = a.copy() #do NOT forget to add copy(), otherwise changes you run in b will also happen in a.
b[0] = 100
print(a)
print(b)
[1 2 3] [100 2 3]
Basic Mathematics in NumPy¶
In [121]:
a = np.array((1,2,3,4))
print(a)
[1 2 3 4]
In [122]:
a + 2
Out[122]:
array([3, 4, 5, 6])
In [115]:
a - 2
Out[115]:
array([-1, 0, 1, 2])
In [116]:
a * 2
Out[116]:
array([2, 4, 6, 8])
In [117]:
a / 2
Out[117]:
array([0.5, 1. , 1.5, 2. ])
In [123]:
b = np.array([1,0,1,0])
a + b
Out[123]:
array([2, 2, 4, 4])
In [124]:
a ** 2
Out[124]:
array([ 1, 4, 9, 16], dtype=int32)
In [131]:
#Take sin of an array
np.sin(a)
#cos and tan values of a
#np.cos(a)
#np.tan(a)
Out[131]:
array([ 0.84147098, 0.90929743, 0.14112001, -0.7568025 ])
In [54]:
stats = np. array([[0.45053314, 0.17296777, 0.34376245, 0.5510652],
[0.54627315, 0.05093587, 0.40067661, 0.55645993],
[0.12697628, 0.82485143, 0.26590556, 0.56917101]])
stats
Out[54]:
array([[0.45053314, 0.17296777, 0.34376245, 0.5510652 ],
[0.54627315, 0.05093587, 0.40067661, 0.55645993],
[0.12697628, 0.82485143, 0.26590556, 0.56917101]])
In [22]:
np.min(stats)
Out[22]:
0.05093587
In [27]:
np.min(stats, axis = 0) # gives the minimum value within each column
Out[27]:
array([0.12697628, 0.05093587, 0.26590556, 0.5510652 ])
In [28]:
np.min(stats, axis = 1) # gives the minimum value within each row
Out[28]:
array([0.17296777, 0.05093587, 0.12697628])
In [24]:
np.max(stats)
Out[24]:
0.82485143
In [25]:
np.sum(stats)
Out[25]:
4.8595784
In [50]:
stats.mean()
Out[50]:
0.4049648666666667
In [51]:
stats.std()
Out[51]:
0.21392120766089617
In [52]:
stats.squeeze()
Out[52]:
array([[0.45053314, 0.17296777, 0.34376245, 0.5510652 ],
[0.54627315, 0.05093587, 0.40067661, 0.55645993],
[0.12697628, 0.82485143, 0.26590556, 0.56917101]])
In [55]:
stats.cumsum()
Out[55]:
array([0.45053314, 0.62350091, 0.96726336, 1.51832856, 2.06460171,
2.11553758, 2.51621419, 3.07267412, 3.1996504 , 4.02450183,
4.29040739, 4.8595784 ])
In [30]:
#Doing arithmetic operations on matrices of different sizes
d = np.array([[1, 2], [3, 4], [5, 6]])
one_row = np.array([[1,1]])
d + one_row #NumPy uses its broadcasting rules here: the single row is added to every row of d.
Out[30]:
array([[2, 3],
[4, 5],
[6, 7]])
Reorganizing Arrays¶
In [18]:
before = np.array([[1,2,3,4], [5,6,7,8]])
print(before)
[[1 2 3 4] [5 6 7 8]]
In [49]:
after = before.reshape((8,1))
print(after)
[[1] [2] [3] [4] [5] [6] [7] [8]]
In [50]:
after = before.reshape((4,2))
print(after)
[[1 2] [3 4] [5 6] [7 8]]
In [51]:
after = before.reshape((2,2,2))
print(after)
[[[1 2] [3 4]] [[5 6] [7 8]]]
In [19]:
#Use transpose() to reverse the axes of an array
#before was 2x4 matrix
#before.transpose() is 4x2 matrix
before.transpose()
Out[19]:
array([[1, 5],
[2, 6],
[3, 7],
[4, 8]])
In [59]:
#flipping 1-D arrays
arr = np.array([1, 2, 3, 4, 5, 6, 7, 8])
rev= np.flip(arr)
print('Reversed array: ', rev)
Reversed array: [8 7 6 5 4 3 2 1]
In [60]:
#flipping 2-D arrays
arr_2d = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
rev_2d = np.flip(arr_2d)
print(rev_2d)
[[12 11 10 9] [ 8 7 6 5] [ 4 3 2 1]]
In [62]:
#reversing rows
rev_row = np.flip(arr_2d, axis = 0)
print(rev_row)
[[ 9 10 11 12] [ 5 6 7 8] [ 1 2 3 4]]
In [64]:
#reversing columns
rev_col = np.flip(arr_2d, axis=1)
print(rev_col)
[[ 4 3 2 1] [ 8 7 6 5] [12 11 10 9]]
In [65]:
#flipping only second row
arr_2d[1] = np.flip(arr_2d[1])
print(arr_2d)
[[ 1 2 3 4] [ 8 7 6 5] [ 9 10 11 12]]
In [66]:
#flipping only the second column
arr_2d[:,1] = np.flip(arr_2d[:,1])
print(arr_2d)
[[ 1 10 3 4] [ 8 7 6 5] [ 9 2 11 12]]
Miscellaneous¶
Splitting Arrays¶
In [14]:
#Splitting long arrays into small arrays
x = np.arange(1,25).reshape(2,12)
np.hsplit(x,3)
Out[14]:
[array([[ 1, 2, 3, 4],
[13, 14, 15, 16]]),
array([[ 5, 6, 7, 8],
[17, 18, 19, 20]]),
array([[ 9, 10, 11, 12],
[21, 22, 23, 24]])]
In [15]:
np.hsplit(x,4)
Out[15]:
[array([[ 1, 2, 3],
[13, 14, 15]]),
array([[ 4, 5, 6],
[16, 17, 18]]),
array([[ 7, 8, 9],
[19, 20, 21]]),
array([[10, 11, 12],
[22, 23, 24]])]
In [17]:
np.hsplit(x, (3,4))
Out[17]:
[array([[ 1, 2, 3],
[13, 14, 15]]),
array([[ 4],
[16]]),
array([[ 5, 6, 7, 8, 9, 10, 11, 12],
[17, 18, 19, 20, 21, 22, 23, 24]])]
Load data from file¶
In [87]:
data = np.genfromtxt('data.txt', delimiter = ',')
data
Out[87]:
array([[ 1., 13., 21., 11., 196., 75., 4., 3., 34., 6., 7.,
8., 0., 1., 2., 3., 4., 5.],
[ 3., 42., 12., 33., 766., 75., 4., 55., 6., 4., 3.,
4., 5., 6., 7., 0., 11., 12.],
[ 1., 22., 33., 11., 999., 11., 2., 1., 78., 0., 1.,
2., 9., 8., 7., 1., 76., 88.]])
Changing the data type of the loaded array¶
In [88]:
data = data.astype('int32')
data
Out[88]:
array([[ 1, 13, 21, 11, 196, 75, 4, 3, 34, 6, 7, 8, 0,
1, 2, 3, 4, 5],
[ 3, 42, 12, 33, 766, 75, 4, 55, 6, 4, 3, 4, 5,
6, 7, 0, 11, 12],
[ 1, 22, 33, 11, 999, 11, 2, 1, 78, 0, 1, 2, 9,
8, 7, 1, 76, 88]])
Loading data from a URL¶
In [97]:
dta= np.genfromtxt('https://raw.githubusercontent.com/selva86/datasets/master/Auto.csv', delimiter =',', skip_header = 1)
dta
Out[97]:
array([[ 18., 8., 307., ..., 70., 1., nan],
[ 15., 8., 350., ..., 70., 1., nan],
[ 18., 8., 318., ..., 70., 1., nan],
...,
[ 32., 4., 135., ..., 82., 1., nan],
[ 28., 4., 120., ..., 82., 1., nan],
[ 31., 4., 119., ..., 82., 1., nan]])
Filling NaN values with other values¶
In [98]:
dta= np.genfromtxt('https://raw.githubusercontent.com/selva86/datasets/master/Auto.csv', delimiter =',', skip_header = 1,
filling_values= 9999, dtype = 'float')
dta
Out[98]:
array([[1.800e+01, 8.000e+00, 3.070e+02, ..., 7.000e+01, 1.000e+00,
9.999e+03],
[1.500e+01, 8.000e+00, 3.500e+02, ..., 7.000e+01, 1.000e+00,
9.999e+03],
[1.800e+01, 8.000e+00, 3.180e+02, ..., 7.000e+01, 1.000e+00,
9.999e+03],
...,
[3.200e+01, 4.000e+00, 1.350e+02, ..., 8.200e+01, 1.000e+00,
9.999e+03],
[2.800e+01, 4.000e+00, 1.200e+02, ..., 8.200e+01, 1.000e+00,
9.999e+03],
[3.100e+01, 4.000e+00, 1.190e+02, ..., 8.200e+01, 1.000e+00,
9.999e+03]])
Suppressing Scientific Notation in the Dataset¶
In [101]:
np.set_printoptions(suppress=True)
dta
Out[101]:
array([[ 18., 8., 307., ..., 70., 1., 9999.],
[ 15., 8., 350., ..., 70., 1., 9999.],
[ 18., 8., 318., ..., 70., 1., 9999.],
...,
[ 32., 4., 135., ..., 82., 1., 9999.],
[ 28., 4., 120., ..., 82., 1., 9999.],
[ 31., 4., 119., ..., 82., 1., 9999.]])
Saving Files Locally¶
In [102]:
np.savetxt('auto.csv', dta, delimiter = ',')
Saving Files as NumPy File¶
In [103]:
#np.save writes NumPy's binary .npy format: it is not readable in a text editor, load it back with np.load
np.save('auto.npy', dta)
Loading Files through NumPy¶
In [108]:
k = np.load('auto.npy')
k
Out[108]:
array([[ 18., 8., 307., ..., 70., 1., 9999.],
[ 15., 8., 350., ..., 70., 1., 9999.],
[ 18., 8., 318., ..., 70., 1., 9999.],
...,
[ 32., 4., 135., ..., 82., 1., 9999.],
[ 28., 4., 120., ..., 82., 1., 9999.],
[ 31., 4., 119., ..., 82., 1., 9999.]])
Concat (Row & Col wise)¶
In [111]:
arr1 = np.zeros([4,4])
arr1
Out[111]:
array([[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.]])
In [112]:
arr2 = np.ones([4,4])
arr2
Out[112]:
array([[1., 1., 1., 1.],
[1., 1., 1., 1.],
[1., 1., 1., 1.],
[1., 1., 1., 1.]])
Method 1 : np.concatenate()¶
In [115]:
#concatenate along axis 1: the arrays are joined side by side (the result has more columns)
np.concatenate([arr1, arr2], axis = 1)
Out[115]:
array([[0., 0., 0., 0., 1., 1., 1., 1.],
[0., 0., 0., 0., 1., 1., 1., 1.],
[0., 0., 0., 0., 1., 1., 1., 1.],
[0., 0., 0., 0., 1., 1., 1., 1.]])
In [116]:
#concatenate along axis 0: the arrays are stacked on top of each other (the result has more rows)
np.concatenate([arr1, arr2], axis = 0)
Out[116]:
array([[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.],
[1., 1., 1., 1.],
[1., 1., 1., 1.],
[1., 1., 1., 1.],
[1., 1., 1., 1.]])
Method 2 : vstack() & hstack()¶
In [152]:
#Vertically stacking vectors
v1 = np.array([1,2,3,4,5])
v2 = np.array([6,7,8,9,10])
np.vstack([v1,v2])
Out[152]:
array([[ 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10]])
In [153]:
np.vstack([v1,v2, v2, v1])
Out[153]:
array([[ 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10],
[ 6, 7, 8, 9, 10],
[ 1, 2, 3, 4, 5]])
In [118]:
#Horizontally stacking arrays
h1 = np.ones((2,4))
h2 = np.zeros((2,2))
np.hstack([h1,h2])
Out[118]:
array([[1., 1., 1., 1., 0., 0.],
[1., 1., 1., 1., 0., 0.]])
In [119]:
np.hstack([h1,h2,h2,h1,h2,h1])
Out[119]:
array([[1., 1., 1., 1., 0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 1., 1.,
1., 1.],
[1., 1., 1., 1., 0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 1., 1.,
1., 1.]])
Sorting Arrays¶
In [121]:
kk = np.random.randint(1,10,size = [10,5])
kk
Out[121]:
array([[4, 7, 9, 1, 3],
[8, 8, 8, 4, 1],
[9, 8, 8, 2, 2],
[4, 1, 9, 7, 5],
[6, 7, 3, 6, 8],
[9, 5, 5, 8, 8],
[5, 1, 3, 1, 8],
[2, 8, 9, 5, 1],
[2, 9, 3, 4, 2],
[3, 8, 3, 7, 1]])
In [126]:
#sorting along rows of the array
np.sort(kk)
Out[126]:
array([[1, 3, 4, 7, 9],
[1, 4, 8, 8, 8],
[2, 2, 8, 8, 9],
[1, 4, 5, 7, 9],
[3, 6, 6, 7, 8],
[5, 5, 8, 8, 9],
[1, 1, 3, 5, 8],
[1, 2, 5, 8, 9],
[2, 2, 3, 4, 9],
[1, 3, 3, 7, 8]])
In [129]:
#sorting the rows by the values in a single column (here column 0)
sorted_col = kk[:, 0].argsort()
kk[sorted_col]
Out[129]:
array([[2, 8, 9, 5, 1],
[2, 9, 3, 4, 2],
[3, 8, 3, 7, 1],
[4, 7, 9, 1, 3],
[4, 1, 9, 7, 5],
[5, 1, 3, 1, 8],
[6, 7, 3, 6, 8],
[8, 8, 8, 4, 1],
[9, 8, 8, 2, 2],
[9, 5, 5, 8, 8]])
Working with Dates¶
In [133]:
d = np.datetime64('2021-08-03 23:10:00')
d
Out[133]:
numpy.datetime64('2021-08-03T23:10:00')
In [137]:
#adding seconds to the date-time (a plain integer is added in d's own unit, which is seconds here)
d + 100000
Out[137]:
numpy.datetime64('2021-08-05T02:56:40')
In [141]:
#adding one day to the date-time
#timedelta64(1, "D") is a duration of exactly one day; only the last expression of the cell is displayed
oneday = np.timedelta64(1, "D")
d + oneday
Out[141]:
numpy.datetime64('2021-08-04T23:10:00')
In [143]:
#adding one minute to the date-time
#timedelta64(1, 'm') is a duration of exactly one minute; only the last expression of the cell is displayed
onemin = np.timedelta64(1, 'm')
d + onemin
Out[143]:
numpy.datetime64('2021-08-03T23:11:00')
In [146]:
#creating a sequence of date-time array
dates = np.arange(np.datetime64('2019-09-03'), np.datetime64('2019-10-03'), 3)
dates
Out[146]:
array(['2019-09-03', '2019-09-06', '2019-09-09', '2019-09-12',
'2019-09-15', '2019-09-18', '2019-09-21', '2019-09-24',
'2019-09-27', '2019-09-30'], dtype='datetime64[D]')
In [147]:
dates = np.arange(np.datetime64('2019-09-03'), np.datetime64('2019-10-03'))
dates
Out[147]:
array(['2019-09-03', '2019-09-04', '2019-09-05', '2019-09-06',
'2019-09-07', '2019-09-08', '2019-09-09', '2019-09-10',
'2019-09-11', '2019-09-12', '2019-09-13', '2019-09-14',
'2019-09-15', '2019-09-16', '2019-09-17', '2019-09-18',
'2019-09-19', '2019-09-20', '2019-09-21', '2019-09-22',
'2019-09-23', '2019-09-24', '2019-09-25', '2019-09-26',
'2019-09-27', '2019-09-28', '2019-09-29', '2019-09-30',
'2019-10-01', '2019-10-02'], dtype='datetime64[D]')
Advanced Functions¶
vectorize() : helps us apply a user-defined function to every element of an array¶
In [154]:
def foo(x):
    """Square odd inputs and halve even inputs.

    Returns ``x**2`` when ``x % 2 == 1``; otherwise returns ``x/2``
    (true division, so the result is a float for plain integers).
    """
    if x%2 == 1:
        return x**2
    else:
        return x/2
foo(10)
foo(11)
Out[154]:
121
In [157]:
foo_v = np.vectorize(foo, otypes = [float])
foo_v(kk)
Out[157]:
array([[ 2., 49., 81., 1., 9.],
[ 4., 4., 4., 2., 1.],
[81., 4., 4., 1., 1.],
[ 2., 1., 81., 49., 25.],
[ 3., 49., 9., 3., 4.],
[81., 25., 25., 4., 4.],
[25., 1., 9., 1., 4.],
[ 1., 4., 81., 25., 1.],
[ 1., 81., 9., 2., 1.],
[ 9., 4., 9., 49., 1.]])
Boolean Masking & Advanced Indexing¶
In [160]:
data > 50
Out[160]:
array([[False, False, False, False, True, True, False, False, False,
False, False, False, False, False, False, False, False, False],
[False, False, False, False, True, True, False, True, False,
False, False, False, False, False, False, False, False, False],
[False, False, False, False, True, False, False, False, True,
False, False, False, False, False, False, False, True, True]])
In [165]:
data[data > 50]
Out[165]:
array([196, 75, 766, 75, 55, 999, 78, 76, 88])
In [169]:
np.any(data > 50, axis = 0)
Out[169]:
array([False, False, False, False, True, True, False, True, True,
False, False, False, False, False, False, False, True, True])
In [170]:
np.all(data > 50, axis = 0)
Out[170]:
array([False, False, False, False, True, False, False, False, False,
False, False, False, False, False, False, False, False, False])
In [89]:
data[((data > 50) & (data < 100))]
Out[89]:
array([75, 75, 55, 78, 76, 88])
In [91]:
#~ binds tighter than &, so only (data > 50) is negated: this keeps the values <= 50
#(for those values the < 100 test is always true, so it does not filter anything further)
data[(~(data > 50) & (data < 100))]
Out[91]:
array([ 1, 13, 21, 11, 4, 3, 34, 6, 7, 8, 0, 1, 2, 3, 4, 5, 3,
42, 12, 33, 4, 6, 4, 3, 4, 5, 6, 7, 0, 11, 12, 1, 22, 33,
11, 11, 2, 1, 0, 1, 2, 9, 8, 7, 1])
Checking if Data has NAN or INF values¶
In [41]:
x = np.arange(1, 11, dtype=float)
x = np.insert(x, 2, np.inf, axis=0)
x = np.insert(x, 8, np.nan, axis=0)
x
Out[41]:
array([ 1., 2., inf, 3., 4., 5., 6., 7., nan, 8., 9., 10.])
In [43]:
np.isnan(x)
Out[43]:
array([False, False, False, False, False, False, False, False, True,
False, False, False])
In [44]:
np.isinf(x)
Out[44]:
array([False, False, True, False, False, False, False, False, False,
False, False, False])
Replacing NaN & INF with other values¶
In [45]:
missing = np.isnan(x) | np.isinf(x)
missing
Out[45]:
array([False, False, True, False, False, False, False, False, True,
False, False, False])
In [46]:
x[missing]
Out[46]:
array([inf, nan])
In [48]:
x[missing] = 0
x
Out[48]:
array([ 1., 2., 0., 3., 4., 5., 6., 7., 0., 8., 9., 10.])
Problem 2 : Indexing¶
In [178]:
prob = np.array([[1,2,3,4,5], [6,7,8,9,10], [11,12,13,14,15], [16,17,18,19,20], [21,22,23,24,25], [26,27,28,29,30]])
prob
Out[178]:
array([[ 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10],
[11, 12, 13, 14, 15],
[16, 17, 18, 19, 20],
[21, 22, 23, 24, 25],
[26, 27, 28, 29, 30]])
In [179]:
prob[2:4, 0:2]
Out[179]:
array([[11, 12],
[16, 17]])
In [185]:
prob[[0,1,2,3], [1,2,3,4]]
Out[185]:
array([ 2, 8, 14, 20])
In [186]:
prob[[0,4,5], 3:]
Out[186]:
array([[ 4, 5],
[24, 25],
[29, 30]])