#!/usr/bin/env python
# coding: utf-8

# NumPy walkthrough exported from a Jupyter notebook. The "# In[N]:" markers
# are the original cell boundaries. Bare expressions such as `a.ndim` only
# display a value in an interactive session; run as a script they are
# evaluated and discarded.

# # Introduction

# In[2]:
import numpy as np
print(np.__version__)

# In[43]:
a = np.array([1, 3, 6, 9], dtype='int16')
print(a)

# In[3]:
b = np.array([[3.0, 9.0, 5.0], [1., 79.0, 4.9]])
print(b)

# In[6]:
# Get the number of dimensions
a.ndim

# In[4]:
b.ndim

# In[7]:
# Get the shape
b.shape

# In[15]:
a.shape

# In[13]:
# Get the element type
a.dtype

# In[17]:
b.dtype

# In[45]:
# Get the number of elements
a.size

# In[46]:
b.size

# In[20]:
# Get the total size in bytes
a.nbytes

# In[22]:
b.nbytes

# # Accessing/Changing specific elements, rows, columns, etc.

# ## 2-D Array

# In[70]:
a = np.array([[1, 2, 3, 4, 5, 6, 7], [8, 9, 10, 11, 12, 13, 14]])
print(a)

# In[24]:
# Get a specific element [r, c]
a[1, 5]
# or a[1, -2]

# In[28]:
# Get a specific row
a[0, :]

# In[30]:
# Get a specific column
a[:, 4]

# In[34]:
# Select with a slice [startindex:endindex:stepsize]
a[0, 1:6:2]

# In[38]:
# Re-assigning values in the array
a[1, 5] = 20
print(a)

# In[39]:
# A scalar is broadcast to the whole column
a[:, 2] = 5
print(a)

# In[40]:
# One value per row of the column
a[:, 2] = [3, 10]
print(a)

# In[9]:
# Get elements divisible by 2
a[a % 2 == 0]

# In[11]:
# Boolean mask marking the elements that are 5 and up
print(a >= 5)

# ## 3-D Array

# In[50]:
b = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])
print(b)

# In[52]:
# Get specific element (work outside in)
b[0, 1, :]

# In[53]:
b[:, 1, :]

# In[55]:
b[:, 1, 1]

# In[56]:
b[0, :, 1]

# In[12]:
c = np.array([[1, 4, 1], [2, 7, 2], [3, 9, 3]])
c

# In[13]:
# Reversing the order of the rows by slicing
c[::-1, ]

# In[16]:
# Reversing the order of rows & columns by slicing
c[::-1, ::-1]

# In[57]:
# Replacing elements in the array
b[:, 1, :] = [[78, 34], [22, 56]]
print(b)

# ### How to add a new axis to an array

# In[3]:
a = np.array([1, 2, 3, 4, 5, 6])
a.shape

# In[6]:
a2 = a[np.newaxis, :]  # row vector, shape (1, 6)
a2.shape

# In[8]:
a3 = a[:, np.newaxis]  # column vector, shape (6, 1)
a3.shape

# ## Initialize Different Types of Arrays

# In[58]:
# All 0s matrix
np.zeros(7)

# In[61]:
np.zeros((2, 2))

# In[64]:
np.zeros((3, 6, 3))

# In[68]:
# All 1s matrix
np.ones((4, 5, 3), dtype='int16')

# In[69]:
# Any other number
np.full((3, 5), 77)

# In[73]:
# Any other number (full_like): same shape and dtype as `a`, filled with 4
np.full_like(a, 4)

# ### Generating Random Numbers

# In[73]:
# Random floats drawn uniformly from the half-open interval [0, 1)
np.random.rand(4, 3, 2)

# In[74]:
# Random floats drawn from the standard normal distribution
# (mean 0, variance 1) -- values are NOT limited to [-1, 1]
np.random.randn(4, 3, 2)

# In[86]:
np.random.random_sample(a.shape)

# In[84]:
# Random integer values in [-4, 9)
np.random.randint(-4, 9, size=(3, 3))

# In[82]:
# Seed the global generator so the same "random" values are produced
# every time this cell runs
np.random.seed(1)
np.random.randint(-4, 9, size=(3, 3))

# In[35]:
# Generating a matrix with random integers between 0 & 4
rng = np.random.default_rng(0)
rng.integers(5, size=(2, 4))

# In[62]:
# Generating an array over a specific range (stop value excluded)
np.arange(1, 8, dtype='int')

# In[61]:
# Generating an array with the odd numbers between 1 and 50
np.arange(1, 50, 2)

# In[66]:
# Generating an array with as many evenly spaced elements as we want
np.linspace(1, 50, 30)

# In[65]:
# Generating an array with elements equally spaced on a log scale
# (here from 10**1 to 10**50)
np.logspace(1, 50, 10)

# In[88]:
# The identity matrix
np.identity(5)

# In[68]:
# Repeating an array
arr = np.array([[1, 2, 3]])
r1 = np.repeat(arr, 3, axis=0)
print(r1)

# In[69]:
# Repeating an array with tile()
np.tile(arr, 4)

# In[38]:
# Get unique items and counts
a = np.array([11, 11, 12, 13, 14, 15, 16, 17, 12, 13, 11, 14, 18, 19, 20])
uni = np.unique(a)
print(uni)

# In[40]:
# Get the index of the first occurrence of each unique value
uni, ind = np.unique(a, return_index=True)
print(ind)

# In[41]:
# Get the count of each unique value
uni, count = np.unique(a, return_counts=True)
print(count)

# In[45]:
a_2d = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [1, 2, 3, 4]])
uni = np.unique(a_2d, axis=0)  # unique rows
print(uni)

# In[46]:
uni = np.unique(a_2d, axis=1)  # unique columns
print(uni)

# # Problem 1 : Creating a Custom Matrix

# In[100]:
# Creating a custom matrix: a border of ones around a 3x3 block of zeros
# with a 9 in the centre
output = np.ones((5, 5))
print(output)

z = np.zeros((3, 3))
z[1, 1] = 9
print(z)

output[1:4, 1:4] = z
print(output)

# - **Be careful while copying arrays.**

# In[111]:
a = np.array([1, 2, 3])
# Do NOT forget copy(): a plain `b = a` binds a second name to the same
# array, so changes made through b would also show up in a.
b = a.copy()
b[0] = 100
print(a)
print(b)

# # Basic Mathematics in NumPy

# In[121]:
a = np.array((1, 2, 3, 4))
print(a)

# In[122]:
a + 2

# In[115]:
a - 2

# In[116]:
a * 2

# In[117]:
a / 2

# In[123]:
b = np.array([1, 0, 1, 0])
a + b

# In[124]:
a ** 2

# In[131]:
# Take sin of an array
np.sin(a)
# cos and tan values of a
# np.cos(a)
# np.tan(a)

# In[54]:
stats = np.array([[0.45053314, 0.17296777, 0.34376245, 0.5510652],
                  [0.54627315, 0.05093587, 0.40067661, 0.55645993],
                  [0.12697628, 0.82485143, 0.26590556, 0.56917101]])
stats

# In[22]:
np.min(stats)

# In[27]:
np.min(stats, axis=0)  # gives the minimum value within each column

# In[28]:
np.min(stats, axis=1)  # gives the minimum value within each row

# In[24]:
np.max(stats)

# In[25]:
np.sum(stats)

# In[50]:
stats.mean()

# In[51]:
stats.std()

# In[52]:
stats.squeeze()

# In[55]:
stats.cumsum()

# In[30]:
# Doing arithmetic operations on matrices of different sizes
d = np.array([[1, 2], [3, 4], [5, 6]])
one_row = np.array([[1, 1]])
d + one_row  # NumPy uses its broadcast rules for this operation.
# ## Reorganizing Arrays

# In[18]:
before = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])
print(before)

# In[49]:
after = before.reshape((8, 1))
print(after)

# In[50]:
after = before.reshape((4, 2))
print(after)

# In[51]:
after = before.reshape((2, 2, 2))
print(after)

# In[19]:
# Use transpose() to reverse the axes of an array
# before was a 2x4 matrix
# before.transpose() is a 4x2 matrix
before.transpose()

# In[59]:
# Flipping 1-D arrays
arr = np.array([1, 2, 3, 4, 5, 6, 7, 8])
rev = np.flip(arr)
print('Reversed array: ', rev)

# In[60]:
# Flipping 2-D arrays (with no axis given, every axis is reversed)
arr_2d = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
rev_2d = np.flip(arr_2d)
print(rev_2d)

# In[62]:
# Reversing rows
rev_row = np.flip(arr_2d, axis=0)
print(rev_row)

# In[64]:
# Reversing columns
rev_col = np.flip(arr_2d, axis=1)
print(rev_col)

# In[65]:
# Flipping only the second row (modifies arr_2d in place)
arr_2d[1] = np.flip(arr_2d[1])
print(arr_2d)

# In[66]:
# Flipping only the second column (modifies arr_2d in place)
arr_2d[:, 1] = np.flip(arr_2d[:, 1])
print(arr_2d)

# ## Miscellaneous

# ### Splitting Arrays

# In[14]:
# Splitting long arrays into small arrays
x = np.arange(1, 25).reshape(2, 12)
np.hsplit(x, 3)

# In[15]:
np.hsplit(x, 4)

# In[17]:
# A tuple gives the column indices to split at: [:3], [3:4], [4:]
np.hsplit(x, (3, 4))

# ### Load data from file

# In[87]:
data = np.genfromtxt('data.txt', delimiter=',')
data

# ### Changing the data type of the loaded array

# In[88]:
data = data.astype('int32')
data

# ### Loading data from a URL

# In[97]:
dta = np.genfromtxt('https://raw.githubusercontent.com/selva86/datasets/master/Auto.csv', delimiter=',', skip_header=1)
dta

# ### Filling NaN values with other values

# In[98]:
dta = np.genfromtxt('https://raw.githubusercontent.com/selva86/datasets/master/Auto.csv', delimiter=',', skip_header=1, filling_values=9999, dtype='float')
dta

# ### Suppressing Scientific Notation in the Dataset

# In[101]:
np.set_printoptions(suppress=True)
dta

# ### Saving Files into Local

# In[102]:
np.savetxt('auto.csv', dta, delimiter=',')

# ### Saving Files as NumPy File

# In[103]:
# .npy is NumPy's binary format: read it back with np.load rather than a
# text editor
np.save('auto.npy', dta)

# ### Loading Files through NumPy

# In[108]:
k = np.load('auto.npy')
k

# ### Concat (Row & Col wise)

# In[111]:
arr1 = np.zeros([4, 4])
arr1

# In[112]:
arr2 = np.ones([4, 4])
arr2

# #### Method 1 : np.concatenate()

# In[115]:
# axis=1 joins the arrays side by side: each row gets longer (result is 4x8)
np.concatenate([arr1, arr2], axis=1)

# In[116]:
# axis=0 stacks the arrays one above the other: more rows (result is 8x4)
np.concatenate([arr1, arr2], axis=0)

# #### Method 2 : vstack() & hstack()

# In[152]:
# Vertically stacking vectors
v1 = np.array([1, 2, 3, 4, 5])
v2 = np.array([6, 7, 8, 9, 10])
np.vstack([v1, v2])

# In[153]:
np.vstack([v1, v2, v2, v1])

# In[118]:
# Horizontally stacking arrays
h1 = np.ones((2, 4))
h2 = np.zeros((2, 2))
np.hstack([h1, h2])

# In[119]:
np.hstack([h1, h2, h2, h1, h2, h1])

# ### Sorting Arrays

# In[121]:
kk = np.random.randint(1, 10, size=[10, 5])
kk

# In[126]:
# np.sort works along the last axis by default, so every row is sorted
# independently
np.sort(kk)

# In[129]:
# Sorting by a single column: reorder whole rows so that column 0 ascends
sorted_col = kk[:, 0].argsort()
kk[sorted_col]

# ### Working with Dates

# In[133]:
d = np.datetime64('2021-08-03 23:10:00')
d

# In[137]:
# Adding seconds to the date-time (d has second resolution, so a plain
# integer is counted in seconds)
d + 100000

# In[141]:
# Adding one day to the date-time
oneday = np.timedelta64(1, "D")
oneday
d + oneday

# In[143]:
# Adding one minute to the date-time
onemin = np.timedelta64(1, 'm')
onemin
d + onemin

# In[146]:
# Creating a sequence of dates, one every 3 days
dates = np.arange(np.datetime64('2019-09-03'), np.datetime64('2019-10-03'), 3)
dates

# In[147]:
# Same range with the default step of one day
dates = np.arange(np.datetime64('2019-09-03'), np.datetime64('2019-10-03'))
dates

# ### Advanced Functions

# #### vectorize() : helps us to apply a user-defined function to an array

# In[154]:
def foo(x):
    """Return ``x`` squared when ``x % 2 == 1``, otherwise ``x / 2``."""
    return x ** 2 if x % 2 == 1 else x / 2


foo(10)
foo(11)

# In[157]:
# otypes fixes the output dtype to float for every element
foo_v = np.vectorize(foo, otypes=[float])
foo_v(kk)

# ### Boolean Masking & Advanced Indexing

# In[160]:
data > 50

# In[165]:
data[data > 50]

# In[169]:
np.any(data > 50, axis=0)

# In[170]:
np.all(data > 50, axis=0)

# In[89]:
data[(data > 50) & (data < 100)]

# In[91]:
# `~` applies only to the first comparison: NOT (data > 50), AND data < 100
data[~(data > 50) & (data < 100)]

# ### Checking if Data has NaN or INF values

# In[41]:
x = np.arange(1, 11, dtype=float)
x = np.insert(x, 2, np.inf, axis=0)
x = np.insert(x, 8, np.nan, axis=0)
x

# In[43]:
np.isnan(x)

# In[44]:
np.isinf(x)

# ### Replacing NaN & INF with other values

# In[45]:
missing = np.isnan(x) | np.isinf(x)
missing

# In[46]:
x[missing]

# In[48]:
x[missing] = 0
x

# # Problem 2 : Indexing

# In[178]:
prob = np.array([[1, 2, 3, 4, 5],
                 [6, 7, 8, 9, 10],
                 [11, 12, 13, 14, 15],
                 [16, 17, 18, 19, 20],
                 [21, 22, 23, 24, 25],
                 [26, 27, 28, 29, 30]])
prob

# In[179]:
# Rectangular block: rows 2-3, columns 0-1
prob[2:4, 0:2]

# In[185]:
# Paired index lists pick individual elements: (0,1), (1,2), (2,3), (3,4)
prob[[0, 1, 2, 3], [1, 2, 3, 4]]

# In[186]:
# Rows 0, 4 and 5, columns 3 onwards
prob[[0, 4, 5], 3:]