#!/usr/bin/env python # coding: utf-8 # # Loading Data into Pandas # In[15]: import pandas as pd #reading csv df = pd.read_csv('pokemon_data.csv') print(df.head()) #reading excel #df_xlsx = pd.read_excel('pokemon_data.xlsx') #print(df_xlsx.head()) #reading txt #df_txt = pd.read_csv('pokemon_data.txt', delimiter = '\t') #print(df_txt.head()) # # Reading Data in Pandas # In[20]: #Read Headers #print(df.columns) #or df.columns # In[24]: #Read each Column print(df[['Name', 'Type 1', 'HP']][0:10]) # In[27]: #Read each Row print(df.iloc[0:4]) # In[37]: #For getting a list of a column for index, row in df.iterrows(): print(index, row['Name']) # In[29]: #Read a specific location (R, C) print(df.iloc[2, 1]) # In[36]: df.loc[df['Type 1'] == 'Fire'][0:10] # In[35]: df.loc[df['Type 1'] == 'Grass'][0:10] # # Sorting/Describing Data # In[38]: df.describe() # In[40]: df.sort_values('Name') # In[41]: df.sort_values('Name', ascending = False) # In[44]: df.sort_values(['Type 1', 'HP'], ascending = [1, 0]) # - **1 = True, 0 = False** # # Making changes to the data # ### Adding Columns to create a new Column # - **Method 1** # In[71]: df['Total'] = df['HP'] + df['Attack'] + df['Defense'] + df['Sp. Atk'] + df['Sp. Def'] + df['Speed'] df.head() # - **Method 2** # In[70]: df['Total'] = df.iloc[:, 4:12].sum(axis=1) df.head(4) # ****Sum is 319 instead of 318 because the rows are rearranged therefore adding Legendary column too* # - **: means all rows are included** # - **axis = 1 means horizontal summing (for vertical summing axis = 0)** # ### Rearranging Columns # In[57]: cols = list(df.columns) df = df[cols[0:4] + [cols[-1]]+cols[4:12]] df.head() # ### Deleting Columns # In[65]: df = df.drop(columns=['Total']) df.head() # # Saving our Data (Exporting into Desired Format) # In[73]: Saving as CSV df.to_csv('modified_pokemon.csv', index = False) #Saving as Excel #df.to_excel('modified_pokemon.csv', index = False) #Saving as TXT #df.to_csv('modified_pokemon.csv', index = False, sep = '\t') # # Filtering Data # In[83]: df.loc[(df['Type 1'] == 'Water') & (df['Type 2'] == 'Ice')] # In[84]: df.loc[(df['Type 1'] == 'Water') | (df['Type 2'] == 'Ice')] # In[114]: new_df = df.loc[(df['Type 1'] == 'Ice') | (df['Speed'] > 50) & (df['Type 2'] == 'NaN' )] new_df = new_df.reset_index(drop = True) print(new_df) # - **Reset_index = starts a new index from 1** # - **drop = removes the old index from the data frame** # In[117]: df.loc[~df['Name'].str.contains('Mega')] # - **~ means removal of the filtered data after it's usage** # # Regex Filtering # In[121]: import re df.loc[df['Type 1'].str.contains('Fire|Grass', regex = True)] # - **regex means that regular expressions are used in the script** # - **'Fire|Grass' is very Case-sensetive** # In[122]: df.loc[df['Type 1'].str.contains('fire|grass', flags = re.I, regex = True)] # - **'flags = re.I' ignores the Case of the Data** # In[125]: df.loc[df['Name'].str.contains('^pi[a-z]*', flags = re.I, regex = True)] # # Conditional Changes # In[132]: df.loc[df['Type 1'] == 'Fire', 'Type 1'] = 'Flamer' df # - **This script changes Fire into Flamer and change it back to Fire, we write the following code:** # In[133]: df.loc[df['Type 1'] == 'Flamer', 'Type 1'] = 'Fire' df # In[140]: df.loc[df['Total'] > 500, ['Generation', 'Legendary']] = ['Good', 'Great'] df # # Aggregated Statistics (Group By) # In[146]: df.groupby(['Type 1']).mean().sort_values('HP', ascending = False) # In[148]: df.groupby(['Type 1']).mean().sort_values('Defense', ascending = False) # In[147]: df.groupby(['Type 1']).mean().sort_values('Attack', ascending = False) # In[152]: df['Count'] = 1 df.groupby(['Type 1']).count()['Count'] # In[156]: df.groupby(['Type 1', 'Type 2']).count()['Count'] # # Working with Large Amounts of Data # In[160]: big_df = pd.DataFrame(columns = df.columns) for df in pd.read_csv('modified_pokemon.csv', chunksize = 5): chunks = df.groupby(['Type 1']).count() big_df = pd.concat([big_df, chunks]) print(big_df)