#!/usr/bin/env python
# coding: utf-8

# # Loading Data into Pandas

# In[15]:


import pandas as pd

#reading csv
df = pd.read_csv('pokemon_data.csv')
print(df.head())

#reading excel
#df_xlsx = pd.read_excel('pokemon_data.xlsx')
#print(df_xlsx.head())

#reading txt
#df_txt = pd.read_csv('pokemon_data.txt', delimiter = '\t')
#print(df_txt.head())


# # Reading Data in Pandas

# In[20]:


#Read Headers
#print(df.columns)
#or 
df.columns


# In[24]:


#Read each Column
print(df[['Name', 'Type 1', 'HP']][0:10])


# In[27]:


#Read each Row
print(df.iloc[0:4])


# In[37]:


#For getting a list of a column
for index, row in df.iterrows():
    print(index, row['Name'])


# In[29]:


#Read a specific location (R, C)
print(df.iloc[2, 1])


# In[36]:


df.loc[df['Type 1'] == 'Fire'][0:10]


# In[35]:


df.loc[df['Type 1'] == 'Grass'][0:10]


# # Sorting/Describing Data

# In[38]:


df.describe()


# In[40]:


df.sort_values('Name')


# In[41]:


df.sort_values('Name', ascending = False)


# In[44]:


df.sort_values(['Type 1', 'HP'], ascending = [1, 0])


# - **1 = True, 0 = False**

# # Making changes to the data

# ### Adding Columns to create a new Column

# - **Method 1**

# In[71]:


df['Total'] = df['HP'] + df['Attack'] + df['Defense'] + df['Sp. Atk'] + df['Sp. Def'] + df['Speed']

df.head()


# - **Method 2**

# In[70]:


df['Total'] = df.iloc[:, 4:12].sum(axis=1)

df.head(4)


# ****Sum is 319 instead of 318 because the rows are rearranged therefore adding Legendary column too*

# - **: means all rows are included**

# - **axis = 1 means horizontal summing (for vertical summing axis = 0)**

# ### Rearranging Columns

# In[57]:


cols = list(df.columns)
df = df[cols[0:4] + [cols[-1]]+cols[4:12]]

df.head()


# ### Deleting Columns

# In[65]:


df = df.drop(columns=['Total'])
             
df.head()


# # Saving our Data (Exporting into Desired Format)

# In[73]:


Saving as CSV
df.to_csv('modified_pokemon.csv', index = False)

#Saving as Excel
#df.to_excel('modified_pokemon.csv', index = False)

#Saving as TXT
#df.to_csv('modified_pokemon.csv', index = False, sep = '\t')


# # Filtering Data

# In[83]:


df.loc[(df['Type 1'] == 'Water') & (df['Type 2'] == 'Ice')]


# In[84]:


df.loc[(df['Type 1'] == 'Water') | (df['Type 2'] == 'Ice')]


# In[114]:


new_df = df.loc[(df['Type 1'] == 'Ice') | (df['Speed'] > 50) & (df['Type 2'] == 'NaN' )]

new_df = new_df.reset_index(drop = True)

print(new_df)


# - **Reset_index = starts a new index from 1**

# - **drop = removes the old index from the data frame**

# In[117]:


df.loc[~df['Name'].str.contains('Mega')]


# - **~ means removal of the filtered data after it's usage**

# # Regex Filtering

# In[121]:


import re

df.loc[df['Type 1'].str.contains('Fire|Grass', regex = True)]


# - **regex means that regular expressions are used in the script**

# - **'Fire|Grass' is very Case-sensetive**

# In[122]:


df.loc[df['Type 1'].str.contains('fire|grass', flags = re.I, regex = True)]


# - **'flags = re.I' ignores the Case of the Data**

# In[125]:


df.loc[df['Name'].str.contains('^pi[a-z]*', flags = re.I, regex = True)]


# # Conditional Changes

# In[132]:


df.loc[df['Type 1'] == 'Fire', 'Type 1'] = 'Flamer'

df


# - **This script changes Fire into Flamer and change it back to Fire, we write the following code:**

# In[133]:


df.loc[df['Type 1'] == 'Flamer', 'Type 1'] = 'Fire'

df


# In[140]:


df.loc[df['Total'] > 500, ['Generation', 'Legendary']] = ['Good', 'Great']

df


# # Aggregated Statistics (Group By)

# In[146]:


df.groupby(['Type 1']).mean().sort_values('HP', ascending = False)


# In[148]:


df.groupby(['Type 1']).mean().sort_values('Defense', ascending = False)


# In[147]:


df.groupby(['Type 1']).mean().sort_values('Attack', ascending = False)


# In[152]:


df['Count'] = 1

df.groupby(['Type 1']).count()['Count']


# In[156]:


df.groupby(['Type 1', 'Type 2']).count()['Count']


# # Working with Large Amounts of Data

# In[160]:


big_df = pd.DataFrame(columns = df.columns)

for df in pd.read_csv('modified_pokemon.csv', chunksize = 5):
    chunks = df.groupby(['Type 1']).count()
    
    big_df = pd.concat([big_df, chunks])
    
print(big_df)