Importing Necessary Libraries¶
In [1]:
import pandas as pd
import datetime as dt
date¶
In [2]:
today = dt.date(2021, 8, 22)
In [3]:
today.day
Out[3]:
22
In [4]:
today.month
Out[4]:
8
In [5]:
today.year
Out[5]:
2021
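To get the actual current date rather than a hard-coded one, the standard library's dt.date.today() can be used (a minimal sketch; the result depends on when it runs):
In [ ]:
# Current local date -- output varies by day
now = dt.date.today()
now.isoformat()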
datetime¶
In [6]:
today = dt.datetime(2021, 8, 22, 17, 45, 51)
In [8]:
print(today)
2021-08-22 17:45:51
In [9]:
today.hour
Out[9]:
17
In [10]:
today.minute
Out[10]:
45
In [11]:
today.second
Out[11]:
51
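A datetime bundles a date and a time in one object; the two halves can be pulled back apart with .date() and .time():
In [ ]:
# Split the datetime defined above into its components
today.date()   # datetime.date(2021, 8, 22)
today.time()   # datetime.time(17, 45, 51)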
Timestamp¶
In [12]:
pd.Timestamp('2021-12-22')
Out[12]:
Timestamp('2021-12-22 00:00:00')
In [13]:
pd.Timestamp('2019/02/13')
Out[13]:
Timestamp('2019-02-13 00:00:00')
In [14]:
pd.Timestamp('2020, 4, 21')
Out[14]:
Timestamp('2020-04-21 00:00:00')
In [19]:
pd.Timestamp('2020, 4, 21 18:33:12')
Out[19]:
Timestamp('2020-04-21 18:33:12')
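pd.Timestamp also accepts keyword components and plain datetime objects, not just strings; a minimal sketch:
In [ ]:
# Construct the same instant from explicit components
pd.Timestamp(year=2020, month=4, day=21, hour=18, minute=33, second=12)
# ...or from a datetime object
pd.Timestamp(dt.datetime(2020, 4, 21, 18, 33, 12))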
DatetimeIndex¶
In [23]:
dates = ['2019/12/22', '2012/8/22', '2015/2/22']
In [24]:
pd.DatetimeIndex(dates)
Out[24]:
DatetimeIndex(['2019-12-22', '2012-08-22', '2015-02-22'], dtype='datetime64[ns]', freq=None)
In [26]:
type(dates)
Out[26]:
list
In [27]:
dates[1]
Out[27]:
'2012/8/22'
In [38]:
dates2 = [dt.date(2021, 9, 8), dt.date(2021, 9, 9), dt.date(2021, 9, 10)]
In [39]:
dates2
Out[39]:
[datetime.date(2021, 9, 8), datetime.date(2021, 9, 9), datetime.date(2021, 9, 10)]
In [40]:
dateIndex = pd.DatetimeIndex(dates2)
dateIndex
Out[40]:
DatetimeIndex(['2021-09-08', '2021-09-09', '2021-09-10'], dtype='datetime64[ns]', freq=None)
In [42]:
values = [12, 7, 30]
pd.Series(data=values, index=dateIndex)
Out[42]:
2021-09-08    12
2021-09-09     7
2021-09-10    30
dtype: int64
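With a DatetimeIndex in place, the Series can be looked up and sliced by date strings, which pandas parses on the fly; a small sketch reusing values and dateIndex from above:
In [ ]:
s = pd.Series(data=values, index=dateIndex)
s.loc['2021-09-09']                 # single date label -> 7
s.loc['2021-09-08':'2021-09-09']    # label slice; both ends inclusive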
pd.to_datetime()¶
In [44]:
pd.to_datetime('2021, 1, 5')
Out[44]:
Timestamp('2021-01-05 00:00:00')
In [45]:
pd.to_datetime(dt.date(2001, 12, 12))
Out[45]:
Timestamp('2001-12-12 00:00:00')
In [47]:
pd.to_datetime(dt.datetime(2012, 2, 12, 18, 33, 12))
Out[47]:
Timestamp('2012-02-12 18:33:12')
In [48]:
dates = ['2019/12/22', '2012/8/22', '2015/2/22']
pd.to_datetime(dates)
Out[48]:
DatetimeIndex(['2019-12-22', '2012-08-22', '2015-02-22'], dtype='datetime64[ns]', freq=None)
In [50]:
dates3 = pd.Series(
['December 22nd, 1999', '2019, 12, 12', "This is Date", 'Jan 5th, 2021'])
dates3
Out[50]:
0    December 22nd, 1999
1           2019, 12, 12
2           This is Date
3          Jan 5th, 2021
dtype: object
In [51]:
pd.to_datetime(dates3, errors='coerce')  # invalid parsing will be set as NaT
Out[51]:
0   1999-12-22
1   2019-12-12
2          NaT
3   2021-01-05
dtype: datetime64[ns]
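errors='coerce' is one of the accepted modes (the default, errors='raise', fails loudly instead). When the layout of the strings is known up front, passing format= is both stricter and faster than letting pandas guess:
In [ ]:
# Explicit day-month-year format string
pd.to_datetime('22-12-1999', format='%d-%m-%Y')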
In [54]:
unixtime = [3244563, 3545463, 3242037, 7363236]
pd.to_datetime(unixtime, unit='s')
Out[54]:
DatetimeIndex(['1970-02-07 13:16:03', '1970-02-11 00:51:03',
'1970-02-07 12:33:57', '1970-03-27 05:20:36'],
dtype='datetime64[ns]', freq=None)
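The unit parameter tells pandas what the integers count since the 1970-01-01 epoch; seconds here, but milliseconds and other units work the same way:
In [ ]:
# The same first instant, expressed in milliseconds since the epoch
pd.to_datetime([3244563000], unit='ms')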
Creating a Date Range with pd.date_range()¶
In [58]:
times = pd.date_range(start='2021-01-01', end='2021-12-31', freq='D')
times
Out[58]:
DatetimeIndex(['2021-01-01', '2021-01-02', '2021-01-03', '2021-01-04',
'2021-01-05', '2021-01-06', '2021-01-07', '2021-01-08',
'2021-01-09', '2021-01-10',
...
'2021-12-22', '2021-12-23', '2021-12-24', '2021-12-25',
'2021-12-26', '2021-12-27', '2021-12-28', '2021-12-29',
'2021-12-30', '2021-12-31'],
dtype='datetime64[ns]', length=365, freq='D')
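Instead of an explicit end, date_range accepts a periods count; any two of start, end, and periods pin the range down:
In [ ]:
# Ten consecutive days starting at New Year
pd.date_range(start='2021-01-01', periods=10, freq='D')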
In [61]:
times = pd.date_range(start='2021-01-01', end='2029-12-31', freq='A')
times
Out[61]:
DatetimeIndex(['2021-12-31', '2022-12-31', '2023-12-31', '2024-12-31',
'2025-12-31', '2026-12-31', '2027-12-31', '2028-12-31',
'2029-12-31'],
dtype='datetime64[ns]', freq='A-DEC')
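Note that 'A' (year-end) is the alias this notebook's pandas version uses; newer pandas (2.2 and later, if that is what you are running) deprecates it in favour of 'YE':
In [ ]:
# Equivalent year-end range on pandas >= 2.2 (assumed environment)
pd.date_range(start='2021-01-01', end='2029-12-31', freq='YE')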
In [62]:
times = pd.date_range(start='2021-01-01', end='2021-12-31', freq='5H')
times
Out[62]:
DatetimeIndex(['2021-01-01 00:00:00', '2021-01-01 05:00:00',
'2021-01-01 10:00:00', '2021-01-01 15:00:00',
'2021-01-01 20:00:00', '2021-01-02 01:00:00',
'2021-01-02 06:00:00', '2021-01-02 11:00:00',
'2021-01-02 16:00:00', '2021-01-02 21:00:00',
...
'2021-12-29 02:00:00', '2021-12-29 07:00:00',
'2021-12-29 12:00:00', '2021-12-29 17:00:00',
'2021-12-29 22:00:00', '2021-12-30 03:00:00',
'2021-12-30 08:00:00', '2021-12-30 13:00:00',
'2021-12-30 18:00:00', '2021-12-30 23:00:00'],
dtype='datetime64[ns]', length=1748, freq='5H')
In [63]:
times = pd.date_range(start='2021-01-01', end='2021-12-31', freq='4H')
times
Out[63]:
DatetimeIndex(['2021-01-01 00:00:00', '2021-01-01 04:00:00',
'2021-01-01 08:00:00', '2021-01-01 12:00:00',
'2021-01-01 16:00:00', '2021-01-01 20:00:00',
'2021-01-02 00:00:00', '2021-01-02 04:00:00',
'2021-01-02 08:00:00', '2021-01-02 12:00:00',
...
'2021-12-29 12:00:00', '2021-12-29 16:00:00',
'2021-12-29 20:00:00', '2021-12-30 00:00:00',
'2021-12-30 04:00:00', '2021-12-30 08:00:00',
'2021-12-30 12:00:00', '2021-12-30 16:00:00',
'2021-12-30 20:00:00', '2021-12-31 00:00:00'],
dtype='datetime64[ns]', length=2185, freq='4H')
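Frequencies are not limited to calendar days and hours; 'B', for instance, steps over business days only, which suits the stock data in the next section:
In [ ]:
# Business days (Mon-Fri) in the first two weeks of 2021
pd.date_range(start='2021-01-01', end='2021-01-15', freq='B')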
Importing Stock Data Using pandas-datareader¶
In [66]:
from pandas_datareader import data
In [67]:
# !pip install pandas_datareader
In [69]:
company = 'MSFT'
start = '2020-12-22'
end = '2021-06-22'
In [71]:
stocks = data.DataReader(name=company,
data_source='yahoo',
start=start,
end=end)
In [72]:
stocks
Out[72]:
| Date | High | Low | Open | Close | Volume | Adj Close |
|---|---|---|---|---|---|---|
| 2020-12-21 | 224.000000 | 217.279999 | 217.550003 | 222.589996 | 37181900.0 | 221.143524 |
| 2020-12-22 | 225.630005 | 221.850006 | 222.690002 | 223.940002 | 22612200.0 | 222.484772 |
| 2020-12-23 | 223.559998 | 220.800003 | 223.110001 | 221.020004 | 18699600.0 | 219.583740 |
| 2020-12-24 | 223.610001 | 221.199997 | 221.419998 | 222.750000 | 10550600.0 | 221.302490 |
| 2020-12-28 | 226.029999 | 223.020004 | 224.449997 | 224.960007 | 17933500.0 | 223.498138 |
| ... | ... | ... | ... | ... | ... | ... |
| 2021-06-16 | 260.579987 | 254.419998 | 259.399994 | 257.380005 | 27220000.0 | 256.888214 |
| 2021-06-17 | 261.750000 | 256.010010 | 256.070007 | 260.899994 | 27565500.0 | 260.401489 |
| 2021-06-18 | 262.299988 | 258.750000 | 259.630005 | 259.429993 | 37202200.0 | 258.934296 |
| 2021-06-21 | 263.519989 | 257.920013 | 259.820007 | 262.630005 | 26696100.0 | 262.128174 |
| 2021-06-22 | 265.790009 | 262.399994 | 262.720001 | 265.510010 | 24694100.0 | 265.002686 |
126 rows × 6 columns
In [73]:
stocks.loc['2021-06-17']
Out[73]:
High         2.617500e+02
Low          2.560100e+02
Open         2.560700e+02
Close        2.609000e+02
Volume       2.756550e+07
Adj Close    2.604015e+02
Name: 2021-06-17 00:00:00, dtype: float64
In [84]:
stocks.iloc[111]
Out[84]:
High         2.492700e+02
Low          2.458400e+02
Open         2.481300e+02
Close        2.473000e+02
Volume       1.940670e+07
Adj Close    2.468275e+02
Name: 2021-06-02 00:00:00, dtype: float64
In [88]:
stocks.loc['2020-12-24':'2021-01-18']
Out[88]:
| Date | High | Low | Open | Close | Volume | Adj Close |
|---|---|---|---|---|---|---|
| 2020-12-24 | 223.610001 | 221.199997 | 221.419998 | 222.750000 | 10550600.0 | 221.302490 |
| 2020-12-28 | 226.029999 | 223.020004 | 224.449997 | 224.960007 | 17933500.0 | 223.498138 |
| 2020-12-29 | 227.179993 | 223.580002 | 226.309998 | 224.149994 | 17403200.0 | 222.693390 |
| 2020-12-30 | 225.630005 | 221.470001 | 225.229996 | 221.679993 | 20272300.0 | 220.239456 |
| 2020-12-31 | 223.000000 | 219.679993 | 221.699997 | 222.419998 | 20942100.0 | 220.974640 |
| 2021-01-04 | 223.000000 | 214.809998 | 222.529999 | 217.690002 | 37130100.0 | 216.275375 |
| 2021-01-05 | 218.520004 | 215.699997 | 217.259995 | 217.899994 | 23823000.0 | 216.484009 |
| 2021-01-06 | 216.490005 | 211.940002 | 212.169998 | 212.250000 | 35930700.0 | 210.870728 |
| 2021-01-07 | 219.339996 | 213.710007 | 214.039993 | 218.289993 | 27694500.0 | 216.871460 |
| 2021-01-08 | 220.580002 | 217.029999 | 218.679993 | 219.619995 | 22956200.0 | 218.192841 |
| 2021-01-11 | 218.910004 | 216.729996 | 218.470001 | 217.490005 | 23031300.0 | 216.076675 |
| 2021-01-12 | 217.100006 | 213.320007 | 216.500000 | 214.929993 | 23249300.0 | 213.533310 |
| 2021-01-13 | 216.759995 | 213.929993 | 214.020004 | 216.339996 | 20087100.0 | 214.934143 |
| 2021-01-14 | 217.460007 | 212.740005 | 215.910004 | 213.020004 | 29480800.0 | 211.635742 |
| 2021-01-15 | 214.509995 | 212.029999 | 213.520004 | 212.649994 | 31746500.0 | 211.268112 |
In [89]:
stocks.truncate(before='2020-12-30', after='2021-01-11')
Out[89]:
| Date | High | Low | Open | Close | Volume | Adj Close |
|---|---|---|---|---|---|---|
| 2020-12-30 | 225.630005 | 221.470001 | 225.229996 | 221.679993 | 20272300.0 | 220.239456 |
| 2020-12-31 | 223.000000 | 219.679993 | 221.699997 | 222.419998 | 20942100.0 | 220.974640 |
| 2021-01-04 | 223.000000 | 214.809998 | 222.529999 | 217.690002 | 37130100.0 | 216.275375 |
| 2021-01-05 | 218.520004 | 215.699997 | 217.259995 | 217.899994 | 23823000.0 | 216.484009 |
| 2021-01-06 | 216.490005 | 211.940002 | 212.169998 | 212.250000 | 35930700.0 | 210.870728 |
| 2021-01-07 | 219.339996 | 213.710007 | 214.039993 | 218.289993 | 27694500.0 | 216.871460 |
| 2021-01-08 | 220.580002 | 217.029999 | 218.679993 | 219.619995 | 22956200.0 | 218.192841 |
| 2021-01-11 | 218.910004 | 216.729996 | 218.470001 | 217.490005 | 23031300.0 | 216.076675 |
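Because the index is a DatetimeIndex, a partial date string selects a whole period at once; a bare year-month pulls every row in that month:
In [ ]:
# Partial string indexing: all trading days in January 2021
stocks.loc['2021-01']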
Timedelta¶
In [97]:
timeA = pd.Timestamp('2021-01-01 10:26:00')
timeB = pd.Timestamp('2020-01-15 10:33:00')
In [98]:
timeA - timeB
Out[98]:
Timedelta('351 days 23:53:00')
In [99]:
pd.Timedelta(weeks=10, days=9, minutes=12)
Out[99]:
Timedelta('79 days 00:12:00')
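Timedeltas compose with Timestamps through ordinary arithmetic; adding one shifts the timestamp forward:
In [ ]:
# Shift timeA forward by 79 days and 12 minutes
timeA + pd.Timedelta(weeks=10, days=9, minutes=12)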
In [101]:
# Parsing date columns into datetime when reading CSV files
df = pd.read_csv('catalog.csv', index_col='id')
df.head()
Out[101]:
| id | date | time | continent_code | country_name | country_code | state/province | population | city/town | distance | location_description | ... | geolocation | hazard_type | landslide_type | landslide_size | trigger | storm_name | injuries | fatalities | source_name | source_link |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 34 | 3/2/07 | Night | NaN | United States | US | Virginia | 16000 | Cherry Hill | 3.40765 | Unknown | ... | (38.600900000000003, -77.268199999999993) | Landslide | Landslide | Small | Rain | NaN | NaN | NaN | NBC 4 news | http://www.nbc4.com/news/11186871/detail.html |
| 42 | 3/22/07 | NaN | NaN | United States | US | Ohio | 17288 | New Philadelphia | 3.33522 | NaN | ... | (40.517499999999998, -81.430499999999995) | Landslide | Landslide | Small | Rain | NaN | NaN | NaN | Canton Rep.com | http://www.cantonrep.com/index.php?ID=345054&C... |
| 56 | 4/6/07 | NaN | NaN | United States | US | Pennsylvania | 15930 | Wilkinsburg | 2.91977 | Urban area | ... | (40.4377, -79.915999999999997) | Landslide | Landslide | Small | Rain | NaN | NaN | NaN | The Pittsburgh Channel.com | https://web.archive.org/web/20080423132842/htt... |
| 59 | 4/14/07 | NaN | NaN | Canada | CA | Quebec | 42786 | Châteauguay | 2.98682 | Above river | ... | (45.322600000000001, -73.777100000000004) | Landslide | Riverbank collapse | Small | Rain | NaN | NaN | NaN | Le Soleil | http://www.hebdos.net/lsc/edition162007/articl... |
| 61 | 4/15/07 | NaN | NaN | United States | US | Kentucky | 6903 | Pikeville | 5.66542 | Below road | ... | (37.432499999999997, -82.493099999999998) | Landslide | Landslide | Small | Downpour | NaN | NaN | 0.0 | Matthew Crawford (KGS) | NaN |
5 rows × 22 columns
In [102]:
df = pd.read_csv('catalog.csv', index_col='id', parse_dates=['date'])
df.head()
Out[102]:
| id | date | time | continent_code | country_name | country_code | state/province | population | city/town | distance | location_description | ... | geolocation | hazard_type | landslide_type | landslide_size | trigger | storm_name | injuries | fatalities | source_name | source_link |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 34 | 2007-03-02 | Night | NaN | United States | US | Virginia | 16000 | Cherry Hill | 3.40765 | Unknown | ... | (38.600900000000003, -77.268199999999993) | Landslide | Landslide | Small | Rain | NaN | NaN | NaN | NBC 4 news | http://www.nbc4.com/news/11186871/detail.html |
| 42 | 2007-03-22 | NaN | NaN | United States | US | Ohio | 17288 | New Philadelphia | 3.33522 | NaN | ... | (40.517499999999998, -81.430499999999995) | Landslide | Landslide | Small | Rain | NaN | NaN | NaN | Canton Rep.com | http://www.cantonrep.com/index.php?ID=345054&C... |
| 56 | 2007-04-06 | NaN | NaN | United States | US | Pennsylvania | 15930 | Wilkinsburg | 2.91977 | Urban area | ... | (40.4377, -79.915999999999997) | Landslide | Landslide | Small | Rain | NaN | NaN | NaN | The Pittsburgh Channel.com | https://web.archive.org/web/20080423132842/htt... |
| 59 | 2007-04-14 | NaN | NaN | Canada | CA | Quebec | 42786 | Châteauguay | 2.98682 | Above river | ... | (45.322600000000001, -73.777100000000004) | Landslide | Riverbank collapse | Small | Rain | NaN | NaN | NaN | Le Soleil | http://www.hebdos.net/lsc/edition162007/articl... |
| 61 | 2007-04-15 | NaN | NaN | United States | US | Kentucky | 6903 | Pikeville | 5.66542 | Below road | ... | (37.432499999999997, -82.493099999999998) | Landslide | Landslide | Small | Downpour | NaN | NaN | 0.0 | Matthew Crawford (KGS) | NaN |
5 rows × 22 columns
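Once the column is datetime64, its components become available through the .dt accessor; a minimal sketch on the parsed date column:
In [ ]:
df['date'].dtype                 # datetime64[ns] after parse_dates
df['date'].dt.year.head()        # year component of each date
df['date'].dt.day_name().head()  # weekday names, e.g. 'Friday'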