Time Series Geodata
Contents
68. Time Series Geodata#
The data is private, so it is not provided together with this notebook.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# The bundled seaborn styles were renamed with a 'seaborn-v0_8-' prefix in
# Matplotlib 3.6 and the old names were removed afterwards, so use whichever
# spelling this installation provides.
plt.style.use('seaborn-v0_8-poster' if 'seaborn-v0_8-poster' in plt.style.available else 'seaborn-poster')
from datetime import datetime
from dateutil import parser
68.1. Load and Clean Data#
%%bash
ls
audio-processing.ipynb
geodata.ipynb
time-series-data.ipynb
# Load the 2017 storyline export (private data; the path is machine-specific).
file = '../../../OneDrive - University of New Mexico/data/moves-data/csv/yearly/storyline/storyline_2017.csv'
df = pd.read_csv(file)

# Keep only the rows recorded at the two places of interest.
homedf = df[df['Name'] == 'Home']
workdf = df[df['Name'] == 'UNM Physics & Astronomy']

# Sum Duration per Date for each place and give the totals distinct column
# names so the two tables can be merged side by side.
homedur = (
    homedf.groupby('Date')['Duration']
    .sum()
    .reset_index()
    .rename(columns={'Duration': 'HomeDuration'})
)
workdur = (
    workdf.groupby('Date')['Duration']
    .sum()
    .reset_index()
    .rename(columns={'Duration': 'WorkDuration'})
)

# Left join: one row per date present in homedur; dates without a work
# total get NaN in WorkDuration.
totaldf = homedur.merge(workdur, how='left')
# Daily time at home (black) and at work (red) for 2017, each divided by
# 24*3600 (presumably Duration is in seconds, making this a fraction of a day).
plt.plot(totaldf['Date'].values, totaldf['HomeDuration'].values / (24 * 3600), 'k-', marker='.', label='Home')
plt.plot(totaldf['Date'].values, totaldf['WorkDuration'].values / (24 * 3600), 'r-', marker='.', label='Work')
plt.locator_params(axis='y', nbins=6)
# Hide the x-axis ticks and tick labels. These options must be booleans:
# newer Matplotlib releases no longer interpret the legacy 'off' string, and a
# non-empty string is simply truthy, which would leave the ticks visible.
plt.tick_params(
    axis='x',
    which='both',
    bottom=False,
    top=False,
    labelbottom=False)
# The plotted values lie between 0 and 1, so they are fractions, not percentages.
plt.ylabel('Fraction of the day')
plt.xlabel('Date')
plt.title('Duration of Home and Work During 2017')
plt.legend()
plt.show()
# Histogram of the daily home duration for 2017, scaled by 24*3600.
home_values_2017 = totaldf['HomeDuration'].values
plt.hist(home_values_2017 / (24 * 3600), facecolor='k')
plt.title('Home Duration for 2017')
plt.show()
I define the mode of the histogram above to be the Home-Mode-Index. In this case it is 0.5.
I define the average fraction of the day spent at home to be the Home-AVG-Index. In my case it is:
# Home-AVG-Index for 2017: the summed HomeDuration divided by 24*3600 and by
# the number of rows in totaldf.
totaldf['HomeDuration'].sum()/(24*3600)/len(totaldf['HomeDuration'])
# Presumably the value displayed by the notebook for the expression above.
0.5587051276434163
# Histogram of the daily work duration for 2017, scaled by 24*3600.
# Rows without a WorkDuration value are dropped before plotting.
work_values_2017 = totaldf['WorkDuration'].dropna().values
plt.hist(work_values_2017 / (24 * 3600), facecolor='k')
plt.title('Work Duration for 2017')
plt.show()
# Average work duration for 2017, scaled by 24*3600. dropna() is applied to
# both the sum and the count, so only rows that have a WorkDuration value
# enter the average.
totaldf['WorkDuration'].dropna().sum()/(24*3600)/len(totaldf['WorkDuration'].dropna())
# Presumably the value displayed by the notebook for the expression above.
0.30993137881605454
68.2. Functions#
def CleanData(filepath, method=None, home_name='Home', work_name='UNM Physics & Astronomy'):
    """Build a per-date table of total time at home and at work.

    Reads a CSV that has at least ``Name``, ``Date`` and ``Duration``
    columns, sums ``Duration`` per ``Date`` separately for the home place
    and the work place, and merges the two totals on ``Date``.

    Parameters
    ----------
    filepath : str or path-like
        CSV file handed to ``pandas.read_csv``.
    method : str, optional
        Join type passed as ``how`` to ``pandas.merge``. ``None`` (the
        default) means ``'outer'``.
    home_name : str, optional
        Value of ``Name`` that identifies home rows.
    work_name : str, optional
        Value of ``Name`` that identifies work rows.

    Returns
    -------
    pandas.DataFrame
        Columns ``Date`` (converted with ``pandas.to_datetime``),
        ``HomeDuration`` and ``WorkDuration``, sorted by ``Date``. A date
        that is missing on one side of the join holds NaN in that column.
    """
    if method is None:
        method = 'outer'

    df = pd.read_csv(filepath)

    def daily_total(place, column):
        # Sum Duration per Date for one place and name the result column.
        rows = df[df['Name'] == place]
        return (
            rows.groupby('Date')['Duration']
            .sum()
            .reset_index()
            .rename(columns={'Duration': column})
        )

    hdur = daily_total(home_name, 'HomeDuration')
    wdur = daily_total(work_name, 'WorkDuration')

    # Date is the only shared column; naming it keeps the join key explicit.
    tdf = pd.merge(hdur, wdur, how=method, on='Date')
    tdf['Date'] = pd.to_datetime(tdf['Date'])
    return tdf.sort_values(by='Date')
# Locations of the yearly storyline exports. The 2013 file is currently
# commented out.
# data2013 = '../../../OneDrive - University of New Mexico/data/moves-data/csv/yearly/storyline/storyline_2013.csv'
data2014 = '../../../OneDrive - University of New Mexico/data/moves-data/csv/yearly/storyline/storyline_2014.csv'
data2015 = '../../../OneDrive - University of New Mexico/data/moves-data/csv/yearly/storyline/storyline_2015.csv'
data2016 = '../../../OneDrive - University of New Mexico/data/moves-data/csv/yearly/storyline/storyline_2016.csv'
data2017 = '../../../OneDrive - University of New Mexico/data/moves-data/csv/yearly/storyline/storyline_2017.csv'

# One cleaned frame per year, all built with an outer merge.
# df2013 = CleanData(data2013, method='outer')
df2014 = CleanData(data2014, method='outer')
df2015 = CleanData(data2015, method='outer')
df2016 = CleanData(data2016, method='outer')
df2017 = CleanData(data2017, method='outer')

# To print a whole frame without truncation:
# with pd.option_context('display.max_rows', None, 'display.max_columns', 3):
#     print(df2015)
def plotSeries(tdf, styles=None, markers=None):
    """Plot the daily home and work durations of one frame.

    Draws two lines on the current pyplot axes, each value divided by
    24*3600, then sets the axis labels and legend. The title and
    ``plt.show()`` are left to the caller.

    Parameters
    ----------
    tdf : pandas.DataFrame
        Frame with ``Date``, ``HomeDuration`` and ``WorkDuration`` columns.
    styles : sequence of two format strings, optional
        Line formats for home and work. Defaults to ``['k-', 'r-']``.
    markers : sequence of two markers, optional
        Markers for home and work. Defaults to ``['.', '.']``.
    """
    # Identity checks: '== None' on an array-like argument would compare
    # element-wise instead of testing for a missing argument.
    if styles is None:
        styles = ['k-', 'r-']
    if markers is None:
        markers = ['.', '.']

    plt.plot(tdf['Date'].values, tdf['HomeDuration'].values / (24 * 3600), styles[0], marker=markers[0], label='Home')
    plt.plot(tdf['Date'].values, tdf['WorkDuration'].values / (24 * 3600), styles[1], marker=markers[1], label='Work')
    plt.locator_params(axis='y', nbins=6)
    # Hide the x-axis ticks and tick labels. These options must be booleans:
    # newer Matplotlib releases no longer interpret the legacy 'off' string,
    # and a non-empty string is simply truthy, which leaves the ticks visible.
    plt.tick_params(
        axis='x',
        which='both',
        bottom=False,
        top=False,
        labelbottom=False)
    # The plotted values are durations divided by 24*3600, i.e. fractions of
    # a day rather than percentages.
    plt.ylabel('Fraction of the day')
    plt.xlabel('Date')
    plt.legend()
# One overview plot per year. The title is derived from the year paired with
# each frame, so a frame cannot be shown under another year's title (the 2015
# plot was previously titled 2016).
for plot_year, plot_frame in ((2017, df2017), (2016, df2016), (2015, df2015)):
    plotSeries(plot_frame)
    plt.title('Duration of Home and Work During {}'.format(plot_year))
    plt.show()
# Yearly averages of the home and work durations, scaled by 24*3600.
# NOTE(review): these frames come from an outer merge, so a column holds NaN
# on dates that only have the other place. Series.sum() skips NaN by default
# while len() counts every row, so missing dates effectively count as zero
# here. The 2017 work average earlier in this notebook uses dropna() instead.
# The bare number after each pair is presumably the notebook's display of the
# last expression (the work average); the home value is computed but not shown.
df2016['HomeDuration'].sum()/(24*3600)/len(df2016['HomeDuration'])
df2016['WorkDuration'].sum()/(24*3600)/len(df2016['WorkDuration'])
0.20510375083166998
# Same two averages for 2015.
df2015['HomeDuration'].sum()/(24*3600)/len(df2015['HomeDuration'])
df2015['WorkDuration'].sum()/(24*3600)/len(df2015['WorkDuration'])
0.3528205364833272
# Same two averages for 2014.
df2014['HomeDuration'].sum()/(24*3600)/len(df2014['HomeDuration'])
df2014['WorkDuration'].sum()/(24*3600)/len(df2014['WorkDuration'])
0.3832610887096774
# Histograms of the daily home and work durations (scaled by 24*3600) for
# 2016, 2015 and 2014, drawn in that order: home first, then work.
# NaN entries are dropped before plotting.
histogram_specs = (
    (df2016, 'HomeDuration', 'Home Duration for 2016'),
    (df2015, 'HomeDuration', 'Home Duration for 2015'),
    (df2014, 'HomeDuration', 'Home Duration for 2014'),
    (df2016, 'WorkDuration', 'Work Duration for 2016'),
    (df2015, 'WorkDuration', 'Work Duration for 2015'),
    (df2014, 'WorkDuration', 'Work Duration for 2014'),
)
for hist_frame, hist_column, hist_title in histogram_specs:
    plt.hist(hist_frame[hist_column].dropna().values / (24 * 3600), facecolor='k')
    plt.title(hist_title)
    plt.show()