66. Air Freight Time Series Data#

import matplotlib.pyplot as plt 
import seaborn as sns; sns.set()
import pandas as pd

we use the dataset here: https://data.transportation.gov/Aviation/International_Report_Freight/u4sg-r5vg

More illustrations: https://www.kaggle.com/parulpandey/us-international-air-traffic-data

The scheduled, charter, total columns are the total weight in tons.

data_uri = "https://data.transportation.gov/api/views/u4sg-r5vg/rows.csv?accessType=DOWNLOAD"

df = pd.read_csv(data_uri)
df
data_dte Year Month usg_apt_id usg_apt usg_wac fg_apt_id fg_apt fg_wac airlineid carrier carriergroup type Scheduled Charter Total
0 05/01/2008 2008 5 13930 ORD 41 16217 YUL 941 19531 AC 0 Freight 3 0 3
1 06/01/2005 2005 6 13303 MIA 33 14739 SDQ 224 20400 PT 1 Freight 0 21 21
2 09/01/2006 2006 9 13204 MCO 33 12972 LHR 493 19540 BA 0 Freight 15 0 15
3 08/01/2004 2004 8 13487 MSP 63 10292 AMS 461 19386 NW 1 Freight 479 0 479
4 03/01/2004 2004 3 12478 JFK 22 16321 ZRH 486 19805 AA 1 Freight 687 0 687
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
601072 06/01/2022 2022 6 15422 UAM 5 13856 OKO 736 21629 KD 1 Freight 0 107 107
601073 06/01/2022 2022 6 15422 UAM 5 15505 UTP 782 19874 8C 1 Freight 0 10 10
601074 06/01/2022 2022 6 16091 YIP 43 11187 CUU 148 20447 U7 1 Freight 0 8 8
601075 06/01/2022 2022 6 16091 YIP 43 14879 SLP 148 20447 U7 1 Freight 0 11 11
601076 06/01/2022 2022 6 16091 YIP 43 14886 SLW 148 20447 U7 1 Freight 0 2 2

601077 rows × 16 columns

df_daily = df[["data_dte", "airlineid"]].groupby(
    "data_dte"
).agg(
    {
        "airlineid": [
            ("counts", "count"),
            ("unique", "nunique")
        ]
    }
)
df_daily.droplevel(0,axis=1)
counts unique
data_dte
01/01/1990 1165 149
01/01/1991 1152 154
01/01/1992 1255 156
01/01/1993 1246 164
01/01/1994 1300 164
... ... ...
12/01/2017 1691 144
12/01/2018 1691 150
12/01/2019 1617 139
12/01/2020 1264 119
12/01/2021 1481 130

390 rows × 2 columns

Build the time series of daily tonnage for all.

df.head()
data_dte Year Month usg_apt_id usg_apt usg_wac fg_apt_id fg_apt fg_wac airlineid carrier carriergroup type Scheduled Charter Total
0 05/01/2008 2008 5 13930 ORD 41 16217 YUL 941 19531 AC 0 Freight 3 0 3
1 06/01/2005 2005 6 13303 MIA 33 14739 SDQ 224 20400 PT 1 Freight 0 21 21
2 09/01/2006 2006 9 13204 MCO 33 12972 LHR 493 19540 BA 0 Freight 15 0 15
3 08/01/2004 2004 8 13487 MSP 63 10292 AMS 461 19386 NW 1 Freight 479 0 479
4 03/01/2004 2004 3 12478 JFK 22 16321 ZRH 486 19805 AA 1 Freight 687 0 687
df_daily_tonnage = df.groupby(
    ["data_dte", "usg_apt", "fg_apt"]
).agg({
    "Total": [("tonnage", "sum")]
})
df_daily_tonnage = df_daily_tonnage.droplevel(0, axis=1).reset_index()
df_daily_tonnage
data_dte usg_apt fg_apt tonnage
0 01/01/1990 ADK JCK 15
1 01/01/1990 ALB YQX 52
2 01/01/1990 ANC AMS 555
3 01/01/1990 ANC ARN 38
4 01/01/1990 ANC BRU 390
... ... ... ... ...
361226 12/01/2021 WRI TER 17
361227 12/01/2021 YIP BJX 14
361228 12/01/2021 YIP QRO 18
361229 12/01/2021 YIP SLW 9
361230 12/01/2021 YIP YKF 1

361231 rows × 4 columns

df_daily_tonnage.groupby(
    ["usg_apt", "fg_apt"]
).agg(
    {
        "data_dte": [
            ("unique_dates", "nunique"),
            ("count_dates", "count")
        ]
    }
).droplevel(0, axis=1).sort_values(by=["unique_dates"], ascending=False).head()
unique_dates count_dates
usg_apt fg_apt
JFK FRA 390 390
MIA GIG 390 390
CUN 390 390
GUM MNL 390 390
NRT 390 390

We will look into (‘SFO’, ‘FRA’)

df_daily_tonnage_sfo_fra = df_daily_tonnage.loc[
    (
        df_daily_tonnage.usg_apt == "SFO"
    ) & (
        df_daily_tonnage.fg_apt == "FRA"
    )
]