Contents

Modal Split in Transportation

Contents

17. Load Data

import pandas as pd

# Remote CSV with the freight modal split data (dataset name taken from the URL).
DATA_REMOTE = "https://raw.githubusercontent.com/datumorphism/dataset-eurostats-freight-modal-split/master/dataset/eurostats_freight_modal_split.csv"

# pandas reads the CSV straight from the URL, so this cell needs network access.
df = pd.read_csv(DATA_REMOTE)

# Preview the first rows.
df.head()

	transport_mode	country	year	value	is_estimated	not_applicable
0	inland_waterways	AT	2005	0.033	False	False
1	inland_waterways	BE	2005	0.135	False	False
2	inland_waterways	BG	2005	0.300	True	False
3	inland_waterways	CH	2005	0.001	True	False
4	inland_waterways	CY	2005	NaN	False	True

# Distinct values of the `country` column.
df["country"].unique()

array(['AT', 'BE', 'BG', 'CH', 'CY', 'CZ', 'DE', 'DK', 'EE', 'EL', 'ES',
       'EU27_2020', 'EU28', 'FI', 'FR', 'HR', 'HU', 'IE', 'IT', 'LT',
       'LU', 'LV', 'MT', 'NL', 'NO', 'PL', 'PT', 'RO', 'SE', 'SI', 'SK',
       'UK'], dtype=object)

18. Visualizations

import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's default theme to the figures created below.
sns.set()

18.1. Visualize Time Series

# Keep only the road rows and sort them in one chained expression.
# `sort_values` returns a new DataFrame, so nothing is modified in place on a
# slice of `df` and pandas no longer emits SettingWithCopyWarning.
df_road = df.loc[df.transport_mode == "road"].sort_values(
    by=["country", "year", "value"]
)

# Road freight share over time, one line per country.
fig, ax = plt.subplots(figsize=(10, 6.18))

g = sns.lineplot(
    data=df_road,
    x="year",
    y="value",
    hue='country',
    ax=ax,
    palette="tab10",
    linewidth=2.5,
    markers='+',
)
# Anchor the legend below the axes and wrap it into seven columns.
g.legend(ncol=7, loc='lower center', bbox_to_anchor=(0.5, -0.4))
# `value` is a share, so fix the y axis to the full [0, 1] range.
ax.set_ylim(0, 1)
ax.set_ylabel('percentage of the total freight in tonne-kilometers')

Text(0, 0.5, 'percentage of the total freight in tonne-kilometers')

../../../_images/eurostats_modal_split_12_1.png

# Road freight share for the EU28 aggregate only.
df_road_eu28 = df_road.loc[df_road.country == "EU28"]

fig, ax = plt.subplots(figsize=(10, 6.18))

# No `palette` here: without a `hue` variable seaborn ignores it and emits a
# UserWarning, so the single line uses the default colour.
sns.lineplot(
    x="year",
    y="value",
    data=df_road_eu28,
    ax=ax,
)
# `value` is a share, so fix the y axis to the full [0, 1] range.
ax.set_ylim([0, 1])

(0.0, 1.0)

../../../_images/eurostats_modal_split_14_2.png

18.2. Modal Split

# Column names of the loaded dataset.
df.columns

Index(['transport_mode', 'country', 'year', 'value', 'is_estimated',
       'not_applicable'],
      dtype='object')

# Select the 2018 rows and sort them in one chained expression.
# `sort_values` returns a new DataFrame, so nothing is modified in place on a
# slice of `df` and pandas no longer emits SettingWithCopyWarning.
df_year_2018 = df.loc[df.year == 2018].sort_values(
    by=["country", "transport_mode", "value"]
)

df_year_2018.head()

	transport_mode	country	year	value	is_estimated	not_applicable
1664	inland_waterways	AT	2018	0.021	False	False
1696	rail	AT	2018	0.315	False	False
1728	rail_inland_waterways_sum_of_available	AT	2018	0.337	False	False
1760	road	AT	2018	0.663	False	False
1665	inland_waterways	BE	2018	0.161	True	False

19. Visualize the Ranks of the Countries

# Reshape to wide form: one row per country, one column per year, holding the
# road share. Passing `values="value"` pivots only the column that is needed,
# instead of pivoting every column and then selecting 'value' from the result.
df_road_pivot = (
    df_road
    .pivot(index="country", columns="year", values="value")
    .reset_index()  # turn `country` back into an ordinary column
)
df_road_pivot.head()

year	country	2005	2006	2007	2008	2009	2010	2011	2012	2013	2014	2015	2016	2017	2018
0	AT	0.610	0.608	0.617	0.628	0.643	0.630	0.635	0.637	0.640	0.634	0.647	0.649	0.654	0.663
1	BE	0.737	0.733	0.729	0.728	0.767	0.734	0.730	0.722	0.731	0.730	0.736	0.743	0.736	0.721
2	BG	0.476	0.474	0.461	0.451	0.505	0.494	0.561	0.529	0.560	0.549	0.547	0.556	0.566	0.562
3	CH	0.663	0.650	0.648	0.644	0.663	0.660	0.647	0.652	0.637	0.637	0.625	0.624	0.651	0.652
4	CY	1.000	1.000	1.000	1.000	1.000	1.000	1.000	1.000	1.000	1.000	1.000	1.000	1.000	1.000

# Wide table restricted to the country column and the years 2014-2018.
df_road_vis = df_road_pivot[["country", 2014, 2015, 2016, 2017, 2018]]

# One narrow panel per year; rows are countries ordered by their 2018 value,
# largest first.
g = sns.PairGrid(
    data=df_road_vis.sort_values(by=2018, ascending=False),
    x_vars=df_road_vis.columns[1:],
    y_vars=["country"],
    height=12,
    aspect=.25,
)

# Dot plot: draw each panel with stripplot.
g.map(
    sns.stripplot,
    orient="h",
    size=15,
    linewidth=1,
    edgecolor="w",
    palette="ch:s=1,r=-.1,h=1_r",
)

# Shared x limits across panels, a common x label and no y label.
g.set(xlim=(0, 1), xlabel="Road Freight Share", ylabel="")

# Panel titles are the year columns themselves.
titles = df_road_vis.columns[1:].tolist()

for ax, title in zip(g.axes.flat, titles):
    ax.set_title(title)

    # Horizontal grid lines only.
    ax.xaxis.grid(False)
    ax.yaxis.grid(True)

sns.despine(left=True, bottom=True)

../../../_images/eurostats_modal_split_23_0.png

fig, ax = plt.subplots(figsize=(6.18, 10))

# Road rows from 2014 onwards. The mask is built from `df_road` itself so it
# shares the index of the frame being filtered.
df_road_vis_2 = df_road.loc[df_road.year >= 2014]

# Countries ordered by their 2018 value, largest first. The original cell
# called `sort_values` without keeping the result, which had no effect; the
# row order on the plot is controlled by `order=` below.
country_order = (
    df_road_vis_2.loc[df_road_vis_2.year == 2018]
    .sort_values("value", ascending=False)
    .country.unique()
)

g = sns.stripplot(
    y="country", x="value", hue="year", data=df_road_vis_2, ax=ax, orient="h",
    palette="Set2", size=10, marker="o",
    edgecolor="w", alpha=.5, linewidth=1,
    order=country_order,
)
# Anchor the legend below the axes.
g.legend(loc='lower center', bbox_to_anchor=(0.5, -0.15), ncol=7)
ax.set_xlabel('Road Freight Share')
ax.set_ylabel('Countries')

Text(0, 0.5, 'Countries')

../../../_images/eurostats_modal_split_24_1.png