In [1]:
# Bring polars into scope and import our dataset
import polars as pl
import altair as alt

# Load the 2018 cost-of-living CSV (path is relative to this notebook)
df_cola = pl.read_csv(source="../datasets/cost-of-living-2018.csv")

# Bare last expression -> rich DataFrame display
df_cola
Out[1]:
shape: (540, 7)
CityCost of Living IndexRent IndexCost of Living Plus Rent IndexGroceries IndexRestaurant Price IndexLocal Purchasing Power Index
strf64f64f64f64f64f64
"Hamilton, Bermuda"145.43110.87128.76143.47158.75112.26
"Zurich, Switzerland"141.2566.14105.03149.86135.76142.7
"Geneva, Switzerland"134.8371.7104.38138.98129.74130.96
"Basel, Switzerland"130.6849.6891.61127.54127.22139.01
"Bern, Switzerland"128.0343.5787.3132.7119.48112.71
…………………
"Kochi, India"24.656.3115.826.9313.9477.7
"Coimbatore, India"24.615.3515.3225.2315.2153.23
"Alexandria, Egypt"23.784.3414.423.1917.6623.75
"Navi Mumbai, India"23.446.2515.1524.0214.14111.99
"Thiruvananthapuram, India"20.865.113.2621.9812.0666.25
In [2]:
# Derive a Country column from the "City" strings
df_cola = df_cola.with_columns(
    # Split on ", " and keep the final piece, which is the country
    Country=pl.col("City").str.split(", ").list.last()
)

df_cola
Out[2]:
shape: (540, 8)
CityCost of Living IndexRent IndexCost of Living Plus Rent IndexGroceries IndexRestaurant Price IndexLocal Purchasing Power IndexCountry
strf64f64f64f64f64f64str
"Hamilton, Bermuda"145.43110.87128.76143.47158.75112.26"Bermuda"
"Zurich, Switzerland"141.2566.14105.03149.86135.76142.7"Switzerland"
"Geneva, Switzerland"134.8371.7104.38138.98129.74130.96"Switzerland"
"Basel, Switzerland"130.6849.6891.61127.54127.22139.01"Switzerland"
"Bern, Switzerland"128.0343.5787.3132.7119.48112.71"Switzerland"
……………………
"Kochi, India"24.656.3115.826.9313.9477.7"India"
"Coimbatore, India"24.615.3515.3225.2315.2153.23"India"
"Alexandria, Egypt"23.784.3414.423.1917.6623.75"Egypt"
"Navi Mumbai, India"23.446.2515.1524.0214.14111.99"India"
"Thiruvananthapuram, India"20.865.113.2621.9812.0666.25"India"
In [3]:
# Mean Cost of Living Index per country, keeping the ten highest averages
top_10_countries = (
    df_cola
    .group_by("Country")
    .agg(pl.col("Cost of Living Index").mean().alias("Avg Cost of Living"))
    .sort(by="Avg Cost of Living", descending=True)  # highest average first
    .head(10)
)

top_10_countries
Out[3]:
shape: (10, 2)
CountryAvg Cost of Living
strf64
"Bermuda"145.43
"Switzerland"130.088333
"Iceland"123.78
"Norway"115.5925
"Bahamas"99.73
"Luxembourg"95.37
"Denmark"93.563333
"Singapore"91.4
"Japan"90.88
"South Korea"87.56
In [4]:
# Horizontal bar chart of the top-10 average cost-of-living table
chart = (
    top_10_countries.plot
    .bar(
        x="Avg Cost of Living",
        y=alt.Y("Country", sort="-x"),  # "-x": order the bars by descending x value
    )
    .properties(title="Top 10 Most Expensive Countries")
    .configure_axisY(title=None)  # no title on the y axis
)

chart
Out[4]:
In [5]:
# Mean Local Purchasing Power Index per country, keeping the ten highest averages
top_10_power_countries = (
    df_cola
    .group_by("Country")
    .agg(pl.col("Local Purchasing Power Index").mean().alias("Avg Purchasing Power"))
    .sort(by="Avg Purchasing Power", descending=True)  # highest average first
    .head(10)
)

top_10_power_countries
Out[5]:
shape: (10, 2)
CountryAvg Purchasing Power
strf64
"Switzerland"128.865
"Germany"128.189444
"Luxembourg"127.42
"Saudi Arabia"127.0125
"United Arab Emirates"126.09
"United States"124.266591
"Australia"123.193636
"Qatar"120.86
"Finland"116.763333
"Sweden"116.754
In [6]:
# Horizontal bar chart of the top-10 average purchasing-power table
chart = (
    top_10_power_countries.plot
    .bar(
        x="Avg Purchasing Power",
        y=alt.Y("Country", sort="-x"),  # "-x": order the bars by descending x value
    )
    .properties(title="Top 10 Countries by Purchasing Power")
    .configure_axisY(title=None)  # no title on the y axis
)

chart
Out[6]:
In [7]:
# Point chart: Groceries Index (x) against Restaurant Price Index (y), one mark per row
chart = (
    df_cola.plot
    .point(
        x="Groceries Index",
        y="Restaurant Price Index",
        # Colour marks by country; legend=None turns the legend off
        color=alt.Color("Country", legend=None),
        tooltip=["Groceries Index", "Restaurant Price Index", "City"],
    )
    .properties(
        title="Groceries Index vs. Restaurant Price Index",
        width=600,
        height=400,
    )
)

chart
Out[7]:
In [8]:
# Point chart: Rent Index (x) against Cost of Living Index (y), one mark per row
chart = (
    df_cola.plot
    .point(
        x="Rent Index",
        y="Cost of Living Index",
        # Colour marks by country; legend=None turns the legend off
        color=alt.Color("Country", legend=None),
        tooltip=["Rent Index", "Cost of Living Index", "City"],
    )
    .properties(title="Rent vs. Cost of Living", width=500)
    .configure_scale(zero=False)  # scale config: do not force a zero baseline
)

chart
Out[8]:
In [ ]:
# Line chart of the Cost of Living Plus Rent Index over all cities,
# with cities ordered by that index (ascending sort)
df_sorted_cities = df_cola.sort(by="Cost of Living Plus Rent Index")

chart = (
    df_sorted_cities.plot
    .line(
        x=alt.X("City", sort=None),  # sort=None: keep the row order of the sorted frame
        y="Cost of Living Plus Rent Index",
        tooltip=["City", "Cost of Living Plus Rent Index"],
    )
    .properties(
        title="Cost of Living Plus Rent Index Across All Cities (Sorted)",
        width=1200,
        height=500,
    )
    # Hide the x-axis title, labels and ticks
    .configure_axisX(title=None, labels=False, ticks=False)
)

chart
Out[ ]: