In [1]:
# Imports: polars for the DataFrame work, altair for the chart encodings used in later cells
import polars as pl
import altair as alt
# Load the cost-of-living CSV; the path is relative, so it resolves against the kernel's working directory
df_cola = pl.read_csv("../datasets/cost-of-living-2018.csv")
# Bare last expression so the notebook renders the frame
df_cola
Out[1]:
shape: (540, 7)
City | Cost of Living Index | Rent Index | Cost of Living Plus Rent Index | Groceries Index | Restaurant Price Index | Local Purchasing Power Index |
---|---|---|---|---|---|---|
str | f64 | f64 | f64 | f64 | f64 | f64 |
"Hamilton, Bermuda" | 145.43 | 110.87 | 128.76 | 143.47 | 158.75 | 112.26 |
"Zurich, Switzerland" | 141.25 | 66.14 | 105.03 | 149.86 | 135.76 | 142.7 |
"Geneva, Switzerland" | 134.83 | 71.7 | 104.38 | 138.98 | 129.74 | 130.96 |
"Basel, Switzerland" | 130.68 | 49.68 | 91.61 | 127.54 | 127.22 | 139.01 |
"Bern, Switzerland" | 128.03 | 43.57 | 87.3 | 132.7 | 119.48 | 112.71 |
… | … | … | … | … | … | … |
"Kochi, India" | 24.65 | 6.31 | 15.8 | 26.93 | 13.94 | 77.7 |
"Coimbatore, India" | 24.61 | 5.35 | 15.32 | 25.23 | 15.21 | 53.23 |
"Alexandria, Egypt" | 23.78 | 4.34 | 14.4 | 23.19 | 17.66 | 23.75 |
"Navi Mumbai, India" | 23.44 | 6.25 | 15.15 | 24.02 | 14.14 | 111.99 |
"Thiruvananthapuram, India" | 20.86 | 5.1 | 13.26 | 21.98 | 12.06 | 66.25 |
In [2]:
# Derive a Country column from the "City" strings
df_cola = df_cola.with_columns(
    # Split on ", " and keep the final piece, which is the country
    Country=pl.col("City").str.split(", ").list.last()
)
df_cola
Out[2]:
shape: (540, 8)
City | Cost of Living Index | Rent Index | Cost of Living Plus Rent Index | Groceries Index | Restaurant Price Index | Local Purchasing Power Index | Country |
---|---|---|---|---|---|---|---|
str | f64 | f64 | f64 | f64 | f64 | f64 | str |
"Hamilton, Bermuda" | 145.43 | 110.87 | 128.76 | 143.47 | 158.75 | 112.26 | "Bermuda" |
"Zurich, Switzerland" | 141.25 | 66.14 | 105.03 | 149.86 | 135.76 | 142.7 | "Switzerland" |
"Geneva, Switzerland" | 134.83 | 71.7 | 104.38 | 138.98 | 129.74 | 130.96 | "Switzerland" |
"Basel, Switzerland" | 130.68 | 49.68 | 91.61 | 127.54 | 127.22 | 139.01 | "Switzerland" |
"Bern, Switzerland" | 128.03 | 43.57 | 87.3 | 132.7 | 119.48 | 112.71 | "Switzerland" |
… | … | … | … | … | … | … | … |
"Kochi, India" | 24.65 | 6.31 | 15.8 | 26.93 | 13.94 | 77.7 | "India" |
"Coimbatore, India" | 24.61 | 5.35 | 15.32 | 25.23 | 15.21 | 53.23 | "India" |
"Alexandria, Egypt" | 23.78 | 4.34 | 14.4 | 23.19 | 17.66 | 23.75 | "Egypt" |
"Navi Mumbai, India" | 23.44 | 6.25 | 15.15 | 24.02 | 14.14 | 111.99 | "India" |
"Thiruvananthapuram, India" | 20.86 | 5.1 | 13.26 | 21.98 | 12.06 | 66.25 | "India" |
In [3]:
# Average cost of living per country (unweighted mean over that country's
# rows in df_cola), keeping the 10 highest averages.
top_10_countries = (
    df_cola.group_by("Country")
    .agg(pl.mean("Cost of Living Index").alias("Avg Cost of Living"))
    # group_by does not guarantee group order, so break ties on Country to keep
    # the ranking reproducible across runs. nulls_last=True because polars puts
    # nulls first by default (even when descending), which would otherwise let
    # a country with a missing average land in the top 10.
    .sort(
        ["Avg Cost of Living", "Country"],
        descending=[True, False],
        nulls_last=True,
    )
    .head(10)
)
top_10_countries
Out[3]:
shape: (10, 2)
Country | Avg Cost of Living |
---|---|
str | f64 |
"Bermuda" | 145.43 |
"Switzerland" | 130.088333 |
"Iceland" | 123.78 |
"Norway" | 115.5925 |
"Bahamas" | 99.73 |
"Luxembourg" | 95.37 |
"Denmark" | 93.563333 |
"Singapore" | 91.4 |
"Japan" | 90.88 |
"South Korea" | 87.56 |
In [4]:
# Horizontal bar chart of the 10 countries with the highest average cost of living
chart = top_10_countries.plot.bar(
    x="Avg Cost of Living",
    y=alt.Y("Country", sort="-x"),  # order the bars by x value, largest first
)
chart = chart.properties(title="Top 10 Most Expensive Countries")
# Drop the y-axis title
chart = chart.configure_axisY(title=None)
chart
Out[4]:
In [5]:
# Average local purchasing power per country (unweighted mean over that
# country's rows in df_cola), keeping the 10 highest averages.
top_10_power_countries = (
    df_cola.group_by("Country")
    .agg(pl.mean("Local Purchasing Power Index").alias("Avg Purchasing Power"))
    # group_by does not guarantee group order, so break ties on Country to keep
    # the ranking reproducible across runs. nulls_last=True because polars puts
    # nulls first by default (even when descending), which would otherwise let
    # a country with a missing average land in the top 10.
    .sort(
        ["Avg Purchasing Power", "Country"],
        descending=[True, False],
        nulls_last=True,
    )
    .head(10)
)
top_10_power_countries
Out[5]:
shape: (10, 2)
Country | Avg Purchasing Power |
---|---|
str | f64 |
"Switzerland" | 128.865 |
"Germany" | 128.189444 |
"Luxembourg" | 127.42 |
"Saudi Arabia" | 127.0125 |
"United Arab Emirates" | 126.09 |
"United States" | 124.266591 |
"Australia" | 123.193636 |
"Qatar" | 120.86 |
"Finland" | 116.763333 |
"Sweden" | 116.754 |
In [6]:
# Horizontal bar chart of the 10 countries with the highest average purchasing power
chart = top_10_power_countries.plot.bar(
    x="Avg Purchasing Power",
    y=alt.Y("Country", sort="-x"),  # order the bars by x value, largest first
)
chart = chart.properties(title="Top 10 Countries by Purchasing Power")
# Drop the y-axis title
chart = chart.configure_axisY(title=None)
chart
Out[6]:
In [7]:
# Scatter plot: one point per row of df_cola, groceries index against restaurant price index
chart = df_cola.plot.point(
    x="Groceries Index",
    y="Restaurant Price Index",
    # Colour by country, with the legend switched off
    color=alt.Color("Country", legend=None),
    tooltip=["Groceries Index", "Restaurant Price Index", "City"],
)
chart = chart.properties(
    width=600,
    height=400,
    title="Groceries Index vs. Restaurant Price Index",
)
chart
Out[7]:
In [8]:
# Scatter plot: one point per row of df_cola, rent index against cost of living index
chart = df_cola.plot.point(
    x="Rent Index",
    y="Cost of Living Index",
    # Colour by country, with the legend switched off
    color=alt.Color("Country", legend=None),
    tooltip=["Rent Index", "Cost of Living Index", "City"],
)
chart = chart.properties(width=500, title="Rent vs. Cost of Living")
# Chart-wide scale config: do not force scales to include zero
chart = chart.configure_scale(zero=False)
chart
Out[8]:
In [ ]:
# Line plot of the cost-of-living-plus-rent index across all cities,
# with the rows sorted by that index first
df_sorted_cities = df_cola.sort("Cost of Living Plus Rent Index")
chart = df_sorted_cities.plot.line(
    x=alt.X("City", sort=None),  # sort=None keeps the data's row order on the axis
    y="Cost of Living Plus Rent Index",
    tooltip=["City", "Cost of Living Plus Rent Index"],
)
chart = chart.properties(
    width=1200,
    height=500,
    title="Cost of Living Plus Rent Index Across All Cities (Sorted)",
)
# Hide the x-axis title, labels and ticks
chart = chart.configure_axisX(title=None, labels=False, ticks=False)
chart
Out[ ]: