Combine choropleth map and histogram in Python

logo of a chart:Choropleth

Choropleth maps combined with a histogram are an excellent way of representing both the legend and the distribution of the underlying data. In this article, we'll build, step by step, a choropleth map with a histogram that highlights the average salary in each US state, using matplotlib.

About

A choropleth map is a map where the color of each region is proportional to the value of a variable in that region.

This chart has been created by Joseph Barbier, thanks to him for sharing his work here!

Libraries

First, we need to load the following libraries:

import matplotlib.pyplot as plt
import geopandas as gpd
import pandas as pd
from pyfonts import load_google_font
from pypalettes import load_cmap

Dataset

Here we need to load 2 datasets:

  • one with map data (shape of each state)
  • one with data for each state

And then we merge them:

# Salary table. Two locations are listed: a remote URL and a relative local path.
# The second assignment overrides the first, so the local file is the one that is read.
path = "https://raw.githubusercontent.com/holtzy/The-Python-Graph-Gallery/refs/heads/master/static/data/usa-salary.csv"
path = "../../static/data/usa-salary.csv"
df_salary = pd.read_csv(path)

# State shapes, same pattern: the local path is the one actually used.
path = "https://raw.githubusercontent.com/holtzy/The-Python-Graph-Gallery/refs/heads/master/static/data/us.geojson"
path = "../../static/data/us.geojson"
states = gpd.read_file(path)

# Attach the salary of each state to its shape (join on the "state" column).
gdf = states.merge(df_salary, on="state")

# Keep rows with a salary below 100 (this removes District of Columbia)
# and drop Alaska and Hawaii.
keep = (gdf["salary"] < 100) & ~gdf["state"].isin(["Alaska", "Hawaii"])
gdf = gdf[keep]
gdf.head()
state geometry salary
0 Alabama MULTIPOLYGON (((-87.41958 30.4796, -87.42683 3... 59.605
2 Arizona POLYGON ((-111.00627 31.32718, -111.06712 31.3... 66.340
3 Arkansas POLYGON ((-90.30422 35.00008, -90.30124 34.995... 54.658
4 California MULTIPOLYGON (((-114.72428 32.71284, -114.7645... 74.819
5 Colorado POLYGON ((-109.04633 40.99983, -108.88932 40.9... 77.331

We add a centroid column (approximation of the center of the state) that will be used later to add individual state labels:

# Compute the centroids on a re-projected copy of the data (EPSG:3035),
# then convert them back to the CRS of `gdf` before storing them.
# NOTE(review): EPSG:3035 is a Europe-centred grid (ETRS89-extended / LAEA Europe);
# a US-focused projected CRS may be a better fit here - confirm before changing,
# since the centroid values shown below depend on it.
gdf_projected = gdf.to_crs(epsg=3035)
gdf_projected["centroid"] = gdf_projected.centroid

centroids_in_map_crs = gdf_projected["centroid"].to_crs(gdf.crs)
gdf["centroid"] = centroids_in_map_crs
gdf.head()
state geometry salary centroid
0 Alabama MULTIPOLYGON (((-87.41958 30.4796, -87.42683 3... 59.605 POINT (-86.81841 32.78448)
2 Arizona POLYGON ((-111.00627 31.32718, -111.06712 31.3... 66.340 POINT (-111.63132 34.30186)
3 Arkansas POLYGON ((-90.30422 35.00008, -90.30124 34.995... 54.658 POINT (-92.43851 34.90643)
4 California MULTIPOLYGON (((-114.72428 32.71284, -114.7645... 74.819 POINT (-119.36883 37.27561)
5 Colorado POLYGON ((-109.04633 40.99983, -108.88932 40.9... 77.331 POINT (-105.52816 39.02442)

Basic choropleth map

The key steps here are:

  • create a colormap (cmap) with the color range we want
  • create a matplotlib Figure with fig, ax = plt.subplots()
  • plot the choropleth map with gdf.plot()
# Color scale used to fill the states, plus the outline style of each shape
cmap = load_cmap("enara", cmap_type="continuous", reverse=True)
edgecolor = "white"
linewidth = 0

fig, ax = plt.subplots(figsize=(8, 8), dpi=300)

# Fix the visible window: x from -130 to -65, y from 20 to 50
ax.set(xlim=(-130, -65), ylim=(20, 50))

# One color per state, driven by the "salary" column
gdf.plot(
    ax=ax,
    column="salary",
    cmap=cmap,
    edgecolor=edgecolor,
    linewidth=linewidth,
)

fig.tight_layout()

Add barplot

To add the histogram (drawn as a bar plot), we use the ax.inset_axes() function to create a smaller Axes inside the map that will contain it.

If you're not familiar with complex layouts in Matplotlib, please check this dedicated lesson where we explain the concept in depth!

Then we customize it a bit so that it uses the right color scale and looks nice.

cmap = load_cmap("enara", cmap_type="continuous", reverse=True)
edgecolor = "white"
linewidth = 0

fig, ax = plt.subplots(figsize=(8, 8), dpi=300)

gdf.plot(ax=ax, column="salary", cmap=cmap, edgecolor=edgecolor, linewidth=linewidth)

ax.set_xlim(-130, -65)
ax.set_ylim(20, 50)

# Inset Axes hosting the histogram; bounds are [x0, y0, width, height]
# expressed as fractions of the map Axes.
bar_ax = ax.inset_axes(bounds=[0.05, -0.05, 0.5, 0.4], zorder=-1)

# Invisible histogram (alpha=0): only used to get the counts and the bin edges.
n, bins, _ = bar_ax.hist(gdf["salary"], bins=15, alpha=0)

# Draw one bar per bin, centred on the bin it counts and sized from the real
# bin width, so the bars line up with the x axis whatever the number of bins.
bin_width = bins[1] - bins[0]
bin_centers = bins[:-1] + bin_width / 2

# Color each bar from the position of its bin centre within the bin range
# (the bin range spans the min-max of the salaries).
colors = [cmap((center - bins[0]) / (bins[-1] - bins[0])) for center in bin_centers]
bar_ax.bar(
    bin_centers,
    n,
    color=colors,
    width=0.8 * bin_width,  # slightly narrower than the bin to leave a gap between bars
    edgecolor=edgecolor,
    linewidth=linewidth,
)

fig.tight_layout()

Clean the Axes of map and barplot

Here we hide the axes of the map, remove the top, left and right spines (borders) of the histogram so that only its baseline remains, and clean up the histogram labels.

cmap = load_cmap("enara", cmap_type="continuous", reverse=True)
edgecolor = "white"
linewidth = 0

fig, ax = plt.subplots(figsize=(8, 8), dpi=300)

gdf.plot(ax=ax, column="salary", cmap=cmap, edgecolor=edgecolor, linewidth=linewidth)

ax.set_xlim(-130, -65)
ax.set_ylim(20, 50)
ax.axis("off")  # hide the frame, ticks and labels of the map

# Inset Axes hosting the histogram; bounds are [x0, y0, width, height]
# expressed as fractions of the map Axes.
bar_ax = ax.inset_axes(bounds=[0.05, -0.05, 0.5, 0.4], zorder=-1)

# Invisible histogram (alpha=0): only used to get the counts and the bin edges.
n, bins, _ = bar_ax.hist(gdf["salary"], bins=15, alpha=0)

# Draw one bar per bin, centred on the bin it counts and sized from the real
# bin width, so the bars line up with the x axis whatever the number of bins.
bin_width = bins[1] - bins[0]
bin_centers = bins[:-1] + bin_width / 2

# Color each bar from the position of its bin centre within the bin range
# (the bin range spans the min-max of the salaries).
colors = [cmap((center - bins[0]) / (bins[-1] - bins[0])) for center in bin_centers]
bar_ax.bar(
    bin_centers,
    n,
    color=colors,
    width=0.8 * bin_width,  # slightly narrower than the bin to leave a gap between bars
    edgecolor=edgecolor,
    linewidth=linewidth,
)

# Keep only the bottom spine of the histogram and drop the y ticks.
bar_ax.spines[["top", "left", "right"]].set_visible(False)
bar_ax.set_yticks([])

# x ticks at 50, 60, 70, 80, labelled "50k" ... "80k", without tick marks.
x_ticks = list(range(50, 90, 10))
x_tick_labels = [f"{val}k" for val in x_ticks]
bar_ax.set_xticks(x_ticks, labels=x_tick_labels, size=8)
bar_ax.tick_params(axis="x", length=0, pad=5)

fig.tight_layout()

Add individual state labels

The easiest way to find the center of each state is to use the centroid attribute of the geometry column in the geo dataframe. These coordinates can then be used to add annotations to the map.

Since the default font isn't very attractive, we use the load_google_font() function from pyfonts to load a font from Google Fonts for the annotations.

Just before adding the annotations, we list the states that we choose not to label, and we define an adjustment dictionary to help us improve the approximate position of the remaining annotations. This is necessary because centroids aren't always ideally positioned for annotations.

font2 = load_google_font("Ubuntu")
cmap = load_cmap("enara", cmap_type="continuous", reverse=True)
edgecolor = "white"
linewidth = 0
text_color = "white"

fig, ax = plt.subplots(figsize=(8, 8), dpi=300)

gdf.plot(ax=ax, column="salary", cmap=cmap, edgecolor=edgecolor, linewidth=linewidth)

ax.set_xlim(-130, -65)
ax.set_ylim(20, 50)
ax.axis("off")  # hide the frame, ticks and labels of the map

# Inset Axes hosting the histogram; bounds are [x0, y0, width, height]
# expressed as fractions of the map Axes.
bar_ax = ax.inset_axes(bounds=[0.05, -0.05, 0.5, 0.4], zorder=-1)

# Invisible histogram (alpha=0): only used to get the counts and the bin edges.
n, bins, _ = bar_ax.hist(gdf["salary"], bins=15, alpha=0)

# Draw one bar per bin, centred on the bin it counts and sized from the real
# bin width, so the bars line up with the x axis whatever the number of bins.
bin_width = bins[1] - bins[0]
bin_centers = bins[:-1] + bin_width / 2

# Color each bar from the position of its bin centre within the bin range
# (the bin range spans the min-max of the salaries).
colors = [cmap((center - bins[0]) / (bins[-1] - bins[0])) for center in bin_centers]
bar_ax.bar(
    bin_centers,
    n,
    color=colors,
    width=0.8 * bin_width,  # slightly narrower than the bin to leave a gap between bars
    edgecolor=edgecolor,
    linewidth=linewidth,
)

# Keep only the bottom spine of the histogram and drop the y ticks.
bar_ax.spines[["top", "left", "right"]].set_visible(False)
bar_ax.set_yticks([])

# x ticks at 50, 60, 70, 80, labelled "50k" ... "80k", without tick marks.
x_ticks = list(range(50, 90, 10))
x_tick_labels = [f"{val}k" for val in x_ticks]
bar_ax.set_xticks(x_ticks, labels=x_tick_labels, size=8, font=font2)
bar_ax.tick_params(axis="x", length=0, pad=5)

# States that do not get a label.
exclude = {
    "Indiana",
    "Michigan",
    "Mississippi",
    "Florida",
    "New Jersey",
    "West Virginia",
    "South Carolina",
    "Louisiana",
    "Massachusetts",
    "Vermont",
    "Connecticut",
    "Maryland",
    "Delaware",
    "Rhode Island",
    "New Hampshire",
}
states_to_annotate = [state for state in gdf.state.to_list() if state not in exclude]

# Manual (dx, dy) offsets applied to the centroid of a few states,
# in the same units as the map coordinates.
adjustments = {
    "California": (0, -1),
    "Kentucky": (0, -0.2),
    "Washington": (0.5, -0.4),
    "Virginia": (0, -0.2),
    "Idaho": (0, -0.4),
    "New York": (0, -0.2),
}

# Walk once through the rows of the states to label.
labelled = gdf[gdf["state"].isin(states_to_annotate)]
for state, centroid, value in zip(
    labelled["state"], labelled["centroid"], labelled["salary"]
):
    x_val, y_val = centroid.coords[0]
    # States without an entry in `adjustments` keep their centroid as is.
    dx, dy = adjustments.get(state, (0, 0))
    # Black text for salaries up to 65, `text_color` above that.
    color_text = "black" if value <= 65 else text_color
    ax.text(
        x=x_val + dx,
        y=y_val + dy,
        s=f"{state.upper()}\n${value:.0f}k",
        fontsize=5,
        font=font2,
        color=color_text,
        ha="center",
        va="center",
    )

fig.tight_layout()

Add last annotations

The title, credit and source annotations are added via the fig.text() function at the end:

font1 = load_google_font("Ubuntu", italic=True)
font2 = load_google_font("Ubuntu")
cmap = load_cmap("enara", cmap_type="continuous", reverse=True)
edgecolor = "white"
linewidth = 0
text_color = "white"

fig, ax = plt.subplots(figsize=(8, 8), dpi=300)

gdf.plot(ax=ax, column="salary", cmap=cmap, edgecolor=edgecolor, linewidth=linewidth)

ax.set_xlim(-130, -65)
ax.set_ylim(20, 50)
ax.axis("off")  # hide the frame, ticks and labels of the map

# Inset Axes hosting the histogram; bounds are [x0, y0, width, height]
# expressed as fractions of the map Axes.
bar_ax = ax.inset_axes(bounds=[0.05, -0.05, 0.5, 0.4], zorder=-1)

# Invisible histogram (alpha=0): only used to get the counts and the bin edges.
n, bins, _ = bar_ax.hist(gdf["salary"], bins=15, alpha=0)

# Draw one bar per bin, centred on the bin it counts and sized from the real
# bin width, so the bars line up with the x axis whatever the number of bins.
bin_width = bins[1] - bins[0]
bin_centers = bins[:-1] + bin_width / 2

# Color each bar from the position of its bin centre within the bin range
# (the bin range spans the min-max of the salaries).
colors = [cmap((center - bins[0]) / (bins[-1] - bins[0])) for center in bin_centers]
bar_ax.bar(
    bin_centers,
    n,
    color=colors,
    width=0.8 * bin_width,  # slightly narrower than the bin to leave a gap between bars
    edgecolor=edgecolor,
    linewidth=linewidth,
)

# Keep only the bottom spine of the histogram and drop the y ticks.
bar_ax.spines[["top", "left", "right"]].set_visible(False)
bar_ax.set_yticks([])

# x ticks at 50, 60, 70, 80, labelled "50k" ... "80k", without tick marks.
x_ticks = list(range(50, 90, 10))
x_tick_labels = [f"{val}k" for val in x_ticks]
bar_ax.set_xticks(x_ticks, labels=x_tick_labels, size=8, font=font2)
bar_ax.tick_params(axis="x", length=0, pad=5)

# States that do not get a label.
exclude = {
    "Indiana",
    "Michigan",
    "Mississippi",
    "Florida",
    "New Jersey",
    "West Virginia",
    "South Carolina",
    "Louisiana",
    "Massachusetts",
    "Vermont",
    "Connecticut",
    "Maryland",
    "Delaware",
    "Rhode Island",
    "New Hampshire",
}
states_to_annotate = [state for state in gdf.state.to_list() if state not in exclude]

# Manual (dx, dy) offsets applied to the centroid of a few states,
# in the same units as the map coordinates.
adjustments = {
    "California": (0, -1),
    "Kentucky": (0, -0.2),
    "Washington": (0.5, -0.4),
    "Virginia": (0, -0.2),
    "Idaho": (0, -0.4),
    "New York": (0, -0.2),
}

# Walk once through the rows of the states to label.
labelled = gdf[gdf["state"].isin(states_to_annotate)]
for state, centroid, value in zip(
    labelled["state"], labelled["centroid"], labelled["salary"]
):
    x_val, y_val = centroid.coords[0]
    # States without an entry in `adjustments` keep their centroid as is.
    dx, dy = adjustments.get(state, (0, 0))
    # Black text for salaries up to 65, `text_color` above that.
    color_text = "black" if value <= 65 else text_color
    ax.text(
        x=x_val + dx,
        y=y_val + dy,
        s=f"{state.upper()}\n${value:.0f}k",
        fontsize=5,
        font=font2,
        color=color_text,
        ha="center",
        va="center",
    )

# Title, positioned in figure coordinates.
fig.text(
    x=0.5,
    y=0.8,
    s="Average salary in the United States in 2025",
    ha="center",
    size=22,
    font=load_google_font("Roboto Slab"),
)

# Credit and source lines share the same style and only differ by their y position.
credit_params = dict(x=0.9, ha="right", size=7, font=font1, va="bottom")
fig.text(y=0.24, s="Graphic: Joseph Barbier", **credit_params)
fig.text(y=0.22, s="Data from Forbes and the U.S. Census Bureau", **credit_params)
fig.text(y=0.2, s="Data do not include District of Columbia", **credit_params)

fig.tight_layout()

fig.savefig(
    "../../static/graph/web-choropleth-map-with-histogram.png",
    dpi=300,
    bbox_inches="tight",
)

Going further

You might be interested in:

🚨 Grab the Data To Viz poster!


Do you know all the chart types? Do you know which one you should pick? I made a decision tree that answers those questions. You can download it for free!

    dataviz decision tree poster