Download notebook (.ipynb)

Generating Color Palettes with scale.palette()#

The palette() method generates a list of hex color codes from any color scale specification.

This list can then be passed to a manual scale such as scale_color_manual() or scale_fill_manual(), which is useful for maintaining consistent colors across multiple plots, especially when different plots show different subsets of categorical data.

import pandas as pd

from lets_plot import *
# Set up Lets-Plot for HTML output in the notebook.
LetsPlot.setup_html()

# Load the Nobel laureates dataset.
df = pd.read_csv(
    "https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/nobel.csv"
)

# Full name = first name + surname; used later to de-duplicate laureates.
df['fullname'] = df['firstname'] + ' ' + df['surname']
df.head(3)
firstname surname born_country_code died_country_code gender year category share name_of_university city_of_university country_of_university born_month age age_get_prize fullname
0 Wilhelm Conrad Röntgen DE DE male 1901 physics 1 Munich University Munich Germany Mar 78 56 Wilhelm Conrad Röntgen
1 Hendrik A. Lorentz NL NL male 1902 physics 2 Leiden University Leiden the Netherlands Jul 75 49 Hendrik A. Lorentz
2 Pieter Zeeman NL NL male 1902 physics 2 Amsterdam University Amsterdam the Netherlands May 78 37 Pieter Zeeman
# Rows for which both the birth country and the death country are recorded.
both_known = df.died_country_code.notna() & df.born_country_code.notna()

# Row-wise comparison of birth country against death country.
same_country = df.born_country_code == df.died_country_code
different_country = df.born_country_code != df.died_country_code

# Laureates who died in the country they were born in (first row per full name).
not_migrated_laureates_df = (
    df[both_known & same_country]
    .drop_duplicates(subset=['fullname'])
)

# Laureates who died in a different country than they were born in
# (first row per full name).
migrated_laureates_df = (
    df[both_known & different_country]
    .drop_duplicates(subset=['fullname'])
)
# Build three bar charts, each showing the top 10 countries by count of:
#   1. non-migrated laureates (born and died in the same country)
#   2. immigrated laureates (grouped by country of death)
#   3. emigrated laureates (grouped by country of birth)

# (data, column holding the country code, wording used in the chart title)
chart_specs = [
    (not_migrated_laureates_df, 'born_country_code', 'non migrated laureates'),
    (migrated_laureates_df, 'died_country_code', 'immigrated laureates'),
    (migrated_laureates_df, 'born_country_code', 'emigrated laureates'),
]

plots = []
for d, country_code, counted_by in chart_specs:
    # One bar per country, ordered by the computed count and filled by country;
    # sampling_pick(10) limits the chart to 10 bars.
    bars = geom_bar(
        aes(x=as_discrete(country_code, order_by='..count..'), fill=country_code),
        color='pen', size=.3,
        sampling=sampling_pick(10),
    )

    # Two-row legend, filled row by row.
    legend = guides(
        fill=guide_legend(nrow=2, byrow=True, override_aes={'color': 'paper'})
    )

    # Hide x-axis texts/ticks, the plot message and the legend title;
    # put the legend under the chart.
    look = theme(
        axis_text_x='blank', axis_ticks='blank', plot_message='blank', legend_title='blank',
        plot_title=element_text(hjust=1),
        legend_position='bottom',
    )

    p = (
        ggplot(d)
        + bars
        + ggtitle(f'by {counted_by}')
        + labs(x="country", fill="country")
        + legend
        + look
    )
    plots.append(p)

Problem: Independent Color Scales#

Each chart assigns colors independently, so the same country appears in different colors across plots, making comparison difficult.

# Add a separate 'Paired' Brewer fill scale to each of the charts.
plots1 = [chart + scale_fill_brewer(palette='Paired') for chart in plots]

# Lay the charts out in a three-column grid with a bold, centered title.
(
    gggrid(plots1, ncol=3)
    + ggtitle('Top 10 Countries')
    + theme(plot_title=element_text(face='bold', hjust=0.5))
)

Solution: Using a Shared Palette#

Let’s create a shared color palette that assigns each country a unique color, then apply this palette to all three charts. This ensures consistent coloring across the visualizations, making them easy to compare.

# Collect every country code (birth or death) that occurs in either dataframe.
combined_df = pd.concat([not_migrated_laureates_df, migrated_laureates_df])
all_country_codes = pd.concat(
    [combined_df['born_country_code'], combined_df['died_country_code']]
)
unique_countries_raw = sorted(all_country_codes.unique())

len(unique_countries_raw)
73
# Unfortunately, 73 unique colors is more than categorical palettes can handle effectively.
# Restrict the set to countries that make it into the top 10 of at least one chart.


def _top_countries(frame, column, n=10):
    """Return the first `n` index labels of `frame[column].value_counts()` as a list."""
    return frame[column].value_counts().head(n).index.tolist()


# Top 10 countries for each of the three charts
top_not_migrated = _top_countries(not_migrated_laureates_df, 'born_country_code')
top_migrated_born = _top_countries(migrated_laureates_df, 'born_country_code')
top_migrated_died = _top_countries(migrated_laureates_df, 'died_country_code')

# Union of the three lists, de-duplicated and sorted alphabetically
unique_countries = sorted({*top_not_migrated, *top_migrated_born, *top_migrated_died})

print(unique_countries)
print(len(unique_countries))
['AT', 'CA', 'CH', 'DE', 'DK', 'ES', 'FR', 'GB', 'HU', 'IE', 'IT', 'NL', 'PL', 'RU', 'SE', 'US']
16
# Much better, but 16 countries still exceeds the capacity of a single Brewer palette.
# Let's combine two palettes: 10 colors from 'Paired' + 7 colors from 'Pastel1'.
# That gives 17 colors, one more than the 16 countries need.

paired_scale = scale_color_brewer(palette='Paired')
pastel_scale = scale_color_brewer(palette='Pastel1')

palette_paired = paired_scale.palette(10)
palette_pastel = pastel_scale.palette(7)

# 'Paired' colors first, followed by the 'Pastel1' colors.
country_colors = [*palette_paired, *palette_pastel]
# Pair each country code with a color (by position) and build one manual fill
# scale from that mapping; the same scale object is then added to every chart.
color_by_country = dict(zip(unique_countries, country_colors))
manual_scale = scale_fill_manual(values=color_by_country)

plots2 = [chart + manual_scale for chart in plots]

# Three-column grid with guides='collect' and a bold, centered title.
(
    gggrid(plots2, ncol=3, guides='collect')
    + ggtitle('Top 10 Countries')
    + theme(plot_title=element_text(face='bold', hjust=0.5))
)