Generating Color Palettes with scale.palette()
The palette() method generates a list of hex color codes from any color scale specification.
This list can then be used in scale_color_manual(), which is useful for maintaining consistent colors across multiple plots, especially when different plots show different subsets of categorical data.
import pandas as pd
from lets_plot import *
# Initialise Lets-Plot's HTML output before any plot is displayed.
LetsPlot.setup_html()

# Load the Nobel laureates dataset and append a `fullname` column
# ("<firstname> <surname>") in one chained expression.
df = pd.read_csv(
    "https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/nobel.csv"
).assign(fullname=lambda laureates: laureates.firstname + ' ' + laureates.surname)

# Preview the first three rows.
df.head(3)
| firstname | surname | born_country_code | died_country_code | gender | year | category | share | name_of_university | city_of_university | country_of_university | born_month | age | age_get_prize | fullname | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Wilhelm Conrad | Röntgen | DE | DE | male | 1901 | physics | 1 | Munich University | Munich | Germany | Mar | 78 | 56 | Wilhelm Conrad Röntgen |
| 1 | Hendrik A. | Lorentz | NL | NL | male | 1902 | physics | 2 | Leiden University | Leiden | the Netherlands | Jul | 75 | 49 | Hendrik A. Lorentz |
| 2 | Pieter | Zeeman | NL | NL | male | 1902 | physics | 2 | Amsterdam University | Amsterdam | the Netherlands | May | 78 | 37 | Pieter Zeeman |
# Only rows where both the country of birth and the country of death are
# known take part in the split below.
countries_known = df.born_country_code.notna() & df.died_country_code.notna()
same_country = df.born_country_code == df.died_country_code

# Rows are de-duplicated by full name, so a laureate that appears more than
# once in the data is counted a single time.
not_migrated_laureates_df = (
    df[countries_known & same_country]
    .drop_duplicates(subset=['fullname'])
)
migrated_laureates_df = (
    df[countries_known & ~same_country]
    .drop_duplicates(subset=['fullname'])
)
# Build 3 bar charts, each showing the top 10 countries by count of:
#   1. Non-migrated laureates (born and died in the same country)
#   2. Immigrated laureates (counted by country of death)
#   3. Emigrated laureates (counted by country of birth)
# Each spec is (data frame, column holding the country code, title suffix).
chart_specs = [
    (not_migrated_laureates_df, 'born_country_code', 'non migrated laureates'),
    (migrated_laureates_df, 'died_country_code', 'immigrated laureates'),
    (migrated_laureates_df, 'born_country_code', 'emigrated laureates'),
]

plots = []
for d, country_code, counted_by in chart_specs:
    # One bar per country, ordered by count and filled by country code;
    # `sampling_pick(10)` limits the chart to 10 bars.
    bars = geom_bar(
        aes(x=as_discrete(country_code, order_by='..count..'), fill=country_code),
        color='pen',
        size=.3,
        sampling=sampling_pick(10),
    )
    # Two-row legend, filled row by row.
    legend = guides(
        fill=guide_legend(nrow=2, byrow=True, override_aes={'color': 'paper'})
    )
    # Hide x-axis labels, ticks, the plot message and the legend title;
    # right-align the chart title and put the legend at the bottom.
    appearance = theme(
        axis_text_x='blank',
        axis_ticks='blank',
        plot_message='blank',
        legend_title='blank',
        plot_title=element_text(hjust=1),
        legend_position='bottom',
    )
    p = (
        ggplot(d)
        + bars
        + ggtitle(f'by {counted_by}')
        + labs(x="country", fill="country")
        + legend
        + appearance
    )
    plots.append(p)
Problem: Independent Color Scales
Each chart assigns colors independently, so the same country appears in different colors across plots, making comparison difficult.
# Add a separate Brewer 'Paired' fill scale to every chart, then arrange the
# charts in a three-column grid under a shared, centred bold title.
plots1 = [chart + scale_fill_brewer(palette='Paired') for chart in plots]
(
    gggrid(plots1, ncol=3)
    + ggtitle('Top 10 Countries')
    + theme(plot_title=element_text(face='bold', hjust=0.5))
)