Custom Histogram Bins Using the breaks Parameter¶
In [1]:
import pandas as pd
from lets_plot import *
In [2]:
# Set up Lets-Plot's HTML output before any plot cell is evaluated
# (presumably what lets the plots below render inline; see the Lets-Plot docs).
LetsPlot.setup_html()
In [3]:
# Diamonds dataset, read straight from the lets-plot-docs repository.
DIAMONDS_CSV_URL = "https://raw.githubusercontent.com/JetBrains/lets-plot-docs/refs/heads/master/data/diamonds.csv"
df = pd.read_csv(DIAMONDS_CSV_URL)
print(df.shape)  # (rows, columns)
df.head()
Out[3]:
Default Bins¶
In [4]:
# Baseline histogram of price: no `breaks` are passed, so binning is left to
# geom_histogram's defaults.
(
    ggplot(df, aes("price"))
    + geom_histogram(color="black", fill="gray80")
)
Out[4]:
Equi-probable Bins¶
In [5]:
# Bin edges are the deciles of price (0%, 10%, ..., 100%), so each bin holds
# roughly the same share of the observations.
decile_levels = [step / 10 for step in range(11)]
price_bins = df["price"].quantile(q=decile_levels, interpolation="linear")
(
    ggplot(df, aes("price"))
    + geom_histogram(breaks=price_bins, color="black", fill="gray80")
)
Out[5]:
Identity Stat¶
In [6]:
# Bin edges at successive powers of two: 1, 2, 4, ..., 2**15.
breaks = [2**exponent for exponent in range(16)]
# Center of each bin = midpoint of two neighbouring edges.
bin_x = [(lower + upper) / 2.0 for lower, upper in zip(breaks[:-1], breaks[1:])]

# Aggregated dataframe: bin center -> number of prices falling into that bin.
agg_df = (
    df.assign(
        # Label every price with the center of the bin it belongs to.
        bin_x=pd.cut(df["price"], bins=breaks, labels=bin_x)
    )
    .groupby("bin_x", observed=True)["price"]
    .count()
    .to_frame("count")
    .reset_index()
    # pd.cut yields a categorical column; convert the centers back to floats.
    .assign(bin_x=lambda frame: frame["bin_x"].astype(float))
)
agg_df
Out[6]:
In [7]:
# Draw the pre-aggregated counts from agg_df: x is the bin center, y is the
# bin size, using the identity stat together with the same bin edges.
histogram_layer = geom_histogram(
    aes("bin_x", "count"),
    stat='identity',
    breaks=breaks,
    color="black",
    fill="gray80",
)
ggplot(agg_df) + histogram_layer + xlab("price")
Out[7]: