Download notebook (.ipynb)

# Time Series Visualizations

This notebook demonstrates how to use Lets-Plot to investigate time series.

The data (Delhi climate records with mean temperature, humidity and wind speed) is provided by Kaggle.

import pandas as pd

from lets_plot import *
LetsPlot.setup_html()

## Preparation

# Load the climate CSV, give two columns space-separated names, parse the date
# column and derive the calendar parts (day / month / year) that the plots
# below use for grouping and faceting. Only rows dated before 2017 are kept.
df = (
    pd.read_csv("https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/delhi_climate.csv")
    .rename(columns={"meantemp": "mean temperature", "wind_speed": "wind speed"})
    .assign(date=lambda frame: pd.to_datetime(frame["date"]))
    .assign(
        day=lambda frame: frame["date"].dt.day,
        month=lambda frame: frame["date"].dt.month,
        year=lambda frame: frame["date"].dt.year,
    )
    .loc[lambda frame: frame["year"] < 2017]
)

## General Information

# Mean temperature over time: one line per year, each year in its own facet
# column with independent ("free") scales.
# X-axis breaks are taken from the rows whose date falls on the 1st of a month.
month_start_dates = df[df.date.dt.day == 1].date

(
    ggplot(data=df, mapping=aes(x="date", y="mean temperature"))
    + geom_line(aes(group="year", color=as_discrete("year")), size=1)
    + scale_x_datetime(breaks=month_start_dates, format="%b %Y")
    + scale_color_discrete(format="d")
    + facet_grid(x="year", scales="free", x_format="d")
    + ggtitle("Mean Temperature Along Period Under Review")
    + ggsize(1000, 500)
    + theme(legend_position="bottom")
)
# Two box-plot views of mean temperature, placed side by side in one grid:
#   p1 - one box per year;
#   p2 - one box per month, faceted by year (legend hidden).
yearly_boxes = geom_boxplot(
    aes(x=as_discrete("year"), y="mean temperature", fill=as_discrete("year")),
    data=df,
    size=2,
    alpha=.5,
)
p1 = (
    ggplot()
    + yearly_boxes
    + scale_x_discrete(name="year", format="d")
    + scale_fill_discrete(format="d")
    + ggtitle("Mean Temperature Aggregated")
    + theme(legend_position="bottom", panel_grid="blank")
)

monthly_boxes = geom_boxplot(
    aes(x=as_discrete("month"), y="mean temperature", fill=as_discrete("year")),
    data=df,
    size=.75,
    alpha=.5,
)
p2 = (
    ggplot()
    + monthly_boxes
    + scale_x_continuous(breaks=list(range(1, 13)))  # one tick per month number 1..12
    + facet_grid(x="year", x_format="d")
    + ggtitle("Mean Temperature by Month")
    + theme(legend_position="none", panel_grid="blank")
)

gggrid([p1, p2]) + ggsize(1000, 300)

## Year-to-Year Temperature Comparison

# Mean temperature by day of month, one facet row per month and one line per
# year, so the same calendar month can be compared across years.
comparison_tooltips = (
    layer_tooltips()
    .format("@year", "d")
    .title("@year")
    .format("@{mean temperature}", ".2f")
    .line("@|@{mean temperature}")
    .line("date|@month/@day/@year")
)

(
    ggplot(data=df, mapping=aes(x="day", y="mean temperature"))
    + geom_line(
        aes(group="year", color=as_discrete("year")),
        size=2,
        tooltips=comparison_tooltips,
    )
    + scale_x_continuous(breaks=list(range(1, 32)))  # one tick per day number 1..31
    + scale_color_discrete(format="d")
    + facet_grid(y="month", scales="free")
    # The y-axis title names the facet variable rather than the plotted value.
    + ylab("month")
    + ggtitle("Mean Temperature for Each Month")
    + theme(legend_position="top")
)

## Most Common Temperature Values

# Histogram of mean temperature (15 bins) for every month/year combination:
# months run across the facet columns, years down the facet rows.
histogram_tooltips = (
    layer_tooltips()
    .line("count|@..count..")
    .format("@{mean temperature}", ".2f")
    .line("@|@{mean temperature}")
    .line("@|@month")
    .format("@year", "d")
    .line("@|@year")
)

(
    ggplot(data=df, mapping=aes(x="mean temperature"))
    + geom_histogram(
        aes(group="year", fill="year"),
        color="black",
        bins=15,
        size=.5,
        alpha=.5,
        tooltips=histogram_tooltips,
    )
    + scale_fill_discrete(format="d")
    + facet_grid(x="month", y="year", y_format="d")
    # Axis titles name the facet variables (month across, year down), not the
    # quantities drawn on the axes.
    + xlab("month")
    + ylab("year")
    + ggtitle("Most Common Temperature")
    + ggsize(1000, 500)
    + theme_classic()
    + theme(legend_position="bottom")
)
# Heatmap input: mean temperature cast to int so that readings collapse onto
# whole-number rows (the cast discards any fractional part).
int_mean_temp_df = df[["mean temperature", "month", "year"]].astype({"mean temperature": int})

heatmap_tooltips = (
    layer_tooltips()
    .format("@{mean temperature}", ".2f")
    .line("@|@{mean temperature}")
    .line("month|@month")
    .format("@year", "d")
    .title("@year")
)

# Semi-transparent tiles (alpha=.2) are drawn one per row of the frame, with
# month on x and integer temperature on y, faceted by year.
(
    ggplot(
        data=int_mean_temp_df,
        mapping=aes(x="month", y="mean temperature", fill="mean temperature"),
    )
    + geom_bin2d(
        stat="identity",
        size=.5,
        color="white",
        alpha=.2,
        tooltips=heatmap_tooltips,
    )
    + scale_x_continuous(breaks=list(range(1, 13)))
    + scale_fill_gradient(low="#abd9e9", high="#d7191c")
    + facet_grid(x="year", x_format="d")
    + coord_fixed(ratio=.5)
    + xlab("")
    + ggtitle("Heatmap of Temperatures by Year")
    + ggsize(1000, 500)
    + theme_classic()
    + theme(legend_position="bottom")
)

## Observing Mean Temperature and Wind Speed Correlation

# Scatter of mean temperature against wind speed, faceted by year.
# Point outline and fill share the same temperature-driven gradient.
temperature_gradient = dict(low="#abd9e9", high="#d7191c")

(
    ggplot(data=df, mapping=aes(x="wind speed", y="mean temperature"))
    + geom_point(
        aes(color="mean temperature", fill="mean temperature"),
        shape=21,
        size=3,
        alpha=.2,
    )
    + scale_color_gradient(**temperature_gradient)
    + scale_fill_gradient(**temperature_gradient)
    + facet_grid(x="year", x_format="d")
    + ggtitle("Relation Between Mean Temperature and Wind Speed")
    + ggsize(1000, 500)
    + theme_classic()
)

## Observing Mean Temperature and Humidity Correlation

# Scatter of mean temperature against humidity, faceted by year.
# Point outline and fill share the same humidity-driven gradient.
humidity_gradient = dict(low="#fdae61", high="#2c7bb6")

(
    ggplot(data=df, mapping=aes(x="humidity", y="mean temperature"))
    + geom_point(
        aes(color="humidity", fill="humidity"),
        shape=21,
        size=3,
        alpha=.2,
    )
    + scale_color_gradient(**humidity_gradient)
    + scale_fill_gradient(**humidity_gradient)
    + facet_grid(x="year", x_format="d")
    + ggtitle("Relation Between Mean Temperature and Humidity")
    + ggsize(1000, 500)
    + theme_classic()
)

## In Search of Correlation on Lag Scatter Plots

def make_lag_frame(data, lag):
    """Pair each mean-temperature reading with the reading `lag` rows later.

    The lag is counted in rows, not calendar time; it equals a number of days
    only if `data` holds one row per consecutive day (not checked here).

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing "mean temperature" and "year" columns.
    lag : int
        How many rows ahead the paired reading is taken from.

    Returns
    -------
    pandas.DataFrame
        New frame with columns "mean temperature", "year" and
        "shifted mean temperature". Rows containing a missing value are
        dropped, which includes the rows that have no reading `lag` rows ahead.
    """
    lagged = data[["mean temperature", "year"]].copy()
    lagged["shifted mean temperature"] = data["mean temperature"].shift(-lag)
    return lagged.dropna()


def lag_scatter_plot(lagged, title):
    """Build a per-year faceted scatter of a reading against its lagged pair.

    Parameters
    ----------
    lagged : pandas.DataFrame
        Frame with "mean temperature", "shifted mean temperature" and "year"
        columns, e.g. the result of `make_lag_frame`.
    title : str
        Plot title.

    Returns
    -------
    The lets-plot plot specification (not displayed by this function).
    """
    return ggplot(lagged, aes("mean temperature", "shifted mean temperature")) + \
        geom_point(aes(color="mean temperature", fill="mean temperature"), \
                   shape=21, size=3, alpha=.2) + \
        scale_color_gradient(low='#abd9e9', high='#d7191c') + \
        scale_fill_gradient(low='#abd9e9', high='#d7191c') + \
        facet_grid(x="year", x_format="d") + \
        coord_fixed(ratio=1) + \
        ggtitle(title) + \
        theme_classic()


df_shifted_by_day = make_lag_frame(df, 1)
p1 = lag_scatter_plot(df_shifted_by_day, "One Day Lag Scatter Plot")

df_shifted_by_month = make_lag_frame(df, 30)
p2 = lag_scatter_plot(df_shifted_by_month, "One Month Lag Scatter Plot")

# The last remaining row is discarded after the 365-row shift.
# NOTE(review): presumably this removes a single leftover row belonging to the
# final year, which would otherwise get a facet of its own - confirm against
# the data before changing the date filter or the lag.
df_shifted_by_year = make_lag_frame(df, 365)[:-1]
p3 = lag_scatter_plot(df_shifted_by_year, "One Year Lag Scatter Plot")

# Stack the three lag plots vertically.
gggrid([p1, p2, p3], ncol=1) + ggsize(1000, 900)

## Annual Path of Mean Temperature and Humidity

# Monthly averages of mean temperature and humidity for every year.
# The two columns are selected before aggregating so that no other column is
# averaged needlessly; as_index=False keeps "year" and "month" as ordinary
# columns, giving the same layout as aggregating and then resetting the index.
mean_df = df.groupby(by=["year", "month"], as_index=False)[["mean temperature", "humidity"]].mean()

path_tooltips = (
    layer_tooltips()
    .format("@year", "d")
    .title("@year")
    .line("month|@month")
    .format("@humidity", ".2f")
    .line("@|@humidity")
    .format("@{mean temperature}", ".2f")
    .line("mean temperature|@{mean temperature}")
)

# One path per year facet connecting the monthly averages in row order
# (mean_df is sorted by year, then month, by the groupby); points are filled
# by month number.
(
    ggplot(mean_df, aes("humidity", "mean temperature"))
    + geom_path(color='#99d8c9', size=1)
    + geom_point(aes(fill="month"), shape=21, size=3, color='#00441b',
                 tooltips=path_tooltips)
    + scale_fill_gradient(name="", low='#e5f5f9', high='#2ca25f')
    + facet_grid(x="year", x_format="d")
    + ylab("mean temperature")
    + ggtitle("Annual Path of Mean Temperature and Humidity")
    + ggsize(1000, 500)
    + theme_classic()
)

## Autocorrelation Plots for Mean Temperature, Wind Speed and Humidity

# Autocorrelation of three series for every lag in range(365 * 3), computed
# with Series.autocorr and then reshaped to long form: one row per
# (lag, acf_type) pair with the value in "acf_value".
acf_lags = range(365 * 3)
acf_series = {
    "mean temperature acf": df["mean temperature"],
    "wind speed acf": df["wind speed"],
    "humidity acf": df.humidity,
}

acf_wide = pd.DataFrame({"lag": list(acf_lags)})
for acf_label, series in acf_series.items():
    acf_wide[acf_label] = [series.autocorr(lag=lag) for lag in acf_lags]

acf_df = acf_wide.melt(
    id_vars=["lag"],
    value_vars=["mean temperature acf", "wind speed acf", "humidity acf"],
    var_name="acf_type",
    value_name="acf_value",
)

# One facet row per series; points are coloured by their ACF value.
(
    ggplot(data=acf_df, mapping=aes(x="lag", y="acf_value"))
    + geom_point(aes(color="acf_value"), size=3)
    + scale_color_gradient(low="#fc8d59", high="#91cf60")
    + facet_grid(y="acf_type")
    + ylab("ACF value")
    + ggtitle("Autocorrelation Functions")
    + ggsize(1000, 600)
    + theme(legend_position="none")
)