Missing Values: Line, Path, Area, and Ribbon

Missing value (Double.NaN or null) handling in geomLine(), geomPath(), geomArea(), and geomRibbon().

Note: geomPath() handles missing values slightly differently than the others.

In [1]:
%useLatestDescriptors
%use dataframe
%use lets-plot
In [2]:
// Report the Lets-Plot Kotlin API / Lets-Plot JS versions and the frontend in use for this session.
LetsPlot.getInfo()
Out[2]:
Lets-Plot Kotlin API v.4.11.3-SNAPSHOT. Frontend: Notebook with dynamically loaded JS. Lets-Plot JS v.4.8.1rc1.
Outputs: Web (HTML+JS), Kotlin Notebook (Swing), Static SVG (hidden)
In [3]:
import java.time.format.DateTimeFormatter
import java.time.LocalDate

// Source CSV: the "economics" sample dataset from the lets-plot-docs repository.
val economics_url = "https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/economics.csv"
val df = DataFrame.readCSV(economics_url)

// Re-parse every "date" cell from its string form into a LocalDate.
val dfWithDate = df.convert("date") { LocalDate.parse(it.toString()) }

// Keep only the rows dated 2006-01-01 or later.
val dfFiltered = dfWithDate.filter { row ->
    val rowDate = row["date"] as LocalDate
    rowDate >= LocalDate.of(2006, 1, 1)
}

// Map form of the filtered frame; this is what the plot layers receive as `data`.
val economics = dfFiltered.toMap()

dfFiltered.head()
Out[3]:

DataFrame: rowsCount = 5, columnsCount = 7

untitled  date        pce          pop            psavert   uempmed   unemploy
463       2006-01-01  9059.800000  297647.000000  4.200000  8.600000  7064
464       2006-02-01  9090.100000  297854.000000  4.200000  9.100000  7184
465       2006-03-01  9122.100000  298060.000000  4.200000  8.700000  7072
466       2006-04-01  9174.800000  298281.000000  4.000000  8.400000  7120
467       2006-05-01  9215.100000  298496.000000  3.800000  8.500000  6980
In [4]:
// Punch two kinds of holes into the data: NaN values and null dates.

val MILLISECONDS_IN_DAY = 24.0 * 60 * 60 * 1_000

// Convert "unemploy" from Int to Double so that the column can hold Double.NaN.
val dfStep1 = dfFiltered
    .convert { "unemploy"<Int>() }
    .to<Double>()

// Hole 1: "unemploy" becomes NaN for dates from 2012-01-01 through 2012-04-01 (inclusive).
val dfStep2 = dfStep1
    .update { "unemploy"<Double>() }
    .where {
        val rowDate = "date"<LocalDate>()
        rowDate in LocalDate.of(2012, 1, 1)..LocalDate.of(2012, 4, 1)
    }
    .with { Double.NaN }

// Hole 2: the "date" value itself becomes null for dates from 2008-01-01 through 2009-06-01 (inclusive).
val dfStep3 = dfStep2
    .update { it["date"] }
    .where { (it as LocalDate) in LocalDate.of(2008, 1, 1)..LocalDate.of(2009, 6, 1) }
    .withNull()

// Map form of the gapped frame, passed to the plot layers as `data`.
val economicsWithGaps = dfStep3.toMap()
In [5]:
/**
 * Converts a calendar date into milliseconds counted from 1970-01-01.
 *
 * The result is the date's epoch-day number multiplied by [MILLISECONDS_IN_DAY];
 * it is used below as the x coordinate of the annotation labels.
 *
 * @param y year
 * @param m month of year, 1..12
 * @param d day of month
 * @return the epoch-day count of the date scaled to milliseconds, as a Double
 */
fun epochMillis(y: Int, m: Int, d: Int): Double =
    // java.time.LocalDate (the type imported by this notebook) has no public constructor:
    // instances come from the `of` factory, and the day count is read with `toEpochDay()`.
    LocalDate.of(y, m, d).toEpochDay() * MILLISECONDS_IN_DAY

// Wide, faint line through the complete (gap-free) series; tooltips are switched off for it.
val completeSeriesTrace = geomLine(
    data = economics,
    size = 10.0,
    alpha = 0.1,
    tooltips = tooltipsNone,
)

// Annotation for the stretch where the dates were nulled out.
val missingDatesLabel = geomLabel(
    label = "Missing dates",
    x = epochMillis(2008, 8, 15),
    y = 11000.0,
    nudgeX = -70.0,
    nudgeUnit = "px",
)

// Annotation for the stretch where the unemployment values were replaced with NaN.
val missingUnemployLabel = geomLabel(
    label = "Missing unemployment\nfigures",
    x = epochMillis(2012, 2, 15),
    y = 13000.0,
    nudgeX = 80.0,
    nudgeY = 40.0,
    nudgeUnit = "px",
)

// Shared backdrop for the Line / Path / Area examples: each one adds its own geom on top.
val basePlot = letsPlot { x = "date"; y = "unemploy" } +
    completeSeriesTrace +
    missingDatesLabel +
    missingUnemployLabel +
    themeClassic() +
    ggsize(800, 300)

1. Line

In [6]:
// geomLine() drawn from the data containing NaN values and null dates.
basePlot + geomLine(data = economicsWithGaps, color = "teal")
Out[6]:
Missing dates Missing unemployment figures 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 8,000 10,000 12,000 14,000 unemploy date

2. Path

In [7]:
// geomPath() drawn from the same gapped data, for comparison with geomLine().
basePlot + geomPath(data = economicsWithGaps, color = "teal")
Out[7]:
Missing dates Missing unemployment figures 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 8,000 10,000 12,000 14,000 unemploy date

3. Area

In [8]:
// geomArea() drawn from the gapped data; only the upper y limit is set (20000), the lower one is passed as null.
basePlot +
    geomArea(data = economicsWithGaps, color = "teal", fill = "teal", alpha = 0.2) +
    coordCartesian(ylim = null to 20000)
Out[8]:
Missing dates Missing unemployment figures 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 0 5,000 10,000 15,000 20,000 unemploy date

4. Ribbon

In [9]:
// Add two more NaN stretches, one per ribbon edge.
// "date" already contains nulls at this point, so it is read as LocalDate? and
// rows with a null date are never selected.

// "uempmed" becomes NaN for dates from 2011-08-01 through 2011-11-01 (inclusive).
val dfStep5 = dfStep3
    .update { "uempmed"<Double?>() }
    .where {
        val rowDate = "date"<LocalDate?>()
        rowDate != null && rowDate in LocalDate.of(2011, 8, 1)..LocalDate.of(2011, 11, 1)
    }
    .with { Double.NaN }

// "psavert" becomes NaN for dates from 2013-05-01 through 2013-08-01 (inclusive).
val dfStep6 = dfStep5
    .update { "psavert"<Double?>() }
    .where {
        val rowDate = "date"<LocalDate?>()
        rowDate != null && rowDate in LocalDate.of(2013, 5, 1)..LocalDate.of(2013, 8, 1)
    }
    .with { Double.NaN }

// Map form of the frame, passed to geomRibbon() as `data`.
val economicsWithGaps2 = dfStep6.toMap()
In [10]:
// Wide, faint line through one column of the complete data (tooltips off),
// used as a backdrop behind each edge of the ribbon.
fun backdropLine(column: String) = geomLine(
    data = economics,
    size = 10.0,
    alpha = 0.1,
    tooltips = tooltipsNone,
) { y = column }

// Ribbon between "psavert" (ymin) and "uempmed" (ymax) drawn from the gapped data,
// with one label per gap.
letsPlot { x = "date"; ymin = "psavert"; ymax = "uempmed" } +
    backdropLine("psavert") +
    backdropLine("uempmed") +
    geomRibbon(data = economicsWithGaps2, color = "teal", fill = "teal", alpha = 0.2) +
    geomLabel(
        label = "Missing dates",
        x = epochMillis(2008, 8, 15),
        y = 13.0,
        nudgeX = -70.0,
        nudgeUnit = "px",
    ) +
    geomLabel(
        label = "Missing unemployment figures",
        x = epochMillis(2011, 8, 15),
        y = 24.0,
        nudgeX = 80.0,
        nudgeUnit = "px",
    ) +
    geomLabel(
        label = "Missing savings rate figures",
        x = epochMillis(2013, 5, 15),
        y = 3.5,
        nudgeX = 50.0,
        nudgeUnit = "px",
    ) +
    themeClassic() +
    ggsize(800, 300)
Out[10]:
Missing dates Missing unemployment figures Missing savings rate figures 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 5 10 15 20 25 y date