Statistical Annotations: geomBracket() and geomBracketDodge()¶
geomBracket() is a general-purpose layer for annotating ranges. It is particularly effective for adding significance bars (p-values) to categorical plots.
geomBracketDodge() is a specialized layer designed for annotating ranges between dodged positions (usually dodged groups within a category).
Note:
geomBracket()does not compute statistics internally. It is a visualization tool that renders the results of your analysis. In this demo, we use the Mann–Whitney U test to calculate p-values before passing them to the plot.
%useLatestDescriptors
%use dataframe
%use lets-plot
LetsPlot.getInfo()
// In the examples below, we will use the Mann–Whitney U test to calculate p-values
// Mann-Whitney U test (two-sided) matching scipy.stats.mannwhitneyu defaults:
// - average-rank tie handling
// - tie correction in variance
// - continuity correction in z-score
fun mannWhitneyPValue(x: List<Double>, y: List<Double>): Double {
val nx = x.size
val ny = y.size
val n = nx + ny
val combined = (x.map { it to 0 } + y.map { it to 1 }).sortedBy { it.first }
// Assign average ranks and accumulate tie correction term T = Σ(t³ - t)
val ranks = DoubleArray(n)
var tieCorrection = 0.0
var i = 0
while (i < n) {
var j = i
while (j < n - 1 && combined[j].first == combined[j + 1].first) j++
val avgRank = (i + j + 2).toDouble() / 2.0 // 1-based
for (k in i..j) ranks[k] = avgRank
val t = (j - i + 1).toDouble()
tieCorrection += t * t * t - t
i = j + 1
}
val r1 = combined.indices.filter { combined[it].second == 0 }.sumOf { ranks[it] }
val u1 = r1 - nx.toLong() * (nx + 1) / 2.0
val mean = nx.toDouble() * ny / 2.0
// Variance with tie correction (matches scipy formula)
val variance = nx.toDouble() * ny / 12.0 * (n + 1 - tieCorrection / (n * (n - 1)))
val std = kotlin.math.sqrt(variance)
// Z with continuity correction (scipy default: use_continuity=True)
val z = (kotlin.math.abs(u1 - mean) - 0.5) / std
// Two-sided p-value: erfc(z / sqrt(2)) [Abramowitz & Stegun approximation]
fun erfc(a: Double): Double {
val t = 1.0 / (1.0 + 0.3275911 * a)
val poly = t * (0.254829592 + t * (-0.284496736 + t * (1.421413741 + t * (-1.453152027 + t * 1.061405429))))
return poly * kotlin.math.exp(-a * a)
}
return erfc(z / kotlin.math.sqrt(2.0))
}
// Matches mizani label_pvalue(add_p=True, accuracy=0.001):
// p < 0.001 is shown as "p < 0.001", otherwise "p = X.XXX"
fun formatPValue(p: Double): String =
if (p < 0.001) "p < 0.001" else "p = ${"%.3f".format(p)}"
val mpgDf = DataFrame.readCSV("https://raw.githubusercontent.com/JetBrains/lets-plot-docs/refs/heads/master/data/mpg.csv")
val mpg = mpgDf.toMap()
println("${mpgDf.rowsCount()} x ${mpgDf.columnsCount()}")
mpgDf.head()
Adding Significance Bars (p-values) to Categorical Plots¶
Compute Significance Data¶
fun getPValuesData(
df: DataFrame<*>,
catCol: String,
valCol: String,
baseDy: Double,
step: Double
): DataFrame<*> {
val categories = df[catCol].distinct().toList().map { it.toString() }
val pairs = categories.flatMapIndexed { i, a ->
categories.drop(i + 1).map { b -> a to b }
}
val xmins = pairs.map { it.first }
val xmaxs = pairs.map { it.second }
val ys = pairs.indices.map { i -> baseDy + i * step }
val ps = pairs.map { (a, b) ->
val x = df.filter { it[catCol].toString() == a }[valCol].toList().map { it.toString().toDouble() }
val y = df.filter { it[catCol].toString() == b }[valCol].toList().map { it.toString().toDouble() }
mannWhitneyPValue(x, y)
}
return dataFrameOf(
xmins.toColumn("xmin"),
xmaxs.toColumn("xmax"),
ys.toColumn("y"),
ps.toColumn("p")
)
}
val pValuesData = getPValuesData(mpgDf, "drv", "hwy", 47.0, 4.0)
println("${pValuesData.rowsCount()} rows")
pValuesData
val p = letsPlot(mpg) { x = "drv"; y = "hwy"; fill = "drv" } +
geomBoxplot(alpha = .25) +
geomJitter(height = 0.0, shape = 1, alpha = .25, showLegend = false, seed = 42) { color = "drv" }
Basic p-value Annotations¶
p + geomBracket(data = pValuesData.toMap()) { // <-- Pass p-values data to the brackets layer
xmin = "xmin"; xmax = "xmax"; y = "y"; label = "p"
}
Format Labels with labelFormat¶
p + geomBracket(data = pValuesData.toMap(), labelFormat = ".2~g") {
xmin = "xmin"; xmax = "xmax"; y = "y"; label = "p"
}
Format Labels with formatPValue()¶
A helper that formats p-values in the style of mizani's label_pvalue(add_p=True):
values below 0.001 are shown as "p < 0.001", others as "p = X.XXX".
val pValuesDataLabeled = pValuesData.add("label") { formatPValue(this["p"] as Double) }
p + geomBracket(data = pValuesDataLabeled.toMap()) {
xmin = "xmin"; xmax = "xmax"; y = "y"; label = "label"
}
Apply Custom Formatting Logic (Significance Stars)¶
Map p-values to custom strings (like *** or ns) using your own function.
fun starsFormatter(p: Double): String = when {
p <= 0.001 -> "***"
p <= 0.01 -> "**"
p <= 0.05 -> "*"
else -> "ns"
}
val pValuesDataStars = pValuesDataLabeled.add("star") { starsFormatter(this["p"] as Double) }
p + geomBracket(data = pValuesDataStars.toMap()) {
xmin = "xmin"; xmax = "xmax"; y = "y"; label = "star"
}
Annotating Dodged Groups¶
To draw brackets between dodged groups, use geomBracketDodge(). Instead of using data coordinates, this layer uses group indices:
x: The main category on the x-axis.istart,iend: The zero-based indices of the dodged groups to compare.
// Group indices must be specified in the same order as they appear on the plot for other layers
val groupOrder = mapOf(4.0 to 0, 6.0 to 1, 8.0 to 2, 5.0 to 3)
val pValuesGroupedData = mpgDf["class"].distinct().toList().map { cls ->
val subDf = mpgDf.filter { it["class"].toString() == cls.toString() }
getPValuesData(subDf, "cyl", "hwy", 47.0, 4.0)
.add("x") { cls }
.add("start") { groupOrder[this["xmin"].toString().toDoubleOrNull()] }
.add("end") { groupOrder[this["xmax"].toString().toDoubleOrNull()] }
.add("star") { starsFormatter(this["p"] as Double) }
.select("x", "y", "start", "end", "star")
}.concat()
println("${pValuesGroupedData.rowsCount()} rows")
pValuesGroupedData.head(5)
letsPlot(mpg) { x = "class"; y = "hwy"; fill = asDiscrete("cyl") } +
geomBoxplot(alpha = .25) +
geomPoint(
position = positionJitterDodge(jitterWidth = .2, jitterHeight = 0.0, seed = 42),
shape = 1, alpha = .25, showLegend = false
) { color = asDiscrete("cyl") } +
geomBracketDodge(data = pValuesGroupedData.toMap()) {
x = "x"; y = "y"; istart = "start"; iend = "end"; label = "star"
} +
scaleFillBrewer(palette = "Accent", format = "d") +
scaleColorBrewer(palette = "Accent", format = "d") +
ggsize(1000, 500)
Customizing Bracket Geometry¶
Bottom-Axis (Upside Down) Annotations¶
To place brackets at the bottom of a plot, use negative values for lenstart and lenend.
You may also need to adjust vjust to position the labels correctly outside the bracket.
val upsideDownData = getPValuesData(mpgDf, "drv", "hwy", 8.0, -4.0)
.add("label") { formatPValue(this["p"] as Double) }
println("${upsideDownData.rowsCount()} rows")
upsideDownData
p + geomBracket(
data = upsideDownData.toMap(),
lenstart = -5, lenend = -5, // Negative values to reverse the direction of the brackets
vjust = 2.0 // Specify vjust to move the labels under the brackets
) {
xmin = "xmin"; xmax = "xmax"; y = "y"; label = "label"
}
Precise Alignment with tipLengthUnit = "identity"¶
By default, bracket tips use a fixed length. Setting tipLengthUnit = "identity" allows you to specify tip lengths in data units.
This is essential when you want a bracket tip to reach a specific coordinate, such as the exact top of a boxplot or a particular data point.
// Recall the y-coordinates from the datasets
val maxByDrv = (mpg["drv"] as List<*>).zip(mpg["hwy"] as List<*>)
.groupBy { it.first.toString() }
.mapValues { (_, pairs) -> pairs.maxOf { it.second.toString().toInt() } }
println("Upper limits of boxes for each category:\n$maxByDrv\n")
// The tip lengths are chosen so that each tip reaches either the previous bracket or the box
// Example (first bracket):
// lenstart = y - upper_box_limit_at_xmin - 1
// -> 47 - 44 - 1 = 2
// lenend = y - upper_box_limit_at_xmax - 1
// -> 47 - 28 - 1 = 18
p + geomBracket(
data = pValuesDataLabeled.toMap(),
tipLengthUnit = "identity" // identity units (data space)
) {
xmin = "xmin"; xmax = "xmax"; y = "y"; label = "label"
lenstart = listOf(2, 1, 1)
lenend = listOf(18, 24, 1)
}
Non-Statistical Annotations: Range Grouping¶
Brackets can be used for more than just drawing p-values.
For example, they can be used to highlight cluster boundaries in the following scatter plot:
fun getContinuousBracketsData(
df: DataFrame<*>,
catCol: String,
primaryValCol: String,
secondaryValCol: String,
stepMult: Double
): DataFrame<*> {
val secondaryVals = df[secondaryValCol].toList().map { it.toString().toDouble() }
val step = (secondaryVals.max() - secondaryVals.min()) * stepMult
val base = secondaryVals.max() + step
val categories = listOf("r", "4", "f")
.filter { cat -> df[catCol].toList().any { it.toString() == cat } }
val primaryValsPerCat = categories.map { cat ->
df.filter { it[catCol].toString() == cat }[primaryValCol].toList().map { it.toString().toDouble() }
}
return dataFrameOf(
categories.toColumn(catCol),
primaryValsPerCat.map { it.min() }.toColumn("min_$primaryValCol"),
primaryValsPerCat.map { it.max() }.toColumn("max_$primaryValCol"),
categories.indices.map { i -> base + i * step }.toColumn("${secondaryValCol}_level")
)
}
val hwyBracketsData = getContinuousBracketsData(mpgDf, "drv", "hwy", "cty", 0.1)
val ctyBracketsData = getContinuousBracketsData(mpgDf, "drv", "cty", "hwy", 0.05)
letsPlot() +
geomPoint(data = mpg, alpha = .25, showLegend = false) { x = "hwy"; y = "cty"; color = "drv" } +
// Horizontal brackets for hwy ranges
geomBracket(data = hwyBracketsData.toMap()) {
xmin = "min_hwy"; xmax = "max_hwy"; y = "cty_level"; label = "drv"; color = "drv"
} +
// Vertical brackets for cty ranges
geomBracket(data = ctyBracketsData.toMap()) {
x = "hwy_level"; ymin = "min_cty"; ymax = "max_cty"; label = "drv"; color = "drv"
} +
themeMinimal() +
labs(x = "hwy", y = "cty")