# Source code for process_improve.visualization.raincloud
"""Raincloud plot.
A raincloud combines three complementary views of a distribution in one
figure: a one-sided violin (the density "cloud"), a boxplot (the five-number
summary), and the jittered raw observations (the "rain"). It is drawn with
Plotly so it inherits the package's registered base theme.
"""
from __future__ import annotations
import pandas as pd
# Plotly is an optional dependency: bind ``go`` to the real module when it is
# installed, otherwise to a placeholder so that importing this module does not
# fail with ImportError.
try:
    import plotly.graph_objects as go
except ImportError:  # pragma: no cover - exercised via env-without-plotly
    # NOTE(review): presumably ``_MissingExtra`` raises an informative error
    # (naming the "plotly" package and the "plotting" extra) when ``go`` is
    # first used - confirm in process_improve._extras.
    from process_improve._extras import _MissingExtra
    go = _MissingExtra("plotly", "plotting")  # type: ignore[assignment]
# [docs]  (Sphinx "view source" link text left over from the HTML page; not code)
def _split_into_groups(
    frame: pd.DataFrame,
    value: str,
    group: str | None,
) -> list[tuple[str, pd.Series]]:
    """Return ``(label, sample)`` pairs, one per raincloud to draw.

    Without ``group`` a single pair labelled ``""`` is returned. With ``group``
    the frame is split with ``groupby(group, sort=False)`` and each label is
    converted to ``str``.

    Raises
    ------
    KeyError
        If ``value`` or ``group`` cannot be resolved against ``frame``. The
        message names the missing label and lists the available columns.
    """
    try:
        if group is None:
            return [("", frame[value])]
        return [(str(label), subset[value]) for label, subset in frame.groupby(group, sort=False)]
    except KeyError as err:
        # pandas reports only the bare label; re-raise with enough context for
        # the caller to see which columns they could have asked for.
        missing = err.args[0] if err.args else "<unknown>"
        raise KeyError(
            f"Column {missing!r} was not found in `data`; available columns: {list(frame.columns)}."
        ) from err


def raincloud(  # noqa: PLR0913
    data: pd.DataFrame | pd.Series,
    value: str | None = None,
    group: str | None = None,
    *,
    title: str = "",
    orientation: str = "h",
    template: str | None = None,
) -> go.Figure:
    """Draw a raincloud plot.

    Parameters
    ----------
    data : pandas.DataFrame or pandas.Series
        The data to plot. A Series is converted to a one-column DataFrame whose
        column is named ``value`` if given, else the Series name, else
        ``"value"``.
    value : str or None
        Name of the numeric column in ``data`` to plot. Required when ``data``
        is a DataFrame.
    group : str or None
        Optional name of a categorical column; one raincloud is drawn per
        unique group value, in the order pandas yields the groups with
        ``sort=False``.
    title : str, optional
        Figure title.
    orientation : {"h", "v"}, optional
        ``"h"`` draws horizontal rainclouds (the default and usual
        orientation); ``"v"`` draws them vertically.
    template : str or None, optional
        Plotly template name. When ``None``, no template is set on the figure
        by this function, so the package's registered default theme is used.

    Returns
    -------
    plotly.graph_objects.Figure
        A figure with one :class:`plotly.graph_objects.Violin` trace per group,
        each showing the density cloud, the box, and the jittered raw points.
        The legend is shown only when ``group`` is given.

    Raises
    ------
    ValueError
        If ``orientation`` is not ``"h"`` or ``"v"``, or if ``data`` is not a
        Series and ``value`` is ``None``.
    KeyError
        If ``value`` or ``group`` cannot be found in ``data``.

    Notes
    -----
    Missing values (NaN) in each sample are dropped before plotting.

    Examples
    --------
    >>> raincloud(df, value="yield", group="reactor")  # doctest: +SKIP
    >>> raincloud(pd.Series([1.0, 2.0, 3.0]))  # doctest: +SKIP
    """
    if orientation not in ("h", "v"):
        raise ValueError(f"orientation must be 'h' or 'v', got {orientation!r}.")

    if isinstance(data, pd.Series):
        column = value or (str(data.name) if data.name is not None else "value")
        frame = data.to_frame(name=column)
    else:
        if value is None:
            raise ValueError(
                "`value` (the numeric column name) is required when `data` is a DataFrame."
            )
        frame = data
        column = value

    # Resolve the samples before touching Plotly so that bad column names are
    # reported even before any figure object is created.
    samples = _split_into_groups(frame, column, group)

    # The sample goes on x for horizontal rainclouds and on y for vertical ones.
    sample_axis = "x" if orientation == "h" else "y"

    fig = go.Figure()
    for label, sample in samples:
        # A one-sided violin is the "cloud"; box_visible adds the box; and
        # points="all" with a negative pointpos draws the jittered raw "rain"
        # on the opposite side of the density.
        fig.add_trace(
            go.Violin(
                name=label,
                side="positive",
                box_visible=True,
                meanline_visible=True,
                points="all",
                jitter=0.3,
                pointpos=-0.9,
                **{sample_axis: sample.dropna()},
            )
        )

    fig.update_layout(title=title, showlegend=group is not None)
    if template is not None:
        fig.update_layout(template=template)
    return fig