# (c) Kevin Dunn, 2010-2026. MIT License. Based on own private work over the years.
# Built-in libraries
from __future__ import annotations
import json
from collections.abc import Sequence
import numpy as np
import pandas as pd
from pydantic import BaseModel, field_validator
from sklearn.base import BaseEstimator
try:
import plotly.graph_objects as go
except ImportError: # pragma: no cover - exercised via env-without-plotly
from process_improve._extras import _MissingExtra
go = _MissingExtra("plotly", "plotting") # type: ignore[assignment]
from process_improve.visualization.themes import (
DEFAULT_THEME,
LIMIT_LINE_COLOR,
REFERENCE_LINE_COLOR,
)
def _decode_highlight_style(key: str) -> dict:
    """Decode an ``items_to_highlight`` key into a Plotly marker-style dict.

    Each key must be a JSON-encoded *object* holding a Plotly marker/line-style
    spec. Decoding it here (rather than calling ``json.loads`` inline) means a
    malformed key raises a clear ``ValueError`` at the API surface instead of a
    confusing ``json.JSONDecodeError`` deep inside the trace-building loop. Mirrors
    the SEC-32 guard already applied in ``process_improve.batch.plotting``.

    Parameters
    ----------
    key : str
        JSON text, for example ``'{"color": "red", "symbol": "cross"}'``.

    Returns
    -------
    dict
        The decoded style specification.

    Raises
    ------
    ValueError
        If ``key`` is not valid JSON, is not something ``json.loads`` can read,
        or decodes to anything other than a JSON object (a number, string,
        list or ``null`` cannot be used as a marker specification).
    """
    message = (
        f"items_to_highlight: each key must be a JSON-encoded Plotly "
        f'style spec (e.g. \'{{"color": "red", "symbol": "cross"}}\'). '
        f"Got {key!r}."
    )
    try:
        style = json.loads(key)
    except (json.JSONDecodeError, TypeError) as exc:
        # TypeError covers keys that are not str/bytes at all (e.g. an int or None).
        raise ValueError(message) from exc
    if not isinstance(style, dict):
        # Valid JSON, but a scalar/list is not a usable marker specification.
        raise ValueError(message)
    return style
def plot_pre_checks(model: BaseEstimator, pc_horiz: int, pc_vert: int, pc_depth: int) -> bool:
    """Validate the component numbers requested for a plot.

    The number of fitted components is read from ``model.n_components`` when the
    model has that attribute, and from ``model._parent.n_components`` otherwise.

    Parameters
    ----------
    model : BaseEstimator
        The model whose component count bounds the requested axes.
    pc_horiz, pc_vert : int
        Components for the horizontal and vertical axes; each must lie in
        ``1 .. n_components``.
    pc_depth : int
        Component for the depth axis; must lie in ``-1 .. n_components``.

    Returns
    -------
    bool
        Always ``True`` when every check passes.

    Raises
    ------
    ValueError
        If an axis is out of range, or the three values are not all distinct.
    """
    source = model if hasattr(model, "n_components") else model._parent
    n_components = source.n_components

    # The two in-plane axes share the same 1-based range rule.
    for axis_name, requested in (("pc_horiz", pc_horiz), ("pc_vert", pc_vert)):
        if not 0 < requested <= n_components:
            raise ValueError(
                f"The model has {n_components} components. Ensure that 1 <= {axis_name} <= {n_components}."
            )

    if not -1 <= pc_depth <= n_components:
        raise ValueError(
            f"The model has {n_components} components. Ensure that pc_depth is -1 (no depth axis) "
            f"or 1 <= pc_depth <= {n_components}."
        )

    # A set collapses repeated values, so fewer than three members means a clash.
    distinct_axes = {pc_horiz, pc_vert, pc_depth}
    if len(distinct_axes) != 3:
        raise ValueError("Specify distinct components for each axis.")

    return True
[docs]
def score_plot(  # noqa: C901, PLR0913
    model: BaseEstimator,
    pc_horiz: int = 1,
    pc_vert: int = 2,
    pc_depth: int = -1,
    items_to_highlight: dict[str, list] | None = None,
    settings: dict | None = None,
    fig: go.Figure | None = None,
) -> go.Figure:
    """Generate a 2D or 3D score plot for the given latent variable model.

    A 2D scatter on (``pc_horiz``, ``pc_vert``) is produced by default. Supplying
    ``pc_depth >= 1`` adds a third score axis and switches the underlying trace
    to ``Scatter3d``.

    Parameters
    ----------
    model : MVmodel object (PCA, or PLS)
        A latent variable model generated by this library.
    pc_horiz : int, optional
        Which component to plot on the horizontal axis, by default 1 (the first component)
    pc_vert : int, optional
        Which component to plot on the vertical axis, by default 2 (the second component)
    pc_depth : int, optional
        If pc_depth >= 1, then a 3D score plot is generated, with this component on the 3rd axis
    items_to_highlight : dict, optional
        Keys are JSON strings parseable by ``json.loads`` into a Plotly line specifier;
        values are lists of index names to highlight. For example::

            items_to_highlight = {'{"color": "red", "symbol": "cross"}': items_in_red}

        will highlight the items in ``items_in_red`` with the given colour and shape.
        An observation listed under several keys is drawn only with the first
        of those keys; names that are not in the score index are ignored.
    settings : dict
        Default settings::

            {
                "show_ellipse": True,                # bool: show the Hotelling's T2 ellipse
                "ellipse_conf_level": 0.95,          # float: ellipse confidence level (< 1.00)
                "title": "Score plot of ...",        # str: overall plot title
                "show_labels": False,                # bool: add a label for each observation
                "show_legend": True,                 # bool: show clickable legend
                "html_image_height": 500,            # int: image height in pixels
                "html_aspect_ratio_w_over_h": 16/9,  # float: width as ratio of height
                "template": "pi_journal",            # str: registered Plotly theme name
            }

    fig : go.Figure, optional
        An existing figure to draw onto. A new figure is created if omitted.

    Returns
    -------
    go.Figure
        The figure holding the score traces (and the ellipse, when requested).

    Raises
    ------
    ValueError
        If the requested components are invalid (see ``plot_pre_checks``) or an
        ``items_to_highlight`` key is not a JSON-encoded style object.

    Examples
    --------
    >>> pca = PCA(n_components=3).fit(X_scaled)
    >>> pca.score_plot()                                     # PC1 vs PC2
    >>> pca.score_plot(pc_horiz=1, pc_vert=3)                # PC1 vs PC3
    >>> pca.score_plot(pc_horiz=1, pc_vert=2, pc_depth=3)    # 3D
    """
    plot_pre_checks(model, pc_horiz, pc_vert, pc_depth)
    is_3d = pc_depth >= 1
    data_to_plot = model.scores_ if hasattr(model, "scores_") else model._parent.t_scores_super
    ellipse_coordinates = (
        model.ellipse_coordinates if hasattr(model, "ellipse_coordinates") else model._parent.ellipse_coordinates
    )

    # The 2D plot gets a title as well; the depth component is appended for 3D only.
    default_title = f"Score plot of component {pc_horiz} vs component {pc_vert}"
    if is_3d:
        default_title += f" vs component {pc_depth}"

    class Settings(BaseModel):
        """Validated display settings for the score plot."""

        show_ellipse: bool = True
        ellipse_conf_level: float = 0.95

        @field_validator("ellipse_conf_level")
        @classmethod
        def check_ellipse_conf_level(cls, val: float) -> float:
            """Check confidence value is in range."""
            if not 0 < val < 1:
                raise ValueError("0.0 < `ellipse_conf_level` < 1.0")
            return val

        title: str = default_title
        show_labels: bool = False
        show_legend: bool = True
        html_image_height: float = 500.0
        html_aspect_ratio_w_over_h: float = 16 / 9.0
        template: str = DEFAULT_THEME

    setdict = (Settings(**settings) if settings else Settings()).model_dump()
    if fig is None:
        fig = go.Figure()

    name = "Scores [T]"
    fig.update_layout(xaxis_title_text=f"PC {pc_horiz}", yaxis_title_text=f"PC {pc_vert}")

    # Split the observations into the highlighted groups and the remainder.
    # Lists (not sets) are used so every trace keeps the score index order and
    # the figure is identical from run to run.
    default_index = list(data_to_plot.index)
    highlights: dict[str, list] = {}
    if items_to_highlight is not None:
        remaining = list(dict.fromkeys(default_index))
        for key, items in items_to_highlight.items():
            wanted = set(items)
            highlights[key] = [obs for obs in remaining if obs in wanted]
            remaining = [obs for obs in remaining if obs not in wanted]
        default_index = remaining

    mode = "markers+text" if setdict["show_labels"] else "markers"
    trace_type = go.Scatter3d if is_3d else go.Scatter
    default_marker = dict(symbol="circle") if is_3d else dict(symbol="circle", size=7)

    def _coordinates(index: list) -> dict:
        """Return the x/y (and z, for 3D) score columns for the given observations."""
        coords = dict(
            x=data_to_plot.loc[index, pc_horiz],
            y=data_to_plot.loc[index, pc_vert],
        )
        if is_3d:
            coords["z"] = data_to_plot.loc[index, pc_depth]
        return coords

    fig.add_trace(
        trace_type(
            **_coordinates(default_index),
            name=name,
            mode=mode,
            marker=default_marker,
            text=default_index,
            textposition="top center",
        )
    )
    # Items to highlight, if any
    for key, index in highlights.items():
        fig.add_trace(
            trace_type(
                **_coordinates(index),
                name=name,
                mode=mode,
                marker=_decode_highlight_style(key),
                text=index,
                textposition="top center",
            )
        )

    if setdict["show_ellipse"]:
        # NOTE(review): the ellipse and zero lines are 2D (x/y) elements and are
        # also added when a 3D plot is requested - confirm that is intended.
        ellipse = ellipse_coordinates(
            score_horiz=pc_horiz,
            score_vert=pc_vert,
            conf_level=setdict["ellipse_conf_level"],
        )
        fig.add_hline(y=0, line_color=REFERENCE_LINE_COLOR)
        fig.add_vline(x=0, line_color=REFERENCE_LINE_COLOR)
        fig.add_trace(
            go.Scatter(
                x=ellipse[0],
                y=ellipse[1],
                name=f"Hotelling's T^2 [{setdict['ellipse_conf_level'] * 100:.4g}%]",
                mode="lines",
                line=dict(
                    color=LIMIT_LINE_COLOR,
                    width=2,
                ),
            )
        )

    fig.update_layout(
        template=setdict["template"],
        title_text=setdict["title"],
        hovermode="closest",
        showlegend=setdict["show_legend"],
        autosize=False,
        width=setdict["html_aspect_ratio_w_over_h"] * setdict["html_image_height"],
        height=setdict["html_image_height"],
    )
    if is_3d:
        # Carry the 2D axis settings (including the "PC n" titles) over to the
        # 3D scene: x from x, and y from y.
        layout = fig.to_dict()["layout"]
        fig.update_layout(
            scene=dict(
                xaxis=layout["xaxis"],
                yaxis=layout["yaxis"],
                zaxis=dict(
                    title_text=f"PC {pc_depth}",
                    mirror=True,
                    showspikes=True,
                    visible=True,
                    gridwidth=1,
                ),
            ),
        )
    return fig
[docs]
def loading_plot(  # noqa: PLR0913
    model: BaseEstimator,
    loadings_type: str = "p",
    pc_horiz: int = 1,
    pc_vert: int = 2,
    settings: dict | None = None,
    fig: go.Figure | None = None,
) -> go.Figure:
    """Generate a 2-dimensional loadings plot for the given latent variable model.

    Parameters
    ----------
    model : MVmodel object (PCA, or PLS)
        A latent variable model generated by this library.
    loadings_type : str, optional
        A choice of the following (matched case-insensitively):
            'p' : (default for PCA) : the P (projection) loadings: only option possible for PCA
            'w' : the W loadings: Suitable for PLS
            'w*' : (default for PLS) the W* (or R) loadings: Suitable for PLS
            'w*c' : the W* (from X-space) with C loadings from the Y-space: Suitable for PLS
            'c' : the C loadings from the Y-space: Suitable for PLS

        Any other value falls back to ``direct_weights_`` when the model has
        that attribute, and to the P loadings otherwise.
    pc_horiz : int, optional
        Which component to plot on the horizontal axis, by default 1 (the first component)
    pc_vert : int, optional
        Which component to plot on the vertical axis, by default 2 (the second component)
    settings : dict
        Default settings::

            {
                "title": "Loadings plot ...",        # str: overall plot title
                "show_labels": True,                 # bool: add a label for each variable
                "html_image_height": 500,            # int: image height in pixels
                "html_aspect_ratio_w_over_h": 16/9,  # float: width as ratio of height
                "template": "pi_journal",            # str: registered Plotly theme name
            }

    fig : go.Figure, optional
        An existing figure to draw onto. A new figure is created if omitted.

    Returns
    -------
    go.Figure
        The figure holding the loadings trace(s).

    Examples
    --------
    >>> pca.loading_plot()                                # P loadings, PC1 vs PC2
    >>> pls.loading_plot(loadings_type="w*c")             # W* and C loadings
    >>> pls.loading_plot(loadings_type="w", pc_vert=3)    # W loadings, PC1 vs PC3
    """
    plot_pre_checks(model, pc_horiz, pc_vert, pc_depth=0)

    class Settings(BaseModel):
        """Validated display settings for the loadings plot."""

        title: str = f"Loadings plot [{loadings_type.upper()}] of component {pc_horiz} vs component {pc_vert}"
        show_labels: bool = True
        html_image_height: float = 500.0
        html_aspect_ratio_w_over_h: float = 16 / 9.0
        template: str = DEFAULT_THEME

    setdict = (Settings(**settings) if settings else Settings()).model_dump()
    if fig is None:
        fig = go.Figure()

    requested = loadings_type.lower()

    # Fallback table: P loadings, replaced by W* when the model provides it.
    p_loadings = model.loadings_ if hasattr(model, "loadings_") else model.loadings
    x_space = model.direct_weights_ if hasattr(model, "direct_weights_") else p_loadings
    y_space = None  # only filled for the combined 'w*c' plot

    if requested == "p":
        x_space = p_loadings
    elif requested == "w":
        x_space = model.x_weights_
    elif requested == "w*":
        x_space = model.direct_weights_
    elif requested == "w*c":
        x_space = model.direct_weights_
        y_space = model.y_loadings_
    elif requested == "c":
        x_space = model.y_loadings_

    mode = "markers+text" if setdict["show_labels"] else "markers"
    fig.add_trace(
        go.Scatter(
            x=x_space.loc[:, pc_horiz],
            y=x_space.loc[:, pc_vert],
            name="X-space loadings W*",
            mode=mode,
            marker=dict(symbol="circle", size=7),
            text=x_space.index,
            textposition="top center",
        )
    )

    # The legend is only useful when two kinds of loadings share the plot.
    add_legend = y_space is not None
    if add_legend:
        fig.add_trace(
            go.Scatter(
                x=y_space.loc[:, pc_horiz],
                y=y_space.loc[:, pc_vert],
                name="Y-space loadings C",
                mode=mode,
                marker=dict(symbol="star", size=8),
                text=y_space.index,
                textposition="bottom center",
            )
        )

    fig.update_layout(xaxis_title_text=f"PC {pc_horiz}", yaxis_title_text=f"PC {pc_vert}")
    fig.add_hline(y=0, line_color=REFERENCE_LINE_COLOR)
    fig.add_vline(x=0, line_color=REFERENCE_LINE_COLOR)

    image_height = setdict["html_image_height"]
    fig.update_layout(
        template=setdict["template"],
        title_text=setdict["title"],
        hovermode="closest",
        showlegend=add_legend,
        autosize=False,
        width=setdict["html_aspect_ratio_w_over_h"] * image_height,
        height=image_height,
    )
    return fig
[docs]
def spe_plot(  # noqa: C901
    model: BaseEstimator,
    with_a: int = -1,
    items_to_highlight: dict[str, list] | None = None,
    settings: dict | None = None,
    fig: go.Figure | None = None,
) -> go.Figure:
    """Generate a squared-prediction error (SPE) plot for the given latent variable model.

    The SPE is shown after `with_a` latent variables. The default uses the total
    number of latent variables which have already been fitted.

    Parameters
    ----------
    model : MVmodel object (PCA, or PLS)
        A latent variable model generated by this library.
    with_a : int, optional
        Uses this many number of latent variables, and therefore shows the SPE after this number of
        model components. By default the total number of components fitted will be used.
        A negative value is used as a position into the columns of ``model.spe_``.
    items_to_highlight : dict, optional
        Keys are JSON strings parseable by ``json.loads`` into a Plotly line specifier;
        values are lists of index names to highlight. For example::

            items_to_highlight = {'{"color": "red", "symbol": "cross"}': items_in_red}

        will highlight the items in ``items_in_red`` with the given colour and shape.
        An observation listed under several keys is drawn only with the first
        of those keys; names that are not in the SPE index are ignored.
    settings : dict
        Default settings::

            {
                "show_limit": True,                  # bool: show the SPE confidence limit line
                "conf_level": 0.95,                  # float: confidence level for limit (< 1.00)
                "title": "SPE plot ...",             # str: overall plot title
                "default_marker": {...},             # dict: e.g. dict(symbol="circle", size=7)
                "show_labels": False,                # bool: add a label for each observation
                "show_legend": False,                # bool: show clickable legend
                "html_image_height": 500,            # int: image height in pixels
                "html_aspect_ratio_w_over_h": 16/9,  # float: width as ratio of height
                "template": "pi_journal",            # str: registered Plotly theme name
            }

        When no ``title`` is given, the default title names the confidence
        level actually in use, and omits it when ``show_limit`` is False.
    fig : go.Figure, optional
        An existing figure to draw onto. A new figure is created if omitted.

    Returns
    -------
    go.Figure
        The figure holding the SPE values and, when requested, the limit line.

    Raises
    ------
    ValueError
        If ``with_a`` is 0 or exceeds the number of fitted components, or an
        ``items_to_highlight`` key is not a JSON-encoded style object.

    Examples
    --------
    >>> pca.spe_plot()
    >>> pca.spe_plot(settings={"conf_level": 0.99, "show_labels": True})
    """
    # TO CONSIDER: allow a setting `as_line`: which connects the points with line segments
    if with_a < 0:
        # Get the actual name of the last column in the model if negative indexing is used
        with_a = model.spe_.columns[with_a]
    elif with_a == 0:
        raise ValueError("`with_a` must be >= 1, or specified with negative indexing.")
    if not with_a <= model.n_components:
        raise ValueError(
            f"`with_a` must be <= the number of components fitted "
            f"({model.n_components}); got {with_a}."
        )

    class Settings(BaseModel):
        """Validated display settings for the SPE plot."""

        show_limit: bool = True
        conf_level: float = 0.95

        @field_validator("conf_level")
        @classmethod
        def check_conf_level(cls, val: float) -> float:
            """Check confidence value is in range."""
            if not 0 < val < 1:
                raise ValueError("0.0 < `conf_level` < 1.0")
            return val

        # Placeholder: the default title depends on the validated `conf_level`
        # and `show_limit`, so it is filled in after validation (see below).
        title: str = ""
        default_marker: dict = dict(symbol="circle", size=7)
        show_labels: bool = False
        show_legend: bool = False
        html_image_height: float = 500.0
        html_aspect_ratio_w_over_h: float = 16 / 9.0
        template: str = DEFAULT_THEME

    setdict = (Settings(**settings) if settings else Settings()).model_dump()
    plural = "s" if with_a > 1 else ""
    if not (settings and "title" in settings):
        default_title = f"Squared prediction error plot after fitting {with_a} component{plural}"
        if setdict["show_limit"]:
            default_title += f", with the {setdict['conf_level'] * 100}% confidence limit"
        setdict["title"] = default_title

    if fig is None:
        fig = go.Figure()

    name = f"SPE values after {with_a} component{plural}"

    # Split the observations into the highlighted groups and the remainder.
    # Lists (not sets) are used so each trace keeps the model's index order.
    default_index = list(model.spe_.index)
    highlights: dict[str, list] = {}
    if items_to_highlight is not None:
        remaining = list(dict.fromkeys(default_index))
        for key, items in items_to_highlight.items():
            wanted = set(items)
            highlights[key] = [obs for obs in remaining if obs in wanted]
            remaining = [obs for obs in remaining if obs not in wanted]
        default_index = remaining

    mode = "markers+text" if setdict["show_labels"] else "markers"
    fig.add_trace(
        go.Scatter(
            x=default_index,
            y=model.spe_.loc[default_index, with_a],
            name=name,
            mode=mode,
            marker=setdict["default_marker"],
            text=default_index,
            textposition="top center",
            showlegend=setdict["show_legend"],
        )
    )
    # Items to highlight, if any
    for key, index in highlights.items():
        fig.add_trace(
            go.Scatter(
                x=index,
                y=model.spe_.loc[index, with_a],
                name=name,
                mode=mode,
                marker=_decode_highlight_style(key),
                text=index,
                textposition="top center",
            )
        )

    if setdict["show_limit"]:
        limit_name = f"{setdict['conf_level'] * 100:.3g}% limit"
        fig.add_hline(
            y=model.spe_limit(conf_level=setdict["conf_level"]),
            line_color=LIMIT_LINE_COLOR,
            annotation_text=limit_name,
            annotation_position="bottom right",
            name=limit_name,
        )
    fig.add_hline(y=0, line_color=REFERENCE_LINE_COLOR)
    fig.update_layout(
        template=setdict["template"],
        title_text=setdict["title"],
        hovermode="closest",
        showlegend=setdict["show_legend"],
        autosize=False,
        # The axis shows the SPE values themselves, so it is labelled with the
        # data name rather than the limit-line label.
        yaxis_title_text=name,
        width=setdict["html_aspect_ratio_w_over_h"] * setdict["html_image_height"],
        height=setdict["html_image_height"],
    )
    return fig
[docs]
def t2_plot(  # noqa: C901
    model: BaseEstimator,
    with_a: int = -1,
    items_to_highlight: dict[str, list] | None = None,
    settings: dict | None = None,
    fig: go.Figure | None = None,
) -> go.Figure:
    """Generate a Hotelling's T2 (T^2) plot for the given latent variable model.

    The T2 values are shown after `with_a` latent variables. The default uses the
    total number of latent variables which have already been fitted.

    Parameters
    ----------
    model : MVmodel object (PCA, or PLS)
        A latent variable model generated by this library.
    with_a : int, optional
        Uses this many number of latent variables, and therefore shows the T2 after this number of
        model components. By default the total number of components fitted will be used.
        A negative value is used as a position into the columns of ``model.hotellings_t2_``.
    items_to_highlight : dict, optional
        Keys are JSON strings parseable by ``json.loads`` into a Plotly line specifier;
        values are lists of index names to highlight. For example::

            items_to_highlight = {'{"color": "red", "symbol": "cross"}': items_in_red}

        will highlight the items in ``items_in_red`` with the given colour and shape.
        An observation listed under several keys is drawn only with the first
        of those keys; names that are not in the T2 index are ignored.
    settings : dict
        Default settings::

            {
                "show_limit": True,                  # bool: show the T2 confidence limit line
                "conf_level": 0.95,                  # float: confidence level for limit (< 1.00)
                "title": "T2 plot ...",              # str: overall plot title
                "default_marker": {...},             # dict: e.g. dict(symbol="circle", size=7)
                "show_labels": False,                # bool: add a label for each observation
                "show_legend": False,                # bool: show clickable legend
                "html_image_height": 500,            # int: image height in pixels
                "html_aspect_ratio_w_over_h": 16/9,  # float: width as ratio of height
                "template": "pi_journal",            # str: registered Plotly theme name
            }

        When no ``title`` is given, the default title names the confidence
        level actually in use, and omits it when ``show_limit`` is False.
    fig : go.Figure, optional
        An existing figure to draw onto. A new figure is created if omitted.

    Returns
    -------
    go.Figure
        The figure holding the T2 values and, when requested, the limit line.

    Raises
    ------
    ValueError
        If ``with_a`` is 0 or exceeds the number of fitted components, or an
        ``items_to_highlight`` key is not a JSON-encoded style object.

    Examples
    --------
    >>> pca.t2_plot()
    >>> pca.t2_plot(settings={"conf_level": 0.99, "show_labels": True})
    """
    # TO CONSIDER: allow a setting `as_line`: which connects the points with line segments
    if with_a < 0:
        # Get the actual name of the last column in the model if negative indexing is used
        with_a = model.hotellings_t2_.columns[with_a]
    elif with_a == 0:
        raise ValueError("`with_a` must be >= 1, or specified with negative indexing.")
    if not with_a <= model.n_components:
        raise ValueError(
            f"`with_a` must be <= the number of components fitted "
            f"({model.n_components}); got {with_a}."
        )

    class Settings(BaseModel):
        """Validated display settings for the Hotelling's T2 plot."""

        show_limit: bool = True
        conf_level: float = 0.95

        @field_validator("conf_level")
        @classmethod
        def check_conf_level(cls, val: float) -> float:
            """Check confidence value is in range."""
            if not 0 < val < 1:
                raise ValueError("0.0 < `conf_level` < 1.0")
            return val

        # Placeholder: the default title depends on the validated `conf_level`
        # and `show_limit`, so it is filled in after validation (see below).
        title: str = ""
        default_marker: dict = dict(symbol="circle", size=7)
        show_labels: bool = False
        show_legend: bool = False
        html_image_height: float = 500.0
        html_aspect_ratio_w_over_h: float = 16 / 9.0
        template: str = DEFAULT_THEME

    setdict = (Settings(**settings) if settings else Settings()).model_dump()
    plural = "s" if with_a > 1 else ""
    if not (settings and "title" in settings):
        default_title = f"Hotelling's T2 plot after fitting {with_a} component{plural}"
        if setdict["show_limit"]:
            default_title += f", with the {setdict['conf_level'] * 100}% confidence limit"
        setdict["title"] = default_title

    if fig is None:
        fig = go.Figure()

    name = f"T2 values after {with_a} component{plural}"

    # Split the observations into the highlighted groups and the remainder.
    # Lists (not sets) are used so each trace keeps the model's index order.
    default_index = list(model.hotellings_t2_.index)
    highlights: dict[str, list] = {}
    if items_to_highlight is not None:
        remaining = list(dict.fromkeys(default_index))
        for key, items in items_to_highlight.items():
            wanted = set(items)
            highlights[key] = [obs for obs in remaining if obs in wanted]
            remaining = [obs for obs in remaining if obs not in wanted]
        default_index = remaining

    mode = "markers+text" if setdict["show_labels"] else "markers"
    fig.add_trace(
        go.Scatter(
            x=default_index,
            y=model.hotellings_t2_.loc[default_index, with_a],
            name=name,
            mode=mode,
            marker=setdict["default_marker"],
            text=default_index,
            textposition="top center",
            showlegend=setdict["show_legend"],
        )
    )
    # Items to highlight, if any
    for key, index in highlights.items():
        fig.add_trace(
            go.Scatter(
                x=index,
                y=model.hotellings_t2_.loc[index, with_a],
                name=name,
                mode=mode,
                marker=_decode_highlight_style(key),
                text=index,
                textposition="top center",
            )
        )

    if setdict["show_limit"]:
        limit_name = f"{setdict['conf_level'] * 100:.3g}% limit"
        fig.add_hline(
            y=model.hotellings_t2_limit(conf_level=setdict["conf_level"]),
            line_color=LIMIT_LINE_COLOR,
            annotation_text=limit_name,
            annotation_position="bottom right",
            name=limit_name,
        )
    fig.add_hline(y=0, line_color=REFERENCE_LINE_COLOR)
    fig.update_layout(
        template=setdict["template"],
        title_text=setdict["title"],
        hovermode="closest",
        showlegend=setdict["show_legend"],
        autosize=False,
        # The axis shows the T2 values themselves, so it is labelled with the
        # data name rather than the limit-line label.
        yaxis_title_text=name,
        width=setdict["html_aspect_ratio_w_over_h"] * setdict["html_image_height"],
        height=setdict["html_image_height"],
    )
    return fig
[docs]
def explained_variance_plot(
    model: BaseEstimator,
    settings: dict | None = None,
    fig: go.Figure | None = None,
) -> go.Figure:
    """Generate an explained-variance plot for a fitted latent variable model.

    Draws ``model.r2_per_component_`` as bars and ``model.r2_cumulative_`` as an
    overlaid line. The default title says "Y-variance" when the model's class is
    named ``PLS`` and "X-variance" for any other model.

    Parameters
    ----------
    model : MVmodel object (PCA, or PLS)
        A fitted latent variable model generated by this library.
    settings : dict
        Default settings::

            {
                "as_percentage": True,               # bool: y-axis as a percentage, else a fraction
                "title": "Variance explained ...",   # str: overall plot title
                "bar_color": None,                   # str|None: bar colour; None uses the theme
                "line_color": None,                  # str|None: line colour; None uses the theme
                "show_legend": True,                 # bool: show clickable legend
                "html_image_height": 500,            # int: image height in pixels
                "html_aspect_ratio_w_over_h": 16/9,  # float: width as ratio of height
                "template": "pi_journal",            # str: registered Plotly theme name
            }

    fig : go.Figure, optional
        An existing figure to draw onto. A new figure is created if omitted.

    Returns
    -------
    go.Figure
        The figure holding the bar and line traces.

    Raises
    ------
    ValueError
        If the model has no ``r2_per_component_`` attribute (it is not fitted).

    Examples
    --------
    >>> pca.explained_variance_plot()
    >>> pls.explained_variance_plot(settings={"as_percentage": False})
    """
    if not hasattr(model, "r2_per_component_"):
        msg = "Model is not fitted. Call fit() before plotting the explained variance."
        raise ValueError(msg)

    # Matched on the class name, so no model class has to be imported here.
    model_is_pls = type(model).__name__ == "PLS"
    block_label = "Y-variance" if model_is_pls else "X-variance"

    class Settings(BaseModel):
        """Validated display settings for the explained-variance plot."""

        as_percentage: bool = True
        title: str = f"{block_label} explained per component"
        bar_color: str | None = None
        line_color: str | None = None
        show_legend: bool = True
        html_image_height: float = 500.0
        html_aspect_ratio_w_over_h: float = 16 / 9.0
        template: str = DEFAULT_THEME

    validated = Settings(**settings) if settings else Settings()
    setdict = validated.model_dump()
    if fig is None:
        fig = go.Figure()

    if setdict["as_percentage"]:
        scale, unit = 100.0, "%"
    else:
        scale, unit = 1.0, "fraction"

    show_legend = setdict["show_legend"]
    image_height = setdict["html_image_height"]

    # Component labels are stringified so the x-axis is treated as categories.
    components = list(map(str, model.r2_per_component_.index))
    per_component = model.r2_per_component_.to_numpy(dtype=float) * scale
    cumulative = model.r2_cumulative_.to_numpy(dtype=float) * scale

    bars = go.Bar(
        x=components,
        y=per_component,
        name="Per component",
        marker_color=setdict["bar_color"],
        showlegend=show_legend,
    )
    running_total = go.Scatter(
        x=components,
        y=cumulative,
        name="Cumulative",
        mode="lines+markers",
        line=dict(color=setdict["line_color"]),
        showlegend=show_legend,
    )
    fig.add_trace(bars)
    fig.add_trace(running_total)

    fig.update_layout(
        template=setdict["template"],
        title_text=setdict["title"],
        hovermode="x",
        showlegend=show_legend,
        autosize=False,
        xaxis=dict(title_text="Component", type="category"),
        yaxis=dict(title_text=f"Variance explained ({unit})", rangemode="tozero"),
        width=setdict["html_aspect_ratio_w_over_h"] * image_height,
        height=image_height,
    )
    return fig
def _per_component_r2(cumulative: pd.DataFrame, component: int) -> np.ndarray:
    """Return one component's own R2 contribution, per variable.

    ``cumulative`` holds cumulative R2 values with one column per component.
    The contribution of ``component`` is its column minus the column directly
    to its left; the first column is returned unchanged. A ``ValueError`` is
    raised when ``component`` is not one of the column labels.
    """
    position = list(cumulative.columns).index(component)
    up_to_here = cumulative.iloc[:, position].to_numpy(dtype=float)
    if position > 0:
        up_to_previous = cumulative.iloc[:, position - 1].to_numpy(dtype=float)
        return up_to_here - up_to_previous
    # Nothing precedes the first column, so its cumulative value is its own.
    return up_to_here
def _correlation_loadings(cumulative_r2: pd.DataFrame, loadings: pd.DataFrame, component: int) -> np.ndarray:
    """Return signed correlation loadings of every variable for one component.

    The magnitude is the square root of the component's per-variable R2
    (negative R2 differences are clipped to zero before the root is taken);
    the sign is copied from the matching column of ``loadings``.
    """
    fraction_explained = _per_component_r2(cumulative_r2, component)
    magnitude = np.sqrt(np.clip(fraction_explained, 0.0, None))
    direction = np.sign(loadings[component].to_numpy(dtype=float))
    return direction * magnitude
[docs]
def correlation_loadings_plot(  # noqa: C901, PLR0913
    model: BaseEstimator,
    pc_horiz: int = 1,
    pc_vert: int = 2,
    variance_ellipses: Sequence[float] = (0.5, 1.0),
    settings: dict | None = None,
    fig: go.Figure | None = None,
) -> go.Figure:
    """Generate a correlation loadings plot for a fitted latent variable model.

    Each variable is placed by its correlation with the scores of two
    components. A variable's squared distance from the origin is the fraction
    of its variance explained by those two components, so every variable lies
    inside the unit circle. Concentric ellipses mark variance-explained
    thresholds: a variable beyond the 50% ellipse has at least half of its
    variance captured by the two components shown.

    For PCA the X-variables are shown. For PLS (detected by the presence of
    ``model.r2y_per_variable_``) the Y-variables are overlaid as a second
    trace, which reveals how process variables relate to quality variables.

    Parameters
    ----------
    model : MVmodel object (PCA, or PLS)
        A fitted latent variable model generated by this library.
    pc_horiz : int, default 1
        Component shown on the horizontal axis (1-based).
    pc_vert : int, default 2
        Component shown on the vertical axis (1-based).
    variance_ellipses : sequence of float, default (0.5, 1.0)
        Variance-explained thresholds, each a fraction in (0, 1], at which to
        draw a concentric ellipse. The conventional choice is the 50% and
        100% ellipses; any other thresholds (for example 0.75 and 0.95) are
        equally valid.
    settings : dict
        Default settings::

            {
                "title": "Correlation loadings ...",  # str: overall plot title
                "x_marker_color": None,   # str|None: X-variable marker colour; None uses the theme
                "y_marker_color": None,   # str|None: Y-variable marker colour (PLS); None uses the theme
                "ellipse_color": "grey",  # str: colour of the variance ellipses
                "show_labels": True,      # bool: label each variable
                "show_legend": True,      # bool: show clickable legend (PLS only)
                "html_image_height": 600,            # int: image height in pixels
                "html_aspect_ratio_w_over_h": 1.0,   # float: width as ratio of height
                "template": "pi_journal",            # str: registered Plotly theme name
            }

    fig : go.Figure, optional
        An existing figure to draw onto. A new figure is created if omitted.

    Returns
    -------
    go.Figure
        The figure holding the ellipses and the variable markers.

    Raises
    ------
    ValueError
        If the model is not fitted, a requested component was not fitted, the
        two components are equal, or a threshold lies outside (0, 1].

    Examples
    --------
    >>> pca.correlation_loadings_plot()
    >>> pls.correlation_loadings_plot(pc_horiz=1, pc_vert=3)
    >>> pca.correlation_loadings_plot(variance_ellipses=(0.75, 0.95))
    """
    if not hasattr(model, "r2_per_variable_"):
        msg = "Model is not fitted. Call fit() before plotting the correlation loadings."
        raise ValueError(msg)

    available = list(model.r2_per_variable_.columns)
    requested_axes = {"pc_horiz": pc_horiz, "pc_vert": pc_vert}
    for axis_name, component in requested_axes.items():
        if component not in available:
            msg = f"{axis_name}={component} is not a fitted component; choose from {available}."
            raise ValueError(msg)
    if pc_horiz == pc_vert:
        msg = "pc_horiz and pc_vert must be different components."
        raise ValueError(msg)

    # Convert every threshold first, then range-check them in order.
    ellipse_levels = list(map(float, variance_ellipses))
    for level in ellipse_levels:
        if not 0 < level <= 1:
            msg = f"Each value in variance_ellipses must be a fraction in (0, 1]; got {level}."
            raise ValueError(msg)

    is_pls = hasattr(model, "r2y_per_variable_")
    x_loadings = model.x_loadings_ if is_pls else model.loadings_

    class Settings(BaseModel):
        """Validated display settings for the correlation-loadings plot."""

        title: str = f"Correlation loadings: components {pc_horiz} and {pc_vert}"
        x_marker_color: str | None = None
        y_marker_color: str | None = None
        ellipse_color: str = "grey"
        show_labels: bool = True
        show_legend: bool = True
        html_image_height: float = 600.0
        html_aspect_ratio_w_over_h: float = 1.0
        template: str = DEFAULT_THEME

    validated = Settings(**settings) if settings else Settings()
    setdict = validated.model_dump()
    if fig is None:
        fig = go.Figure()

    # Variance ellipses, drawn first so the variable markers sit on top.
    ellipse_color = setdict["ellipse_color"]
    for level in ellipse_levels:
        # Squared distance from the origin is the variance fraction, so the
        # circle for a threshold has radius sqrt(threshold).
        radius = float(np.sqrt(level))
        fig.add_shape(
            type="circle",
            xref="x",
            yref="y",
            x0=-radius,
            y0=-radius,
            x1=radius,
            y1=radius,
            line=dict(color=ellipse_color, width=1, dash="dot"),
        )
        fig.add_annotation(
            x=0,
            y=radius,
            text=f"{level * 100:g}%",
            showarrow=False,
            yshift=9,
            font=dict(color=ellipse_color, size=11),
        )
    fig.add_hline(y=0, line_color=REFERENCE_LINE_COLOR, line_width=1)
    fig.add_vline(x=0, line_color=REFERENCE_LINE_COLOR, line_width=1)

    mode = "markers+text" if setdict["show_labels"] else "markers"

    def _variable_trace(r2_table: pd.DataFrame, loadings: pd.DataFrame, marker: dict, label: str) -> go.Scatter:
        """Build the marker trace for one block of variables."""
        return go.Scatter(
            x=_correlation_loadings(r2_table, loadings, pc_horiz),
            y=_correlation_loadings(r2_table, loadings, pc_vert),
            mode=mode,
            text=[str(name) for name in r2_table.index],
            textposition="top center",
            marker=marker,
            name=label,
        )

    fig.add_trace(
        _variable_trace(
            model.r2_per_variable_,
            x_loadings,
            dict(color=setdict["x_marker_color"], size=8, symbol="circle"),
            "X-variables",
        )
    )
    if is_pls:
        fig.add_trace(
            _variable_trace(
                model.r2y_per_variable_,
                model.y_loadings_,
                dict(color=setdict["y_marker_color"], size=9, symbol="diamond"),
                "Y-variables",
            )
        )

    def _axis_title(component: int) -> str:
        """Axis label: the component number with its R2 as a percentage."""
        explained = float(model.r2_per_component_[component]) * 100.0
        return f"Component {component} ({explained:.1f}%)"

    # Both axes span slightly more than the unit circle.
    axis_common: dict = dict(range=[-1.08, 1.08], zeroline=False)
    image_height = setdict["html_image_height"]
    fig.update_layout(
        template=setdict["template"],
        title_text=setdict["title"],
        hovermode="closest",
        showlegend=setdict["show_legend"] and is_pls,
        autosize=False,
        xaxis={"title_text": _axis_title(pc_horiz), **axis_common},
        # Anchoring y to x at ratio 1 keeps the circles round.
        yaxis={"title_text": _axis_title(pc_vert), "scaleanchor": "x", "scaleratio": 1, **axis_common},
        width=setdict["html_aspect_ratio_w_over_h"] * image_height,
        height=image_height,
    )
    return fig
[docs]
def predictions_vs_observed_plot(
    model: BaseEstimator,
    *,
    y_observed: pd.DataFrame | np.ndarray,
    variable: str | None = None,
    settings: dict | None = None,
    fig: go.Figure | None = None,
) -> go.Figure:
    """Generate an observed-vs-predicted (parity) plot for a fitted PLS model.

    Plots the calibration predictions against the observed Y values, with a
    ``y = x`` reference line and an RMSE annotation. Points lying close to the
    reference line indicate good predictions.

    Parameters
    ----------
    model : PLS object
        A fitted PLS model generated by this library. It must expose a
        ``predictions_`` DataFrame with one column per Y-variable.
    y_observed : array-like of shape (n_samples, n_targets)
        The observed Y values, on the same scale as the data used to fit the
        model (for example the scaled Y from :class:`MCUVScaler`). A DataFrame
        is matched to the model by column name. Unlabelled input (a NumPy
        array or nested list) with as many columns as ``model.predictions_``
        is matched to the model's Y-variables by position.
    variable : str, optional
        Which Y-variable to plot. Defaults to the first Y-variable.
    settings : dict
        Default settings::

            {
                "title": "Observed vs predicted ...",  # str: overall plot title
                "marker_color": None,  # str|None: data-marker colour; None uses the theme
                "reference_color": "#9CA3AF",  # str: colour of the y = x line
                "html_image_height": 500,  # float: image height in pixels
                "html_aspect_ratio_w_over_h": 1.0,  # float: width as ratio of height
                "template": "pi_journal",  # str: registered Plotly theme name
            }

    fig : go.Figure, optional
        An existing figure to draw onto. A new figure is created if omitted.

    Returns
    -------
    go.Figure
        The figure holding the reference line, the observation markers and
        the RMSE annotation.

    Raises
    ------
    ValueError
        If the model has no ``predictions_`` attribute, if ``variable`` is not
        a column of ``model.predictions_`` or of ``y_observed``, or if
        ``y_observed`` has a different number of rows than
        ``model.predictions_``.

    Examples
    --------
    >>> pls.predictions_vs_observed_plot(y_observed=Y_scaled)
    >>> pls.predictions_vs_observed_plot(y_observed=Y_scaled, variable="quality")
    """
    if not hasattr(model, "predictions_"):
        msg = "Model is not fitted. Call fit() before plotting predictions vs observed."
        raise ValueError(msg)
    predictions = model.predictions_

    if not isinstance(y_observed, pd.DataFrame):
        y_observed = pd.DataFrame(y_observed)
        # A bare array or list carries no column names: pandas labels its
        # columns 0..k-1, which can never match the model's Y-variable names.
        # When the column count agrees with the model, adopt the model's
        # labels positionally so array input is actually usable. Labelled
        # input (named Series, dict) keeps its own names.
        unlabelled = isinstance(y_observed.columns, pd.RangeIndex)
        if unlabelled and y_observed.shape[1] == predictions.shape[1]:
            y_observed.columns = predictions.columns

    if variable is None:
        # Keep the raw column label rather than its ``str()`` form, so that
        # non-string labels still pass the membership checks and the lookups
        # below. The f-strings render it exactly as ``str()`` would.
        variable = predictions.columns[0]
    if variable not in predictions.columns:
        msg = f"Unknown Y-variable '{variable}'. Known: {list(predictions.columns)}."
        raise ValueError(msg)
    if variable not in y_observed.columns:
        msg = f"y_observed has no column '{variable}'. Its columns are {list(y_observed.columns)}."
        raise ValueError(msg)
    if y_observed.shape[0] != predictions.shape[0]:
        msg = (
            f"y_observed must have {predictions.shape[0]} rows (the number of training "
            f"observations), got {y_observed.shape[0]}."
        )
        raise ValueError(msg)

    # Rows are paired by position: both columns are reduced to plain arrays.
    observed = y_observed[variable].to_numpy(dtype=float)
    predicted = predictions[variable].to_numpy(dtype=float)
    rmse = float(np.sqrt(np.mean((observed - predicted) ** 2)))

    # Shared axis limits so the y = x line runs corner to corner. A degenerate
    # span (all values equal) falls back to a fixed padding of 1.0.
    lo = float(min(observed.min(), predicted.min()))
    hi = float(max(observed.max(), predicted.max()))
    pad = 0.05 * (hi - lo) if hi > lo else 1.0

    class Settings(BaseModel):
        """Validated display settings for the predictions-vs-observed plot."""

        title: str = f"Observed vs predicted for {variable}"
        marker_color: str | None = None
        reference_color: str = REFERENCE_LINE_COLOR
        html_image_height: float = 500.0
        html_aspect_ratio_w_over_h: float = 1.0
        template: str = DEFAULT_THEME

    setdict = Settings(**settings).model_dump() if settings else Settings().model_dump()

    if fig is None:
        fig = go.Figure()

    # Reference line first, so the observation markers are drawn on top of it.
    fig.add_trace(
        go.Scatter(
            x=[lo - pad, hi + pad],
            y=[lo - pad, hi + pad],
            mode="lines",
            line=dict(color=setdict["reference_color"], dash="dash"),
            name="y = x",
        )
    )
    fig.add_trace(
        go.Scatter(
            x=observed,
            y=predicted,
            mode="markers",
            marker=dict(color=setdict["marker_color"], size=7),
            name="Observations",
        )
    )
    # RMSE label placed 5% of the data span in from the top-left corner.
    fig.add_annotation(
        x=lo + 0.05 * (hi - lo),
        y=hi - 0.05 * (hi - lo),
        text=f"RMSE = {rmse:.4g}",
        showarrow=False,
    )

    axis_common: dict = dict(range=[lo - pad, hi + pad])
    fig.update_layout(
        template=setdict["template"],
        title_text=setdict["title"],
        hovermode="closest",
        showlegend=False,
        autosize=False,
        xaxis=dict(title_text=f"Observed: {variable}", **axis_common),
        # Anchor y to x at a 1:1 ratio so the reference line sits at 45 degrees.
        yaxis=dict(
            title_text=f"Predicted: {variable}",
            scaleanchor="x",
            scaleratio=1,
            **axis_common,
        ),
        width=setdict["html_aspect_ratio_w_over_h"] * setdict["html_image_height"],
        height=setdict["html_image_height"],
    )
    return fig
[docs]
def coefficient_plot(
    model: BaseEstimator,
    variable: str | None = None,
    settings: dict | None = None,
    fig: go.Figure | None = None,
) -> go.Figure:
    """Generate a bar plot of the PLS regression coefficients.

    Shows ``beta_coefficients_`` for one Y-variable: one bar per X-variable,
    mapping the (preprocessed) X onto the predicted Y. Tall bars mark the
    X-variables that most strongly drive the prediction.

    Parameters
    ----------
    model : PLS object
        A fitted PLS model generated by this library. It must expose a
        ``beta_coefficients_`` DataFrame with one column per Y-variable; the
        row labels are used as the bar labels.
    variable : str, optional
        Which Y-variable's coefficients to plot. Defaults to the first one.
    settings : dict
        Default settings::

            {
                "title": "Regression coefficients ...",  # str: overall plot title
                "bar_color": None,  # str|None: bar colour; None uses the theme
                "html_image_height": 500,  # float: image height in pixels
                "html_aspect_ratio_w_over_h": 16/9,  # float: width as ratio of height
                "template": "pi_journal",  # str: registered Plotly theme name
            }

    fig : go.Figure, optional
        An existing figure to draw onto. A new figure is created if omitted.

    Returns
    -------
    go.Figure
        The figure holding the coefficient bars and a zero reference line.

    Raises
    ------
    ValueError
        If the model has no ``beta_coefficients_`` attribute, or if
        ``variable`` is not one of its columns.

    Examples
    --------
    >>> pls.coefficient_plot()
    >>> pls.coefficient_plot(variable="quality")
    """
    if not hasattr(model, "beta_coefficients_"):
        msg = "Model is not fitted. Call fit() before plotting the coefficients."
        raise ValueError(msg)
    beta = model.beta_coefficients_

    if variable is None:
        # Keep the raw column label rather than its ``str()`` form, so that a
        # non-string label still passes the membership check and the column
        # lookup below. The f-strings render it exactly as ``str()`` would.
        variable = beta.columns[0]
    if variable not in beta.columns:
        msg = f"Unknown Y-variable '{variable}'. Known: {list(beta.columns)}."
        raise ValueError(msg)

    coefficients = beta[variable]
    features = [str(name) for name in coefficients.index]

    class Settings(BaseModel):
        """Validated display settings for the regression-coefficient plot."""

        title: str = f"Regression coefficients for {variable}"
        bar_color: str | None = None
        html_image_height: float = 500.0
        html_aspect_ratio_w_over_h: float = 16 / 9.0
        template: str = DEFAULT_THEME

    setdict = Settings(**settings).model_dump() if settings else Settings().model_dump()

    if fig is None:
        fig = go.Figure()

    fig.add_trace(
        go.Bar(
            x=features,
            y=coefficients.to_numpy(dtype=float),
            marker_color=setdict["bar_color"],
            name=f"beta: {variable}",
        )
    )
    # Zero line makes the sign of each coefficient easy to read.
    fig.add_hline(y=0, line_color=REFERENCE_LINE_COLOR, line_width=1)
    fig.update_layout(
        template=setdict["template"],
        title_text=setdict["title"],
        hovermode="x",
        showlegend=False,
        autosize=False,
        # Force a categorical axis so numeric-looking names are not treated as numbers.
        xaxis=dict(title_text="X-variable", type="category"),
        yaxis=dict(title_text=f"Coefficient ({variable})"),
        width=setdict["html_aspect_ratio_w_over_h"] * setdict["html_image_height"],
        height=setdict["html_image_height"],
    )
    return fig
class Plot:
"""Create plots of estimators."""
def __init__(self, parent: BaseEstimator) -> None:
    """Store the estimator that this plotting helper is attached to.

    Parameters
    ----------
    parent : BaseEstimator
        The estimator to plot; kept on the instance as ``_parent``.
    """
    self._parent = parent
def scores(self, pc_horiz: int = 1, pc_vert: int = 2, **kwargs) -> go.Figure:
    """Generate a score plot.

    Delegates to ``score_plot``, handing it this ``Plot`` object as the model
    argument.

    Parameters
    ----------
    pc_horiz : int
        Component shown on the horizontal axis. Defaults to 1.
    pc_vert : int
        Component shown on the vertical axis. Defaults to 2.
    **kwargs
        Forwarded unchanged to ``score_plot``.

    Returns
    -------
    go.Figure
        Whatever ``score_plot`` returns.
    """
    axes = {"pc_horiz": pc_horiz, "pc_vert": pc_vert}
    return score_plot(self, **axes, **kwargs)
def loadings(self, pc_horiz: int = 1, pc_vert: int = 2, **kwargs) -> go.Figure:
    """Generate a loading plot.

    Delegates to ``loading_plot``, handing it this ``Plot`` object as the
    model argument.

    Parameters
    ----------
    pc_horiz : int
        Component shown on the horizontal axis. Defaults to 1.
    pc_vert : int
        Component shown on the vertical axis. Defaults to 2.
    **kwargs
        Forwarded unchanged to ``loading_plot``.

    Returns
    -------
    go.Figure
        Whatever ``loading_plot`` returns.
    """
    axes = {"pc_horiz": pc_horiz, "pc_vert": pc_vert}
    return loading_plot(self, **axes, **kwargs)