# Source code for process_improve.multivariate.plots

# (c) Kevin Dunn, 2010-2026. MIT License. Based on own private work over the years.

# Built-in libraries
from __future__ import annotations

import json
from collections.abc import Sequence

import numpy as np
import pandas as pd
from pydantic import BaseModel, field_validator
from sklearn.base import BaseEstimator

try:
    import plotly.graph_objects as go
except ImportError:  # pragma: no cover - exercised via env-without-plotly
    from process_improve._extras import _MissingExtra
    go = _MissingExtra("plotly", "plotting")  # type: ignore[assignment]

from process_improve.visualization.themes import (
    DEFAULT_THEME,
    LIMIT_LINE_COLOR,
    REFERENCE_LINE_COLOR,
)


def _decode_highlight_style(key: str) -> dict:
    """Decode an ``items_to_highlight`` key into a Plotly marker-style dict.

    Each key must be a JSON-encoded Plotly marker/line-style spec. Decoding it
    here (rather than calling ``json.loads`` inline) means a malformed key
    raises a clear ``ValueError`` at the API surface instead of a confusing
    ``json.JSONDecodeError`` deep inside the trace-building loop. Mirrors the
    SEC-32 guard already applied in ``process_improve.batch.plotting``.
    """
    try:
        return json.loads(key)
    except json.JSONDecodeError as exc:
        raise ValueError(
            f"items_to_highlight: each key must be a JSON-encoded Plotly "
            f'style spec (e.g. \'{{"color": "red", "symbol": "cross"}}\'). '
            f"Got {key!r}."
        ) from exc


def plot_pre_checks(model: BaseEstimator, pc_horiz: int, pc_vert: int, pc_depth: int) -> bool:
    """Validate the component numbers requested for a plot.

    The number of fitted components is read from ``model.n_components`` when the
    model has that attribute, and from ``model._parent.n_components`` otherwise.

    Parameters
    ----------
    model : BaseEstimator
        The latent variable model being plotted.
    pc_horiz, pc_vert : int
        Components for the horizontal and vertical axes; each must lie in
        ``1..n_components``.
    pc_depth : int
        Component for the depth axis; must lie in ``-1..n_components``.

    Returns
    -------
    bool
        Always ``True`` when every check passes.

    Raises
    ------
    ValueError
        If any component is out of range, or the three values are not distinct.
    """
    if hasattr(model, "n_components"):
        n_components = model.n_components
    else:
        n_components = model._parent.n_components

    # The horizontal and vertical axes share one rule; check them in that order.
    for axis_name, component in (("pc_horiz", pc_horiz), ("pc_vert", pc_vert)):
        if 0 < component <= n_components:
            continue
        raise ValueError(
            f"The model has {n_components} components. Ensure that 1 <= {axis_name} <= {n_components}."
        )

    depth_in_range = -1 <= pc_depth <= n_components
    if not depth_in_range:
        raise ValueError(
            f"The model has {n_components} components. Ensure that pc_depth is -1 (no depth axis) "
            f"or 1 <= pc_depth <= {n_components}."
        )

    # A set collapses repeated values, so fewer than 3 members means a clash.
    if len(set((pc_horiz, pc_vert, pc_depth))) < 3:
        raise ValueError("Specify distinct components for each axis.")

    return True


def score_plot(  # noqa: C901, PLR0913
    model: BaseEstimator,
    pc_horiz: int = 1,
    pc_vert: int = 2,
    pc_depth: int = -1,
    items_to_highlight: dict[str, list] | None = None,
    settings: dict | None = None,
    fig: go.Figure | None = None,
) -> go.Figure:
    """Generate a 2D or 3D score plot for the given latent variable model.

    A 2D scatter on (``pc_horiz``, ``pc_vert``) is produced by default. Supplying
    ``pc_depth >= 1`` adds a third score axis and switches the underlying trace
    to ``Scatter3d``.

    Parameters
    ----------
    model : MVmodel object (PCA, or PLS)
        A latent variable model generated by this library.
    pc_horiz : int, optional
        Which component to plot on the horizontal axis, by default 1 (the first component)
    pc_vert : int, optional
        Which component to plot on the vertical axis, by default 2 (the second component)
    pc_depth : int, optional
        If pc_depth >= 1, then a 3D score plot is generated, with this component on the 3rd axis
    items_to_highlight : dict, optional
        Keys are JSON strings parseable by ``json.loads`` into a Plotly marker
        specifier; values are lists of index names to highlight. For example::

            items_to_highlight = {'{"color": "red", "symbol": "cross"}': items_in_red}

        will highlight the items in ``items_in_red`` with the given colour and shape.
        Names that are not in the score index are ignored. An observation listed
        under several keys is drawn with the first key that names it.
    settings : dict
        Default settings::

            {
                "show_ellipse": True,                # bool: show the Hotelling's T2 ellipse (2D plots only)
                "ellipse_conf_level": 0.95,          # float: ellipse confidence level (< 1.00)
                "title": "Score plot of ...",        # str: overall plot title
                "show_labels": False,                # bool: add a label for each observation
                "show_legend": True,                 # bool: show clickable legend
                "html_image_height": 500,            # int: image height in pixels
                "html_aspect_ratio_w_over_h": 16/9,  # float: width as ratio of height
                "template": "pi_journal",            # str: registered Plotly theme name
            }
    fig : go.Figure, optional
        An existing figure to draw onto. A new figure is created if omitted.

    Returns
    -------
    go.Figure
        The figure with the score traces (and, for 2D plots, the ellipse) added.

    Raises
    ------
    ValueError
        If the requested components are invalid (see ``plot_pre_checks``), or a
        key of ``items_to_highlight`` is not a JSON-encoded style spec.

    Examples
    --------
    >>> pca = PCA(n_components=3).fit(X_scaled)
    >>> pca.score_plot()                                    # PC1 vs PC2
    >>> pca.score_plot(pc_horiz=1, pc_vert=3)               # PC1 vs PC3
    >>> pca.score_plot(pc_horiz=1, pc_vert=2, pc_depth=3)   # 3D
    """
    plot_pre_checks(model, pc_horiz, pc_vert, pc_depth)
    is_3d = pc_depth >= 1
    data_to_plot = model.scores_ if hasattr(model, "scores_") else model._parent.t_scores_super

    # The 2D plot previously received an empty default title; always name the
    # components shown, and append the depth component only for the 3D plot.
    default_title = f"Score plot of component {pc_horiz} vs component {pc_vert}"
    if is_3d:
        default_title += f" vs component {pc_depth}"

    class Settings(BaseModel):
        """Validated display settings for the score plot."""

        show_ellipse: bool = True
        ellipse_conf_level: float = 0.95
        title: str = default_title
        show_labels: bool = False
        show_legend: bool = True
        html_image_height: float = 500.0
        html_aspect_ratio_w_over_h: float = 16 / 9.0
        template: str = DEFAULT_THEME

        @field_validator("ellipse_conf_level")
        @classmethod
        def check_ellipse_conf_level(cls, val: float) -> float:
            """Check confidence value is in range."""
            if not 0 < val < 1:
                raise ValueError("0.0 < `ellipse_conf_level` < 1.0")
            return val

    setdict = Settings(**(settings or {})).model_dump()
    if fig is None:
        fig = go.Figure()

    name = "Scores [T]"
    mode = "markers+text" if setdict["show_labels"] else "markers"
    fig.update_layout(xaxis_title_text=f"PC {pc_horiz}", yaxis_title_text=f"PC {pc_vert}")

    # Split the observations into the default group and the highlighted groups.
    # Plain lists (not sets) are used so the original row order is preserved, and
    # the style keys are decoded up front so a malformed key fails before any
    # trace has been added to the figure.
    default_index = list(data_to_plot.index)
    highlights: list[tuple[dict, list]] = []
    if items_to_highlight is not None:
        default_index = list(dict.fromkeys(default_index))  # unique labels, order kept
        for key, items in items_to_highlight.items():
            styling = _decode_highlight_style(key)
            wanted = set(items)
            highlights.append((styling, [label for label in default_index if label in wanted]))
            default_index = [label for label in default_index if label not in wanted]

    def _score_trace(labels: list, marker: dict) -> go.Scatter | go.Scatter3d:
        """Build one scatter trace (2D or 3D) for the given observation labels."""
        common = dict(
            x=data_to_plot.loc[labels, pc_horiz],
            y=data_to_plot.loc[labels, pc_vert],
            name=name,
            mode=mode,
            marker=marker,
            text=labels,
            textposition="top center",
        )
        if is_3d:
            return go.Scatter3d(z=data_to_plot.loc[labels, pc_depth], **common)
        return go.Scatter(**common)

    default_marker = dict(symbol="circle") if is_3d else dict(symbol="circle", size=7)
    fig.add_trace(_score_trace(default_index, default_marker))
    for styling, labels in highlights:
        fig.add_trace(_score_trace(labels, styling))

    # The Hotelling's T2 ellipse and the zero reference lines are 2D constructs
    # drawn on the cartesian axes, so they are only added to the 2D plot.
    if setdict["show_ellipse"] and not is_3d:
        ellipse_coordinates = (
            model.ellipse_coordinates
            if hasattr(model, "ellipse_coordinates")
            else model._parent.ellipse_coordinates
        )
        ellipse = ellipse_coordinates(
            score_horiz=pc_horiz,
            score_vert=pc_vert,
            conf_level=setdict["ellipse_conf_level"],
        )
        fig.add_hline(y=0, line_color=REFERENCE_LINE_COLOR)
        fig.add_vline(x=0, line_color=REFERENCE_LINE_COLOR)
        fig.add_trace(
            go.Scatter(
                x=ellipse[0],
                y=ellipse[1],
                name=f"Hotelling's T^2 [{setdict['ellipse_conf_level'] * 100:.4g}%]",
                mode="lines",
                line=dict(
                    color=LIMIT_LINE_COLOR,
                    width=2,
                ),
            )
        )

    fig.update_layout(
        template=setdict["template"],
        title_text=setdict["title"],
        hovermode="closest",
        showlegend=setdict["show_legend"],
        autosize=False,
        width=setdict["html_aspect_ratio_w_over_h"] * setdict["html_image_height"],
        height=setdict["html_image_height"],
    )
    if is_3d:
        # Re-use the 2D axis settings for the 3D scene. The y-axis must come from
        # the layout's *y* axis (it was previously copied from the x-axis, which
        # gave both scene axes the same title).
        layout = fig.to_dict()["layout"]
        fig.update_layout(
            scene=dict(
                xaxis=layout["xaxis"],
                yaxis=layout["yaxis"],
                zaxis=dict(
                    title_text=f"PC {pc_depth}",
                    mirror=True,
                    showspikes=True,
                    visible=True,
                    gridwidth=1,
                ),
            ),
        )

    return fig
def loading_plot(  # noqa: PLR0913
    model: BaseEstimator,
    loadings_type: str = "p",
    pc_horiz: int = 1,
    pc_vert: int = 2,
    settings: dict | None = None,
    fig: go.Figure | None = None,
) -> go.Figure:
    """Generate a 2-dimensional loadings plot for the given latent variable model.

    Parameters
    ----------
    model : MVmodel object (PCA, or PLS)
        A latent variable model generated by this library.
    loadings_type : str, optional
        A choice of the following (case-insensitive), by default 'p':

            'p'   : the P (projection) loadings: only option possible for PCA
            'w'   : the W loadings: Suitable for PLS
            'w*'  : the W* (or R) loadings: Suitable for PLS
            'w*c' : the W* (from X-space) with C loadings from the Y-space: Suitable for PLS
            'c'   : the C loadings from the Y-space: Suitable for PLS

        A choice the model cannot provide (for example anything besides 'p' for
        a PCA model) or an unrecognised choice is ignored: the W* loadings are
        shown if the model has them, and the P loadings otherwise.
    pc_horiz : int, optional
        Which component to plot on the horizontal axis, by default 1 (the first component)
    pc_vert : int, optional
        Which component to plot on the vertical axis, by default 2 (the second component)
    settings : dict
        Default settings::

            {
                "title": "Loadings plot ...",        # str: overall plot title
                "show_labels": True,                 # bool: add a label for each variable
                "html_image_height": 500,            # int: image height in pixels
                "html_aspect_ratio_w_over_h": 16/9,  # float: width as ratio of height
                "template": "pi_journal",            # str: registered Plotly theme name
            }
    fig : go.Figure, optional
        An existing figure to draw onto. A new figure is created if omitted.

    Returns
    -------
    go.Figure
        The figure with the loadings trace(s) added.

    Raises
    ------
    ValueError
        If the requested components are invalid (see ``plot_pre_checks``).

    Examples
    --------
    >>> pca.loading_plot()                               # P loadings, PC1 vs PC2
    >>> pls.loading_plot(loadings_type="w*c")            # W* and C loadings
    >>> pls.loading_plot(loadings_type="w", pc_vert=3)   # W loadings, PC1 vs PC3
    """
    plot_pre_checks(model, pc_horiz, pc_vert, pc_depth=0)

    # Model attributes each non-P loadings type depends on.
    required_attributes = {
        "w": ("x_weights_",),
        "w*": ("direct_weights_",),
        "w*c": ("direct_weights_", "y_loadings_"),
        "c": ("y_loadings_",),
    }
    requested = loadings_type.lower()
    if requested == "p":
        effective = "p"
    elif requested in required_attributes and all(
        hasattr(model, attribute) for attribute in required_attributes[requested]
    ):
        effective = requested
    else:
        # Unknown type, or one this model cannot supply (e.g. 'w' for PCA):
        # fall back to the model's natural default instead of failing.
        effective = "w*" if hasattr(model, "direct_weights_") else "p"

    class Settings(BaseModel):
        """Validated display settings for the loadings plot."""

        # The title reports what is actually plotted, which may differ from the
        # requested type when a fallback was applied above.
        title: str = f"Loadings plot [{effective.upper()}] of component {pc_horiz} vs component {pc_vert}"
        show_labels: bool = True
        html_image_height: float = 500.0
        html_aspect_ratio_w_over_h: float = 16 / 9.0
        template: str = DEFAULT_THEME

    setdict = Settings(**(settings or {})).model_dump()
    if fig is None:
        fig = go.Figure()

    if effective == "p":
        what = model.loadings_ if hasattr(model, "loadings_") else model.loadings
    elif effective == "w":
        what = model.x_weights_
    elif effective in ("w*", "w*c"):
        what = model.direct_weights_
    else:  # "c"
        what = model.y_loadings_
    # Only the combined 'w*c' view overlays the Y-space loadings.
    extra = model.y_loadings_ if effective == "w*c" else None

    trace_names = {
        "p": "X-space loadings P",
        "w": "X-space loadings W",
        "w*": "X-space loadings W*",
        "w*c": "X-space loadings W*",
        "c": "Y-space loadings C",
    }
    mode = "markers+text" if setdict["show_labels"] else "markers"

    fig.add_trace(
        go.Scatter(
            x=what.loc[:, pc_horiz],
            y=what.loc[:, pc_vert],
            name=trace_names[effective],
            mode=mode,
            marker=dict(
                symbol="circle",
                size=7,
            ),
            text=what.index,
            textposition="top center",
        )
    )

    # A legend is only useful when two families of loadings share the plot.
    add_legend = extra is not None
    if extra is not None:
        fig.add_trace(
            go.Scatter(
                x=extra.loc[:, pc_horiz],
                y=extra.loc[:, pc_vert],
                name="Y-space loadings C",
                mode=mode,
                marker=dict(
                    symbol="star",
                    size=8,
                ),
                text=extra.index,
                textposition="bottom center",
            )
        )

    fig.update_layout(xaxis_title_text=f"PC {pc_horiz}", yaxis_title_text=f"PC {pc_vert}")
    fig.add_hline(y=0, line_color=REFERENCE_LINE_COLOR)
    fig.add_vline(x=0, line_color=REFERENCE_LINE_COLOR)
    fig.update_layout(
        template=setdict["template"],
        title_text=setdict["title"],
        hovermode="closest",
        showlegend=add_legend,
        autosize=False,
        width=setdict["html_aspect_ratio_w_over_h"] * setdict["html_image_height"],
        height=setdict["html_image_height"],
    )
    return fig
def spe_plot(  # noqa: C901
    model: BaseEstimator,
    with_a: int = -1,
    items_to_highlight: dict[str, list] | None = None,
    settings: dict | None = None,
    fig: go.Figure | None = None,
) -> go.Figure:
    """Generate a squared-prediction error (SPE) plot for the given latent variable model.

    The plot uses `with_a` number of latent variables. The default will use the
    total number of latent variables which have already been fitted.

    Parameters
    ----------
    model : MVmodel object (PCA, or PLS)
        A latent variable model generated by this library.
    with_a : int, optional
        Uses this many number of latent variables, and therefore shows the SPE after this number of
        model components. By default the total number of components fitted will be used.
        Negative values index the fitted components from the end.
    items_to_highlight : dict, optional
        Keys are JSON strings parseable by ``json.loads`` into a Plotly marker
        specifier; values are lists of index names to highlight. For example::

            items_to_highlight = {'{"color": "red", "symbol": "cross"}': items_in_red}

        will highlight the items in ``items_in_red`` with the given colour and shape.
        Names that are not in the SPE index are ignored. An observation listed
        under several keys is drawn with the first key that names it.
    settings : dict
        Default settings::

            {
                "show_limit": True,                  # bool: show the SPE confidence limit line
                "conf_level": 0.95,                  # float: confidence level for limit (< 1.00)
                "title": "SPE plot ...",             # str: overall plot title
                "default_marker": {...},             # dict: e.g. dict(symbol="circle", size=7)
                "show_labels": False,                # bool: add a label for each observation
                "show_legend": False,                # bool: show clickable legend
                "html_image_height": 500,            # int: image height in pixels
                "html_aspect_ratio_w_over_h": 16/9,  # float: width as ratio of height
                "template": "pi_journal",            # str: registered Plotly theme name
            }
    fig : go.Figure, optional
        An existing figure to draw onto. A new figure is created if omitted.

    Returns
    -------
    go.Figure
        The figure with the SPE values (and optionally the limit line) added.

    Raises
    ------
    ValueError
        If ``with_a`` is 0 or exceeds the number of fitted components, or a key
        of ``items_to_highlight`` is not a JSON-encoded style spec.

    Examples
    --------
    >>> pca.spe_plot()
    >>> pca.spe_plot(settings={"conf_level": 0.99, "show_labels": True})
    """
    # TO CONSIDER: allow a setting `as_line`: which connects the points with line segments
    if with_a < 0:
        # Get the actual name of the last column in the model if negative indexing is used
        with_a = model.spe_.columns[with_a]
    elif with_a == 0:
        raise ValueError("`with_a` must be >= 1, or specified with negative indexing.")
    if not with_a <= model.n_components:
        raise ValueError(
            f"`with_a` must be <= the number of components fitted "
            f"({model.n_components}); got {with_a}."
        )

    class Settings(BaseModel):
        """Validated display settings for the SPE plot."""

        show_limit: bool = True
        conf_level: float = 0.95
        # None means "build the default title once the settings are validated",
        # so that the title reflects the conf_level the caller actually chose.
        title: str | None = None
        default_marker: dict = dict(symbol="circle", size=7)
        show_labels: bool = False
        show_legend: bool = False
        html_image_height: float = 500.0
        html_aspect_ratio_w_over_h: float = 16 / 9.0
        template: str = DEFAULT_THEME

        @field_validator("conf_level")
        @classmethod
        def check_conf_level(cls, val: float) -> float:
            """Check confidence value is in range."""
            if not 0 < val < 1:
                raise ValueError("0.0 < `conf_level` < 1.0")
            return val

    setdict = Settings(**(settings or {})).model_dump()
    if fig is None:
        fig = go.Figure()

    plural = "s" if with_a > 1 else ""
    conf_percent = f"{setdict['conf_level'] * 100:.3g}%"
    name = f"SPE values after {with_a} component{plural}"
    limit_name = f"{conf_percent} limit"
    if setdict["title"] is None:
        setdict["title"] = f"Squared prediction error plot after fitting {with_a} component{plural}"
        if setdict["show_limit"]:
            setdict["title"] += f", with the {conf_percent} confidence limit"
    mode = "markers+text" if setdict["show_labels"] else "markers"

    # Split the observations into the default group and the highlighted groups.
    # Plain lists (not sets) keep the observations in their original order along
    # the x-axis, and decoding the style keys here makes a malformed key fail
    # before any trace has been added to the figure.
    default_index = list(model.spe_.index)
    highlights: list[tuple[dict, list]] = []
    if items_to_highlight is not None:
        default_index = list(dict.fromkeys(default_index))  # unique labels, order kept
        for key, items in items_to_highlight.items():
            styling = _decode_highlight_style(key)
            wanted = set(items)
            highlights.append((styling, [label for label in default_index if label in wanted]))
            default_index = [label for label in default_index if label not in wanted]

    fig.add_trace(
        go.Scatter(
            x=default_index,
            y=model.spe_.loc[default_index, with_a],
            name=name,
            mode=mode,
            marker=setdict["default_marker"],
            text=default_index,
            textposition="top center",
            showlegend=setdict["show_legend"],
        )
    )

    # Items to highlight, if any
    for styling, index in highlights:
        fig.add_trace(
            go.Scatter(
                x=index,
                y=model.spe_.loc[index, with_a],
                name=name,
                mode=mode,
                marker=styling,
                text=index,
                textposition="top center",
            )
        )

    # Honour the `show_limit` setting: the limit line is optional.
    if setdict["show_limit"]:
        limit_SPE_conf_level = model.spe_limit(conf_level=setdict["conf_level"])
        fig.add_hline(
            y=limit_SPE_conf_level,
            line_color=LIMIT_LINE_COLOR,
            annotation_text=limit_name,
            annotation_position="bottom right",
            name=limit_name,
        )
    fig.add_hline(y=0, line_color=REFERENCE_LINE_COLOR)
    fig.update_layout(
        template=setdict["template"],
        title_text=setdict["title"],
        hovermode="closest",
        showlegend=setdict["show_legend"],
        autosize=False,
        # The y-axis shows SPE values, so it is titled with the series name and
        # not with the label of the limit line.
        yaxis_title_text=name,
        width=setdict["html_aspect_ratio_w_over_h"] * setdict["html_image_height"],
        height=setdict["html_image_height"],
    )
    return fig
def t2_plot(  # noqa: C901
    model: BaseEstimator,
    with_a: int = -1,
    items_to_highlight: dict[str, list] | None = None,
    settings: dict | None = None,
    fig: go.Figure | None = None,
) -> go.Figure:
    """Generate a Hotelling's T2 (T^2) plot for the given latent variable model.

    The plot uses `with_a` number of latent variables. The default will use the
    total number of latent variables which have already been fitted.

    Parameters
    ----------
    model : MVmodel object (PCA, or PLS)
        A latent variable model generated by this library.
    with_a : int, optional
        Uses this many number of latent variables, and therefore shows the T2 after this number of
        model components. By default the total number of components fitted will be used.
        Negative values index the fitted components from the end.
    items_to_highlight : dict, optional
        Keys are JSON strings parseable by ``json.loads`` into a Plotly marker
        specifier; values are lists of index names to highlight. For example::

            items_to_highlight = {'{"color": "red", "symbol": "cross"}': items_in_red}

        will highlight the items in ``items_in_red`` with the given colour and shape.
        Names that are not in the T2 index are ignored. An observation listed
        under several keys is drawn with the first key that names it.
    settings : dict
        Default settings::

            {
                "show_limit": True,                  # bool: show the T2 confidence limit line
                "conf_level": 0.95,                  # float: confidence level for limit (< 1.00)
                "title": "T2 plot ...",              # str: overall plot title
                "default_marker": {...},             # dict: e.g. dict(symbol="circle", size=7)
                "show_labels": False,                # bool: add a label for each observation
                "show_legend": False,                # bool: show clickable legend
                "html_image_height": 500,            # int: image height in pixels
                "html_aspect_ratio_w_over_h": 16/9,  # float: width as ratio of height
                "template": "pi_journal",            # str: registered Plotly theme name
            }
    fig : go.Figure, optional
        An existing figure to draw onto. A new figure is created if omitted.

    Returns
    -------
    go.Figure
        The figure with the T2 values (and optionally the limit line) added.

    Raises
    ------
    ValueError
        If ``with_a`` is 0 or exceeds the number of fitted components, or a key
        of ``items_to_highlight`` is not a JSON-encoded style spec.

    Examples
    --------
    >>> pca.t2_plot()
    >>> pca.t2_plot(settings={"conf_level": 0.99, "show_labels": True})
    """
    # TO CONSIDER: allow a setting `as_line`: which connects the points with line segments
    if with_a < 0:
        # Translate negative indexing into the actual column name in the model
        with_a = model.hotellings_t2_.columns[with_a]
    elif with_a == 0:
        raise ValueError("`with_a` must be >= 1, or specified with negative indexing.")
    if not with_a <= model.n_components:
        raise ValueError(
            f"`with_a` must be <= the number of components fitted "
            f"({model.n_components}); got {with_a}."
        )

    class Settings(BaseModel):
        """Validated display settings for the Hotelling's T2 plot."""

        show_limit: bool = True
        conf_level: float = 0.95
        # None means "build the default title once the settings are validated",
        # so that the title reflects the conf_level the caller actually chose.
        title: str | None = None
        default_marker: dict = dict(symbol="circle", size=7)
        show_labels: bool = False
        show_legend: bool = False
        html_image_height: float = 500.0
        html_aspect_ratio_w_over_h: float = 16 / 9.0
        template: str = DEFAULT_THEME

        @field_validator("conf_level")
        @classmethod
        def check_conf_level(cls, val: float) -> float:
            """Check confidence value is in range."""
            if not 0 < val < 1:
                raise ValueError("0.0 < `conf_level` < 1.0")
            return val

    setdict = Settings(**(settings or {})).model_dump()
    if fig is None:
        fig = go.Figure()

    plural = "s" if with_a > 1 else ""
    conf_percent = f"{setdict['conf_level'] * 100:.3g}%"
    name = f"T2 values after {with_a} component{plural}"
    limit_name = f"{conf_percent} limit"
    if setdict["title"] is None:
        setdict["title"] = f"Hotelling's T2 plot after fitting {with_a} component{plural}"
        if setdict["show_limit"]:
            setdict["title"] += f", with the {conf_percent} confidence limit"
    mode = "markers+text" if setdict["show_labels"] else "markers"

    # Split the observations into the default group and the highlighted groups.
    # Plain lists (not sets) keep the observations in their original order along
    # the x-axis, and decoding the style keys here makes a malformed key fail
    # before any trace has been added to the figure.
    default_index = list(model.hotellings_t2_.index)
    highlights: list[tuple[dict, list]] = []
    if items_to_highlight is not None:
        default_index = list(dict.fromkeys(default_index))  # unique labels, order kept
        for key, items in items_to_highlight.items():
            styling = _decode_highlight_style(key)
            wanted = set(items)
            highlights.append((styling, [label for label in default_index if label in wanted]))
            default_index = [label for label in default_index if label not in wanted]

    fig.add_trace(
        go.Scatter(
            x=default_index,
            y=model.hotellings_t2_.loc[default_index, with_a],
            name=name,
            mode=mode,
            marker=setdict["default_marker"],
            text=default_index,
            textposition="top center",
            showlegend=setdict["show_legend"],
        )
    )

    # Items to highlight, if any
    for styling, index in highlights:
        fig.add_trace(
            go.Scatter(
                x=index,
                y=model.hotellings_t2_.loc[index, with_a],
                name=name,
                mode=mode,
                marker=styling,
                text=index,
                textposition="top center",
            )
        )

    # Honour the `show_limit` setting: the limit line is optional.
    if setdict["show_limit"]:
        limit_HT2_conf_level = model.hotellings_t2_limit(conf_level=setdict["conf_level"])
        fig.add_hline(
            y=limit_HT2_conf_level,
            line_color=LIMIT_LINE_COLOR,
            annotation_text=limit_name,
            annotation_position="bottom right",
            name=limit_name,
        )
    fig.add_hline(y=0, line_color=REFERENCE_LINE_COLOR)
    fig.update_layout(
        template=setdict["template"],
        title_text=setdict["title"],
        hovermode="closest",
        showlegend=setdict["show_legend"],
        autosize=False,
        # The y-axis shows T2 values, so it is titled with the series name and
        # not with the label of the limit line.
        yaxis_title_text=name,
        width=setdict["html_aspect_ratio_w_over_h"] * setdict["html_image_height"],
        height=setdict["html_image_height"],
    )
    return fig
def explained_variance_plot(
    model: BaseEstimator,
    settings: dict | None = None,
    fig: go.Figure | None = None,
) -> go.Figure:
    """Draw the variance explained by each component of a fitted model.

    Each component's explained variance is shown as a bar, and the cumulative
    explained variance is overlaid as a line with markers. The default title
    refers to the Y-variance when the model's class is named ``PLS`` and to the
    X-variance otherwise.

    Parameters
    ----------
    model : MVmodel object (PCA, or PLS)
        A fitted latent variable model generated by this library. It must expose
        ``r2_per_component_`` and ``r2_cumulative_``.
    settings : dict
        Default settings::

            {
                "as_percentage": True,               # bool: y-axis as a percentage, else a fraction
                "title": "Variance explained ...",   # str: overall plot title
                "bar_color": None,                   # str|None: bar colour; None uses the theme
                "line_color": None,                  # str|None: line colour; None uses the theme
                "show_legend": True,                 # bool: show clickable legend
                "html_image_height": 500,            # int: image height in pixels
                "html_aspect_ratio_w_over_h": 16/9,  # float: width as ratio of height
                "template": "pi_journal",            # str: registered Plotly theme name
            }
    fig : go.Figure, optional
        An existing figure to draw onto. A new figure is created if omitted.

    Returns
    -------
    go.Figure
        The figure with the bar and line traces added.

    Raises
    ------
    ValueError
        If the model has no ``r2_per_component_`` attribute (it is not fitted).

    Examples
    --------
    >>> pca.explained_variance_plot()
    >>> pls.explained_variance_plot(settings={"as_percentage": False})
    """
    if not hasattr(model, "r2_per_component_"):
        raise ValueError("Model is not fitted. Call fit() before plotting the explained variance.")

    if type(model).__name__ == "PLS":
        block_label = "Y-variance"
    else:
        block_label = "X-variance"

    class Settings(BaseModel):
        """Validated display settings for the explained-variance plot."""

        as_percentage: bool = True
        title: str = f"{block_label} explained per component"
        bar_color: str | None = None
        line_color: str | None = None
        show_legend: bool = True
        html_image_height: float = 500.0
        html_aspect_ratio_w_over_h: float = 16 / 9.0
        template: str = DEFAULT_THEME

    setdict = Settings(**(settings or {})).model_dump()
    figure = go.Figure() if fig is None else fig

    # Percentages and fractions differ only by a factor and the axis unit.
    if setdict["as_percentage"]:
        scale, unit = 100.0, "%"
    else:
        scale, unit = 1.0, "fraction"

    component_labels = list(map(str, model.r2_per_component_.index))
    bar_heights = model.r2_per_component_.to_numpy(dtype=float) * scale
    running_total = model.r2_cumulative_.to_numpy(dtype=float) * scale
    legend_on = setdict["show_legend"]
    height = setdict["html_image_height"]

    bars = go.Bar(
        x=component_labels,
        y=bar_heights,
        name="Per component",
        marker_color=setdict["bar_color"],
        showlegend=legend_on,
    )
    line = go.Scatter(
        x=component_labels,
        y=running_total,
        name="Cumulative",
        mode="lines+markers",
        line={"color": setdict["line_color"]},
        showlegend=legend_on,
    )
    figure.add_trace(bars)
    figure.add_trace(line)

    figure.update_layout(
        template=setdict["template"],
        title_text=setdict["title"],
        hovermode="x",
        showlegend=legend_on,
        autosize=False,
        xaxis={"title_text": "Component", "type": "category"},
        yaxis={"title_text": f"Variance explained ({unit})", "rangemode": "tozero"},
        width=setdict["html_aspect_ratio_w_over_h"] * height,
        height=height,
    )
    return figure
def _per_component_r2(cumulative: pd.DataFrame, component: int) -> np.ndarray: """Per-component R2 (per variable) from a cumulative-R2-per-variable table.""" columns: list[object] = list(cumulative.columns) position = columns.index(component) values = cumulative.iloc[:, position].to_numpy(dtype=float) if position == 0: return values return values - cumulative.iloc[:, position - 1].to_numpy(dtype=float) def _correlation_loadings(cumulative_r2: pd.DataFrame, loadings: pd.DataFrame, component: int) -> np.ndarray: """Correlation of each variable with one component's scores. The squared correlation loading equals the fraction of a variable's variance explained by that component; the sign follows the loading. """ explained = np.clip(_per_component_r2(cumulative_r2, component), 0.0, None) signs = np.sign(loadings[component].to_numpy(dtype=float)) return signs * np.sqrt(explained)
def correlation_loadings_plot(  # noqa: C901, PLR0913
    model: BaseEstimator,
    pc_horiz: int = 1,
    pc_vert: int = 2,
    variance_ellipses: Sequence[float] = (0.5, 1.0),
    settings: dict | None = None,
    fig: go.Figure | None = None,
) -> go.Figure:
    """Draw a correlation loadings plot for a fitted latent variable model.

    Every variable is positioned by its correlation with the scores of the two
    chosen components. The squared distance of a variable from the origin is the
    fraction of its variance explained by those two components, so all variables
    fall inside the unit circle. Concentric ellipses mark variance-explained
    thresholds: a variable beyond the 50% ellipse has at least half of its
    variance captured by the two components shown.

    For PCA the X-variables are shown. For PLS both the X-variables and the
    Y-variables are overlaid against the X-scores, which reveals how process
    variables relate to quality variables. A model is treated as PLS when it
    has an ``r2y_per_variable_`` attribute.

    Parameters
    ----------
    model : MVmodel object (PCA, or PLS)
        A fitted latent variable model generated by this library.
    pc_horiz : int, default 1
        Component shown on the horizontal axis (1-based).
    pc_vert : int, default 2
        Component shown on the vertical axis (1-based).
    variance_ellipses : sequence of float, default (0.5, 1.0)
        Variance-explained thresholds, each a fraction in (0, 1], at which to
        draw a concentric ellipse. The conventional choice is the 50% and 100%
        ellipses; any other thresholds (for example 0.75 and 0.95) are equally
        valid.
    settings : dict
        Default settings::

            {
                "title": "Correlation loadings ...",  # str: overall plot title
                "x_marker_color": None,    # str|None: X-variable marker colour; None uses the theme
                "y_marker_color": None,    # str|None: Y-variable marker colour (PLS); None uses the theme
                "ellipse_color": "grey",   # str: colour of the variance ellipses
                "show_labels": True,       # bool: label each variable
                "show_legend": True,       # bool: show clickable legend (PLS only)
                "html_image_height": 600,  # int: image height in pixels
                "html_aspect_ratio_w_over_h": 1.0,  # float: width as ratio of height
                "template": "pi_journal",  # str: registered Plotly theme name
            }
    fig : go.Figure, optional
        An existing figure to draw onto. A new figure is created if omitted.

    Returns
    -------
    go.Figure
        The figure with the ellipses, reference lines and variable markers added.

    Raises
    ------
    ValueError
        If the model is not fitted, a requested component was not fitted, both
        axes name the same component, or an ellipse level is outside (0, 1].

    Examples
    --------
    >>> pca.correlation_loadings_plot()
    >>> pls.correlation_loadings_plot(pc_horiz=1, pc_vert=3)
    >>> pca.correlation_loadings_plot(variance_ellipses=(0.75, 0.95))
    """
    if not hasattr(model, "r2_per_variable_"):
        raise ValueError("Model is not fitted. Call fit() before plotting the correlation loadings.")

    fitted = list(model.r2_per_variable_.columns)
    for axis_name, component in {"pc_horiz": pc_horiz, "pc_vert": pc_vert}.items():
        if component in fitted:
            continue
        raise ValueError(f"{axis_name}={component} is not a fitted component; choose from {fitted}.")
    if pc_horiz == pc_vert:
        raise ValueError("pc_horiz and pc_vert must be different components.")

    # Convert every level first, then report the first one that is out of range.
    ellipse_levels = list(map(float, variance_ellipses))
    for candidate in ellipse_levels:
        if 0 < candidate <= 1:
            continue
        raise ValueError(f"Each value in variance_ellipses must be a fraction in (0, 1]; got {candidate}.")

    is_pls = hasattr(model, "r2y_per_variable_")
    if is_pls:
        x_loadings = model.x_loadings_
    else:
        x_loadings = model.loadings_

    class Settings(BaseModel):
        """Validated display settings for the correlation-loadings plot."""

        title: str = f"Correlation loadings: components {pc_horiz} and {pc_vert}"
        x_marker_color: str | None = None
        y_marker_color: str | None = None
        ellipse_color: str = "grey"
        show_labels: bool = True
        show_legend: bool = True
        html_image_height: float = 600.0
        html_aspect_ratio_w_over_h: float = 1.0
        template: str = DEFAULT_THEME

    setdict = Settings(**(settings or {})).model_dump()
    figure = go.Figure() if fig is None else fig
    ring_color = setdict["ellipse_color"]

    # Variance ellipses go in first so that the variable markers are drawn on top.
    for fraction in ellipse_levels:
        radius = float(np.sqrt(fraction))
        figure.add_shape(
            type="circle",
            xref="x",
            yref="y",
            x0=-radius,
            y0=-radius,
            x1=radius,
            y1=radius,
            line={"color": ring_color, "width": 1, "dash": "dot"},
        )
        figure.add_annotation(
            x=0,
            y=radius,
            text=f"{fraction * 100:g}%",
            showarrow=False,
            yshift=9,
            font={"color": ring_color, "size": 11},
        )

    figure.add_hline(y=0, line_color=REFERENCE_LINE_COLOR, line_width=1)
    figure.add_vline(x=0, line_color=REFERENCE_LINE_COLOR, line_width=1)

    if setdict["show_labels"]:
        mode = "markers+text"
    else:
        mode = "markers"

    x_trace = go.Scatter(
        x=_correlation_loadings(model.r2_per_variable_, x_loadings, pc_horiz),
        y=_correlation_loadings(model.r2_per_variable_, x_loadings, pc_vert),
        mode=mode,
        text=list(map(str, model.r2_per_variable_.index)),
        textposition="top center",
        marker={"color": setdict["x_marker_color"], "size": 8, "symbol": "circle"},
        name="X-variables",
    )
    figure.add_trace(x_trace)

    if is_pls:
        y_trace = go.Scatter(
            x=_correlation_loadings(model.r2y_per_variable_, model.y_loadings_, pc_horiz),
            y=_correlation_loadings(model.r2y_per_variable_, model.y_loadings_, pc_vert),
            mode=mode,
            text=list(map(str, model.r2y_per_variable_.index)),
            textposition="top center",
            marker={"color": setdict["y_marker_color"], "size": 9, "symbol": "diamond"},
            name="Y-variables",
        )
        figure.add_trace(y_trace)

    # Each axis title reports the variance explained by its component.
    axis_titles = []
    for component in (pc_horiz, pc_vert):
        percent = float(model.r2_per_component_[component]) * 100.0
        axis_titles.append(f"Component {component} ({percent:.1f}%)")
    horiz_title, vert_title = axis_titles

    shared_axis: dict = {"range": [-1.08, 1.08], "zeroline": False}
    height = setdict["html_image_height"]
    figure.update_layout(
        template=setdict["template"],
        title_text=setdict["title"],
        hovermode="closest",
        showlegend=setdict["show_legend"] and is_pls,
        autosize=False,
        xaxis={"title_text": horiz_title, **shared_axis},
        yaxis={"title_text": vert_title, "scaleanchor": "x", "scaleratio": 1, **shared_axis},
        width=setdict["html_aspect_ratio_w_over_h"] * height,
        height=height,
    )
    return figure
def predictions_vs_observed_plot(
    model: BaseEstimator,
    *,
    y_observed: pd.DataFrame | np.ndarray,
    variable: str | None = None,
    settings: dict | None = None,
    fig: go.Figure | None = None,
) -> go.Figure:
    """Generate an observed-vs-predicted (parity) plot for a fitted PLS model.

    Plots the calibration predictions against the observed Y values, with a
    ``y = x`` reference line and an RMSE annotation. Points lying close to the
    reference line indicate good predictions.

    Parameters
    ----------
    model : PLS object
        A fitted PLS model generated by this library. It must expose a
        ``predictions_`` DataFrame (one column per Y-variable).
    y_observed : array-like of shape (n_samples, n_targets)
        The observed Y values, on the same scale as the data used to fit the
        model (for example the scaled Y from :class:`MCUVScaler`). A DataFrame
        is matched to the predictions by column label. Any other array-like is
        matched by position: when it has as many columns as
        ``model.predictions_`` it takes over those column labels.
    variable : str, optional
        Which Y-variable to plot. Defaults to the first Y-variable.
    settings : dict
        Default settings::

            {
                "title": "Observed vs predicted ...",  # str: overall plot title
                "marker_color": None,  # str|None: data-marker colour; None uses the theme
                "reference_color": "#9CA3AF",  # str: colour of the y = x line
                "html_image_height": 500,  # int: image height in pixels
                "html_aspect_ratio_w_over_h": 1.0,  # float: width as ratio of height
                "template": "pi_journal",  # str: registered Plotly theme name
            }
    fig : go.Figure, optional
        An existing figure to draw onto. A new figure is created if omitted.

    Returns
    -------
    go.Figure
        The figure holding the reference line, the observation markers and the
        RMSE annotation.

    Raises
    ------
    ValueError
        If the model has no ``predictions_`` attribute, if ``variable`` is not
        a column of the predictions or of ``y_observed``, or if ``y_observed``
        does not have one row per training observation.

    Examples
    --------
    >>> pls.predictions_vs_observed_plot(y_observed=Y_scaled)
    >>> pls.predictions_vs_observed_plot(y_observed=Y_scaled, variable="quality")
    """
    if not hasattr(model, "predictions_"):
        msg = "Model is not fitted. Call fit() before plotting predictions vs observed."
        raise ValueError(msg)

    predictions = model.predictions_
    if not isinstance(y_observed, pd.DataFrame):
        y_observed = pd.DataFrame(y_observed)
        # A bare array carries no labels: pandas numbers its columns 0..k-1,
        # which would never match named Y-variables. Match by position instead
        # whenever the column counts agree.
        if y_observed.shape[1] == predictions.shape[1]:
            y_observed.columns = predictions.columns

    if variable is None:
        # Keep the label as-is: converting it to ``str`` would make the
        # membership checks below fail for non-string column labels.
        variable = predictions.columns[0]
    if variable not in predictions.columns:
        msg = f"Unknown Y-variable '{variable}'. Known: {list(predictions.columns)}."
        raise ValueError(msg)
    if variable not in y_observed.columns:
        msg = f"y_observed has no column '{variable}'. Its columns are {list(y_observed.columns)}."
        raise ValueError(msg)
    if y_observed.shape[0] != predictions.shape[0]:
        msg = (
            f"y_observed must have {predictions.shape[0]} rows (the number of training "
            f"observations), got {y_observed.shape[0]}."
        )
        raise ValueError(msg)

    observed = y_observed[variable].to_numpy(dtype=float)
    predicted = predictions[variable].to_numpy(dtype=float)
    rmse = float(np.sqrt(np.mean((observed - predicted) ** 2)))

    # Both axes share one range so that the y = x line is a true diagonal.
    lo = float(min(observed.min(), predicted.min()))
    hi = float(max(observed.max(), predicted.max()))
    # Fall back to a fixed pad when all values coincide (zero-width range).
    pad = 0.05 * (hi - lo) if hi > lo else 1.0

    class Settings(BaseModel):
        """Validated display settings for the predictions-vs-observed plot."""

        title: str = f"Observed vs predicted for {variable}"
        marker_color: str | None = None
        reference_color: str = REFERENCE_LINE_COLOR
        html_image_height: float = 500.0
        html_aspect_ratio_w_over_h: float = 1.0
        template: str = DEFAULT_THEME

    setdict = Settings(**settings).model_dump() if settings else Settings().model_dump()

    if fig is None:
        fig = go.Figure()

    # Reference line first, so the observation markers are drawn on top of it.
    fig.add_trace(
        go.Scatter(
            x=[lo - pad, hi + pad],
            y=[lo - pad, hi + pad],
            mode="lines",
            line=dict(color=setdict["reference_color"], dash="dash"),
            name="y = x",
        )
    )
    fig.add_trace(
        go.Scatter(
            x=observed,
            y=predicted,
            mode="markers",
            marker=dict(color=setdict["marker_color"], size=7),
            name="Observations",
        )
    )
    # RMSE label sits just inside the top-left corner of the data range.
    fig.add_annotation(
        x=lo + 0.05 * (hi - lo),
        y=hi - 0.05 * (hi - lo),
        text=f"RMSE = {rmse:.4g}",
        showarrow=False,
    )

    axis_common: dict = dict(range=[lo - pad, hi + pad])
    fig.update_layout(
        template=setdict["template"],
        title_text=setdict["title"],
        hovermode="closest",
        showlegend=False,
        autosize=False,
        xaxis=dict(title_text=f"Observed: {variable}", **axis_common),
        yaxis=dict(title_text=f"Predicted: {variable}", scaleanchor="x", scaleratio=1, **axis_common),
        width=setdict["html_aspect_ratio_w_over_h"] * setdict["html_image_height"],
        height=setdict["html_image_height"],
    )
    return fig
def coefficient_plot(
    model: BaseEstimator,
    variable: str | None = None,
    settings: dict | None = None,
    fig: go.Figure | None = None,
) -> go.Figure:
    """Draw the PLS regression coefficients of one Y-variable as a bar chart.

    One bar is drawn per X-variable, taken from the ``variable`` column of
    ``model.beta_coefficients_``. A horizontal line at zero separates positive
    from negative coefficients.

    Parameters
    ----------
    model : PLS object
        A fitted PLS model generated by this library.
    variable : str, optional
        Which Y-variable's coefficients to plot. Defaults to the first one.
    settings : dict
        Default settings::

            {
                "title": "Regression coefficients ...",  # str: overall plot title
                "bar_color": None,  # str|None: bar colour; None uses the theme
                "html_image_height": 500,  # int: image height in pixels
                "html_aspect_ratio_w_over_h": 16/9,  # float: width as ratio of height
                "template": "pi_journal",  # str: registered Plotly theme name
            }
    fig : go.Figure, optional
        An existing figure to draw onto. A new figure is created if omitted.

    Returns
    -------
    go.Figure
        The figure with the coefficient bars and the zero reference line.

    Raises
    ------
    ValueError
        If the model has no ``beta_coefficients_`` attribute, or ``variable``
        is not one of its columns.

    Examples
    --------
    >>> pls.coefficient_plot()
    >>> pls.coefficient_plot(variable="quality")
    """
    if not hasattr(model, "beta_coefficients_"):
        msg = "Model is not fitted. Call fit() before plotting the coefficients."
        raise ValueError(msg)

    beta = model.beta_coefficients_
    known_targets = beta.columns
    if variable is None:
        variable = str(known_targets[0])
    if variable not in known_targets:
        msg = f"Unknown Y-variable '{variable}'. Known: {list(known_targets)}."
        raise ValueError(msg)

    column = beta[variable]
    # Labels are stringified because the x-axis is forced to be categorical.
    labels = list(map(str, column.index))

    class Settings(BaseModel):
        """Validated display settings for the regression-coefficient plot."""

        title: str = f"Regression coefficients for {variable}"
        bar_color: str | None = None
        html_image_height: float = 500.0
        html_aspect_ratio_w_over_h: float = 16 / 9.0
        template: str = DEFAULT_THEME

    validated = Settings(**settings) if settings else Settings()
    setdict = validated.model_dump()

    if fig is None:
        fig = go.Figure()

    bars = go.Bar(
        x=labels,
        y=column.to_numpy(dtype=float),
        marker_color=setdict["bar_color"],
        name=f"beta: {variable}",
    )
    fig.add_trace(bars)
    fig.add_hline(y=0, line_color=REFERENCE_LINE_COLOR, line_width=1)

    height = setdict["html_image_height"]
    fig.update_layout(
        template=setdict["template"],
        title_text=setdict["title"],
        hovermode="x",
        showlegend=False,
        autosize=False,
        xaxis={"title_text": "X-variable", "type": "category"},
        yaxis={"title_text": f"Coefficient ({variable})"},
        width=setdict["html_aspect_ratio_w_over_h"] * height,
        height=height,
    )
    return fig
class Plot:
    """Plotting accessor bound to a single estimator.

    The wrapped estimator is kept on ``_parent``. Each method hands this
    accessor itself (not the estimator) to the module-level plot function as
    its ``model`` argument.
    """

    def __init__(self, parent: BaseEstimator) -> None:
        # Stored under a private name; the plot functions reach the estimator
        # through it (e.g. ``model._parent.n_components`` in the pre-checks).
        self._parent = parent

    def scores(self, pc_horiz: int = 1, pc_vert: int = 2, **kwargs) -> go.Figure:
        """Return the score plot for the two chosen components.

        ``pc_horiz`` and ``pc_vert`` pick the components on the horizontal and
        vertical axes; any further keyword arguments are forwarded unchanged
        to ``score_plot``.
        """
        axes = {"pc_horiz": pc_horiz, "pc_vert": pc_vert}
        return score_plot(self, **axes, **kwargs)

    def loadings(self, pc_horiz: int = 1, pc_vert: int = 2, **kwargs) -> go.Figure:
        """Return the loading plot for the two chosen components.

        ``pc_horiz`` and ``pc_vert`` pick the components on the horizontal and
        vertical axes; any further keyword arguments are forwarded unchanged
        to ``loading_plot``.
        """
        axes = {"pc_horiz": pc_horiz, "pc_vert": pc_vert}
        return loading_plot(self, **axes, **kwargs)