Source code for process_improve.monitoring.metrics
import numpy as np
import pandas as pd
from ..univariate.metrics import Sn
[docs]
def _resolve_specification(
    df: pd.DataFrame,
    which_column: str,
    spec: "float | str | None",
    fallback_percentile: float,
) -> "float | pd.Series":
    """Turn one entry of ``specifications`` into a scalar or a per-row Series."""
    if spec is None:
        # Pass the percentile as a scalar so NumPy returns a 0-d value. A list
        # ``q`` yields a 1-element array, and ``float()`` on such an array is
        # deprecated since NumPy 1.25.
        return float(np.nanpercentile(df[which_column].values, fallback_percentile))
    if isinstance(spec, str):
        # A string names the column of ``df`` that holds the limit for each row.
        return df[spec]
    return float(spec)


def calculate_cpk(
    df: pd.DataFrame,
    which_column: str,
    specifications: tuple[float, float] = (np.nan, np.nan),
    trim_percentile: float = 2.5,
) -> float:
    """
    Calculate the process capability, Cpk.

    The capability is computed against both the lower and the upper specification, and the
    smaller of the two values (ignoring NaN) is returned.

    Process capability, nearer the lower limit = (avg - lower_spec)/(3 x std deviation)
    Process capability, nearer the upper limit = (upper_spec - avg)/(3 x std deviation)

    Parameters
    ----------
    df : pd.DataFrame
        Raw data, at least one column is numeric.
    which_column : str
        Name of the column of `df` that is used for the Cpk calculation.
    specifications : tuple, optional
        A pair ``(lower, upper)``. Each entry is interpreted independently:

        * a number: a specification that is constant over time;
        * a string: the name of a column in `df` holding a specification that changes
          over time;
        * ``None``: the limit is estimated from the data itself, as the `trim_percentile`-th
          percentile (lower) or the ``100 - trim_percentile``-th percentile (upper) of
          `which_column`, ignoring NaN values;
        * NaN (the default): that side yields NaN and is ignored when the minimum is taken.
          If both sides are NaN the result is NaN.
    trim_percentile : float, optional
        Must satisfy ``0 <= trim_percentile < 40``. If non-zero, the median and ``Sn``
        (imported from ``..univariate.metrics``) are used as centre and spread; if zero, the
        mean and standard deviation are used. The value is also the percentile used to
        estimate a specification that was given as ``None``.

    Returns
    -------
    float
        The Cpk value.

    Raises
    ------
    AssertionError
        If `trim_percentile` is outside the range ``[0, 40)``.
    """
    assert trim_percentile >= 0
    assert trim_percentile < 40  # typically a max of 10 to 20 is advised.

    lower_spec, upper_spec = specifications
    lower_limit = _resolve_specification(df, which_column, lower_spec, trim_percentile)
    upper_limit = _resolve_specification(df, which_column, upper_spec, 100 - trim_percentile)

    # Distance of every observation from each limit; positive means "inside the limit".
    metric_lower = df[which_column] - lower_limit
    metric_upper = upper_limit - df[which_column]

    if trim_percentile > 0:
        center_lower, center_upper = metric_lower.median(), metric_upper.median()
        spread_lower, spread_upper = Sn(metric_lower), Sn(metric_upper)
    else:
        center_lower, center_upper = metric_lower.mean(), metric_upper.mean()
        spread_lower, spread_upper = metric_lower.std(), metric_upper.std()

    # TODO: return the RSD also: rsd = (spread / center) * 100
    # ``nanmin`` skips a side whose specification was NaN.
    return np.nanmin([center_lower / (3 * spread_lower), center_upper / (3 * spread_upper)])
# Former public names of this module, mapped to the names that replaced them.
_RENAMED = {"calculate_Cpk": "calculate_cpk"}


def __getattr__(name: str) -> None:
    """
    Module-level attribute hook, called for names not found in the module.

    Always raises ``AttributeError``. For a name listed in ``_RENAMED`` the message points
    to the replacement name; for any other name it is the generic "no attribute" message.
    """
    new = _RENAMED.get(name)
    if new is None:
        # Not a known old name: report it as a plain missing attribute.
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    raise AttributeError(
        f"{name!r} has been renamed to {new!r}. "
        f"Use: from process_improve.monitoring.metrics import {new}"
    )