Quick Start

Installation

# Install the process-improve package with pip
pip install process-improve

PCA Example

# Walkthrough: scale a data table, fit a 3-component PCA model, then look at
# its results, diagnostics, and plots.
import pandas as pd
from process_improve.multivariate.methods import PCA, MCUVScaler

# Load and scale data
# "your_data.csv" is a placeholder path; index_col=0 uses the file's first
# column as the row index rather than as a data column.
X = pd.read_csv("your_data.csv", index_col=0)
# The scaler is fitted on X and then applied to that same X.
# NOTE(review): MCUV presumably means mean-centre / unit-variance -- confirm
# against the MCUVScaler documentation.
scaler = MCUVScaler().fit(X)
X_scaled = scaler.transform(X)

# Fit model
# The model is fitted on the scaled data, not on the raw X.
pca = PCA(n_components=3).fit(X_scaled)

# Results
# NOTE(review): N, K and A are not defined in this guide; presumably N = rows
# (observations), K = columns (variables), A = number of components -- confirm.
pca.scores_  # Score matrix (N x A)
pca.loadings_  # Loading matrix (K x A)
pca.r2_cumulative_  # Cumulative R² per component

# Diagnostics
pca.detect_outliers()
# Contributions are requested for the first row of the score matrix,
# passed as a plain array via `.values`.
pca.score_contributions(pca.scores_.iloc[0].values)

# Plots
pca.score_plot()
pca.loading_plot()
pca.spe_plot()
pca.t2_plot()

PLS Example

# Walkthrough: fit a 3-component PLS model and apply it to new observations.
# NOTE(review): `X` is the table loaded in the PCA example above. `Y` and
# `X_new` are not defined anywhere in this guide and must be supplied by the
# reader -- presumably `Y` has one row per row of `X`, and `X_new` has the same
# columns as `X` (confirm).
from process_improve.multivariate.methods import PLS, MCUVScaler

# Scale X and Y separately
# Each block gets its own scaler, fitted only on that block.
scaler_x = MCUVScaler().fit(X)
scaler_y = MCUVScaler().fit(Y)
X_scaled = scaler_x.transform(X)
Y_scaled = scaler_y.transform(Y)

# Fit model
pls = PLS(n_components=3).fit(X_scaled, Y_scaled)

# Predict new observations (sklearn-compatible: returns the y_hat array)
# New data is passed through scaler_x (the scaler fitted on the training X)
# before it reaches the model.
# NOTE(review): the model was fitted on Y_scaled, so the predictions are
# presumably in scaled-Y units -- confirm whether they need converting back.
y_pred = pls.predict(scaler_x.transform(X_new))

# Predict with full per-row diagnostics (scores, T², SPE, plus y_hat)
result = pls.diagnose(scaler_x.transform(X_new))
result.y_hat  # Predicted Y
result.spe  # SPE diagnostics
result.hotellings_t2  # Hotelling's T² diagnostics

# Diagnostics
pls.detect_outliers()
# Contributions are requested for the first row of the score matrix,
# passed as a plain array via `.values`.
pls.score_contributions(pls.scores_.iloc[0].values)

Component Selection

Use cross-validation to select the number of PCA components:

# `X_scaled` is the scaled data table produced in the examples above.
# select_n_components is called on the PCA class itself, so no fitted model
# is needed beforehand.
# NOTE(review): max_components=10 is presumably the largest number of
# components that gets evaluated -- confirm against the API documentation.
result = PCA.select_n_components(X_scaled, max_components=10)
print(f"Recommended: {result.n_components} components")
print(f"PRESS ratios: {result.press_ratio}")

DOE Strategy Example

Plan a multi-stage experimental strategy before running any experiments:

# Walkthrough: describe seven factors and one response, ask for a staged
# experimental strategy within a 40-run budget, and print one summary line
# per recommended stage.
from process_improve.experiments.factor import Factor, Response
from process_improve.experiments.strategy import recommend_strategy

# One dict of keyword arguments per factor. `units` is omitted for pH, so
# Factor's own default for that argument applies there.
factor_specs = (
    {"name": "Temperature", "low": 25, "high": 40, "units": "degC"},
    {"name": "pH", "low": 5.0, "high": 7.5},
    {"name": "Glucose", "low": 10, "high": 50, "units": "g/L"},
    {"name": "Yeast extract", "low": 1, "high": 10, "units": "g/L"},
    {"name": "Agitation", "low": 100, "high": 400, "units": "rpm"},
    {"name": "Aeration", "low": 0.5, "high": 2.0, "units": "vvm"},
    {"name": "Inoculum", "low": 2, "high": 10, "units": "%v/v"},
)
factors = [Factor(**spec) for spec in factor_specs]

# The single response to optimise.
yield_response = Response(name="Yield", goal="maximize", units="g/L")

result = recommend_strategy(
    factors=factors,
    responses=[yield_response],
    budget=40,
    domain="fermentation",
)

# `result` is indexed like a mapping; each entry under "stages" is itself
# indexed by the four keys used in the summary line below.
for stage in result["stages"]:
    print(f"Stage {stage['stage_number']}: {stage['stage_name']} "
          f"({stage['design_type']}, {stage['estimated_runs']} runs)")