Source code for process_improve.experiments.designs

# (c) Kevin Dunn, 2010-2026. MIT License.

"""Unified design generation: ``generate_design()`` dispatcher.

This module provides a single entry point for creating any standard
experimental design.  It dispatches to specialised modules based on
``design_type`` and applies common post-processing (center points,
replication, randomization, coded/actual mapping).

Examples
--------
>>> from process_improve.experiments import generate_design, Factor
>>> factors = [
...     Factor(name="Temperature", low=150, high=200, units="degC"),
...     Factor(name="Pressure", low=1, high=5, units="bar"),
... ]
>>> result = generate_design(factors, design_type="full_factorial")
>>> result.n_runs
7
>>> result.design
"""

from __future__ import annotations

from collections.abc import Callable
from typing import Any

import numpy as np

try:
    from pyDOE3 import ff2n
except ImportError:  # pragma: no cover - exercised via env-without-pyDOE3
    from process_improve._extras import _MissingExtra

    ff2n = _MissingExtra("pyDOE3", "expt")  # type: ignore[assignment]

from process_improve.experiments.designs_utils import build_design_result
from process_improve.experiments.factor import Constraint, DesignResult, Factor, FactorType

# ---------------------------------------------------------------------------
# Dispatch handlers - each returns (coded_matrix, metadata_dict)
# ---------------------------------------------------------------------------


def _dispatch_full_factorial(
    factors: list[Factor],
    **kwargs: Any,  # noqa: ANN401
) -> tuple[np.ndarray, dict]:
    """Full 2^k factorial using pyDOE3.ff2n (returns -1/+1)."""
    k = len(factors)
    coded_matrix = ff2n(k)
    return coded_matrix, {}


def _dispatch_fractional_factorial(
    factors: list[Factor],
    **kwargs: Any,  # noqa: ANN401
) -> tuple[np.ndarray, dict]:
    from process_improve.experiments.designs_screening import dispatch_fractional_factorial  # noqa: PLC0415

    return dispatch_fractional_factorial(
        factors,
        resolution=kwargs.get("resolution"),
        generators=kwargs.get("generators"),
    )


def _dispatch_plackett_burman(
    factors: list[Factor],
    **kwargs: Any,  # noqa: ANN401
) -> tuple[np.ndarray, dict]:
    from process_improve.experiments.designs_screening import dispatch_plackett_burman  # noqa: PLC0415

    return dispatch_plackett_burman(factors)


def _dispatch_box_behnken(
    factors: list[Factor],
    **kwargs: Any,  # noqa: ANN401
) -> tuple[np.ndarray, dict]:
    from process_improve.experiments.designs_response_surface import dispatch_box_behnken  # noqa: PLC0415

    return dispatch_box_behnken(factors, center_points=kwargs.get("center_points", 3))


def _dispatch_ccd(
    factors: list[Factor],
    **kwargs: Any,  # noqa: ANN401
) -> tuple[np.ndarray, dict]:
    from process_improve.experiments.designs_response_surface import dispatch_ccd  # noqa: PLC0415

    return dispatch_ccd(
        factors,
        center_points=kwargs.get("center_points", 3),
        alpha=kwargs.get("alpha"),
        cube=kwargs.get("cube", "full"),
        generators=kwargs.get("generators"),
        resolution=kwargs.get("resolution"),
    )


def _dispatch_dsd(
    factors: list[Factor],
    **kwargs: Any,  # noqa: ANN401
) -> tuple[np.ndarray, dict]:
    from process_improve.experiments.designs_response_surface import dispatch_dsd  # noqa: PLC0415

    return dispatch_dsd(factors)


def _dispatch_omars(
    factors: list[Factor],
    **kwargs: Any,  # noqa: ANN401
) -> tuple[np.ndarray, dict]:
    # With a run budget, reach the ILP enumerator (a larger OMARS member that
    # leaves error degrees of freedom for a full second-order model); without a
    # budget, return the minimal conference-foldover member, which is identical
    # to the definitive screening design.  This is a thin wrapper over
    # ``design_type="omars_ilp"`` so both spellings reach the same enumerator.
    if kwargs.get("budget") is not None:
        return _dispatch_omars_ilp(factors, **kwargs)

    from process_improve.experiments.designs_omars import dispatch_omars  # noqa: PLC0415

    return dispatch_omars(factors)


def _dispatch_omars_ilp(
    factors: list[Factor],
    **kwargs: Any,  # noqa: ANN401
) -> tuple[np.ndarray, dict]:
    from process_improve.experiments.designs_omars_ilp import _dispatch_omars_ilp as _run  # noqa: PLC0415

    return _run(factors, budget=kwargs.get("budget"))


def _dispatch_d_optimal(
    factors: list[Factor],
    **kwargs: Any,  # noqa: ANN401
) -> tuple[np.ndarray, dict]:
    from process_improve.experiments.designs_optimal import dispatch_d_optimal  # noqa: PLC0415

    return dispatch_d_optimal(
        factors,
        budget=kwargs.get("budget"),
        hard_to_change=kwargs.get("hard_to_change"),
        constraints=kwargs.get("constraints"),
        model_type=kwargs.get("model_type", "interactions"),
    )


def _dispatch_i_optimal(
    factors: list[Factor],
    **kwargs: Any,  # noqa: ANN401
) -> tuple[np.ndarray, dict]:
    from process_improve.experiments.designs_optimal import dispatch_i_optimal  # noqa: PLC0415

    return dispatch_i_optimal(
        factors,
        budget=kwargs.get("budget"),
        hard_to_change=kwargs.get("hard_to_change"),
        constraints=kwargs.get("constraints"),
        model_type=kwargs.get("model_type", "interactions"),
    )


def _dispatch_a_optimal(
    factors: list[Factor],
    **kwargs: Any,  # noqa: ANN401
) -> tuple[np.ndarray, dict]:
    from process_improve.experiments.designs_optimal import dispatch_a_optimal  # noqa: PLC0415

    return dispatch_a_optimal(
        factors,
        budget=kwargs.get("budget"),
        hard_to_change=kwargs.get("hard_to_change"),
        constraints=kwargs.get("constraints"),
        model_type=kwargs.get("model_type", "interactions"),
    )


def _dispatch_mixture(
    factors: list[Factor],
    **kwargs: Any,  # noqa: ANN401
) -> tuple[np.ndarray, dict]:
    from process_improve.experiments.designs_mixture import dispatch_mixture  # noqa: PLC0415

    return dispatch_mixture(factors, budget=kwargs.get("budget"))


def _dispatch_taguchi(
    factors: list[Factor],
    **kwargs: Any,  # noqa: ANN401
) -> tuple[np.ndarray, dict]:
    from process_improve.experiments.designs_screening import dispatch_taguchi  # noqa: PLC0415

    return dispatch_taguchi(factors)


# ---------------------------------------------------------------------------
# Registry
# ---------------------------------------------------------------------------

_DESIGN_REGISTRY: dict[str, Callable[..., tuple[np.ndarray, dict]]] = {
    "full_factorial": _dispatch_full_factorial,
    "fractional_factorial": _dispatch_fractional_factorial,
    "plackett_burman": _dispatch_plackett_burman,
    "box_behnken": _dispatch_box_behnken,
    "ccd": _dispatch_ccd,
    "dsd": _dispatch_dsd,
    "omars": _dispatch_omars,
    "omars_ilp": _dispatch_omars_ilp,
    "d_optimal": _dispatch_d_optimal,
    "i_optimal": _dispatch_i_optimal,
    "a_optimal": _dispatch_a_optimal,
    "mixture": _dispatch_mixture,
    "taguchi": _dispatch_taguchi,
}


# ---------------------------------------------------------------------------
# Auto-selection
# ---------------------------------------------------------------------------


def _auto_select(
    factors: list[Factor],
    budget: int | None,
    constraints: list[Constraint] | None,
    hard_to_change: list[str] | None,
) -> str:
    """Choose the best design type based on factors, budget, and constraints.

    Parameters
    ----------
    factors : list[Factor]
        Factor specifications.
    budget : int or None
        Maximum number of runs the experimenter can afford.
    constraints : list[Constraint] or None
        Factor-space constraints.
    hard_to_change : list[str] or None
        Names of hard-to-change factors.

    Returns
    -------
    str
        Selected design type key.
    """
    k_mixture = sum(1 for f in factors if f.type == FactorType.mixture)
    k = len(factors) - k_mixture

    # Mixture factors dominate
    if k_mixture > 0 and k_mixture == len(factors):
        return "mixture"

    # Constraints or split-plot -> D-optimal
    if constraints or hard_to_change:
        return "d_optimal"

    effective_budget = budget if budget is not None else float("inf")

    if k <= 5 and effective_budget >= 2**k:
        return "full_factorial"

    if k >= 6 and effective_budget <= 2 * k + 1:
        return "plackett_burman"

    if effective_budget >= 2 ** (k - 1):
        return "fractional_factorial"

    return "d_optimal"


# ---------------------------------------------------------------------------
# Main entry point
# ---------------------------------------------------------------------------


[docs] def generate_design( # noqa: PLR0913 factors: list[Factor], design_type: str | None = None, budget: int | None = None, center_points: int = 3, replicates: int = 1, blocks: int | None = None, resolution: int | None = None, generators: list[str] | None = None, alpha: str | float | None = None, cube: str = "full", constraints: list[Constraint] | None = None, hard_to_change: list[str] | None = None, random_seed: int = 42, ) -> DesignResult: """Generate an experimental design matrix. Parameters ---------- factors : list[Factor] Factor specifications. Each ``Factor`` has a *name*, *type* (``"continuous"``, ``"categorical"``, ``"mixture"``), *low*/*high* bounds (for continuous), *levels* (for categorical), and optional *units*. design_type : str or None One of ``"full_factorial"``, ``"fractional_factorial"``, ``"plackett_burman"``, ``"box_behnken"``, ``"ccd"``, ``"dsd"``, ``"omars"``, ``"omars_ilp"``, ``"d_optimal"``, ``"i_optimal"``, ``"a_optimal"``, ``"mixture"``, ``"taguchi"``. If ``None``, the design type is chosen automatically based on the factor count, budget, and constraints. budget : int or None Maximum number of runs the experimenter can afford. center_points : int Number of center-point replicates (default 3). For designs that embed their own center points (CCD, Box-Behnken), this parameter controls the count within the design structure. replicates : int Number of full replicates of the design (default 1 = no replication). blocks : int or None Number of blocks. resolution : int or None Desired minimum resolution for fractional factorials (III=3, IV=4, V=5). generators : list[str] or None Explicit generators for fractional factorials, e.g. ``["D=ABC", "E=AC"]``. alpha : str, float, or None Axial distance for CCD designs: ``"rotatable"``, ``"face_centered"``, ``"orthogonal"``, or a numeric value. cube : str For CCD designs, how to build the cube (factorial) portion: ``"full"`` (default) uses the complete 2^k factorial; ``"fractional"`` uses a resolution-V (or higher) fractional factorial, keeping the run count practical for k >= 5. When ``"fractional"`` and *generators* is given, those generators define the cube; otherwise a minimum-aberration half-fraction is chosen automatically. constraints : list[Constraint] or None Constraints on the factor space. hard_to_change : list[str] or None Names of hard-to-change factors (triggers split-plot structure). random_seed : int Seed for reproducible randomization (default 42). Returns ------- DesignResult Contains ``design`` (coded ``Expt``), ``design_actual`` (actual-units ``Expt``), ``run_order``, and design metadata (generators, defining relation, resolution, etc.). Raises ------ ValueError If *design_type* is unknown, or if factor/budget constraints cannot be satisfied. Examples -------- >>> from process_improve.experiments import generate_design, Factor >>> factors = [ ... Factor(name="T", low=150, high=200, units="degC"), ... Factor(name="P", low=1, high=5, units="bar"), ... ] >>> result = generate_design(factors, design_type="full_factorial") >>> result.design_actual """ # --- Validate ---------------------------------------------------------- if not factors: raise ValueError("At least one factor must be provided.") if design_type is None: design_type = _auto_select(factors, budget, constraints, hard_to_change) if design_type not in _DESIGN_REGISTRY: raise ValueError(f"Unknown design_type={design_type!r}. Choose from: {', '.join(sorted(_DESIGN_REGISTRY))}.") # --- Dispatch ---------------------------------------------------------- dispatch_fn = _DESIGN_REGISTRY[design_type] # Build kwargs for the dispatch handler dispatch_kwargs: dict[str, Any] = { "budget": budget, "center_points": center_points, "resolution": resolution, "generators": generators, "alpha": alpha, "cube": cube, "hard_to_change": hard_to_change, "constraints": constraints, } coded_matrix, meta = dispatch_fn(factors, **dispatch_kwargs) # --- Determine center-point handling ----------------------------------- # Designs that embed their own center points (CCD, Box-Behnken) # already include them; don't add more. designs_with_embedded_centers = { "ccd", "box_behnken", "dsd", "omars", "omars_ilp", "mixture", "d_optimal", "i_optimal", "a_optimal", } # Optimal designs from pyoptex produce a pre-optimized run order # (especially important for split-plot). Skip randomization for these. optimal_designs = {"d_optimal", "i_optimal", "a_optimal"} effective_seed: int | None = random_seed if design_type in optimal_designs and meta.get("backend") == "pyoptex": effective_seed = None # signal to build_design_result to skip randomization extra_center_points = 0 if design_type in designs_with_embedded_centers else center_points # Mixture designs return proportions (actual units), not coded is_actual = design_type == "mixture" # Extract resolution/generators/defining_relation from metadata result_generators = generators or meta.get("generators_used") result_resolution = resolution or meta.get("resolution") result_alpha = meta.pop("alpha_value", None) return build_design_result( coded_matrix=coded_matrix, factors=factors, design_type=design_type, center_points=extra_center_points, replicates=replicates, blocks=blocks, random_seed=effective_seed, generators=result_generators, defining_relation=meta.get("defining_relation"), resolution=result_resolution, alpha=result_alpha, metadata=meta, is_actual=is_actual, )