Source code for process_improve.experiments.strategy.budget

# (c) Kevin Dunn, 2010-2026. MIT License.

"""Budget allocation logic for multi-stage DOE strategies.

Implements the **25-40-55-15 framework**:
    - Screening:     25-40 % of total budget
    - Optimisation:  40-55 %
    - Confirmation:  5-15 % (minimum 3 runs)

Sources:
    - Montgomery, *Design and Analysis of Experiments*, 10th ed. (25% rule)
    - Stat-Ease SCOR framework
    - NIST Engineering Statistics Handbook section 5.3.3
"""

from __future__ import annotations

import math
from typing import Any

# ---------------------------------------------------------------------------
# Run estimation look-up tables
# ---------------------------------------------------------------------------

# Box-Behnken run counts (excluding center points)
_BBD_RUNS: dict[int, int] = {
    3: 12,
    4: 24,
    5: 40,
    6: 48,
    7: 56,
}

# CCD factorial portion: 2^k for k <= 5, 2^(k-1) for k >= 6
_CCD_FACTORIAL_RUNS: dict[int, int] = {
    2: 4,
    3: 8,
    4: 16,
    5: 32,
    6: 32,  # half-fraction
    7: 64,  # half-fraction
}


# ---------------------------------------------------------------------------
# Run estimation functions
# ---------------------------------------------------------------------------


[docs] def estimate_screening_runs(n_factors: int, design_type: str) -> int: # noqa: PLR0911 """Estimate the number of runs for a screening design. Parameters ---------- n_factors : int Number of factors to screen. design_type : str One of ``"plackett_burman"``, ``"definitive_screening"``, ``"fractional_factorial"``, ``"full_factorial"``. Returns ------- int Estimated run count including center points. """ if design_type == "plackett_burman": # Next multiple of 4 >= k + 1 n = n_factors + 1 return int(math.ceil(n / 4) * 4) if design_type == "definitive_screening": return 2 * n_factors + 1 if design_type == "fractional_factorial": # Smallest 2^(k-p) with resolution >= IV if n_factors <= 4: return 2**n_factors # full factorial feasible if n_factors == 5: return 16 # 2^(5-1) = 16, resolution V if n_factors == 6: return 16 # 2^(6-2) = 16, resolution IV if n_factors == 7: return 16 # 2^(7-3) = 16, resolution IV (with minimum aberration) # k >= 8: 2^(k-p) where p gives resolution >= IV # Conservative: use 32 runs for 8-11 factors, 64 for 12+ if n_factors <= 11: return 32 return 64 if design_type == "full_factorial": return 2**n_factors # Fallback: PB estimate n = n_factors + 1 return int(math.ceil(n / 4) * 4)
[docs] def estimate_rsm_runs(n_factors: int, design_type: str, center_points: int = 3) -> int: """Estimate the number of runs for an RSM design. Parameters ---------- n_factors : int Number of factors (typically 2-5 after screening). design_type : str One of ``"ccd"``, ``"box_behnken"``, ``"ccd_face_centered"``, ``"d_optimal"``. center_points : int Number of center point replicates (default 3). Returns ------- int Estimated run count. """ if design_type in ("ccd", "ccd_face_centered"): factorial = _CCD_FACTORIAL_RUNS.get(n_factors, 2**n_factors) axial = 2 * n_factors return factorial + axial + center_points if design_type == "box_behnken": base = _BBD_RUNS.get(n_factors, 0) if base == 0: # BBD not defined for this factor count; fall back to CCD estimate factorial = _CCD_FACTORIAL_RUNS.get(n_factors, 2**n_factors) return factorial + 2 * n_factors + center_points return base + center_points if design_type == "d_optimal": # Rough estimate: 1.5x the number of model terms for a quadratic model # Quadratic model terms: 1 + k + k(k-1)/2 + k = 1 + 2k + k(k-1)/2 n_terms = 1 + 2 * n_factors + n_factors * (n_factors - 1) // 2 return math.ceil(1.5 * n_terms) # Fallback: CCD estimate factorial = _CCD_FACTORIAL_RUNS.get(n_factors, 2**n_factors) return factorial + 2 * n_factors + center_points
[docs] def estimate_confirmation_runs(min_runs: int = 3) -> int: """Return the number of confirmation runs. Parameters ---------- min_runs : int Minimum confirmation runs (default 3). Returns ------- int Confirmation run count (always at least 3). """ return max(3, min_runs)
# --------------------------------------------------------------------------- # Budget allocation # ---------------------------------------------------------------------------
[docs] def allocate_budget( # noqa: C901, PLR0913, PLR0915 total_budget: int | None, n_factors: int, needs_screening: bool, needs_rsm: bool, screening_design: str = "plackett_burman", rsm_design: str = "box_behnken", domain_weights: dict[str, float] | None = None, min_confirmation: int = 3, center_points: int = 3, ) -> dict[str, Any]: """Allocate a total run budget across experimental stages. Parameters ---------- total_budget : int or None Total runs across all stages. If ``None``, computes an ideal budget. n_factors : int Total number of candidate factors. needs_screening : bool Whether a screening stage is needed. needs_rsm : bool Whether an RSM optimisation stage is needed. screening_design : str Preferred screening design type. rsm_design : str Preferred RSM design type. domain_weights : dict or None Stage-to-fraction mapping from the domain template. min_confirmation : int Minimum confirmation runs (domain-dependent). center_points : int Center points for RSM design. Returns ------- dict Keys: ``"screening"``, ``"optimization"``, ``"confirmation"``, ``"total"``, ``"ideal_total"``, ``"is_tight"``, ``"warnings"``. """ weights = domain_weights or {"screening": 0.30, "optimization": 0.50, "confirmation": 0.10} warnings: list[str] = [] # Estimate ideal runs for each stage ideal_screening = estimate_screening_runs(n_factors, screening_design) if needs_screening else 0 # Assume screening reduces to ~3 significant factors for RSM n_rsm_factors = min(n_factors, 3) if needs_screening else n_factors ideal_rsm = estimate_rsm_runs(n_rsm_factors, rsm_design, center_points) if needs_rsm else 0 ideal_confirmation = estimate_confirmation_runs(min_confirmation) ideal_total = ideal_screening + ideal_rsm + ideal_confirmation if total_budget is None: # No budget constraint: use ideal allocation return { "screening": ideal_screening, "optimization": ideal_rsm, "confirmation": ideal_confirmation, "total": ideal_total, "ideal_total": ideal_total, "is_tight": False, "warnings": [], } # Check if budget is feasible minimum_feasible = 0 if needs_screening: minimum_feasible += max(n_factors + 1, 4) # Absolute minimum: k+1 runs if needs_rsm: minimum_feasible += n_rsm_factors * 2 + 3 # Bare minimum RSM minimum_feasible += 3 # Minimum confirmation is_tight = total_budget < ideal_total is_very_tight = total_budget < minimum_feasible if is_very_tight: warnings.append( f"Budget of {total_budget} runs is very tight for {n_factors} factors. " f"Minimum feasible is ~{minimum_feasible} runs. " "Consider a single Definitive Screening Design to combine screening and curvature detection." ) # Allocate proportionally, respecting minimums if needs_screening and needs_rsm: # Full multi-stage allocation screening_share = weights.get("screening", 0.30) rsm_share = weights.get("optimization", 0.50) conf_share = weights.get("confirmation", 0.10) alloc_confirmation = max(min_confirmation, int(total_budget * conf_share)) remaining = total_budget - alloc_confirmation if remaining <= 0: alloc_confirmation = min_confirmation remaining = total_budget - alloc_confirmation # Split remaining between screening and RSM total_share = screening_share + rsm_share alloc_screening = max(n_factors + 1, int(remaining * screening_share / total_share)) alloc_rsm = remaining - alloc_screening # Ensure RSM has minimum runs if alloc_rsm < n_rsm_factors * 2 + 3: alloc_rsm = min(n_rsm_factors * 2 + 3, remaining) alloc_screening = remaining - alloc_rsm if alloc_screening < n_factors + 1: alloc_screening = n_factors + 1 alloc_rsm = remaining - alloc_screening elif needs_screening: alloc_screening = total_budget - min_confirmation alloc_rsm = 0 alloc_confirmation = min_confirmation elif needs_rsm: alloc_screening = 0 alloc_rsm = total_budget - min_confirmation alloc_confirmation = min_confirmation else: alloc_screening = 0 alloc_rsm = 0 alloc_confirmation = total_budget # Clamp all values to be non-negative alloc_screening = max(0, alloc_screening) alloc_rsm = max(0, alloc_rsm) alloc_confirmation = max(0, alloc_confirmation) if is_tight and not is_very_tight: warnings.append( f"Budget of {total_budget} runs is tight (ideal is {ideal_total}). " "Designs may have reduced power or fewer center points." ) return { "screening": alloc_screening, "optimization": alloc_rsm, "confirmation": alloc_confirmation, "total": alloc_screening + alloc_rsm + alloc_confirmation, "ideal_total": ideal_total, "is_tight": is_tight, "warnings": warnings, }