# Source code for process_improve.experiments.strategy.models
# (c) Kevin Dunn, 2010-2026. MIT License.
"""Pydantic models for the DOE strategy recommender.
Defines the input specification (``DOEProblemSpec``), the output
(``ExperimentalStrategy``, ``ExperimentalStage``, ``TransitionRule``),
and supporting types (``DomainType``, ``PriorKnowledge``).
"""
from __future__ import annotations
from enum import Enum
from typing import Any, Literal
from pydantic import BaseModel, Field
from process_improve.experiments.factor import Constraint, Factor, Response
# ---------------------------------------------------------------------------
# Enums
# ---------------------------------------------------------------------------
# [docs] (documentation-link marker left over from the HTML source listing; not code)
class DomainType(str, Enum):
    """Application domain for domain-specific strategy adjustments.

    Every member's value is identical to its name.  Because the enum mixes
    in ``str``, a member compares equal to its plain string value
    (``DomainType.general == "general"``) and can be looked up from that
    string (``DomainType("general")``).
    """

    pharma_formulation = "pharma_formulation"
    fermentation = "fermentation"
    food_science = "food_science"
    extraction = "extraction"
    analytical_method = "analytical_method"
    cell_culture = "cell_culture"
    bioprocess = "bioprocess"
    general = "general"
# ---------------------------------------------------------------------------
# Prior knowledge
# ---------------------------------------------------------------------------
# [docs] (documentation-link marker left over from the HTML source listing; not code)
class PriorKnowledge(BaseModel):
    """Parsed prior knowledge with a confidence score.

    Every field has a default, so ``PriorKnowledge()`` represents
    "nothing known": empty text, zero confidence, no known factors.

    Parameters
    ----------
    raw_text : str
        The original free-text description provided by the user.
        Defaults to the empty string.
    confidence : float
        Confidence score between 0.0 (no knowledge) and 1.0 (confirmed).
        Values outside the closed interval ``[0.0, 1.0]`` are rejected
        during validation.  Defaults to 0.0.
    known_significant_factors : list[str]
        Factor names identified as significant in the prior knowledge.
        Defaults to a new empty list per instance.
    known_ranges_reliable : bool
        Whether the user's factor ranges are informed by prior data.
        Defaults to False.
    has_supporting_data : bool
        Whether the prior knowledge is backed by experimental data.
        Defaults to False.
    """

    raw_text: str = ""
    # ge/le make the documented 0..1 range an enforced constraint.
    confidence: float = Field(default=0.0, ge=0.0, le=1.0)
    known_significant_factors: list[str] = Field(default_factory=list)
    known_ranges_reliable: bool = False
    has_supporting_data: bool = False
# ---------------------------------------------------------------------------
# Strategy output models
# ---------------------------------------------------------------------------
# [docs] (documentation-link marker left over from the HTML source listing; not code)
class TransitionRule(BaseModel):
    """Rule governing the transition between consecutive experimental stages.

    All three fields are required; none has a default.

    Parameters
    ----------
    condition : str
        Human-readable condition, e.g. ``"2-5 significant factors identified"``.
    action : str
        Action to take when the condition is met, e.g. ``"proceed_to_rsm"``.
    fallback : str
        Action if the condition is not met, e.g. ``"broaden_factor_ranges"``.
    """

    condition: str
    action: str
    fallback: str
# [docs] (documentation-link marker left over from the HTML source listing; not code)
class ExperimentalStage(BaseModel):
    """One stage in a multi-stage experimental strategy.

    ``stage_number``, ``stage_name`` and ``design_type`` are required; the
    remaining fields default to empty/zero values.

    Parameters
    ----------
    stage_number : int
        1-based stage index.
    stage_name : str
        Human-readable name, e.g. ``"Screening"``, ``"Optimization"``.
    design_type : str
        Design type key, e.g. ``"plackett_burman"``, ``"ccd"``, ``"bbd"``.
    design_params : dict
        Design-specific parameters (resolution, center_points, alpha, etc.).
        Defaults to a new empty dict per instance.
    factors : list[str]
        Factor names involved in this stage.  Defaults to an empty list.
    estimated_runs : int
        Estimated number of experimental runs.  Defaults to 0.
    purpose : str
        Brief description of what this stage accomplishes.  Defaults to
        the empty string.
    success_criteria : dict
        Criteria for deeming this stage successful.  Defaults to a new
        empty dict per instance.
    transition_rules : list[TransitionRule]
        Rules governing the transition to the next stage.  Defaults to an
        empty list.
    """

    stage_number: int
    stage_name: str
    design_type: str
    design_params: dict[str, Any] = Field(default_factory=dict)
    factors: list[str] = Field(default_factory=list)
    estimated_runs: int = 0
    purpose: str = ""
    success_criteria: dict[str, Any] = Field(default_factory=dict)
    transition_rules: list[TransitionRule] = Field(default_factory=list)
# [docs] (documentation-link marker left over from the HTML source listing; not code)
class ExperimentalStrategy(BaseModel):
    """Complete multi-stage experimental strategy recommendation.

    Every field has a default, so an empty strategy can be created and
    filled in afterwards.

    Parameters
    ----------
    strategy_id : str
        Deterministic hash of the input specification.  Defaults to the
        empty string.
    stages : list[ExperimentalStage]
        Ordered list of experimental stages.  Defaults to an empty list.
    total_estimated_runs : int
        Sum of estimated runs across all stages.  Stored as given; this
        model does not recompute it from ``stages``.  Defaults to 0.
    budget_allocation : dict[str, int]
        Stage name to allocated run count mapping.  Defaults to an empty
        dict.
    assumptions : list[str]
        Key assumptions underlying the recommendation.
    risks : list[str]
        Risks and potential issues with the strategy.
    alternative_strategies : list[str]
        Brief descriptions of alternative approaches.
    domain : str
        The domain used for domain-specific adjustments.  A plain string
        here (not a ``DomainType``); defaults to ``"general"``.
    detail_level : str
        The detail level used for explanations.  Defaults to
        ``"intermediate"``.
    reasoning : list[str]
        Step-by-step explanation of the decision logic.
    """

    strategy_id: str = ""
    stages: list[ExperimentalStage] = Field(default_factory=list)
    total_estimated_runs: int = 0
    budget_allocation: dict[str, int] = Field(default_factory=dict)
    assumptions: list[str] = Field(default_factory=list)
    risks: list[str] = Field(default_factory=list)
    alternative_strategies: list[str] = Field(default_factory=list)
    domain: str = "general"
    detail_level: str = "intermediate"
    reasoning: list[str] = Field(default_factory=list)
# ---------------------------------------------------------------------------
# Input specification
# ---------------------------------------------------------------------------
# [docs] (documentation-link marker left over from the HTML source listing; not code)
class DOEProblemSpec(BaseModel):
    """Validated input specification for the strategy recommender.

    Wraps all inputs into a single object for pipeline processing and
    exposes read-only convenience properties derived from them.

    Parameters
    ----------
    factors : list[Factor]
        All candidate experimental factors.  Required.
    responses : list[Response]
        Response variables with optimisation goals.  Defaults to an empty
        list.
    budget : int or None
        Total run budget across all stages.  ``None`` (the default) means
        no budget was given.
    constraints : list[Constraint] or None
        Factor-space constraints.
    hard_to_change_factors : list[str] or None
        Factor names that are expensive to reset between runs.
    prior_knowledge : PriorKnowledge or None
        Parsed prior knowledge with confidence score.
    existing_data_summary : dict or None
        Summary of any existing experimental data.
    domain : DomainType
        Application domain.  Defaults to ``DomainType.general``.
    detail_level : {"novice", "intermediate"}
        Only these two literal values are accepted; defaults to
        ``"intermediate"``.
    """

    factors: list[Factor]
    responses: list[Response] = Field(default_factory=list)
    budget: int | None = None
    constraints: list[Constraint] | None = None
    hard_to_change_factors: list[str] | None = None
    prior_knowledge: PriorKnowledge | None = None
    existing_data_summary: dict[str, Any] | None = None
    domain: DomainType = DomainType.general
    detail_level: Literal["novice", "intermediate"] = "intermediate"

    def _count_factors_of_type(self, type_value: str) -> int:
        """Count the factors whose ``type.value`` equals *type_value*."""
        return sum(1 for f in self.factors if f.type.value == type_value)

    @property
    def n_factors(self) -> int:
        """Total number of factors."""
        return len(self.factors)

    @property
    def factor_names(self) -> list[str]:
        """Factor names, in the same order as ``factors``."""
        return [f.name for f in self.factors]

    @property
    def n_continuous(self) -> int:
        """Number of factors whose type value is ``"continuous"``."""
        return self._count_factors_of_type("continuous")

    @property
    def n_categorical(self) -> int:
        """Number of factors whose type value is ``"categorical"``."""
        return self._count_factors_of_type("categorical")

    @property
    def n_mixture(self) -> int:
        """Number of factors whose type value is ``"mixture"``."""
        return self._count_factors_of_type("mixture")

    @property
    def has_mixture(self) -> bool:
        """Whether any mixture factors are present."""
        return self.n_mixture > 0

    @property
    def has_hard_to_change(self) -> bool:
        """Whether any hard-to-change factors are specified.

        False for both ``None`` and an empty list.
        """
        return bool(self.hard_to_change_factors)

    @property
    def has_constraints(self) -> bool:
        """Whether any constraints are specified.

        False for both ``None`` and an empty list.
        """
        return bool(self.constraints)

    @property
    def goal_includes_optimization(self) -> bool:
        """Whether at least one response is defined.

        NOTE(review): this only checks that ``responses`` is non-empty; it
        does not inspect the goal of any individual response.  Presumably
        every declared response is meant to count as an optimisation
        target - confirm against ``Response``.
        """
        return len(self.responses) > 0