# Source code for process_improve.experiments.strategy.models

# (c) Kevin Dunn, 2010-2026. MIT License.

"""Pydantic models for the DOE strategy recommender.

Defines the input specification (``DOEProblemSpec``), the output
(``ExperimentalStrategy``, ``ExperimentalStage``, ``TransitionRule``),
and supporting types (``DomainType``, ``PriorKnowledge``).
"""

from __future__ import annotations

from enum import Enum
from typing import Any, Literal

from pydantic import BaseModel, Field

from process_improve.experiments.factor import Constraint, Factor, Response

# ---------------------------------------------------------------------------
# Enums
# ---------------------------------------------------------------------------



[docs]
class DomainType(str, Enum):
    """Application domain for domain-specific strategy adjustments.

    The enum mixes in ``str`` and every member's value is spelled exactly
    like its name, so a member compares equal to the corresponding plain
    string (for example ``DomainType.general == "general"``) and can be
    looked up from that string with ``DomainType("general")``.
    """

    pharma_formulation = "pharma_formulation"
    fermentation = "fermentation"
    food_science = "food_science"
    extraction = "extraction"
    analytical_method = "analytical_method"
    cell_culture = "cell_culture"
    bioprocess = "bioprocess"
    # Default value of ``DOEProblemSpec.domain``.
    general = "general"



# ---------------------------------------------------------------------------
# Prior knowledge
# ---------------------------------------------------------------------------



[docs]
class PriorKnowledge(BaseModel):
    """Parsed prior knowledge with a confidence score.

    Every field has a default, so the model can be created without any
    arguments; the defaults are an empty text, a confidence of 0.0, an
    empty list of significant factors, and both flags set to ``False``.

    Parameters
    ----------
    raw_text : str
        The original free-text description provided by the user.
        Defaults to an empty string.
    confidence : float
        Confidence score between 0.0 (no knowledge) and 1.0 (confirmed).
        Constrained to the closed interval [0.0, 1.0]; defaults to 0.0.
    known_significant_factors : list[str]
        Factor names identified as significant in the prior knowledge.
        Defaults to an empty list.
    known_ranges_reliable : bool
        Whether the user's factor ranges are informed by prior data.
        Defaults to ``False``.
    has_supporting_data : bool
        Whether the prior knowledge is backed by experimental data.
        Defaults to ``False``.
    """

    raw_text: str = ""
    # ``ge``/``le`` restrict the accepted values to 0.0 <= confidence <= 1.0.
    confidence: float = Field(default=0.0, ge=0.0, le=1.0)
    # ``default_factory`` builds a new list for each instance.
    known_significant_factors: list[str] = Field(default_factory=list)
    known_ranges_reliable: bool = False
    has_supporting_data: bool = False



# ---------------------------------------------------------------------------
# Strategy output models
# ---------------------------------------------------------------------------



[docs]
class TransitionRule(BaseModel):
    """Rule governing the transition between consecutive experimental stages.

    All three fields are required (none declares a default) and each is a
    plain string; this model does not restrict them to a fixed vocabulary.

    Parameters
    ----------
    condition : str
        Human-readable condition, e.g. ``"2-5 significant factors identified"``.
    action : str
        Action to take when the condition is met, e.g. ``"proceed_to_rsm"``.
    fallback : str
        Action if the condition is not met, e.g. ``"broaden_factor_ranges"``.
    """

    condition: str
    action: str
    fallback: str




[docs]
class ExperimentalStage(BaseModel):
    """One stage in a multi-stage experimental strategy.

    ``stage_number``, ``stage_name`` and ``design_type`` are required; every
    other field has a default (empty container, ``0`` or empty string).

    Parameters
    ----------
    stage_number : int
        1-based stage index. Declared as a plain ``int``; the lower bound is
        a convention and is not enforced by this model.
    stage_name : str
        Human-readable name, e.g. ``"Screening"``, ``"Optimization"``.
    design_type : str
        Design type key, e.g. ``"plackett_burman"``, ``"ccd"``, ``"bbd"``.
    design_params : dict
        Design-specific parameters (resolution, center_points, alpha, etc.).
        Keys are strings, values are unconstrained. Defaults to an empty dict.
    factors : list[str]
        Factor names involved in this stage. Defaults to an empty list.
    estimated_runs : int
        Estimated number of experimental runs. Defaults to 0.
    purpose : str
        Brief description of what this stage accomplishes.
        Defaults to an empty string.
    success_criteria : dict
        Criteria for deeming this stage successful.
        Keys are strings, values are unconstrained. Defaults to an empty dict.
    transition_rules : list[TransitionRule]
        Rules governing the transition to the next stage.
        Defaults to an empty list.
    """

    # Required identification of the stage.
    stage_number: int
    stage_name: str
    design_type: str
    # Optional details; ``default_factory`` builds a new container per instance.
    design_params: dict[str, Any] = Field(default_factory=dict)
    factors: list[str] = Field(default_factory=list)
    estimated_runs: int = 0
    purpose: str = ""
    success_criteria: dict[str, Any] = Field(default_factory=dict)
    transition_rules: list[TransitionRule] = Field(default_factory=list)




[docs]
class ExperimentalStrategy(BaseModel):
    """Complete multi-stage experimental strategy recommendation.

    Every field has a default, so an empty strategy can be created without
    arguments. The model only stores the values it is given: aggregate
    fields such as ``total_estimated_runs`` and ``budget_allocation`` are
    not derived from ``stages`` here and must be filled in by the caller.

    Parameters
    ----------
    strategy_id : str
        Deterministic hash of the input specification.
        Defaults to an empty string.
    stages : list[ExperimentalStage]
        Ordered list of experimental stages. Defaults to an empty list.
    total_estimated_runs : int
        Sum of estimated runs across all stages. Defaults to 0.
    budget_allocation : dict[str, int]
        Stage name to allocated run count mapping. Defaults to an empty dict.
    assumptions : list[str]
        Key assumptions underlying the recommendation.
    risks : list[str]
        Risks and potential issues with the strategy.
    alternative_strategies : list[str]
        Brief descriptions of alternative approaches.
    domain : str
        The domain used for domain-specific adjustments. Stored as a plain
        string (not a ``DomainType``); defaults to ``"general"``.
    detail_level : str
        The detail level used for explanations. Stored as a plain string;
        defaults to ``"intermediate"``.
    reasoning : list[str]
        Step-by-step explanation of the decision logic.
    """

    strategy_id: str = ""
    stages: list[ExperimentalStage] = Field(default_factory=list)
    # Stored as given; not recomputed from ``stages`` by this model.
    total_estimated_runs: int = 0
    budget_allocation: dict[str, int] = Field(default_factory=dict)
    assumptions: list[str] = Field(default_factory=list)
    risks: list[str] = Field(default_factory=list)
    alternative_strategies: list[str] = Field(default_factory=list)
    # Plain strings here, unlike the enum/Literal typed fields of DOEProblemSpec.
    domain: str = "general"
    detail_level: str = "intermediate"
    reasoning: list[str] = Field(default_factory=list)



# ---------------------------------------------------------------------------
# Input specification
# ---------------------------------------------------------------------------



[docs]
class DOEProblemSpec(BaseModel):
    """Single validated container for all inputs of the strategy recommender.

    Only ``factors`` is required; every other field has a default. On top of
    the stored fields, the class offers read-only convenience properties
    that summarise the factor list (counts per factor type, names) and
    report which optional inputs were actually supplied.

    Parameters
    ----------
    factors : list[Factor]
        All candidate experimental factors.
    responses : list[Response]
        Response variables with optimisation goals. Defaults to an empty list.
    budget : int or None
        Total run budget across all stages.
    constraints : list[Constraint] or None
        Factor-space constraints.
    hard_to_change_factors : list[str] or None
        Factor names that are expensive to reset between runs.
    prior_knowledge : PriorKnowledge or None
        Parsed prior knowledge with confidence score.
    existing_data_summary : dict or None
        Summary of any existing experimental data.
    domain : DomainType
        Application domain. Defaults to ``DomainType.general``.
    detail_level : str
        ``"novice"`` or ``"intermediate"`` (the default).
    """

    factors: list[Factor]
    responses: list[Response] = Field(default_factory=list)
    budget: int | None = None
    constraints: list[Constraint] | None = None
    hard_to_change_factors: list[str] | None = None
    prior_knowledge: PriorKnowledge | None = None
    existing_data_summary: dict[str, Any] | None = None
    domain: DomainType = DomainType.general
    detail_level: Literal["novice", "intermediate"] = "intermediate"

    def _count_factors_of_kind(self, kind: str) -> int:
        """Return how many factors have ``type.value`` equal to ``kind``."""
        matching = [factor for factor in self.factors if factor.type.value == kind]
        return len(matching)

    @property
    def n_factors(self) -> int:
        """Size of the factor list."""
        factor_count = len(self.factors)
        return factor_count

    @property
    def factor_names(self) -> list[str]:
        """Names of all factors, in the order the factors were given."""
        return [factor.name for factor in self.factors]

    @property
    def n_continuous(self) -> int:
        """How many factors are of the ``"continuous"`` type."""
        return self._count_factors_of_kind("continuous")

    @property
    def n_categorical(self) -> int:
        """How many factors are of the ``"categorical"`` type."""
        return self._count_factors_of_kind("categorical")

    @property
    def n_mixture(self) -> int:
        """How many factors are of the ``"mixture"`` type."""
        return self._count_factors_of_kind("mixture")

    @property
    def has_mixture(self) -> bool:
        """``True`` when at least one mixture factor is present."""
        mixture_count = self.n_mixture
        return mixture_count > 0

    @property
    def has_hard_to_change(self) -> bool:
        """``True`` when a non-empty list of hard-to-change factors was given."""
        # ``None`` and an empty list both count as "not specified".
        specified = self.hard_to_change_factors
        return bool(specified)

    @property
    def has_constraints(self) -> bool:
        """``True`` when a non-empty list of constraints was given."""
        # ``None`` and an empty list both count as "not specified".
        specified = self.constraints
        return bool(specified)

    @property
    def goal_includes_optimization(self) -> bool:
        """``True`` when at least one response is defined.

        NOTE(review): only the presence of responses is checked; the goal
        attached to each response is not inspected here.
        """
        return bool(self.responses)