cellstate_pipeline/compiler/
mod.rs

1//! Pack Config Compiler - Transform AST to Runtime Configuration
2//!
3//! This module takes a parsed AST and compiles it into runtime-usable configuration
4//! structs. The compiler validates semantic rules that can't be checked during parsing.
5//!
6//! # Pipeline
7//!
8//! ```text
9//! Pack Source → Lexer → Parser → AST → Compiler → CompiledConfig → Deploy
10//!                                                 ↓
11//!                                           Validation (semantic)
12//! ```
13
14use crate::ast::*;
15use cellstate_core::{DurationMs, MemoryCategory, SecretString};
16use serde::{Deserialize, Serialize};
17use std::collections::HashMap;
18use std::time::Duration;
19use thiserror::Error;
20
21// ============================================================================
22// COMPILE ERRORS
23// ============================================================================
24
25/// Errors that can occur during pack compilation.
///
/// Every variant carries only owned `String` payloads, and the enum derives
/// `Clone` and `PartialEq`, so values can be copied and compared directly.
/// The `#[error(...)]` attribute on each variant supplies its `Display` text
/// (via `thiserror::Error`).
26#[derive(Debug, Clone, Error, PartialEq)]
27pub enum CompileError {
28    /// Reference to undefined entity
    ///
    /// `kind` is the category label and `name` is the identifier that could not be resolved.
29    #[error("undefined reference: {kind} '{name}' is not defined")]
30    UndefinedReference { kind: String, name: String },
31
32    /// Duplicate definition
    ///
    /// Produced by `NameRegistry::register` when a name is registered twice under the same kind.
33    #[error("duplicate definition: {kind} '{name}' is already defined")]
34    DuplicateDefinition { kind: String, name: String },
35
36    /// Invalid configuration value
    ///
    /// `field` names the offending setting; `reason` is a free-text explanation.
37    #[error("invalid value for {field}: {reason}")]
38    InvalidValue { field: String, reason: String },
39
40    /// Missing required field
41    #[error("missing required field: {field}")]
42    MissingField { field: String },
43
44    /// Circular dependency detected
    ///
    /// `cycle` is a pre-rendered description of the cycle; it is interpolated verbatim.
45    #[error("circular dependency detected: {cycle}")]
46    CircularDependency { cycle: String },
47
48    /// Type mismatch
49    #[error("type mismatch: expected {expected}, got {actual}")]
50    TypeMismatch { expected: String, actual: String },
51
52    /// Invalid duration format
    ///
    /// `value` is the text that failed to be interpreted as a duration.
53    #[error("invalid duration format: {value}")]
54    InvalidDuration { value: String },
55
56    /// Semantic validation error
    ///
    /// Catch-all for semantic rule violations that have no dedicated variant.
57    #[error("semantic error: {message}")]
58    SemanticError { message: String },
59}
60
/// Shorthand for a `Result` whose error type is [`CompileError`].
61pub type CompileResult<T> = Result<T, CompileError>;
62
63// ============================================================================
64// COMPILED CONFIGURATION TYPES
65// ============================================================================
66
67/// Compiled adapter configuration.
///
/// Plain data carrier with public fields; round-trips through serde
/// (`Serialize` + `Deserialize`).
68#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
69pub struct AdapterConfig {
    /// Adapter name.
70    pub name: String,
    /// Backend this adapter targets.
71    pub adapter_type: CompiledAdapterType,
    /// Connection setting, stored as an uninterpreted string.
    // NOTE(review): presumably a connection string/URL — confirm against the adapter compiler.
72    pub connection: String,
    /// Additional string key/value options.
73    pub options: HashMap<String, String>,
74}
75
76/// Compiled adapter types.
///
/// `Copy` enum of unit variants; serde renames the variants to snake_case
/// (`postgres`, `redis`, `memory`).
77#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
78#[serde(rename_all = "snake_case")]
79pub enum CompiledAdapterType {
80    Postgres,
81    Redis,
82    Memory,
83}
84
85/// Compiled memory configuration.
///
/// Aggregates everything compiled for one memory definition: its schema,
/// retention, lifecycle, indexes, injection triggers and modifiers.
86#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
87pub struct MemoryConfig {
    /// Memory name.
88    pub name: String,
89    /// Memory category (uses `cellstate_core::MemoryCategory`).
90    pub memory_type: MemoryCategory,
    /// Field definitions that make up this memory's schema.
91    pub schema: Vec<FieldConfig>,
92    /// Retention policy (uses `cellstate_core::TTL`).
93    pub retention: cellstate_core::TTL,
    /// Lifecycle mode: explicit, or auto-close on a trigger.
94    pub lifecycle: CompiledLifecycle,
    /// Optional parent, held by name.
    // NOTE(review): presumably the name of another memory definition — confirm.
95    pub parent: Option<String>,
    /// Index definitions for this memory.
96    pub indexes: Vec<IndexConfig>,
    /// Triggers listed under `inject_on`.
97    pub inject_on: Vec<CompiledTrigger>,
    /// Artifact entries, stored as plain strings.
98    pub artifacts: Vec<String>,
    /// Optional embeddable / summarizable / lockable modifiers.
99    pub modifiers: MemoryModifiers,
100}
101
102/// Compiled field configuration.
///
/// One entry of a [`MemoryConfig`] schema.
103#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
104pub struct FieldConfig {
    /// Field name.
105    pub name: String,
106    /// Field type (uses `cellstate_core::FieldType`).
107    pub field_type: cellstate_core::FieldType,
    /// Nullability flag for the field.
108    pub nullable: bool,
109    /// Security configuration for PII fields.
    ///
    /// `None` when the field carries no security configuration.
110    pub security: Option<CompiledFieldSecurity>,
111}
112
113/// Compiled PII classification.
///
/// `Public` is the `Default` value. Variants serialize in snake_case
/// (`public`, `internal`, `confidential`, `restricted`, `secret`).
/// The inherent methods on this type treat `Confidential`, `Restricted` and
/// `Secret` as requiring redaction, and `Secret` alone as requiring encryption.
114#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
115#[serde(rename_all = "snake_case")]
116pub enum CompiledPIIClassification {
117    #[default]
118    Public,
119    Internal,
120    Confidential,
121    Restricted,
122    Secret,
123}
124
125impl CompiledPIIClassification {
126    /// Whether this classification requires runtime redaction.
127    pub fn requires_redaction(&self) -> bool {
128        matches!(self, Self::Confidential | Self::Restricted | Self::Secret)
129    }
130
131    /// Whether this classification requires encrypted vault storage.
132    pub fn requires_encryption(&self) -> bool {
133        matches!(self, Self::Secret)
134    }
135}
136
137/// Compiled field security configuration.
///
/// Attached to a [`FieldConfig`] through its optional `security` field.
138#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
139pub struct CompiledFieldSecurity {
140    /// Sensitivity classification
141    pub classification: CompiledPIIClassification,
142    /// Agent can pass but not read content
143    pub opaque: bool,
144    /// Cannot be modified after creation
145    pub immutable: bool,
146    /// All access is logged
147    pub audited: bool,
148    /// Redact in logs and error messages
149    pub redact_in_logs: bool,
150    /// Source from environment variable
    ///
    /// `None` when no environment variable is configured as the source.
151    pub env_source: Option<String>,
152}
153
154// NOTE: Field types use `cellstate_core::FieldType`.
155// The former `CompiledFieldType` was removed as it was identical.
156
157// NOTE: Retention uses `cellstate_core::TTL`.
158// The former `CompiledRetention` was removed - Duration is converted to milliseconds.
159
160/// Compiled lifecycle configuration.
///
/// Variant names are serialized in snake_case (`explicit`, `auto_close`).
161#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
162#[serde(rename_all = "snake_case")]
163pub enum CompiledLifecycle {
164    /// Explicit lifecycle management (manual control)
165    Explicit,
166    /// Auto-close on trigger
    ///
    /// The payload is the trigger that causes the close.
167    AutoClose(CompiledTrigger),
168}
169
170/// Compiled trigger types.
///
/// Used by lifecycles, policy rules, summarization settings and memory
/// `inject_on` lists in this module. Variant names serialize in snake_case.
171#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
172#[serde(rename_all = "snake_case")]
173pub enum CompiledTrigger {
174    TaskStart,
175    TaskEnd,
176    ScopeClose,
177    TurnEnd,
178    Manual,
    /// Schedule trigger carrying its specification as a string.
    // NOTE(review): the string format (e.g. cron) is not visible here — confirm.
179    Schedule(String),
    /// Threshold-based trigger; `threshold` is a signed 32-bit value.
180    DosageReached { threshold: i32 },
    /// Count-based trigger over turns.
181    TurnCount { count: i32 },
    /// Count-based trigger over artifacts.
182    ArtifactCount { count: i32 },
183}
184
185/// Compiled lifecycle actions.
///
/// Actions are attached to triggers through [`CompiledPolicyRule`]. Variant
/// names serialize in snake_case (e.g. `extract_artifacts`, `auto_summarize`).
186#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
187#[serde(rename_all = "snake_case")]
188pub enum CompiledAction {
    /// Summarize the named target.
189    Summarize {
190        target: String,
191    },
    /// Extract artifacts from the named target.
192    ExtractArtifacts {
193        target: String,
194    },
    /// Checkpoint the named target.
195    Checkpoint {
196        target: String,
197    },
    /// Prune the named target, selecting entries with the `criteria` filter.
198    Prune {
199        target: String,
200        criteria: CompiledFilter,
201    },
    /// Notify the named target.
202    Notify {
203        target: String,
204    },
    /// Inject into the named target using the given injection mode.
205    Inject {
206        target: String,
207        mode: CompiledInjectionMode,
208    },
    /// Summarize from one abstraction level to another; `create_edges` is a boolean option.
209    AutoSummarize {
210        source_level: CompiledAbstractionLevel,
211        target_level: CompiledAbstractionLevel,
212        create_edges: bool,
213    },
214}
215
216/// Compiled index configuration.
///
/// One entry of [`MemoryConfig`]'s `indexes` list.
217#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
218pub struct IndexConfig {
    /// Name of the indexed field.
219    pub field: String,
    /// Index kind.
220    pub index_type: CompiledIndexType,
    /// Additional string key/value options.
221    pub options: HashMap<String, String>,
222}
223
224/// Compiled index types.
///
/// Each variant name is a single capitalised word, so the snake_case wire
/// names are simply the lowercase forms: `btree`, `hash`, `gin`, `hnsw`, `ivfflat`.
225#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
226#[serde(rename_all = "snake_case")]
227pub enum CompiledIndexType {
228    Btree,
229    Hash,
230    Gin,
231    Hnsw,
232    Ivfflat,
233}
234
235/// Memory modifiers (embeddable, summarizable, lockable).
///
/// Each modifier is independent and optional; the derived `Default` leaves
/// all three as `None`.
236#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
237pub struct MemoryModifiers {
    /// Embeddable settings, if configured.
238    pub embeddable: Option<EmbeddableConfig>,
    /// Summarizable settings, if configured.
239    pub summarizable: Option<SummarizableConfig>,
    /// Lockable settings, if configured.
240    pub lockable: Option<LockableConfig>,
241}
242
243/// Summarizable modifier configuration.
244#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
245pub struct SummarizableConfig {
    /// Summary style (brief or detailed).
246    pub style: SummarizeStyle,
    /// Triggers associated with summarization.
247    pub on_triggers: Vec<CompiledTrigger>,
248}
249
250/// Embeddable modifier configuration.
251#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
252pub struct EmbeddableConfig {
    /// Provider, held as a string.
    // NOTE(review): presumably the name of a provider definition — confirm where it is resolved.
253    pub provider: String,
254}
255
256/// Summary styles.
///
/// Serialized in snake_case as `brief` / `detailed`.
257#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
258#[serde(rename_all = "snake_case")]
259pub enum SummarizeStyle {
260    Brief,
261    Detailed,
262}
263
264/// Lockable modifier configuration.
265#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
266pub struct LockableConfig {
    /// Lock mode (exclusive or shared).
267    pub mode: CompiledLockMode,
268}
269
270/// Compiled lock modes.
///
/// Serialized in snake_case as `exclusive` / `shared`.
271#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
272#[serde(rename_all = "snake_case")]
273pub enum CompiledLockMode {
274    Exclusive,
275    Shared,
276}
277
278/// Compiled policy configuration.
///
/// A named list of trigger → actions rules.
279#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
280pub struct PolicyConfig {
    /// Policy name.
281    pub name: String,
    /// Rules belonging to this policy.
282    pub rules: Vec<CompiledPolicyRule>,
283}
284
285/// Compiled policy rule.
///
/// Pairs one trigger with the list of actions attached to it.
286#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
287pub struct CompiledPolicyRule {
    /// Trigger for this rule.
288    pub trigger: CompiledTrigger,
    /// Actions attached to the trigger.
289    pub actions: Vec<CompiledAction>,
290}
291
292/// How server-side OAuth credentials should be delivered to an exec tool.
///
/// Serialized in snake_case as `environment` / `file`. Referenced by
/// `CompiledToolConfig::credential_delivery`.
293#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
294#[serde(rename_all = "snake_case")]
295pub enum CompiledCredentialDelivery {
296    Environment,
297    File,
298}
299
300/// Compiled injection configuration.
///
/// Injection definitions are anonymous: the compiler does not register a
/// name for them during duplicate detection.
301#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
302pub struct InjectionConfig {
    /// Injection source, held as a string.
303    pub source: String,
    /// Injection target, held as a string.
304    pub target: String,
    /// How content is selected for injection.
305    pub mode: CompiledInjectionMode,
    /// Priority value.
    // NOTE(review): ordering direction (higher-first vs lower-first) is not visible here — confirm.
306    pub priority: i32,
    /// Optional token cap.
307    pub max_tokens: Option<i32>,
    /// Optional filter expression.
308    pub filter: Option<CompiledFilter>,
309}
310
311/// Compiled injection modes.
///
/// Variant names serialize in snake_case (`full`, `summary`, `top_k`,
/// `relevant`). `Eq` is not derived because `Relevant` holds an `f32`.
312#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
313#[serde(rename_all = "snake_case")]
314pub enum CompiledInjectionMode {
315    Full,
316    Summary,
    /// Top-`k` selection; `k` is a signed 32-bit value.
317    TopK { k: i32 },
    /// Relevance-based selection with a floating-point threshold.
318    Relevant { threshold: f32 },
319}
320
321/// Compiled filter expression.
///
/// A recursive expression tree: `Comparison` is the leaf, while `And`, `Or`
/// and `Not` combine sub-filters. `Not` boxes its operand so the recursive
/// type has a finite size.
322#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
323#[serde(rename_all = "snake_case")]
324pub enum CompiledFilter {
    /// Compare `field` against `value` using `op`.
325    Comparison {
326        field: String,
327        op: CompiledOperator,
328        value: CompiledFilterValue,
329    },
    /// Conjunction of sub-filters.
330    And(Vec<CompiledFilter>),
    /// Disjunction of sub-filters.
331    Or(Vec<CompiledFilter>),
    /// Negation of a sub-filter.
332    Not(Box<CompiledFilter>),
333}
334
335/// Compiled filter values.
///
/// Right-hand side of a `CompiledFilter::Comparison`. Besides literals it
/// includes the symbolic variants `CurrentTrajectory`, `CurrentScope` and `Now`.
// NOTE(review): the symbolic variants are presumably resolved at evaluation time — confirm.
336#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
337#[serde(rename_all = "snake_case")]
338pub enum CompiledFilterValue {
339    String(String),
    /// Numeric literal; all numbers are stored as `f64`.
340    Number(f64),
341    Bool(bool),
342    Null,
343    CurrentTrajectory,
344    CurrentScope,
345    Now,
    /// List of values (may nest).
346    Array(Vec<CompiledFilterValue>),
347}
348
349/// Compiled operators.
///
/// Comparison operators used by `CompiledFilter::Comparison`. Serialized in
/// snake_case (`eq`, `ne`, `gt`, `lt`, `ge`, `le`, `contains`, `regex`, `in`).
350#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
351#[serde(rename_all = "snake_case")]
352pub enum CompiledOperator {
353    Eq,
354    Ne,
355    Gt,
356    Lt,
357    Ge,
358    Le,
359    Contains,
360    Regex,
361    In,
362}
363
364/// Compiled trajectory configuration.
365#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
366pub struct TrajectoryConfig {
    /// Trajectory name.
367    pub name: String,
    /// Optional free-text description.
368    pub description: Option<String>,
    /// Agent type, held as a string.
    // NOTE(review): presumably the name of an agent definition — confirm where it is validated.
369    pub agent_type: String,
    /// Token budget.
370    pub token_budget: i32,
    /// Memory references, held as strings.
371    pub memory_refs: Vec<String>,
    /// Optional arbitrary JSON metadata.
372    pub metadata: Option<serde_json::Value>,
373}
374
375/// Compiled agent configuration.
///
/// Distinct from [`CompiledPackAgentConfig`], which describes pack
/// agent-to-toolset bindings.
376#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
377pub struct AgentConfig {
    /// Agent name.
378    pub name: String,
    /// Capability entries, held as strings.
379    pub capabilities: Vec<String>,
    /// Concurrency and timeout constraints.
380    pub constraints: CompiledAgentConstraints,
    /// Read / write / lock permission lists.
381    pub permissions: CompiledPermissionMatrix,
382}
383
384/// Compiled agent constraints.
385#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
386pub struct CompiledAgentConstraints {
    /// Concurrency limit.
387    pub max_concurrent: i32,
    /// Timeout, as a `std::time::Duration`.
388    pub timeout: Duration,
389}
390
391/// Compiled permission matrix.
///
/// Three independent lists of string entries, one per access kind.
// NOTE(review): entries are presumably memory names — confirm against the agent compiler.
392#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
393pub struct CompiledPermissionMatrix {
    /// Entries granted read access.
394    pub read: Vec<String>,
    /// Entries granted write access.
395    pub write: Vec<String>,
    /// Entries granted lock access.
396    pub lock: Vec<String>,
397}
398
399/// Compiled cache configuration.
///
/// Stored as a single optional value on [`CompiledConfig`] (`cache`).
400#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
401pub struct CacheConfig {
    /// Cache backend.
402    pub backend: CompiledCacheBackend,
    /// Optional path.
    // NOTE(review): presumably the on-disk location for the LMDB backend — confirm.
403    pub path: Option<String>,
    /// Cache size in megabytes (per the field name).
404    pub size_mb: i32,
    /// Freshness mode applied by default.
405    pub default_freshness: CompiledFreshness,
    /// Optional cap on the number of entries.
406    pub max_entries: Option<i32>,
    /// Optional time-to-live.
407    pub ttl: Option<Duration>,
408}
409
410/// Compiled cache backend types.
///
/// Serialized in snake_case as `lmdb` / `memory`.
411#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
412#[serde(rename_all = "snake_case")]
413pub enum CompiledCacheBackend {
414    Lmdb,
415    Memory,
416}
417
418/// Compiled freshness configuration.
///
/// Variant names serialize in snake_case (`best_effort`, `strict`).
419#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
420#[serde(rename_all = "snake_case")]
421pub enum CompiledFreshness {
    /// Best-effort freshness with an upper bound on staleness.
422    BestEffort { max_staleness: Duration },
    /// Strict freshness; carries no parameters.
423    Strict,
424}
425
426/// Compiled provider configuration from pack config.
427/// Note: Different from `cellstate_core::config::ProviderConfig` (system bootstrap config).
428#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
429pub struct CompiledProviderConfig {
    /// Provider name.
430    pub name: String,
    /// Provider kind.
431    pub provider_type: CompiledProviderType,
    /// API key, wrapped in `cellstate_core::SecretString` rather than a plain `String`.
432    pub api_key: SecretString,
    /// Model identifier, held as a string.
433    pub model: String,
    /// Additional string key/value options.
434    pub options: HashMap<String, String>,
435}
436
437/// Compiled provider types.
///
/// Variant names are serialized through serde's `snake_case` rule.
// NOTE(review): serde's snake_case conversion inserts `_` before every
// uppercase letter after the first, so `OpenAI` is rendered as `open_a_i`
// (and `OpenRouter` as `open_router`). Confirm `open_a_i` is the intended
// wire name before relying on it in stored configs.
438#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
439#[serde(rename_all = "snake_case")]
440pub enum CompiledProviderType {
441    OpenAI,
442    Anthropic,
443    Local,
444    Custom,
445    OpenRouter,
446}
447
448/// Compiled evolution configuration.
449#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
450pub struct EvolutionConfig {
    /// Evolution name.
451    pub name: String,
    /// Baseline, held as a string.
452    pub baseline: String,
    /// Candidates to evaluate, held as strings.
453    pub candidates: Vec<String>,
    /// Number of benchmark queries.
454    pub benchmark_queries: i32,
    /// Metric entries, held as strings.
455    pub metrics: Vec<String>,
456}
457
458/// Compiled summarization policy configuration.
459#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
460pub struct SummarizationPolicyConfig {
    /// Policy name.
461    pub name: String,
    /// Triggers for this policy.
462    pub triggers: Vec<CompiledTrigger>,
    /// Abstraction level summarized from.
463    pub source_level: CompiledAbstractionLevel,
    /// Abstraction level summarized to.
464    pub target_level: CompiledAbstractionLevel,
    /// Cap on the number of sources.
465    pub max_sources: i32,
    /// Boolean edge-creation option.
466    pub create_edges: bool,
467}
468
469/// Compiled abstraction levels.
///
/// Serialized in snake_case as `raw`, `summary`, `principle`.
470#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
471#[serde(rename_all = "snake_case")]
472pub enum CompiledAbstractionLevel {
473    Raw,
474    Summary,
475    Principle,
476}
477
478// ============================================================================
479// TOOL REGISTRY (PACK-DRIVEN RUNTIME CAPABILITIES)
480// ============================================================================
481
482/// Compiled tool kind.
///
/// Variant names serialize in snake_case (`exec`, `wasm_exec`, `prompt`,
/// `bash`, `browser`, `composio`, `composio_gateway`).
483#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
484#[serde(rename_all = "snake_case")]
485pub enum CompiledToolKind {
    /// Exec tool.
486    Exec,
487    /// WASM executable — `.wasm` WASI module executed in the Level 2 wasmtime sandbox.
488    /// Structurally isolated: no network ABI, no filesystem preopens, fuel-limited CPU.
489    WasmExec,
    /// Prompt tool.
490    Prompt,
491    /// Bash tool — executed in the SDK's TypeScript sandbox (just-bash).
492    /// Commands run against the agent's virtual filesystem (just-bash FS layer).
493    /// File writes flow through MutationPipeline; execution is IO.
494    Bash,
495    /// Browser tool — accessibility-first browser automation via Stagehand/Playwright.
496    /// Executed client-side with resource limits and PII redaction.
497    Browser,
498    /// Composio tool — managed SaaS integrations (800+ tools).
499    /// OAuth and auth handled by Composio; CELLSTATE manages state + event DAG.
500    Composio,
501    /// Composio Gateway meta-tool — dynamic runtime discovery of Composio integrations.
502    /// Not a tool from the pack; synthesized when composio_gateway.enabled = true.
503    ComposioGateway,
504}
505
506/// Compiled tool definition.
///
/// A single struct covers every [`CompiledToolKind`]; kind-specific settings
/// are optional fields. Fields annotated with
/// `#[serde(default, skip_serializing_if = "Option::is_none")]` are omitted
/// from serialized output when `None` and default to `None` when absent on
/// input.
507#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
508pub struct CompiledToolConfig {
509    /// Fully-qualified tool id (e.g., "tools.prompts.search")
510    pub id: String,
    /// Tool kind.
511    pub kind: CompiledToolKind,
    /// Optional command string.
512    pub cmd: Option<String>,
    /// Optional prompt markdown reference.
    // NOTE(review): whether this holds a path or inline markdown is not visible here — confirm.
513    pub prompt_md: Option<String>,
514    /// Path to contract JSON Schema file (original reference).
515    pub contract: Option<String>,
516    /// Compiled JSON Schema for runtime input validation.
517    /// Populated during pack compilation if contract is specified.
518    #[serde(default, skip_serializing_if = "Option::is_none")]
519    pub compiled_schema: Option<serde_json::Value>,
    /// Optional result format, held as a string.
520    pub result_format: Option<String>,
    /// Optional timeout in milliseconds (per the field name).
521    pub timeout_ms: Option<i32>,
    /// Optional network-access flag.
522    pub allow_network: Option<bool>,
    /// Optional filesystem-access flag.
523    pub allow_fs: Option<bool>,
    /// Optional subprocess flag.
524    pub allow_subprocess: Option<bool>,
525    /// How OAuth credentials should be delivered to subprocess tools.
526    #[serde(default, skip_serializing_if = "Option::is_none")]
527    pub credential_delivery: Option<CompiledCredentialDelivery>,
528    /// OAuth providers whose tokens are injected server-side at execution time.
529    #[serde(default, skip_serializing_if = "Option::is_none")]
530    pub oauth_providers: Option<Vec<String>>,
531    /// Required scopes per OAuth provider.
532    #[serde(default, skip_serializing_if = "Option::is_none")]
533    pub oauth_required_scopes: Option<std::collections::HashMap<String, Vec<String>>>,
534    /// Allowed commands whitelist for Bash tools (empty = all allowed).
535    #[serde(default, skip_serializing_if = "Option::is_none")]
536    pub allowed_commands: Option<Vec<String>>,
537    /// Blocked commands blacklist for Bash tools.
538    #[serde(default, skip_serializing_if = "Option::is_none")]
539    pub blocked_commands: Option<Vec<String>>,
540    /// If true, tool execution requires challenge-response authentication.
541    /// The caller must first receive a nonce, sign it, and submit the proof.
542    #[serde(default, skip_serializing_if = "Option::is_none")]
543    pub requires_challenge: Option<bool>,
544    /// Browser tool: allowed domain patterns (glob). Empty = all allowed.
545    #[serde(default, skip_serializing_if = "Option::is_none")]
546    pub allowed_domains: Option<Vec<String>>,
547    /// Browser tool: blocked domain patterns (glob).
548    #[serde(default, skip_serializing_if = "Option::is_none")]
549    pub blocked_domains: Option<Vec<String>>,
550    /// Browser tool: maximum navigations per session.
551    #[serde(default, skip_serializing_if = "Option::is_none")]
552    pub max_navigations: Option<i32>,
553    /// Browser tool: maximum session duration in milliseconds.
554    #[serde(default, skip_serializing_if = "Option::is_none")]
555    pub max_duration_ms: Option<i32>,
556    /// Browser tool: use vision (screenshots) instead of accessibility tree.
557    #[serde(default, skip_serializing_if = "Option::is_none")]
558    pub use_vision: Option<bool>,
559    /// Composio tool: toolkit name (e.g. "GMAIL", "SLACK", "GITHUB").
560    #[serde(default, skip_serializing_if = "Option::is_none")]
561    pub composio_toolkit: Option<String>,
562    /// Composio tool: action names to expose.
563    #[serde(default, skip_serializing_if = "Option::is_none")]
564    pub composio_actions: Option<Vec<String>>,
565    /// Composio Gateway: true if this is the gateway meta-tool.
566    #[serde(default, skip_serializing_if = "Option::is_none")]
567    pub is_composio_gateway: Option<bool>,
568}
569
570/// Compiled toolset definition.
///
/// A named group of tools.
571#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
572pub struct CompiledToolsetConfig {
    /// Toolset name.
573    pub name: String,
    /// Member tools, held as strings.
    // NOTE(review): presumably fully-qualified tool ids matching `CompiledToolConfig::id` — confirm.
574    pub tools: Vec<String>,
575}
576
577/// Pack agent-to-toolset bindings with extracted markdown metadata.
578///
579/// This struct represents the fully-compiled agent configuration, combining
580/// manifest settings with extracted markdown metadata. All reference fields
581/// (profile, adapter, format) have been validated during compilation.
///
/// Serde notes: `enabled` falls back to `default_true()` when absent from
/// the input; optional fields and the `extracted_*` / `tags` vectors are
/// skipped on output when `None` / empty and default to `None` / empty on
/// input.
582#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
583pub struct CompiledPackAgentConfig {
584    /// Agent identifier (key from [agents.NAME] in manifest).
585    pub name: String,
586    /// Whether agent is enabled. Defaults to true.
587    #[serde(default = "default_true")]
588    pub enabled: bool,
589    /// Profile reference (validated to exist in manifest.profiles).
590    pub profile: String,
591    /// Adapter override. Falls back to profile's adapter if None.
592    #[serde(default, skip_serializing_if = "Option::is_none")]
593    pub adapter: Option<String>,
594    /// Format override. Falls back to profile's format if None.
595    #[serde(default, skip_serializing_if = "Option::is_none")]
596    pub format: Option<String>,
597    /// Resolved format (from agent or profile). Always populated.
598    pub resolved_format: String,
599    /// Token budget override.
600    #[serde(default, skip_serializing_if = "Option::is_none")]
601    pub token_budget: Option<i32>,
602    /// Path to agent prompt markdown.
603    pub prompt_md: String,
604    /// Toolset references (validated to exist in manifest.toolsets).
605    pub toolsets: Vec<String>,
606    /// Constraints extracted from ```constraints block in agent markdown.
607    #[serde(default, skip_serializing_if = "Vec::is_empty")]
608    pub extracted_constraints: Vec<String>,
609    /// Tool references extracted from ```tools block (validated against TOML).
610    #[serde(default, skip_serializing_if = "Vec::is_empty")]
611    pub extracted_tool_refs: Vec<String>,
612    /// RAG configuration extracted from ```rag block.
613    #[serde(default, skip_serializing_if = "Option::is_none")]
614    pub extracted_rag_config: Option<String>,
615    /// Human-readable description (from manifest or extracted from prompt_md).
616    #[serde(default, skip_serializing_if = "Option::is_none")]
617    pub description: Option<String>,
618    /// Tags for skill discovery and categorization.
619    #[serde(default, skip_serializing_if = "Vec::is_empty")]
620    pub tags: Vec<String>,
621    /// Resolved system prompt text extracted from the agent's markdown file.
622    ///
623    /// Populated from `MarkdownDoc.system` during compilation so downstream
624    /// consumers (A2A, AGENTS.md) do not need to re-read the markdown file.
625    #[serde(default, skip_serializing_if = "Option::is_none")]
626    pub system_prompt: Option<String>,
627}
628
629/// Default value for enabled field (true).
///
/// Referenced by name from `#[serde(default = "default_true")]` on
/// `CompiledPackAgentConfig::enabled`; serde calls it when the field is
/// missing from the input.
630fn default_true() -> bool {
631    true
632}
633
634/// Pack injection metadata for runtime RAG wiring.
///
/// Has the same `source` / `target` / `mode` / `priority` / `max_tokens`
/// fields as [`InjectionConfig`], plus `entity_type`, and has no `filter`.
635#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
636pub struct CompiledPackInjectionConfig {
    /// Injection source, held as a string.
637    pub source: String,
    /// Injection target, held as a string.
638    pub target: String,
639    /// Explicit entity type for injection targeting (e.g., "note", "artifact").
640    pub entity_type: Option<String>,
    /// How content is selected for injection.
641    pub mode: CompiledInjectionMode,
    /// Priority value.
642    pub priority: i32,
    /// Optional token cap.
643    pub max_tokens: Option<i32>,
644}
645
646/// Pack provider routing hints.
///
/// Every field is optional; `None` means the pack did not specify that hint.
647#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
648pub struct CompiledPackRoutingConfig {
    /// Routing strategy, held as a string.
649    pub strategy: Option<String>,
    /// Provider to use for embeddings.
650    pub embedding_provider: Option<String>,
    /// Provider to use for summarization.
651    pub summarization_provider: Option<String>,
    /// Provider to use for chat.
652    pub chat_provider: Option<String>,
    /// Sort hint, held as a string.
653    pub sort: Option<String>,
    /// `zdr` flag.
    // NOTE(review): presumably "zero data retention" — confirm against the routing consumer.
654    pub zdr: Option<bool>,
    /// Ordered list of provider names.
655    pub provider_order: Option<Vec<String>>,
    /// Data-collection hint, held as a string.
656    pub data_collection: Option<String>,
657}
658
659// ============================================================================
660// COMPILED CONFIG (THE OUTPUT)
661// ============================================================================
662
663/// The complete compiled configuration from a pack file.
664/// This is the output of the compiler and can be used to configure the runtime.
///
/// `PipelineCompiler::compile` starts from `CompiledConfig::default()`, sets
/// `version` from the AST, and pushes one entry into the matching list for
/// each compiled definition. Fields marked `#[serde(default)]` may be absent
/// from serialized input; those that also carry `skip_serializing_if` are
/// omitted from output when `None`.
665#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
666pub struct CompiledConfig {
667    /// Pack version string
668    pub version: String,
669
670    /// Storage adapters
671    pub adapters: Vec<AdapterConfig>,
672
673    /// Memory definitions
674    pub memories: Vec<MemoryConfig>,
675
676    /// Lifecycle policies
677    pub policies: Vec<PolicyConfig>,
678
679    /// Context injections
680    pub injections: Vec<InjectionConfig>,
681
682    /// Trajectory templates
683    pub trajectories: Vec<TrajectoryConfig>,
684
685    /// Agent type definitions
686    pub agents: Vec<AgentConfig>,
687
688    /// Evolution experiments
689    pub evolutions: Vec<EvolutionConfig>,
690
691    /// Summarization policies
692    pub summarization_policies: Vec<SummarizationPolicyConfig>,
693
694    /// Cache configuration (optional, only one allowed)
695    pub cache: Option<CacheConfig>,
696
697    /// LLM providers
698    pub providers: Vec<CompiledProviderConfig>,
699
700    /// Pack tool registry (optional; empty when compiling raw pack)
701    #[serde(default)]
702    pub tools: Vec<CompiledToolConfig>,
703
704    /// Pack toolsets (optional; empty when compiling raw pack)
705    #[serde(default)]
706    pub toolsets: Vec<CompiledToolsetConfig>,
707
708    /// Pack agent bindings (optional; empty when compiling raw pack)
709    #[serde(default)]
710    pub pack_agents: Vec<CompiledPackAgentConfig>,
711
712    /// Pack injection metadata (optional; empty when compiling raw pack)
713    #[serde(default)]
714    pub pack_injections: Vec<CompiledPackInjectionConfig>,
715
716    /// Pack routing hints (optional)
717    #[serde(default)]
718    pub pack_routing: Option<CompiledPackRoutingConfig>,
719
720    /// File hashes for artifact determinism (lockfile support).
721    /// Maps relative file paths to their SHA-256 hashes.
722    /// Used to detect when pack sources change without recompilation.
723    #[serde(default)]
724    pub file_hashes: HashMap<String, String>,
725
726    /// Flow definitions extracted from ```flow blocks
727    #[serde(default)]
728    pub flows: Vec<crate::pack::flow::CompiledFlow>,
729
730    /// Composio MCP Gateway configuration (optional).
731    /// When present and enabled, the `composio_search_tools` meta-tool is active.
732    #[serde(default, skip_serializing_if = "Option::is_none")]
733    pub composio_gateway: Option<ComposioGatewayConfig>,
734
735    // ========================================================================
736    // Pack metadata for AAIF convergence outputs (Skills, AGENTS.md, A2A, llms.txt)
737    // ========================================================================
738    /// Pack project name (from meta.project).
739    #[serde(default, skip_serializing_if = "Option::is_none")]
740    pub pack_meta_project: Option<String>,
741
742    /// Pack version (from meta.version).
743    #[serde(default, skip_serializing_if = "Option::is_none")]
744    pub pack_meta_version: Option<String>,
745
746    /// Pack description (from meta.description).
747    #[serde(default, skip_serializing_if = "Option::is_none")]
748    pub pack_meta_description: Option<String>,
749
750    /// Build/test/coding convention instructions (from meta.instructions).
751    #[serde(default, skip_serializing_if = "Option::is_none")]
752    pub pack_meta_instructions: Option<String>,
753
754    /// Project homepage URL (from meta.homepage).
755    #[serde(default, skip_serializing_if = "Option::is_none")]
756    pub pack_meta_homepage: Option<String>,
757
758    /// License identifier (from meta.license).
759    #[serde(default, skip_serializing_if = "Option::is_none")]
760    pub pack_meta_license: Option<String>,
761}
762
763/// Compiled Composio MCP Gateway configuration.
///
/// Stored on `CompiledConfig::composio_gateway`. The two toolkit lists are
/// `#[serde(default)]`, so they may be omitted from serialized input and
/// then deserialize as empty vectors.
764#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
765pub struct ComposioGatewayConfig {
766    /// Whether gateway discovery is enabled.
767    pub enabled: bool,
768    /// Max tools to surface per search.
769    pub max_tools: usize,
770    /// Allowed toolkits filter (empty = all).
771    #[serde(default)]
772    pub allowed_toolkits: Vec<String>,
773    /// Blocked toolkits filter.
774    #[serde(default)]
775    pub blocked_toolkits: Vec<String>,
776}
777
778// ============================================================================
779// PACK COMPILER
780// ============================================================================
781
782/// The Pack Config Compiler transforms a parsed AST into runtime configuration.
783///
784/// Input is a `CellstateAst` built from a `cstate.toml` manifest and YAML fence
785/// blocks in Markdown prompt files. Use [`compose_pack`] to obtain the AST, then
786/// call `PipelineCompiler::compile` to produce a `CompiledConfig`.
787///
/// `compile` is an associated function: it creates a fresh compiler for each
/// call, so no state is carried between compilations.
///
788/// # Example
789///
790/// ```ignore
791/// use cellstate_pipeline::{compose_pack, PackInput};
792/// let output = compose_pack(PackInput { ... })?;
793/// // output.config is the compiled runtime configuration
794/// ```
795pub struct PipelineCompiler {
796    /// Current configuration being built
797    config: CompiledConfig,
798
799    /// Name registry for duplicate detection
800    names: NameRegistry,
801}
802
803/// Registry for tracking defined names to detect duplicates.
804#[derive(Debug, Default)]
805struct NameRegistry {
806    adapters: HashMap<String, bool>,
807    memories: HashMap<String, bool>,
808    policies: HashMap<String, bool>,
809    trajectories: HashMap<String, bool>,
810    agents: HashMap<String, bool>,
811    evolutions: HashMap<String, bool>,
812    summarization_policies: HashMap<String, bool>,
813    providers: HashMap<String, bool>,
814}
815
816impl NameRegistry {
817    fn register(&mut self, kind: &str, name: &str) -> CompileResult<()> {
818        let map = match kind {
819            "adapter" => &mut self.adapters,
820            "memory" => &mut self.memories,
821            "policy" => &mut self.policies,
822            "trajectory" => &mut self.trajectories,
823            "agent" => &mut self.agents,
824            "evolution" => &mut self.evolutions,
825            "summarization_policy" => &mut self.summarization_policies,
826            "provider" => &mut self.providers,
827            _ => return Ok(()), // Unknown kind, skip
828        };
829
830        if map.contains_key(name) {
831            return Err(CompileError::DuplicateDefinition {
832                kind: kind.to_string(),
833                name: name.to_string(),
834            });
835        }
836
837        map.insert(name.to_string(), true);
838        Ok(())
839    }
840}
841
842impl PipelineCompiler {
843    /// Create a new compiler instance.
844    pub fn new() -> Self {
845        Self {
846            config: CompiledConfig::default(),
847            names: NameRegistry::default(),
848        }
849    }
850
851    /// Compile an AST into a runtime configuration.
852    pub fn compile(ast: &CellstateAst) -> CompileResult<CompiledConfig> {
853        let mut compiler = Self::new();
854        compiler.config.version = ast.version.clone();
855
856        // First pass: register all names for reference checking
857        for def in &ast.definitions {
858            compiler.register_definition(def)?;
859        }
860
861        // Second pass: compile each definition
862        for def in &ast.definitions {
863            compiler.compile_definition(def)?;
864        }
865
866        // Final validation pass
867        compiler.validate()?;
868
869        Ok(compiler.config)
870    }
871
872    /// Register a definition's name for duplicate detection.
873    fn register_definition(&mut self, def: &Definition) -> CompileResult<()> {
874        match def {
875            Definition::Adapter(d) => self.names.register("adapter", &d.name),
876            Definition::Memory(d) => self.names.register("memory", &d.name),
877            Definition::Policy(d) => self.names.register("policy", &d.name),
878            Definition::Trajectory(d) => self.names.register("trajectory", &d.name),
879            Definition::Agent(d) => self.names.register("agent", &d.name),
880            Definition::Evolution(d) => self.names.register("evolution", &d.name),
881            Definition::SummarizationPolicy(d) => {
882                self.names.register("summarization_policy", &d.name)
883            }
884            Definition::Provider(d) => self.names.register("provider", &d.name),
885            Definition::Intent(d) => self.names.register("intent", &d.name),
886            Definition::Injection(_) | Definition::Cache(_) => Ok(()), // Anonymous or singleton
887        }
888    }
889
890    /// Compile a single definition.
891    fn compile_definition(&mut self, def: &Definition) -> CompileResult<()> {
892        match def {
893            Definition::Adapter(d) => {
894                let config = Self::compile_adapter(d)?;
895                self.config.adapters.push(config);
896            }
897            Definition::Memory(d) => {
898                let config = Self::compile_memory(d)?;
899                self.config.memories.push(config);
900            }
901            Definition::Policy(d) => {
902                let config = Self::compile_policy(d)?;
903                self.config.policies.push(config);
904            }
905            Definition::Injection(d) => {
906                let config = Self::compile_injection(d)?;
907                self.config.injections.push(config);
908            }
909            Definition::Evolution(d) => {
910                let config = Self::compile_evolution(d)?;
911                self.config.evolutions.push(config);
912            }
913            Definition::SummarizationPolicy(d) => {
914                let config = Self::compile_summarization_policy(d)?;
915                self.config.summarization_policies.push(config);
916            }
917            Definition::Trajectory(d) => {
918                let config = Self::compile_trajectory(d)?;
919                self.config.trajectories.push(config);
920            }
921            Definition::Agent(d) => {
922                let config = Self::compile_agent(d)?;
923                self.config.agents.push(config);
924            }
925            Definition::Cache(d) => {
926                if self.config.cache.is_some() {
927                    return Err(CompileError::DuplicateDefinition {
928                        kind: "cache".to_string(),
929                        name: "cache".to_string(),
930                    });
931                }
932                let config = Self::compile_cache(d)?;
933                self.config.cache = Some(config);
934            }
935            Definition::Provider(d) => {
936                let config = Self::compile_provider(d)?;
937                self.config.providers.push(config);
938            }
939            Definition::Intent(_) => {
940                // Intent definitions are consumed by the runtime gate system,
941                // not compiled into the CompiledConfig. They are stored in the
942                // AST and extracted directly by the API layer.
943            }
944        }
945        Ok(())
946    }
947
948    /// Compile an adapter definition.
949    fn compile_adapter(def: &AdapterDef) -> CompileResult<AdapterConfig> {
950        let adapter_type = match def.adapter_type {
951            AdapterType::Postgres => CompiledAdapterType::Postgres,
952            AdapterType::Redis => CompiledAdapterType::Redis,
953            AdapterType::Memory => CompiledAdapterType::Memory,
954        };
955
956        let options: HashMap<String, String> = def.options.iter().cloned().collect();
957
958        Ok(AdapterConfig {
959            name: def.name.clone(),
960            adapter_type,
961            connection: def.connection.clone(),
962            options,
963        })
964    }
965
966    /// Compile a memory definition.
967    fn compile_memory(def: &MemoryDef) -> CompileResult<MemoryConfig> {
968        let memory_type = Self::compile_memory_type(&def.memory_type)?;
969        let schema = def
970            .schema
971            .iter()
972            .map(Self::compile_field)
973            .collect::<CompileResult<Vec<_>>>()?;
974        let retention = Self::compile_retention(&def.retention)?;
975        let lifecycle = Self::compile_lifecycle(&def.lifecycle)?;
976        let indexes = def
977            .indexes
978            .iter()
979            .map(Self::compile_index)
980            .collect::<CompileResult<Vec<_>>>()?;
981        let inject_on = def
982            .inject_on
983            .iter()
984            .map(Self::compile_trigger)
985            .collect::<CompileResult<Vec<_>>>()?;
986        let modifiers = Self::compile_modifiers(&def.modifiers)?;
987
988        Ok(MemoryConfig {
989            name: def.name.clone(),
990            memory_type,
991            schema,
992            retention,
993            lifecycle,
994            parent: def.parent.clone(),
995            indexes,
996            inject_on,
997            artifacts: def.artifacts.clone(),
998            modifiers,
999        })
1000    }
1001
1002    fn compile_memory_type(mt: &MemoryType) -> CompileResult<MemoryCategory> {
1003        Ok(match mt {
1004            MemoryType::Ephemeral => MemoryCategory::Ephemeral,
1005            MemoryType::Working => MemoryCategory::Working,
1006            MemoryType::Episodic => MemoryCategory::Episodic,
1007            MemoryType::Semantic => MemoryCategory::Semantic,
1008            MemoryType::Procedural => MemoryCategory::Procedural,
1009            MemoryType::Meta => MemoryCategory::Meta,
1010        })
1011    }
1012
1013    fn compile_field(def: &FieldDef) -> CompileResult<FieldConfig> {
1014        let field_type = Self::compile_field_type(&def.field_type)?;
1015        let security = def.security.as_ref().map(Self::compile_field_security);
1016        Ok(FieldConfig {
1017            name: def.name.clone(),
1018            field_type,
1019            nullable: def.nullable,
1020            security,
1021        })
1022    }
1023
1024    fn compile_field_security(sec: &FieldSecurity) -> CompiledFieldSecurity {
1025        use crate::ast::PIIClassification;
1026        CompiledFieldSecurity {
1027            classification: match sec.classification {
1028                PIIClassification::Public => CompiledPIIClassification::Public,
1029                PIIClassification::Internal => CompiledPIIClassification::Internal,
1030                PIIClassification::Confidential => CompiledPIIClassification::Confidential,
1031                PIIClassification::Restricted => CompiledPIIClassification::Restricted,
1032                PIIClassification::Secret => CompiledPIIClassification::Secret,
1033            },
1034            opaque: sec.opaque,
1035            immutable: sec.immutable,
1036            audited: sec.audited,
1037            redact_in_logs: sec.redact_in_logs,
1038            env_source: sec.env_source.clone(),
1039        }
1040    }
1041
1042    fn compile_field_type(ft: &FieldType) -> CompileResult<cellstate_core::FieldType> {
1043        use cellstate_core::FieldType as CoreFieldType;
1044        Ok(match ft {
1045            FieldType::Uuid => CoreFieldType::Uuid,
1046            FieldType::Text => CoreFieldType::Text,
1047            FieldType::Int => CoreFieldType::Int,
1048            FieldType::Float => CoreFieldType::Float,
1049            FieldType::Bool => CoreFieldType::Bool,
1050            FieldType::Timestamp => CoreFieldType::Timestamp,
1051            FieldType::Json => CoreFieldType::Json,
1052            FieldType::Embedding(dimensions) => CoreFieldType::Embedding {
1053                dimensions: *dimensions,
1054            },
1055            FieldType::Enum(variants) => CoreFieldType::Enum {
1056                variants: variants.clone(),
1057            },
1058            FieldType::Array(inner) => {
1059                let compiled_inner = Self::compile_field_type(inner)?;
1060                CoreFieldType::Array(Box::new(compiled_inner))
1061            }
1062        })
1063    }
1064
1065    fn compile_retention(ret: &Retention) -> CompileResult<cellstate_core::TTL> {
1066        use cellstate_core::TTL;
1067        Ok(match ret {
1068            Retention::Persistent => TTL::Persistent,
1069            Retention::Session => TTL::Session,
1070            Retention::Scope => TTL::Scope,
1071            Retention::Duration(s) => {
1072                let duration = Self::parse_duration(s)?;
1073                // Convert Duration to milliseconds for TTL
1074                TTL::Duration(DurationMs::new(duration.as_millis() as i64))
1075            }
1076            Retention::Max(n) => TTL::Max(*n),
1077        })
1078    }
1079
1080    fn compile_lifecycle(lc: &Lifecycle) -> CompileResult<CompiledLifecycle> {
1081        Ok(match lc {
1082            Lifecycle::Explicit => CompiledLifecycle::Explicit,
1083            Lifecycle::AutoClose(trigger) => {
1084                let compiled_trigger = Self::compile_trigger(trigger)?;
1085                CompiledLifecycle::AutoClose(compiled_trigger)
1086            }
1087        })
1088    }
1089
1090    fn compile_trigger(trigger: &Trigger) -> CompileResult<CompiledTrigger> {
1091        Ok(match trigger {
1092            Trigger::TaskStart => CompiledTrigger::TaskStart,
1093            Trigger::TaskEnd => CompiledTrigger::TaskEnd,
1094            Trigger::ScopeClose => CompiledTrigger::ScopeClose,
1095            Trigger::TurnEnd => CompiledTrigger::TurnEnd,
1096            Trigger::Manual => CompiledTrigger::Manual,
1097            Trigger::Schedule(s) => CompiledTrigger::Schedule(s.clone()),
1098        })
1099    }
1100
1101    fn compile_action(action: &Action) -> CompileResult<CompiledAction> {
1102        Ok(match action {
1103            Action::Summarize(target) => CompiledAction::Summarize {
1104                target: target.clone(),
1105            },
1106            Action::ExtractArtifacts(target) => CompiledAction::ExtractArtifacts {
1107                target: target.clone(),
1108            },
1109            Action::Checkpoint(target) => CompiledAction::Checkpoint {
1110                target: target.clone(),
1111            },
1112            Action::Prune { target, criteria } => CompiledAction::Prune {
1113                target: target.clone(),
1114                criteria: Self::compile_filter(criteria)?,
1115            },
1116            Action::Notify(target) => CompiledAction::Notify {
1117                target: target.clone(),
1118            },
1119            Action::Inject { target, mode } => CompiledAction::Inject {
1120                target: target.clone(),
1121                mode: Self::compile_injection_mode(mode)?,
1122            },
1123            Action::AutoSummarize {
1124                source_level,
1125                target_level,
1126                create_edges,
1127            } => CompiledAction::AutoSummarize {
1128                source_level: Self::compile_abstraction_level(source_level)?,
1129                target_level: Self::compile_abstraction_level(target_level)?,
1130                create_edges: *create_edges,
1131            },
1132        })
1133    }
1134
1135    fn compile_index(def: &IndexDef) -> CompileResult<IndexConfig> {
1136        let index_type = match def.index_type {
1137            IndexType::Btree => CompiledIndexType::Btree,
1138            IndexType::Hash => CompiledIndexType::Hash,
1139            IndexType::Gin => CompiledIndexType::Gin,
1140            IndexType::Hnsw => CompiledIndexType::Hnsw,
1141            IndexType::Ivfflat => CompiledIndexType::Ivfflat,
1142        };
1143
1144        Ok(IndexConfig {
1145            field: def.field.clone(),
1146            index_type,
1147            options: def.options.iter().cloned().collect(),
1148        })
1149    }
1150
1151    fn compile_modifiers(modifiers: &[ModifierDef]) -> CompileResult<MemoryModifiers> {
1152        let mut result = MemoryModifiers::default();
1153
1154        for modifier in modifiers {
1155            match modifier {
1156                ModifierDef::Embeddable { provider } => {
1157                    result.embeddable = Some(EmbeddableConfig {
1158                        provider: provider.clone(),
1159                    });
1160                }
1161                ModifierDef::Summarizable { style, on_triggers } => {
1162                    let compiled_style = match style {
1163                        SummaryStyle::Brief => SummarizeStyle::Brief,
1164                        SummaryStyle::Detailed => SummarizeStyle::Detailed,
1165                    };
1166                    let triggers = on_triggers
1167                        .iter()
1168                        .map(Self::compile_trigger)
1169                        .collect::<CompileResult<Vec<_>>>()?;
1170                    result.summarizable = Some(SummarizableConfig {
1171                        style: compiled_style,
1172                        on_triggers: triggers,
1173                    });
1174                }
1175                ModifierDef::Lockable { mode } => {
1176                    let compiled_mode = match mode {
1177                        LockMode::Exclusive => CompiledLockMode::Exclusive,
1178                        LockMode::Shared => CompiledLockMode::Shared,
1179                    };
1180                    result.lockable = Some(LockableConfig {
1181                        mode: compiled_mode,
1182                    });
1183                }
1184            }
1185        }
1186
1187        Ok(result)
1188    }
1189
1190    /// Compile a policy definition.
1191    fn compile_policy(def: &PolicyDef) -> CompileResult<PolicyConfig> {
1192        let rules = def
1193            .rules
1194            .iter()
1195            .map(Self::compile_policy_rule)
1196            .collect::<CompileResult<Vec<_>>>()?;
1197        Ok(PolicyConfig {
1198            name: def.name.clone(),
1199            rules,
1200        })
1201    }
1202
1203    fn compile_policy_rule(rule: &PolicyRule) -> CompileResult<CompiledPolicyRule> {
1204        let trigger = Self::compile_trigger(&rule.trigger)?;
1205        let actions = rule
1206            .actions
1207            .iter()
1208            .map(Self::compile_action)
1209            .collect::<CompileResult<Vec<_>>>()?;
1210        Ok(CompiledPolicyRule { trigger, actions })
1211    }
1212
1213    /// Compile an injection definition.
1214    fn compile_injection(def: &InjectionDef) -> CompileResult<InjectionConfig> {
1215        let mode = Self::compile_injection_mode(&def.mode)?;
1216        let filter = def.filter.as_ref().map(Self::compile_filter).transpose()?;
1217
1218        Ok(InjectionConfig {
1219            source: def.source.clone(),
1220            target: def.target.clone(),
1221            mode,
1222            priority: def.priority,
1223            max_tokens: def.max_tokens,
1224            filter,
1225        })
1226    }
1227
1228    fn compile_injection_mode(mode: &InjectionMode) -> CompileResult<CompiledInjectionMode> {
1229        Ok(match mode {
1230            InjectionMode::Full => CompiledInjectionMode::Full,
1231            InjectionMode::Summary => CompiledInjectionMode::Summary,
1232            InjectionMode::TopK(k) => CompiledInjectionMode::TopK { k: *k as i32 },
1233            InjectionMode::Relevant(threshold) => CompiledInjectionMode::Relevant {
1234                threshold: *threshold,
1235            },
1236        })
1237    }
1238
1239    fn compile_filter(expr: &FilterExpr) -> CompileResult<CompiledFilter> {
1240        Ok(match expr {
1241            FilterExpr::Comparison { field, op, value } => {
1242                let compiled_op = Self::compile_compare_op(op)?;
1243                let compiled_value = Self::compile_filter_value(value)?;
1244                CompiledFilter::Comparison {
1245                    field: field.clone(),
1246                    op: compiled_op,
1247                    value: compiled_value,
1248                }
1249            }
1250            FilterExpr::And(exprs) => {
1251                let compiled = exprs
1252                    .iter()
1253                    .map(Self::compile_filter)
1254                    .collect::<CompileResult<Vec<_>>>()?;
1255                CompiledFilter::And(compiled)
1256            }
1257            FilterExpr::Or(exprs) => {
1258                let compiled = exprs
1259                    .iter()
1260                    .map(Self::compile_filter)
1261                    .collect::<CompileResult<Vec<_>>>()?;
1262                CompiledFilter::Or(compiled)
1263            }
1264            FilterExpr::Not(inner) => {
1265                let compiled_inner = Self::compile_filter(inner)?;
1266                CompiledFilter::Not(Box::new(compiled_inner))
1267            }
1268        })
1269    }
1270
1271    fn compile_compare_op(op: &CompareOp) -> CompileResult<CompiledOperator> {
1272        Ok(match op {
1273            CompareOp::Eq => CompiledOperator::Eq,
1274            CompareOp::Ne => CompiledOperator::Ne,
1275            CompareOp::Gt => CompiledOperator::Gt,
1276            CompareOp::Lt => CompiledOperator::Lt,
1277            CompareOp::Ge => CompiledOperator::Ge,
1278            CompareOp::Le => CompiledOperator::Le,
1279            CompareOp::Contains => CompiledOperator::Contains,
1280            CompareOp::Regex => CompiledOperator::Regex,
1281            CompareOp::In => CompiledOperator::In,
1282        })
1283    }
1284
1285    fn compile_filter_value(value: &FilterValue) -> CompileResult<CompiledFilterValue> {
1286        Ok(match value {
1287            FilterValue::String(s) => CompiledFilterValue::String(s.clone()),
1288            FilterValue::Number(n) => CompiledFilterValue::Number(*n),
1289            FilterValue::Bool(b) => CompiledFilterValue::Bool(*b),
1290            FilterValue::Null => CompiledFilterValue::Null,
1291            FilterValue::CurrentTrajectory => CompiledFilterValue::CurrentTrajectory,
1292            FilterValue::CurrentScope => CompiledFilterValue::CurrentScope,
1293            FilterValue::Now => CompiledFilterValue::Now,
1294            FilterValue::Array(arr) => {
1295                let compiled = arr
1296                    .iter()
1297                    .map(Self::compile_filter_value)
1298                    .collect::<CompileResult<Vec<_>>>()?;
1299                CompiledFilterValue::Array(compiled)
1300            }
1301        })
1302    }
1303
1304    /// Compile a trajectory definition.
1305    fn compile_trajectory(def: &TrajectoryDef) -> CompileResult<TrajectoryConfig> {
1306        if def.token_budget <= 0 {
1307            return Err(CompileError::InvalidValue {
1308                field: "token_budget".to_string(),
1309                reason: "must be greater than 0".to_string(),
1310            });
1311        }
1312
1313        Ok(TrajectoryConfig {
1314            name: def.name.clone(),
1315            description: def.description.clone(),
1316            agent_type: def.agent_type.clone(),
1317            token_budget: def.token_budget,
1318            memory_refs: def.memory_refs.clone(),
1319            metadata: def.metadata.clone(),
1320        })
1321    }
1322
1323    /// Compile an agent definition.
1324    fn compile_agent(def: &AgentDef) -> CompileResult<AgentConfig> {
1325        let constraints = CompiledAgentConstraints {
1326            max_concurrent: def.constraints.max_concurrent,
1327            timeout: Duration::from_millis(def.constraints.timeout_ms as u64),
1328        };
1329
1330        let permissions = CompiledPermissionMatrix {
1331            read: def.permissions.read.clone(),
1332            write: def.permissions.write.clone(),
1333            lock: def.permissions.lock.clone(),
1334        };
1335
1336        Ok(AgentConfig {
1337            name: def.name.clone(),
1338            capabilities: def.capabilities.clone(),
1339            constraints,
1340            permissions,
1341        })
1342    }
1343
1344    /// Compile a cache definition.
1345    fn compile_cache(def: &CacheDef) -> CompileResult<CacheConfig> {
1346        let backend = match def.backend {
1347            CacheBackendType::Lmdb => CompiledCacheBackend::Lmdb,
1348            CacheBackendType::Memory => CompiledCacheBackend::Memory,
1349        };
1350
1351        let default_freshness = Self::compile_freshness(&def.default_freshness)?;
1352
1353        let ttl = def
1354            .ttl
1355            .as_ref()
1356            .map(|s| Self::parse_duration(s))
1357            .transpose()?;
1358
1359        Ok(CacheConfig {
1360            backend,
1361            path: def.path.clone(),
1362            size_mb: def.size_mb,
1363            default_freshness,
1364            max_entries: def.max_entries,
1365            ttl,
1366        })
1367    }
1368
1369    fn compile_freshness(def: &FreshnessDef) -> CompileResult<CompiledFreshness> {
1370        Ok(match def {
1371            FreshnessDef::BestEffort { max_staleness } => {
1372                let duration = Self::parse_duration(max_staleness)?;
1373                CompiledFreshness::BestEffort {
1374                    max_staleness: duration,
1375                }
1376            }
1377            FreshnessDef::Strict => CompiledFreshness::Strict,
1378        })
1379    }
1380
1381    /// Compile a provider definition.
1382    fn compile_provider(def: &ProviderDef) -> CompileResult<CompiledProviderConfig> {
1383        let provider_type = match def.provider_type {
1384            ProviderType::OpenAI => CompiledProviderType::OpenAI,
1385            ProviderType::Anthropic => CompiledProviderType::Anthropic,
1386            ProviderType::Local => CompiledProviderType::Local,
1387            ProviderType::Custom => CompiledProviderType::Custom,
1388        };
1389
1390        let api_key = Self::resolve_env_value(&def.api_key)?;
1391        let options: HashMap<String, String> = def.options.iter().cloned().collect();
1392
1393        Ok(CompiledProviderConfig {
1394            name: def.name.clone(),
1395            provider_type,
1396            api_key: SecretString::new(api_key),
1397            model: def.model.clone(),
1398            options,
1399        })
1400    }
1401
1402    fn resolve_env_value(env: &EnvValue) -> CompileResult<String> {
1403        // At compile time, we just return a placeholder or the literal
1404        // Actual env var resolution happens at runtime
1405        Ok(match env {
1406            EnvValue::Env(var_name) => format!("${{env:{}}}", var_name),
1407            EnvValue::Literal(value) => value.clone(),
1408        })
1409    }
1410
1411    /// Compile an evolution definition.
1412    fn compile_evolution(def: &EvolutionDef) -> CompileResult<EvolutionConfig> {
1413        if def.benchmark_queries <= 0 {
1414            return Err(CompileError::InvalidValue {
1415                field: "benchmark_queries".to_string(),
1416                reason: "must be greater than 0".to_string(),
1417            });
1418        }
1419
1420        Ok(EvolutionConfig {
1421            name: def.name.clone(),
1422            baseline: def.baseline.clone(),
1423            candidates: def.candidates.clone(),
1424            benchmark_queries: def.benchmark_queries,
1425            metrics: def.metrics.clone(),
1426        })
1427    }
1428
1429    /// Compile a summarization policy definition.
1430    fn compile_summarization_policy(
1431        def: &SummarizationPolicyDef,
1432    ) -> CompileResult<SummarizationPolicyConfig> {
1433        let triggers = def
1434            .triggers
1435            .iter()
1436            .map(Self::compile_summarization_trigger)
1437            .collect::<CompileResult<Vec<_>>>()?;
1438        let source_level = Self::compile_abstraction_level(&def.source_level)?;
1439        let target_level = Self::compile_abstraction_level(&def.target_level)?;
1440
1441        if def.max_sources <= 0 {
1442            return Err(CompileError::InvalidValue {
1443                field: "max_sources".to_string(),
1444                reason: "must be greater than 0".to_string(),
1445            });
1446        }
1447
1448        Ok(SummarizationPolicyConfig {
1449            name: def.name.clone(),
1450            triggers,
1451            source_level,
1452            target_level,
1453            max_sources: def.max_sources,
1454            create_edges: def.create_edges,
1455        })
1456    }
1457
1458    fn compile_summarization_trigger(
1459        trigger: &SummarizationTriggerParsed,
1460    ) -> CompileResult<CompiledTrigger> {
1461        Ok(match trigger {
1462            SummarizationTriggerParsed::DosageThreshold { percent } => {
1463                CompiledTrigger::DosageReached {
1464                    threshold: *percent as i32,
1465                }
1466            }
1467            SummarizationTriggerParsed::ScopeClose => CompiledTrigger::ScopeClose,
1468            SummarizationTriggerParsed::TurnCount { count } => {
1469                CompiledTrigger::TurnCount { count: *count }
1470            }
1471            SummarizationTriggerParsed::ArtifactCount { count } => {
1472                CompiledTrigger::ArtifactCount { count: *count }
1473            }
1474            SummarizationTriggerParsed::Manual => CompiledTrigger::Manual,
1475        })
1476    }
1477
1478    fn compile_abstraction_level(
1479        level: &AbstractionLevelParsed,
1480    ) -> CompileResult<CompiledAbstractionLevel> {
1481        Ok(match level {
1482            AbstractionLevelParsed::Raw => CompiledAbstractionLevel::Raw,
1483            AbstractionLevelParsed::Summary => CompiledAbstractionLevel::Summary,
1484            AbstractionLevelParsed::Principle => CompiledAbstractionLevel::Principle,
1485        })
1486    }
1487
1488    /// Parse a duration string (e.g., "30s", "5m", "1h", "24h").
1489    fn parse_duration(s: &str) -> CompileResult<Duration> {
1490        let s = s.trim();
1491        if s.is_empty() {
1492            return Err(CompileError::InvalidDuration {
1493                value: s.to_string(),
1494            });
1495        }
1496
1497        // Find where the number ends and unit begins
1498        let num_end = s
1499            .chars()
1500            .position(|c| !c.is_ascii_digit() && c != '.')
1501            .unwrap_or(s.len());
1502
1503        let (num_str, unit) = s.split_at(num_end);
1504        let num: f64 = num_str
1505            .parse()
1506            .map_err(|_e| CompileError::InvalidDuration {
1507                value: s.to_string(),
1508            })?;
1509
1510        let multiplier = match unit.trim() {
1511            "ms" => 1,
1512            "s" => 1000,
1513            "m" => 60 * 1000,
1514            "h" => 60 * 60 * 1000,
1515            "d" => 24 * 60 * 60 * 1000,
1516            _ => {
1517                return Err(CompileError::InvalidDuration {
1518                    value: s.to_string(),
1519                })
1520            }
1521        };
1522
1523        Ok(Duration::from_millis((num * multiplier as f64) as u64))
1524    }
1525
1526    /// Final validation pass - check cross-references.
1527    fn validate(&self) -> CompileResult<()> {
1528        // Validate agent references in trajectories
1529        for trajectory in &self.config.trajectories {
1530            if !self.names.agents.contains_key(&trajectory.agent_type) {
1531                return Err(CompileError::UndefinedReference {
1532                    kind: "agent".to_string(),
1533                    name: trajectory.agent_type.clone(),
1534                });
1535            }
1536        }
1537
1538        // Validate memory references in trajectories
1539        for trajectory in &self.config.trajectories {
1540            for memory_ref in &trajectory.memory_refs {
1541                if !self.names.memories.contains_key(memory_ref) {
1542                    return Err(CompileError::UndefinedReference {
1543                        kind: "memory".to_string(),
1544                        name: memory_ref.clone(),
1545                    });
1546                }
1547            }
1548        }
1549
1550        // Validate agent permission references to memories
1551        for agent in &self.config.agents {
1552            for mem in &agent.permissions.read {
1553                if !self.names.memories.contains_key(mem) {
1554                    return Err(CompileError::UndefinedReference {
1555                        kind: "memory".to_string(),
1556                        name: mem.clone(),
1557                    });
1558                }
1559            }
1560            for mem in &agent.permissions.write {
1561                if !self.names.memories.contains_key(mem) {
1562                    return Err(CompileError::UndefinedReference {
1563                        kind: "memory".to_string(),
1564                        name: mem.clone(),
1565                    });
1566                }
1567            }
1568            for mem in &agent.permissions.lock {
1569                if !self.names.memories.contains_key(mem) {
1570                    return Err(CompileError::UndefinedReference {
1571                        kind: "memory".to_string(),
1572                        name: mem.clone(),
1573                    });
1574                }
1575            }
1576        }
1577
1578        // Validate injection source/target references
1579        for injection in &self.config.injections {
1580            if !self.names.memories.contains_key(&injection.source) {
1581                return Err(CompileError::UndefinedReference {
1582                    kind: "memory".to_string(),
1583                    name: injection.source.clone(),
1584                });
1585            }
1586            // Target could be a context slot, not necessarily a memory
1587        }
1588
1589        // Validate evolution baseline/candidate references
1590        // (These reference snapshot names which are runtime entities, skip for now)
1591
1592        // Validate parent memory references
1593        for memory in &self.config.memories {
1594            if let Some(ref parent) = memory.parent {
1595                if !self.names.memories.contains_key(parent) {
1596                    return Err(CompileError::UndefinedReference {
1597                        kind: "memory".to_string(),
1598                        name: parent.clone(),
1599                    });
1600                }
1601            }
1602        }
1603
1604        Ok(())
1605    }
1606}
1607
1608impl Default for PipelineCompiler {
1609    fn default() -> Self {
1610        Self::new()
1611    }
1612}
1613
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a version "1.0" AST from `definitions`, compiles it, and returns
    /// the error the compiler reports. Panics if compilation succeeds.
    fn compile_failure(definitions: Vec<Definition>) -> CompileError {
        let ast = CellstateAst {
            version: "1.0".to_string(),
            definitions,
        };
        PipelineCompiler::compile(&ast).unwrap_err()
    }

    /// Agent definition named "worker" with no capabilities and default
    /// constraints and permissions.
    fn worker_agent() -> Definition {
        Definition::Agent(AgentDef {
            name: "worker".to_string(),
            capabilities: vec![],
            constraints: AgentConstraints::default(),
            permissions: PermissionMatrix::default(),
        })
    }

    /// Baseline trajectory "t1" bound to `agent_type`, with a token budget of
    /// 1000 and no memory references. Tests override individual fields with
    /// struct update syntax.
    fn trajectory_for(agent_type: &str) -> TrajectoryDef {
        TrajectoryDef {
            name: "t1".to_string(),
            description: None,
            agent_type: agent_type.to_string(),
            token_budget: 1000,
            memory_refs: vec![],
            metadata: None,
        }
    }

    #[test]
    fn test_parse_duration() {
        // One (input, expected) pair per supported unit suffix.
        let cases = [
            ("30s", Duration::from_secs(30)),
            ("5m", Duration::from_secs(300)),
            ("1h", Duration::from_secs(3600)),
            ("100ms", Duration::from_millis(100)),
            ("1d", Duration::from_secs(86400)),
        ];

        for (input, expected) in cases {
            let parsed = PipelineCompiler::parse_duration(input)
                .unwrap_or_else(|err| panic!("valid duration '{input}': {err:?}"));
            assert_eq!(parsed, expected);
        }
    }

    #[test]
    fn test_duplicate_detection() {
        let mut names = NameRegistry::default();
        names
            .register("adapter", "pg")
            .expect("first registration should succeed");

        // Registering the same kind/name pair a second time must be rejected.
        let second_attempt = names.register("adapter", "pg");
        assert!(matches!(
            second_attempt,
            Err(CompileError::DuplicateDefinition { .. })
        ));
    }

    #[test]
    fn test_parse_duration_invalid() {
        let outcome = PipelineCompiler::parse_duration("10weeks");
        assert!(matches!(
            outcome,
            Err(CompileError::InvalidDuration { .. })
        ));
    }

    #[test]
    fn test_compile_undefined_agent_reference() {
        // The trajectory names an agent type that no definition declares.
        let failure = compile_failure(vec![Definition::Trajectory(trajectory_for("missing"))]);

        match failure {
            CompileError::UndefinedReference { kind, .. } => assert_eq!(kind, "agent"),
            other => panic!("expected an undefined agent reference, got {other:?}"),
        }
    }

    #[test]
    fn test_compile_undefined_memory_reference() {
        // The agent exists, but the referenced memory "notes" does not.
        let trajectory = TrajectoryDef {
            memory_refs: vec!["notes".to_string()],
            ..trajectory_for("worker")
        };
        let failure = compile_failure(vec![worker_agent(), Definition::Trajectory(trajectory)]);

        match failure {
            CompileError::UndefinedReference { kind, .. } => assert_eq!(kind, "memory"),
            other => panic!("expected an undefined memory reference, got {other:?}"),
        }
    }

    #[test]
    fn test_compile_invalid_token_budget() {
        // A zero token budget is invalid: it must be > 0.
        let trajectory = TrajectoryDef {
            token_budget: 0,
            ..trajectory_for("worker")
        };
        let failure = compile_failure(vec![worker_agent(), Definition::Trajectory(trajectory)]);

        match failure {
            CompileError::InvalidValue { field, .. } => assert_eq!(field, "token_budget"),
            other => panic!("expected an invalid token_budget, got {other:?}"),
        }
    }

    #[test]
    fn test_compile_invalid_evolution_benchmark_queries() {
        // A zero benchmark query count is invalid: it must be > 0.
        let evolution = EvolutionDef {
            name: "config".to_string(),
            baseline: "base".to_string(),
            candidates: vec!["c1".to_string()],
            benchmark_queries: 0,
            metrics: vec!["latency".to_string()],
        };
        let failure = compile_failure(vec![Definition::Evolution(evolution)]);

        match failure {
            CompileError::InvalidValue { field, .. } => assert_eq!(field, "benchmark_queries"),
            other => panic!("expected an invalid benchmark_queries, got {other:?}"),
        }
    }

    #[test]
    fn test_compiled_provider_debug_redacts_api_key() {
        const API_KEY: &str = "sk_super_secret";

        let provider = CompiledProviderConfig {
            name: "p".to_string(),
            provider_type: CompiledProviderType::OpenAI,
            api_key: SecretString::new(API_KEY),
            model: "gpt-4".to_string(),
            options: HashMap::new(),
        };

        // The Debug rendering must show a redaction marker and never the raw key.
        let rendered = format!("{:?}", provider);
        assert!(rendered.contains("[REDACTED"));
        assert!(!rendered.contains(API_KEY));
    }
}