cellstate_pipeline/pack/
ir.rs

1//! Pack IR and validation
2
3use crate::ast::AdapterDef as AstAdapterDef;
4use crate::ast::InjectionDef as AstInjectionDef;
5use crate::ast::IntentDef as AstIntentDef;
6use crate::ast::{Action, InjectionMode, MemoryDef, Trigger};
7use crate::ast::{AdapterType, CellstateAst, Definition, PolicyDef, PolicyRule};
8use crate::ast::{EnvValue, ProviderDef as AstProviderDef, ProviderType};
9use crate::compiler::{
10    CompiledCredentialDelivery, CompiledInjectionMode, CompiledPackAgentConfig,
11    CompiledPackInjectionConfig, CompiledPackRoutingConfig, CompiledToolConfig, CompiledToolKind,
12    CompiledToolsetConfig,
13};
14use crate::config::*;
15use std::collections::HashSet;
16
17use super::flow::FlowDefinition;
18use super::markdown::{FenceKind, MarkdownDoc};
19use super::schema::*;
20
21#[derive(Debug, Clone)]
22pub struct PackIr {
23    pub manifest: PackManifest,
24    pub markdown: Vec<MarkdownDoc>,
25    pub adapters: Vec<AstAdapterDef>,
26    pub policies: Vec<PolicyDef>,
27    pub injections: Vec<AstInjectionDef>,
28    pub providers: Vec<AstProviderDef>,
29    pub memories: Vec<MemoryDef>,
30    pub intents: Vec<AstIntentDef>,
31    pub flows: Vec<FlowDefinition>,
32}
33
34impl PackIr {
35    /// Constructs a PackIr by validating the provided manifest, extracting configurations from
36    /// Markdown fence blocks, checking for duplicates, and merging TOML- and Markdown-derived
37    /// definitions into a single intermediate representation.
38    ///
39    /// This performs the following high-level steps:
40    /// - Validates profiles, toolsets, agents, injections, and routing declared in the manifest.
41    /// - Builds adapters, policies, injections, and providers from the TOML manifest.
42    /// - Extracts adapters, policies, injections, and providers from Markdown fence blocks.
43    /// - Checks for duplicates within Markdown and across TOML vs Markdown (adapters, policies,
44    ///   providers, and injections). Duplicate definitions cause a validation error.
45    /// - Merges Markdown-derived definitions into the TOML-derived lists and returns the merged IR.
46    ///
47    /// # Returns
48    ///
49    /// `Ok(Self)` containing the manifest, the original Markdown documents, and the merged lists of
50    /// adapters, policies, injections, and providers on success; `Err(PackError)` if validation,
51    /// TOML parsing, or Markdown extraction fails.
52    ///
53    /// # Examples
54    ///
55    /// ```ignore
56    /// // Construct a PackIr from a manifest and any Markdown docs.
57    /// // (The concrete construction of `manifest` and `markdown` depends on your crate's API.)
58    /// # use crate::ir::{PackIr, PackManifest, MarkdownDoc};
59    /// # fn build_manifest() -> PackManifest { unimplemented!() }
60    /// # fn load_markdown() -> Vec<MarkdownDoc> { Vec::new() }
61    /// let manifest = build_manifest();
62    /// let markdown = load_markdown();
63    /// let pack_ir = PackIr::new(manifest, markdown).expect("manifest and markdown must be valid");
64    /// ```
65    pub fn new(manifest: PackManifest, markdown: Vec<MarkdownDoc>) -> Result<Self, PackError> {
66        validate_meta(&manifest)?;
67        validate_defaults(&manifest)?;
68        validate_profiles(&manifest)?;
69        validate_toolsets(&manifest)?;
70        validate_tools(&manifest)?;
71        validate_agents(&manifest, &markdown)?;
72        validate_injections(&manifest)?;
73        validate_routing(&manifest)?;
74
75        // Build from TOML manifest (legacy)
76        let mut adapters = build_adapters(&manifest)?;
77        let mut policies = build_policies(&manifest)?;
78        let mut injections = build_injections(&manifest)?;
79        let mut providers = build_providers(&manifest)?;
80
81        // Extract configs from Markdown fence blocks (NEW)
82        let md_adapters =
83            extract_from_markdown(&markdown, FenceKind::Adapter, parse_adapter_block)?;
84        let md_policies = extract_from_markdown(&markdown, FenceKind::Policy, parse_policy_block)?;
85        let md_injections =
86            extract_from_markdown(&markdown, FenceKind::Injection, parse_injection_block)?;
87        let md_providers =
88            extract_from_markdown(&markdown, FenceKind::Provider, parse_provider_block)?;
89        let md_memories = extract_from_markdown(&markdown, FenceKind::Memory, parse_memory_block)?;
90        let md_intents = extract_from_markdown(&markdown, FenceKind::Intent, parse_intent_block)?;
91        let md_flows = extract_flows_from_markdown(&markdown)?;
92
93        // Check for duplicates within Markdown configs
94        check_markdown_duplicates(
95            &md_adapters,
96            &md_policies,
97            &md_injections,
98            &md_providers,
99            &md_intents,
100        )?;
101
102        // Merge: Check for duplicates before merging
103        // Default behavior: ERROR on duplicates (no silent override)
104
105        // Check adapter duplicates
106        let toml_adapter_names: HashSet<_> = adapters.iter().map(|a| &a.name).collect();
107        for md_adapter in &md_adapters {
108            if toml_adapter_names.contains(&md_adapter.name) {
109                return Err(PackError::Validation(format!(
110                    "Duplicate adapter name '{}' found in both TOML and Markdown",
111                    md_adapter.name
112                )));
113            }
114        }
115
116        // Check policy duplicates
117        let toml_policy_names: HashSet<_> = policies.iter().map(|p| &p.name).collect();
118        for md_policy in &md_policies {
119            if toml_policy_names.contains(&md_policy.name) {
120                return Err(PackError::Validation(format!(
121                    "Duplicate policy name '{}' found in both TOML and Markdown",
122                    md_policy.name
123                )));
124            }
125        }
126
127        // Check provider duplicates
128        let toml_provider_names: HashSet<_> = providers.iter().map(|p| &p.name).collect();
129        for md_provider in &md_providers {
130            if toml_provider_names.contains(&md_provider.name) {
131                return Err(PackError::Validation(format!(
132                    "Duplicate provider name '{}' found in both TOML and Markdown",
133                    md_provider.name
134                )));
135            }
136        }
137
138        // Check injection duplicates (by source, target tuple since no name field)
139        let toml_injection_keys: HashSet<_> =
140            injections.iter().map(|i| (&i.source, &i.target)).collect();
141        for md_injection in &md_injections {
142            let key = (&md_injection.source, &md_injection.target);
143            if toml_injection_keys.contains(&key) {
144                return Err(PackError::Validation(format!(
145                    "Duplicate injection (source: '{}', target: '{}') found in both TOML and Markdown",
146                    md_injection.source, md_injection.target
147                )));
148            }
149        }
150
151        // All clear - merge configs
152        adapters.extend(md_adapters);
153        policies.extend(md_policies);
154        injections.extend(md_injections);
155        providers.extend(md_providers);
156
157        Ok(Self {
158            manifest,
159            markdown,
160            adapters,
161            policies,
162            injections,
163            providers,
164            memories: md_memories,
165            intents: md_intents,
166            flows: md_flows,
167        })
168    }
169}
170
171#[derive(Debug, Clone)]
172pub enum PackError {
173    Toml(String),
174    Validation(String),
175    Markdown(MarkdownError),
176}
177
178impl std::fmt::Display for PackError {
179    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
180        match self {
181            PackError::Toml(msg) => write!(f, "TOML error: {}", msg),
182            PackError::Validation(msg) => write!(f, "Validation error: {}", msg),
183            PackError::Markdown(err) => write!(f, "Markdown error: {}", err),
184        }
185    }
186}
187
188impl std::error::Error for PackError {}
189
190impl From<MarkdownError> for PackError {
191    fn from(err: MarkdownError) -> Self {
192        PackError::Markdown(err)
193    }
194}
195
196impl From<crate::compiler::CompileError> for PackError {
197    fn from(err: crate::compiler::CompileError) -> Self {
198        PackError::Validation(err.to_string())
199    }
200}
201
/// An error tied to a position inside a Markdown file.
#[derive(Debug, Clone)]
pub struct MarkdownError {
    /// File the error refers to.
    pub file: String,
    /// Line of the error within `file`.
    pub line: usize,
    /// Column of the error within `line`.
    pub column: usize,
    /// Description of what went wrong.
    pub message: String,
}

impl std::fmt::Display for MarkdownError {
    /// Renders the error as `file:line:column: message`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self {
            file,
            line,
            column,
            message,
        } = self;
        write!(f, "{}:{}:{}: {}", file, line, column, message)
    }
}
219
220fn validate_profiles(manifest: &PackManifest) -> Result<(), PackError> {
221    let Some(settings) = &manifest.settings else {
222        // still run provider/format reference checks below
223        // even when settings.matrix is absent.
224        for (name, profile) in &manifest.profiles {
225            validate_profile_cross_refs(manifest, name, profile)?;
226        }
227        return Ok(());
228    };
229    let Some(matrix) = &settings.matrix else {
230        for (name, profile) in &manifest.profiles {
231            validate_profile_cross_refs(manifest, name, profile)?;
232        }
233        return Ok(());
234    };
235    let mut allowed = HashSet::new();
236    for p in &matrix.allowed {
237        allowed.insert(profile_key(
238            &p.retention,
239            &p.index,
240            &p.embeddings,
241            &p.format,
242        ));
243    }
244    for (name, p) in &manifest.profiles {
245        let key = profile_key(&p.retention, &p.index, &p.embeddings, &p.format);
246        if !allowed.contains(&key) {
247            return Err(PackError::Validation(format!(
248                "profile '{}' does not satisfy settings.matrix.allowed",
249                name
250            )));
251        }
252        validate_profile_cross_refs(manifest, name, p)?;
253    }
254    Ok(())
255}
256
257fn validate_defaults(manifest: &PackManifest) -> Result<(), PackError> {
258    let Some(defaults) = &manifest.defaults else {
259        return Ok(());
260    };
261
262    if let Some(context_format) = defaults.context_format.as_deref() {
263        let normalized = context_format.trim().to_lowercase();
264        if normalized.is_empty() {
265            return Err(PackError::Validation(
266                "defaults.context_format must not be empty".to_string(),
267            ));
268        }
269
270        let is_builtin = matches!(normalized.as_str(), "markdown" | "json" | "xml");
271        if !is_builtin
272            && !manifest
273                .formats
274                .keys()
275                .any(|name| name.eq_ignore_ascii_case(context_format))
276        {
277            return Err(PackError::Validation(format!(
278                "defaults.context_format '{}' is unknown. Define it under [formats.*] or use one of markdown|json|xml.",
279                context_format
280            )));
281        }
282    }
283
284    Ok(())
285}
286
287fn validate_profile_cross_refs(
288    manifest: &PackManifest,
289    profile_name: &str,
290    profile: &ProfileDef,
291) -> Result<(), PackError> {
292    let embeddings = profile.embeddings.trim();
293    if embeddings.is_empty() {
294        return Err(PackError::Validation(format!(
295            "profile '{}' has empty embeddings value",
296            profile_name
297        )));
298    }
299
300    let embeddings_builtin = matches!(
301        embeddings.to_lowercase().as_str(),
302        "none" | "local" | "disabled"
303    );
304    if !embeddings_builtin
305        && !manifest.providers.is_empty()
306        && !manifest
307            .providers
308            .keys()
309            .any(|name| name.eq_ignore_ascii_case(embeddings))
310    {
311        return Err(PackError::Validation(format!(
312            "profile '{}' references unknown embeddings provider '{}'. Declare it under [providers.*] or use none|local|disabled.",
313            profile_name, profile.embeddings
314        )));
315    }
316
317    let format = profile.format.trim();
318    if format.is_empty() {
319        return Err(PackError::Validation(format!(
320            "profile '{}' has empty format value",
321            profile_name
322        )));
323    }
324
325    let format_builtin = matches!(format.to_lowercase().as_str(), "markdown" | "json" | "xml");
326    if !format_builtin
327        && !manifest
328            .formats
329            .keys()
330            .any(|name| name.eq_ignore_ascii_case(format))
331    {
332        return Err(PackError::Validation(format!(
333            "profile '{}' references unknown format '{}'. Declare it under [formats.*] or use markdown|json|xml.",
334            profile_name, profile.format
335        )));
336    }
337
338    Ok(())
339}
340
341fn validate_toolsets(manifest: &PackManifest) -> Result<(), PackError> {
342    let tool_ids = collect_tool_ids(&manifest.tools);
343    for (set_name, set) in &manifest.toolsets {
344        for tool in &set.tools {
345            if !tool_ids.contains(tool) {
346                return Err(PackError::Validation(format!(
347                    "toolset '{}' references unknown tool id '{}'",
348                    set_name, tool
349                )));
350            }
351        }
352    }
353    Ok(())
354}
355
356fn validate_agents(manifest: &PackManifest, markdown: &[MarkdownDoc]) -> Result<(), PackError> {
357    let toolsets: HashSet<String> = manifest.toolsets.keys().cloned().collect();
358    let profiles: HashSet<String> = manifest.profiles.keys().cloned().collect();
359    let adapters: HashSet<String> = manifest.adapters.keys().cloned().collect();
360    let formats: HashSet<String> = manifest.formats.keys().cloned().collect();
361    let md_paths: HashSet<String> = markdown.iter().map(|m| m.file.clone()).collect();
362
363    for (name, agent) in &manifest.agents {
364        // Validate profile reference exists
365        if !profiles.contains(&agent.profile) {
366            return Err(PackError::Validation(format!(
367                "agent '{}' references unknown profile '{}'. Available profiles: {:?}",
368                name,
369                agent.profile,
370                profiles.iter().collect::<Vec<_>>()
371            )));
372        }
373
374        // Validate adapter reference exists (if specified)
375        if let Some(ref adapter_name) = agent.adapter {
376            if !adapters.contains(adapter_name) {
377                return Err(PackError::Validation(format!(
378                    "agent '{}' references unknown adapter '{}'. Available adapters: {:?}",
379                    name,
380                    adapter_name,
381                    adapters.iter().collect::<Vec<_>>()
382                )));
383            }
384        }
385
386        // Validate format reference exists (if specified)
387        if let Some(ref format_name) = agent.format {
388            let builtin = matches!(
389                format_name.trim().to_lowercase().as_str(),
390                "markdown" | "json" | "xml"
391            );
392            if !builtin && !formats.contains(format_name) {
393                return Err(PackError::Validation(format!(
394                    "agent '{}' references unknown format '{}'. Available formats: {:?}",
395                    name,
396                    format_name,
397                    formats.iter().collect::<Vec<_>>()
398                )));
399            }
400        }
401
402        // Validate token_budget is positive (if specified)
403        if let Some(budget) = agent.token_budget {
404            if budget <= 0 {
405                return Err(PackError::Validation(format!(
406                    "agent '{}' has invalid token_budget '{}'. Must be greater than 0.",
407                    name, budget
408                )));
409            }
410        }
411
412        // Validate toolset references
413        for toolset in &agent.toolsets {
414            if !toolsets.contains(toolset) {
415                return Err(PackError::Validation(format!(
416                    "agent '{}' references unknown toolset '{}'",
417                    name, toolset
418                )));
419            }
420        }
421
422        // Validate prompt markdown path
423        if !md_paths.contains(&agent.prompt_md) {
424            // allow relative path match by suffix
425            let found = md_paths.iter().any(|p| p.ends_with(&agent.prompt_md));
426            if !found {
427                return Err(PackError::Validation(format!(
428                    "agent '{}' prompt_md '{}' not found in pack markdowns",
429                    name, agent.prompt_md
430                )));
431            }
432        }
433    }
434    Ok(())
435}
436
437fn validate_tools(manifest: &PackManifest) -> Result<(), PackError> {
438    for (name, tool) in &manifest.tools.prompts {
439        if tool.prompt_md.trim().is_empty() {
440            return Err(PackError::Validation(format!(
441                "tools.prompts.{}: prompt_md must not be empty",
442                name
443            )));
444        }
445
446        if let Some(result_format) = tool.result_format.as_deref() {
447            let normalized = result_format.trim().to_lowercase();
448            if normalized.is_empty() {
449                return Err(PackError::Validation(format!(
450                    "tools.prompts.{}: result_format must not be empty",
451                    name
452                )));
453            }
454
455            let is_builtin = matches!(normalized.as_str(), "markdown" | "json" | "xml");
456            if !is_builtin
457                && !manifest
458                    .formats
459                    .keys()
460                    .any(|format_name| format_name.eq_ignore_ascii_case(result_format))
461            {
462                return Err(PackError::Validation(format!(
463                    "tools.prompts.{}: result_format '{}' is unknown. Define it under [formats.*] or use markdown|json|xml.",
464                    name, result_format
465                )));
466            }
467        }
468    }
469
470    for (name, tool) in &manifest.tools.bin {
471        for provider in &tool.oauth_providers {
472            let provider = provider.trim();
473            if provider.is_empty() {
474                return Err(PackError::Validation(format!(
475                    "tools.bin.{}: oauth_providers contains an empty provider name",
476                    name
477                )));
478            }
479
480            if !manifest
481                .providers
482                .keys()
483                .any(|known| known.eq_ignore_ascii_case(provider))
484            {
485                return Err(PackError::Validation(format!(
486                    "tools.bin.{}: oauth_providers references unknown provider '{}'. Declare it under [providers.*].",
487                    name, provider
488                )));
489            }
490        }
491
492        for provider in tool.oauth_required_scopes.keys() {
493            let provider = provider.trim();
494            if provider.is_empty() {
495                return Err(PackError::Validation(format!(
496                    "tools.bin.{}: oauth_required_scopes contains an empty provider key",
497                    name
498                )));
499            }
500
501            if !tool
502                .oauth_providers
503                .iter()
504                .any(|configured| configured.trim().eq_ignore_ascii_case(provider))
505            {
506                return Err(PackError::Validation(format!(
507                    "tools.bin.{}: oauth_required_scopes references provider '{}' which is not declared in oauth_providers",
508                    name, provider
509                )));
510            }
511
512            if !manifest
513                .providers
514                .keys()
515                .any(|known| known.eq_ignore_ascii_case(provider))
516            {
517                return Err(PackError::Validation(format!(
518                    "tools.bin.{}: oauth_required_scopes references unknown provider '{}'. Declare it under [providers.*].",
519                    name, provider
520                )));
521            }
522        }
523    }
524
525    Ok(())
526}
527
528/// Maximum priority allowed for pack injections.
529/// Priorities 900+ are reserved for platform-level injections.
530const MAX_PACK_INJECTION_PRIORITY: i32 = 899;
531
532fn validate_injections(manifest: &PackManifest) -> Result<(), PackError> {
533    for (name, injection) in &manifest.injections {
534        if injection.source.trim().is_empty() {
535            return Err(PackError::Validation(format!(
536                "injections.{}: source must not be empty",
537                name
538            )));
539        }
540        if injection.target.trim().is_empty() {
541            return Err(PackError::Validation(format!(
542                "injections.{}: target must not be empty",
543                name
544            )));
545        }
546
547        // Validate entity type if specified
548        if let Some(entity_type) = injection.entity_type.as_deref() {
549            let normalized = entity_type.to_lowercase();
550            let valid = matches!(
551                normalized.as_str(),
552                "note" | "notes" | "artifact" | "artifacts"
553            );
554            if !valid {
555                return Err(PackError::Validation(format!(
556                    "injections.{}: invalid entity_type '{}' (expected 'note' or 'artifact')",
557                    name, entity_type
558                )));
559            }
560        }
561
562        // Validate priority is within pack range (0-899)
563        if injection.priority > MAX_PACK_INJECTION_PRIORITY {
564            return Err(PackError::Validation(format!(
565                "injections.{}: priority {} exceeds pack maximum ({}). Priorities {}+ are reserved for platform.",
566                name, injection.priority, MAX_PACK_INJECTION_PRIORITY, MAX_PACK_INJECTION_PRIORITY + 1
567            )));
568        }
569
570        if let Some(max_tokens) = injection.max_tokens {
571            if max_tokens <= 0 {
572                return Err(PackError::Validation(format!(
573                    "injections.{}: max_tokens must be greater than 0",
574                    name
575                )));
576            }
577        }
578
579        if let Some(top_k) = injection.top_k {
580            if top_k == 0 {
581                return Err(PackError::Validation(format!(
582                    "injections.{}: top_k must be greater than 0",
583                    name
584                )));
585            }
586        }
587
588        if let Some(threshold) = injection.threshold {
589            if !(0.0..=1.0).contains(&threshold) {
590                return Err(PackError::Validation(format!(
591                    "injections.{}: threshold must be within [0.0, 1.0]",
592                    name
593                )));
594            }
595        }
596    }
597    Ok(())
598}
599
600fn validate_meta(manifest: &PackManifest) -> Result<(), PackError> {
601    let Some(meta) = manifest.meta.as_ref() else {
602        return Ok(());
603    };
604    if let Some(spec) = meta.spec.as_deref() {
605        if spec != "cstate.toml/v1.0" {
606            return Err(PackError::Validation(format!(
607                "meta.spec: unsupported value '{}'. Expected \"cstate.toml/v1.0\"",
608                spec
609            )));
610        }
611    }
612    if meta.project.is_none() {
613        return Err(PackError::Validation(
614            "meta.project is required. Set it to your project name, e.g. project = \"my-agent\""
615                .to_string(),
616        ));
617    }
618    Ok(())
619}
620
621fn validate_routing(manifest: &PackManifest) -> Result<(), PackError> {
622    let Some(routing) = manifest.routing.as_ref() else {
623        return Ok(());
624    };
625
626    if let Some(strategy) = routing.strategy.as_deref() {
627        let strategy = strategy.to_lowercase();
628        let valid = matches!(
629            strategy.as_str(),
630            "first" | "round_robin" | "roundrobin" | "random" | "least_latency" | "leastlatency"
631        );
632        if !valid {
633            return Err(PackError::Validation(format!(
634                "routing.strategy: invalid value '{}' (expected first|round_robin|random|least_latency)",
635                strategy
636            )));
637        }
638    }
639
640    if let Some(provider) = routing.embedding_provider.as_deref() {
641        if !manifest.providers.contains_key(provider) {
642            return Err(PackError::Validation(format!(
643                "routing.embedding_provider: unknown provider '{}'",
644                provider
645            )));
646        }
647    }
648
649    if let Some(provider) = routing.summarization_provider.as_deref() {
650        if !manifest.providers.contains_key(provider) {
651            return Err(PackError::Validation(format!(
652                "routing.summarization_provider: unknown provider '{}'",
653                provider
654            )));
655        }
656    }
657
658    if let Some(provider) = routing.chat_provider.as_deref() {
659        if !manifest.providers.contains_key(provider) {
660            return Err(PackError::Validation(format!(
661                "routing.chat_provider: unknown provider '{}'",
662                provider
663            )));
664        }
665    }
666
667    if let Some(provider_order) = routing.provider_order.as_ref() {
668        for provider in provider_order {
669            if !manifest.providers.contains_key(provider) {
670                return Err(PackError::Validation(format!(
671                    "routing.provider_order references unknown provider '{}'",
672                    provider
673                )));
674            }
675        }
676    }
677
678    Ok(())
679}
680
681fn collect_tool_ids(tools: &ToolsSection) -> HashSet<String> {
682    let mut ids = HashSet::new();
683    for name in tools.bin.keys() {
684        ids.insert(format!("tools.bin.{}", name));
685    }
686    for name in tools.prompts.keys() {
687        ids.insert(format!("tools.prompts.{}", name));
688    }
689    for name in tools.bash.keys() {
690        ids.insert(format!("tools.bash.{}", name));
691    }
692    for name in tools.browser.keys() {
693        ids.insert(format!("tools.browser.{}", name));
694    }
695    for name in tools.composio.keys() {
696        ids.insert(format!("tools.composio.{}", name));
697    }
698    // Composio Gateway meta-tool (synthesized at compile time if enabled)
699    if tools.composio_gateway.as_ref().is_some_and(|gw| gw.enabled) {
700        ids.insert("composio_search_tools".to_string());
701    }
702    ids
703}
704
/// Contract files loaded from the pack for schema compilation.
///
/// Keys are contract paths as referenced by tool definitions; values are the JSON content
/// of the corresponding file.
pub type ContractFiles = std::collections::HashMap<String, String>;
707
/// Validate that a tool command is a safe executable path.
///
/// Commands must:
/// - start with `./tools/` and name something beneath it (a bare `./tools/`, or whitespace
///   directly after the prefix, is rejected because it names no executable);
/// - be free of shell metacharacters that could enable injection, and of NUL bytes;
/// - be free of path traversal sequences (`..`).
///
/// This prevents shell injection attacks like `cmd = "rm -rf / && echo hacked"`
/// and path traversal attacks like `cmd = "./tools/../../../bin/sh"`.
fn is_valid_executable_path(cmd: &str) -> bool {
    // Shell metacharacters, line breaks, and NUL (which no valid path can contain).
    const FORBIDDEN: &[char] = &[
        ';', '|', '&', '$', '`', '(', ')', '{', '}', '<', '>', '!', '\n', '\r', '\0',
    ];
    // Execution is confined to explicitly provisioned binaries/scripts under this directory.
    const TOOLS_PREFIX: &str = "./tools/";

    if cmd.contains(FORBIDDEN) {
        return false;
    }

    // Deliberately coarse: any `..` is refused, even inside a file name.
    if cmd.contains("..") {
        return false;
    }

    match cmd.strip_prefix(TOOLS_PREFIX) {
        // Something must follow the prefix, and it must start the executable's name.
        Some(rest) => !rest.is_empty() && !rest.starts_with(char::is_whitespace),
        None => false,
    }
}
737
738/// Compile pack tool registry into runtime tool configs.
739/// `contracts` maps contract paths to their JSON content.
740pub fn compile_tools(
741    manifest: &PackManifest,
742    contracts: &ContractFiles,
743) -> Result<Vec<CompiledToolConfig>, PackError> {
744    let mut tools = Vec::new();
745
746    for (name, def) in &manifest.tools.bin {
747        // Validate command is a safe executable path, not arbitrary shell
748        if !is_valid_executable_path(&def.cmd) {
749            return Err(PackError::Validation(format!(
750                "tools.bin.{}: cmd must be a relative executable path under ./tools/ \
751                and must not contain shell metacharacters. Got: '{}'",
752                name, def.cmd
753            )));
754        }
755
756        let oauth_required_scopes = if def.oauth_required_scopes.is_empty() {
757            None
758        } else {
759            let mut out = std::collections::HashMap::new();
760            for (provider, scopes) in &def.oauth_required_scopes {
761                let provider = provider.trim().to_ascii_lowercase();
762                if provider.is_empty() {
763                    return Err(PackError::Validation(format!(
764                        "tools.bin.{}: oauth_required_scopes has empty provider key",
765                        name
766                    )));
767                }
768                if !def
769                    .oauth_providers
770                    .iter()
771                    .any(|configured| configured.trim().eq_ignore_ascii_case(&provider))
772                {
773                    return Err(PackError::Validation(format!(
774                        "tools.bin.{}: oauth_required_scopes references provider '{}' which is not declared in oauth_providers",
775                        name, provider
776                    )));
777                }
778
779                let mut normalized_scopes = scopes
780                    .iter()
781                    .map(|scope| scope.trim().to_string())
782                    .filter(|scope| !scope.is_empty())
783                    .collect::<Vec<_>>();
784                normalized_scopes.sort();
785                normalized_scopes.dedup();
786                if normalized_scopes.is_empty() {
787                    return Err(PackError::Validation(format!(
788                        "tools.bin.{}: oauth_required_scopes for provider '{}' must include at least one scope",
789                        name, provider
790                    )));
791                }
792                out.insert(provider, normalized_scopes);
793            }
794            Some(out)
795        };
796
797        let credential_delivery = match def.credential_delivery.as_deref() {
798            None => None,
799            Some(raw) => match raw.trim().to_ascii_lowercase().as_str() {
800                "environment" => Some(CompiledCredentialDelivery::Environment),
801                "file" => Some(CompiledCredentialDelivery::File),
802                other => {
803                    return Err(PackError::Validation(format!(
804                        "tools.bin.{}: invalid credential_delivery '{}' (expected 'environment' or 'file')",
805                        name, other
806                    )));
807                }
808            },
809        };
810
811        // Auto-detect WASM tools by file extension: .wasm files get
812        // WasmExec kind (Level 2 wasmtime sandbox), all others get Exec.
813        let kind = if def.cmd.ends_with(".wasm") {
814            CompiledToolKind::WasmExec
815        } else {
816            CompiledToolKind::Exec
817        };
818
819        tools.push(CompiledToolConfig {
820            id: format!("tools.bin.{}", name),
821            kind,
822            cmd: Some(def.cmd.clone()),
823            prompt_md: None,
824            contract: None,
825            compiled_schema: None,
826            result_format: None,
827            timeout_ms: def.timeout_ms,
828            allow_network: def.allow_network,
829            allow_fs: def.allow_fs,
830            allow_subprocess: def.allow_subprocess,
831            credential_delivery,
832            oauth_providers: if def.oauth_providers.is_empty() {
833                None
834            } else {
835                Some(def.oauth_providers.clone())
836            },
837            oauth_required_scopes,
838            allowed_commands: None,
839            blocked_commands: None,
840            requires_challenge: None,
841            allowed_domains: None,
842            blocked_domains: None,
843            max_navigations: None,
844            max_duration_ms: None,
845            use_vision: None,
846            composio_toolkit: None,
847            composio_actions: None,
848            is_composio_gateway: None,
849        });
850    }
851
852    for (name, def) in &manifest.tools.prompts {
853        // If contract specified, compile the schema
854        let compiled_schema = if let Some(contract_path) = &def.contract {
855            let json_str = contracts.get(contract_path).ok_or_else(|| {
856                PackError::Validation(format!(
857                    "tools.prompts.{}: contract file '{}' not found",
858                    name, contract_path
859                ))
860            })?;
861            let schema: serde_json::Value = serde_json::from_str(json_str).map_err(|e| {
862                PackError::Validation(format!(
863                    "tools.prompts.{}: contract '{}' is invalid JSON: {}",
864                    name, contract_path, e
865                ))
866            })?;
867            Some(schema)
868        } else {
869            None
870        };
871
872        tools.push(CompiledToolConfig {
873            id: format!("tools.prompts.{}", name),
874            kind: CompiledToolKind::Prompt,
875            cmd: None,
876            prompt_md: Some(def.prompt_md.clone()),
877            contract: def.contract.clone(),
878            compiled_schema,
879            result_format: def.result_format.clone(),
880            timeout_ms: def.timeout_ms,
881            allow_network: None,
882            allow_fs: None,
883            allow_subprocess: None,
884            credential_delivery: None,
885            oauth_providers: None,
886            oauth_required_scopes: None,
887            allowed_commands: None,
888            blocked_commands: None,
889            requires_challenge: None,
890            allowed_domains: None,
891            blocked_domains: None,
892            max_navigations: None,
893            max_duration_ms: None,
894            use_vision: None,
895            composio_toolkit: None,
896            composio_actions: None,
897            is_composio_gateway: None,
898        });
899    }
900
901    // Compile bash tools
902    for (name, def) in &manifest.tools.bash {
903        tools.push(CompiledToolConfig {
904            id: format!("tools.bash.{}", name),
905            kind: CompiledToolKind::Bash,
906            cmd: None,
907            prompt_md: None,
908            contract: None,
909            compiled_schema: None,
910            result_format: None,
911            timeout_ms: def.timeout_ms,
912            allow_network: def.allow_network,
913            allow_fs: def.allow_fs,
914            allow_subprocess: None,
915            credential_delivery: None,
916            oauth_providers: None,
917            oauth_required_scopes: None,
918            allowed_commands: if def.allowed_commands.is_empty() {
919                None
920            } else {
921                Some(def.allowed_commands.clone())
922            },
923            blocked_commands: if def.blocked_commands.is_empty() {
924                None
925            } else {
926                Some(def.blocked_commands.clone())
927            },
928            requires_challenge: None,
929            allowed_domains: None,
930            blocked_domains: None,
931            max_navigations: None,
932            max_duration_ms: None,
933            use_vision: None,
934            composio_toolkit: None,
935            composio_actions: None,
936            is_composio_gateway: None,
937        });
938    }
939
940    // Compile browser tools
941    for (name, def) in &manifest.tools.browser {
942        tools.push(CompiledToolConfig {
943            id: format!("tools.browser.{}", name),
944            kind: CompiledToolKind::Browser,
945            cmd: None,
946            prompt_md: None,
947            contract: None,
948            compiled_schema: None,
949            result_format: None,
950            timeout_ms: def.timeout_ms,
951            allow_network: def.allow_network,
952            allow_fs: None,
953            allow_subprocess: None,
954            credential_delivery: None,
955            oauth_providers: None,
956            oauth_required_scopes: None,
957            allowed_commands: None,
958            blocked_commands: None,
959            requires_challenge: None,
960            allowed_domains: if def.allowed_domains.is_empty() {
961                None
962            } else {
963                Some(def.allowed_domains.clone())
964            },
965            blocked_domains: if def.blocked_domains.is_empty() {
966                None
967            } else {
968                Some(def.blocked_domains.clone())
969            },
970            max_navigations: def.max_navigations,
971            max_duration_ms: def.max_duration_ms,
972            use_vision: def.use_vision,
973            composio_toolkit: None,
974            composio_actions: None,
975            is_composio_gateway: None,
976        });
977    }
978
979    // Compile composio tools
980    for (name, def) in &manifest.tools.composio {
981        tools.push(CompiledToolConfig {
982            id: format!("tools.composio.{}", name),
983            kind: CompiledToolKind::Composio,
984            cmd: None,
985            prompt_md: None,
986            contract: None,
987            compiled_schema: None,
988            result_format: None,
989            timeout_ms: def.timeout_ms,
990            allow_network: Some(true), // Composio always needs network
991            allow_fs: None,
992            allow_subprocess: None,
993            credential_delivery: None,
994            oauth_providers: None,
995            oauth_required_scopes: None,
996            allowed_commands: None,
997            blocked_commands: None,
998            requires_challenge: None,
999            allowed_domains: None,
1000            blocked_domains: None,
1001            max_navigations: None,
1002            max_duration_ms: None,
1003            use_vision: None,
1004            composio_toolkit: Some(def.toolkit.clone()),
1005            composio_actions: if def.actions.is_empty() {
1006                None
1007            } else {
1008                Some(def.actions.clone())
1009            },
1010            is_composio_gateway: None,
1011        });
1012    }
1013
1014    // Compile Composio Gateway meta-tool (if enabled)
1015    if let Some(ref gateway) = manifest.tools.composio_gateway {
1016        if gateway.enabled {
1017            tools.push(CompiledToolConfig {
1018                id: "composio_search_tools".to_string(),
1019                kind: CompiledToolKind::ComposioGateway,
1020                cmd: None,
1021                prompt_md: None,
1022                contract: None,
1023                compiled_schema: None,
1024                result_format: None,
1025                timeout_ms: Some(30_000),
1026                allow_network: Some(true),
1027                allow_fs: None,
1028                allow_subprocess: None,
1029                credential_delivery: None,
1030                oauth_providers: None,
1031                oauth_required_scopes: None,
1032                allowed_commands: None,
1033                blocked_commands: None,
1034                requires_challenge: None,
1035                allowed_domains: None,
1036                blocked_domains: None,
1037                max_navigations: None,
1038                max_duration_ms: None,
1039                use_vision: None,
1040                composio_toolkit: None,
1041                composio_actions: None,
1042                is_composio_gateway: Some(true),
1043            });
1044        }
1045    }
1046
1047    Ok(tools)
1048}
1049
1050/// Compile pack toolsets into runtime toolset configs.
1051pub fn compile_toolsets(manifest: &PackManifest) -> Vec<CompiledToolsetConfig> {
1052    manifest
1053        .toolsets
1054        .iter()
1055        .map(|(name, set)| CompiledToolsetConfig {
1056            name: name.clone(),
1057            tools: set.tools.clone(),
1058        })
1059        .collect()
1060}
1061
1062/// Compile pack agent bindings to toolsets with extracted markdown metadata.
1063///
1064/// This function transforms manifest agent definitions into runtime-ready
1065/// configurations, combining TOML settings with markdown-extracted metadata.
1066/// All reference validations (profile, adapter, format, toolsets) have been
1067/// performed during the IR validation phase.
1068pub fn compile_pack_agents(
1069    manifest: &PackManifest,
1070    markdown_docs: &[super::MarkdownDoc],
1071) -> Vec<CompiledPackAgentConfig> {
1072    // Build lookup from markdown file path to extracted data
1073    let md_by_path: std::collections::HashMap<&str, &super::MarkdownDoc> =
1074        markdown_docs.iter().map(|m| (m.file.as_str(), m)).collect();
1075
1076    manifest
1077        .agents
1078        .iter()
1079        .map(|(name, agent)| {
1080            // Find matching markdown doc by prompt_md path
1081            let md: Option<&super::MarkdownDoc> = md_by_path
1082                .get(agent.prompt_md.as_str())
1083                .copied()
1084                .or_else(|| {
1085                    // Try suffix match for relative paths
1086                    md_by_path
1087                        .iter()
1088                        .find(|(path, _)| path.ends_with(&agent.prompt_md))
1089                        .map(|(_, doc)| *doc)
1090                });
1091
1092            let (constraints, tool_refs, rag_config, system_prompt) = match md {
1093                Some(doc) => (
1094                    doc.extracted_constraints.clone(),
1095                    doc.extracted_tool_refs.clone(),
1096                    doc.extracted_rag_config.clone(),
1097                    Some(doc.system.clone()).filter(|s| !s.is_empty()),
1098                ),
1099                None => (Vec::new(), Vec::new(), None, None),
1100            };
1101
1102            // PROFILE INHERITANCE: Resolve format from agent or profile
1103            let profile_def = manifest.profiles.get(&agent.profile);
1104            let resolved_format = agent
1105                .format
1106                .clone()
1107                .or_else(|| profile_def.map(|p| p.format.clone()))
1108                .unwrap_or_else(|| "markdown".to_string());
1109
1110            CompiledPackAgentConfig {
1111                name: name.clone(),
1112                enabled: agent.enabled.unwrap_or(true),
1113                profile: agent.profile.clone(),
1114                adapter: agent.adapter.clone(),
1115                format: agent.format.clone(),
1116                resolved_format,
1117                token_budget: agent.token_budget,
1118                prompt_md: agent.prompt_md.clone(),
1119                toolsets: agent.toolsets.clone(),
1120                extracted_constraints: constraints,
1121                extracted_tool_refs: tool_refs,
1122                extracted_rag_config: rag_config,
1123                description: agent.description.clone(),
1124                tags: agent.tags.clone(),
1125                system_prompt,
1126            }
1127        })
1128        .collect()
1129}
1130
1131/// Compile pack injection metadata for runtime wiring.
1132pub fn compile_pack_injections(
1133    manifest: &PackManifest,
1134) -> Result<Vec<CompiledPackInjectionConfig>, PackError> {
1135    let mut out = Vec::new();
1136    for def in manifest.injections.values() {
1137        let mode = compile_injection_mode_compiled(def)?;
1138        out.push(CompiledPackInjectionConfig {
1139            source: def.source.clone(),
1140            target: def.target.clone(),
1141            entity_type: def.entity_type.clone().map(|s| s.to_lowercase()),
1142            mode,
1143            priority: def.priority,
1144            max_tokens: def.max_tokens,
1145        });
1146    }
1147    Ok(out)
1148}
1149
1150/// Compile pack provider routing hints.
1151pub fn compile_pack_routing(manifest: &PackManifest) -> Option<CompiledPackRoutingConfig> {
1152    manifest
1153        .routing
1154        .as_ref()
1155        .map(|routing| CompiledPackRoutingConfig {
1156            strategy: routing.strategy.clone().map(|s| s.to_lowercase()),
1157            embedding_provider: routing.embedding_provider.clone(),
1158            summarization_provider: routing.summarization_provider.clone(),
1159            chat_provider: routing.chat_provider.clone(),
1160            sort: routing.sort.clone(),
1161            zdr: routing.zdr,
1162            provider_order: routing.provider_order.clone(),
1163            data_collection: routing.data_collection.clone(),
1164        })
1165}
1166
1167fn build_adapters(manifest: &PackManifest) -> Result<Vec<AstAdapterDef>, PackError> {
1168    let mut adapters = Vec::new();
1169    for (name, def) in &manifest.adapters {
1170        let adapter_type = match def.adapter_type.to_lowercase().as_str() {
1171            "postgres" => AdapterType::Postgres,
1172            "redis" => AdapterType::Redis,
1173            "memory" => AdapterType::Memory,
1174            other => {
1175                return Err(PackError::Validation(format!(
1176                    "adapter '{}' has invalid type '{}'",
1177                    name, other
1178                )))
1179            }
1180        };
1181        let options = def
1182            .options
1183            .iter()
1184            .map(|(k, v)| (k.clone(), v.clone()))
1185            .collect();
1186        adapters.push(AstAdapterDef {
1187            name: name.clone(),
1188            adapter_type,
1189            connection: def.connection.clone(),
1190            options,
1191        });
1192    }
1193    Ok(adapters)
1194}
1195
1196fn build_policies(manifest: &PackManifest) -> Result<Vec<PolicyDef>, PackError> {
1197    let mut policies = Vec::new();
1198    for (name, def) in &manifest.policies {
1199        let trigger = parse_trigger(&def.trigger)?;
1200        let mut actions = Vec::new();
1201        for action in &def.actions {
1202            actions.push(parse_action(action)?);
1203        }
1204        policies.push(PolicyDef {
1205            name: name.clone(),
1206            rules: vec![PolicyRule { trigger, actions }],
1207        });
1208    }
1209    Ok(policies)
1210}
1211
1212fn build_injections(manifest: &PackManifest) -> Result<Vec<AstInjectionDef>, PackError> {
1213    let mut injections = Vec::new();
1214    for def in manifest.injections.values() {
1215        let mode = parse_injection_mode(def)?;
1216        injections.push(AstInjectionDef {
1217            source: def.source.clone(),
1218            target: def.target.clone(),
1219            mode,
1220            priority: def.priority,
1221            max_tokens: def.max_tokens,
1222            filter: None,
1223        });
1224    }
1225    Ok(injections)
1226}
1227
1228fn build_providers(manifest: &PackManifest) -> Result<Vec<AstProviderDef>, PackError> {
1229    let mut providers = Vec::new();
1230    for (name, def) in &manifest.providers {
1231        let provider_type = match def.provider_type.to_lowercase().as_str() {
1232            "openai" => ProviderType::OpenAI,
1233            "anthropic" => ProviderType::Anthropic,
1234            "custom" => ProviderType::Custom,
1235            other => {
1236                return Err(PackError::Validation(format!(
1237                    "provider '{}' has invalid type '{}'",
1238                    name, other
1239                )))
1240            }
1241        };
1242
1243        let api_key = parse_env_value(def.api_key.expose_secret());
1244        let options = def
1245            .options
1246            .iter()
1247            .map(|(k, v)| (k.clone(), v.clone()))
1248            .collect::<Vec<_>>();
1249
1250        providers.push(AstProviderDef {
1251            name: name.clone(),
1252            provider_type,
1253            api_key,
1254            model: def.model.clone(),
1255            options,
1256        });
1257    }
1258    Ok(providers)
1259}
1260
1261fn parse_env_value(value: &str) -> EnvValue {
1262    if let Some(rest) = value.strip_prefix("env:") {
1263        EnvValue::Env(rest.trim().to_string())
1264    } else {
1265        EnvValue::Literal(value.to_string())
1266    }
1267}
1268
1269fn parse_trigger(value: &str) -> Result<Trigger, PackError> {
1270    match value.to_lowercase().as_str() {
1271        "task_start" => Ok(Trigger::TaskStart),
1272        "task_end" => Ok(Trigger::TaskEnd),
1273        "scope_close" => Ok(Trigger::ScopeClose),
1274        "turn_end" => Ok(Trigger::TurnEnd),
1275        "manual" => Ok(Trigger::Manual),
1276        other if other.starts_with("schedule:") => Ok(Trigger::Schedule(
1277            other["schedule:".len()..].trim().to_string(),
1278        )),
1279        other => Err(PackError::Validation(format!(
1280            "invalid trigger '{}'",
1281            other
1282        ))),
1283    }
1284}
1285
1286fn parse_action(action: &PolicyActionDef) -> Result<Action, PackError> {
1287    let typ = action.action_type.to_lowercase();
1288    match typ.as_str() {
1289        "summarize" => Ok(Action::Summarize(action.target.clone().ok_or_else(
1290            || PackError::Validation("summarize action missing target".into()),
1291        )?)),
1292        "checkpoint" => Ok(Action::Checkpoint(action.target.clone().ok_or_else(
1293            || PackError::Validation("checkpoint action missing target".into()),
1294        )?)),
1295        "extract_artifacts" => Ok(Action::ExtractArtifacts(action.target.clone().ok_or_else(
1296            || PackError::Validation("extract_artifacts action missing target".into()),
1297        )?)),
1298        "notify" => Ok(Action::Notify(action.target.clone().ok_or_else(|| {
1299            PackError::Validation("notify action missing target".into())
1300        })?)),
1301        "inject" => Ok(Action::Inject {
1302            target: action
1303                .target
1304                .clone()
1305                .ok_or_else(|| PackError::Validation("inject action missing target".into()))?,
1306            mode: InjectionMode::Full,
1307        }),
1308        other => Err(PackError::Validation(format!(
1309            "unsupported action type '{}'",
1310            other
1311        ))),
1312    }
1313}
1314
1315fn parse_injection_mode(def: &InjectionDef) -> Result<InjectionMode, PackError> {
1316    match def.mode.to_lowercase().as_str() {
1317        "full" => Ok(InjectionMode::Full),
1318        "summary" => Ok(InjectionMode::Summary),
1319        "topk" => Ok(InjectionMode::TopK(def.top_k.ok_or_else(|| {
1320            PackError::Validation("topk mode requires top_k".into())
1321        })?)),
1322        "relevant" => Ok(InjectionMode::Relevant(def.threshold.ok_or_else(|| {
1323            PackError::Validation("relevant mode requires threshold".into())
1324        })?)),
1325        other => Err(PackError::Validation(format!(
1326            "invalid injection mode '{}'",
1327            other
1328        ))),
1329    }
1330}
1331
1332fn compile_injection_mode_compiled(def: &InjectionDef) -> Result<CompiledInjectionMode, PackError> {
1333    match def.mode.to_lowercase().as_str() {
1334        "full" => Ok(CompiledInjectionMode::Full),
1335        "summary" => Ok(CompiledInjectionMode::Summary),
1336        "topk" => {
1337            let k = def
1338                .top_k
1339                .ok_or_else(|| PackError::Validation("topk mode requires top_k".into()))?;
1340            let k = i32::try_from(k)
1341                .map_err(|e| PackError::Validation(format!("top_k out of range: {e}")))?;
1342            Ok(CompiledInjectionMode::TopK { k })
1343        }
1344        "relevant" => {
1345            let threshold = def
1346                .threshold
1347                .ok_or_else(|| PackError::Validation("relevant mode requires threshold".into()))?;
1348            Ok(CompiledInjectionMode::Relevant { threshold })
1349        }
1350        other => Err(PackError::Validation(format!(
1351            "invalid injection mode '{}'",
1352            other
1353        ))),
1354    }
1355}
1356
/// Build a case-insensitive key from four profile components.
///
/// Each component is lowercased and the results are joined with `|` in the
/// order `ret`, `idx`, `emb`, `fmt`.
fn profile_key(ret: &str, idx: &str, emb: &str, fmt: &str) -> String {
    [ret, idx, emb, fmt].map(str::to_lowercase).join("|")
}
1366
1367/// Builds a CELLSTATE AST from a pack intermediate representation.
1368///
1369/// The resulting AST contains definitions for adapters, policies, injections, and providers
1370/// extracted from the given `PackIr`. The AST version is taken from `ir.manifest.meta.version`
1371/// if present; otherwise `"1.0"` is used.
1372///
1373/// # Examples
1374///
1375/// ```
1376/// // Construct a minimal PackIr (fields elided for brevity) and convert it.
1377/// // let ir = PackIr { manifest: ..., markdown: vec![], adapters: vec![], policies: vec![], injections: vec![], providers: vec![] };
1378/// // let ast = ast_from_ir(&ir);
1379/// // assert_eq!(ast.version, "1.0");
1380/// ```
1381pub fn ast_from_ir(ir: &PackIr) -> CellstateAst {
1382    let mut defs: Vec<Definition> = Vec::new();
1383    for a in &ir.adapters {
1384        defs.push(Definition::Adapter(a.clone()));
1385    }
1386    for p in &ir.policies {
1387        defs.push(Definition::Policy(p.clone()));
1388    }
1389    for i in &ir.injections {
1390        defs.push(Definition::Injection(i.clone()));
1391    }
1392    for provider in &ir.providers {
1393        defs.push(Definition::Provider(provider.clone()));
1394    }
1395    for memory in &ir.memories {
1396        defs.push(Definition::Memory(memory.clone()));
1397    }
1398    for intent in &ir.intents {
1399        defs.push(Definition::Intent(intent.clone()));
1400    }
1401    CellstateAst {
1402        version: ir
1403            .manifest
1404            .meta
1405            .as_ref()
1406            .and_then(|m| m.version.clone())
1407            .unwrap_or_else(|| "1.0".to_string()),
1408        definitions: defs,
1409    }
1410}
1411
1412// ============================================================================
1413// MARKDOWN CONFIG EXTRACTION (NEW)
1414// ============================================================================
1415
1416/// Validates that Markdown-extracted adapters, policies, injections, and providers contain no duplicate definitions.
1417///
1418/// Returns an error if any adapter, policy, or provider name appears more than once, or if any injection's (source, target) pair is duplicated.
1419///
1420/// # Errors
1421///
1422/// Returns `PackError::Validation` with a descriptive message for the first duplicate encountered.
1423///
1424/// # Examples
1425///
1426/// ```ignore
1427/// // Accepts empty collections when there are no duplicates
1428/// let adapters: Vec<_> = vec![];
1429/// let policies: Vec<_> = vec![];
1430/// let injections: Vec<_> = vec![];
1431/// let providers: Vec<_> = vec![];
1432/// assert!(check_markdown_duplicates(&adapters, &policies, &injections, &providers).is_ok());
1433/// ```
1434fn check_markdown_duplicates(
1435    adapters: &[AstAdapterDef],
1436    policies: &[PolicyDef],
1437    injections: &[AstInjectionDef],
1438    providers: &[AstProviderDef],
1439    intents: &[AstIntentDef],
1440) -> Result<(), PackError> {
1441    // Check for duplicate adapter names
1442    let mut adapter_names = HashSet::new();
1443    for adapter in adapters {
1444        if !adapter_names.insert(&adapter.name) {
1445            return Err(PackError::Validation(format!(
1446                "Duplicate adapter name '{}' found in Markdown configs",
1447                adapter.name
1448            )));
1449        }
1450    }
1451
1452    // Check for duplicate policy names
1453    let mut policy_names = HashSet::new();
1454    for policy in policies {
1455        if !policy_names.insert(&policy.name) {
1456            return Err(PackError::Validation(format!(
1457                "Duplicate policy name '{}' found in Markdown configs",
1458                policy.name
1459            )));
1460        }
1461    }
1462
1463    // Check for duplicate provider names
1464    let mut provider_names = HashSet::new();
1465    for provider in providers {
1466        if !provider_names.insert(&provider.name) {
1467            return Err(PackError::Validation(format!(
1468                "Duplicate provider name '{}' found in Markdown configs",
1469                provider.name
1470            )));
1471        }
1472    }
1473
1474    // Check for duplicate injection (source, target) tuples
1475    let mut injection_keys = HashSet::new();
1476    for injection in injections {
1477        let key = (&injection.source, &injection.target);
1478        if !injection_keys.insert(key) {
1479            return Err(PackError::Validation(format!(
1480                "Duplicate injection (source: '{}', target: '{}') found in Markdown configs",
1481                injection.source, injection.target
1482            )));
1483        }
1484    }
1485
1486    // Check for duplicate intent names
1487    let mut intent_names = HashSet::new();
1488    for intent in intents {
1489        if !intent_names.insert(&intent.name) {
1490            return Err(PackError::Validation(format!(
1491                "Duplicate intent name '{}' found in Markdown configs",
1492                intent.name
1493            )));
1494        }
1495    }
1496
1497    Ok(())
1498}
1499
1500/// Generic helper that extracts items from Markdown fence blocks.
1501///
1502/// Scans all documents, users, and blocks for blocks matching `kind`, then
1503/// calls `parser` on each match. This eliminates the repeated triple-nested
1504/// loop that was duplicated across the six typed extraction functions.
1505fn extract_from_markdown<T>(
1506    markdown: &[MarkdownDoc],
1507    kind: FenceKind,
1508    parser: impl Fn(Option<&str>, &str) -> Result<T, ConfigError>,
1509) -> Result<Vec<T>, PackError> {
1510    let mut items = Vec::new();
1511    for doc in markdown {
1512        for user in &doc.users {
1513            for block in &user.blocks {
1514                if block.kind == kind {
1515                    let item = parser(block.header_name.as_deref(), &block.content)?;
1516                    items.push(item);
1517                }
1518            }
1519        }
1520    }
1521    Ok(items)
1522}
1523
1524/// Extract flow definitions from Markdown fence blocks.
1525fn extract_flows_from_markdown(markdown: &[MarkdownDoc]) -> Result<Vec<FlowDefinition>, PackError> {
1526    let mut flows = Vec::new();
1527
1528    for doc in markdown {
1529        for user in &doc.users {
1530            for block in &user.blocks {
1531                if block.kind == FenceKind::Flow {
1532                    let flow: FlowDefinition = serde_yaml::from_str(&block.content)
1533                        .map_err(|e| PackError::Validation(format!("Invalid flow YAML: {}", e)))?;
1534                    flows.push(flow);
1535                }
1536            }
1537        }
1538    }
1539
1540    Ok(flows)
1541}
1542
1543#[cfg(test)]
1544mod tests {
1545    use super::*;
1546
1547    // ── parse_env_value ────────────────────────────────────────────────
1548
1549    #[test]
1550    fn parse_env_value_literal() -> Result<(), PackError> {
1551        match parse_env_value("hello world") {
1552            EnvValue::Literal(v) => assert_eq!(v, "hello world"),
1553            other => {
1554                return Err(PackError::Validation(format!(
1555                    "expected Literal, got {:?}",
1556                    other
1557                )))
1558            }
1559        }
1560        Ok(())
1561    }
1562
1563    #[test]
1564    fn parse_env_value_env_ref() -> Result<(), PackError> {
1565        match parse_env_value("env:MY_VAR") {
1566            EnvValue::Env(v) => assert_eq!(v, "MY_VAR"),
1567            other => {
1568                return Err(PackError::Validation(format!(
1569                    "expected Env, got {:?}",
1570                    other
1571                )))
1572            }
1573        }
1574        Ok(())
1575    }
1576
1577    #[test]
1578    fn parse_env_value_env_ref_trimmed() -> Result<(), PackError> {
1579        match parse_env_value("env:  SPACED_VAR  ") {
1580            EnvValue::Env(v) => assert_eq!(v, "SPACED_VAR"),
1581            other => {
1582                return Err(PackError::Validation(format!(
1583                    "expected Env, got {:?}",
1584                    other
1585                )))
1586            }
1587        }
1588        Ok(())
1589    }
1590
1591    #[test]
1592    fn parse_env_value_empty_string() -> Result<(), PackError> {
1593        match parse_env_value("") {
1594            EnvValue::Literal(v) => assert_eq!(v, ""),
1595            other => {
1596                return Err(PackError::Validation(format!(
1597                    "expected Literal, got {:?}",
1598                    other
1599                )))
1600            }
1601        }
1602        Ok(())
1603    }
1604
1605    #[test]
1606    fn parse_env_value_env_prefix_only() -> Result<(), PackError> {
1607        match parse_env_value("env:") {
1608            EnvValue::Env(v) => assert_eq!(v, ""),
1609            other => {
1610                return Err(PackError::Validation(format!(
1611                    "expected Env, got {:?}",
1612                    other
1613                )))
1614            }
1615        }
1616        Ok(())
1617    }
1618
1619    // ── parse_trigger ──────────────────────────────────────────────────
1620
1621    #[test]
1622    fn parse_trigger_task_start() {
1623        assert!(matches!(
1624            parse_trigger("task_start").unwrap(),
1625            Trigger::TaskStart
1626        ));
1627    }
1628
1629    #[test]
1630    fn parse_trigger_task_end() {
1631        assert!(matches!(
1632            parse_trigger("task_end").unwrap(),
1633            Trigger::TaskEnd
1634        ));
1635    }
1636
1637    #[test]
1638    fn parse_trigger_scope_close() {
1639        assert!(matches!(
1640            parse_trigger("scope_close").unwrap(),
1641            Trigger::ScopeClose
1642        ));
1643    }
1644
1645    #[test]
1646    fn parse_trigger_turn_end() {
1647        assert!(matches!(
1648            parse_trigger("turn_end").unwrap(),
1649            Trigger::TurnEnd
1650        ));
1651    }
1652
1653    #[test]
1654    fn parse_trigger_manual() {
1655        assert!(matches!(parse_trigger("manual").unwrap(), Trigger::Manual));
1656    }
1657
1658    #[test]
1659    fn parse_trigger_case_insensitive() {
1660        assert!(matches!(
1661            parse_trigger("TASK_START").unwrap(),
1662            Trigger::TaskStart
1663        ));
1664        assert!(matches!(parse_trigger("Manual").unwrap(), Trigger::Manual));
1665    }
1666
1667    #[test]
1668    fn parse_trigger_schedule() -> Result<(), PackError> {
1669        match parse_trigger("schedule:0 * * * *").unwrap() {
1670            Trigger::Schedule(cron) => assert_eq!(cron, "0 * * * *"),
1671            other => {
1672                return Err(PackError::Validation(format!(
1673                    "expected Schedule, got {:?}",
1674                    other
1675                )))
1676            }
1677        }
1678        Ok(())
1679    }
1680
1681    #[test]
1682    fn parse_trigger_invalid() {
1683        assert!(parse_trigger("bogus").is_err());
1684    }
1685
1686    // ── is_valid_executable_path ───────────────────────────────────────
1687
1688    #[test]
1689    fn valid_executable_path() {
1690        assert!(is_valid_executable_path("./tools/my_tool"));
1691        assert!(is_valid_executable_path("./tools/sub/deep/tool.sh"));
1692    }
1693
1694    #[test]
1695    fn rejects_absolute_path() {
1696        assert!(!is_valid_executable_path("/usr/bin/bash"));
1697    }
1698
1699    #[test]
1700    fn rejects_bare_command() {
1701        assert!(!is_valid_executable_path("curl"));
1702    }
1703
1704    #[test]
1705    fn rejects_path_traversal() {
1706        assert!(!is_valid_executable_path("./tools/../../../bin/sh"));
1707    }
1708
1709    #[test]
1710    fn rejects_shell_metacharacters() {
1711        assert!(!is_valid_executable_path("./tools/a; rm -rf /"));
1712        assert!(!is_valid_executable_path("./tools/a | cat"));
1713        assert!(!is_valid_executable_path("./tools/a && echo"));
1714        assert!(!is_valid_executable_path("./tools/a`whoami`"));
1715    }
1716
1717    #[test]
1718    fn rejects_wrong_prefix() {
1719        assert!(!is_valid_executable_path("./bin/tool"));
1720        assert!(!is_valid_executable_path("tools/tool")); // missing ./
1721    }
1722
1723    // ── PackError Display ──────────────────────────────────────────────
1724
1725    #[test]
1726    fn pack_error_validation_display() {
1727        let err = PackError::Validation("bad config".into());
1728        let s = format!("{}", err);
1729        assert!(s.contains("bad config"));
1730    }
1731
1732    #[test]
1733    fn pack_error_toml_display() {
1734        let err = PackError::Toml("parse error".into());
1735        let s = format!("{}", err);
1736        assert!(s.contains("parse error"));
1737    }
1738
1739    // ── parse_manifest (schema.rs) ─────────────────────────────────────
1740
1741    #[test]
1742    fn parse_manifest_minimal() {
1743        let toml = r#"
1744[meta]
1745project = "test-pack"
1746version = "1.0.0"
1747"#;
1748        let manifest = super::super::schema::parse_manifest(toml).unwrap();
1749        let meta = manifest.meta.as_ref().unwrap();
1750        assert_eq!(meta.project.as_deref(), Some("test-pack"));
1751        assert_eq!(meta.version.as_deref(), Some("1.0.0"));
1752    }
1753
1754    #[test]
1755    fn parse_manifest_invalid_toml() {
1756        let result = super::super::schema::parse_manifest("not valid toml {{{}}}");
1757        assert!(result.is_err());
1758    }
1759
1760    #[test]
1761    fn parse_manifest_with_defaults() {
1762        let toml = r#"
1763[meta]
1764project = "test"
1765version = "0.1.0"
1766
1767[defaults]
1768context_format = "markdown"
1769token_budget = 16000
1770"#;
1771        let manifest = super::super::schema::parse_manifest(toml).unwrap();
1772        let defaults = manifest.defaults.as_ref().unwrap();
1773        assert_eq!(defaults.context_format.as_deref(), Some("markdown"));
1774        assert_eq!(defaults.token_budget, Some(16000));
1775    }
1776
1777    #[test]
1778    fn parse_manifest_with_tools() {
1779        let toml = r#"
1780[meta]
1781project = "test"
1782version = "0.1.0"
1783
1784[tools.bash.my_script]
1785description = "A test script"
1786"#;
1787        let manifest = super::super::schema::parse_manifest(toml).unwrap();
1788        assert!(manifest.tools.bash.contains_key("my_script"));
1789    }
1790
1791    // ── profile_key ────────────────────────────────────────────────────
1792
1793    #[test]
1794    fn profile_key_lowercases() {
1795        let key = profile_key("FULL", "HNSW", "ADA", "JSON");
1796        assert_eq!(key, "full|hnsw|ada|json");
1797    }
1798
1799    #[test]
1800    fn profile_key_deterministic() {
1801        let a = profile_key("a", "b", "c", "d");
1802        let b = profile_key("a", "b", "c", "d");
1803        assert_eq!(a, b);
1804    }
1805}