cellstate_pipeline/pack/
mod.rs

1//! Pack compiler -- the canonical entry point for compiling a CELLSTATE pack.
2//!
3//! Call [`compose_pack()`] with a [`PackInput`] (TOML manifest + Markdown files) to
4//! get a [`PackOutput`] containing both the intermediate `CellstateAst` and the final
5//! `CompiledConfig`.
6//!
7//! This module orchestrates the full pipeline:
8//!
9//! 1. Parse the TOML manifest (`cstate.toml`) via [`schema::parse_manifest`].
10//! 2. Parse Markdown prompt files, extracting fence blocks ([`markdown`] sub-module).
11//! 3. Build the intermediate representation ([`ir::PackIr`]), merging TOML and
12//!    Markdown-derived definitions and validating cross-references.
13//! 4. Convert the IR into a [`CellstateAst`] ([`ast::build_ast`]).
14//! 5. Compile the AST into a [`CompiledConfig`] via [`PipelineCompiler::compile`].
15//! 6. Inject pack-specific metadata (tools, toolsets, agents, injections, routing,
16//!    file hashes) into the compiled config.
17//!
18//! The `ast` module provides the shared types that this module produces. The
19//! `config/` module provides the YAML fence-block parsers that this module calls
20//! during step 2.
21
22mod ast;
23pub mod flow;
24mod ir;
25mod markdown;
26mod mcp;
27mod schema;
28
29// AAIF convergence modules — emit/consume open agent ecosystem formats
30pub mod a2a;
31pub mod a2ui_compat;
32pub mod agents_md;
33pub mod llms_txt;
34pub mod skills;
35
36use crate::{
37    ast::CellstateAst,
38    compiler::{CompiledConfig, ComposioGatewayConfig},
39    PipelineCompiler,
40};
41use sha2::{Digest, Sha256};
42use std::collections::HashMap;
43use std::path::PathBuf;
44
45pub use ast::build_ast;
46pub use flow::*;
47pub use ir::*;
48pub use markdown::*;
49pub use mcp::*;
50pub use schema::*;
51
/// In-memory source set for a single pack, as read from disk by the caller.
///
/// This is the input to [`compose_pack`]; the pipeline itself operates only
/// on these in-memory contents.
#[derive(Debug, Clone)]
pub struct PackInput {
    // Pack root directory; markdown paths are relativized against this when
    // computing file hashes.
    pub root: PathBuf,
    // Raw contents of the `cstate.toml` manifest.
    pub manifest: String,
    // Markdown prompt files belonging to the pack.
    pub markdowns: Vec<PackMarkdownFile>,
    /// Contract JSON Schema files: maps relative path to content.
    pub contracts: HashMap<String, String>,
}
60
/// A single Markdown prompt file: its path plus raw contents.
#[derive(Debug, Clone)]
pub struct PackMarkdownFile {
    // Path as supplied by the caller; stripped of `PackInput::root` for
    // hashing when it lies under the root.
    pub path: PathBuf,
    // Raw Markdown text (fence blocks are extracted during parsing).
    pub content: String,
}
66
/// Result of compiling a pack: both the intermediate AST and the final
/// compiled runtime configuration.
#[derive(Debug, Clone)]
pub struct PackOutput {
    // AST built from the pack IR (pipeline step 4).
    pub ast: CellstateAst,
    // Compiled config with pack tools, flows, hashes, and metadata injected.
    pub compiled: CompiledConfig,
}
72
73pub fn compose_pack(input: PackInput) -> Result<PackOutput, PackError> {
74    let manifest = parse_manifest(&input.manifest)?;
75
76    // Enforce the x-* extension namespace. Any top-level key not matching a
77    // known field AND not prefixed with "x-" is a typo or unsupported feature.
78    for key in manifest.extensions.keys() {
79        if !key.starts_with("x-") {
80            return Err(PackError::Validation(format!(
81                "Unknown top-level key '{}'. Custom extensions must use the 'x-' prefix (e.g. 'x-{}')",
82                key, key
83            )));
84        }
85    }
86
87    let md_docs = parse_markdown_files(&manifest, &input.markdowns)?;
88    let ir = PackIr::new(manifest.clone(), md_docs)?;
89    let ast = build_ast(&ir)?;
90    let mut compiled = PipelineCompiler::compile(&ast)?;
91
92    // Inject pack tool registry + toolsets into the runtime config.
93    // Pass contract files for schema compilation.
94    compiled.tools = compile_tools(&manifest, &input.contracts)?;
95    compiled.toolsets = compile_toolsets(&manifest);
96    compiled.pack_agents = compile_pack_agents(&manifest, &ir.markdown);
97    compiled.pack_injections = compile_pack_injections(&manifest)?;
98    compiled.pack_routing = compile_pack_routing(&manifest);
99    compiled.composio_gateway = compile_composio_gateway(&manifest);
100
101    // Compute file hashes for artifact determinism (lockfile support).
102    compiled.file_hashes = compute_file_hashes(&input)?;
103
104    // Compile flow definitions from Markdown fence blocks.
105    compiled.flows = compile_flows(&ir.flows)?;
106
107    // Inject pack metadata for AAIF convergence outputs.
108    inject_pack_metadata(&manifest, &mut compiled);
109
110    Ok(PackOutput { ast, compiled })
111}
112
113/// Compile flow definitions into compiled flows with content hashes.
114fn compile_flows(flows: &[flow::FlowDefinition]) -> Result<Vec<flow::CompiledFlow>, PackError> {
115    flows
116        .iter()
117        .map(|flow_def| {
118            let steps: Vec<flow::CompiledFlowStep> = flow_def
119                .steps
120                .iter()
121                .map(|step| flow::CompiledFlowStep {
122                    id: step.id.clone(),
123                    tool: step.tool.clone(),
124                    inputs: step.inputs.clone(),
125                    outputs: step.outputs.clone(),
126                    content_hash: step.compute_hash(),
127                })
128                .collect();
129
130            // Compute flow-level hash from name + all step hashes
131            let mut hasher = Sha256::new();
132            hasher.update(flow_def.name.as_bytes());
133            for step in &steps {
134                hasher.update(step.content_hash);
135            }
136            let flow_hash: [u8; 32] = hasher.finalize().into();
137
138            Ok(flow::CompiledFlow {
139                name: flow_def.name.clone(),
140                flow_hash,
141                steps,
142            })
143        })
144        .collect()
145}
146
147/// Inject pack-level metadata from the manifest into CompiledConfig.
148///
149/// These fields power the AAIF convergence outputs: SKILL.md, AGENTS.md,
150/// A2A Agent Card, llms.txt, and Google A2UI compatibility.
151fn inject_pack_metadata(manifest: &PackManifest, compiled: &mut CompiledConfig) {
152    if let Some(ref meta) = manifest.meta {
153        compiled.pack_meta_project = meta.project.clone();
154        compiled.pack_meta_version = meta.version.clone();
155        compiled.pack_meta_description = meta.description.clone();
156        compiled.pack_meta_instructions = meta.instructions.clone();
157        compiled.pack_meta_homepage = meta.homepage.clone();
158        compiled.pack_meta_license = meta.license.clone();
159    }
160}
161
162/// Compile Composio MCP Gateway configuration from manifest.
163fn compile_composio_gateway(manifest: &PackManifest) -> Option<ComposioGatewayConfig> {
164    manifest
165        .tools
166        .composio_gateway
167        .as_ref()
168        .filter(|gw| gw.enabled)
169        .map(|gw| ComposioGatewayConfig {
170            enabled: gw.enabled,
171            max_tools: gw.max_tools,
172            allowed_toolkits: gw.allowed_toolkits.clone(),
173            blocked_toolkits: gw.blocked_toolkits.clone(),
174        })
175}
176
177/// Compute SHA-256 hashes for all pack source files.
178/// These hashes enable runtime drift detection without recompilation.
179fn compute_file_hashes(input: &PackInput) -> Result<HashMap<String, String>, PackError> {
180    let mut hashes = HashMap::new();
181
182    // Hash the manifest (cstate.toml)
183    let manifest_hash = sha256_hex(&input.manifest);
184    hashes.insert("cstate.toml".to_string(), manifest_hash);
185
186    // Hash all markdown files
187    for md in &input.markdowns {
188        let rel_path = md
189            .path
190            .strip_prefix(&input.root)
191            .unwrap_or(&md.path)
192            .to_string_lossy()
193            .to_string();
194        let hash = sha256_hex(&md.content);
195        hashes.insert(rel_path, hash);
196    }
197
198    // Hash all contract files
199    for (path, content) in &input.contracts {
200        if path.contains("..") || path.starts_with('/') || path.starts_with('\\') {
201            return Err(PackError::Validation(format!(
202                "invalid contract path: {path}"
203            )));
204        }
205        let hash = sha256_hex(content);
206        hashes.insert(path.clone(), hash);
207    }
208
209    Ok(hashes)
210}
211
212/// Compute SHA-256 hash of content, returning hex-encoded string.
213fn sha256_hex(content: &str) -> String {
214    let mut hasher = Sha256::new();
215    hasher.update(content.as_bytes());
216    let result = hasher.finalize();
217    hex::encode(result)
218}
219
#[cfg(test)]
mod security_tests {
    use super::*;
    use std::collections::HashMap;

    /// Build a `PackInput` containing a single contract file.
    fn input_with_contract(path: &str, content: &str) -> PackInput {
        let mut contracts = HashMap::new();
        contracts.insert(path.to_string(), content.to_string());
        PackInput {
            root: PathBuf::from("/tmp/test"),
            manifest: String::new(),
            markdowns: vec![],
            contracts,
        }
    }

    #[test]
    fn test_contract_path_traversal_rejected() {
        let result = compute_file_hashes(&input_with_contract("../etc/passwd", "malicious"));
        assert!(result.is_err(), "paths with '..' must be rejected");
        match result.unwrap_err() {
            PackError::Validation(msg) => {
                assert!(
                    msg.contains("invalid contract path"),
                    "error should mention invalid path: {msg}"
                );
            }
            other => panic!("expected PackError::Validation, got {:?}", other),
        }
    }

    #[test]
    fn test_contract_absolute_path_rejected() {
        assert!(
            compute_file_hashes(&input_with_contract("/etc/passwd", "malicious")).is_err(),
            "absolute paths must be rejected"
        );
    }

    #[test]
    fn test_contract_backslash_path_rejected() {
        assert!(
            compute_file_hashes(&input_with_contract("\\windows\\system32", "malicious")).is_err(),
            "backslash-prefixed paths must be rejected"
        );
    }

    #[test]
    fn test_contract_valid_relative_path_accepted() {
        assert!(
            compute_file_hashes(&input_with_contract("schemas/contract.json", "{}")).is_ok(),
            "valid relative paths should be accepted"
        );
    }
}
293
#[cfg(test)]
mod flow_tests {
    use super::*;
    use crate::pack::flow::{FlowDefinition, FlowErrorHandler, FlowStep};

    /// Build a `FlowStep` with a zeroed content hash (recomputed by
    /// `compile_flows`).
    fn step(id: &str, tool: &str, inputs: &[(&str, &str)], outputs: &[&str]) -> FlowStep {
        FlowStep {
            id: id.to_string(),
            tool: tool.to_string(),
            inputs: inputs
                .iter()
                .map(|(k, v)| (k.to_string(), v.to_string()))
                .collect(),
            outputs: outputs.iter().map(|s| s.to_string()).collect(),
            content_hash: [0u8; 32], // Will be recomputed
        }
    }

    #[test]
    fn test_compile_flows_empty() {
        let result = compile_flows(&[]);
        assert!(result.is_ok());
        assert!(result.unwrap().is_empty());
    }

    #[test]
    fn test_compile_flows_single() {
        let flow = FlowDefinition {
            name: "test-flow".to_string(),
            description: Some("A test flow".to_string()),
            steps: vec![step("step1", "bash", &[("command", "echo hello")], &["stdout"])],
            on_error: FlowErrorHandler::Abort,
        };

        let result = compile_flows(&[flow]).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].name, "test-flow");
        assert_eq!(result[0].steps.len(), 1);
        assert_eq!(result[0].steps[0].tool, "bash");
        // Verify hash was computed (not all zeros)
        assert_ne!(result[0].flow_hash, [0u8; 32]);
        assert_ne!(result[0].steps[0].content_hash, [0u8; 32]);
    }

    #[test]
    fn test_compile_flows_multiple() {
        let flow1 = FlowDefinition {
            name: "flow-a".to_string(),
            description: None,
            steps: vec![step("s1", "http", &[], &[])],
            on_error: FlowErrorHandler::Abort,
        };

        let flow2 = FlowDefinition {
            name: "flow-b".to_string(),
            description: Some("second flow".to_string()),
            steps: vec![
                step("s1", "bash", &[("cmd", "ls")], &["listing"]),
                step("s2", "file_write", &[("path", "/tmp/out.txt")], &[]),
            ],
            on_error: FlowErrorHandler::SkipToNext,
        };

        let result = compile_flows(&[flow1, flow2]).unwrap();
        assert_eq!(result.len(), 2);
        assert_eq!(result[0].name, "flow-a");
        assert_eq!(result[1].name, "flow-b");
        assert_eq!(result[1].steps.len(), 2);
        // Different flows should have different flow hashes
        assert_ne!(result[0].flow_hash, result[1].flow_hash);
    }

    #[test]
    fn test_compile_flows_deterministic_hashes() {
        let make_flow = || FlowDefinition {
            name: "deterministic".to_string(),
            description: None,
            steps: vec![step("step1", "echo", &[("msg", "hello")], &["out"])],
            on_error: FlowErrorHandler::Abort,
        };

        let result1 = compile_flows(&[make_flow()]).unwrap();
        let result2 = compile_flows(&[make_flow()]).unwrap();

        assert_eq!(result1[0].flow_hash, result2[0].flow_hash);
        assert_eq!(
            result1[0].steps[0].content_hash,
            result2[0].steps[0].content_hash
        );
    }
}