cellstate_pipeline/pack/
schema.rs

1//! Pack manifest schema (cstate.toml)
2
3use cellstate_core::SecretString;
4use serde::Deserialize;
5use std::collections::HashMap;
6
// NOTE: deny_unknown_fields is intentionally NOT on PackManifest to support
// the x-* extension namespace. Unknown top-level keys are validated post-parse
// in compose_pack() — any key not starting with "x-" is rejected there.
// All nested structs still use deny_unknown_fields for strict validation.
/// Top-level shape of a pack manifest (`cstate.toml`).
///
/// Everything is optional at parse time: the singleton sections are `Option`s
/// and the keyed tables are `#[serde(default)]`, so they deserialize to their
/// empty `Default` value when omitted. Any top-level key that matches none of
/// the named fields is captured in `extensions` by `#[serde(flatten)]` rather
/// than being rejected by the deserializer.
#[derive(Debug, Clone, Deserialize)]
pub struct PackManifest {
    /// `[meta]` section, if present.
    pub meta: Option<MetaSection>,
    /// `[defaults]` section, if present.
    pub defaults: Option<DefaultsSection>,
    /// `[settings]` section, if present.
    pub settings: Option<SettingsSection>,
    /// `[routing]` section, if present.
    pub routing: Option<RoutingSection>,
    /// `[profiles.<name>]` tables keyed by profile name; empty when omitted.
    #[serde(default)]
    pub profiles: HashMap<String, ProfileDef>,
    /// `[adapters.<name>]` tables keyed by adapter name; empty when omitted.
    #[serde(default)]
    pub adapters: HashMap<String, AdapterDef>,
    /// `[formats.<name>]` tables keyed by format name; empty when omitted.
    #[serde(default)]
    pub formats: HashMap<String, FormatDef>,
    /// `[policies.<name>]` tables keyed by policy name; empty when omitted.
    #[serde(default)]
    pub policies: HashMap<String, PolicyDef>,
    /// `[injections.<name>]` tables keyed by injection name; empty when omitted.
    #[serde(default)]
    pub injections: HashMap<String, InjectionDef>,
    /// `[providers.<name>]` tables keyed by provider name; empty when omitted.
    #[serde(default)]
    pub providers: HashMap<String, ProviderDef>,
    /// `[tools]` section; an all-empty `ToolsSection` when omitted.
    #[serde(default)]
    pub tools: ToolsSection,
    /// `[toolsets.<name>]` tables keyed by toolset name; empty when omitted.
    #[serde(default)]
    pub toolsets: HashMap<String, ToolsetDef>,
    /// `[agents.<name>]` tables keyed by agent name; empty when omitted.
    #[serde(default)]
    pub agents: HashMap<String, PackAgentBinding>,
    /// Vendor/tool-specific extension keys (must start with "x-").
    ///
    /// Example: `x-mycompany-feature = { enabled = true }`
    ///
    /// Because this field is flattened, it receives *every* top-level key not
    /// claimed by a field above; the "x-" prefix is not enforced by this type.
    #[serde(flatten)]
    pub extensions: HashMap<String, toml::Value>,
}
41
/// `[meta]` section: project-level metadata.
///
/// Every key is optional at parse time; unknown keys are rejected.
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct MetaSection {
    /// Pack spec version. Must be "cstate.toml/v1.0" when present.
    /// (That constraint is not enforced by this type — any string parses.)
    pub spec: Option<String>,
    /// Version string for the pack/project, stored verbatim.
    pub version: Option<String>,
    /// Project name. Optional as far as the parser is concerned (the field is
    /// an `Option`), but it should always be set — emitters fall back
    /// gracefully when it is missing.
    pub project: Option<String>,
    /// Environment label, stored verbatim.
    pub env: Option<String>,
    /// Project-level description for SKILL.md, AGENTS.md, A2A Agent Card, llms.txt.
    pub description: Option<String>,
    /// Build/test/coding convention instructions for AGENTS.md generation.
    /// Can be inline text or a path to a markdown file (e.g., "instructions.md").
    pub instructions: Option<String>,
    /// Project homepage URL for A2A Agent Card and llms.txt.
    pub homepage: Option<String>,
    /// License identifier (e.g., "Apache-2.0") for SKILL.md compliance.
    pub license: Option<String>,
}
61
/// `[defaults]` section: pack-wide fallback settings.
///
/// Every key is optional; unknown keys are rejected.
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct DefaultsSection {
    /// Default context format, as a free-form string (presumably the name of
    /// a `[formats.*]` entry — TODO confirm against the composer).
    pub context_format: Option<String>,
    /// Default token budget. The type is signed, so a negative value parses
    /// successfully; any range check has to happen after parsing.
    pub token_budget: Option<i32>,
    /// Strict-markdown toggle; its effect is defined by the consumer.
    pub strict_markdown: Option<bool>,
    /// Strict-references toggle; its effect is defined by the consumer.
    pub strict_refs: Option<bool>,
    /// Secrets handling mode, as a free-form string.
    pub secrets_mode: Option<String>,
}
71
/// `[settings]` section. Currently only holds the optional `matrix` sub-table;
/// any other key is rejected.
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct SettingsSection {
    /// `[settings.matrix]` table, if present.
    pub matrix: Option<SettingsMatrix>,
}
77
/// `[settings.matrix]` table: the list of permitted profile bindings.
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct SettingsMatrix {
    /// Permitted profile bindings. Required whenever the table is present
    /// (no serde default), though the list itself may be empty.
    pub allowed: Vec<ProfileBinding>,
    /// Defaults to `false` when omitted. Enforcement semantics live in the
    /// consumer and are not visible from this schema.
    #[serde(default)]
    pub enforce_profiles_only: bool,
}
85
86#[derive(Debug, Clone, Deserialize)]
87#[serde(deny_unknown_fields)]
88pub struct RoutingSection {
89    /// Routing strategy hint (first|round_robin|random|least_latency).
90    pub strategy: Option<String>,
91    /// Preferred provider name for embeddings.
92    pub embedding_provider: Option<String>,
93    /// Preferred provider name for summarization.
94    pub summarization_provider: Option<String>,
95    /// Preferred provider name for chat completions.
96    #[serde(default)]
97    pub chat_provider: Option<String>,
98    /// OpenRouter sort preference (e.g., "price", "throughput").
99    #[serde(default)]
100    pub sort: Option<String>,
101    /// Zero Data Retention opt-in.
102    #[serde(default)]
103    pub zdr: Option<bool>,
104    /// Explicit provider ordering for OpenRouter.
105    #[serde(default)]
106    pub provider_order: Option<Vec<String>>,
107    /// OpenRouter data collection preference ("deny" | "allow").
108    #[serde(default)]
109    pub data_collection: Option<String>,
110}
111
/// One entry of `settings.matrix.allowed`: a named combination of retention,
/// index, embeddings and format settings.
///
/// All five keys are required and stored as free-form strings; unknown keys
/// are rejected. Same shape as `ProfileDef` plus an inline `name`.
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct ProfileBinding {
    /// Name of this binding.
    pub name: String,
    /// Retention setting (string value; accepted values not constrained here).
    pub retention: String,
    /// Index setting (string value; accepted values not constrained here).
    pub index: String,
    /// Embeddings setting (string value; accepted values not constrained here).
    pub embeddings: String,
    /// Format setting (string value; accepted values not constrained here).
    pub format: String,
}
121
/// A `[profiles.<name>]` table. The profile's name is the map key in
/// `PackManifest::profiles`, so it is not repeated inside the table.
///
/// All four keys are required free-form strings; unknown keys are rejected.
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct ProfileDef {
    /// Retention setting (string value; accepted values not constrained here).
    pub retention: String,
    /// Index setting (string value; accepted values not constrained here).
    pub index: String,
    /// Embeddings setting (string value; accepted values not constrained here).
    pub embeddings: String,
    /// Format setting (string value; accepted values not constrained here).
    pub format: String,
}
130
/// An `[adapters.<name>]` table.
///
/// `type` and `connection` are required; unknown keys are rejected.
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct AdapterDef {
    /// Adapter kind. Spelled `type` in the manifest (`type` is a Rust keyword,
    /// hence the rename).
    #[serde(rename = "type")]
    pub adapter_type: String,
    /// Connection string for the adapter.
    // NOTE(review): this is a plain `String` and the struct derives `Debug`,
    // so the value is printed verbatim in debug output. If connection strings
    // can embed credentials, consider the `SecretString` treatment that
    // `ProviderDef::api_key` gets — confirm with the callers first.
    pub connection: String,
    /// Free-form string options; empty when omitted.
    #[serde(default)]
    pub options: HashMap<String, String>,
}
140
/// A `[formats.<name>]` table.
///
/// Only `type` is required; unknown keys are rejected.
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct FormatDef {
    /// Format kind. Spelled `type` in the manifest.
    #[serde(rename = "type")]
    pub format_type: String,
    /// Optional audit-inclusion toggle; `None` when the key is absent.
    pub include_audit: Option<bool>,
    /// Optional sources-inclusion toggle; `None` when the key is absent.
    pub include_sources: Option<bool>,
}
149
/// A `[policies.<name>]` table: a trigger plus the actions attached to it.
///
/// Unknown keys are rejected.
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct PolicyDef {
    /// Trigger identifier (required, free-form string).
    pub trigger: String,
    /// Actions for this policy, in manifest order; empty when omitted.
    #[serde(default)]
    pub actions: Vec<PolicyActionDef>,
}
157
/// One element of a policy's `actions` array.
///
/// Only `type` is required; which optional keys are meaningful for a given
/// action type is decided by the consumer, not by this schema. Unknown keys
/// are rejected.
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct PolicyActionDef {
    /// Action kind. Spelled `type` in the manifest.
    #[serde(rename = "type")]
    pub action_type: String,
    /// Optional target of the action.
    pub target: Option<String>,
    /// Optional token limit (signed; negative values are not rejected here).
    pub max_tokens: Option<i32>,
    /// Optional mode string.
    pub mode: Option<String>,
}
167
/// An `[injections.<name>]` table.
///
/// `source`, `target` and `mode` are required; unknown keys are rejected.
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct InjectionDef {
    /// Where the injected content comes from (free-form string).
    pub source: String,
    /// Where the content is injected (free-form string).
    pub target: String,
    /// Explicit entity type this injection targets (e.g., "note", "artifact").
    pub entity_type: Option<String>,
    /// Injection mode (free-form string; accepted values not constrained here).
    pub mode: String,
    /// Priority; `0` when omitted. Whether higher or lower wins is decided by
    /// the consumer.
    #[serde(default)]
    pub priority: i32,
    /// Optional token limit (signed; negative values are not rejected here).
    pub max_tokens: Option<i32>,
    /// Optional result-count limit.
    pub top_k: Option<usize>,
    /// Optional threshold; range and meaning are defined by the consumer.
    pub threshold: Option<f32>,
}
182
/// A `[providers.<name>]` table.
///
/// `type`, `api_key` and `model` are required; unknown keys are rejected.
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct ProviderDef {
    /// Provider kind. Spelled `type` in the manifest.
    #[serde(rename = "type")]
    pub provider_type: String,
    /// API key, held in `cellstate_core::SecretString` rather than a plain
    /// `String` (presumably so it is redacted in debug output — TODO confirm
    /// against that type's `Debug` impl).
    pub api_key: SecretString,
    /// Model identifier (free-form string).
    pub model: String,
    /// Free-form string options; empty when omitted.
    #[serde(default)]
    pub options: HashMap<String, String>,
}
193
/// `[tools]` section: tool definitions grouped by execution backend.
///
/// Each group is a map keyed by tool name and is empty when omitted. The
/// struct derives `Default`, which is what lets `PackManifest::tools` be
/// omitted entirely. Unknown keys are rejected.
#[derive(Debug, Clone, Deserialize, Default)]
#[serde(deny_unknown_fields)]
pub struct ToolsSection {
    /// `[tools.bin.<name>]` — executable tools.
    #[serde(default)]
    pub bin: HashMap<String, ToolExecDef>,
    /// `[tools.prompts.<name>]` — prompt-backed tools.
    #[serde(default)]
    pub prompts: HashMap<String, ToolPromptDef>,
    /// Bash tools — sandboxed TypeScript bash interpreter (just-bash).
    /// Commands execute against the agent's virtual filesystem (AgentFs / just-bash FS layer).
    #[serde(default)]
    pub bash: HashMap<String, ToolBashDef>,
    /// Browser tools — accessibility-first browser automation via Stagehand/Playwright.
    /// Executed client-side with resource limits and PII redaction.
    #[serde(default)]
    pub browser: HashMap<String, ToolBrowserDef>,
    /// Composio tools — managed SaaS integrations (800+ tools).
    /// OAuth and authentication handled by Composio; CELLSTATE manages state.
    #[serde(default)]
    pub composio: HashMap<String, ToolComposioDef>,
    /// Dynamic Composio MCP Gateway configuration.
    /// When enabled, exposes a `composio_search_tools` meta-tool for runtime discovery.
    /// `None` when the `[tools.composio_gateway]` table is absent.
    #[serde(default)]
    pub composio_gateway: Option<ComposioGatewayDef>,
}
218
/// Dynamic Composio MCP Gateway configuration.
/// Enables runtime discovery of Composio's 800+ tool integrations via a meta-tool.
///
/// Every key has a default, so an empty table is valid; unknown keys are rejected.
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct ComposioGatewayDef {
    /// Whether gateway discovery is enabled. Defaults to `false` when omitted,
    /// so the table's presence alone does not turn the gateway on.
    #[serde(default)]
    pub enabled: bool,
    /// Max tools to surface per search (default 30, supplied by
    /// `default_max_tools`).
    #[serde(default = "default_max_tools")]
    pub max_tools: usize,
    /// Allowed toolkits filter (empty = all).
    #[serde(default)]
    pub allowed_toolkits: Vec<String>,
    /// Blocked toolkits filter. Empty when omitted.
    #[serde(default)]
    pub blocked_toolkits: Vec<String>,
}
237
/// Serde default for `ComposioGatewayDef::max_tools`: the number of tools
/// surfaced per search when the manifest does not set `max_tools`.
fn default_max_tools() -> usize {
    const DEFAULT_MAX_TOOLS: usize = 30;
    DEFAULT_MAX_TOOLS
}
241
242/// Bash tool definition — sandboxed command execution via just-bash.
243#[derive(Debug, Clone, Deserialize)]
244#[serde(deny_unknown_fields)]
245pub struct ToolBashDef {
246    #[serde(rename = "kind")]
247    pub kind: Option<String>,
248    /// Description of what this bash tool does
249    pub description: Option<String>,
250    /// Timeout in milliseconds (default: 30000)
251    pub timeout_ms: Option<i32>,
252    /// Whether to allow network access (default: false)
253    pub allow_network: Option<bool>,
254    /// Whether to allow filesystem writes (default: true)
255    pub allow_fs: Option<bool>,
256    /// Allowed commands whitelist (empty = all allowed)
257    #[serde(default)]
258    pub allowed_commands: Vec<String>,
259    /// Blocked commands blacklist
260    #[serde(default)]
261    pub blocked_commands: Vec<String>,
262}
263
264/// Browser tool definition — accessibility-first browser automation.
265#[derive(Debug, Clone, Deserialize)]
266#[serde(deny_unknown_fields)]
267pub struct ToolBrowserDef {
268    #[serde(rename = "kind")]
269    pub kind: Option<String>,
270    /// Description of what this browser tool does.
271    pub description: Option<String>,
272    /// Timeout for individual actions in milliseconds (default: 30000).
273    pub timeout_ms: Option<i32>,
274    /// Maximum navigations per session (default: 50).
275    pub max_navigations: Option<i32>,
276    /// Maximum session duration in milliseconds (default: 300000).
277    pub max_duration_ms: Option<i32>,
278    /// Allowed domain patterns (glob). Empty = all allowed.
279    #[serde(default)]
280    pub allowed_domains: Vec<String>,
281    /// Blocked domain patterns (glob). Takes precedence over allowed.
282    #[serde(default)]
283    pub blocked_domains: Vec<String>,
284    /// Use vision (screenshots) instead of accessibility tree. Default: false.
285    pub use_vision: Option<bool>,
286    /// Whether to allow network access (default: true for browser tools).
287    pub allow_network: Option<bool>,
288}
289
290/// Composio tool definition — managed SaaS integrations via Composio REST API.
291/// Composio handles OAuth, authentication, and API mapping for 800+ tools.
292/// CELLSTATE records why the tool was called, what memory informed the decision,
293/// and what gates it passed.
294#[derive(Debug, Clone, Deserialize)]
295#[serde(deny_unknown_fields)]
296pub struct ToolComposioDef {
297    #[serde(rename = "kind")]
298    pub kind: Option<String>,
299    /// Composio toolkit name (e.g. "GMAIL", "SLACK", "GITHUB", "LINEAR").
300    pub toolkit: String,
301    /// Composio action names to expose (e.g. ["GMAIL_SEND_EMAIL", "GMAIL_LIST_EMAILS"]).
302    #[serde(default)]
303    pub actions: Vec<String>,
304    /// Description of what this tool integration does.
305    pub description: Option<String>,
306    /// Timeout in milliseconds for Composio API calls (default: 30000).
307    pub timeout_ms: Option<i32>,
308}
309
310#[derive(Debug, Clone, Deserialize)]
311#[serde(deny_unknown_fields)]
312pub struct ToolExecDef {
313    #[serde(rename = "kind")]
314    pub kind: Option<String>,
315    pub cmd: String,
316    pub timeout_ms: Option<i32>,
317    pub allow_network: Option<bool>,
318    pub allow_fs: Option<bool>,
319    pub allow_subprocess: Option<bool>,
320    /// How OAuth credentials should be delivered to the tool.
321    pub credential_delivery: Option<String>,
322    /// OAuth provider IDs whose tokens should be injected server-side.
323    #[serde(default)]
324    pub oauth_providers: Vec<String>,
325    /// Required OAuth scopes per provider, enforced at tool-execution time.
326    /// Example:
327    /// oauth_required_scopes.github = ["repo", "workflow"]
328    #[serde(default)]
329    pub oauth_required_scopes: HashMap<String, Vec<String>>,
330}
331
332#[derive(Debug, Clone, Deserialize)]
333#[serde(deny_unknown_fields)]
334pub struct ToolPromptDef {
335    #[serde(rename = "kind")]
336    pub kind: Option<String>,
337    pub prompt_md: String,
338    pub contract: Option<String>,
339    pub result_format: Option<String>,
340    pub timeout_ms: Option<i32>,
341}
342
/// A `[toolsets.<name>]` table: a named group of tools.
///
/// Unknown keys are rejected.
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct ToolsetDef {
    /// Tool identifiers in this set (required; may be empty). How an
    /// identifier maps onto a `[tools.*]` entry is resolved by the consumer.
    pub tools: Vec<String>,
}
348
/// Pack agent binding - wires toolsets and prompts to an agent role.
/// Not to be confused with ast::AgentDef which defines agent capabilities.
///
/// `profile` and `prompt_md` are required; unknown keys are rejected.
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct PackAgentBinding {
    /// Optional on/off switch; `None` when the key is absent (the effective
    /// default is chosen by the consumer).
    pub enabled: Option<bool>,
    /// Profile this agent uses (presumably a key of `[profiles.*]` — TODO
    /// confirm where the reference is resolved).
    pub profile: String,
    /// Optional adapter name.
    pub adapter: Option<String>,
    /// Optional format name.
    pub format: Option<String>,
    /// Optional per-agent token budget (signed; negative values are not
    /// rejected here).
    pub token_budget: Option<i32>,
    /// Markdown prompt reference for this agent.
    pub prompt_md: String,
    /// Names of the toolsets available to this agent; empty when omitted.
    #[serde(default)]
    pub toolsets: Vec<String>,
    /// Human-readable description of what this agent does.
    /// Used for SKILL.md description, A2A Agent Card, AGENTS.md, and progressive disclosure.
    pub description: Option<String>,
    /// Tags for skill discovery and categorization (e.g., ["code-review", "security"]).
    #[serde(default)]
    pub tags: Vec<String>,
}
369
370pub fn parse_manifest(toml_source: &str) -> Result<PackManifest, PackError> {
371    const MAX_MANIFEST_BYTES: usize = 10 * 1024 * 1024; // 10 MB
372    if toml_source.len() > MAX_MANIFEST_BYTES {
373        return Err(PackError::Validation(format!(
374            "manifest size {} exceeds maximum {MAX_MANIFEST_BYTES}",
375            toml_source.len()
376        )));
377    }
378    toml::from_str(toml_source).map_err(|e| PackError::Toml(e.to_string()))
379}
380
381// Error lives in ir.rs
382use super::ir::PackError;
383
#[cfg(test)]
mod tests {
    use super::*;

    /// An empty document is a valid manifest: every section is either an
    /// `Option` or has a serde default, so nothing is required at top level.
    #[test]
    fn parse_manifest_empty_is_empty_manifest() {
        let m = parse_manifest("").expect("empty document should parse as an empty manifest");
        assert!(m.meta.is_none());
        assert!(m.defaults.is_none());
        assert!(m.settings.is_none());
        assert!(m.routing.is_none());
        assert!(m.profiles.is_empty());
        assert!(m.agents.is_empty());
        assert!(m.tools.bin.is_empty());
        assert!(m.tools.composio_gateway.is_none());
        assert!(m.extensions.is_empty());
    }

    #[test]
    fn parse_manifest_meta_only() {
        let toml = r#"
[meta]
project = "minimal"
version = "0.0.1"
"#;
        let m = parse_manifest(toml).unwrap();
        let meta = m.meta.as_ref().unwrap();
        assert_eq!(meta.project.as_deref(), Some("minimal"));
        assert_eq!(meta.version.as_deref(), Some("0.0.1"));
        assert!(m.defaults.is_none());
        assert!(m.profiles.is_empty());
    }

    #[test]
    fn parse_manifest_with_routing() {
        let toml = r#"
[meta]
project = "routed"
version = "1.0.0"

[routing]
strategy = "intent"
"#;
        let m = parse_manifest(toml).unwrap();
        assert!(m.routing.is_some());
    }

    #[test]
    fn parse_manifest_with_settings() {
        let toml = r#"
[meta]
project = "settings-test"
version = "1.0.0"

[settings.matrix]
allowed = []
"#;
        let m = parse_manifest(toml).unwrap();
        assert!(m.settings.is_some());
    }

    /// Nested sections use `deny_unknown_fields`, so a stray key inside
    /// `[meta]` must surface as a TOML/schema error rather than be ignored.
    #[test]
    fn parse_manifest_rejects_unknown_nested_key() {
        let toml = r#"
[meta]
project = "strict"
bogus = true
"#;
        match parse_manifest(toml) {
            Err(PackError::Toml(_)) => {}
            other => panic!("expected PackError::Toml, got {:?}", other),
        }
    }

    /// Top-level keys that match no named field land in `extensions`.
    #[test]
    fn parse_manifest_collects_x_extension_keys() {
        let toml = r#"
x-mycompany-feature = { enabled = true }

[meta]
project = "ext"
"#;
        let m = parse_manifest(toml).unwrap();
        assert!(m.meta.is_some());
        assert!(m.extensions.contains_key("x-mycompany-feature"));
        assert_eq!(m.extensions.len(), 1);
    }

    #[test]
    fn default_max_tools_is_positive() {
        assert!(default_max_tools() > 0);
    }

    #[test]
    fn test_manifest_size_limit() {
        // 11 MB of 'a' characters — exceeds the 10 MB limit
        let oversized = "a".repeat(11 * 1024 * 1024);
        let result = parse_manifest(&oversized);
        assert!(result.is_err(), "manifest exceeding 10 MB must be rejected");
        let err = result.unwrap_err();
        match &err {
            PackError::Validation(msg) => {
                assert!(
                    msg.contains("manifest size") && msg.contains("exceeds maximum"),
                    "error should mention size limit: {msg}"
                );
            }
            other => panic!("expected PackError::Validation, got {:?}", other),
        }
    }

    #[test]
    fn pack_manifest_toml_roundtrip() {
        let toml = r#"
[meta]
project = "roundtrip"
version = "2.0.0"
description = "Test roundtrip"
"#;
        let m = parse_manifest(toml).unwrap();
        let meta = m.meta.as_ref().unwrap();
        assert_eq!(meta.project.as_deref(), Some("roundtrip"));
        assert_eq!(meta.version.as_deref(), Some("2.0.0"));
        assert_eq!(meta.description.as_deref(), Some("Test roundtrip"));
    }
}