cellstate_pipeline/pack/
llms_txt.rs

1//! llms.txt generation from Pack configuration.
2//!
3//! Generates an llms.txt file for AI-agent discovery. This follows the
4//! emerging standard (844k+ sites) for making agent capabilities
5//! discoverable by LLMs.
6//!
7//! Re-export path: cellstate_pipeline::pack::llms_txt::*
8
9use crate::compiler::CompiledConfig;
10
/// Schema version identifier for the llms.txt layout produced by this module.
///
/// [`generate_llms_txt`] embeds this value in an HTML comment
/// (`<!-- schema_version: ... -->`) emitted directly after the title line.
pub const LLMS_TXT_SCHEMA_VERSION: &str = "cellstate.llms_txt.v1";
13
14/// Generate an llms.txt string from a compiled CELLSTATE config.
15///
16/// Output follows the llms.txt specification: Markdown format with
17/// structured sections describing the agent's capabilities.
18pub fn generate_llms_txt(config: &CompiledConfig, base_url: Option<&str>) -> String {
19    let mut out = String::new();
20
21    // Title
22    let project_name = config
23        .pack_meta_project
24        .as_deref()
25        .unwrap_or("CELLSTATE Agent");
26    out.push_str(&format!("# {}\n\n", project_name));
27    out.push_str(&format!(
28        "<!-- schema_version: {} -->\n\n",
29        LLMS_TXT_SCHEMA_VERSION
30    ));
31
32    // Description
33    let description = config
34        .pack_meta_description
35        .as_deref()
36        .unwrap_or("An AI agent powered by CELLSTATE — event-sourced, auditable agent memory.");
37    out.push_str(&format!("> {}\n\n", description));
38
39    // API Documentation links
40    if let Some(url) = base_url {
41        out.push_str("## Documentation\n\n");
42        out.push_str(&format!(
43            "- [API Reference]({}/docs/api)\n",
44            url.trim_end_matches('/')
45        ));
46        out.push_str(&format!(
47            "- [Health Check]({}/health/live)\n",
48            url.trim_end_matches('/')
49        ));
50        out.push_str(&format!(
51            "- [Agent Card]({}/agent.json)\n",
52            url.trim_end_matches('/')
53        ));
54        out.push('\n');
55    }
56
57    // Agents
58    if !config.pack_agents.is_empty() {
59        out.push_str("## Agents\n\n");
60        for agent in &config.pack_agents {
61            if !agent.enabled {
62                continue;
63            }
64            let desc = agent
65                .description
66                .as_deref()
67                .unwrap_or("No description available");
68            out.push_str(&format!("- **{}**: {}\n", agent.name, desc));
69        }
70        out.push('\n');
71    }
72
73    // Tools
74    if !config.tools.is_empty() {
75        out.push_str("## Tools\n\n");
76        for tool in &config.tools {
77            let kind_label = match tool.kind {
78                crate::compiler::CompiledToolKind::Exec => "exec",
79                crate::compiler::CompiledToolKind::WasmExec => "wasm-exec",
80                crate::compiler::CompiledToolKind::Prompt => "prompt",
81                crate::compiler::CompiledToolKind::Bash => "bash",
82                crate::compiler::CompiledToolKind::Browser => "browser",
83                crate::compiler::CompiledToolKind::Composio => "composio",
84                crate::compiler::CompiledToolKind::ComposioGateway => "gateway",
85            };
86            out.push_str(&format!("- `{}` ({})\n", tool.id, kind_label));
87        }
88        out.push('\n');
89    }
90
91    // Protocols
92    out.push_str("## Protocols\n\n");
93    out.push_str("- MCP (Model Context Protocol) — tool serving\n");
94    out.push_str("- AG-UI — agent-to-user streaming\n");
95    out.push_str("- A2A — agent-to-agent discovery\n");
96    out.push_str("- Agent Skills — capability packaging\n");
97    out.push('\n');
98
99    // Platform
100    out.push_str("## Platform\n\n");
101    out.push_str("- Runtime: CELLSTATE (event-sourced agent memory)\n");
102    out.push_str("- Features: cryptographic receipts, context assembly, policy gates\n");
103
104    if let Some(ref homepage) = config.pack_meta_homepage {
105        out.push_str(&format!("- Homepage: {}\n", homepage));
106    }
107
108    out.push('\n');
109
110    out
111}
112
#[cfg(test)]
mod tests {
    use super::*;

    /// With a default config and no base URL, the fallback title, the schema
    /// comment and the static sections are present, and no documentation
    /// links are emitted.
    #[test]
    fn test_generate_llms_txt_defaults() {
        let config = CompiledConfig::default();
        let txt = generate_llms_txt(&config, None);

        assert!(txt.starts_with("# CELLSTATE Agent"));
        assert!(txt.contains("schema_version: cellstate.llms_txt.v1"));
        assert!(txt.contains("## Protocols"));
        assert!(txt.contains("MCP"));
        assert!(txt.contains("AG-UI"));
        assert!(txt.contains("A2A"));
        assert!(txt.contains("Agent Skills"));
        assert!(txt.contains("## Platform"));
        // The Documentation section depends solely on `base_url`.
        assert!(!txt.contains("## Documentation"));
    }

    /// A base URL produces the Documentation section with all three links.
    #[test]
    fn test_generate_llms_txt_with_base_url() {
        let config = CompiledConfig::default();
        let txt = generate_llms_txt(&config, Some("https://api.example.com"));

        assert!(txt.contains("## Documentation"));
        assert!(txt.contains("https://api.example.com/docs/api"));
        assert!(txt.contains("https://api.example.com/health/live"));
        assert!(txt.contains("https://api.example.com/agent.json"));
    }

    /// Trailing slashes on the base URL must not leak into the links as `//`.
    #[test]
    fn test_generate_llms_txt_trims_trailing_slash() {
        let config = CompiledConfig::default();
        let txt = generate_llms_txt(&config, Some("https://api.example.com/"));

        assert!(txt.contains("(https://api.example.com/docs/api)"));
        assert!(txt.contains("(https://api.example.com/health/live)"));
        assert!(txt.contains("(https://api.example.com/agent.json)"));
        assert!(!txt.contains("example.com//"));
    }

    /// Project name and description from the config replace the defaults.
    #[test]
    fn test_generate_llms_txt_with_project() {
        let config = CompiledConfig {
            pack_meta_project: Some("DevOps Bot".to_string()),
            pack_meta_description: Some("Automates infrastructure management".to_string()),
            ..CompiledConfig::default()
        };

        let txt = generate_llms_txt(&config, None);
        assert!(txt.starts_with("# DevOps Bot"));
        assert!(txt.contains("Automates infrastructure management"));
        // The description is rendered as a Markdown blockquote.
        assert!(txt.contains("> Automates infrastructure management\n"));
        assert!(!txt.contains("# CELLSTATE Agent"));
    }
}
153}