1use super::ir::{MarkdownError, PackError};
4use super::schema::{PackManifest, ToolsSection};
5use cellstate_pcp::{lint_markdown_semantics, LintIssueType};
6use std::str::FromStr;
7
/// Upper bound, in bytes, on the accumulated content of a single fenced block
/// (1_024 * 1024 = 1 MiB). `parse_markdown` rejects a block as soon as it grows past this.
const MAX_FENCE_BLOCK_BYTES: usize = 1_024 * 1024;

/// The parsed form of one markdown prompt file, as produced by `parse_markdown`.
#[derive(Debug, Clone)]
pub struct MarkdownDoc {
    /// Name of the source file, exactly as it was handed to the parser.
    pub file: String,
    /// Trimmed text collected under the `# System` heading.
    pub system: String,
    /// Trimmed text collected under the `## PCP` heading.
    pub pcp: String,
    /// One entry per `### User` heading, in document order.
    pub users: Vec<UserSection>,
    /// Non-empty, non-`#` lines gathered from `constraints` blocks of all user sections.
    pub extracted_constraints: Vec<String>,
    /// Tool ids listed in `tools` blocks of all user sections.
    pub extracted_tool_refs: Vec<String>,
    /// Trimmed content of the last non-empty `rag` block, if any.
    pub extracted_rag_config: Option<String>,
}
23
/// Everything that appears under a single `### User` heading.
#[derive(Debug, Clone)]
pub struct UserSection {
    /// Free text of the section (lines outside fenced blocks), each line followed by `\n`.
    pub content: String,
    /// Fenced blocks of the section, in the order they were closed.
    pub blocks: Vec<FencedBlock>,
}
29
/// The type named in a fenced block's info string.
///
/// Each variant corresponds to the lowercase label accepted by the `FromStr` impl and
/// printed by the `Display` impl (e.g. `FenceKind::Adapter` <-> `"adapter"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum FenceKind {
    Adapter,
    Memory,
    Policy,
    Injection,
    Provider,
    Cache,
    Trajectory,
    Agent,

    Intent,

    /// A single `${...}` tool reference; may be followed by a `Json`/`Xml` payload block.
    Tool,
    /// Free-form configuration text; surfaced as `extracted_rag_config`.
    Rag,
    /// Payload block; only valid directly after a `Tool` block.
    Json,
    /// Payload block; only valid directly after a `Tool` block.
    Xml,
    /// One constraint per line; blank lines and lines starting with `#` are skipped.
    Constraints,
    /// One tool id per line (optionally prefixed with `-`); each must be a declared tool id.
    Tools,

    Manifest,

    Flow,
}
60
61impl FromStr for FenceKind {
62 type Err = PackError;
63
64 fn from_str(s: &str) -> Result<Self, Self::Err> {
83 match s.to_lowercase().as_str() {
85 "adapter" => Ok(FenceKind::Adapter),
86 "memory" => Ok(FenceKind::Memory),
87 "policy" => Ok(FenceKind::Policy),
88 "injection" => Ok(FenceKind::Injection),
89 "provider" => Ok(FenceKind::Provider),
90 "cache" => Ok(FenceKind::Cache),
91 "trajectory" => Ok(FenceKind::Trajectory),
92 "agent" => Ok(FenceKind::Agent),
93 "intent" => Ok(FenceKind::Intent),
94 "tool" => Ok(FenceKind::Tool),
95 "rag" => Ok(FenceKind::Rag),
96 "json" => Ok(FenceKind::Json),
97 "xml" => Ok(FenceKind::Xml),
98 "constraints" => Ok(FenceKind::Constraints),
99 "tools" => Ok(FenceKind::Tools),
100 "manifest" => Ok(FenceKind::Manifest),
101 "flow" => Ok(FenceKind::Flow),
102 other => Err(PackError::Validation(format!(
103 "unsupported fence type '{}'",
104 other
105 ))),
106 }
107 }
108}
109
110impl std::fmt::Display for FenceKind {
111 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
122 let s = match self {
123 FenceKind::Adapter => "adapter",
124 FenceKind::Memory => "memory",
125 FenceKind::Policy => "policy",
126 FenceKind::Injection => "injection",
127 FenceKind::Provider => "provider",
128 FenceKind::Cache => "cache",
129 FenceKind::Trajectory => "trajectory",
130 FenceKind::Agent => "agent",
131 FenceKind::Intent => "intent",
132 FenceKind::Tool => "tool",
133 FenceKind::Rag => "rag",
134 FenceKind::Json => "json",
135 FenceKind::Xml => "xml",
136 FenceKind::Constraints => "constraints",
137 FenceKind::Tools => "tools",
138 FenceKind::Manifest => "manifest",
139 FenceKind::Flow => "flow",
140 };
141 write!(f, "{}", s)
142 }
143}
144
/// A fenced block captured from a `### User` section.
#[derive(Debug, Clone)]
pub struct FencedBlock {
    /// Block type, taken from the first word of the fence's info string.
    pub kind: FenceKind,
    /// Optional second word of the info string (`kind name`).
    pub header_name: Option<String>,
    /// Lines between the opening and closing fence, each followed by `\n`.
    pub content: String,
    /// 1-based line number of the opening fence.
    pub line: usize,
}
152
153pub fn parse_markdown_files(
154 manifest: &PackManifest,
155 files: &[super::PackMarkdownFile],
156) -> Result<Vec<MarkdownDoc>, PackError> {
157 let mut out = Vec::new();
158 for file in files {
159 out.push(parse_markdown(
160 manifest,
161 &file.path.display().to_string(),
162 &file.content,
163 )?);
164 }
165 Ok(out)
166}
167
168fn parse_markdown(
197 manifest: &PackManifest,
198 file: &str,
199 content: &str,
200) -> Result<MarkdownDoc, PackError> {
201 if let Some(issue) = lint_markdown_semantics(content).into_iter().next() {
202 return Err(PackError::Markdown(MarkdownError {
203 file: file.to_string(),
204 line: issue.line,
205 column: 1,
206 message: format!(
207 "semantic lint [{}]: {}",
208 lint_issue_label(issue.issue_type),
209 issue.message
210 ),
211 }));
212 }
213
214 let strict_refs = manifest
215 .defaults
216 .as_ref()
217 .and_then(|d| d.strict_refs)
218 .unwrap_or(false);
219
220 let tool_ids = collect_tool_ids(&manifest.tools);
221 let mut system = String::new();
222 let mut pcp = String::new();
223 let mut users: Vec<UserSection> = Vec::new();
224
225 enum Section {
226 None,
227 System,
228 Pcp,
229 User,
230 }
231 let mut section = Section::None;
232 let mut current_user: Option<UserSection> = None;
233 let mut in_block: Option<FencedBlock> = None;
234 let mut last_heading = 0;
235
236 for (idx, line) in content.lines().enumerate() {
237 let line_no = idx + 1;
238 if let Some(block) = &mut in_block {
239 if line.trim_start().starts_with("```") {
240 let finished = in_block.take().expect("in_block verified as Some above");
242 if let Section::User = section {
243 if let Some(u) = &mut current_user {
244 u.blocks.push(finished);
245 }
246 } else {
247 return Err(PackError::Markdown(MarkdownError {
248 file: file.to_string(),
249 line: line_no,
250 column: 1,
251 message: "fenced blocks only allowed under ### User".into(),
252 }));
253 }
254 continue;
255 }
256 block.content.push_str(line);
257 block.content.push('\n');
258 if block.content.len() > MAX_FENCE_BLOCK_BYTES {
259 return Err(PackError::Markdown(MarkdownError {
260 file: file.to_string(),
261 line: line_no,
262 column: 1,
263 message: format!("fence block exceeds {MAX_FENCE_BLOCK_BYTES} byte limit"),
264 }));
265 }
266 continue;
267 }
268
269 if let Some(heading) = heading_level(line) {
270 match heading {
271 1 => {
272 if line.trim() != "# System" {
273 return Err(PackError::Markdown(MarkdownError {
274 file: file.to_string(),
275 line: line_no,
276 column: 1,
277 message: "first H1 must be '# System'".into(),
278 }));
279 }
280 if last_heading > 1 {
281 return Err(PackError::Markdown(MarkdownError {
282 file: file.to_string(),
283 line: line_no,
284 column: 1,
285 message: "H1 must come before H2/H3".into(),
286 }));
287 }
288 section = Section::System;
289 last_heading = 1;
290 continue;
291 }
292 2 => {
293 if line.trim() != "## PCP" {
294 return Err(PackError::Markdown(MarkdownError {
295 file: file.to_string(),
296 line: line_no,
297 column: 1,
298 message: "H2 must be '## PCP'".into(),
299 }));
300 }
301 if last_heading < 1 {
302 return Err(PackError::Markdown(MarkdownError {
303 file: file.to_string(),
304 line: line_no,
305 column: 1,
306 message: "H2 must follow '# System'".into(),
307 }));
308 }
309 if let Some(u) = current_user.take() {
310 users.push(u);
311 }
312 section = Section::Pcp;
313 last_heading = 2;
314 continue;
315 }
316 3 => {
317 if line.trim() != "### User" {
318 return Err(PackError::Markdown(MarkdownError {
319 file: file.to_string(),
320 line: line_no,
321 column: 1,
322 message: "H3 must be '### User'".into(),
323 }));
324 }
325 if last_heading < 2 {
326 return Err(PackError::Markdown(MarkdownError {
327 file: file.to_string(),
328 line: line_no,
329 column: 1,
330 message: "H3 must follow '## PCP'".into(),
331 }));
332 }
333 if let Some(u) = current_user.take() {
334 users.push(u);
335 }
336 section = Section::User;
337 last_heading = 3;
338 current_user = Some(UserSection {
339 content: String::new(),
340 blocks: Vec::new(),
341 });
342 continue;
343 }
344 _ => {}
345 }
346 }
347
348 if line.trim_start().starts_with("```") {
349 let info = line.trim().trim_start_matches("```").trim();
350 if info.is_empty() {
351 return Err(PackError::Markdown(MarkdownError {
352 file: file.to_string(),
353 line: line_no,
354 column: 1,
355 message: "fenced block must have a type".into(),
356 }));
357 }
358 let (kind, header_name) = parse_fence_info(info).map_err(|e| MarkdownError {
359 file: file.to_string(),
360 line: line_no,
361 column: 1,
362 message: e.to_string(),
363 })?;
364 in_block = Some(FencedBlock {
365 kind,
366 header_name,
367 content: String::new(),
368 line: line_no,
369 });
370 continue;
371 }
372
373 match section {
374 Section::System => {
375 system.push_str(line);
376 system.push('\n');
377 }
378 Section::Pcp => {
379 pcp.push_str(line);
380 pcp.push('\n');
381 }
382 Section::User => {
383 if let Some(u) = &mut current_user {
384 u.content.push_str(line);
385 u.content.push('\n');
386 }
387 }
388 Section::None => {}
389 }
390 }
391
392 if in_block.is_some() {
393 return Err(PackError::Markdown(MarkdownError {
394 file: file.to_string(),
395 line: content.lines().count(),
396 column: 1,
397 message: "unterminated fenced block".into(),
398 }));
399 }
400 if let Some(u) = current_user.take() {
401 users.push(u);
402 }
403 if system.trim().is_empty() || pcp.trim().is_empty() || users.is_empty() {
404 return Err(PackError::Markdown(MarkdownError {
405 file: file.to_string(),
406 line: 1,
407 column: 1,
408 message: "missing required sections (# System, ## PCP, ### User)".into(),
409 }));
410 }
411
412 let mut all_constraints = Vec::new();
414 let mut all_tool_refs = Vec::new();
415 let mut rag_config = None;
416 for user in &users {
417 let extracted = validate_blocks(file, user, &tool_ids, strict_refs)?;
418 all_constraints.extend(extracted.constraints);
419 all_tool_refs.extend(extracted.tool_refs);
420 if extracted.rag_config.is_some() {
421 rag_config = extracted.rag_config;
422 }
423 }
424
425 Ok(MarkdownDoc {
426 file: file.to_string(),
427 system: system.trim().to_string(),
428 pcp: pcp.trim().to_string(),
429 users,
430 extracted_constraints: all_constraints,
431 extracted_tool_refs: all_tool_refs,
432 extracted_rag_config: rag_config,
433 })
434}
435
/// Data pulled out of one user section's fenced blocks by `validate_blocks`.
#[derive(Debug, Clone, Default)]
pub struct ExtractedBlocks {
    /// Trimmed, non-empty, non-`#` lines of every `constraints` block.
    pub constraints: Vec<String>,
    /// Tool ids listed in `tools` blocks (already checked against the declared ids).
    pub tool_refs: Vec<String>,
    /// Trimmed content of the last non-empty `rag` block, if any.
    pub rag_config: Option<String>,
}
443
444fn validate_blocks(
482 file: &str,
483 user: &UserSection,
484 tool_ids: &std::collections::HashSet<String>,
485 strict_refs: bool,
486) -> Result<ExtractedBlocks, PackError> {
487 let mut extracted = ExtractedBlocks::default();
488 let mut i = 0;
489 while i < user.blocks.len() {
490 let block = &user.blocks[i];
491 match block.kind {
492 FenceKind::Tool => {
493 let tool_ref = block.content.trim();
494 if !is_ref(tool_ref) {
495 return Err(PackError::Markdown(MarkdownError {
496 file: file.to_string(),
497 line: block.line,
498 column: 1,
499 message: "tool block must contain a single ${...} ref".into(),
500 }));
501 }
502 let tool_id = strip_ref(tool_ref);
503 if !tool_ids.contains(tool_id) {
504 return Err(PackError::Markdown(MarkdownError {
505 file: file.to_string(),
506 line: block.line,
507 column: 1,
508 message: format!("unknown tool id '{}'", tool_id),
509 }));
510 }
511 if strict_refs {
512 }
514 if i + 1 < user.blocks.len() {
516 let next = &user.blocks[i + 1];
517 if next.kind == FenceKind::Json || next.kind == FenceKind::Xml {
518 if strict_refs && !is_ref(next.content.trim()) {
519 return Err(PackError::Markdown(MarkdownError {
520 file: file.to_string(),
521 line: next.line,
522 column: 1,
523 message: "payload block must be a ${...} ref in strict_refs".into(),
524 }));
525 }
526 i += 2;
527 continue;
528 }
529 }
530 i += 1;
531 }
532 FenceKind::Json | FenceKind::Xml => {
533 return Err(PackError::Markdown(MarkdownError {
534 file: file.to_string(),
535 line: block.line,
536 column: 1,
537 message: "payload block must follow a tool block".into(),
538 }));
539 }
540 FenceKind::Constraints => {
542 for line in block.content.lines() {
544 let trimmed = line.trim();
545 if !trimmed.is_empty() && !trimmed.starts_with('#') {
546 extracted.constraints.push(trimmed.to_string());
547 }
548 }
549 i += 1;
550 }
551 FenceKind::Tools => {
552 for line in block.content.lines() {
554 let trimmed = line.trim().trim_start_matches('-').trim();
555 if trimmed.is_empty() || trimmed.starts_with('#') {
556 continue;
557 }
558 if !tool_ids.contains(trimmed) {
559 return Err(PackError::Markdown(MarkdownError {
560 file: file.to_string(),
561 line: block.line,
562 column: 1,
563 message: format!(
564 "tools block references unknown tool '{}'. Must match TOML-declared tool IDs.",
565 trimmed
566 ),
567 }));
568 }
569 extracted.tool_refs.push(trimmed.to_string());
570 }
571 i += 1;
572 }
573 FenceKind::Rag => {
574 let content = block.content.trim();
576 if !content.is_empty() {
577 extracted.rag_config = Some(content.to_string());
578 }
579 i += 1;
580 }
581 FenceKind::Adapter
583 | FenceKind::Memory
584 | FenceKind::Policy
585 | FenceKind::Injection
586 | FenceKind::Provider
587 | FenceKind::Cache
588 | FenceKind::Trajectory
589 | FenceKind::Agent
590 | FenceKind::Intent
591 | FenceKind::Manifest
592 | FenceKind::Flow => {
593 i += 1;
596 }
597 }
598 }
599 Ok(extracted)
600}
601
602fn parse_fence_info(info: &str) -> Result<(FenceKind, Option<String>), PackError> {
623 let parts: Vec<&str> = info.split_whitespace().collect();
624
625 match parts.as_slice() {
626 [] => Err(PackError::Validation(
627 "fence block must have a type".to_string(),
628 )),
629 [kind_str] => {
630 let kind = FenceKind::from_str(kind_str)?;
632 Ok((kind, None))
633 }
634 [kind_str, name] => {
635 let kind = FenceKind::from_str(kind_str)?;
637 Ok((kind, Some(name.to_string())))
638 }
639 _ => Err(PackError::Validation(format!(
640 "invalid fence header '{}' (expected 'kind' or 'kind name')",
641 info
642 ))),
643 }
644}
645
/// Returns the heading level (1-3) of `line`, or `None` if it is not such a heading.
///
/// A heading is one to three `#` characters at the very start of the line followed by a
/// space; deeper levels and indented lines are not recognised.
fn heading_level(line: &str) -> Option<usize> {
    // Marker at index i denotes level i + 1; the trailing space keeps them disjoint.
    const MARKERS: [&str; 3] = ["# ", "## ", "### "];
    MARKERS
        .iter()
        .position(|marker| line.starts_with(*marker))
        .map(|index| index + 1)
}
671
/// Returns `true` when `s` begins with `${` and ends with `}`.
///
/// No trimming is done and the text between the delimiters is not inspected.
fn is_ref(s: &str) -> bool {
    match s.strip_prefix("${") {
        Some(rest) => rest.ends_with('}'),
        None => false,
    }
}
675
/// Returns the text inside a `${...}` reference.
///
/// Surrounding whitespace is trimmed, then at most one leading `${` and at most one
/// trailing `}` are removed. Removing exactly one delimiter pair (rather than every
/// repetition) means malformed input such as `${${x}}` or `${x}}` no longer collapses to
/// `x` and therefore cannot be mistaken for a valid reference to `x`.
fn strip_ref(s: &str) -> &str {
    let trimmed = s.trim();
    let without_open = trimmed.strip_prefix("${").unwrap_or(trimmed);
    without_open.strip_suffix('}').unwrap_or(without_open)
}
679
680fn collect_tool_ids(tools: &ToolsSection) -> std::collections::HashSet<String> {
681 let mut ids = std::collections::HashSet::new();
682 for name in tools.bin.keys() {
683 ids.insert(format!("tools.bin.{}", name));
684 }
685 for name in tools.prompts.keys() {
686 ids.insert(format!("tools.prompts.{}", name));
687 }
688 ids
689}
690
691fn lint_issue_label(issue_type: LintIssueType) -> &'static str {
692 match issue_type {
693 LintIssueType::TooLarge => "too_large",
694 LintIssueType::Duplicate => "duplicate",
695 LintIssueType::MissingEmbedding => "missing_embedding",
696 LintIssueType::LowConfidence => "low_confidence",
697 LintIssueType::SyntaxError => "syntax_error",
698 LintIssueType::ReservedCharacterLeak => "reserved_character_leak",
699 }
700}
701
// Unit tests for `parse_markdown`. Both cases use an empty manifest and expect the input
// to be rejected with a `PackError::Markdown` whose message carries a lint tag.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::pack::schema::parse_manifest;

    /// A user section containing `@skill_parser` must fail with a message tagged
    /// `reserved_character_leak`.
    #[test]
    fn parse_markdown_rejects_unescaped_mention_tokens() {
        let manifest = parse_manifest("").expect("manifest should parse");
        let content = r#"
# System
rules

## PCP
Keep context clean.

### User
Please load @skill_parser before running this.
"#;

        let err = parse_markdown(&manifest, "prompt.md", content).expect_err("must fail lint");
        match err {
            PackError::Markdown(md) => {
                assert!(
                    md.message.contains("reserved_character_leak"),
                    "unexpected message: {}",
                    md.message
                );
            }
            other => panic!("expected markdown error, got {other:?}"),
        }
    }

    /// A `constraints` fence that is never closed must fail with a message tagged
    /// `syntax_error`.
    #[test]
    fn parse_markdown_rejects_unterminated_fence() {
        let manifest = parse_manifest("").expect("manifest should parse");
        let content = r#"
# System
rules

## PCP
pcp text

### User
```constraints
never leak secrets
"#;

        let err = parse_markdown(&manifest, "prompt.md", content).expect_err("must fail lint");
        match err {
            PackError::Markdown(md) => {
                assert!(
                    md.message.contains("syntax_error"),
                    "unexpected message: {}",
                    md.message
                );
            }
            other => panic!("expected markdown error, got {other:?}"),
        }
    }
}