1use crate::pcp::{LintIssueType, MarkdownSemanticIssue};
10use regex::Regex;
11use std::sync::OnceLock;
12
13fn reserved_at_regex() -> &'static Regex {
14 static RE: OnceLock<Regex> = OnceLock::new();
15 RE.get_or_init(|| {
16 Regex::new(r"(^|[^\w\\])@([A-Za-z0-9_./-]+)")
17 .expect("reserved @ mention regex must compile")
18 })
19}
20
21fn markdown_table_separator_regex() -> &'static Regex {
22 static RE: OnceLock<Regex> = OnceLock::new();
23 RE.get_or_init(|| {
24 Regex::new(r"^:?-{3,}:?$").expect("markdown table separator regex must compile")
25 })
26}
27
/// Counts the cells in a single markdown table row.
///
/// Leading and trailing whitespace is ignored. At most one leading and one
/// trailing `|` are treated as the row's outer delimiters; every other
/// *unescaped* `|` separates two cells. A pipe preceded by a backslash (`\|`)
/// is cell content, not a delimiter, and `\\` is an escaped backslash that
/// does not escape a following pipe.
///
/// Returns `0` when the line contains no unescaped pipe, or when nothing but
/// whitespace remains once the outer delimiters are removed.
fn markdown_table_column_count(line: &str) -> usize {
    let trimmed = line.trim();

    // Walk the row once, remembering how many unescaped pipes there are and
    // where the last one sits. A backslash consumes the character after it.
    let mut pipe_count = 0usize;
    let mut last_pipe: Option<usize> = None;
    let mut escaped = false;
    for (pos, ch) in trimmed.char_indices() {
        if escaped {
            escaped = false;
            continue;
        }
        match ch {
            '\\' => escaped = true,
            '|' => {
                pipe_count += 1;
                last_pipe = Some(pos);
            }
            _ => {}
        }
    }
    if pipe_count == 0 {
        return 0;
    }

    // The first character can never be escaped, so a plain prefix test is enough.
    let leading = usize::from(trimmed.starts_with('|'));
    // A trailing delimiter must be a different pipe than the leading one,
    // otherwise a lone "|" would be stripped twice.
    let trailing = usize::from(pipe_count > leading && last_pipe == Some(trimmed.len() - 1));

    // '|' is a single byte, so these offsets always land on char boundaries.
    let core = &trimmed[leading..trimmed.len() - trailing];
    if core.trim().is_empty() {
        return 0;
    }

    // N inner delimiters split the row into N + 1 cells.
    pipe_count - leading - trailing + 1
}
39
40fn is_markdown_separator_row(line: &str) -> bool {
41 let core = line.trim().trim_matches('|').trim();
42 if core.is_empty() {
43 return false;
44 }
45 core.split('|')
46 .map(str::trim)
47 .all(|seg| markdown_table_separator_regex().is_match(seg))
48}
49
50pub fn lint_markdown_semantics(content: &str) -> Vec<MarkdownSemanticIssue> {
57 let mut issues = Vec::new();
58 let lines: Vec<&str> = content.lines().collect();
59
60 for (idx, line) in lines.iter().enumerate() {
62 if reserved_at_regex().is_match(line) {
63 issues.push(MarkdownSemanticIssue {
64 issue_type: LintIssueType::ReservedCharacterLeak,
65 line: idx + 1,
66 message: "unescaped '@' token detected; escape as '\\@' to avoid agent import side-effects".to_string(),
67 });
68 }
69 }
70
71 let mut open_fence_line: Option<usize> = None;
73 for (idx, line) in lines.iter().enumerate() {
74 if line.trim_start().starts_with("```") {
75 if open_fence_line.is_some() {
76 open_fence_line = None;
77 } else {
78 open_fence_line = Some(idx + 1);
79 }
80 }
81 }
82 if let Some(line) = open_fence_line {
83 issues.push(MarkdownSemanticIssue {
84 issue_type: LintIssueType::SyntaxError,
85 line,
86 message: format!("unterminated fenced code block opened at line {}", line),
87 });
88 }
89
90 let mut i = 0usize;
92 while i + 1 < lines.len() {
93 if !lines[i].contains('|') || !is_markdown_separator_row(lines[i + 1]) {
94 i += 1;
95 continue;
96 }
97
98 let expected_cols = markdown_table_column_count(lines[i]);
99 if expected_cols == 0 {
100 i += 1;
101 continue;
102 }
103
104 let mut j = i + 2;
105 while j < lines.len() && lines[j].contains('|') {
106 let cols = markdown_table_column_count(lines[j]);
107 if cols != expected_cols {
108 issues.push(MarkdownSemanticIssue {
109 issue_type: LintIssueType::SyntaxError,
110 line: j + 1,
111 message: format!(
112 "malformed markdown table row: expected {} columns, found {}",
113 expected_cols, cols
114 ),
115 });
116 break;
117 }
118 j += 1;
119 }
120
121 i = j;
122 }
123
124 issues
125}
126
#[cfg(test)]
mod tests {
    use super::*;

    /// Reports whether any of the collected issues has the wanted type.
    fn has_issue(found: &[MarkdownSemanticIssue], wanted: &LintIssueType) -> bool {
        found.iter().any(|issue| issue.issue_type == *wanted)
    }

    /// Plain headings and paragraphs must not produce any finding.
    #[test]
    fn test_lint_markdown_semantics_clean() {
        assert!(lint_markdown_semantics("# Hello\n\nSome text.").is_empty());
    }

    /// A fence that is opened but never closed is a syntax error.
    #[test]
    fn test_lint_markdown_semantics_unterminated_fence() {
        let found = lint_markdown_semantics("```rust\nfn main() {}\n");
        assert!(has_issue(&found, &LintIssueType::SyntaxError));
    }

    /// A bare `@token` in running text is flagged as a reserved character leak.
    #[test]
    fn test_lint_markdown_semantics_reserved_at_mention() {
        let found = lint_markdown_semantics("Please use @my_tool for this.");
        assert!(has_issue(&found, &LintIssueType::ReservedCharacterLeak));
    }

    /// A body row with more cells than the header is a syntax error.
    #[test]
    fn test_lint_markdown_semantics_detects_table_shape_mismatch() {
        let table = "\n| a | b |\n| --- | --- |\n| 1 | 2 | 3 |\n";
        let found = lint_markdown_semantics(table);
        assert!(has_issue(&found, &LintIssueType::SyntaxError));
    }
}