1use serde::{Deserialize, Serialize};
15use std::fmt;
16use std::sync::OnceLock;
17use uuid::Uuid;
18
19#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
25#[serde(rename_all = "snake_case")]
26pub enum PiiType {
27 Ssn,
28 CreditCard,
29 Email,
30 Phone,
31 AwsKey,
32 PrivateKey,
33 ConnectionString,
34 Password,
35 ApiKey,
36 ServerPath,
37 Custom,
38}
39
40impl fmt::Display for PiiType {
41 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
42 match self {
43 Self::Ssn => write!(f, "ssn"),
44 Self::CreditCard => write!(f, "credit_card"),
45 Self::Email => write!(f, "email"),
46 Self::Phone => write!(f, "phone"),
47 Self::AwsKey => write!(f, "aws_key"),
48 Self::PrivateKey => write!(f, "private_key"),
49 Self::ConnectionString => write!(f, "connection_string"),
50 Self::Password => write!(f, "password"),
51 Self::ApiKey => write!(f, "api_key"),
52 Self::ServerPath => write!(f, "server_path"),
53 Self::Custom => write!(f, "custom"),
54 }
55 }
56}
57
58#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
64pub struct RedactionSpan {
65 pub token_id: Uuid,
67 pub pii_type: PiiType,
69 pub placeholder: String,
71 pub confidence: f32,
73}
74
75#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
84pub struct VaultInsert {
85 pub token_id: Uuid,
87 pub pii_type: PiiType,
89 pub ciphertext: Vec<u8>,
91}
92
93#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
102pub struct RedactionManifest {
103 pub spans: Vec<RedactionSpan>,
105 pub vault_inserts: Vec<VaultInsert>,
107}
108
109impl RedactionManifest {
110 pub fn empty() -> Self {
112 Self {
113 spans: Vec::new(),
114 vault_inserts: Vec::new(),
115 }
116 }
117
118 pub fn has_redactions(&self) -> bool {
120 !self.spans.is_empty()
121 }
122
123 pub fn span_count(&self) -> usize {
125 self.spans.len()
126 }
127
128 pub fn pii_types(&self) -> Vec<PiiType> {
130 let mut types: Vec<PiiType> = self.spans.iter().map(|s| s.pii_type).collect();
131 types.sort_by_key(|t| *t as u8);
132 types.dedup();
133 types
134 }
135
136 pub fn token_ids(&self) -> Vec<Uuid> {
138 self.spans.iter().map(|s| s.token_id).collect()
139 }
140}
141
142#[derive(Clone, PartialEq, Serialize, Deserialize)]
155pub struct ScrubbedText {
156 text: String,
157 manifest: RedactionManifest,
158}
159
160impl ScrubbedText {
161 pub fn as_redacted_str(&self) -> &str {
163 &self.text
164 }
165
166 pub fn manifest(&self) -> &RedactionManifest {
168 &self.manifest
169 }
170
171 pub fn into_string(self) -> String {
173 self.text
174 }
175
176 pub(crate) fn new_verified(text: String, manifest: RedactionManifest) -> Self {
181 Self { text, manifest }
182 }
183}
184
185impl fmt::Debug for ScrubbedText {
186 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
187 write!(f, "ScrubbedText(\"{}\")", self.text)
188 }
189}
190
191impl fmt::Display for ScrubbedText {
192 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
193 f.write_str(&self.text)
194 }
195}
196
197#[derive(Clone, PartialEq, Serialize, Deserialize)]
205pub struct ScrubbedPayload {
206 value: serde_json::Value,
207 manifest: RedactionManifest,
208}
209
210impl ScrubbedPayload {
211 pub fn as_value(&self) -> &serde_json::Value {
213 &self.value
214 }
215
216 pub fn manifest(&self) -> &RedactionManifest {
218 &self.manifest
219 }
220
221 pub fn into_value(self) -> serde_json::Value {
223 self.value
224 }
225
226 pub(crate) fn new_verified(value: serde_json::Value, manifest: RedactionManifest) -> Self {
228 Self { value, manifest }
229 }
230}
231
232impl fmt::Debug for ScrubbedPayload {
233 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
234 write!(
235 f,
236 "ScrubbedPayload({} redactions)",
237 self.manifest.span_count()
238 )
239 }
240}
241
242#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
248pub struct PiiViolation {
249 pub pii_type: PiiType,
251 pub byte_offset: usize,
253 pub byte_len: usize,
255 pub confidence: f32,
257}
258
259#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
261pub struct ManifestViolation {
262 pub reason: String,
264 pub token_id: Option<Uuid>,
266}
267
268struct PiiDetector {
274 pii_type: PiiType,
275 regex: regex::Regex,
276 confidence: f32,
277}
278
279fn default_detectors() -> &'static [PiiDetector] {
281 static DETECTORS: OnceLock<Vec<PiiDetector>> = OnceLock::new();
282 DETECTORS.get_or_init(|| {
283 vec![
284 PiiDetector {
286 pii_type: PiiType::Ssn,
287 regex: regex::Regex::new(r"\b\d{3}-\d{2}-\d{4}\b").expect("valid SSN regex"),
288 confidence: 0.95,
289 },
290 PiiDetector {
292 pii_type: PiiType::CreditCard,
293 regex: regex::Regex::new(r"\b(?:4[0-9]{12}(?:[0-9]{3})?|5[1-5][0-9]{14})\b")
294 .expect("valid credit card regex"),
295 confidence: 0.90,
296 },
297 PiiDetector {
299 pii_type: PiiType::Email,
300 regex: regex::Regex::new(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b")
301 .expect("valid email regex"),
302 confidence: 0.85,
303 },
304 PiiDetector {
306 pii_type: PiiType::Phone,
307 regex: regex::Regex::new(r"\b(?:\+1[-.]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b")
308 .expect("valid phone regex"),
309 confidence: 0.80,
310 },
311 PiiDetector {
313 pii_type: PiiType::AwsKey,
314 regex: regex::Regex::new(r"AKIA[0-9A-Z]{16}").expect("valid AWS key regex"),
315 confidence: 0.95,
316 },
317 PiiDetector {
319 pii_type: PiiType::PrivateKey,
320 regex: regex::Regex::new(r"-----BEGIN (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----")
321 .expect("valid private key regex"),
322 confidence: 0.99,
323 },
324 PiiDetector {
326 pii_type: PiiType::ConnectionString,
327 regex: regex::Regex::new(
328 r"(?i)(?:postgres|mysql|mongodb|redis)://[^:]+:[^@]+@[^/\s]+",
329 )
330 .expect("valid connection string regex"),
331 confidence: 0.95,
332 },
333 PiiDetector {
335 pii_type: PiiType::Password,
336 regex: regex::Regex::new(r#"(?i)password["']?\s*[:=]\s*["'][^"']+["']"#)
337 .expect("valid password pattern regex"),
338 confidence: 0.85,
339 },
340 PiiDetector {
342 pii_type: PiiType::ApiKey,
343 regex: regex::Regex::new(r#"(?i)api[_-]?key["']?\s*[:=]\s*["'][^"']+["']"#)
344 .expect("valid API key pattern regex"),
345 confidence: 0.85,
346 },
347 PiiDetector {
349 pii_type: PiiType::ServerPath,
350 regex: regex::Regex::new(r"/(?:home|var|usr|etc)/[a-zA-Z0-9_-]+/")
351 .expect("valid server path regex"),
352 confidence: 0.70,
353 },
354 ]
355 })
356}
357
/// Entry point for detecting, verifying and redacting PII using the built-in detectors.
pub struct RedactionRegistry {
    /// Threshold supplied at construction.
    ///
    /// The methods in this file take their threshold as an explicit argument;
    /// this stored value is only read by the `Debug` implementation.
    confidence_threshold: f32,
}
372
373impl RedactionRegistry {
374 pub fn with_defaults(confidence_threshold: f32) -> Self {
376 let _ = default_detectors();
378 Self {
379 confidence_threshold,
380 }
381 }
382
383 pub fn default_threshold() -> f32 {
385 0.8
386 }
387
388 pub fn verify(&self, text: &str, confidence_threshold: f32) -> Vec<PiiViolation> {
392 let mut violations = Vec::new();
393 for detector in default_detectors() {
394 if detector.confidence < confidence_threshold {
395 continue;
396 }
397 for m in detector.regex.find_iter(text) {
398 violations.push(PiiViolation {
399 pii_type: detector.pii_type,
400 byte_offset: m.start(),
401 byte_len: m.len(),
402 confidence: detector.confidence,
403 });
404 }
405 }
406 violations
407 }
408
409 pub fn verify_manifest(
415 &self,
416 payload: &serde_json::Value,
417 manifest: &RedactionManifest,
418 ) -> Result<(), Vec<ManifestViolation>> {
419 let mut violations = Vec::new();
420 let payload_text = payload.to_string();
421
422 for span in &manifest.spans {
423 if !payload_text.contains(&span.placeholder) {
425 violations.push(ManifestViolation {
426 reason: format!("placeholder '{}' not found in payload", span.placeholder),
427 token_id: Some(span.token_id),
428 });
429 }
430
431 if requires_vault_insert(span.pii_type) {
433 let has_insert = manifest
434 .vault_inserts
435 .iter()
436 .any(|vi| vi.token_id == span.token_id);
437 if !has_insert {
438 violations.push(ManifestViolation {
439 reason: format!(
440 "span {} ({}) requires vault insert but none provided",
441 span.token_id, span.pii_type
442 ),
443 token_id: Some(span.token_id),
444 });
445 }
446 }
447 }
448
449 for vi in &manifest.vault_inserts {
451 let has_span = manifest.spans.iter().any(|s| s.token_id == vi.token_id);
452 if !has_span {
453 violations.push(ManifestViolation {
454 reason: format!("vault insert {} has no matching span", vi.token_id),
455 token_id: Some(vi.token_id),
456 });
457 }
458 }
459
460 if violations.is_empty() {
461 Ok(())
462 } else {
463 Err(violations)
464 }
465 }
466
467 pub fn scrub_redact_only(&self, text: &str, threshold: f32) -> ScrubbedText {
473 let detectors = default_detectors();
474 let mut matches = Vec::new();
475
476 for detector in detectors {
477 if detector.confidence < threshold {
478 continue;
479 }
480 for m in detector.regex.find_iter(text) {
481 matches.push(DetectedMatch {
482 pii_type: detector.pii_type,
483 start: m.start(),
484 end: m.end(),
485 confidence: detector.confidence,
486 });
487 }
488 }
489
490 matches.sort_by(|a, b| a.start.cmp(&b.start).then_with(|| b.end.cmp(&a.end)));
492
493 let resolved = resolve_overlaps(&matches);
495
496 let mut result = String::with_capacity(text.len());
498 let mut spans = Vec::with_capacity(resolved.len());
499 let mut last_end = 0;
500
501 for dm in &resolved {
502 result.push_str(&text[last_end..dm.start]);
504
505 let token_id = Uuid::new_v4();
507 let short_id = &token_id.to_string()[..8];
508 let placeholder = format!("[REDACTED:{}:{}]", dm.pii_type, short_id);
509
510 result.push_str(&placeholder);
511 spans.push(RedactionSpan {
512 token_id,
513 pii_type: dm.pii_type,
514 placeholder,
515 confidence: dm.confidence,
516 });
517 last_end = dm.end;
518 }
519 result.push_str(&text[last_end..]);
520
521 let manifest = RedactionManifest {
522 spans,
523 vault_inserts: Vec::new(), };
525
526 ScrubbedText::new_verified(result, manifest)
527 }
528
529 pub fn mark_verified(&self, text: String, manifest: RedactionManifest) -> ScrubbedText {
536 ScrubbedText::new_verified(text, manifest)
537 }
538
539 pub fn mark_static_clean(&self, text: String) -> ScrubbedText {
544 ScrubbedText::new_verified(text, RedactionManifest::empty())
545 }
546
547 pub fn scrub_payload_redact_only(
549 &self,
550 value: serde_json::Value,
551 threshold: f32,
552 ) -> ScrubbedPayload {
553 let mut all_spans = Vec::new();
554 let scrubbed_value = self.scrub_json_value(value, threshold, &mut all_spans);
555 let manifest = RedactionManifest {
556 spans: all_spans,
557 vault_inserts: Vec::new(),
558 };
559 ScrubbedPayload::new_verified(scrubbed_value, manifest)
560 }
561
562 fn scrub_json_value(
564 &self,
565 value: serde_json::Value,
566 threshold: f32,
567 spans: &mut Vec<RedactionSpan>,
568 ) -> serde_json::Value {
569 match value {
570 serde_json::Value::String(s) => {
571 let scrubbed = self.scrub_redact_only(&s, threshold);
572 spans.extend(scrubbed.manifest.spans);
573 serde_json::Value::String(scrubbed.text)
574 }
575 serde_json::Value::Array(arr) => {
576 let scrubbed_arr: Vec<serde_json::Value> = arr
577 .into_iter()
578 .map(|v| self.scrub_json_value(v, threshold, spans))
579 .collect();
580 serde_json::Value::Array(scrubbed_arr)
581 }
582 serde_json::Value::Object(map) => {
583 let scrubbed_map: serde_json::Map<String, serde_json::Value> = map
584 .into_iter()
585 .map(|(k, v)| (k, self.scrub_json_value(v, threshold, spans)))
586 .collect();
587 serde_json::Value::Object(scrubbed_map)
588 }
589 other => other,
590 }
591 }
592}
593
594impl fmt::Debug for RedactionRegistry {
595 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
596 f.debug_struct("RedactionRegistry")
597 .field("confidence_threshold", &self.confidence_threshold)
598 .field("detector_count", &default_detectors().len())
599 .finish()
600 }
601}
602
603fn requires_vault_insert(pii_type: PiiType) -> bool {
609 matches!(
610 pii_type,
611 PiiType::Ssn
612 | PiiType::CreditCard
613 | PiiType::Email
614 | PiiType::Phone
615 | PiiType::AwsKey
616 | PiiType::PrivateKey
617 | PiiType::ConnectionString
618 | PiiType::Password
619 | PiiType::ApiKey
620 )
621}
622
623#[derive(Debug, Clone)]
625struct DetectedMatch {
626 pii_type: PiiType,
627 start: usize,
628 end: usize,
629 confidence: f32,
630}
631
632fn resolve_overlaps(matches: &[DetectedMatch]) -> Vec<DetectedMatch> {
635 let mut resolved: Vec<DetectedMatch> = Vec::with_capacity(matches.len());
636 for m in matches {
637 if let Some(last) = resolved.last() {
638 if m.start < last.end {
639 if (m.end - m.start) > (last.end - last.start) {
641 resolved.pop();
642 resolved.push(m.clone());
643 }
644 continue;
646 }
647 }
648 resolved.push(m.clone());
649 }
650 resolved
651}
652
#[cfg(test)]
mod tests {
    use super::*;

    /// Registry used by every test.
    fn registry() -> RedactionRegistry {
        RedactionRegistry::with_defaults(0.8)
    }

    /// Builds a span without spelling out the struct literal in each test.
    fn span(
        token_id: Uuid,
        pii_type: PiiType,
        placeholder: &str,
        confidence: f32,
    ) -> RedactionSpan {
        RedactionSpan {
            token_id,
            pii_type,
            placeholder: placeholder.to_string(),
            confidence,
        }
    }

    /// Builds a vault insert holding the given bytes.
    fn vault_insert(token_id: Uuid, pii_type: PiiType, ciphertext: &[u8]) -> VaultInsert {
        VaultInsert {
            token_id,
            pii_type,
            ciphertext: ciphertext.to_vec(),
        }
    }

    /// Asserts that verifying `text` at 0.8 yields exactly one violation of `expected` type.
    fn assert_single_violation(text: &str, expected: PiiType) {
        let found = registry().verify(text, 0.8);
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].pii_type, expected);
    }

    // --- PiiType and manifest basics ---

    #[test]
    fn pii_type_display() {
        let cases = [
            (PiiType::Ssn, "ssn"),
            (PiiType::CreditCard, "credit_card"),
            (PiiType::Email, "email"),
            (PiiType::ConnectionString, "connection_string"),
        ];
        for (pii_type, label) in cases.iter() {
            assert_eq!(pii_type.to_string(), *label);
        }
    }

    #[test]
    fn empty_manifest() {
        let manifest = RedactionManifest::empty();
        assert!(!manifest.has_redactions());
        assert_eq!(manifest.span_count(), 0);
        assert!(manifest.pii_types().is_empty());
        assert!(manifest.token_ids().is_empty());
    }

    // --- ScrubbedText formatting ---

    #[test]
    fn scrubbed_text_debug_shows_redacted() {
        let text = "hello [REDACTED:ssn:abc12345]".to_string();
        let scrubbed = ScrubbedText::new_verified(text, RedactionManifest::empty());
        let rendered = format!("{:?}", scrubbed);
        assert!(rendered.contains("ScrubbedText"));
        assert!(rendered.contains("[REDACTED:ssn:abc12345]"));
    }

    #[test]
    fn scrubbed_text_display() {
        let scrubbed =
            ScrubbedText::new_verified("clean text".to_string(), RedactionManifest::empty());
        assert_eq!(scrubbed.to_string(), "clean text");
    }

    #[test]
    fn scrubbed_text_clone() {
        let original = ScrubbedText::new_verified("text".to_string(), RedactionManifest::empty());
        let copy = original.clone();
        assert_eq!(original.as_redacted_str(), copy.as_redacted_str());
    }

    // --- verify ---

    #[test]
    fn verify_clean_text_returns_empty() {
        let found = registry().verify("Hello world, nothing sensitive here", 0.8);
        assert!(found.is_empty());
    }

    #[test]
    fn verify_detects_ssn() {
        assert_single_violation("My SSN is 123-45-6789", PiiType::Ssn);
    }

    #[test]
    fn verify_detects_credit_card() {
        assert_single_violation("Card: 4111111111111111", PiiType::CreditCard);
    }

    #[test]
    fn verify_detects_email() {
        assert_single_violation("Contact me at user@example.com", PiiType::Email);
    }

    #[test]
    fn verify_detects_aws_key() {
        assert_single_violation("Key: AKIAIOSFODNN7EXAMPLE", PiiType::AwsKey);
    }

    #[test]
    fn verify_detects_private_key() {
        assert_single_violation(
            "-----BEGIN RSA PRIVATE KEY-----\nMIIEow...",
            PiiType::PrivateKey,
        );
    }

    #[test]
    fn verify_detects_connection_string() {
        assert_single_violation("postgres://user:pass@host/db", PiiType::ConnectionString);
    }

    #[test]
    fn verify_detects_password_pattern() {
        assert_single_violation(r#"password: "hunter2""#, PiiType::Password);
    }

    #[test]
    fn verify_detects_api_key_pattern() {
        assert_single_violation(r#"api_key: "sk-abc123def456""#, PiiType::ApiKey);
    }

    #[test]
    fn verify_detects_multiple() {
        let found = registry().verify("SSN: 123-45-6789, Card: 4111111111111111", 0.8);
        assert_eq!(found.len(), 2);
        let kinds: Vec<PiiType> = found.iter().map(|violation| violation.pii_type).collect();
        assert!(kinds.contains(&PiiType::Ssn));
        assert!(kinds.contains(&PiiType::CreditCard));
    }

    #[test]
    fn verify_respects_threshold() {
        let reg = registry();
        let path = "/home/user/data/";

        // The server-path detector sits below 0.8, so it is skipped here.
        assert!(reg.verify(path, 0.8).is_empty());

        // Lowering the threshold lets it run.
        let found = reg.verify(path, 0.5);
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].pii_type, PiiType::ServerPath);
    }

    // --- verify_manifest ---

    #[test]
    fn verify_manifest_valid() {
        let token = Uuid::new_v4();
        let manifest = RedactionManifest {
            spans: vec![span(token, PiiType::Ssn, "[REDACTED:ssn:abc12345]", 0.95)],
            vault_inserts: vec![vault_insert(token, PiiType::Ssn, &[1, 2, 3])],
        };
        let payload = serde_json::json!({"name": "test", "ssn": "[REDACTED:ssn:abc12345]"});
        assert!(registry().verify_manifest(&payload, &manifest).is_ok());
    }

    #[test]
    fn verify_manifest_missing_placeholder() {
        let token = Uuid::new_v4();
        let manifest = RedactionManifest {
            spans: vec![span(token, PiiType::Ssn, "[REDACTED:ssn:abc12345]", 0.95)],
            vault_inserts: vec![vault_insert(token, PiiType::Ssn, &[1, 2, 3])],
        };
        let payload = serde_json::json!({"name": "test", "ssn": "not redacted"});
        let problems = registry()
            .verify_manifest(&payload, &manifest)
            .unwrap_err();
        assert_eq!(problems.len(), 1);
        assert!(problems[0].reason.contains("not found in payload"));
    }

    #[test]
    fn verify_manifest_missing_vault_insert() {
        let token = Uuid::new_v4();
        let manifest = RedactionManifest {
            spans: vec![span(token, PiiType::Ssn, "[REDACTED:ssn:abc12345]", 0.95)],
            vault_inserts: vec![],
        };
        let payload = serde_json::json!({"ssn": "[REDACTED:ssn:abc12345]"});
        let problems = registry()
            .verify_manifest(&payload, &manifest)
            .unwrap_err();
        assert_eq!(problems.len(), 1);
        assert!(problems[0].reason.contains("requires vault insert"));
    }

    #[test]
    fn verify_manifest_orphan_vault_insert() {
        let orphan_token = Uuid::new_v4();
        let manifest = RedactionManifest {
            spans: vec![],
            vault_inserts: vec![vault_insert(orphan_token, PiiType::Email, &[4, 5, 6])],
        };
        let payload = serde_json::json!({"clean": "data"});
        let problems = registry()
            .verify_manifest(&payload, &manifest)
            .unwrap_err();
        assert_eq!(problems.len(), 1);
        assert!(problems[0].reason.contains("no matching span"));
    }

    #[test]
    fn verify_manifest_server_path_no_vault_required() {
        let token = Uuid::new_v4();
        let manifest = RedactionManifest {
            spans: vec![span(
                token,
                PiiType::ServerPath,
                "[REDACTED:server_path:abc12345]",
                0.70,
            )],
            vault_inserts: vec![],
        };
        let payload = serde_json::json!({"path": "[REDACTED:server_path:abc12345]"});
        assert!(registry().verify_manifest(&payload, &manifest).is_ok());
    }

    // --- scrub_redact_only ---

    #[test]
    fn scrub_redact_only_clean_text() {
        let scrubbed = registry().scrub_redact_only("Hello world", 0.8);
        assert_eq!(scrubbed.as_redacted_str(), "Hello world");
        assert!(!scrubbed.manifest().has_redactions());
    }

    #[test]
    fn scrub_redact_only_ssn() {
        let scrubbed = registry().scrub_redact_only("SSN: 123-45-6789", 0.8);
        let output = scrubbed.as_redacted_str();
        let manifest = scrubbed.manifest();
        assert!(!output.contains("123-45-6789"));
        assert!(output.contains("[REDACTED:ssn:"));
        assert_eq!(manifest.span_count(), 1);
        assert_eq!(manifest.spans[0].pii_type, PiiType::Ssn);
        assert!(manifest.vault_inserts.is_empty());
    }

    #[test]
    fn scrub_redact_only_multiple() {
        let scrubbed =
            registry().scrub_redact_only("SSN: 123-45-6789, email: user@example.com", 0.8);
        let output = scrubbed.as_redacted_str();
        assert!(!output.contains("123-45-6789"));
        assert!(!output.contains("user@example.com"));
        assert_eq!(scrubbed.manifest().span_count(), 2);
    }

    #[test]
    fn scrub_redact_only_preserves_surrounding_text() {
        let scrubbed = registry().scrub_redact_only("before 123-45-6789 after", 0.8);
        let output = scrubbed.as_redacted_str();
        assert!(output.starts_with("before "));
        assert!(output.ends_with(" after"));
    }

    // --- overlap resolution ---

    #[test]
    fn overlap_resolution_longest_wins() {
        let mut candidates = vec![
            DetectedMatch {
                pii_type: PiiType::ServerPath,
                start: 10,
                end: 30,
                confidence: 0.70,
            },
            DetectedMatch {
                pii_type: PiiType::ConnectionString,
                start: 5,
                end: 50,
                confidence: 0.95,
            },
        ];
        // Same ordering the scrubber applies before resolving overlaps.
        candidates.sort_by(|a, b| a.start.cmp(&b.start).then_with(|| b.end.cmp(&a.end)));
        let resolved = resolve_overlaps(&candidates);
        assert_eq!(resolved.len(), 1);
        assert_eq!(resolved[0].pii_type, PiiType::ConnectionString);
    }

    #[test]
    fn overlap_resolution_non_overlapping_preserved() {
        let scrubbed =
            registry().scrub_redact_only("SSN: 123-45-6789 email: user@example.com", 0.8);
        assert_eq!(scrubbed.manifest().span_count(), 2);
    }

    // --- JSON payloads ---

    #[test]
    fn scrub_payload_redact_only() {
        let payload = serde_json::json!({
            "name": "John",
            "ssn": "123-45-6789",
            "notes": ["Contact at user@example.com"]
        });
        let scrubbed = registry().scrub_payload_redact_only(payload, 0.8);
        let value = scrubbed.as_value();

        let ssn = value["ssn"].as_str().unwrap();
        assert!(!ssn.contains("123-45-6789"));
        assert!(ssn.contains("[REDACTED:ssn:"));

        let note = value["notes"][0].as_str().unwrap();
        assert!(!note.contains("user@example.com"));
        assert!(note.contains("[REDACTED:email:"));

        assert_eq!(value["name"].as_str().unwrap(), "John");
        assert_eq!(scrubbed.manifest().span_count(), 2);
    }

    #[test]
    fn scrub_payload_preserves_non_string_values() {
        let payload = serde_json::json!({
            "count": 42,
            "active": true,
            "data": null
        });
        let scrubbed = registry().scrub_payload_redact_only(payload, 0.8);
        let value = scrubbed.as_value();
        assert_eq!(value["count"], 42);
        assert_eq!(value["active"], true);
        assert!(value["data"].is_null());
        assert!(!scrubbed.manifest().has_redactions());
    }

    // --- serde ---

    #[test]
    fn manifest_serde_roundtrip() {
        let token = Uuid::new_v4();
        let manifest = RedactionManifest {
            spans: vec![span(
                token,
                PiiType::Email,
                "[REDACTED:email:abc12345]",
                0.85,
            )],
            vault_inserts: vec![vault_insert(token, PiiType::Email, &[10, 20, 30])],
        };
        let encoded = serde_json::to_string(&manifest).unwrap();
        let decoded: RedactionManifest = serde_json::from_str(&encoded).unwrap();
        assert_eq!(manifest, decoded);
    }

    #[test]
    fn pii_type_serde_snake_case() {
        let encoded = serde_json::to_string(&PiiType::CreditCard).unwrap();
        assert_eq!(encoded, "\"credit_card\"");
        let decoded: PiiType = serde_json::from_str(&encoded).unwrap();
        assert_eq!(decoded, PiiType::CreditCard);
    }
}