cellstate_core/
web_mcp.rs

1//! WebMCP - Browser-native agent tool discovery primitives.
2//!
3//! Defines the core types for the Web Model Context Protocol (WebMCP),
4//! which enables browser-side AI agents to discover and execute tools
5//! exposed by web applications via the `navigator.modelContext` API.
6//!
7//! ## Architecture
8//!
9//! These are pure data types (no behavior) following the cellstate-core pattern.
10//! The API crate adds behavior (WebMcpConnector), the SDK wraps it in
11//! headless state machines, and the app dogfoods it via the SDK.
12//!
13//! ## Relationship to MCP
14//!
15//! Standard MCP operates server-to-agent (tools exposed by an MCP server).
16//! WebMCP operates browser-to-agent: web pages expose tools via
17//! `navigator.modelContext.registerTool()` that any connected agent
18//! (browser sidebar, desktop app, or programmatic client) can discover
19//! and execute. This module provides the type vocabulary for both sides.
20//!
21//! ## Discovery Flow
22//!
23//! ```text
24//! ┌────────────────────────────────────────────────────┐
25//! │                WebMCP Discovery Lifecycle           │
26//! │                                                    │
27//! │  Page Load → Detect → Schema Parse → Ready         │
28//! │     ↑                                  │           │
29//! │     │          Execute ← Approve ← ─ ─┘           │
30//! │     │             │                                │
31//! │     └─── Context Update ←──┘                       │
32//! └────────────────────────────────────────────────────┘
33//! ```
34
35use serde::{Deserialize, Serialize};
36use std::collections::HashMap;
37
38// ============================================================================
39// TOOL SCHEMA (mirrors navigator.modelContext.registerTool shape)
40// ============================================================================
41
42/// A tool exposed by a web page via WebMCP.
43///
44/// Maps directly to the `navigator.modelContext.registerTool()` call.
45/// The agent discovers these tools upon navigating to a page.
46#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
47#[serde(rename_all = "camelCase")]
48#[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))]
49pub struct WebMcpTool {
50    /// Unique tool name within the page context.
51    pub name: String,
52    /// Human-readable description of what the tool does.
53    pub description: String,
54    /// JSON Schema describing the input parameters.
55    pub input_schema: WebMcpInputSchema,
56    /// Origin URL where this tool was discovered.
57    #[serde(skip_serializing_if = "Option::is_none")]
58    pub origin: Option<String>,
59    /// Security annotations for this tool.
60    #[serde(default)]
61    pub annotations: WebMcpToolAnnotations,
62}
63
64/// JSON Schema for tool input parameters.
65///
66/// Simplified representation of JSON Schema sufficient for WebMCP tool
67/// input validation. Maps to the `inputSchema` field in registerTool.
68#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
69#[serde(rename_all = "camelCase")]
70#[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))]
71pub struct WebMcpInputSchema {
72    /// Always "object" for tool inputs.
73    #[serde(rename = "type")]
74    pub schema_type: String,
75    /// Property definitions keyed by parameter name.
76    #[serde(default)]
77    pub properties: HashMap<String, WebMcpPropertySchema>,
78    /// Required parameter names.
79    #[serde(default)]
80    pub required: Vec<String>,
81}
82
83/// Schema for a single property within a tool's input.
84#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
85#[serde(rename_all = "camelCase")]
86#[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))]
87pub struct WebMcpPropertySchema {
88    /// JSON Schema type (string, number, boolean, object, array).
89    #[serde(rename = "type")]
90    pub prop_type: String,
91    /// Human-readable description of this parameter.
92    #[serde(skip_serializing_if = "Option::is_none")]
93    pub description: Option<String>,
94    /// Default value (if any).
95    #[serde(skip_serializing_if = "Option::is_none")]
96    pub default: Option<serde_json::Value>,
97    /// Allowed values (enum constraint).
98    #[serde(rename = "enum", skip_serializing_if = "Option::is_none")]
99    pub enum_values: Option<Vec<serde_json::Value>>,
100}
101
102/// Security and behavioral annotations for a WebMCP tool.
103///
104/// Agents use these to decide whether to auto-execute or prompt for
105/// user approval (aligns with CELLSTATE's intent/autonomy system).
106#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
107#[serde(rename_all = "camelCase")]
108#[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))]
109pub struct WebMcpToolAnnotations {
110    /// Whether this tool is read-only (safe to auto-execute).
111    #[serde(default)]
112    pub read_only: bool,
113    /// Whether this tool mutates external state.
114    #[serde(default)]
115    pub destructive: bool,
116    /// Required OAuth scopes for execution.
117    #[serde(default)]
118    pub required_scopes: Vec<String>,
119    /// Maximum cost in tokens this tool typically consumes.
120    #[serde(skip_serializing_if = "Option::is_none")]
121    pub estimated_cost: Option<u64>,
122}
123
124// ============================================================================
125// CONTEXT PROVIDER (mirrors navigator.modelContext.provideContext)
126// ============================================================================
127
128/// Page context metadata exposed via WebMCP.
129///
130/// Maps to `navigator.modelContext.provideContext()`. Provides the agent
131/// with structured information about the current page state without
132/// requiring visual inference (screenshots).
133#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
134#[serde(rename_all = "camelCase")]
135#[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))]
136pub struct WebMcpContext {
137    /// URL of the page providing context.
138    pub url: String,
139    /// Human-readable page title.
140    #[serde(skip_serializing_if = "Option::is_none")]
141    pub title: Option<String>,
142    /// Structured page state (application-specific).
143    #[serde(default)]
144    pub state: HashMap<String, serde_json::Value>,
145    /// List of available user actions on the page.
146    #[serde(default)]
147    pub available_actions: Vec<String>,
148    /// Current user identity context (if authenticated).
149    #[serde(skip_serializing_if = "Option::is_none")]
150    pub user_context: Option<WebMcpUserContext>,
151}
152
153/// User identity context within a WebMCP session.
154#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
155#[serde(rename_all = "camelCase")]
156#[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))]
157pub struct WebMcpUserContext {
158    /// Opaque user identifier.
159    #[serde(skip_serializing_if = "Option::is_none")]
160    pub user_id: Option<String>,
161    /// Display name.
162    #[serde(skip_serializing_if = "Option::is_none")]
163    pub display_name: Option<String>,
164    /// Granted OAuth scopes.
165    #[serde(default)]
166    pub scopes: Vec<String>,
167}
168
169// ============================================================================
170// DISCOVERY STATE (lifecycle of tool discovery on a page)
171// ============================================================================
172
173/// State of the WebMCP discovery lifecycle for a single page.
174#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
175#[serde(rename_all = "snake_case")]
176#[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))]
177pub enum WebMcpDiscoveryPhase {
178    /// Agent has navigated to a URL but hasn't queried for tools yet.
179    Detecting,
180    /// Tools have been detected, schemas are being parsed.
181    Parsing,
182    /// Discovery complete, tools are ready for execution.
183    Ready,
184    /// Discovery failed (page doesn't support WebMCP or schema error).
185    Failed,
186    /// Tools were available but have been unregistered by the page.
187    Revoked,
188}
189
190/// Complete discovery state for a single page.
191#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
192#[serde(rename_all = "camelCase")]
193#[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))]
194pub struct WebMcpDiscoveryState {
195    /// URL being discovered.
196    pub url: String,
197    /// Current phase of discovery.
198    pub phase: WebMcpDiscoveryPhase,
199    /// Discovered tools (populated when phase == Ready).
200    #[serde(default)]
201    pub tools: Vec<WebMcpTool>,
202    /// Page context (if provided).
203    #[serde(skip_serializing_if = "Option::is_none")]
204    pub context: Option<WebMcpContext>,
205    /// Error message (if phase == Failed).
206    #[serde(skip_serializing_if = "Option::is_none")]
207    pub error: Option<String>,
208}
209
210// ============================================================================
211// EXECUTION (tool call and result)
212// ============================================================================
213
214/// A request to execute a WebMCP tool.
215#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
216#[serde(rename_all = "camelCase")]
217#[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))]
218pub struct WebMcpToolCall {
219    /// Name of the tool to execute.
220    pub tool_name: String,
221    /// Input arguments (must conform to the tool's input_schema).
222    #[serde(default)]
223    pub arguments: serde_json::Value,
224    /// Origin URL where the tool was discovered.
225    #[serde(skip_serializing_if = "Option::is_none")]
226    pub origin: Option<String>,
227    /// Request ID for correlation.
228    #[serde(skip_serializing_if = "Option::is_none")]
229    pub request_id: Option<String>,
230}
231
232/// Result of a WebMCP tool execution.
233#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
234#[serde(rename_all = "camelCase")]
235#[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))]
236pub struct WebMcpToolResult {
237    /// Whether execution succeeded.
238    pub success: bool,
239    /// Return value from the tool's executor function.
240    #[serde(skip_serializing_if = "Option::is_none")]
241    pub result: Option<serde_json::Value>,
242    /// Error message (if success == false).
243    #[serde(skip_serializing_if = "Option::is_none")]
244    pub error: Option<String>,
245    /// Request ID echoed back for correlation.
246    #[serde(skip_serializing_if = "Option::is_none")]
247    pub request_id: Option<String>,
248}
249
250// ============================================================================
251// SECURITY POLICY (OAuth 2.1 + multi-tenant)
252// ============================================================================
253
254/// Security policy governing WebMCP tool access.
255///
256/// Aligns with CELLSTATE's intent system: the security policy determines
257/// which tools an agent can auto-execute vs. which require approval,
258/// mapping to the AutonomyLevel and DelegationBoundary primitives.
259#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
260#[serde(rename_all = "camelCase")]
261#[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))]
262pub struct WebMcpSecurityPolicy {
263    /// Allowed origin patterns (e.g., "https://*.example.com").
264    #[serde(default)]
265    pub allowed_origins: Vec<String>,
266    /// OAuth 2.1 scopes required for tool execution.
267    #[serde(default)]
268    pub required_scopes: Vec<String>,
269    /// Maximum token budget per tool execution.
270    #[serde(skip_serializing_if = "Option::is_none")]
271    pub max_tokens_per_call: Option<u64>,
272    /// Whether to enforce audit logging for all tool calls.
273    #[serde(default)]
274    pub audit_logging: bool,
275    /// Sandbox mode for tool execution.
276    #[serde(default)]
277    pub sandbox_mode: WebMcpSandboxMode,
278}
279
280/// Sandbox execution mode for WebMCP tools.
281#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default, Serialize, Deserialize)]
282#[serde(rename_all = "snake_case")]
283#[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))]
284pub enum WebMcpSandboxMode {
285    /// Tools execute in the page's context (default for read-only tools).
286    #[default]
287    PageContext,
288    /// Tools execute in an isolated iframe sandbox.
289    Isolated,
290    /// Tools execute server-side via CELLSTATE's sandbox infrastructure.
291    ServerSide,
292}
293
294// ============================================================================
295// MANIFEST (declarative HTML form fallback)
296// ============================================================================
297
298/// WebMCP manifest for declarative (non-JS) tool exposure.
299///
300/// Used as an HTML form fallback when JavaScript execution is restricted.
301/// Pages can include `<meta name="webmcp-manifest" content="URL">` to
302/// declare their tools via a static JSON manifest.
303#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
304#[serde(rename_all = "camelCase")]
305#[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))]
306pub struct WebMcpManifest {
307    /// Schema version.
308    pub version: String,
309    /// Application name.
310    pub name: String,
311    /// Application description.
312    #[serde(skip_serializing_if = "Option::is_none")]
313    pub description: Option<String>,
314    /// Tools exposed by this application.
315    #[serde(default)]
316    pub tools: Vec<WebMcpTool>,
317    /// Security policy for tool access.
318    #[serde(skip_serializing_if = "Option::is_none")]
319    pub security: Option<WebMcpSecurityPolicy>,
320}
321
322// ============================================================================
323// CAPABILITY DECLARATION (bidirectional: what the agent CAN do)
324// ============================================================================
325
326/// Capabilities an agent advertises to a WebMCP-enabled page.
327///
328/// This is the bidirectional aspect: not just what tools the page
329/// exposes, but what the connecting agent is capable of providing
330/// back to the page (e.g., memory recall, context assembly, search).
331#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
332#[serde(rename_all = "camelCase")]
333#[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))]
334pub struct WebMcpAgentCapabilities {
335    /// Agent identifier.
336    pub agent_id: String,
337    /// CELLSTATE tools the agent can expose back to the page.
338    #[serde(default)]
339    pub provided_tools: Vec<String>,
340    /// Memory operations the agent supports.
341    #[serde(default)]
342    pub memory_capabilities: Vec<WebMcpMemoryCapability>,
343    /// Model routing capabilities.
344    #[serde(default)]
345    pub model_routing: Vec<String>,
346}
347
348/// Memory capabilities an agent can provide to a WebMCP page.
349#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
350#[serde(rename_all = "snake_case")]
351#[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))]
352pub enum WebMcpMemoryCapability {
353    /// Agent can recall past interactions.
354    Recall,
355    /// Agent can store new artifacts.
356    Store,
357    /// Agent can search across trajectories.
358    Search,
359    /// Agent can assemble context packages.
360    ContextAssembly,
361    /// Agent can provide summarization.
362    Summarize,
363    /// Agent can ingest multimodal content.
364    Ingest,
365}
366
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;

    /// Serializes `value` to a JSON string and parses it back into `T`.
    ///
    /// Panics on any serde error so that a broken roundtrip shows up as a
    /// test failure at the call site.
    fn roundtrip<T>(value: &T) -> T
    where
        T: serde::Serialize + serde::de::DeserializeOwned,
    {
        let json = serde_json::to_string(value).expect("value should serialize to JSON");
        serde_json::from_str(&json).expect("serialized JSON should deserialize")
    }

    /// Builds the one-parameter `search` tool shared by the tool tests.
    fn sample_tool() -> WebMcpTool {
        WebMcpTool {
            name: "search".into(),
            description: "Search the web".into(),
            input_schema: WebMcpInputSchema {
                schema_type: "object".into(),
                properties: HashMap::from([(
                    "query".into(),
                    WebMcpPropertySchema {
                        prop_type: "string".into(),
                        description: Some("Search query".into()),
                        default: None,
                        enum_values: None,
                    },
                )]),
                required: vec!["query".into()],
            },
            origin: None,
            annotations: WebMcpToolAnnotations::default(),
        }
    }

    #[test]
    fn web_mcp_tool_serde_roundtrip() {
        let tool = sample_tool();
        let decoded = roundtrip(&tool);
        // Whole-value equality: every field must survive, not only `name`.
        assert_eq!(tool, decoded);
        assert_eq!(decoded.input_schema.required, vec!["query"]);
    }

    #[test]
    fn web_mcp_tool_wire_format() {
        let wire = serde_json::to_value(&sample_tool()).unwrap();
        // Struct fields are camelCase; `schema_type`/`prop_type` map to `type`.
        assert_eq!(wire["name"], serde_json::json!("search"));
        assert_eq!(wire["inputSchema"]["type"], serde_json::json!("object"));
        assert_eq!(
            wire["inputSchema"]["required"],
            serde_json::json!(["query"])
        );
        assert_eq!(
            wire["inputSchema"]["properties"]["query"]["type"],
            serde_json::json!("string")
        );
        assert_eq!(wire["annotations"]["readOnly"], serde_json::json!(false));
        // `None` optionals are skipped rather than emitted as `null`.
        assert!(wire.get("origin").is_none());
        assert!(wire["inputSchema"]["properties"]["query"]
            .get("enum")
            .is_none());
        assert!(wire["annotations"].get("estimatedCost").is_none());
    }

    #[test]
    fn web_mcp_tool_missing_optional_fields_use_defaults() {
        let tool: WebMcpTool = serde_json::from_str(
            r#"{"name":"t","description":"d","inputSchema":{"type":"object"}}"#,
        )
        .unwrap();
        assert!(tool.origin.is_none());
        assert!(tool.input_schema.properties.is_empty());
        assert!(tool.input_schema.required.is_empty());
        assert_eq!(tool.annotations, WebMcpToolAnnotations::default());
    }

    #[test]
    fn web_mcp_tool_annotations_default_is_safe() {
        let ann = WebMcpToolAnnotations::default();
        assert!(!ann.read_only);
        assert!(!ann.destructive);
        assert!(ann.required_scopes.is_empty());
        assert!(ann.estimated_cost.is_none());
    }

    #[test]
    fn web_mcp_discovery_phase_serde_roundtrip() {
        // Pin the snake_case wire names as well as the roundtrip.
        let cases = [
            (WebMcpDiscoveryPhase::Detecting, "\"detecting\""),
            (WebMcpDiscoveryPhase::Parsing, "\"parsing\""),
            (WebMcpDiscoveryPhase::Ready, "\"ready\""),
            (WebMcpDiscoveryPhase::Failed, "\"failed\""),
            (WebMcpDiscoveryPhase::Revoked, "\"revoked\""),
        ];
        for (phase, expected) in cases {
            let json = serde_json::to_string(&phase).unwrap();
            assert_eq!(json, expected);
            let d: WebMcpDiscoveryPhase = serde_json::from_str(&json).unwrap();
            assert_eq!(phase, d);
        }
    }

    #[test]
    fn web_mcp_sandbox_mode_default_is_page_context() {
        assert_eq!(WebMcpSandboxMode::default(), WebMcpSandboxMode::PageContext);
    }

    #[test]
    fn web_mcp_sandbox_mode_serde_roundtrip() {
        let cases = [
            (WebMcpSandboxMode::PageContext, "\"page_context\""),
            (WebMcpSandboxMode::Isolated, "\"isolated\""),
            (WebMcpSandboxMode::ServerSide, "\"server_side\""),
        ];
        for (mode, expected) in cases {
            let json = serde_json::to_string(&mode).unwrap();
            assert_eq!(json, expected);
            let d: WebMcpSandboxMode = serde_json::from_str(&json).unwrap();
            assert_eq!(mode, d);
        }
    }

    #[test]
    fn web_mcp_memory_capability_serde_roundtrip() {
        let cases = [
            (WebMcpMemoryCapability::Recall, "\"recall\""),
            (WebMcpMemoryCapability::Store, "\"store\""),
            (WebMcpMemoryCapability::Search, "\"search\""),
            (WebMcpMemoryCapability::ContextAssembly, "\"context_assembly\""),
            (WebMcpMemoryCapability::Summarize, "\"summarize\""),
            (WebMcpMemoryCapability::Ingest, "\"ingest\""),
        ];
        for (cap, expected) in cases {
            let json = serde_json::to_string(&cap).unwrap();
            assert_eq!(json, expected);
            let d: WebMcpMemoryCapability = serde_json::from_str(&json).unwrap();
            assert_eq!(cap, d);
        }
    }

    #[test]
    fn web_mcp_security_policy_defaults() {
        let policy: WebMcpSecurityPolicy = serde_json::from_str("{}").unwrap();
        assert!(policy.allowed_origins.is_empty());
        assert!(policy.required_scopes.is_empty());
        assert!(policy.max_tokens_per_call.is_none());
        assert!(!policy.audit_logging);
        assert_eq!(policy.sandbox_mode, WebMcpSandboxMode::PageContext);
    }

    #[test]
    fn web_mcp_manifest_serde_roundtrip() {
        let manifest = WebMcpManifest {
            version: "1.0.0".into(),
            name: "test-server".into(),
            description: Some("A test MCP server".into()),
            tools: vec![],
            security: Some(WebMcpSecurityPolicy {
                allowed_origins: vec!["https://example.com".into()],
                required_scopes: vec![],
                max_tokens_per_call: Some(1000),
                audit_logging: true,
                sandbox_mode: WebMcpSandboxMode::Isolated,
            }),
        };
        let d = roundtrip(&manifest);
        // Full equality also covers the nested security policy.
        assert_eq!(manifest, d);
        assert!(d.security.is_some());
    }

    #[test]
    fn web_mcp_manifest_minimal_input_uses_defaults() {
        let manifest: WebMcpManifest =
            serde_json::from_str(r#"{"version":"1.0.0","name":"app"}"#).unwrap();
        assert!(manifest.description.is_none());
        assert!(manifest.tools.is_empty());
        assert!(manifest.security.is_none());
    }

    #[test]
    fn web_mcp_tool_call_serde_roundtrip() {
        let call = WebMcpToolCall {
            tool_name: "search".into(),
            arguments: serde_json::json!({"query": "test"}),
            origin: Some("https://example.com".into()),
            request_id: Some("req-123".into()),
        };
        assert_eq!(roundtrip(&call), call);
    }

    #[test]
    fn web_mcp_tool_call_missing_arguments_default_to_null() {
        let call: WebMcpToolCall = serde_json::from_str(r#"{"toolName":"search"}"#).unwrap();
        assert_eq!(call.tool_name, "search");
        assert_eq!(call.arguments, serde_json::Value::Null);
        assert!(call.origin.is_none());
        assert!(call.request_id.is_none());
    }

    #[test]
    fn web_mcp_tool_result_success() {
        let result = WebMcpToolResult {
            success: true,
            result: Some(serde_json::json!({"data": [1, 2, 3]})),
            error: None,
            request_id: Some("req-123".into()),
        };
        let d = roundtrip(&result);
        assert!(d.success);
        assert!(d.error.is_none());
        assert_eq!(result, d);
    }

    #[test]
    fn web_mcp_tool_result_failure() {
        let result = WebMcpToolResult {
            success: false,
            result: None,
            error: Some("tool not found".into()),
            request_id: None,
        };
        // Only the populated members appear on the wire.
        let wire = serde_json::to_value(&result).unwrap();
        assert_eq!(
            wire,
            serde_json::json!({"success": false, "error": "tool not found"})
        );
        let d = roundtrip(&result);
        assert!(!d.success);
        assert_eq!(d.error.as_deref(), Some("tool not found"));
        assert_eq!(result, d);
    }

    #[test]
    fn web_mcp_agent_capabilities_serde_roundtrip() {
        let caps = WebMcpAgentCapabilities {
            agent_id: "agent-1".into(),
            provided_tools: vec!["recall".into(), "search".into()],
            memory_capabilities: vec![
                WebMcpMemoryCapability::Recall,
                WebMcpMemoryCapability::Store,
            ],
            model_routing: vec!["openai".into()],
        };
        let d = roundtrip(&caps);
        assert_eq!(caps, d);
        assert_eq!(d.memory_capabilities.len(), 2);
    }

    #[test]
    fn web_mcp_context_serde_roundtrip() {
        let ctx = WebMcpContext {
            url: "https://example.com/dashboard".into(),
            title: Some("Dashboard".into()),
            state: HashMap::from([("page".into(), serde_json::json!("home"))]),
            available_actions: vec!["click_button".into()],
            user_context: Some(WebMcpUserContext {
                user_id: Some("user-1".into()),
                display_name: Some("Test User".into()),
                scopes: vec!["read".into()],
            }),
        };
        let d = roundtrip(&ctx);
        assert_eq!(ctx, d);
        assert!(d.user_context.is_some());
    }
}