Testing Tutorial
This tutorial covers how to write and run tests for ClawSeed.
Test Infrastructure
ClawSeed's shared test helpers live in crates/clawseed-agent/tests/common/, next to the test files that use them:
tests/
├── common/
│ ├── mod.rs # Shared module declarations
│ ├── helpers.rs # Test builders
│ ├── mock_provider.rs # Mock Provider
│ └── mock_tools.rs # Mock Tool
├── agent_integration.rs # Integration tests
├── agent_robustness.rs # Robustness tests
└── agent_system.rs # System-level tests
Mock Provider
MockProvider — Scripted Responses
Returns predefined responses in FIFO order, one per call:
// Files under `tests/` are compiled as separate crates, so the shared helpers
// are pulled in with a module declaration rather than through the library
// crate path (`clawseed_agent::...`).
mod common;

use clawseed_api::{ChatResponse, ToolCall};
use common::mock_provider::{text_response, tool_response, MockProvider};

// Create a Mock Provider that returns a sequence of responses
let provider = Box::new(MockProvider::new(vec![
    // First call: return a tool call
    tool_response(vec![ToolCall {
        id: "tc1".into(),
        name: "echo".into(),
        arguments: r#"{"message": "hello"}"#.into(),
    }]),
    // Second call: return text-only
    text_response("Tool executed successfully"),
]));
RecordingProvider — Request Recording
RecordingProvider returns scripted responses just like MockProvider and additionally records every request it receives, so tests can assert on them:
// Requires `mod common;` at the top of the integration-test file: helpers
// under `tests/common/` are not reachable through `clawseed_agent::...`.
use common::mock_provider::RecordingProvider;

// Shared log handed to the provider; the test keeps its own handle to it.
let recorded = Arc::new(Mutex::new(Vec::new()));
let provider = Box::new(RecordingProvider::new(
    vec![text_response("Hello")],
    recorded.clone(),
));

// ... run test ...

// Assert messages sent to the Provider
let requests = recorded.lock().unwrap();
assert_eq!(requests.len(), 1);
Helper Functions
/// Creates a text-only `ChatResponse` from `text` (body elided in this tutorial).
pub fn text_response(text: &str) -> ChatResponse { ... }
/// Creates a `ChatResponse` carrying the given tool calls (body elided in this tutorial).
pub fn tool_response(calls: Vec<ToolCall>) -> ChatResponse { ... }
Mock Tools
EchoTool — Echo Tool
Returns the message argument unchanged (an empty string if it is missing or not a string):
/// Mock tool that echoes the `message` argument back as its output.
pub struct EchoTool;

impl Tool for EchoTool {
    fn name(&self) -> &str {
        "echo"
    }

    fn description(&self) -> &str {
        "Echo back the input"
    }

    /// JSON schema: an object with a single string property, `message`.
    fn parameters_schema(&self) -> Value {
        json!({
            "type": "object",
            "properties": {
                "message": { "type": "string" }
            }
        })
    }

    /// Always succeeds. The output is the `message` string; a missing or
    /// non-string `message` produces an empty output.
    async fn execute(&self, args: Value, _ctx: &dyn ToolContext) -> anyhow::Result<ToolResult> {
        let echoed = match args.get("message") {
            Some(value) => value.as_str().unwrap_or(""),
            None => "",
        };
        Ok(ToolResult {
            success: true,
            output: echoed.to_owned(),
            error: None,
        })
    }
}
CountingTool — Counting Tool
Tracks how many times the tool was called, using a counter shared with the test:
/// Mock tool built around a counter that is shared with the test.
pub struct CountingTool {
    count: Arc<Mutex<usize>>,
}

impl CountingTool {
    /// Wraps the shared counter; the caller typically keeps its own clone of
    /// the `Arc` to inspect the value later.
    pub fn new(count: Arc<Mutex<usize>>) -> Self {
        CountingTool { count }
    }

    /// Returns the current value of the shared counter.
    ///
    /// # Panics
    ///
    /// Panics if the mutex has been poisoned.
    pub fn count(&self) -> usize {
        let guard = self.count.lock().unwrap();
        *guard
    }
}
RecordingTool — Argument Recording
Captures the arguments of each tool call so tests can assert on them:
/// Mock tool that exposes the argument values collected in `calls`.
pub struct RecordingTool {
// Tool name supplied to `new`.
name: String,
// Shared list of recorded argument values; presumably the test holds another handle to it.
calls: Arc<Mutex<Vec<Value>>>,
}
impl RecordingTool {
/// Creates a tool from a name and a shared call log (body elided in this tutorial).
pub fn new(name: &str, calls: Arc<Mutex<Vec<Value>>>) -> Self { ... }
/// Returns a cloned snapshot of the recorded values. Panics if the mutex is poisoned.
pub fn calls(&self) -> Vec<Value> { self.calls.lock().unwrap().clone() }
}
FailingTool — Simulated Failure
Always reports failure (a ToolResult with success: false and an error message), for testing error-handling paths:
/// Mock tool whose every call reports failure.
pub struct FailingTool;

impl Tool for FailingTool {
    fn name(&self) -> &str {
        "failing"
    }

    /// Ignores its inputs and returns `Ok` with a `ToolResult` marked as
    /// unsuccessful; the failure is carried in the result, not as an `Err`.
    async fn execute(&self, _args: Value, _ctx: &dyn ToolContext) -> anyhow::Result<ToolResult> {
        let failure = ToolResult {
            success: false,
            output: String::new(),
            error: Some("Intentional failure".into()),
        };
        Ok(failure)
    }
}
Agent Builders
Basic Builder
// Requires `mod common;` at the top of the integration-test file: helpers
// under `tests/common/` are not reachable through `clawseed_agent::...`.
use common::helpers::build_agent;

let agent = build_agent(provider, vec![Box::new(EchoTool)]);
build_agent internally creates NoneMemory and NoopObserver, using NativeToolDispatcher.
With SQLite Memory
// Requires `mod common;` at the top of the integration-test file: helpers
// under `tests/common/` are not reachable through `clawseed_agent::...`.
use common::helpers::build_agent_with_sqlite_memory;

// The temporary directory is removed when `temp_dir` is dropped, so keep it
// alive for as long as the agent is in use.
let temp_dir = tempfile::tempdir().unwrap();
let agent = build_agent_with_sqlite_memory(provider, vec![Box::new(EchoTool)], temp_dir.path());
Uses a real SQLite backend, suitable for testing memory-related integrations.
Manual Builder
For finer control, use Agent::builder() directly:
// Assemble the agent by hand, supplying every collaborator explicitly.
let agent = Agent::builder()
.provider(provider)
// Several tools can be registered in one call.
.tools(vec![Box::new(EchoTool), Box::new(CountingTool::new(count))])
.memory(Arc::new(NoneMemory))
// NOTE(review): `crate::` resolves to the current crate; from a file under `tests/`
// this path likely needs to be `clawseed_agent::observer::NoopObserver` — confirm.
.observer(Arc::new(crate::observer::NoopObserver))
.tool_dispatcher(Box::new(NativeToolDispatcher))
// The system temp directory serves as the workspace.
.workspace_dir(std::env::temp_dir())
.build()
// Panics (failing the test) if `build()` does not produce an agent.
.unwrap();
Writing Integration Tests
Single Tool Call Cycle
/// One tool-call round trip: the scripted provider first requests `echo`,
/// then answers with text, and the final response must contain that text.
#[tokio::test]
async fn test_single_tool_call() {
    let echo_call = ToolCall {
        id: "tc1".into(),
        name: "echo".into(),
        arguments: r#"{"message": "hello"}"#.into(),
    };
    let script = vec![
        tool_response(vec![echo_call]),
        text_response("Echo: hello"),
    ];
    let provider = Box::new(MockProvider::new(script));
    let agent = build_agent(provider, vec![Box::new(EchoTool)]);

    let reply = agent.turn("run echo with hello").await.unwrap();

    assert!(reply.contains("Echo: hello"));
}
Multiple Tool Calls
/// Two `counter` calls requested in a single provider response must both be
/// executed, leaving the shared counter at 2.
#[tokio::test]
async fn test_multiple_tool_calls() {
    let invocations = Arc::new(Mutex::new(0));

    let first_call = ToolCall {
        id: "tc1".into(),
        name: "counter".into(),
        arguments: "{}".into(),
    };
    let second_call = ToolCall {
        id: "tc2".into(),
        name: "counter".into(),
        arguments: "{}".into(),
    };
    let script = vec![
        tool_response(vec![first_call, second_call]),
        text_response("Done"),
    ];
    let provider = Box::new(MockProvider::new(script));

    let agent = build_agent(
        provider,
        vec![Box::new(CountingTool::new(invocations.clone()))],
    );
    agent.turn("run counter twice").await.unwrap();

    let total = *invocations.lock().unwrap();
    assert_eq!(total, 2);
}
Tool Error Handling
/// A failing tool must not abort the turn: the provider's follow-up text is
/// still returned as the final response.
#[tokio::test]
async fn test_tool_failure_handling() {
    let failing_call = ToolCall {
        id: "tc1".into(),
        name: "failing".into(),
        arguments: "{}".into(),
    };
    let script = vec![
        tool_response(vec![failing_call]),
        text_response("Tool failed, but I recovered"),
    ];
    let provider = Box::new(MockProvider::new(script));
    let agent = build_agent(provider, vec![Box::new(FailingTool)]);

    let reply = agent.turn("run failing tool").await.unwrap();

    assert!(reply.contains("recovered"));
}
Hook Testing
/// Registers a cancelling hook on the agent, lets the provider request
/// `echo`, and checks that the turn still completes with the scripted text.
#[tokio::test]
async fn test_hook_cancels_tool() {
    let echo_call = ToolCall {
        id: "tc1".into(),
        name: "echo".into(),
        arguments: r#"{"message": "hello"}"#.into(),
    };
    let script = vec![
        tool_response(vec![echo_call]),
        text_response("Tool was cancelled"),
    ];
    let provider = Box::new(MockProvider::new(script));

    let mut hook_runner = HookRunner::new();
    // Hook that cancels the echo tool
    hook_runner.register(Box::new(CancelEchoHook));

    let mut agent = build_agent(provider, vec![Box::new(EchoTool)]);
    agent.set_hook_runner(hook_runner);

    let reply = agent.turn("run echo").await.unwrap();

    assert!(reply.contains("cancelled"));
}
Memory Integration Test
/// Drives a `memory_store` tool call through an agent built with the SQLite
/// memory helper and checks the final response text.
#[tokio::test]
async fn test_memory_store_and_recall() {
    let temp_dir = tempfile::tempdir().unwrap();

    let store_call = ToolCall {
        id: "tc1".into(),
        name: "memory_store".into(),
        arguments: r#"{"content": "test memory", "category": "context"}"#.into(),
    };
    let script = vec![
        tool_response(vec![store_call]),
        text_response("Stored"),
    ];
    let provider = Box::new(MockProvider::new(script));

    let agent = build_agent_with_sqlite_memory(provider, memory_tools(), temp_dir.path());
    let reply = agent.turn("store a memory").await.unwrap();

    assert!(reply.contains("Stored"));
}
Writing Unit Tests
Testing Tool Logic
/// Dividing 10 by 2 succeeds and yields the output `"5"`.
#[tokio::test]
async fn test_calculator_divide() {
    let calculator = CalculatorTool::new();
    // Need to implement ToolContext
    let ctx = MockToolContext::new();
    let args = json!({"function": "divide", "a": 10, "b": 2});

    let outcome = calculator.execute(args, &ctx).await.unwrap();

    assert!(outcome.success);
    assert_eq!(outcome.output, "5");
}
/// Dividing by zero is reported through the tool result (`success == false`
/// with an error mentioning "zero"), not through an `Err`.
#[tokio::test]
async fn test_calculator_divide_by_zero() {
    let calculator = CalculatorTool::new();
    let ctx = MockToolContext::new();
    let args = json!({"function": "divide", "a": 10, "b": 0});

    let outcome = calculator.execute(args, &ctx).await.unwrap();

    assert!(!outcome.success);
    let message = outcome.error.unwrap();
    assert!(message.contains("zero"));
}
Testing Hooks
/// A `shell` call carrying a destructive command must be cancelled by the hook.
#[test]
fn test_approval_hook_cancels_dangerous_tool() {
    let hook = ApprovalHook::new();
    let mut call = ToolCall {
        id: "1".into(),
        name: "shell".into(),
        // `arguments` holds JSON-encoded text, matching every other `ToolCall`
        // in this tutorial, so the `json!` value is serialized to a string.
        arguments: json!({"command": "rm -rf /"}).to_string(),
    };

    let result = hook.before_tool_call(&mut call);

    assert!(matches!(result, HookResult::Cancel(_)));
}
/// A harmless `file_read` call must pass through the hook untouched.
#[test]
fn test_approval_hook_allows_safe_tool() {
    let hook = ApprovalHook::new();
    let mut call = ToolCall {
        id: "1".into(),
        name: "file_read".into(),
        // `arguments` holds JSON-encoded text, matching every other `ToolCall`
        // in this tutorial, so the `json!` value is serialized to a string.
        arguments: json!({"path": "test.txt"}).to_string(),
    };

    let result = hook.before_tool_call(&mut call);

    assert!(matches!(result, HookResult::Continue));
}
Running Tests
# Run all tests
cargo test
# Run tests for a specific crate
cargo test -p clawseed-agent
# Run only the tests whose name contains the given filter
cargo test -p clawseed-agent test_single_tool_call
# Run a single integration-test target (selected with --test <name>)
cargo test -p clawseed-agent --test agent_integration
# Show test output (disable capture of stdout/stderr)
cargo test -- --nocapture
# Run only the tests marked #[ignore]
cargo test -- --ignored
Test Categories
| Type | Location | Characteristics |
|---|---|---|
| Unit tests | #[cfg(test)] mod tests within src/ files | Test individual functions/structs |
| Integration tests | tests/agent_integration.rs | Test full agent cycles |
| Robustness tests | tests/agent_robustness.rs | Test error handling and edge cases |
| System tests | tests/agent_system.rs | Test with real backends (SQLite, etc.) |
CI Configuration
The project's CI profile is configured for faster compilation:
Recommended for CI: