mirror of
https://github.com/anthropics/claude-code-sdk-python.git
synced 2025-12-23 09:19:52 +00:00
Add Docker-based test infrastructure for e2e tests (#424)
## Summary

- Add `Dockerfile.test`: Python 3.12 image with Claude Code CLI installed
- Add `scripts/test-docker.sh`: Local script to run tests in Docker
- Add `test-e2e-docker` job to CI workflow that runs the full e2e suite in a container
- Add `.dockerignore` to speed up Docker builds

## Context

This helps catch Docker-specific issues like #406 where filesystem-based agents loaded via `setting_sources=["project"]` may silently fail in Docker environments.

## Local Usage

```bash
# Run unit tests in Docker (no API key needed)
./scripts/test-docker.sh unit

# Run e2e tests in Docker
ANTHROPIC_API_KEY=sk-... ./scripts/test-docker.sh e2e

# Run all tests
ANTHROPIC_API_KEY=sk-... ./scripts/test-docker.sh all
```

## Test plan

- [x] Unit tests pass in Docker locally (129 passed)
- [ ] CI job runs successfully

🤖 Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
parent
904c2ec33c
commit
a0ce44a3fa
7 changed files with 381 additions and 19 deletions
9
.claude/agents/test-agent.md
Normal file
9
.claude/agents/test-agent.md
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
---
|
||||
name: test-agent
|
||||
description: A simple test agent for SDK testing
|
||||
tools: Read
|
||||
---
|
||||
|
||||
# Test Agent
|
||||
|
||||
You are a simple test agent. When asked a question, provide a brief, helpful answer.
|
||||
49
.dockerignore
Normal file
49
.dockerignore
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
# Git
|
||||
.git
|
||||
.gitignore
|
||||
|
||||
# Python
|
||||
__pycache__
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
*.so
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
|
||||
# Virtual environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
|
||||
# IDE
|
||||
.idea/
|
||||
.vscode/
|
||||
*.swp
|
||||
*.swo
|
||||
|
||||
# Testing/Coverage
|
||||
.coverage
|
||||
.pytest_cache/
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
|
||||
# Misc
|
||||
*.log
|
||||
.DS_Store
|
||||
18
.github/workflows/test.yml
vendored
18
.github/workflows/test.yml
vendored
|
|
@ -81,6 +81,24 @@ jobs:
|
|||
run: |
|
||||
python -m pytest e2e-tests/ -v -m e2e
|
||||
|
||||
test-e2e-docker:
|
||||
runs-on: ubuntu-latest
|
||||
needs: test # Run after unit tests pass
|
||||
# Run e2e tests in Docker to catch container-specific issues like #406
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Build Docker test image
|
||||
run: docker build -f Dockerfile.test -t claude-sdk-test .
|
||||
|
||||
- name: Run e2e tests in Docker
|
||||
env:
|
||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||
run: |
|
||||
docker run --rm -e ANTHROPIC_API_KEY \
|
||||
claude-sdk-test python -m pytest e2e-tests/ -v -m e2e
|
||||
|
||||
test-examples:
|
||||
runs-on: ubuntu-latest
|
||||
needs: test-e2e # Run after e2e tests
|
||||
|
|
|
|||
29
Dockerfile.test
Normal file
29
Dockerfile.test
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
# Dockerfile for running SDK tests in a containerized environment
# This helps catch Docker-specific issues like #406

FROM python:3.12-slim

# Run every RUN step under bash with pipefail. The default `/bin/sh -c` only
# reports the exit status of the LAST command in a pipeline, so a failed
# download in `curl ... | bash` below would otherwise be masked.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Install dependencies for Claude CLI and git (needed for some tests).
# --no-install-recommends keeps the image small; ca-certificates is listed
# explicitly because curl needs it for HTTPS and it is only a "recommended"
# package of curl/git.
RUN apt-get update && apt-get install -y --no-install-recommends \
    ca-certificates \
    curl \
    git \
    && rm -rf /var/lib/apt/lists/*

# Install Claude Code CLI (the installer places the binary in ~/.local/bin)
RUN curl -fsSL https://claude.ai/install.sh | bash
ENV PATH="/root/.local/bin:$PATH"

# Set up working directory
WORKDIR /app

# Copy the SDK source
COPY . .

# Install SDK with dev dependencies (no pip cache: it is never reused in the image)
RUN pip install --no-cache-dir -e ".[dev]"

# Verify CLI installation so a broken install fails the build, not the tests
RUN claude -v

# Default: run unit tests
CMD ["python", "-m", "pytest", "tests/", "-v"]
|
||||
|
|
@ -38,15 +38,88 @@ async def test_agent_definition():
|
|||
async for message in client.receive_response():
|
||||
if isinstance(message, SystemMessage) and message.subtype == "init":
|
||||
agents = message.data.get("agents", [])
|
||||
assert isinstance(
|
||||
agents, list
|
||||
), f"agents should be a list of strings, got: {type(agents)}"
|
||||
assert (
|
||||
"test-agent" in agents
|
||||
), f"test-agent should be available, got: {agents}"
|
||||
assert isinstance(agents, list), (
|
||||
f"agents should be a list of strings, got: {type(agents)}"
|
||||
)
|
||||
assert "test-agent" in agents, (
|
||||
f"test-agent should be available, got: {agents}"
|
||||
)
|
||||
break
|
||||
|
||||
|
||||
@pytest.mark.e2e
@pytest.mark.asyncio
async def test_filesystem_agent_loading():
    """Test that filesystem-based agents load via setting_sources and produce full response.

    This is the core test for issue #406. It verifies that when using
    setting_sources=["project"] with a .claude/agents/ directory containing
    agent definitions, the SDK:
    1. Loads the agents (they appear in init message)
    2. Produces a full response with AssistantMessage
    3. Completes with a ResultMessage

    The bug in #406 causes the iterator to complete after only the
    init SystemMessage, never yielding AssistantMessage or ResultMessage.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        # Create a temporary project with a filesystem agent
        project_dir = Path(tmpdir)
        agents_dir = project_dir / ".claude" / "agents"
        agents_dir.mkdir(parents=True)

        # Create a test agent file
        agent_file = agents_dir / "fs-test-agent.md"
        agent_file.write_text(
            """---
name: fs-test-agent
description: A filesystem test agent for SDK testing
tools: Read
---

# Filesystem Test Agent

You are a simple test agent. When asked a question, provide a brief, helpful answer.
"""
        )

        options = ClaudeAgentOptions(
            setting_sources=["project"],
            cwd=project_dir,
            max_turns=1,
        )

        messages = []
        async with ClaudeSDKClient(options=options) as client:
            await client.query("Say hello in exactly 3 words")
            async for msg in client.receive_response():
                messages.append(msg)

        # Must have at least init, assistant, result
        message_types = [type(m).__name__ for m in messages]

        assert "SystemMessage" in message_types, "Missing SystemMessage (init)"
        assert "AssistantMessage" in message_types, (
            f"Missing AssistantMessage - got only: {message_types}. "
            "This may indicate issue #406 (silent failure with filesystem agents)."
        )
        assert "ResultMessage" in message_types, "Missing ResultMessage"

        # Find the init message explicitly. A plain search loop would pass
        # silently (never reaching the agent assertion) if a SystemMessage
        # arrived but none of them had subtype "init".
        init_message = next(
            (
                m
                for m in messages
                if isinstance(m, SystemMessage) and m.subtype == "init"
            ),
            None,
        )
        assert init_message is not None, (
            f"No SystemMessage with subtype 'init' received - got: {message_types}"
        )

        # Agents are returned as strings (just names)
        agents = init_message.data.get("agents", [])
        assert "fs-test-agent" in agents, (
            f"fs-test-agent not loaded from filesystem. Found: {agents}"
        )

        # On Windows, wait for file handles to be released before cleanup
        if sys.platform == "win32":
            await asyncio.sleep(0.5)
|
||||
|
||||
|
||||
@pytest.mark.e2e
|
||||
@pytest.mark.asyncio
|
||||
async def test_setting_sources_default():
|
||||
|
|
@ -74,12 +147,12 @@ async def test_setting_sources_default():
|
|||
async for message in client.receive_response():
|
||||
if isinstance(message, SystemMessage) and message.subtype == "init":
|
||||
output_style = message.data.get("output_style")
|
||||
assert (
|
||||
output_style != "local-test-style"
|
||||
), f"outputStyle should NOT be from local settings (default is no settings), got: {output_style}"
|
||||
assert (
|
||||
output_style == "default"
|
||||
), f"outputStyle should be 'default', got: {output_style}"
|
||||
assert output_style != "local-test-style", (
|
||||
f"outputStyle should NOT be from local settings (default is no settings), got: {output_style}"
|
||||
)
|
||||
assert output_style == "default", (
|
||||
f"outputStyle should be 'default', got: {output_style}"
|
||||
)
|
||||
break
|
||||
|
||||
# On Windows, wait for file handles to be released before cleanup
|
||||
|
|
@ -121,9 +194,9 @@ This is a test command.
|
|||
async for message in client.receive_response():
|
||||
if isinstance(message, SystemMessage) and message.subtype == "init":
|
||||
commands = message.data.get("slash_commands", [])
|
||||
assert (
|
||||
"testcmd" not in commands
|
||||
), f"testcmd should NOT be available with user-only sources, got: {commands}"
|
||||
assert "testcmd" not in commands, (
|
||||
f"testcmd should NOT be available with user-only sources, got: {commands}"
|
||||
)
|
||||
break
|
||||
|
||||
# On Windows, wait for file handles to be released before cleanup
|
||||
|
|
@ -159,11 +232,11 @@ async def test_setting_sources_project_included():
|
|||
async for message in client.receive_response():
|
||||
if isinstance(message, SystemMessage) and message.subtype == "init":
|
||||
output_style = message.data.get("output_style")
|
||||
assert (
|
||||
output_style == "local-test-style"
|
||||
), f"outputStyle should be from local settings, got: {output_style}"
|
||||
assert output_style == "local-test-style", (
|
||||
f"outputStyle should be from local settings, got: {output_style}"
|
||||
)
|
||||
break
|
||||
|
||||
# On Windows, wait for file handles to be released before cleanup
|
||||
if sys.platform == "win32":
|
||||
await asyncio.sleep(0.5)
|
||||
await asyncio.sleep(0.5)
|
||||
|
|
|
|||
107
examples/filesystem_agents.py
Normal file
107
examples/filesystem_agents.py
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Example of loading filesystem-based agents via setting_sources.
|
||||
|
||||
This example demonstrates how to load agents defined in .claude/agents/ files
|
||||
using the setting_sources option. This is different from inline AgentDefinition
|
||||
objects - these agents are loaded from markdown files on disk.
|
||||
|
||||
This example tests the scenario from issue #406 where filesystem-based agents
|
||||
loaded via setting_sources=["project"] may silently fail in certain environments.
|
||||
|
||||
Usage:
|
||||
./examples/filesystem_agents.py
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
|
||||
from claude_agent_sdk import (
|
||||
AssistantMessage,
|
||||
ClaudeAgentOptions,
|
||||
ClaudeSDKClient,
|
||||
ResultMessage,
|
||||
SystemMessage,
|
||||
TextBlock,
|
||||
)
|
||||
|
||||
|
||||
def extract_agents(msg: SystemMessage) -> list[str]:
    """Extract agent names from system message init data.

    Args:
        msg: System message to inspect. Only messages whose ``subtype`` is
            ``"init"`` are examined.

    Returns:
        Agent names in the order they appear under the ``"agents"`` key of
        ``msg.data``. Entries may be plain strings or dicts with a ``"name"``
        field; dicts without a usable name and entries of any other type are
        skipped. An empty list is returned for non-init messages and when no
        agents are listed.
    """
    if msg.subtype != "init":
        return []

    # `or []` also covers a payload that carries an explicit `"agents": None`.
    agents = msg.data.get("agents") or []

    names: list[str] = []
    for agent in agents:
        if isinstance(agent, str):
            names.append(agent)
        elif isinstance(agent, dict) and agent.get("name"):
            # Skip nameless dicts rather than reporting an empty agent name.
            names.append(agent["name"])
    return names
|
||||
|
||||
|
||||
async def main() -> int:
    """Test loading filesystem-based agents.

    Sends one query with ``setting_sources=["project"]`` and the SDK repo
    directory as ``cwd``, prints each message as it arrives, then prints a
    summary of which message types were received and whether ``test-agent``
    was listed in the init message.

    Returns:
        ``0`` when a full response (init, assistant, result) was received,
        ``1`` otherwise. A missing ``test-agent`` only produces a warning and
        does not affect the return value.
    """
    print("=== Filesystem Agents Example ===")
    print("Testing: setting_sources=['project'] with .claude/agents/test-agent.md")
    print()

    # Use the SDK repo directory which has .claude/agents/test-agent.md
    sdk_dir = Path(__file__).parent.parent

    options = ClaudeAgentOptions(
        setting_sources=["project"],
        cwd=sdk_dir,
    )

    message_types: list[str] = []
    agents_found: list[str] = []

    async with ClaudeSDKClient(options=options) as client:
        await client.query("Say hello in exactly 3 words")

        async for msg in client.receive_response():
            message_types.append(type(msg).__name__)

            if isinstance(msg, SystemMessage) and msg.subtype == "init":
                agents_found = extract_agents(msg)
                print(f"Init message received. Agents loaded: {agents_found}")

            elif isinstance(msg, AssistantMessage):
                for block in msg.content:
                    if isinstance(block, TextBlock):
                        print(f"Assistant: {block.text}")

            elif isinstance(msg, ResultMessage):
                print(
                    f"Result: subtype={msg.subtype}, cost=${msg.total_cost_usd or 0:.4f}"
                )

    print()
    print("=== Summary ===")
    print(f"Message types received: {message_types}")
    print(f"Total messages: {len(message_types)}")

    # Validate the results
    has_init = "SystemMessage" in message_types
    has_assistant = "AssistantMessage" in message_types
    has_result = "ResultMessage" in message_types
    has_test_agent = "test-agent" in agents_found
    full_response = has_init and has_assistant and has_result

    print()
    if full_response:
        print("SUCCESS: Received full response (init, assistant, result)")
    else:
        print("FAILURE: Did not receive full response")
        print(f" - Init: {has_init}")
        print(f" - Assistant: {has_assistant}")
        print(f" - Result: {has_result}")

    if has_test_agent:
        print("SUCCESS: test-agent was loaded from filesystem")
    else:
        print("WARNING: test-agent was NOT loaded (may not exist in .claude/agents/)")

    # Report the outcome to the caller instead of only printing it.
    return 0 if full_response else 1


if __name__ == "__main__":
    # Propagate the result as the process exit status so CI and shell callers
    # can detect an incomplete response (the symptom of issue #406).
    raise SystemExit(asyncio.run(main()))
|
||||
77
scripts/test-docker.sh
Executable file
77
scripts/test-docker.sh
Executable file
|
|
@ -0,0 +1,77 @@
|
|||
#!/bin/bash
# Run SDK tests in a Docker container
# This helps catch Docker-specific issues like #406
#
# Usage:
#   ./scripts/test-docker.sh [unit|e2e|all]
#
# Examples:
#   ./scripts/test-docker.sh unit                          # Run unit tests only
#   ANTHROPIC_API_KEY=sk-... ./scripts/test-docker.sh e2e  # Run e2e tests
#   ANTHROPIC_API_KEY=sk-... ./scripts/test-docker.sh all  # Run all tests

set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"

# Docker build context and Dockerfile.test live at the repository root.
cd "$PROJECT_DIR"

# Print usage information and exit with a non-zero status.
usage() {
    echo "Usage: $0 [unit|e2e|all]"
    echo ""
    echo "Commands:"
    echo " unit - Run unit tests only (no API key needed)"
    echo " e2e - Run e2e tests (requires ANTHROPIC_API_KEY)"
    echo " all - Run both unit and e2e tests"
    echo ""
    echo "Examples:"
    echo " $0 unit"
    echo " ANTHROPIC_API_KEY=sk-... $0 e2e"
    exit 1
}

# Run the unit test suite inside the test image.
run_unit_tests() {
    echo "Running unit tests in Docker..."
    docker run --rm claude-sdk-test \
        python -m pytest tests/ -v
}

# Run the e2e suite inside the test image, forwarding the API key from the host.
run_e2e_tests() {
    echo "Running e2e tests in Docker..."
    docker run --rm -e ANTHROPIC_API_KEY \
        claude-sdk-test python -m pytest e2e-tests/ -v -m e2e
}

MODE="${1:-unit}"

# Validate the request BEFORE building the image, so a typo or a missing API
# key fails immediately instead of after a full Docker build.
case "$MODE" in
    unit|all)
        ;;
    e2e)
        if [ -z "$ANTHROPIC_API_KEY" ]; then
            echo "Error: ANTHROPIC_API_KEY environment variable is required for e2e tests"
            echo ""
            echo "Usage: ANTHROPIC_API_KEY=sk-... $0 e2e"
            exit 1
        fi
        ;;
    *)
        usage
        ;;
esac

echo "Building Docker test image..."
docker build -f Dockerfile.test -t claude-sdk-test .

case "$MODE" in
    unit)
        echo ""
        run_unit_tests
        ;;
    e2e)
        echo ""
        run_e2e_tests
        ;;
    all)
        echo ""
        run_unit_tests

        echo ""
        # In "all" mode a missing key only skips the e2e suite; it is not an error.
        if [ -n "$ANTHROPIC_API_KEY" ]; then
            run_e2e_tests
        else
            echo "Skipping e2e tests (ANTHROPIC_API_KEY not set)"
        fi
        ;;
esac

echo ""
echo "Done!"
|
||||
Loading…
Add table
Add a link
Reference in a new issue