Implementation Guide
Purpose: Step-by-step guide to implement the automation system from scratch. Audience: Developers, future maintainers Time: 1-2 days for full implementation + testing
Prerequisites
Required Tools
# Check versions
python3 --version # 3.10+
node --version # 20+
npm --version # 10+
git --version # 2.30+
# Install Python packages
pip install anthropic google-generativeai langgraph
# Verify Jorvis dependencies
cd analytics-platform
npm ci # Install if not already done
Required API Keys
# Get API keys
# 1. Claude: https://console.anthropic.com/settings/keys
# 2. Gemini: https://aistudio.google.com/app/apikey
# Set environment variables
export ANTHROPIC_API_KEY="sk-ant-..."
export GOOGLE_API_KEY="AIza..."
# Verify
echo $ANTHROPIC_API_KEY # Should print key
Step 1: Create Core Workflow Script
File: scripts/langgraph_workflow.py
Full implementation (850 lines):
# Create file
touch scripts/langgraph_workflow.py
chmod +x scripts/langgraph_workflow.py
Add shebang and imports:
#!/usr/bin/env python3
"""
LangGraph 2-Agent Workflow for Jorvis Phase Q
Architect (Opus) + Executor (Gemini) + Quality Gate
"""
import os
import sys
import json
import subprocess
import time
import re
from typing import TypedDict, List, Optional, Literal
from datetime import datetime
import anthropic
import google.generativeai as genai
from langgraph.graph import StateGraph, END
Add state definition:
class TaskState(TypedDict):
    """State shared by all workflow nodes (LangGraph state schema)."""
    task_id: str                # task identifier, e.g. "Task-104"
    task_description: str       # task text, loaded via load_task_description()
    adr: Optional[str]          # architect's design record; None until design runs
    code_files: List[str]       # files produced by the executor node
    validation_errors: List[dict]   # quality-gate failures; dicts carry "check" and "error" keys
    conversation_history: List[dict]    # accumulated agent exchange log
    review_feedback: Optional[str]  # architect feedback fed into the next iteration
    status: Literal["design", "implementation", "review", "approved", "failed"]  # current phase
    iteration: int              # 1-based iteration counter
    max_iterations: int         # cap set by determine_max_iterations()
    warnings: List[str]         # non-fatal issues collected during the run
Implement all node functions (see full code in previous responses):
architect_design()executor_implement()run_quality_gate()architect_review()
Implement all helper functions:
load_invariants()load_current_dependencies()validate_adr()parse_and_write_files()classify_errors()is_hallucination()auto_fix_trivial_errors()find_symbol_definition()calculate_relative_import()add_import_to_files()prefix_unused_var()call_llm_with_retry()generate_tactical_feedback()generate_strategic_feedback()load_task_description()determine_max_iterations()
Create workflow:
def create_workflow() -> StateGraph:
    """Assemble and compile the 2-agent LangGraph state machine.

    Topology: design -> implement -> quality gate, with the gate either
    approving (END) or bouncing to review, and review either retrying
    implementation, restarting design, or giving up (END).
    """
    graph = StateGraph(TaskState)

    # Register every node under the same name as its function.
    for node_name, node_fn in (
        ("architect_design", architect_design),
        ("executor_implement", executor_implement),
        ("run_quality_gate", run_quality_gate),
        ("architect_review", architect_review),
    ):
        graph.add_node(node_name, node_fn)

    graph.set_entry_point("architect_design")

    # Unconditional forward edges of the happy path.
    graph.add_edge("architect_design", "executor_implement")
    graph.add_edge("executor_implement", "run_quality_gate")

    def route_on_status(state):
        # Branching is driven entirely by the "status" field.
        return state["status"]

    graph.add_conditional_edges(
        "run_quality_gate",
        route_on_status,
        {
            "approved": END,
            "review": "architect_review",
        },
    )
    graph.add_conditional_edges(
        "architect_review",
        route_on_status,
        {
            "implementation": "executor_implement",
            "design": "architect_design",
            "failed": END,
        },
    )
    return graph.compile()
Add main function:
def main():
    """CLI entry point: run the full workflow for one task id.

    Usage: python langgraph_workflow.py <task_id>
    Exits with status 1 when no task id is supplied.
    """
    if len(sys.argv) < 2:
        print("Usage: python langgraph_workflow.py <task_id>")
        sys.exit(1)

    task_id = sys.argv[1]
    task_description = load_task_description(task_id)

    # Seed state: every run starts in the design phase on iteration 1.
    initial_state: TaskState = {
        "task_id": task_id,
        "task_description": task_description,
        "adr": None,
        "code_files": [],
        "validation_errors": [],
        "conversation_history": [],
        "review_feedback": None,
        "status": "design",
        "iteration": 1,
        "max_iterations": determine_max_iterations(task_description),
        "warnings": []
    }

    app = create_workflow()

    print(f"\n🚀 Starting workflow for {task_id}")
    print(f"   Max iterations: {initial_state['max_iterations']}")

    final_state = app.invoke(initial_state)

    # Report results
    print(f"\n{'='*60}")
    print(f"FINAL RESULT: {final_state['status'].upper()}")
    print(f"{'='*60}")
    print(f"Iterations: {final_state['iteration']}")
    print(f"Files generated: {len(final_state['code_files'])}")

    if final_state["status"] == "approved":
        # NOTE: multi_task_runner.py greps stdout for this exact success
        # marker ("✅ Task" + "completed successfully") — keep them in sync.
        print(f"\n✅ Task {task_id} completed successfully!")
        print(f"\nNext steps:")
        print(f"1. Review changes: git diff")
        print(f"2. Create PR: gh pr create --title 'feat: {task_id}'")
    else:
        print(f"\n❌ Task {task_id} failed after {final_state['iteration']} iterations")
        for error in final_state["validation_errors"]:
            print(f"  - {error['check']}: {error['error'][:100]}")
Step 2: Create Multi-Task Runner
File: scripts/multi_task_runner.py
touch scripts/multi_task_runner.py
chmod +x scripts/multi_task_runner.py
Full implementation:
#!/usr/bin/env python3
"""Multi-task pilot execution with git branch isolation"""
import subprocess
import json
import re
from datetime import datetime
from typing import List
def run_single_task_isolated(task_id: str, base_branch: str = "main") -> dict:
    """Run one workflow task on its own git branch and report the outcome.

    The repo is reset to an up-to-date `base_branch`, a fresh
    `task/<id>` branch is created, the workflow script is executed on it,
    and the branch is deleted on success (kept on failure for debugging).

    Returns a dict with task_id, success flag, duration_sec, iteration
    count, branch name, and truncated stdout/stderr.
    """
    branch_name = f"task/{task_id.lower()}"
    print(f"\n[{task_id}] Creating branch {branch_name}...")

    # Clean state: start from a current base branch.
    subprocess.run(["git", "checkout", base_branch], check=True)
    subprocess.run(["git", "pull", "origin", base_branch], check=True)

    # Delete a stale branch from a previous run, if any (failure is fine).
    subprocess.run(["git", "branch", "-D", branch_name],
                   capture_output=True)
    subprocess.run(["git", "checkout", "-b", branch_name], check=True)

    # Run workflow and time it.
    print(f"[{task_id}] Running workflow...")
    start_time = datetime.now()
    result = subprocess.run(
        ["python3", "scripts/langgraph_workflow.py", task_id],
        capture_output=True,
        text=True
    )
    end_time = datetime.now()
    duration = (end_time - start_time).total_seconds()

    # Detect success from the workflow's final report line
    # ("✅ Task <id> completed successfully!").
    success = "✅ Task" in result.stdout and "completed successfully" in result.stdout

    # Extract the iteration count from the "Iterations: N" report line.
    iterations_match = re.search(r'Iterations:\s*(\d+)', result.stdout)
    iterations = int(iterations_match.group(1)) if iterations_match else 0

    # Cleanup: successful branches are removed, failed ones preserved.
    print(f"[{task_id}] Cleaning up...")
    subprocess.run(["git", "checkout", base_branch], check=True)
    if success:
        subprocess.run(["git", "branch", "-D", branch_name], check=True)
    else:
        print(f"[{task_id}] ⚠️ Keeping branch {branch_name} for debugging")

    return {
        "task_id": task_id,
        "success": success,
        "duration_sec": duration,
        "iterations": iterations,
        "branch": branch_name,
        "stdout": result.stdout[-1000:],
        "stderr": result.stderr[-1000:] if result.stderr else ""
    }
def run_pilot_sequential(task_ids: List[str]) -> dict:
    """Run every pilot task one after another and write pilot_report.json.

    Each task runs in its own isolated branch via run_single_task_isolated;
    exceptions are caught per task so one failure cannot abort the pilot.
    Returns the aggregated metrics dict that is also saved to disk.
    """
    print(f"🚀 Starting SEQUENTIAL pilot: {len(task_ids)} tasks")

    results = []
    for i, task_id in enumerate(task_ids, 1):
        print(f"\n{'='*60}")
        print(f"TASK {i}/{len(task_ids)}: {task_id}")
        print(f"{'='*60}")
        try:
            result = run_single_task_isolated(task_id)
            results.append(result)
            status = "✅ SUCCESS" if result["success"] else "❌ FAILED"
            print(f"\n[{task_id}] {status}")
            print(f"  Duration: {result['duration_sec']:.1f}s")
            print(f"  Iterations: {result['iterations']}")
        except Exception as e:
            # Record the failure and keep going with the remaining tasks.
            print(f"\n[{task_id}] ❌ EXCEPTION: {e}")
            results.append({
                "task_id": task_id,
                "success": False,
                "error": str(e),
                "iterations": 0
            })

    # Metrics: average iterations only over successful, non-zero runs.
    successful = sum(1 for r in results if r["success"])
    total = len(results)
    successful_iterations = [r["iterations"] for r in results
                             if r["success"] and r.get("iterations", 0) > 0]
    avg_iterations = (sum(successful_iterations) / len(successful_iterations)
                      if successful_iterations else 0)

    metrics = {
        "total_tasks": total,
        "successful": successful,
        "failed": total - successful,
        "success_rate": (successful / total * 100) if total > 0 else 0,
        "avg_iterations": round(avg_iterations, 1),
        "results": results
    }

    # Persist the report for later analysis (Step 7 / jq queries).
    with open("pilot_report.json", "w") as f:
        json.dump(metrics, f, indent=2)

    print(f"\n{'='*60}")
    print(f"PILOT COMPLETE")
    print(f"{'='*60}")
    print(f"Success: {successful}/{total} ({metrics['success_rate']:.1f}%)")
    print(f"Avg Iterations: {metrics['avg_iterations']}")
    print(f"Report: pilot_report.json")

    return metrics
def handle_pilot_results(metrics: dict) -> str:
    """Translate pilot metrics into a go/no-go decision.

    Thresholds: >=70% GO, >=60% GO with adjustments, >=50% retry the
    pilot, otherwise no-go. Prints the analysis and returns the decision
    string.
    """
    rate = metrics["success_rate"]
    avg_iters = metrics["avg_iterations"]

    banner = "=" * 60
    print(f"\n{banner}")
    print("PILOT RESULTS ANALYSIS")
    print(banner)
    print(f"Success Rate: {rate}%")
    print(f"Avg Iterations: {avg_iters}")

    # Walk the thresholds from best to worst; first match wins.
    if rate >= 70:
        decision, action = (
            "GO",
            "Proceed to Phase Q with current settings",
        )
    elif rate >= 60:
        decision, action = (
            "GO_WITH_ADJUSTMENTS",
            "Proceed with: +1 max_iterations, monitor first 3 tasks",
        )
    elif rate >= 50:
        decision, action = (
            "RETRY_PILOT",
            "Retry pilot: analyze failures, adjust prompts, re-run",
        )
    else:
        decision, action = (
            "NO_GO",
            "Manual execution recommended for Phase Q",
        )

    print(f"\nDECISION: {decision}")
    print(f"ACTION: {action}")
    return decision
if __name__ == "__main__":
    # Pilot task list: 10 representative Phase Q tasks (edit as needed
    # before running — see "Step 5: Pilot Preparation" in the guide).
    pilot_tasks = [
        "Task-104",
        "Task-105",
        "Task-106",
        "Task-107",
        "Task-108",
        "Task-109",
        "Task-110",
        "Task-111",
        "Task-112",
        "Task-113",
    ]
    # Run all tasks sequentially, then print the go/no-go decision.
    metrics = run_pilot_sequential(pilot_tasks)
    decision = handle_pilot_results(metrics)
Step 3: Create Configuration File
File: CONFIG.yaml
touch CONFIG.yaml
# Jorvis Task Automation Configuration
# Version: 1.0.0
# Rollback thresholds
rollback:
success_rate_go: 70 # ≥70% → GO
success_rate_go_adjusted: 60 # ≥60% → GO with adjustments
success_rate_retry: 50 # ≥50% → Retry pilot
human_intervention_max: 35 # ≤35% intervention for GO
# Iteration limits
iterations:
simple_max: 3
moderate_max: 5
complex_max: 7
# API retry
api_retry:
max_attempts: 3
backoff_base: 2 # seconds
# Multi-task execution
execution:
parallel_enabled: false # MUST be false
base_branch: main
cleanup_success_branches: true
# Safe imports whitelist (for auto-fix)
safe_imports:
- Injectable
- Controller
- Get
- Post
- Body
- Param
- Query
- GraphOrchestratorService
- SchemaContextService
- ToolRouterService
- SqlTraceService
- SanitizationService
- GraphState
- ConversationQuestionDto
- TransparencyTrace
Step 4: Testing
Test 1: Single Task (Synthetic)
# Create synthetic task in TASK_BOARD.md
echo "**Task-999**: Test automation - Add console.log to main.ts" >> docs/agent_ops/TASK_BOARD.md
# Run workflow
python3 scripts/langgraph_workflow.py Task-999
# Expected output:
# ============================================================
# ARCHITECT DESIGN (Iteration 1)
# ============================================================
# ✅ ADR Created
#
# ============================================================
# EXECUTOR IMPLEMENT (Iteration 1)
# ============================================================
# ✅ Generated 1 files
#
# ============================================================
# QUALITY GATE
# ============================================================
# ✅ Lint passed
# ✅ Build passed
# ✅ Tests passed
# ✅ Coverage 82% ≥ 80%
#
# ============================================================
# FINAL RESULT: APPROVED
# ============================================================
# Iterations: 1
Test 2: Error Handling
# Create task with known error
echo "**Task-998**: Import non-existent service 'FakeService'" >> docs/agent_ops/TASK_BOARD.md
# Run
python3 scripts/langgraph_workflow.py Task-998
# Expected behavior:
# - Iteration 1: Gemini generates code with FakeService
# - Quality Gate: Build fails (cannot find name 'FakeService')
# - Architect Review: Classifies as hallucination
# - Iteration 2: Gemini regenerates without FakeService
# - Quality Gate: Passes
# - APPROVED
Test 3: Multi-Task Runner
# Run on 3 synthetic tasks
python3 scripts/multi_task_runner.py
# Edit script to use Task-997, Task-998, Task-999
# Expected output:
# TASK 1/3: Task-997
# [Task-997] ✅ SUCCESS (45.3s, 1 iteration)
#
# TASK 2/3: Task-998
# [Task-998] ✅ SUCCESS (89.7s, 2 iterations)
#
# TASK 3/3: Task-999
# [Task-999] ✅ SUCCESS (32.1s, 1 iteration)
#
# PILOT COMPLETE
# Success: 3/3 (100.0%)
# Avg Iterations: 1.3
# Report: pilot_report.json
Step 5: Pilot Preparation
Select 10 Real Phase Q Tasks
# Review TASK_BOARD.md
cat docs/agent_ops/TASK_BOARD.md | grep "Task-1"
# Select tasks similar to:
# - Task-104: Contextual Explanations (moderate complexity)
# - Task-105: Error Analysis (moderate)
# - Task-106: Session Consistency (complex)
# - ... etc.
# Update multi_task_runner.py:
pilot_tasks = [
"Task-104",
"Task-105",
"Task-106",
"Task-107",
"Task-108",
"Task-109",
"Task-110",
"Task-111",
"Task-112",
"Task-113",
]
Pre-Pilot Checklist
# 1. Verify Quality Gate works locally
npm --prefix analytics-platform run lint
npm --prefix analytics-platform run build
npm --prefix analytics-platform test -- --runInBand
# 2. Verify API keys
echo $ANTHROPIC_API_KEY | wc -c # Should be >50
echo $GOOGLE_API_KEY | wc -c # Should be >30
# 3. Verify git state
git status # Should be clean
git branch # Should be on main
# 4. Create backup branch
git checkout -b backup-before-pilot
git checkout main
# 5. Notify team (if applicable)
# Post to Slack: "Starting automation pilot on 10 Phase Q tasks"
Step 6: Run Pilot
# Start pilot
python3 scripts/multi_task_runner.py
# Monitor progress (in separate terminal)
watch -n 10 'git branch | grep task/'
# Expected duration: 3-6 hours for 10 tasks
During Pilot
Monitor for:
- API rate limits (script will retry automatically)
- Unexpected errors (check stderr in report)
- Git branch accumulation (should auto-cleanup on success)
If issues occur:
- Check
pilot_report.jsonfor task-specific errors - Review failed task branches:
git log task/task-XXX - Debug single task:
python3 scripts/langgraph_workflow.py Task-XXX
Step 7: Analyze Results
# View report
cat pilot_report.json | jq '.'
# Key metrics
cat pilot_report.json | jq '{
success_rate,
avg_iterations,
failed_tasks: [.results[] | select(.success == false) | .task_id]
}'
# Example output:
# {
# "success_rate": 70.0,
# "avg_iterations": 2.4,
# "failed_tasks": ["Task-106", "Task-110", "Task-112"]
# }
Decision Matrix
| Success Rate | Decision | Action |
|---|---|---|
| ≥70% | GO | Proceed to Phase Q |
| 60-69% | GO_ADJUSTED | Increase max_iterations +1, monitor |
| 50-59% | RETRY | Analyze failures, adjust prompts, re-run |
| <50% | NO_GO | Manual execution for Phase Q |
Step 8: Phase Q Deployment (If GO)
# Update multi_task_runner.py with Phase Q tasks
phase_q_tasks = [
"Task-104",
"Task-105",
# ... 9 tasks total
]
# Run Phase Q
python3 scripts/multi_task_runner.py
# Monitor closely
# - Abort if >3 failures
# - Human review after each task completion
Troubleshooting
Issue: "Cannot find module 'langgraph'"
Solution:
pip install langgraph
# Or create virtual environment
python3 -m venv venv
source venv/bin/activate
pip install -r requirements.txt
Issue: "API key not found"
Solution:
# Check env vars
echo $ANTHROPIC_API_KEY
# If empty, export
export ANTHROPIC_API_KEY="sk-ant-..."
# Persist in shell profile
echo 'export ANTHROPIC_API_KEY="sk-ant-..."' >> ~/.zshrc
source ~/.zshrc
Issue: "Git branch already exists"
Solution:
# Delete all task branches
git branch | grep "task/" | xargs git branch -D
# Or specific branch
git branch -D task/task-104
Issue: "Quality Gate fails on clean code"
Solution:
# Ensure on main branch
git checkout main
git pull origin main
# Clean build
rm -rf analytics-platform/node_modules analytics-platform/dist
npm --prefix analytics-platform ci
npm --prefix analytics-platform run build
# Re-run quality checks
npm --prefix analytics-platform run lint
npm --prefix analytics-platform test
Maintenance
Weekly: Review Safe Imports Whitelist
# Check if new services added
git log --since="1 week ago" --oneline -- "analytics-platform/src/**/*.service.ts"
# If new services, update CONFIG.yaml
Monthly: Update Hallucination Patterns
# Review pilot reports for new hallucination types
cat pilot_report.json | jq '.results[] | select(.success == false)'
# Add patterns to langgraph_workflow.py ā is_hallucination()
Rollback Procedure
If Pilot Fails (<50% success)
# 1. Stop execution
# Press Ctrl+C if still running
# 2. Return to the tracked main baseline without history rewrite
git fetch origin
git checkout main
git pull --ff-only origin main
# 3. Delete local task branches if they are no longer needed
git for-each-ref --format='%(refname:short)' refs/heads/task/ | xargs -r git branch -D
# 4. Review failure patterns
cat pilot_report.json | jq '.results[] | select(.success == false) | {task_id, error}'
# 5. Document failures
# Create docs/automation/PILOT_FAILURE_ANALYSIS.md
# 6. Manual execution for Phase Q
# Follow existing manual workflow
Important
This rollback path is for local pilot cleanup only. Do not use hard reset or any direct push to main as part of pilot recovery.
Next: See docs/automation/TROUBLESHOOTING.md for detailed error resolution.
Last Updated: 2026-01-19