# evals.yml
  # Evals workflow: builds the evals runner image, starts its database and Redis
  # services with Docker Compose, then runs the database migrations and the
  # evals CLI inside the runner container.
  name: Evals

  on:
    pull_request:
      types: [labeled]
    workflow_dispatch:

  env:
    # Environment variables are strings; quoted so no YAML parser retypes them.
    DOCKER_BUILDKIT: '1'
    COMPOSE_DOCKER_CLI_BUILD: '1'

  jobs:
    evals:
      # Run when triggered manually, or when a label whose name contains
      # 'evals' is added to a pull request (`contains` on a string is a
      # substring match, and the trigger only fires on the `labeled` event).
      if: github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'evals')
      runs-on: blacksmith-16vcpu-ubuntu-2404
      timeout-minutes: 45

      defaults:
        run:
          # Default directory for the `run` steps below.
          working-directory: packages/evals

      steps:
        - name: Checkout repository
          uses: actions/checkout@v4

        - name: Set up Docker Buildx
          uses: docker/setup-buildx-action@v3

        # Writes the two env files into the working directory. The heredoc
        # delimiters are quoted so the shell copies the content verbatim
        # instead of expanding `$`, backticks or backslashes that a secret
        # value might contain; the `${{ }}` expression is substituted by
        # GitHub Actions before the script is handed to the shell. A
        # placeholder key is used when the secret is not available.
        - name: Create environment
          run: |
            cat > .env.local << 'EOF'
            OPENROUTER_API_KEY=${{ secrets.OPENROUTER_API_KEY || 'test-key-for-build' }}
            EOF

            cat > .env.development << 'EOF'
            NODE_ENV=development
            DATABASE_URL=postgresql://postgres:password@db:5432/evals_development
            REDIS_URL=redis://redis:6379
            HOST_EXECUTION_METHOD=docker
            EOF

        # Builds the runner image from the repository root without pushing it;
        # `load: true` makes the result available to the local Docker daemon.
        # Layer cache is read from and written to the GitHub Actions cache.
        - name: Build image
          uses: docker/build-push-action@v6
          with:
            context: .
            file: packages/evals/Dockerfile.runner
            tags: evals-runner:latest
            cache-from: type=gha
            cache-to: type=gha,mode=max
            push: false
            load: true

        # NOTE(review): a target name without a tag defaults to `:latest`, so
        # this looks like a no-op re-tag; confirm whether it is still needed.
        - name: Tag image
          run: docker tag evals-runner:latest evals-runner

        # Starts db and redis, waits up to 60 seconds for each to report ready,
        # then checks from inside the runner container that both ports are
        # reachable. The final `docker ps` presumably verifies that the runner
        # can talk to the Docker daemon; confirm against the compose file.
        - name: Start containers
          run: |
            docker compose up -d db redis
            timeout 60 bash -c 'until docker compose exec -T db pg_isready -U postgres; do sleep 2; done'
            timeout 60 bash -c 'until docker compose exec -T redis redis-cli ping | grep -q PONG; do sleep 2; done'
            docker compose run --rm runner sh -c 'nc -z db 5432 && echo "✓ Runner -> Database connection successful"'
            docker compose run --rm runner sh -c 'nc -z redis 6379 && echo "✓ Runner -> Redis connection successful"'
            docker compose run --rm runner docker ps

        - name: Run database migrations
          run: docker compose run --rm runner pnpm --filter @roo-code/evals db:migrate

        - name: Run evals
          run: docker compose run --rm runner pnpm --filter @roo-code/evals cli --ci

        # Always tear down containers, volumes and orphans, even when an
        # earlier step failed.
        - name: Cleanup
          if: always()
          run: docker compose down -v --remove-orphans