diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml
new file mode 100644
index 0000000..60c8242
--- /dev/null
+++ b/.github/workflows/e2e.yml
@@ -0,0 +1,122 @@
+# Runs the E2E suite for every pull request update and reports the outcome in a
+# single PR comment that is created once and replaced on later runs.
+name: E2E Tests
+
+on:
+  pull_request:
+    types:
+      - opened
+      - reopened
+      - synchronize
+
+# Cancel in-progress runs for the same PR when new commits are pushed
+concurrency:
+  group: e2e-${{ github.event.pull_request.number }}
+  cancel-in-progress: true
+
+jobs:
+  e2e:
+    name: Run E2E Tests
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      # Needed by the find-comment / create-or-update-comment steps below.
+      pull-requests: write
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Go
+        uses: actions/setup-go@v5
+        with:
+          go-version-file: go.mod
+
+      - name: Setup proto files
+        run: make proto-setup
+
+      - name: Generate proto descriptors
+        run: make proto-generate
+
+      - name: Download WireMock
+        run: make mock-download
+
+      - name: Run E2E tests
+        env:
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          MODEL_KEY: ${{ secrets.OPENAI_API_KEY }}
+        run: make e2e-test
+
+      # Runs even when the tests fail so a summary output is always published.
+      - name: Parse test results
+        if: always()
+        id: parse_results
+        run: |
+          RESULTS_FILE="e2e-tests/mcpchecker/mcpchecker-stackrox-mcp-e2e-out.json"
+          if [ -f "$RESULTS_FILE" ]; then
+            cd e2e-tests/mcpchecker
+
+            # Get stats in GitHub Actions format
+            ../../e2e-tests/bin/mcpchecker summary mcpchecker-stackrox-mcp-e2e-out.json --github-output >> "$GITHUB_OUTPUT"
+
+            # Get human-readable summary for PR comment
+            SUMMARY=$(../../e2e-tests/bin/mcpchecker summary mcpchecker-stackrox-mcp-e2e-out.json)
+            # Multi-line values need the name<<DELIMITER ... DELIMITER form.
+            echo "summary<<EOF" >> "$GITHUB_OUTPUT"
+            echo "$SUMMARY" >> "$GITHUB_OUTPUT"
+            echo "EOF" >> "$GITHUB_OUTPUT"
+
+            # Add to GitHub Actions step summary
+            echo "## E2E Test Results" >> "$GITHUB_STEP_SUMMARY"
+            echo "" >> "$GITHUB_STEP_SUMMARY"
+            echo '```' >> "$GITHUB_STEP_SUMMARY"
+            echo "$SUMMARY" >> "$GITHUB_STEP_SUMMARY"
+            echo '```' >> "$GITHUB_STEP_SUMMARY"
+          else
+            echo "total=0" >> "$GITHUB_OUTPUT"
+            echo "passed=0" >> "$GITHUB_OUTPUT"
+            echo "failed=0" >> "$GITHUB_OUTPUT"
+            echo "summary=No results file found" >> "$GITHUB_OUTPUT"
+          fi
+
+      # The hidden HTML marker lets later runs find and replace this workflow's comment.
+      - name: Find existing comment
+        if: always()
+        uses: peter-evans/find-comment@v3
+        id: find_comment
+        with:
+          issue-number: ${{ github.event.pull_request.number }}
+          comment-author: 'github-actions[bot]'
+          body-includes: '<!-- e2e-test-results -->'
+
+      - name: Create or update comment - Success
+        if: success()
+        uses: peter-evans/create-or-update-comment@v4
+        with:
+          comment-id: ${{ steps.find_comment.outputs.comment-id }}
+          issue-number: ${{ github.event.pull_request.number }}
+          edit-mode: replace
+          body: |
+            <!-- e2e-test-results -->
+            ## E2E Test Results
+
+            **Commit:** ${{ github.event.pull_request.head.sha }}
+            **Workflow Run:** [View Details](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})
+
+            ```
+            ${{ steps.parse_results.outputs.summary }}
+            ```
+
+      - name: Create or update comment - Failure
+        if: failure()
+        uses: peter-evans/create-or-update-comment@v4
+        with:
+          comment-id: ${{ steps.find_comment.outputs.comment-id }}
+          issue-number: ${{ github.event.pull_request.number }}
+          edit-mode: replace
+          body: |
+            <!-- e2e-test-results -->
+            ### ❌ Job Failed
+
+            **Commit:** ${{ github.event.pull_request.head.sha }}
+            **Workflow Run:** [View Details](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})
diff --git a/e2e-tests/mcpchecker/eval.yaml b/e2e-tests/mcpchecker/eval.yaml
index 6c14da2..3b6e73e 100644
--- a/e2e-tests/mcpchecker/eval.yaml
+++ b/e2e-tests/mcpchecker/eval.yaml
@@ -3,8 +3,8 @@ metadata:
   name: "stackrox-mcp-e2e"
 config:
   agent:
-    type: "builtin.claude-code"
-    model: "claude-sonnet-4-5"
+    type: "builtin.openai-agent"
+    model: "gpt-5-nano"
   llmJudge:
     env:
       typeKey: JUDGE_TYPE
diff --git a/e2e-tests/scripts/run-tests.sh b/e2e-tests/scripts/run-tests.sh
index bbed336..1e01f41 100755
--- a/e2e-tests/scripts/run-tests.sh
+++ b/e2e-tests/scripts/run-tests.sh
@@ -58,8 +58,9 @@ if [ ! -f "$E2E_DIR/bin/mcpchecker" ]; then
 fi


-# Set agent model (defaults to claude-sonnet-4-5)
-export AGENT_MODEL_NAME="${AGENT_MODEL_NAME:-claude-sonnet-4-5}"
+# Set agent environment variables (use OpenAI)
+export MODEL_BASE_URL="${MODEL_BASE_URL:-https://api.openai.com/v1}"
+export MODEL_KEY="${MODEL_KEY:-$OPENAI_API_KEY}"

 # Set judge environment variables (use OpenAI)
 export JUDGE_BASE_URL="${JUDGE_BASE_URL:-https://api.openai.com/v1}"
@@ -68,6 +69,7 @@ export JUDGE_MODEL_NAME="${JUDGE_MODEL_NAME:-gpt-5-nano}"

 echo "Configuration:"
 echo " Central URL: $STACKROX_MCP__CENTRAL__URL (WireMock)"
+echo " Agent: gpt-5-nano (OpenAI)"
 echo " Judge: $JUDGE_MODEL_NAME (OpenAI)"
 echo " MCP Server: stackrox-mcp (via go run)"
 echo ""