From be39d09d03accac5465820c56b32f94d2efd86e3 Mon Sep 17 00:00:00 2001 From: Tomasz Janiszewski Date: Mon, 2 Mar 2026 17:52:48 +0100 Subject: [PATCH 1/7] Add E2E tests to CI with PR comment reporting Adds GitHub Actions workflow to run E2E tests on every PR with: - Automatic cancellation of old runs when new commits are pushed - Single PR comment that updates with results instead of creating duplicates - Test results showing commit SHA and workflow run link - Uses OpenAI API key from GitHub secrets for LLM judge Co-Authored-By: Claude Sonnet 4.5 --- .github/workflows/e2e.yml | 123 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 123 insertions(+) create mode 100644 .github/workflows/e2e.yml diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml new file mode 100644 index 0000000..58a4f53 --- /dev/null +++ b/.github/workflows/e2e.yml @@ -0,0 +1,123 @@ +name: E2E Tests + +on: + pull_request: + types: + - opened + - reopened + - synchronize + +# Cancel in-progress runs for the same PR when new commits are pushed +concurrency: + group: e2e-${{ github.event.pull_request.number }} + cancel-in-progress: true + +jobs: + e2e: + name: Run E2E Tests + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: write + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version-file: go.mod + + - name: Download dependencies + run: find . -name go.mod -execdir go mod download \; + + - name: Setup proto files + run: make proto-setup + + - name: Generate proto descriptors + run: make proto-generate + + - name: Download WireMock + run: make mock-download + + - name: Build mcpchecker + run: cd e2e-tests && ./scripts/build-mcpchecker.sh + + - name: Run E2E tests + id: e2e_tests + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + run: | + set +e # Don't exit on error + cd e2e-tests && ./scripts/run-tests.sh + EXIT_CODE=$? 
+ echo "exit_code=$EXIT_CODE" >> $GITHUB_OUTPUT + exit $EXIT_CODE + + - name: Parse test results + if: always() + id: parse_results + run: | + RESULTS_FILE="e2e-tests/mcpchecker/mcpchecker-stackrox-mcp-e2e-out.json" + if [ -f "$RESULTS_FILE" ]; then + # Count total and passed tests + TOTAL=$(jq '. | length' "$RESULTS_FILE") + PASSED=$(jq '[.[] | select(.taskPassed == true)] | length' "$RESULTS_FILE") + FAILED=$((TOTAL - PASSED)) + + echo "total=$TOTAL" >> $GITHUB_OUTPUT + echo "passed=$PASSED" >> $GITHUB_OUTPUT + echo "failed=$FAILED" >> $GITHUB_OUTPUT + + # Generate summary table + SUMMARY=$(jq -r '.[] | "| \(.taskName) | \(if .taskPassed then "✅ Pass" else "❌ Fail" end) |"' "$RESULTS_FILE") + echo "summary<> $GITHUB_OUTPUT + echo "$SUMMARY" >> $GITHUB_OUTPUT + echo "EOF" >> $GITHUB_OUTPUT + else + echo "total=0" >> $GITHUB_OUTPUT + echo "passed=0" >> $GITHUB_OUTPUT + echo "failed=0" >> $GITHUB_OUTPUT + echo "summary=No results file found" >> $GITHUB_OUTPUT + fi + + - name: Find existing comment + if: always() + uses: peter-evans/find-comment@v3 + id: find_comment + with: + issue-number: ${{ github.event.pull_request.number }} + comment-author: 'github-actions[bot]' + body-includes: '' + + - name: Create or update comment + if: always() + uses: peter-evans/create-or-update-comment@v4 + with: + comment-id: ${{ steps.find_comment.outputs.comment-id }} + issue-number: ${{ github.event.pull_request.number }} + edit-mode: replace + body: | + + ## E2E Test Results + + **Commit:** ${{ github.event.pull_request.head.sha }} + **Workflow Run:** [View Details](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) + + ### Summary + - **Total Tests:** ${{ steps.parse_results.outputs.total }} + - **Passed:** ${{ steps.parse_results.outputs.passed }} ✅ + - **Failed:** ${{ steps.parse_results.outputs.failed }} ❌ + + ### Test Cases + | Test Name | Result | + |-----------|--------| + ${{ steps.parse_results.outputs.summary }} + + --- + *Last updated: 
${{ github.event.head_commit.timestamp }}* + + - name: Fail workflow if tests failed + if: always() && steps.e2e_tests.outputs.exit_code != '0' + run: exit 1 From ab92c37e1f93c1f3edf1c457f70af95880342ba8 Mon Sep 17 00:00:00 2001 From: Tomasz Janiszewski Date: Mon, 2 Mar 2026 17:59:07 +0100 Subject: [PATCH 2/7] Simplify E2E workflow - Remove redundant dependency download (Go handles automatically) - Remove redundant mcpchecker build (script handles it) - Remove unnecessary EXIT_CODE capture - Add failure comment when job fails Co-Authored-By: Claude Sonnet 4.5 --- .github/workflows/e2e.yml | 46 ++++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml index 58a4f53..9b9ae7f 100644 --- a/.github/workflows/e2e.yml +++ b/.github/workflows/e2e.yml @@ -29,9 +29,6 @@ jobs: with: go-version-file: go.mod - - name: Download dependencies - run: find . -name go.mod -execdir go mod download \; - - name: Setup proto files run: make proto-setup @@ -41,19 +38,10 @@ jobs: - name: Download WireMock run: make mock-download - - name: Build mcpchecker - run: cd e2e-tests && ./scripts/build-mcpchecker.sh - - name: Run E2E tests - id: e2e_tests env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - run: | - set +e # Don't exit on error - cd e2e-tests && ./scripts/run-tests.sh - EXIT_CODE=$? 
- echo "exit_code=$EXIT_CODE" >> $GITHUB_OUTPUT - exit $EXIT_CODE + run: cd e2e-tests && ./scripts/run-tests.sh - name: Parse test results if: always() @@ -91,8 +79,8 @@ jobs: comment-author: 'github-actions[bot]' body-includes: '' - - name: Create or update comment - if: always() + - name: Create or update comment - Success + if: success() uses: peter-evans/create-or-update-comment@v4 with: comment-id: ${{ steps.find_comment.outputs.comment-id }} @@ -118,6 +106,28 @@ jobs: --- *Last updated: ${{ github.event.head_commit.timestamp }}* - - name: Fail workflow if tests failed - if: always() && steps.e2e_tests.outputs.exit_code != '0' - run: exit 1 + - name: Create or update comment - Failure + if: failure() + uses: peter-evans/create-or-update-comment@v4 + with: + comment-id: ${{ steps.find_comment.outputs.comment-id }} + issue-number: ${{ github.event.pull_request.number }} + edit-mode: replace + body: | + + ## E2E Test Results + + **Commit:** ${{ github.event.pull_request.head.sha }} + **Workflow Run:** [View Details](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) + + ### ❌ Job Failed + + The E2E test job failed to complete. This could be due to: + - Test execution errors + - Infrastructure issues + - Configuration problems + + Please check the [workflow logs](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for details. 
+ + --- + *Last updated: ${{ github.event.head_commit.timestamp }}* From 29855632a3226418164b682b02f23eb31400a4b9 Mon Sep 17 00:00:00 2001 From: Tomasz Janiszewski Date: Mon, 2 Mar 2026 17:59:23 +0100 Subject: [PATCH 3/7] Use make target instead of direct script invocation Co-Authored-By: Claude Sonnet 4.5 --- .github/workflows/e2e.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml index 9b9ae7f..142ebcd 100644 --- a/.github/workflows/e2e.yml +++ b/.github/workflows/e2e.yml @@ -41,7 +41,7 @@ jobs: - name: Run E2E tests env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - run: cd e2e-tests && ./scripts/run-tests.sh + run: make e2e-test - name: Parse test results if: always() From fff63df38d089c307acabe750ea3c9f51a72220c Mon Sep 17 00:00:00 2001 From: Tomasz Janiszewski Date: Mon, 2 Mar 2026 18:04:47 +0100 Subject: [PATCH 4/7] Use mcpchecker summary command with GitHub Actions format - Use built-in mcpchecker summary instead of manual jq parsing - Add results to GitHub Actions step summary - Cleaner and more maintainable output generation Co-Authored-By: Claude Sonnet 4.5 --- .github/workflows/e2e.yml | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml index 142ebcd..d93c710 100644 --- a/.github/workflows/e2e.yml +++ b/.github/workflows/e2e.yml @@ -49,20 +49,20 @@ jobs: run: | RESULTS_FILE="e2e-tests/mcpchecker/mcpchecker-stackrox-mcp-e2e-out.json" if [ -f "$RESULTS_FILE" ]; then - # Count total and passed tests - TOTAL=$(jq '. 
| length' "$RESULTS_FILE") - PASSED=$(jq '[.[] | select(.taskPassed == true)] | length' "$RESULTS_FILE") - FAILED=$((TOTAL - PASSED)) + # Use mcpchecker summary with GitHub Actions format + cd e2e-tests/mcpchecker + ../../e2e-tests/bin/mcpchecker summary mcpchecker-stackrox-mcp-e2e-out.json --github-output >> $GITHUB_OUTPUT - echo "total=$TOTAL" >> $GITHUB_OUTPUT - echo "passed=$PASSED" >> $GITHUB_OUTPUT - echo "failed=$FAILED" >> $GITHUB_OUTPUT - - # Generate summary table - SUMMARY=$(jq -r '.[] | "| \(.taskName) | \(if .taskPassed then "✅ Pass" else "❌ Fail" end) |"' "$RESULTS_FILE") + # Generate markdown summary table + SUMMARY=$(jq -r '.[] | "| \(.taskName) | \(if .taskPassed then "✅ Pass" else "❌ Fail" end) |"' mcpchecker-stackrox-mcp-e2e-out.json) echo "summary<> $GITHUB_OUTPUT echo "$SUMMARY" >> $GITHUB_OUTPUT echo "EOF" >> $GITHUB_OUTPUT + + # Add to GitHub Actions step summary + echo "## E2E Test Results" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + ../../e2e-tests/bin/mcpchecker summary mcpchecker-stackrox-mcp-e2e-out.json >> $GITHUB_STEP_SUMMARY else echo "total=0" >> $GITHUB_OUTPUT echo "passed=0" >> $GITHUB_OUTPUT From e2ec873bed7c75c4ccfcf5c9657e965ab3592bb8 Mon Sep 17 00:00:00 2001 From: Tomasz Janiszewski Date: Mon, 2 Mar 2026 18:06:52 +0100 Subject: [PATCH 5/7] Use mcpchecker human-readable summary in PR comments Use the built-in text output from mcpchecker summary directly instead of creating custom markdown tables. Simpler and cleaner. 
Co-Authored-By: Claude Sonnet 4.5 --- .github/workflows/e2e.yml | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml index d93c710..02cff62 100644 --- a/.github/workflows/e2e.yml +++ b/.github/workflows/e2e.yml @@ -49,12 +49,13 @@ jobs: run: | RESULTS_FILE="e2e-tests/mcpchecker/mcpchecker-stackrox-mcp-e2e-out.json" if [ -f "$RESULTS_FILE" ]; then - # Use mcpchecker summary with GitHub Actions format cd e2e-tests/mcpchecker + + # Get stats in GitHub Actions format ../../e2e-tests/bin/mcpchecker summary mcpchecker-stackrox-mcp-e2e-out.json --github-output >> $GITHUB_OUTPUT - # Generate markdown summary table - SUMMARY=$(jq -r '.[] | "| \(.taskName) | \(if .taskPassed then "✅ Pass" else "❌ Fail" end) |"' mcpchecker-stackrox-mcp-e2e-out.json) + # Get human-readable summary for PR comment + SUMMARY=$(../../e2e-tests/bin/mcpchecker summary mcpchecker-stackrox-mcp-e2e-out.json) echo "summary<> $GITHUB_OUTPUT echo "$SUMMARY" >> $GITHUB_OUTPUT echo "EOF" >> $GITHUB_OUTPUT @@ -62,7 +63,9 @@ jobs: # Add to GitHub Actions step summary echo "## E2E Test Results" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY - ../../e2e-tests/bin/mcpchecker summary mcpchecker-stackrox-mcp-e2e-out.json >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY + echo "$SUMMARY" >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY else echo "total=0" >> $GITHUB_OUTPUT echo "passed=0" >> $GITHUB_OUTPUT @@ -93,15 +96,9 @@ jobs: **Commit:** ${{ github.event.pull_request.head.sha }} **Workflow Run:** [View Details](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) - ### Summary - - **Total Tests:** ${{ steps.parse_results.outputs.total }} - - **Passed:** ${{ steps.parse_results.outputs.passed }} ✅ - - **Failed:** ${{ steps.parse_results.outputs.failed }} ❌ - - ### Test Cases - | Test Name | Result | - |-----------|--------| + ``` ${{ 
steps.parse_results.outputs.summary }} + ``` --- *Last updated: ${{ github.event.head_commit.timestamp }}* From 6f795c2ac43b5172f19fa9dd98fa445ce15336cf Mon Sep 17 00:00:00 2001 From: Tomasz Janiszewski Date: Mon, 2 Mar 2026 18:14:41 +0100 Subject: [PATCH 6/7] Use OpenAI agent (gpt-5-nano) for E2E tests Changed from claude-code to OpenAI agent to support CI environments where Claude CLI is not available. Using gpt-5-nano for cost efficiency. - Update eval.yaml to use builtin.openai-agent with gpt-5-nano - Set MODEL_BASE_URL and MODEL_KEY env vars in run-tests.sh - Pass MODEL_KEY in GitHub Actions workflow Co-Authored-By: Claude Sonnet 4.5 --- .github/workflows/e2e.yml | 1 + e2e-tests/mcpchecker/eval.yaml | 4 ++-- e2e-tests/scripts/run-tests.sh | 6 ++++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml index 02cff62..6a2f38f 100644 --- a/.github/workflows/e2e.yml +++ b/.github/workflows/e2e.yml @@ -41,6 +41,7 @@ jobs: - name: Run E2E tests env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + MODEL_KEY: ${{ secrets.OPENAI_API_KEY }} run: make e2e-test - name: Parse test results diff --git a/e2e-tests/mcpchecker/eval.yaml b/e2e-tests/mcpchecker/eval.yaml index 6c14da2..3b6e73e 100644 --- a/e2e-tests/mcpchecker/eval.yaml +++ b/e2e-tests/mcpchecker/eval.yaml @@ -3,8 +3,8 @@ metadata: name: "stackrox-mcp-e2e" config: agent: - type: "builtin.claude-code" - model: "claude-sonnet-4-5" + type: "builtin.openai-agent" + model: "gpt-5-nano" llmJudge: env: typeKey: JUDGE_TYPE diff --git a/e2e-tests/scripts/run-tests.sh b/e2e-tests/scripts/run-tests.sh index bbed336..1e01f41 100755 --- a/e2e-tests/scripts/run-tests.sh +++ b/e2e-tests/scripts/run-tests.sh @@ -58,8 +58,9 @@ if [ ! 
-f "$E2E_DIR/bin/mcpchecker" ]; then fi -# Set agent model (defaults to claude-sonnet-4-5) -export AGENT_MODEL_NAME="${AGENT_MODEL_NAME:-claude-sonnet-4-5}" +# Set agent environment variables (use OpenAI) +export MODEL_BASE_URL="${MODEL_BASE_URL:-https://api.openai.com/v1}" +export MODEL_KEY="${MODEL_KEY:-$OPENAI_API_KEY}" # Set judge environment variables (use OpenAI) export JUDGE_BASE_URL="${JUDGE_BASE_URL:-https://api.openai.com/v1}" @@ -68,6 +69,7 @@ export JUDGE_MODEL_NAME="${JUDGE_MODEL_NAME:-gpt-5-nano}" echo "Configuration:" echo " Central URL: $STACKROX_MCP__CENTRAL__URL (WireMock)" +echo " Agent: gpt-5-nano (OpenAI)" echo " Judge: $JUDGE_MODEL_NAME (OpenAI)" echo " MCP Server: stackrox-mcp (via go run)" echo "" From d78d8874eef7ce161783177de047e671a380bcfc Mon Sep 17 00:00:00 2001 From: Tomasz Janiszewski Date: Mon, 2 Mar 2026 18:22:07 +0100 Subject: [PATCH 7/7] Remove "Last updated" footer and shorten failure comment The footer read github.event.head_commit.timestamp, which is only populated for push events and is therefore empty on pull_request runs. The failure comment is reduced to a heading plus the commit and workflow-run links. Signed-off-by: Tomasz Janiszewski --- .github/workflows/e2e.yml | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml index 6a2f38f..60c8242 100644 --- a/.github/workflows/e2e.yml +++ b/.github/workflows/e2e.yml @@ -101,9 +101,6 @@ jobs: ${{ steps.parse_results.outputs.summary }} ``` - --- - *Last updated: ${{ github.event.head_commit.timestamp }}* - - name: Create or update comment - Failure if: failure() uses: peter-evans/create-or-update-comment@v4 @@ -113,19 +110,7 @@ jobs: edit-mode: replace body: | - ## E2E Test Results + ### ❌ Job Failed **Commit:** ${{ github.event.pull_request.head.sha }} **Workflow Run:** [View Details](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) - - ### ❌ Job Failed - - The E2E test job failed to complete. This could be due to: - - Test execution errors - - Infrastructure issues - - Configuration problems - - Please check the [workflow logs](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for details. 
- - --- - *Last updated: ${{ github.event.head_commit.timestamp }}*