From be39d09d03accac5465820c56b32f94d2efd86e3 Mon Sep 17 00:00:00 2001
From: Tomasz Janiszewski <tomek@redhat.com>
Date: Mon, 2 Mar 2026 17:52:48 +0100
Subject: [PATCH 1/7] Add E2E tests to CI with PR comment reporting

Add a GitHub Actions workflow that runs the E2E tests on every PR, with:
- Automatic cancellation of in-progress runs when new commits are pushed
- A single PR comment that is updated in place instead of creating duplicates
- Test results that show the commit SHA and a link to the workflow run
- The OpenAI API key for the LLM judge supplied from GitHub secrets

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
---
 .github/workflows/e2e.yml | 123 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 123 insertions(+)
 create mode 100644 .github/workflows/e2e.yml

diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml
new file mode 100644
index 0000000..58a4f53
--- /dev/null
+++ b/.github/workflows/e2e.yml
@@ -0,0 +1,123 @@
+name: E2E Tests
+
+on:
+  pull_request:
+    types:
+      - opened
+      - reopened
+      - synchronize
+
+# Cancel in-progress runs for the same PR when new commits are pushed
+concurrency:
+  group: e2e-${{ github.event.pull_request.number }}
+  cancel-in-progress: true
+
+jobs:
+  e2e:
+    name: Run E2E Tests
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      pull-requests: write
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Go
+        uses: actions/setup-go@v5
+        with:
+          go-version-file: go.mod
+
+      - name: Download dependencies
+        run: find . -name go.mod -execdir go mod download \;
+
+      - name: Setup proto files
+        run: make proto-setup
+
+      - name: Generate proto descriptors
+        run: make proto-generate
+
+      - name: Download WireMock
+        run: make mock-download
+
+      - name: Build mcpchecker
+        run: cd e2e-tests && ./scripts/build-mcpchecker.sh
+
+      - name: Run E2E tests
+        id: e2e_tests
+        env:
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+        run: |
+          set +e  # Don't exit on error
+          cd e2e-tests && ./scripts/run-tests.sh
+          EXIT_CODE=$?
+          echo "exit_code=$EXIT_CODE" >> $GITHUB_OUTPUT
+          exit $EXIT_CODE
+
+      - name: Parse test results
+        if: always()
+        id: parse_results
+        run: |
+          RESULTS_FILE="e2e-tests/mcpchecker/mcpchecker-stackrox-mcp-e2e-out.json"
+          if [ -f "$RESULTS_FILE" ]; then
+            # Count total and passed tests
+            TOTAL=$(jq '. | length' "$RESULTS_FILE")
+            PASSED=$(jq '[.[] | select(.taskPassed == true)] | length' "$RESULTS_FILE")
+            FAILED=$((TOTAL - PASSED))
+
+            echo "total=$TOTAL" >> $GITHUB_OUTPUT
+            echo "passed=$PASSED" >> $GITHUB_OUTPUT
+            echo "failed=$FAILED" >> $GITHUB_OUTPUT
+
+            # Generate summary table
+            SUMMARY=$(jq -r '.[] | "| \(.taskName) | \(if .taskPassed then "✅ Pass" else "❌ Fail" end) |"' "$RESULTS_FILE")
+            echo "summary<<EOF" >> $GITHUB_OUTPUT
+            echo "$SUMMARY" >> $GITHUB_OUTPUT
+            echo "EOF" >> $GITHUB_OUTPUT
+          else
+            echo "total=0" >> $GITHUB_OUTPUT
+            echo "passed=0" >> $GITHUB_OUTPUT
+            echo "failed=0" >> $GITHUB_OUTPUT
+            echo "summary=No results file found" >> $GITHUB_OUTPUT
+          fi
+
+      - name: Find existing comment
+        if: always()
+        uses: peter-evans/find-comment@v3
+        id: find_comment
+        with:
+          issue-number: ${{ github.event.pull_request.number }}
+          comment-author: 'github-actions[bot]'
+          body-includes: '<!-- e2e-test-results -->'
+
+      - name: Create or update comment
+        if: always()
+        uses: peter-evans/create-or-update-comment@v4
+        with:
+          comment-id: ${{ steps.find_comment.outputs.comment-id }}
+          issue-number: ${{ github.event.pull_request.number }}
+          edit-mode: replace
+          body: |
+            <!-- e2e-test-results -->
+            ## E2E Test Results
+
+            **Commit:** ${{ github.event.pull_request.head.sha }}
+            **Workflow Run:** [View Details](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})
+
+            ### Summary
+            - **Total Tests:** ${{ steps.parse_results.outputs.total }}
+            - **Passed:** ${{ steps.parse_results.outputs.passed }} ✅
+            - **Failed:** ${{ steps.parse_results.outputs.failed }} ❌
+
+            ### Test Cases
+            | Test Name | Result |
+            |-----------|--------|
+            ${{ steps.parse_results.outputs.summary }}
+
+            ---
+            *Last updated: ${{ github.event.head_commit.timestamp }}*
+
+      - name: Fail workflow if tests failed
+        if: always() && steps.e2e_tests.outputs.exit_code != '0'
+        run: exit 1

From ab92c37e1f93c1f3edf1c457f70af95880342ba8 Mon Sep 17 00:00:00 2001
From: Tomasz Janiszewski <tomek@redhat.com>
Date: Mon, 2 Mar 2026 17:59:07 +0100
Subject: [PATCH 2/7] Simplify E2E workflow

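- Remove redundant dependency download step (Go handles this automatically)
- Remove redundant mcpchecker build step (run-tests.sh handles it)
- Remove unnecessary EXIT_CODE capture and the explicit fail step that used it
- Split the PR comment into success and failure variants, so a failure
  comment is posted when the job fails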

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
---
 .github/workflows/e2e.yml | 46 ++++++++++++++++++++++++---------------
 1 file changed, 28 insertions(+), 18 deletions(-)

diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml
index 58a4f53..9b9ae7f 100644
--- a/.github/workflows/e2e.yml
+++ b/.github/workflows/e2e.yml
@@ -29,9 +29,6 @@ jobs:
         with:
           go-version-file: go.mod
 
-      - name: Download dependencies
-        run: find . -name go.mod -execdir go mod download \;
-
       - name: Setup proto files
         run: make proto-setup
 
@@ -41,19 +38,10 @@ jobs:
       - name: Download WireMock
         run: make mock-download
 
-      - name: Build mcpchecker
-        run: cd e2e-tests && ./scripts/build-mcpchecker.sh
-
       - name: Run E2E tests
-        id: e2e_tests
         env:
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-        run: |
-          set +e  # Don't exit on error
-          cd e2e-tests && ./scripts/run-tests.sh
-          EXIT_CODE=$?
-          echo "exit_code=$EXIT_CODE" >> $GITHUB_OUTPUT
-          exit $EXIT_CODE
+        run: cd e2e-tests && ./scripts/run-tests.sh
 
       - name: Parse test results
         if: always()
@@ -91,8 +79,8 @@ jobs:
           comment-author: 'github-actions[bot]'
           body-includes: '<!-- e2e-test-results -->'
 
-      - name: Create or update comment
-        if: always()
+      - name: Create or update comment - Success
+        if: success()
         uses: peter-evans/create-or-update-comment@v4
         with:
           comment-id: ${{ steps.find_comment.outputs.comment-id }}
@@ -118,6 +106,28 @@ jobs:
             ---
             *Last updated: ${{ github.event.head_commit.timestamp }}*
 
-      - name: Fail workflow if tests failed
-        if: always() && steps.e2e_tests.outputs.exit_code != '0'
-        run: exit 1
+      - name: Create or update comment - Failure
+        if: failure()
+        uses: peter-evans/create-or-update-comment@v4
+        with:
+          comment-id: ${{ steps.find_comment.outputs.comment-id }}
+          issue-number: ${{ github.event.pull_request.number }}
+          edit-mode: replace
+          body: |
+            <!-- e2e-test-results -->
+            ## E2E Test Results
+
+            **Commit:** ${{ github.event.pull_request.head.sha }}
+            **Workflow Run:** [View Details](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})
+
+            ### ❌ Job Failed
+
+            The E2E test job failed to complete. This could be due to:
+            - Test execution errors
+            - Infrastructure issues
+            - Configuration problems
+
+            Please check the [workflow logs](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for details.
+
+            ---
+            *Last updated: ${{ github.event.head_commit.timestamp }}*

From 29855632a3226418164b682b02f23eb31400a4b9 Mon Sep 17 00:00:00 2001
From: Tomasz Janiszewski <tomek@redhat.com>
Date: Mon, 2 Mar 2026 17:59:23 +0100
Subject: [PATCH 3/7] Use make target instead of direct script invocation

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
---
 .github/workflows/e2e.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml
index 9b9ae7f..142ebcd 100644
--- a/.github/workflows/e2e.yml
+++ b/.github/workflows/e2e.yml
@@ -41,7 +41,7 @@ jobs:
       - name: Run E2E tests
         env:
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-        run: cd e2e-tests && ./scripts/run-tests.sh
+        run: make e2e-test
 
       - name: Parse test results
         if: always()

From fff63df38d089c307acabe750ea3c9f51a72220c Mon Sep 17 00:00:00 2001
From: Tomasz Janiszewski <tomek@redhat.com>
Date: Mon, 2 Mar 2026 18:04:47 +0100
Subject: [PATCH 4/7] Use mcpchecker summary command with GitHub Actions format

- Use built-in mcpchecker summary instead of manual jq parsing
- Add results to GitHub Actions step summary
- Cleaner and more maintainable output generation

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
---
 .github/workflows/e2e.yml | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml
index 142ebcd..d93c710 100644
--- a/.github/workflows/e2e.yml
+++ b/.github/workflows/e2e.yml
@@ -49,20 +49,20 @@ jobs:
         run: |
           RESULTS_FILE="e2e-tests/mcpchecker/mcpchecker-stackrox-mcp-e2e-out.json"
           if [ -f "$RESULTS_FILE" ]; then
-            # Count total and passed tests
-            TOTAL=$(jq '. | length' "$RESULTS_FILE")
-            PASSED=$(jq '[.[] | select(.taskPassed == true)] | length' "$RESULTS_FILE")
-            FAILED=$((TOTAL - PASSED))
+            # Use mcpchecker summary with GitHub Actions format
+            cd e2e-tests/mcpchecker
+            ../../e2e-tests/bin/mcpchecker summary mcpchecker-stackrox-mcp-e2e-out.json --github-output >> $GITHUB_OUTPUT
 
-            echo "total=$TOTAL" >> $GITHUB_OUTPUT
-            echo "passed=$PASSED" >> $GITHUB_OUTPUT
-            echo "failed=$FAILED" >> $GITHUB_OUTPUT
-
-            # Generate summary table
-            SUMMARY=$(jq -r '.[] | "| \(.taskName) | \(if .taskPassed then "✅ Pass" else "❌ Fail" end) |"' "$RESULTS_FILE")
+            # Generate markdown summary table
+            SUMMARY=$(jq -r '.[] | "| \(.taskName) | \(if .taskPassed then "✅ Pass" else "❌ Fail" end) |"' mcpchecker-stackrox-mcp-e2e-out.json)
             echo "summary<<EOF" >> $GITHUB_OUTPUT
             echo "$SUMMARY" >> $GITHUB_OUTPUT
             echo "EOF" >> $GITHUB_OUTPUT
+
+            # Add to GitHub Actions step summary
+            echo "## E2E Test Results" >> $GITHUB_STEP_SUMMARY
+            echo "" >> $GITHUB_STEP_SUMMARY
+            ../../e2e-tests/bin/mcpchecker summary mcpchecker-stackrox-mcp-e2e-out.json >> $GITHUB_STEP_SUMMARY
           else
             echo "total=0" >> $GITHUB_OUTPUT
             echo "passed=0" >> $GITHUB_OUTPUT

From e2ec873bed7c75c4ccfcf5c9657e965ab3592bb8 Mon Sep 17 00:00:00 2001
From: Tomasz Janiszewski <tomek@redhat.com>
Date: Mon, 2 Mar 2026 18:06:52 +0100
Subject: [PATCH 5/7] Use mcpchecker human-readable summary in PR comments

Use the built-in text output from mcpchecker summary directly
instead of creating custom markdown tables. Simpler and cleaner.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
---
 .github/workflows/e2e.yml | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml
index d93c710..02cff62 100644
--- a/.github/workflows/e2e.yml
+++ b/.github/workflows/e2e.yml
@@ -49,12 +49,13 @@ jobs:
         run: |
           RESULTS_FILE="e2e-tests/mcpchecker/mcpchecker-stackrox-mcp-e2e-out.json"
           if [ -f "$RESULTS_FILE" ]; then
-            # Use mcpchecker summary with GitHub Actions format
             cd e2e-tests/mcpchecker
+
+            # Get stats in GitHub Actions format
             ../../e2e-tests/bin/mcpchecker summary mcpchecker-stackrox-mcp-e2e-out.json --github-output >> $GITHUB_OUTPUT
 
-            # Generate markdown summary table
-            SUMMARY=$(jq -r '.[] | "| \(.taskName) | \(if .taskPassed then "✅ Pass" else "❌ Fail" end) |"' mcpchecker-stackrox-mcp-e2e-out.json)
+            # Get human-readable summary for PR comment
+            SUMMARY=$(../../e2e-tests/bin/mcpchecker summary mcpchecker-stackrox-mcp-e2e-out.json)
             echo "summary<<EOF" >> $GITHUB_OUTPUT
             echo "$SUMMARY" >> $GITHUB_OUTPUT
             echo "EOF" >> $GITHUB_OUTPUT
@@ -62,7 +63,9 @@ jobs:
             # Add to GitHub Actions step summary
             echo "## E2E Test Results" >> $GITHUB_STEP_SUMMARY
             echo "" >> $GITHUB_STEP_SUMMARY
-            ../../e2e-tests/bin/mcpchecker summary mcpchecker-stackrox-mcp-e2e-out.json >> $GITHUB_STEP_SUMMARY
+            echo '```' >> $GITHUB_STEP_SUMMARY
+            echo "$SUMMARY" >> $GITHUB_STEP_SUMMARY
+            echo '```' >> $GITHUB_STEP_SUMMARY
           else
             echo "total=0" >> $GITHUB_OUTPUT
             echo "passed=0" >> $GITHUB_OUTPUT
@@ -93,15 +96,9 @@ jobs:
             **Commit:** ${{ github.event.pull_request.head.sha }}
             **Workflow Run:** [View Details](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})
 
-            ### Summary
-            - **Total Tests:** ${{ steps.parse_results.outputs.total }}
-            - **Passed:** ${{ steps.parse_results.outputs.passed }} ✅
-            - **Failed:** ${{ steps.parse_results.outputs.failed }} ❌
-
-            ### Test Cases
-            | Test Name | Result |
-            |-----------|--------|
+            ```
             ${{ steps.parse_results.outputs.summary }}
+            ```
 
             ---
             *Last updated: ${{ github.event.head_commit.timestamp }}*

From 6f795c2ac43b5172f19fa9dd98fa445ce15336cf Mon Sep 17 00:00:00 2001
From: Tomasz Janiszewski <tomek@redhat.com>
Date: Mon, 2 Mar 2026 18:14:41 +0100
Subject: [PATCH 6/7] Use OpenAI agent (gpt-5-nano) for E2E tests

Switch the agent from claude-code to the OpenAI agent to support CI
environments where the Claude CLI is not available. Use gpt-5-nano for
cost efficiency.

- Update eval.yaml to use builtin.openai-agent with gpt-5-nano
- Set MODEL_BASE_URL and MODEL_KEY env vars in run-tests.sh
- Pass MODEL_KEY in GitHub Actions workflow

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
---
 .github/workflows/e2e.yml      | 1 +
 e2e-tests/mcpchecker/eval.yaml | 4 ++--
 e2e-tests/scripts/run-tests.sh | 6 ++++--
 3 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml
index 02cff62..6a2f38f 100644
--- a/.github/workflows/e2e.yml
+++ b/.github/workflows/e2e.yml
@@ -41,6 +41,7 @@ jobs:
       - name: Run E2E tests
         env:
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          MODEL_KEY: ${{ secrets.OPENAI_API_KEY }}
         run: make e2e-test
 
       - name: Parse test results
diff --git a/e2e-tests/mcpchecker/eval.yaml b/e2e-tests/mcpchecker/eval.yaml
index 6c14da2..3b6e73e 100644
--- a/e2e-tests/mcpchecker/eval.yaml
+++ b/e2e-tests/mcpchecker/eval.yaml
@@ -3,8 +3,8 @@ metadata:
   name: "stackrox-mcp-e2e"
 config:
   agent:
-    type: "builtin.claude-code"
-    model: "claude-sonnet-4-5"
+    type: "builtin.openai-agent"
+    model: "gpt-5-nano"
   llmJudge:
     env:
       typeKey: JUDGE_TYPE
diff --git a/e2e-tests/scripts/run-tests.sh b/e2e-tests/scripts/run-tests.sh
index bbed336..1e01f41 100755
--- a/e2e-tests/scripts/run-tests.sh
+++ b/e2e-tests/scripts/run-tests.sh
@@ -58,8 +58,9 @@ if [ ! -f "$E2E_DIR/bin/mcpchecker" ]; then
 fi
 
 
-# Set agent model (defaults to claude-sonnet-4-5)
-export AGENT_MODEL_NAME="${AGENT_MODEL_NAME:-claude-sonnet-4-5}"
+# Set agent environment variables (use OpenAI)
+export MODEL_BASE_URL="${MODEL_BASE_URL:-https://api.openai.com/v1}"
+export MODEL_KEY="${MODEL_KEY:-$OPENAI_API_KEY}"
 
 # Set judge environment variables (use OpenAI)
 export JUDGE_BASE_URL="${JUDGE_BASE_URL:-https://api.openai.com/v1}"
@@ -68,6 +69,7 @@ export JUDGE_MODEL_NAME="${JUDGE_MODEL_NAME:-gpt-5-nano}"
 
 echo "Configuration:"
 echo "  Central URL: $STACKROX_MCP__CENTRAL__URL (WireMock)"
+echo "  Agent: gpt-5-nano (OpenAI)"
 echo "  Judge: $JUDGE_MODEL_NAME (OpenAI)"
 echo "  MCP Server: stackrox-mcp (via go run)"
 echo ""

From d78d8874eef7ce161783177de047e671a380bcfc Mon Sep 17 00:00:00 2001
From: Tomasz Janiszewski <tomek@redhat.com>
Date: Mon, 2 Mar 2026 18:22:07 +0100
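Subject: [PATCH 7/7] Trim E2E PR comment bodies

Remove the "Last updated" footer from the success and failure comments,
and reduce the failure comment to a "Job Failed" heading, the commit SHA
and the workflow run link.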

Signed-off-by: Tomasz Janiszewski <tomek@redhat.com>
---
 .github/workflows/e2e.yml | 17 +----------------
 1 file changed, 1 insertion(+), 16 deletions(-)

diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml
index 6a2f38f..60c8242 100644
--- a/.github/workflows/e2e.yml
+++ b/.github/workflows/e2e.yml
@@ -101,9 +101,6 @@ jobs:
             ${{ steps.parse_results.outputs.summary }}
             ```
 
-            ---
-            *Last updated: ${{ github.event.head_commit.timestamp }}*
-
       - name: Create or update comment - Failure
         if: failure()
         uses: peter-evans/create-or-update-comment@v4
@@ -113,19 +110,7 @@ jobs:
           edit-mode: replace
           body: |
             <!-- e2e-test-results -->
-            ## E2E Test Results
+            ### ❌ Job Failed
 
             **Commit:** ${{ github.event.pull_request.head.sha }}
             **Workflow Run:** [View Details](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})
-
-            ### ❌ Job Failed
-
-            The E2E test job failed to complete. This could be due to:
-            - Test execution errors
-            - Infrastructure issues
-            - Configuration problems
-
-            Please check the [workflow logs](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for details.
-
-            ---
-            *Last updated: ${{ github.event.head_commit.timestamp }}*