From a2f76a7cf0ca7a0af5b10905a29042585deab3b7 Mon Sep 17 00:00:00 2001
From: seaona <mariona@gmx.es>
Date: Mon, 23 Feb 2026 09:53:38 +0100
Subject: [PATCH] poc flaky analyzer extension

---
 .../actions/analyze-flaky-tests/action.yml    |  79 +++
 .github/actions/flaky-test-report/action.yml  |  27 +-
 .github/scripts/analyze-flaky-tests/index.ts  | 278 ++++++++++
 .../knowledge/extension-flakiness-patterns.md | 502 ++++++++++++++++++
 .../llm/claude-analyzer.ts                    | 136 +++++
 .../analyze-flaky-tests/llm/prompt-builder.ts |  50 ++
 .../scripts/analyze-flaky-tests/llm/tools.ts  | 284 ++++++++++
 .github/scripts/analyze-flaky-tests/types.ts  |  45 ++
 .../utils/job-log-fetcher.ts                  | 140 +++++
 .../utils/knowledge-base.ts                   |  71 +++
 .../utils/past-fixes-fetcher.ts               | 123 +++++
 .../utils/slack-reporter.ts                   | 137 +++++
 .../utils/test-source-reader.ts               | 124 +++++
 .github/scripts/create-flaky-test-report.mjs  |  79 ++-
 .github/workflows/flaky-test-ai-analysis.yml  |  70 +++
 package.json                                  |   2 +
 yarn.lock                                     |  89 ++++
 17 files changed, 2223 insertions(+), 13 deletions(-)
 create mode 100644 .github/actions/analyze-flaky-tests/action.yml
 create mode 100644 .github/scripts/analyze-flaky-tests/index.ts
 create mode 100644 .github/scripts/analyze-flaky-tests/knowledge/extension-flakiness-patterns.md
 create mode 100644 .github/scripts/analyze-flaky-tests/llm/claude-analyzer.ts
 create mode 100644 .github/scripts/analyze-flaky-tests/llm/prompt-builder.ts
 create mode 100644 .github/scripts/analyze-flaky-tests/llm/tools.ts
 create mode 100644 .github/scripts/analyze-flaky-tests/types.ts
 create mode 100644 .github/scripts/analyze-flaky-tests/utils/job-log-fetcher.ts
 create mode 100644 .github/scripts/analyze-flaky-tests/utils/knowledge-base.ts
 create mode 100644 .github/scripts/analyze-flaky-tests/utils/past-fixes-fetcher.ts
 create mode 100644 .github/scripts/analyze-flaky-tests/utils/slack-reporter.ts
 create mode 100644 .github/scripts/analyze-flaky-tests/utils/test-source-reader.ts
 create mode 100644 .github/workflows/flaky-test-ai-analysis.yml

diff --git a/.github/actions/analyze-flaky-tests/action.yml b/.github/actions/analyze-flaky-tests/action.yml
new file mode 100644
index 00000000..61e957d6
--- /dev/null
+++ b/.github/actions/analyze-flaky-tests/action.yml
@@ -0,0 +1,79 @@
+name: Analyze Flaky Tests with AI
+description: 'Analyzes flaky test failures using Claude AI and posts findings as Slack thread replies.'
+
+inputs:
+  github-token:  # exported to the script as GITHUB_TOKEN
+    description: 'GitHub token with repo and actions:read access'
+    required: true
+  claude-api-key:  # exported to the script as E2E_CLAUDE_API_KEY
+    description: 'Anthropic API key for Claude'
+    required: true
+  slack-bot-token:  # exported to the script as SLACK_BOT_TOKEN
+    description: 'Slack Bot Token with chat:write scope'
+    required: true
+  slack-channel-id:  # exported to the script as SLACK_CHANNEL_ID
+    description: 'Slack channel ID to post findings to'
+    required: true
+  slack-thread-ts:  # exported as SLACK_THREAD_TS; the script skips Slack posting when it is empty
+    description: 'Slack thread timestamp to reply to (from the flaky test report)'
+    required: true
+  failures-json:  # exported as FAILURES_JSON and parsed by the analyzer script
+    description: 'JSON array of test failures to analyze (from the flaky test report)'
+    required: true
+  target-owner:  # exported to the script as TARGET_OWNER
+    description: 'Owner of the repository containing the tests'
+    required: false
+    default: 'MetaMask'
+  target-repo:  # exported to the script as TARGET_REPO
+    description: 'Repository containing the tests'
+    required: false
+    default: 'metamask-extension'
+  github-tools-repository:
+    description: 'The GitHub repository containing the GitHub tools.'
+    required: false
+    default: ${{ github.action_repository }}
+  github-tools-ref:
+    description: 'The SHA of the action to use.'
+    required: false
+    default: ${{ github.action_ref }}
+
+runs:
+  using: composite
+  steps:
+    - name: Checkout GitHub tools repository  # lands in ./github-tools, the working directory of every run step below
+      uses: actions/checkout@v6
+      with:
+        repository: ${{ inputs.github-tools-repository }}
+        ref: ${{ inputs.github-tools-ref }}
+        path: ./github-tools
+
+    - name: Set up Node.js  # Node version and yarn cache key both come from the checked-out tools repository
+      uses: actions/setup-node@v6
+      with:
+        node-version-file: ./github-tools/.nvmrc
+        cache-dependency-path: ./github-tools/yarn.lock
+        cache: yarn
+
+    - name: Enable Corepack
+      working-directory: ./github-tools
+      shell: bash
+      run: corepack enable
+
+    - name: Install dependencies
+      working-directory: ./github-tools
+      shell: bash
+      run: yarn --immutable
+
+    - name: Run AI analysis  # index.ts reads all of its configuration from the env vars set here
+      env:
+        GITHUB_TOKEN: ${{ inputs.github-token }}
+        E2E_CLAUDE_API_KEY: ${{ inputs.claude-api-key }}
+        SLACK_BOT_TOKEN: ${{ inputs.slack-bot-token }}
+        SLACK_CHANNEL_ID: ${{ inputs.slack-channel-id }}
+        SLACK_THREAD_TS: ${{ inputs.slack-thread-ts }}
+        TARGET_OWNER: ${{ inputs.target-owner }}
+        TARGET_REPO: ${{ inputs.target-repo }}
+        FAILURES_JSON: ${{ inputs.failures-json }}
+      working-directory: ./github-tools
+      shell: bash
+      run: yarn ts-node --swc .github/scripts/analyze-flaky-tests/index.ts
diff --git a/.github/actions/flaky-test-report/action.yml b/.github/actions/flaky-test-report/action.yml
index b907583a..d9dac57a 100644
--- a/.github/actions/flaky-test-report/action.yml
+++ b/.github/actions/flaky-test-report/action.yml
@@ -12,8 +12,17 @@ inputs:
     description: 'GitHub token with repo and actions:read access'
     required: true
   slack-webhook-flaky-tests:
-    description: 'Slack webhook URL for flaky test reports'
-    required: true
+    description: 'Slack webhook URL for flaky test reports (legacy fallback)'
+    required: false
+    default: ''
+  slack-bot-token:
+    description: 'Slack Bot Token with chat:write scope (preferred over webhook for thread support)'
+    required: false
+    default: ''
+  slack-channel-id:
+    description: 'Slack channel ID to post the report to (required when using slack-bot-token)'
+    required: false
+    default: ''
   github-tools-repository:
     description: 'The GitHub repository containing the GitHub tools. Defaults to the GitHub tools action repositor, and usually does not need to be changed.'
     required: false
@@ -23,6 +32,17 @@ inputs:
     required: false
     default: ${{ github.action_ref }}
 
+outputs:
+  thread_ts:
+    description: 'Slack thread timestamp of the posted report (only available when using slack-bot-token)'
+    value: ${{ steps.report.outputs.thread_ts }}
+  has_failures:
+    description: 'Whether any test failures were found (true/false)'
+    value: ${{ steps.report.outputs.has_failures }}
+  failures_json:
+    description: 'JSON array of the top 10 test failures for downstream analysis'
+    value: ${{ steps.report.outputs.failures_json }}
+
 runs:
   using: composite
   steps:
@@ -51,11 +71,14 @@ runs:
       run: yarn --immutable
 
     - name: Run flaky test report script
+      id: report
       env:
         REPOSITORY: ${{ inputs.repository }}
         WORKFLOW_ID: ${{ inputs.workflow-id }}
         GITHUB_TOKEN: ${{ inputs.github-token }}
         SLACK_WEBHOOK_FLAKY_TESTS: ${{ inputs.slack-webhook-flaky-tests }}
+        SLACK_BOT_TOKEN: ${{ inputs.slack-bot-token }}
+        SLACK_CHANNEL_ID: ${{ inputs.slack-channel-id }}
       working-directory: ./github-tools
       shell: bash
       run: node .github/scripts/create-flaky-test-report.mjs
diff --git a/.github/scripts/analyze-flaky-tests/index.ts b/.github/scripts/analyze-flaky-tests/index.ts
new file mode 100644
index 00000000..5029f0a6
--- /dev/null
+++ b/.github/scripts/analyze-flaky-tests/index.ts
@@ -0,0 +1,278 @@
+import fs from 'fs';
+import { Octokit } from '@octokit/rest';
+import type { AnalysisResult, FlakyTestFailure, SlackFinding } from './types';
+import { fetchJobLog } from './utils/job-log-fetcher';
+import { postSlackFindings } from './utils/slack-reporter';
+import { buildInitialPrompt } from './llm/prompt-builder';
+import { analyzeWithClaude } from './llm/claude-analyzer';
+import { executeToolCall } from './llm/tools';
+import type { ToolContext } from './llm/tools';
+import {
+  getKnowledgeSection,
+  listKnowledgeSections,
+} from './utils/knowledge-base';
+
+type Config = {
+  githubToken: string; // GITHUB_TOKEN; authenticates the Octokit client
+  claudeApiKey: string; // E2E_CLAUDE_API_KEY; 'mock' when the variable is unset
+  slackBotToken: string; // SLACK_BOT_TOKEN; '' when unset
+  slackChannelId: string; // SLACK_CHANNEL_ID; '' when unset
+  slackThreadTs: string; // SLACK_THREAD_TS; '' when unset
+  targetOwner: string; // TARGET_OWNER; defaults to 'MetaMask'
+  targetRepo: string; // TARGET_REPO; defaults to 'metamask-extension'
+  failuresJson: string; // FAILURES_JSON; '' when unset
+  dryRun: boolean; // true when --dry-run is on the command line
+};
+
+/** Builds the run configuration from env vars and CLI flags; throws when a required variable is unset or empty. */
+function loadConfig(): Config {
+  const mockLlm = process.argv.includes('--mock-llm');
+  const requiredVars = mockLlm ? ['GITHUB_TOKEN'] : ['GITHUB_TOKEN', 'E2E_CLAUDE_API_KEY'];
+
+  for (const envVar of requiredVars) {
+    if (!process.env[envVar]) {
+      throw new Error(`Missing required environment variable: ${envVar}`);
+    }
+  }
+
+  // `||` (not `??`) for values with a real default: an empty-string variable must fall back too, as in the check above.
+  return {
+    githubToken: process.env.GITHUB_TOKEN!,
+    claudeApiKey: process.env.E2E_CLAUDE_API_KEY || 'mock',
+    slackBotToken: process.env.SLACK_BOT_TOKEN ?? '',
+    slackChannelId: process.env.SLACK_CHANNEL_ID ?? '',
+    slackThreadTs: process.env.SLACK_THREAD_TS ?? '',
+    targetOwner: process.env.TARGET_OWNER || 'MetaMask',
+    targetRepo: process.env.TARGET_REPO || 'metamask-extension',
+    failuresJson: process.env.FAILURES_JSON ?? '',
+    dryRun: process.argv.includes('--dry-run'),
+  };
+}
+
+/** Parses the failures payload; throws when it is empty, malformed JSON, or not a JSON array. */
+function parseFailures(failuresJson: string): FlakyTestFailure[] {
+  if (!failuresJson) throw new Error('FAILURES_JSON is empty. Pass it as an env var or use --fixtures-file <path>.');
+  const parsed: unknown = JSON.parse(failuresJson);
+  // Valid JSON that is not an array (e.g. an object) would otherwise reach the analysis loop as a bogus failure list.
+  if (!Array.isArray(parsed)) throw new Error('FAILURES_JSON must be a JSON array of test failures.');
+  return parsed as FlakyTestFailure[];
+}
+
+/** Returns the raw failures JSON: the contents of the `--fixtures-file <path>` file when given, else FAILURES_JSON or ''. */
+function loadFailuresFromArgs(): string {
+  const flagIndex = process.argv.indexOf('--fixtures-file');
+  const fixturePath = flagIndex === -1 ? undefined : process.argv[flagIndex + 1];
+  // No flag, or a flag with nothing after it: use the environment variable instead.
+  if (!fixturePath) return process.env.FAILURES_JSON ?? '';
+  console.log(`Loading failures from file: ${fixturePath}\n`);
+  return fs.readFileSync(fixturePath, 'utf-8');
+}
+/** `--mock-llm` stand-in for the Claude call: runs the tools through executeToolCall, logs each step, returns a [MOCK]-labelled AnalysisResult. */
+async function mockAnalysis(
+  failure: FlakyTestFailure,
+  logSection: string,
+  toolContext: ToolContext,
+): Promise<AnalysisResult> {
+  console.log('  [mock-llm] Simulating tool calls that Claude would make...');
+
+  // 1. Fetch the test file at the path reported by the failure
+  console.log(`    [tool] fetch_file({"path":"${failure.path}"})`);
+  const testContent = await executeToolCall('fetch_file', { path: failure.path }, toolContext);
+  let resolvedPath = failure.path;
+  let fetched: string;
+
+  if (testContent.startsWith('File not found')) {
+    console.log('    [tool] fetch_file => NOT FOUND');
+
+    // 2. Path did not resolve: search using the lower-cased first word of the test name
+    const testNameKeyword = failure.name.split(' ')[0]!.toLowerCase();
+    console.log(`    [tool] search_test_file({"query":"${testNameKeyword}"})`);
+    const searchResult = await executeToolCall('search_test_file', { query: testNameKeyword }, toolContext);
+    console.log(`    [tool] search_test_file => ${searchResult.split('\n').length - 1} results`);
+
+    const firstMatch = searchResult.match(/^- (.+\.spec\.\w+)$/m); // first "- <path>.spec.<ext>" line of the search output
+    if (firstMatch?.[1]) {
+      resolvedPath = firstMatch[1];
+      console.log(`    [tool] fetch_file({"path":"${resolvedPath}"})`);
+      const retryContent = await executeToolCall('fetch_file', { path: resolvedPath }, toolContext);
+      fetched = retryContent.startsWith('File not found') ? 'NOT FOUND' : `${retryContent.length} chars`;
+      console.log(`    [tool] fetch_file => ${fetched}`);
+    } else {
+      fetched = 'NOT FOUND (search also returned no .spec files)';
+    }
+  } else {
+    fetched = `${testContent.length} chars`;
+    console.log(`    [tool] fetch_file => ${fetched}`);
+  }
+
+  // 3. Fetch job logs via runId (skipped when the failure has no runId)
+  if (failure.runId) {
+    console.log(`    [tool] fetch_job_logs({"run_id":${failure.runId}})`);
+    const jobsResult = await executeToolCall('fetch_job_logs', { run_id: failure.runId }, toolContext);
+    const jobCount = (jobsResult.match(/^- Job /gm) ?? []).length;
+    console.log(`    [tool] fetch_job_logs => ${jobCount} e2e jobs found`);
+  }
+
+  // 4. Knowledge base lookup: map the error text to a category, then the category to a section name
+  console.log('    [tool] list_flakiness_categories({})');
+  const categories = listKnowledgeSections();
+  console.log(`    [tool] list_flakiness_categories => ${categories.length} sections`);
+
+  const errorLower = failure.lastError.toLowerCase();
+  let matchedCategory = 'other'; // kept when none of the keywords below occurs in the error
+  if (errorLower.includes('stale')) matchedCategory = 'stale_reference';
+  else if (errorLower.includes('timeout')) matchedCategory = 'timing';
+  else if (errorLower.includes('click intercepted')) matchedCategory = 'element_state';
+  else if (errorLower.includes('no such window')) matchedCategory = 'window_race';
+
+  const knowledgeQuery = matchedCategory === 'stale_reference' ? 'React Re-renders'
+    : matchedCategory === 'timing' ? 'Actions that Take Time'
+    : matchedCategory === 'window_race' ? 'Race Conditions with Windows'
+    : 'Anti-Patterns'; // 'element_state' and 'other' both land here
+
+  console.log(`    [tool] get_flakiness_patterns({"category":"${knowledgeQuery}"})`);
+  const section = getKnowledgeSection(knowledgeQuery);
+  console.log(`    [tool] get_flakiness_patterns => ${section.length} chars`);
+
+  // 5. Search for similar fixes using the first three words of the test name
+  const fixKeyword = failure.name.split(' ').slice(0, 3).join(' ');
+  console.log(`    [tool] search_similar_fixes({"query":"${fixKeyword}"})`);
+  const fixes = await executeToolCall(
+    'search_similar_fixes',
+    { query: fixKeyword },
+    toolContext,
+  );
+  console.log(`    [tool] search_similar_fixes => ${fixes.length} chars`);
+
+  console.log('    [tool] submit_analysis({...})'); // log line only: no tool is executed for this step
+
+  return {
+    testName: failure.name,
+    testPath: resolvedPath,
+    classification: 'flaky_test',
+    confidence: 75,
+    rootCauseCategory: matchedCategory,
+    rootCauseExplanation: `[MOCK] Based on error "${failure.lastError.substring(0, 80)}...", this appears to be a ${matchedCategory} issue. The test file was ${fetched}. Found ${categories.length} knowledge base sections and ${fixes.length} chars of similar fix data.`,
+    specificLines: ['[MOCK] Line analysis requires real Claude API'],
+    suggestedFix: '[MOCK] Fix suggestion requires real Claude API. Run without --mock-llm to get actual analysis.',
+    additionalNotes: `[MOCK] CI log section: ${logSection.substring(0, 100)}...`,
+  };
+}
+/** Entry point: loads config and failures, analyzes each failure in turn, then prints the findings (--dry-run) or posts them to Slack. */
+async function main(): Promise<void> {
+  const config = loadConfig();
+  const octokit = new Octokit({ auth: config.githubToken });
+  const mockLlm = process.argv.includes('--mock-llm');
+
+  console.log('=== Flaky Test AI Analyzer (Tool-Augmented Agent) ===\n');
+
+  if (mockLlm) {
+    console.log('MOCK LLM MODE: Using mock Claude responses. Tools will execute for real.\n');
+  }
+  if (config.dryRun) {
+    console.log('DRY RUN MODE: Results will be printed to stdout, not posted to Slack.\n');
+  }
+
+  const failuresSource = loadFailuresFromArgs() || config.failuresJson; // fixtures file contents, else FAILURES_JSON
+  const failures = parseFailures(failuresSource);
+  console.log(`Analyzing ${failures.length} test failures...\n`);
+
+  const toolContext: ToolContext = {
+    octokit,
+    owner: config.targetOwner,
+    repo: config.targetRepo,
+  };
+
+  const findings: SlackFinding[] = [];
+
+  for (let i = 0; i < failures.length; i++) {
+    const failure = failures[i]!;
+    console.log(
+      `[${i + 1}/${failures.length}] Analyzing: ${failure.name}`,
+    );
+
+    try { // an error for one failure is logged in the catch below and the loop moves on
+      console.log('  Fetching job log...');
+      const logSection = await fetchJobLog(
+        octokit,
+        failure,
+        config.targetOwner,
+        config.targetRepo,
+      );
+
+      let analysis: AnalysisResult;
+      if (mockLlm) {
+        analysis = await mockAnalysis(failure, logSection, toolContext);
+      } else {
+        console.log('  Starting agentic analysis with Claude...');
+        const prompt = buildInitialPrompt(failure, logSection, config.targetOwner, config.targetRepo);
+        analysis = await analyzeWithClaude(
+          prompt,
+          failure,
+          config.claudeApiKey,
+          toolContext,
+        );
+      }
+
+      const jobUrl = failure.jobId && failure.runId // '' when either id is missing
+        ? `https://github.com/${config.targetOwner}/${config.targetRepo}/actions/runs/${failure.runId}/job/${failure.jobId}`
+        : '';
+      const fileUrl = `https://github.com/${config.targetOwner}/${config.targetRepo}/blob/main/${failure.path}`; // built from the reported path, on the main branch
+
+      findings.push({ failure, analysis, jobUrl, fileUrl });
+
+      console.log(`  Result: ${analysis.classification} (${analysis.confidence}% confidence)`);
+      console.log(`  Root cause: ${analysis.rootCauseCategory}\n`);
+    } catch (error: unknown) {
+      const message = error instanceof Error ? error.message : String(error);
+      console.error(`  Failed to analyze: ${message}\n`);
+    }
+  }
+
+  console.log(`\n=== Analysis complete: ${findings.length}/${failures.length} tests analyzed ===\n`);
+
+  if (config.dryRun) { // print every finding to stdout and return before any Slack call
+    for (const finding of findings) {
+      console.log('---');
+      console.log(`Test: ${finding.failure.name}`);
+      console.log(`File: ${finding.failure.path}`);
+      console.log(`Classification: ${finding.analysis.classification}`);
+      console.log(`Confidence: ${finding.analysis.confidence}%`);
+      console.log(`Root Cause: ${finding.analysis.rootCauseCategory}`);
+      console.log(`Explanation: ${finding.analysis.rootCauseExplanation}`);
+      if (finding.analysis.specificLines.length > 0) {
+        console.log(`Problematic Lines:\n  ${finding.analysis.specificLines.join('\n  ')}`);
+      }
+      console.log(`Suggested Fix: ${finding.analysis.suggestedFix}`);
+      if (finding.analysis.additionalNotes) {
+        console.log(`Notes: ${finding.analysis.additionalNotes}`);
+      }
+      console.log(`Job: ${finding.jobUrl}`);
+      console.log(`File: ${finding.fileUrl}`);
+      console.log('');
+    }
+    return;
+  }
+
+  if (!config.slackBotToken || !config.slackChannelId || !config.slackThreadTs) { // all three are needed to post
+    console.log(
+      'Slack credentials or thread_ts not provided. Skipping Slack posting.',
+    );
+    console.log('Set SLACK_BOT_TOKEN, SLACK_CHANNEL_ID, and SLACK_THREAD_TS to enable.');
+    return;
+  }
+
+  console.log('Posting findings to Slack thread...');
+  await postSlackFindings(
+    findings,
+    config.slackThreadTs,
+    config.slackBotToken,
+    config.slackChannelId,
+  );
+  console.log('Done!');
+}
+// Script entry: a rejection escaping main() is logged and turned into exit status 1.
+main().catch((fatal: unknown) => {
+  console.error('\nFatal error:', fatal);
+  process.exit(1);
+});
diff --git a/.github/scripts/analyze-flaky-tests/knowledge/extension-flakiness-patterns.md b/.github/scripts/analyze-flaky-tests/knowledge/extension-flakiness-patterns.md
new file mode 100644
index 00000000..cb9a9ed4
--- /dev/null
+++ b/.github/scripts/analyze-flaky-tests/knowledge/extension-flakiness-patterns.md
@@ -0,0 +1,502 @@
+# Extension CI Flakiness
+
+> **Source:** [Extension CI Flakiness - Google Doc](https://docs.google.com/document/d/1oXd5d1X7j14lHLjaRCWjEh3uhndrXQ_46lBuZ9SAu6M/edit?tab=t.0)
+
+---
+
+## Table of Contents
+
+- [E2E Flakiness Categories](#e2e-flakiness-categories)
+  - [Race Conditions on Driver/Helpers Functions](#race-conditions-on-driverhelpers-functions)
+  - [Taking Unnecessary Steps](#taking-unnecessary-steps)
+  - [Missing or Incorrect Use of Mocks](#missing-or-incorrect-use-of-mocks)
+  - [Removing URL/host entries to the live server allowlist](#removing-urlhost-entries-to-the-live-server-allowlist)
+  - [Race Conditions on Gas / Balance / Navigation values on Screen](#race-conditions-on-gas--balance--navigation-values-on-screen)
+  - [Confirmation Popups / Modals](#confirmation-popups--modals)
+  - [Incorrect Testing Conditions](#incorrect-testing-conditions)
+  - [Race Conditions with Assertions within the Test Body Steps](#race-conditions-with-assertions-within-the-test-body-steps)
+  - [Race Conditions with Windows](#race-conditions-with-windows)
+  - [Race Conditions with React Re-renders](#race-conditions-with-react-re-renders)
+  - [Actions that Take Time](#actions-that-take-time)
+  - [Errors in the testing dapp](#errors-in-the-testing-dapp)
+  - [Not using driver methods](#not-using-driver-methods)
+- [Bugs Discovered on the Wallet Level while Investigating Flaky Tests](#bugs-discovered-on-the-wallet-level-while-investigating-flaky-tests)
+- [E2E Anti-Patterns](#e2e-anti-patterns)
+- [Unit Test Flakiness Categories](#unit-test-flakiness-categories)
+- [Flakiness on Other CI Jobs](#flakiness-on-other-ci-jobs)
+
+---
+
+## E2E Flaky Tests Walkthrough
+
+- **First session** — [Recording](https://consensys.zoom.us/) (Passcode: `h2&gExZE`)
+- **Second session** — [Recording](https://consensys.zoom.us/) (Passcode: `M+^1Tr9Y`)
+
+---
+
+## E2E Flakiness Categories
+
+### Race Conditions on Driver/Helpers Functions
+
+- **Click Element with stale element**
+  [MetaMask/metamask-extension#24813](https://github.com/MetaMask/metamask-extension/pull/24813)
+
+- **Waiting for the correct number of window handles**
+  [MetaMask/metamask-extension#24741](https://github.com/MetaMask/metamask-extension/pull/24741)
+
+- **Get window title undefined**
+  [MetaMask/metamask-extension#24642](https://github.com/MetaMask/metamask-extension/issues/24642)
+
+- **Clicking a parent element whose inner elements refresh, instead of the most specific element possible**
+  [MetaMask/metamask-extension#24652](https://github.com/MetaMask/metamask-extension/issues/24652)
+
+- **Holding SRP button for less time than required**
+  [MetaMask/metamask-extension#25328](https://github.com/MetaMask/metamask-extension/pull/25328)
+
+- **Trezor e2e: race condition getting multiple elements with the same selector and then expecting to have the exact number**
+  [Slack thread](https://consensys.slack.com/archives/C1L7H42BT/p1721043228543209?thread_ts=1720108424.349269&cid=C1L7H42BT)
+  Fix: [commit 130794d](https://github.com/MetaMask/metamask-extension/pull/25824/commits/130794d18e5ae39887d70e161636b3ec6f4f8164)
+
+---
+
+### Taking Unnecessary Steps
+
+- **Create token, approve token, missing permission controller connected to the test dapp**
+  [MetaMask/metamask-extension#24937](https://github.com/MetaMask/metamask-extension/pull/24937)
+
+- **Migrate Opensea missing permission controller connected to the test dapp, smart contract deployed on the background**
+  [MetaMask/metamask-extension#24739](https://github.com/MetaMask/metamask-extension/pull/24739)
+
+- **Request Queue SwitchChain missing smart contract deployed on the background**
+  [MetaMask/metamask-extension#24674](https://github.com/MetaMask/metamask-extension/pull/24674)
+
+- **Unnecessary browser refresh, causing to land into the Confirmation screen if it was appearing in the activity as unapproved**
+  [MetaMask/metamask-extension#24809](https://github.com/MetaMask/metamask-extension/pull/24809)
+
+- **Unnecessary scrolls and delays, which added up to more than 15 seconds**
+  [MetaMask/metamask-extension#25288](https://github.com/MetaMask/metamask-extension/issues/25288)
+
+- **Unnecessary step on enabling the nonce going to Settings, instead of using preferenceController fixtures**
+  [commit 9a72e16](https://github.com/MetaMask/metamask-extension/pull/25687/commits/9a72e166e668fb8a0eba642365b4d62924545ec0)
+
+- **Unnecessary step of deploying a contract when it's already deployed and loaded in the test dapp param**
+  [commit 9a72e16](https://github.com/MetaMask/metamask-extension/pull/25687/commits/9a72e166e668fb8a0eba642365b4d62924545ec0)
+
+- **Unnecessary steps importing a token instead of using fixtures (had to modify the chainId as the token was imported to chainId 1 using ganache)**
+  [MetaMask/metamask-extension#26654](https://github.com/MetaMask/metamask-extension/pull/26654)
+
+- **Switching to Mainnet before starting a test for Import tokens — can use fixtures to start the wallet in Mainnet network**
+  [MetaMask/metamask-extension#27567](https://github.com/MetaMask/metamask-extension/pull/27567)
+
+- **Unnecessary steps: manually deploying 3 token contracts instead of pre-deploying them with the anvil seeder**
+  [MetaMask/metamask-extension#35664](https://github.com/MetaMask/metamask-extension/pull/35664)
+
+- **Unnecessary steps for switching network when already in the network I want**
+  [MetaMask/metamask-extension#37374](https://github.com/MetaMask/metamask-extension/pull/37374)
+
+---
+
+### Missing or Incorrect Use of Mocks
+
+- **Missing IPFS metadata mock for Import ERC1155**
+  [MetaMask/metamask-extension#24709](https://github.com/MetaMask/metamask-extension/pull/24709)
+
+- **Missing mocks for ENS resolution**
+  [MetaMask/metamask-extension#24898](https://github.com/MetaMask/metamask-extension/pull/24898)
+
+- **Missing aggregatorMetadata, block list and include blocked tokens mocks**
+  [MetaMask-planning#2637](https://github.com/MetaMask/MetaMask-planning/issues/2637)
+
+- **Missing mock for quotes Swap test**
+  [MetaMask/metamask-extension#27160](https://github.com/MetaMask/metamask-extension/pull/27160)
+
+- **Inconsistency between the mocked value and the default value: test execution success depends on the polling rate**
+  [MetaMask/metamask-extension#23520](https://github.com/MetaMask/metamask-extension/pull/23520)
+
+- **Mocking eth_balance with a value >0ETH causes request polling for subsequent accounts, creating new ones and preventing other requests. Mock balance 0 to avoid this when using Mainnet**
+  [MetaMask/metamask-extension#25525](https://github.com/MetaMask/metamask-extension/pull/25525)
+
+- **Incorrect mock request: passing an id makes the body never match, so the mock response is never applied**
+  [MetaMask/metamask-extension#27156](https://github.com/MetaMask/metamask-extension/pull/27156)
+
+- **Solana missing mock to api.simplehash.com broke CI when that request changed its response, causing a subsequent call to another external API not in the privacy snapshot**
+  [MetaMask/metamask-extension#29986](https://github.com/MetaMask/metamask-extension/pull/29986)
+
+- **Add transaction simulation supported networks global mock**
+  [MetaMask/metamask-extension#30507](https://github.com/MetaMask/metamask-extension/pull/30507)
+
+- **Blockaid API was not correctly mocked (chainId used was int instead of hex), causing Blockaid validation to fail and metrics event assertion values to fail**
+  [MetaMask/metamask-extension#30769](https://github.com/MetaMask/metamask-extension/pull/30769)
+
+- **The default mock for Solana was overriding the custom mock, causing the balance to be different if the test was slow enough**
+  [MetaMask/metamask-extension#30808](https://github.com/MetaMask/metamask-extension/pull/30808)
+
+- **Missing mock caused Smart Transactions + Swap specs to fail**
+  [MetaMask/metamask-extension#30932](https://github.com/MetaMask/metamask-extension/pull/30932)
+
+- **Missing mock for Swaps notifications slippage tests**
+  [MetaMask/metamask-extension#31383](https://github.com/MetaMask/metamask-extension/pull/31383)
+
+- **Missing mock for Solana devnet**
+  [MetaMask/metamask-extension#31331](https://github.com/MetaMask/metamask-extension/pull/31331)
+
+- **Missing mock on onboarding privacy**
+  [MetaMask/metamask-extension#31272](https://github.com/MetaMask/metamask-extension/pull/31272)
+
+- **Missing user storage mocks**
+  [MetaMask/metamask-extension#31947](https://github.com/MetaMask/metamask-extension/pull/31947)
+
+- **Missing mock for custom network during onboarding**
+  [MetaMask/metamask-extension#32932](https://github.com/MetaMask/metamask-extension/pull/32932)
+
+- **Missing the token list mock**
+  [MetaMask/metamask-extension#34834](https://github.com/MetaMask/metamask-extension/pull/34834)
+
+---
+
+### Removing URL/host entries to the live server allowlist
+
+- **Part 1:** [MetaMask/metamask-extension#33267](https://github.com/MetaMask/metamask-extension/pull/33267)
+- **Part 2:** [MetaMask/metamask-extension#33302](https://github.com/MetaMask/metamask-extension/pull/33302)
+
+---
+
+### Race Conditions on Gas / Balance / Navigation values on Screen
+
+- **Balance not loaded when starting the Send, causing gas to be 0 and blocking the Confirmation screen**
+  [MetaMask/metamask-extension#24639](https://github.com/MetaMask/metamask-extension/pull/24639)
+  Same issue: [#34128](https://github.com/MetaMask/metamask-extension/pull/34128), [#34854](https://github.com/MetaMask/metamask-extension/pull/34854)
+
+- **Mismatch in gas calculation values, when changing the increase token allowance amount**
+  [MetaMask/metamask-extension#24734](https://github.com/MetaMask/metamask-extension/pull/24734)
+
+- **Active network data (isActive, EIP1559..) was not loaded in state when running the assertion**
+  [MetaMask/metamask-extension#25137](https://github.com/MetaMask/metamask-extension/pull/25137)
+
+- **Gas is not recalculated before clicking Continue, when switching assets in the Send flow**
+  [MetaMask/metamask-extension#25181](https://github.com/MetaMask/metamask-extension/issues/25181)
+
+- **Transaction didn't have the total value loaded before we click reject**
+  [MetaMask/metamask-extension#25312](https://github.com/MetaMask/metamask-extension/pull/25312)
+
+- **Spec was not waiting for queued signatures to display navigation, making some signatures not queue properly. Need to wait for the navigation numbers to appear before queueing a new signature**
+  [MetaMask/metamask-extension#27481](https://github.com/MetaMask/metamask-extension/pull/27481)
+
+---
+
+### Confirmation Popups / Modals
+
+- **Snaps confirmation popup appears in confirmation screen**
+  [MetaMask/metamask-extension#24939](https://github.com/MetaMask/metamask-extension/pull/24939)
+
+- **Vault decryption confirmation popup appears in settings**
+  [MetaMask/metamask-extension#24830](https://github.com/MetaMask/metamask-extension/pull/24830)
+
+- **"Got it" element taking time to disappear obfuscates other elements**
+  [MetaMask/metamask-extension#24580](https://github.com/MetaMask/metamask-extension/pull/24580)
+
+- **Add account popup obfuscates clicking on the next element from the Home page**
+  [MetaMask/metamask-extension#25861](https://github.com/MetaMask/metamask-extension/pull/25861)
+
+- **Import NFT modal obfuscates clicking on the Account menu**
+  [MetaMask/metamask-extension#27006](https://github.com/MetaMask/metamask-extension/pull/27006)
+
+- **On the onboarding carousel, not waiting for the element to disappear when switching between screens causes race conditions**
+  [MetaMask/metamask-extension#27858](https://github.com/MetaMask/metamask-extension/pull/27858)
+
+- **On the Add token flow, should wait until the dialog has been closed before proceeding — otherwise React re-renders cause failures**
+  [MetaMask/metamask-extension#27853](https://github.com/MetaMask/metamask-extension/pull/27853)
+
+- **On Queued Confirmations tests, connected manually to the test dapp and didn't wait for the MM dialog to close after connect. Caused chainId to be incorrectly outdated**
+  [MetaMask/metamask-extension#30028](https://github.com/MetaMask/metamask-extension/pull/30028)
+
+- **The notification (red dot) appears on top of the menu, blocking clicks on the menu button**
+  [MetaMask/metamask-extension#33492](https://github.com/MetaMask/metamask-extension/pull/33492)
+
+- **When changing language, sometimes the dropdown menu remains open, causing the next click to have no effect**
+  [MetaMask/metamask-extension#34169](https://github.com/MetaMask/metamask-extension/pull/34169)
+
+---
+
+### Incorrect Testing Conditions
+
+- **Testing background in MV3 builds, where there is no background but service worker instead**
+  [MetaMask/metamask-extension#25164](https://github.com/MetaMask/metamask-extension/pull/25164)
+
+---
+
+### Race Conditions with Assertions within the Test Body Steps
+
+- **Assert element value as soon as we find the element — the real value has not been rendered**
+  [MetaMask/metamask-extension#23450](https://github.com/MetaMask/metamask-extension/pull/23450)
+
+- **Rapid input of the entire Chain ID resulted in the error message appearing and persisting**
+  [MetaMask/metamask-extension#24790](https://github.com/MetaMask/metamask-extension/pull/24790)
+
+- **Trying to find a pending transaction and then a confirmed one — bad pattern as we shouldn't look for transient elements. Looking for the confirmed tx gives us the assertion we want**
+  [MetaMask/metamask-extension#25545](https://github.com/MetaMask/metamask-extension/pull/25545)
+
+- **Asserting that the currentUrl is the desired one can create a race condition. The correct approach is to wait for the URL we want**
+  [MetaMask/metamask-extension#26651](https://github.com/MetaMask/metamask-extension/pull/26651)
+
+- **Finding an element and then asserting it has the correct status (enabled) creates a race condition. Need to wait for the desired state instead of asserting directly**
+  [MetaMask/metamask-extension#27017](https://github.com/MetaMask/metamask-extension/pull/27017)
+
+- **Finding an element and then asserting it has the correct value creates a race condition. Need to wait for the desired value**
+  [MetaMask/metamask-extension#27095](https://github.com/MetaMask/metamask-extension/pull/27095)
+
+- **Find an element and assert it has the correct text for dapp permissions**
+  [MetaMask/metamask-extension#27894](https://github.com/MetaMask/metamask-extension/pull/27894)
+
+- **Looking for the Deposit transaction by its text in the activity tab — this element updates its state from pending to confirmed, meaning it can become stale**
+  [MetaMask/metamask-extension#27889](https://github.com/MetaMask/metamask-extension/pull/27889)
+
+- **Asserting an element is displayed after looking for its selector can cause race conditions where the element is updated in between (e.g., tx from pending to confirmed)**
+  [MetaMask/metamask-extension#27928](https://github.com/MetaMask/metamask-extension/pull/27928/files)
+
+- **Find element and assert correct text in the Swaps STX spec**
+  [MetaMask/metamask-extension#32032](https://github.com/MetaMask/metamask-extension/pull/32032)
+
+- **Find element and assert correct text in wallet_invokeMethod multichain test**
+  [MetaMask/metamask-extension#32962](https://github.com/MetaMask/metamask-extension/pull/32962)
+
+---
+
+### Race Conditions with Windows
+
+- **Vault decrypt uses a production build which automatically opens a MetaMask window. Using driver.navigate too caused 2 MetaMask windows, leading to flakiness as the active browser window was not where driver actions were happening**
+  [MetaMask/metamask-extension#25443](https://github.com/MetaMask/metamask-extension/pull/25443)
+
+- **Getting all windows and after several steps referencing an old window**
+  [MetaMask/metamask-extension#2585](https://github.com/MetaMask/metamask-extension/pull/2585)
+
+- **Tests that click a button in the popup window that eventually closes it, but don't wait for the popup to close before continuing. Added a method that clicks and waits for the window to close**
+  [MetaMask/metamask-extension#26449](https://github.com/MetaMask/metamask-extension/pull/26449),
+  [MetaMask/metamask-extension#26725](https://github.com/MetaMask/metamask-extension/pull/26725)
+
+- **Triggering a Send from Dapp 1 and quickly switching to Dapp 0 — the network for the first Send is taken from Dapp 0 instead of Dapp 1**
+  [MetaMask/metamask-extension#26794](https://github.com/MetaMask/metamask-extension/pull/26794)
+
+- **chainId proxy sync should preserve per dapp network selections**
+  [MetaMask/metamask-extension#31599](https://github.com/MetaMask/metamask-extension/pull/31599)
+
+- **Multichain API Call wallet_createSession**
+  [MetaMask/metamask-extension#31603](https://github.com/MetaMask/metamask-extension/pull/31603)
+
+- **Snaps race condition with windows**
+  [MetaMask/metamask-extension#32320](https://github.com/MetaMask/metamask-extension/pull/32320)
+
+- **Snap cronjobs dialog appears and disappears after some seconds — needed specific assert handling for the case where the window was closed automatically**
+  [MetaMask/metamask-extension#33004](https://github.com/MetaMask/metamask-extension/pull/33004)
+
+- **Need to wait until the dialog is closed before performing the next action in Request Queuing tests**
+  [MetaMask/metamask-extension#34141](https://github.com/MetaMask/metamask-extension/pull/34141)
+
+---
+
+### Race Conditions with React Re-renders
+
+- **After changing the language, clicking on the account menu while MetaMask is in a loading state — the click has no effect because the component re-renders**
+  [MetaMask/metamask-extension#25648](https://github.com/MetaMask/metamask-extension/pull/25648)
+
+- **Checkbox component for Snap Insights Signatures is re-rendered when the host value is loaded, making the checkbox unchecked if the click happens before the re-render**
+  [MetaMask/metamask-extension#27007](https://github.com/MetaMask/metamask-extension/pull/27007)
+
+- **The Add account modal needs to finish rendering the account list before proceeding with a click action — otherwise the re-render causes the click to be performed outside the popup, closing the modal**
+  [MetaMask/metamask-extension#27420](https://github.com/MetaMask/metamask-extension/pull/27420)
+
+- **In the onboarding flow, clicking an element while it is moving causes the click to have no effect. Added a new driver method to wait until the element is not moving**
+  [MetaMask/metamask-extension#27921](https://github.com/MetaMask/metamask-extension/pull/27921)
+
+- **In the carousel spec, looking for an element and then using `.click` — a re-render in between made the element stale. Should use the custom `clickElement` driver method**
+  [MetaMask/metamask-extension#33362](https://github.com/MetaMask/metamask-extension/pull/33362)
+
+---
+
+### Actions that Take Time
+
+- **Requests to Sentry take time — if the wait time is not enough, tests will be flaky**
+  [MetaMask/metamask-extension#26648](https://github.com/MetaMask/metamask-extension/pull/26648)
+
+- **Chrome takes time to write to .log files (storage) — vault decrypt test was flaky when trying to import the log file before it was finished writing**
+  [MetaMask/metamask-extension#26612](https://github.com/MetaMask/metamask-extension/pull/26612)
+
+- **The Connect action takes several seconds — the default timeout for the next action was not enough**
+  [MetaMask/metamask-extension#26792](https://github.com/MetaMask/metamask-extension/pull/26792)
+
+- **After going to metamask.io with Marketing feature enabled, the cookie id takes time to be added into MetaMask state**
+  [MetaMask/metamask-extension#26697](https://github.com/MetaMask/metamask-extension/pull/26697/files#diff-b1c4086e548781d946ed142c838710286d74e7043c5b7b0edce4e5f617091a52R73)
+
+- **Some `it` blocks are really long leading to timeout issues — not because the test fails, but because the 80000ms threshold is reached**
+  [MetaMask/metamask-extension#30044](https://github.com/MetaMask/metamask-extension/pull/30044)
+
+- **Metrics events can arrive out of order if 2 actions are performed in quick succession, leading to the 2nd event being triggered first**
+  [MetaMask/metamask-extension#30031](https://github.com/MetaMask/metamask-extension/pull/30031)
+
+- **Importing a function from another spec file causes the tests from that spec file to also be run, causing long test runs and possible timeouts**
+  [MetaMask/metamask-extension#30481](https://github.com/MetaMask/metamask-extension/pull/30481)
+
+- **Chain id is not immediately set when we land on the home page. For actions that rely on chain id, should wait until the balance is loaded**
+  [MetaMask/metamask-extension#31348](https://github.com/MetaMask/metamask-extension/pull/31348)
+
+- **Creating an account takes a few seconds to be loaded. Performing a subsequent action right away without checking can create race conditions (e.g., switching to Solana shows a dialog warning about missing Solana account)**
+  [MetaMask/metamask-extension#31382](https://github.com/MetaMask/metamask-extension/pull/31382)
+
+- **On the Swap page with a default token, adding an amount triggers quotes. Changing to a custom token before quotes finalize can load quotes for the previous token swap**
+  [MetaMask/metamask-extension#32233](https://github.com/MetaMask/metamask-extension/pull/32233)
+
+- **Re-starting the wallet after the vault was corrupt**
+  [MetaMask/metamask-extension#33591](https://github.com/MetaMask/metamask-extension/pull/33591)
+
+- **Scroll to bottom using the arrow button takes several seconds for the button to disappear (wallet-side bug)**
+  [MetaMask/metamask-extension#33493](https://github.com/MetaMask/metamask-extension/pull/33493)
+
+- **Writing to the local storage file takes time — Vault Decryptor test flaky because sometimes the backup file was empty on upload**
+  [MetaMask/metamask-extension#33646](https://github.com/MetaMask/metamask-extension/pull/33646)
+
+- **Request to Profile Sync after onboarding takes seconds — locking the wallet before this request causes "unable to proceed, wallet is locked" error**
+  [MetaMask/metamask-extension#33763](https://github.com/MetaMask/metamask-extension/pull/33763)
+
+- **After login, Authentication API requests take time to be triggered. Locking the wallet before they happen causes "wallet is locked" error**
+  [MetaMask/metamask-extension#34888](https://github.com/MetaMask/metamask-extension/pull/34888)
+
+- **Triggering several transactions from different dapps without waiting individually can cause transactions to appear in a different order**
+  [MetaMask/metamask-extension#35944](https://github.com/MetaMask/metamask-extension/pull/35944)
+
+---
+
+### Errors in the testing dapp
+
+- **A span element is nested inside the buttons for all Snap test e2e buttons — causes flakiness when interacting with the button. Fixed on the snap test dapp side**
+  [MetaMask/snaps#2782](https://github.com/MetaMask/snaps/pull/2782)
+  Related: [#27597](https://github.com/MetaMask/metamask-extension/issues/27597), [#27576](https://github.com/MetaMask/metamask-extension/issues/27576), [#26804](https://github.com/MetaMask/metamask-extension/issues/26804)
+
+- **Phishing detection page adds event listener later on, making the click to the malicious link do nothing**
+  [MetaMask/phishing-warning#173](https://github.com/MetaMask/phishing-warning/pull/173)
+
+---
+
+### Not using driver methods
+
+- **Using `element.click()` instead of `clickElement()` can cause race conditions when the element is present but not clickable. The driver function has appropriate guards in place**
+  [MetaMask/metamask-extension#27599](https://github.com/MetaMask/metamask-extension/pull/27599)
+
+---
+
+## Bugs Discovered on the Wallet Level while Investigating Flaky Tests
+
+- **Send - ENS resolution displays different address length previews**
+  [MetaMask/metamask-extension#25286](https://github.com/MetaMask/metamask-extension/issues/25286)
+
+- **Tokens - MM breaks with "Can't convert undefined to object"**
+  [MetaMask/metamask-extension#25266](https://github.com/MetaMask/metamask-extension/issues/25266)
+
+- **Gas - Race condition where gas is not updated after switching assets and going to the last Confirmation screen**
+  [MetaMask/metamask-extension#25243](https://github.com/MetaMask/metamask-extension/issues/25243)
+
+- **Assets - Importing an ERC1155 token throws "Contract does not support ERC721 metadata interface"**
+  [MetaMask/metamask-extension#24988](https://github.com/MetaMask/metamask-extension/issues/24988)
+
+- **Tokens - Cannot import a token ERC1155 if the IPFS call for the metadata takes long**
+  [MetaMask/metamask-extension#24710](https://github.com/MetaMask/metamask-extension/issues/24710)
+
+- **Onboarding rerouting when createNewAccount flow**
+  [MetaMask/metamask-extension#24874](https://github.com/MetaMask/metamask-extension/pull/24874)
+
+- **Announcements - NFT autodetection modal overlays Token autodetection modal**
+  [MetaMask/metamask-extension#25465](https://github.com/MetaMask/metamask-extension/issues/25465)
+
+- **Settings - Changing the app locale re-renders the state two times and displays the loading spinner 2 times**
+  [MetaMask/metamask-extension#25651](https://github.com/MetaMask/metamask-extension/issues/25651)
+
+- **Hardware Wallet - Going to the hardware wallet add account page in Firefox re-renders the state two times**
+  [MetaMask/metamask-extension#25851](https://github.com/MetaMask/metamask-extension/issues/25851)
+
+- **Race condition changes order in which transactions are displayed**
+  [MetaMask/metamask-extension#25251](https://github.com/MetaMask/metamask-extension/issues/25251)
+
+- **Assets - Add token doesn't close the MM dialog after Adding it (MMI-only)**
+  [MetaMask/metamask-extension#27854](https://github.com/MetaMask/metamask-extension/issues/27854)
+
+- **Wallet API - When connecting to the test dapp for the first time, switched to Mainnet automatically despite not having this network selected (Release Blocker)**
+  [MetaMask/metamask-extension#27891](https://github.com/MetaMask/metamask-extension/issues/27891)
+
+- **Network Switch - After switching networks for the first time, "Network switched" dialog sometimes appears and sometimes doesn't**
+  [MetaMask/metamask-extension#27870](https://github.com/MetaMask/metamask-extension/issues/27870)
+
+- **BTC Accounts - Portfolio link is not displayed when we have a BTC Account selected**
+  [MetaMask/metamask-extension#28185](https://github.com/MetaMask/metamask-extension/issues/28185)
+
+- **Blockaid security validation can be bypassed with race condition**
+  [Slack thread](https://consensys.slack.com/archives/C029JG63136/p1731690020573439?thread_ts=1729246801.516029&cid=C029JG63136)
+
+- **Wallet API queuing bug not fully fixed**
+  [Slack thread](https://consensys.slack.com/archives/CTQAGKY5V/p1731693702380099?thread_ts=1731579667.780579&cid=CTQAGKY5V)
+
+- **Balance polling starting with a locked wallet makes balance load forever when unlocked (until MM is refreshed)**
+  [commit 9aff235](https://github.com/MetaMask/metamask-extension/pull/28277/commits/9aff235d168598ac0c4da763a6eef0b7c7002212)
+
+- **Gas controls to edit Max base fee and Priority Fee do not support decimal point on Mac (test passed on Linux but not Mac)**
+  [MetaMask/metamask-extension#28843](https://github.com/MetaMask/metamask-extension/issues/28843)
+
+- **Send - When pasting an address without 0x prefix, the input is normalized but the Continue button remains disabled**
+  [MetaMask/metamask-extension#30349](https://github.com/MetaMask/metamask-extension/issues/30349)
+
+- **Gas API - Starting a transaction/swap makes a request to /networks/1/gasPrices even if not on Ethereum Mainnet**
+  [MetaMask/metamask-extension#33377](https://github.com/MetaMask/metamask-extension/issues/33377)
+
+- **Survey - 2 identical requests are made to the surveys endpoint whenever we start the wallet for the 1st time**
+  [MetaMask/metamask-extension#33604](https://github.com/MetaMask/metamask-extension/issues/33604)
+
+- **Error is re-thrown causing duplicated Error key (e.g., "Error: Error: Unable to find value of key...")**
+  [MetaMask/metamask-extension#34867](https://github.com/MetaMask/metamask-extension/issues/34867)
+
+- **Accounts - Repeated API GET request to profile/lineage after login**
+  [MetaMask/metamask-extension#34938](https://github.com/MetaMask/metamask-extension/issues/34938)
+
+- **ENS resolution - IPFS resolves domains before I've onboarded to the wallet**
+  [MetaMask/metamask-extension#35675](https://github.com/MetaMask/metamask-extension/issues/35675)
+
+- **Onboarding - Metametrics page sometimes appears on Chrome browser for Social login**
+  [MetaMask/metamask-extension#36070](https://github.com/MetaMask/metamask-extension/issues/36070)
+
+- **Accounts state not updated immediately after create-password**
+  [MetaMask/metamask-extension#36395](https://github.com/MetaMask/metamask-extension/pull/36395)
+
+---
+
+## E2E Anti-Patterns
+
+- **Directly asserting element values by text without waiting for that text to be present** using `assert(element.getText(), expected text)`
+  [MetaMask/metamask-extension#19870](https://github.com/MetaMask/metamask-extension/issues/19870)
+
+- **Looking for an element and then asserting it's displayed** with `assert.equal(await elem.isDisplayed(), true)` — causes race conditions if the element updates between the lookup and assertion (e.g., transaction changes from pending to confirmed, throwing "stale element" error)
+  [MetaMask/metamask-extension#27928](https://github.com/MetaMask/metamask-extension/pull/27928/files#r1805186006)
+
+- **Using `element.click()` instead of `clickElement()`** — looking for the element and then using `.click` can cause race conditions if the element re-renders and becomes stale. The `clickElement` driver method has a guard for this
+  [MetaMask/metamask-extension#27599](https://github.com/MetaMask/metamask-extension/pull/27599)
+
+- **Going to live sites** (portfolio dapp, snap dapp, vault decrypt page) instead of using mocks
+  > Note: [a catch-all mock PR](https://github.com/MetaMask/metamask-extension/) exists, but currently 130+ specs fail because they rely on live requests. Once fixed and merged, it won't be possible to introduce changes without adding corresponding mocks.
+
+- **Adding delays instead of waiting for conditions**, whenever possible
+
+- **Importing a function from another spec file** — this causes the tests from the imported spec file to also run, leading to long test runs and possible timeouts (>80000ms)
+  [MetaMask/metamask-extension#30481](https://github.com/MetaMask/metamask-extension/pull/30481/files#r1965313492)
+
+---
+
+## Unit Test Flakiness Categories
+
+- **A property of the store is sometimes undefined**
+  [MetaMask/metamask-extension#27941](https://github.com/MetaMask/metamask-extension/pull/27941)
+
+---
+
+## Flakiness on Other CI Jobs
+
+- **The lint-lockfile job is flaky as it's under-resourced** — fixed by changing resources from medium to medium-plus
+  [MetaMask/metamask-extension#27950](https://github.com/MetaMask/metamask-extension/pull/27950)
+
+- **Rate limited by yarnpkg returning 429 Too Many Requests** — makes any job dependent on yarn fail
+  [Slack thread](https://consensys.slack.com/archives/CTQAGKY5V/p1747406828996759)
diff --git a/.github/scripts/analyze-flaky-tests/llm/claude-analyzer.ts b/.github/scripts/analyze-flaky-tests/llm/claude-analyzer.ts
new file mode 100644
index 00000000..6573c3ff
--- /dev/null
+++ b/.github/scripts/analyze-flaky-tests/llm/claude-analyzer.ts
@@ -0,0 +1,136 @@
+import Anthropic from '@anthropic-ai/sdk';
+import type { AnalysisResult, FlakyTestFailure } from '../types';
+import { getToolDefinitions, executeToolCall } from './tools';
+import type { ToolContext } from './tools';
+
+const MODEL = 'claude-sonnet-4-20250514'; // model ID sent with every messages.create request
+const MAX_TOKENS = 4096; // max_tokens for each individual messages.create response
+const MAX_ITERATIONS = 10; // upper bound on request/tool-execution rounds before analyzeWithClaude gives up
+
+/**
+ * Maps the loosely-typed fields returned by Claude onto an AnalysisResult.
+ * `confidence` must be a number and `specificLines` an array, otherwise the
+ * defaults (50 and []) apply; the other fields default only when null/undefined.
+ * Shared by the `submit_analysis` tool path and the plain-text JSON fallback.
+ */
+function toAnalysisResult(
+  fields: Record<string, unknown>,
+  failure: FlakyTestFailure,
+): AnalysisResult {
+  return {
+    testName: failure.name,
+    testPath: failure.path,
+    classification: (fields.classification as AnalysisResult['classification']) ?? 'flaky_test',
+    confidence: typeof fields.confidence === 'number' ? fields.confidence : 50,
+    rootCauseCategory: (fields.rootCauseCategory as string) ?? 'other',
+    rootCauseExplanation: (fields.rootCauseExplanation as string) ?? 'Unable to determine root cause.',
+    specificLines: Array.isArray(fields.specificLines) ? (fields.specificLines as string[]) : [],
+    suggestedFix: (fields.suggestedFix as string) ?? 'No suggestion available.',
+    additionalNotes: (fields.additionalNotes as string) ?? '',
+  };
+}
+
+/**
+ * Returns the analysis carried by the first `submit_analysis` tool call in
+ * `content`, or null when the response contains no such call.
+ */
+function extractAnalysisFromToolCall(
+  content: Anthropic.Messages.ContentBlock[],
+  failure: FlakyTestFailure,
+): AnalysisResult | null {
+  for (const block of content) {
+    if (block.type === 'tool_use' && block.name === 'submit_analysis') {
+      return toAnalysisResult(block.input as Record<string, unknown>, failure);
+    }
+  }
+  return null;
+}
+
+/**
+ * Parses model text as a JSON object, unwrapping a Markdown code fence first
+ * when one is present. Returns null for invalid JSON and for JSON that is not
+ * a plain object (null, array, number, string, boolean): such values carry no
+ * analysis fields and must not be turned into a defaults-only result.
+ */
+function parseJsonObject(text: string): Record<string, unknown> | null {
+  let cleaned = text.trim();
+  const jsonMatch = cleaned.match(/```(?:json)?\s*([\s\S]*?)```/);
+  if (jsonMatch?.[1]) cleaned = jsonMatch[1].trim();
+  try {
+    const parsed: unknown = JSON.parse(cleaned);
+    const isObject = typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed);
+    return isObject ? (parsed as Record<string, unknown>) : null;
+  } catch {
+    return null;
+  }
+}
+
+/**
+ * Runs the tool-using analysis loop for a single test failure.
+ *
+ * Each iteration sends the conversation to Claude. A `submit_analysis` call
+ * ends the loop with that analysis; an `end_turn` without one falls back to
+ * parsing the reply text as JSON; otherwise the requested tools are executed
+ * and their (truncated) results are appended for the next iteration.
+ *
+ * @param initialPrompt - First user message of the conversation.
+ * @param failure - Failure being analyzed; supplies the test name and path.
+ * @param apiKey - Anthropic API key.
+ * @param toolContext - Passed to every tool call; owner/repo also shape the tool definitions.
+ * @returns The analysis submitted by Claude (tool call or JSON text).
+ * @throws Error if Claude ends without a usable analysis, stops for a reason
+ *   other than `end_turn`/`tool_use`, or MAX_ITERATIONS is exhausted.
+ */
+export async function analyzeWithClaude(
+  initialPrompt: string,
+  failure: FlakyTestFailure,
+  apiKey: string,
+  toolContext: ToolContext,
+): Promise<AnalysisResult> {
+  const client = new Anthropic({ apiKey });
+  const tools = getToolDefinitions(toolContext.owner, toolContext.repo);
+  const messages: Anthropic.Messages.MessageParam[] = [{ role: 'user', content: initialPrompt }];
+
+  for (let iteration = 0; iteration < MAX_ITERATIONS; iteration++) {
+    const response = await client.messages.create({ model: MODEL, max_tokens: MAX_TOKENS, tools, messages });
+
+    // A submitted analysis ends the conversation, so nothing is sent back for it.
+    const submittedAnalysis = extractAnalysisFromToolCall(response.content, failure);
+    if (submittedAnalysis) return submittedAnalysis;
+
+    if (response.stop_reason === 'end_turn') {
+      const textBlock = response.content.find((b) => b.type === 'text');
+      if (!textBlock || textBlock.type !== 'text') {
+        throw new Error('Claude ended conversation without producing an analysis.');
+      }
+      const parsed = parseJsonObject(textBlock.text);
+      if (!parsed) {
+        throw new Error(`Claude ended without calling submit_analysis. Raw response: ${textBlock.text.substring(0, 200)}`);
+      }
+      return toAnalysisResult(parsed, failure);
+    }
+
+    if (response.stop_reason !== 'tool_use') {
+      throw new Error(`Unexpected stop_reason: ${response.stop_reason}`);
+    }
+
+    const toolUseBlocks = response.content.filter(
+      (block): block is Anthropic.Messages.ToolUseBlock => block.type === 'tool_use',
+    );
+
+    const toolResults: Anthropic.Messages.ToolResultBlockParam[] = [];
+    for (const toolUse of toolUseBlocks) {
+      console.log(`    [tool] ${toolUse.name}(${JSON.stringify(toolUse.input).substring(0, 100)})`);
+      const result = await executeToolCall(toolUse.name, toolUse.input as Record<string, unknown>, toolContext);
+      const truncated = result.length > 15000
+        ? `${result.substring(0, 15000)}\n... (truncated, ${result.length} chars total)`
+        : result;
+      toolResults.push({ type: 'tool_result', tool_use_id: toolUse.id, content: truncated });
+    }
+
+    messages.push({ role: 'assistant', content: response.content });
+    messages.push({ role: 'user', content: toolResults });
+  }
+
+  throw new Error(`Analysis did not complete within ${MAX_ITERATIONS} iterations.`);
+}
diff --git a/.github/scripts/analyze-flaky-tests/llm/prompt-builder.ts b/.github/scripts/analyze-flaky-tests/llm/prompt-builder.ts
new file mode 100644
index 00000000..b0eafd27
--- /dev/null
+++ b/.github/scripts/analyze-flaky-tests/llm/prompt-builder.ts
@@ -0,0 +1,50 @@
+import type { FlakyTestFailure } from '../types';
+
+/**
+ * Assembles the opening user message for the agentic analysis loop. It embeds
+ * only the failure metadata and the CI log excerpt; source code, knowledge-base
+ * sections and past fixes are left for the agent to fetch with its tools.
+ * @param failure - Supplies the name, path, error, failure/retry counts and run/job IDs.
+ * @param logSection - CI log excerpt, inserted verbatim inside a code fence.
+ * @returns The prompt text; `owner`/`repo` appear in it as `owner/repo`.
+ */
+export function buildInitialPrompt(
+  failure: FlakyTestFailure,
+  logSection: string,
+  owner: string,
+  repo: string,
+): string {
+  const repository = `${owner}/${repo}`;
+  const outcomeLabel = failure.isFlaky ? 'Flaky (passed after retry)' : 'Real failure';
+
+  return `You are an expert at diagnosing flaky E2E tests in the ${repository} repository.
+
+You have tools available to investigate this failure. Use them to:
+1. Fetch the test source file and any page objects or helpers it imports (use search_test_file first if you're unsure of the exact path)
+2. Look up relevant flakiness patterns from the knowledge base (list categories first, then fetch specific ones)
+3. Search for similar past fixes if applicable
+4. Fetch CI job logs if you need more context about the failure (use fetch_job_logs with run_id to discover jobs, then fetch specific job logs)
+
+## Failure Information
+- Test name: ${failure.name}
+- Test file: ${failure.path}
+- Error message: ${failure.lastError}
+- Times failed: ${failure.realFailures} real failures, ${failure.totalRetries} retries
+- Classification: ${outcomeLabel}
+- Run ID: ${failure.runId ?? 'N/A'}
+- Job ID: ${failure.jobId ?? 'N/A'}
+
+## Full Error + Stack Trace from CI Logs
+\`\`\`
+${logSection}
+\`\`\`
+
+## Investigation Guidelines
+- Most failures in this report are flaky tests, not app bugs
+- If the test file path returns "not found", use search_test_file to discover the correct path
+- Use list_flakiness_categories and get_flakiness_patterns to learn about known flakiness patterns and anti-patterns for this repository
+- Fetch the test source, read its imports, and fetch relevant page objects or helpers
+- Search for similar past fixes if applicable
+
+Start by fetching the test file at "${failure.path}". If it's not found, use search_test_file to find the correct path. Then investigate as needed. When done, call submit_analysis with your findings.`;
+}
diff --git a/.github/scripts/analyze-flaky-tests/llm/tools.ts b/.github/scripts/analyze-flaky-tests/llm/tools.ts
new file mode 100644
index 00000000..0324fa1a
--- /dev/null
+++ b/.github/scripts/analyze-flaky-tests/llm/tools.ts
@@ -0,0 +1,284 @@
+import type { Octokit } from '@octokit/rest';
+import type Anthropic from '@anthropic-ai/sdk';
+import { fetchFileContent, searchTestFiles } from '../utils/test-source-reader';
+import { searchFixesByKeyword } from '../utils/past-fixes-fetcher';
+import { fetchJobLogById, listE2eJobs } from '../utils/job-log-fetcher';
+import {
+  getKnowledgeSection,
+  listKnowledgeSections,
+} from '../utils/knowledge-base';
+
+export interface ToolContext { // dependencies executeToolCall needs to run a tool
+  octokit: Octokit; // GitHub API client handed to every repository-backed helper
+  owner: string; // owner part of the target repository (owner/repo)
+  repo: string; // name part of the target repository (owner/repo)
+}
+
+export function getToolDefinitions(
+  owner: string,
+  repo: string,
+): Anthropic.Messages.Tool[] {
+  const repoRef = `${owner}/${repo}`;
+  // Tool schemas offered to the model; repoRef is interpolated into descriptions so they name the target repository.
+  return [
+    {
+      name: 'fetch_file',
+      description:
+        `Fetch the contents of a file from the ${repoRef} repository. ` +
+        'Use this to read test files, page objects, helpers, fixtures, or any source code you need to investigate.',
+      input_schema: {
+        type: 'object' as const,
+        properties: {
+          path: {
+            type: 'string',
+            description:
+              'File path relative to the repo root, e.g. "test/e2e/tests/connections/edit-account-permissions.spec.ts"',
+          },
+        },
+        required: ['path'],
+      },
+    },
+    {
+      name: 'search_test_file',
+      description:
+        `Search for test files in ${repoRef} by name or keyword. ` +
+        'Returns matching file paths under test/e2e/. Use when you do not know the exact path or when fetch_file returns "not found".',
+      input_schema: {
+        type: 'object' as const,
+        properties: {
+          query: {
+            type: 'string',
+            description:
+              'Keyword(s) to search for in file paths, e.g. "ens" or "refresh-auth" or "cronjob spec"',
+          },
+        },
+        required: ['query'],
+      },
+    },
+    {
+      name: 'fetch_job_logs',
+      description:
+        `Fetch GitHub Actions job logs from ${repoRef}. ` +
+        'Provide either a job_id to fetch logs directly, or a run_id to list all e2e jobs in that workflow run. ' +
+        'Optionally provide test_name to extract just the relevant failure section from the logs.',
+      input_schema: {
+        type: 'object' as const,
+        properties: {
+          job_id: {
+            type: 'number',
+            description: 'Specific GitHub Actions job ID to fetch logs for',
+          },
+          run_id: {
+            type: 'number',
+            description: 'Workflow run ID -- lists all e2e test jobs so you can pick one to fetch logs from',
+          },
+          test_name: {
+            type: 'string',
+            description: 'Test name to search for in logs (narrows the log output to the relevant failure)',
+          },
+        }, // no "required" list here: job_id, run_id and test_name are all optional in the schema
+      },
+    },
+    {
+      name: 'get_flakiness_patterns',
+      description:
+        'Get a specific section from the flakiness knowledge base. ' +
+        'Each section documents a category of flakiness with real examples and fix PRs. ' +
+        'Use a keyword to match a section (e.g. "race conditions", "mocks", "popups", "windows", "re-renders", "assertions", "anti-patterns").',
+      input_schema: {
+        type: 'object' as const,
+        properties: {
+          category: {
+            type: 'string',
+            description:
+              'Keyword to match a knowledge base section, e.g. "race conditions windows", "mocks", "popups modals", "assertions", "anti-patterns"',
+          },
+        },
+        required: ['category'],
+      },
+    },
+    {
+      name: 'list_flakiness_categories',
+      description:
+        'List all available section headings in the flakiness knowledge base. ' +
+        'Call this first to discover what categories are available before requesting a specific one.',
+      input_schema: {
+        type: 'object' as const,
+        properties: {},
+      },
+    },
+    {
+      name: 'search_similar_fixes',
+      description:
+        `Search for merged pull requests in ${repoRef} that fixed similar flaky test issues. ` +
+        'Returns PR titles and diffs filtered to test file changes. Use keywords from the error message or test pattern.',
+      input_schema: {
+        type: 'object' as const,
+        properties: {
+          query: {
+            type: 'string',
+            description:
+              'Search keyword(s) to find similar past fixes, e.g. "stale element", "waitForSelector", "click intercepted", or the test file name',
+          },
+        },
+        required: ['query'],
+      },
+    },
+    {
+      name: 'submit_analysis',
+      description:
+        'Submit the final analysis of the flaky test failure. Call this exactly once when you have completed your investigation.',
+      input_schema: {
+        type: 'object' as const,
+        properties: {
+          classification: {
+            type: 'string',
+            enum: ['flaky_test', 'app_bug', 'infra_issue'],
+            description: 'The type of failure',
+          },
+          confidence: {
+            type: 'number',
+            description: 'Confidence level 0-100',
+          },
+          rootCauseCategory: {
+            type: 'string',
+            enum: [
+              'timing',
+              'element_state',
+              'network_race',
+              'stale_reference',
+              'state_leakage',
+              'animation',
+              'missing_mock',
+              'unnecessary_steps',
+              'window_race',
+              'react_rerender',
+              'popup_modal',
+              'other',
+            ],
+            description: 'The category of root cause',
+          },
+          rootCauseExplanation: {
+            type: 'string',
+            description: '2-3 sentence explanation of what is causing the flakiness',
+          },
+          specificLines: {
+            type: 'array',
+            items: { type: 'string' },
+            description: 'Line numbers or code snippets causing the issue',
+          },
+          suggestedFix: {
+            type: 'string',
+            description: 'Detailed description of the fix with before/after code',
+          },
+          additionalNotes: {
+            type: 'string',
+            description: 'Any other observations',
+          },
+        },
+        required: [
+          'classification',
+          'confidence',
+          'rootCauseCategory',
+          'rootCauseExplanation',
+          'specificLines',
+          'suggestedFix',
+          'additionalNotes',
+        ],
+      },
+    },
+  ];
+}
+
+/**
+ * Runs one tool requested by the model and returns its result as plain text.
+ *
+ * Every outcome, including "nothing found" and unknown tool names, is reported
+ * as a string; errors thrown by the underlying helpers are not caught here.
+ *
+ * @param toolName - Tool identifier as declared in getToolDefinitions.
+ * @param toolInput - Raw arguments supplied by the model (not validated here).
+ * @param context - Octokit client plus the owner/repo the tools operate on.
+ * @returns Text to send back to the model as the tool result.
+ */
+export async function executeToolCall(
+  toolName: string,
+  toolInput: Record<string, unknown>,
+  context: ToolContext,
+): Promise<string> {
+  const { octokit, owner, repo } = context;
+
+  if (toolName === 'fetch_file') {
+    const requestedPath = toolInput.path as string;
+    const fileText = await fetchFileContent(octokit, owner, repo, requestedPath);
+    return fileText ?? `File not found or could not be fetched: ${requestedPath}`;
+  }
+
+  if (toolName === 'search_test_file') {
+    const keywords = toolInput.query as string;
+    const paths = await searchTestFiles(octokit, owner, repo, keywords);
+    if (paths.length === 0) {
+      return `No test files found matching "${keywords}". Try broader keywords.`;
+    }
+    const listing = paths.map((p) => `- ${p}`).join('\n');
+    return `Found ${paths.length} matching test file(s):\n${listing}`;
+  }
+
+  if (toolName === 'fetch_job_logs') {
+    const jobId = toolInput.job_id as number | undefined;
+    const runId = toolInput.run_id as number | undefined;
+    const testName = toolInput.test_name as string | undefined;
+
+    // A job id takes precedence: fetch that job's log directly.
+    if (jobId) {
+      return fetchJobLogById(octokit, owner, repo, jobId, testName);
+    }
+    if (!runId) {
+      return 'Provide either job_id or run_id.';
+    }
+
+    // Only a run id: list its e2e jobs so the model can pick one.
+    const e2eJobs = await listE2eJobs(octokit, owner, repo, runId);
+    if (e2eJobs.length === 0) {
+      return `No e2e test jobs found in run ${runId}.`;
+    }
+    const jobLines = e2eJobs
+      .map((j) => `- Job ${j.id}: ${j.name} (conclusion: ${j.conclusion ?? 'running'})`)
+      .join('\n');
+    return (
+      `Found ${e2eJobs.length} e2e job(s) in run ${runId}:\n` +
+      jobLines +
+      '\n\nCall fetch_job_logs again with a specific job_id to get logs.'
+    );
+  }
+
+  if (toolName === 'get_flakiness_patterns') {
+    return getKnowledgeSection(toolInput.category as string);
+  }
+
+  if (toolName === 'list_flakiness_categories') {
+    const headings = listKnowledgeSections().map((s) => `- ${s}`).join('\n');
+    return `Available knowledge base sections:\n${headings}`;
+  }
+
+  if (toolName === 'search_similar_fixes') {
+    const keywords = toolInput.query as string;
+    const pastFixes = await searchFixesByKeyword(octokit, owner, repo, keywords);
+    if (pastFixes.length === 0) {
+      return `No merged flaky test fix PRs found matching "${keywords}".`;
+    }
+    return pastFixes
+      .map(
+        (fix) =>
+          `### PR #${fix.prNumber}: ${fix.title}\n\`\`\`diff\n${fix.diffContent}\n\`\`\``,
+      )
+      .join('\n\n');
+  }
+
+  if (toolName === 'submit_analysis') {
+    // Echo the structured findings back as JSON text.
+    return JSON.stringify(toolInput);
+  }
+
+  return `Unknown tool: ${toolName}`;
+}
diff --git a/.github/scripts/analyze-flaky-tests/types.ts b/.github/scripts/analyze-flaky-tests/types.ts
new file mode 100644
index 00000000..4f5724c4
--- /dev/null
+++ b/.github/scripts/analyze-flaky-tests/types.ts
@@ -0,0 +1,45 @@
+export interface FlakyTestFailure { // one failing test; same field names as the entries create-flaky-test-report.mjs writes to failures_json
+  name: string; // test name
+  path: string; // test file path
+  realFailures: number; // number of real failures
+  totalRetries: number; // number of retries
+  lastError: string; // producer uses the last real-failure error, else the flaky-failure error, else ''
+  jobId: number; // NOTE(review): producer computes `a || b` from possibly-missing ids, so this may be undefined at runtime — confirm
+  runId: number; // NOTE(review): same caveat as jobId
+  suite: string;
+  isFlaky: boolean; // producer sets this to realFailures === 0
+}
+
+export interface AnalysisResult { // analysis of one failure; fields from classification onward share names with the submit_analysis tool schema
+  testName: string;
+  testPath: string;
+  classification: 'flaky_test' | 'app_bug' | 'infra_issue';
+  confidence: number; // tool schema describes this as 0-100; rendered with a '%' suffix in Slack
+  rootCauseCategory: string; // tool schema restricts this to an enum such as 'timing' or 'missing_mock'; typed as plain string here
+  rootCauseExplanation: string;
+  specificLines: string[]; // line numbers or code snippets; joined with '\n' for the Slack code block
+  suggestedFix: string;
+  additionalNotes: string;
+}
+
+export interface SlackFinding { // input for rendering one per-test Slack message
+  failure: FlakyTestFailure;
+  analysis: AnalysisResult;
+  jobUrl: string; // target of the "Job Log" link
+  fileUrl: string; // target of the "Test File" link
+}
+
+export interface TestSourceContext { // shape returned by fetchTestSource
+  testFileContent: string; // file text, or a "Could not fetch test file: <path>" notice
+  testFilePath: string;
+  pageObjects: Array<{ // imported page-object/flow files that could be fetched
+    path: string;
+    content: string;
+  }>;
+}
+
+export interface PastFixExample { // one pull request returned by the past-fix search
+  prNumber: number;
+  title: string;
+  diffContent: string; // PR diff after filtering to test-related files and length capping
+}
diff --git a/.github/scripts/analyze-flaky-tests/utils/job-log-fetcher.ts b/.github/scripts/analyze-flaky-tests/utils/job-log-fetcher.ts
new file mode 100644
index 00000000..9a8b7bb7
--- /dev/null
+++ b/.github/scripts/analyze-flaky-tests/utils/job-log-fetcher.ts
@@ -0,0 +1,140 @@
+import type { Octokit } from '@octokit/rest';
+import type { FlakyTestFailure } from '../types';
+
+const CONTEXT_LINES = 100; // radius of the excerpt window: slice runs from anchor - 100 up to (not including) anchor + 100
+
+const ERROR_PATTERNS = [ // a log line matching any of these counts as an error line
+  /Error:/i, // substring match, so it also covers "AssertionError:" and "TimeoutError:"
+  /AssertionError:/i,
+  /TimeoutError:/i,
+  /AssertionError/i, // AssertionError without a trailing colon
+  /at\s+.*\.(spec|test)\.(ts|js)/, // "at ..." line mentioning a .spec/.test .ts/.js file
+];
+
+/**
+ * Cuts a failure-focused excerpt out of a raw job log.
+ *
+ * Anchor selection: the first error-looking line within 50 lines after the
+ * first mention of testName; else that mention itself; else the last
+ * error-looking line in the log; else the final 200 lines are returned.
+ *
+ * @param logText - Full log text, split on '\n'.
+ * @param testName - Substring searched for in each log line.
+ */
+function extractRelevantLogSection(
+  logText: string,
+  testName: string,
+): string {
+  const logLines = logText.split('\n');
+  const looksLikeError = (line: string | undefined): boolean =>
+    ERROR_PATTERNS.some((re) => re.test(line ?? ''));
+  const windowAround = (center: number): string =>
+    logLines
+      .slice(
+        Math.max(0, center - CONTEXT_LINES),
+        Math.min(logLines.length, center + CONTEXT_LINES),
+      )
+      .join('\n');
+
+  const mentionAt = logLines.findIndex((line) => line.includes(testName));
+  if (mentionAt !== -1) {
+    const scanLimit = Math.min(mentionAt + 50, logLines.length);
+    for (let idx = mentionAt; idx < scanLimit; idx++) {
+      if (looksLikeError(logLines[idx])) return windowAround(idx);
+    }
+    return windowAround(mentionAt);
+  }
+
+  // Test name never appears: fall back to the last error-looking line.
+  for (let idx = logLines.length - 1; idx >= 0; idx--) {
+    if (looksLikeError(logLines[idx])) return windowAround(idx);
+  }
+  return logLines.slice(-200).join('\n');
+}
+
+/** Fetches the log excerpt for a failure's job, or returns a notice when the failure has no job id. */
+export async function fetchJobLog(
+  octokit: Octokit,
+  failure: FlakyTestFailure,
+  owner: string,
+  repo: string,
+): Promise<string> {
+  const { jobId, name } = failure;
+  return jobId
+    ? fetchJobLogById(octokit, owner, repo, jobId, name)
+    : 'No job ID available for this failure.';
+}
+
+/**
+ * Downloads the log of one GitHub Actions job and returns it as text.
+ *
+ * With testName the log is reduced via extractRelevantLogSection; without it
+ * the raw log is returned, cut to its first 30000 characters when longer.
+ * Failures are logged and reported in the returned string rather than thrown.
+ */
+export async function fetchJobLogById(
+  octokit: Octokit,
+  owner: string,
+  repo: string,
+  jobId: number,
+  testName?: string,
+): Promise<string> {
+  const MAX_RAW_CHARS = 30000;
+  try {
+    const { data } = await octokit.rest.actions.downloadJobLogsForWorkflowRun({
+      owner,
+      repo,
+      job_id: jobId,
+    });
+    const rawLog = typeof data === 'string' ? data : String(data);
+    if (testName) return extractRelevantLogSection(rawLog, testName);
+    if (rawLog.length <= MAX_RAW_CHARS) return rawLog;
+    return `${rawLog.substring(0, MAX_RAW_CHARS)}\n... (truncated, ${rawLog.length} chars total)`;
+  } catch (error: unknown) {
+    const reason = error instanceof Error ? error.message : String(error);
+    console.error(`Failed to fetch job log for job ${jobId}: ${reason}`);
+    return `Failed to fetch job log: ${reason}`;
+  }
+}
+
+export interface E2eJobInfo { // summary of one workflow job as returned by listE2eJobs
+  id: number;
+  name: string;
+  conclusion: string | null; // null when the API reports no conclusion
+  htmlUrl: string; // the job's html_url, or '' when absent
+}
+
+/**
+ * Lists the jobs of a workflow run whose name contains "e2e" (case-insensitive).
+ * Reads at most 5 pages of 100 jobs and stops after the first short page.
+ */
+export async function listE2eJobs(
+  octokit: Octokit,
+  owner: string,
+  repo: string,
+  runId: number,
+): Promise<E2eJobInfo[]> {
+  const PAGE_SIZE = 100;
+  const MAX_PAGES = 5;
+  const e2eJobs: E2eJobInfo[] = [];
+
+  let page = 1;
+  while (page <= MAX_PAGES) {
+    const { data } = await octokit.rest.actions.listJobsForWorkflowRun({
+      owner,
+      repo,
+      run_id: runId,
+      per_page: PAGE_SIZE,
+      page,
+    });
+    for (const job of data.jobs) {
+      if (!job.name.toLowerCase().includes('e2e')) continue;
+      const { id, name, conclusion, html_url: url } = job;
+      e2eJobs.push({ id, name, conclusion: conclusion ?? null, htmlUrl: url ?? '' });
+    }
+    if (data.jobs.length < PAGE_SIZE) break;
+    page += 1;
+  }
+
+  return e2eJobs;
+}
diff --git a/.github/scripts/analyze-flaky-tests/utils/knowledge-base.ts b/.github/scripts/analyze-flaky-tests/utils/knowledge-base.ts
new file mode 100644
index 00000000..898536e9
--- /dev/null
+++ b/.github/scripts/analyze-flaky-tests/utils/knowledge-base.ts
@@ -0,0 +1,71 @@
+import fs from 'fs';
+import path from 'path';
+
+const KNOWLEDGE_PATH = path.join( // resolves to ../knowledge/extension-flakiness-patterns.md relative to this module's directory
+  __dirname,
+  '..',
+  'knowledge',
+  'extension-flakiness-patterns.md',
+);
+
+let cachedContent: string | null = null; // memoized file text: null = not loaded yet, '' = load failed
+
+/** Reads the knowledge-base markdown once and memoizes it ('' when the file cannot be read). */
+function loadKnowledgeBase(): string {
+  if (cachedContent !== null) return cachedContent;
+  let text = '';
+  try {
+    text = fs.readFileSync(KNOWLEDGE_PATH, 'utf-8');
+  } catch {
+    console.error('Warning: Could not load extension-flakiness-patterns.md');
+  }
+  return (cachedContent = text);
+}
+
+/**
+ * Lists the titles of every `##` and `###` markdown heading in the knowledge
+ * base, in document order.
+ */
+export function listKnowledgeSections(): string[] {
+  const markdown = loadKnowledgeBase();
+  const headingPattern = /^#{2,3}\s+(.+)$/gm;
+  const titles: string[] = [];
+  for (let m = headingPattern.exec(markdown); m !== null; m = headingPattern.exec(markdown)) {
+    if (m[1]) titles.push(m[1]);
+  }
+  return titles;
+}
+
+/**
+ * Returns the knowledge-base section whose heading matches `category`.
+ *
+ * Both `##` and `###` headings are searchable, so every title reported by
+ * listKnowledgeSections can be fetched. Matching ignores case and treats `_`/`-`
+ * as spaces: a heading containing the whole query wins, else the first heading
+ * containing every query word; `##` headings are preferred in each pass. The
+ * section runs to the next heading of the same or a higher level.
+ */
+export function getKnowledgeSection(category: string): string {
+  const content = loadKnowledgeBase();
+  if (!content) return 'Knowledge base not available.';
+
+  const normalize = (s: string): string => s.toLowerCase().replace(/[_-]/g, ' ');
+  const phrase = normalize(category);
+  const words = phrase.split(/\s+/).filter(Boolean);
+  const headings: Array<{ title: string; index: number; level: number }> = [];
+  const headingRegex = /^(#{2,3}) (.+)$/gm;
+  for (let m = headingRegex.exec(content); m !== null; m = headingRegex.exec(content)) {
+    if (m[1] && m[2]) headings.push({ title: m[2], index: m.index, level: m[1].length });
+  }
+  const pick = (test: (title: string) => boolean) =>
+    headings.find((h) => h.level === 2 && test(normalize(h.title))) ??
+    headings.find((h) => test(normalize(h.title)));
+  const matched =
+    pick((t) => t.includes(phrase)) ??
+    (words.length > 0 ? pick((t) => words.every((w) => t.includes(w))) : undefined);
+  if (!matched) {
+    return `No section matching "${category}" found. Available sections: ${headings.map((h) => h.title).join(', ')}`;
+  }
+  const next = headings.find((h) => h.index > matched.index && h.level <= matched.level);
+  return content.substring(matched.index, next ? next.index : content.length).trim();
+}
diff --git a/.github/scripts/analyze-flaky-tests/utils/past-fixes-fetcher.ts b/.github/scripts/analyze-flaky-tests/utils/past-fixes-fetcher.ts
new file mode 100644
index 00000000..3ee1f06e
--- /dev/null
+++ b/.github/scripts/analyze-flaky-tests/utils/past-fixes-fetcher.ts
@@ -0,0 +1,123 @@
+import type { Octokit } from '@octokit/rest';
+import type { PastFixExample } from '../types';
+
+const MAX_PAST_FIXES = 5; // NOTE(review): not referenced anywhere in this file (searchPRs hard-codes per_page: 3) — confirm intent
+const MAX_DIFF_LENGTH = 5000; // character cap applied to each PR's filtered diff
+
+/**
+ * Reduces a unified diff to the per-file sections that touch e2e test code
+ * and caps the result at MAX_DIFF_LENGTH characters.
+ *
+ * A section is kept when the paths on its `diff --git` header line contain
+ * `test/e2e/`, `page-objects/` or `.spec.`. Only that header line is checked:
+ * testing the whole section would also keep unrelated files whose changed
+ * lines merely mention one of those strings.
+ */
+function filterDiffToTestFiles(diff: string): string {
+  const TEST_PATH_MARKERS = ['test/e2e/', 'page-objects/', '.spec.'];
+  const joined = diff
+    .split(/^diff --git /m)
+    .slice(1) // element 0 is whatever precedes the first file header
+    .filter((section) => {
+      const header = section.split('\n', 1)[0] ?? '';
+      return TEST_PATH_MARKERS.some((marker) => header.includes(marker));
+    })
+    .map((section) => `diff --git ${section}`)
+    .join('\n');
+  return joined.length > MAX_DIFF_LENGTH ? `${joined.substring(0, MAX_DIFF_LENGTH)}\n... (diff truncated)` : joined;
+}
+
+/** Cleans free text so it can be embedded inside a quoted search term. */
+function sanitizeSearchQuery(raw: string): string {
+  // Drop quotes, backslashes, backticks and brackets; collapse whitespace; cap at 80 chars.
+  const withoutSpecials = raw.replace(/['"\\`{}[\]()]/g, '');
+  const singleSpaced = withoutSpecials.replace(/\s+/g, ' ').trim();
+  return singleSpaced.substring(0, 80);
+}
+
+/**
+ * Runs one GitHub issue/PR search (top 3 hits) and loads each hit's diff.
+ *
+ * Diffs are narrowed with filterDiffToTestFiles; hits whose narrowed diff is
+ * blank are skipped. A hit whose diff cannot be fetched is logged and skipped.
+ * Errors from the search request itself propagate to the caller.
+ */
+async function searchPRs(
+  octokit: Octokit,
+  owner: string,
+  repo: string,
+  query: string,
+): Promise<PastFixExample[]> {
+  const search = await octokit.rest.search.issuesAndPullRequests({
+    q: query,
+    per_page: 3,
+  });
+
+  const examples: PastFixExample[] = [];
+  for (const { number: prNumber, title } of search.data.items) {
+    try {
+      const pr = await octokit.rest.pulls.get({
+        owner,
+        repo,
+        pull_number: prNumber,
+        mediaType: { format: 'diff' },
+      });
+      const rawDiff = typeof pr.data === 'string' ? pr.data : String(pr.data);
+      const diffContent = filterDiffToTestFiles(rawDiff);
+      if (diffContent.trim()) {
+        examples.push({ prNumber, title, diffContent });
+      }
+    } catch (error: unknown) {
+      const reason = error instanceof Error ? error.message : String(error);
+      console.error(`Failed to fetch diff for PR #${prNumber}: ${reason}`);
+    }
+  }
+
+  return examples;
+}
+
+/**
+ * Finds merged PRs related to a keyword, trying a "flaky"-qualified search
+ * first and a broader "test e2e" search second; the first non-empty result
+ * wins. A failing search is logged and the next one is tried.
+ */
+export async function searchFixesByKeyword(
+  octokit: Octokit,
+  owner: string,
+  repo: string,
+  keyword: string,
+): Promise<PastFixExample[]> {
+  const term = sanitizeSearchQuery(keyword);
+  if (term === '') return [];
+
+  const scope = `repo:${owner}/${repo} is:pr is:merged`;
+  for (const q of [`${scope} "flaky" "${term}"`, `${scope} "${term}" test e2e`]) {
+    try {
+      const found = await searchPRs(octokit, owner, repo, q);
+      if (found.length > 0) return found;
+    } catch (error: unknown) {
+      const reason = error instanceof Error ? error.message : String(error);
+      console.error(`Search query failed ("${q.substring(0, 60)}..."): ${reason}`);
+    }
+  }
+  return [];
+}
+
+/**
+ * Searches merged PRs mentioning "flaky" e2e tests (sorted by last update) as
+ * generic fix examples. Search errors are logged and yield an empty list.
+ */
+export async function fetchPastFixes(
+  octokit: Octokit,
+  owner: string,
+  repo: string,
+): Promise<PastFixExample[]> {
+  const query = `repo:${owner}/${repo} is:pr is:merged "flaky" test e2e sort:updated-desc`;
+  try {
+    return await searchPRs(octokit, owner, repo, query);
+  } catch (error: unknown) {
+    const reason = error instanceof Error ? error.message : String(error);
+    console.error(`Failed to search for past flaky test fixes: ${reason}`);
+    return [];
+  }
+}
diff --git a/.github/scripts/analyze-flaky-tests/utils/slack-reporter.ts b/.github/scripts/analyze-flaky-tests/utils/slack-reporter.ts
new file mode 100644
index 00000000..d2849954
--- /dev/null
+++ b/.github/scripts/analyze-flaky-tests/utils/slack-reporter.ts
@@ -0,0 +1,137 @@
+import { WebClient } from '@slack/web-api';
+import type { SlackFinding } from '../types';
+
+/**
+ * Builds the blocks of the thread's summary message: a header with the
+ * number of findings, a context line tallying findings per classification,
+ * and a divider.
+ */
+function buildSummaryBlocks(findings: SlackFinding[]): object[] {
+  const countOf = (kind: SlackFinding['analysis']['classification']): number =>
+    findings.filter(({ analysis }) => analysis.classification === kind).length;
+
+  const flakyCount = countOf('flaky_test');
+  const bugCount = countOf('app_bug');
+  const infraCount = countOf('infra_issue');
+  const tally = `Classification: ${flakyCount} flaky tests | ${bugCount} app bugs | ${infraCount} infra issues`;
+
+  const header = {
+    type: 'header',
+    text: {
+      type: 'plain_text',
+      text: `AI Analysis of ${findings.length} Flaky Tests`,
+      emoji: true,
+    },
+  };
+
+  const tallyLine = {
+    type: 'context',
+    elements: [
+      { type: 'mrkdwn', text: tally },
+    ],
+  };
+
+  return [header, tallyLine, { type: 'divider' }];
+}
+
+// Caps text at maxLength characters *including* the '...' suffix, so callers' limits (e.g. 150 for the header) hold.
+function truncate(text: string, maxLength: number): string {
+  return text.length <= maxLength ? text : `${text.substring(0, Math.max(0, maxLength - 3))}...`;
+}
+
+/**
+ * Renders one analyzed failure as Slack blocks, in this order:
+ *   1. header with the test name,
+ *   2. context line: classification, confidence and root-cause category,
+ *   3. diagnosis section,
+ *   4. problematic-code section (only when specificLines is non-empty),
+ *   5. suggested-fix section,
+ *   6. context line linking the job log and the test file,
+ *   7. divider.
+ * Free-text fields go through truncate() before being embedded.
+ */
+function buildFindingBlocks(finding: SlackFinding): object[] {
+  const { failure, analysis, jobUrl, fileUrl } = finding;
+
+  // Small factories for the three block shapes used repeatedly below.
+  const mrkdwn = (text: string) => ({ type: 'mrkdwn', text });
+  const section = (text: string) => ({ type: 'section', text: mrkdwn(text) });
+  const context = (text: string) => ({
+    type: 'context',
+    elements: [mrkdwn(text)],
+  });
+
+  // Yellow for flaky tests, red for app bugs, white for anything else.
+  let classificationEmoji = ':white_circle:';
+  if (analysis.classification === 'flaky_test') {
+    classificationEmoji = ':large_yellow_circle:';
+  } else if (analysis.classification === 'app_bug') {
+    classificationEmoji = ':red_circle:';
+  }
+  const label = analysis.classification.replace('_', ' ');
+
+  const blocks: object[] = [];
+
+  // Header: test name, passed through truncate with a limit of 150.
+  blocks.push({
+    type: 'header',
+    text: {
+      type: 'plain_text',
+      text: truncate(`Test: ${failure.name}`, 150),
+      emoji: true,
+    },
+  });
+  blocks.push(
+    context(
+      `${classificationEmoji} ${label} | Confidence: ${analysis.confidence}% | Root cause: ${analysis.rootCauseCategory}`,
+    ),
+  );
+  // Section bodies are passed through truncate with a limit of 2900.
+  blocks.push(
+    section(`*Diagnosis*\n${truncate(analysis.rootCauseExplanation, 2900)}`),
+  );
+
+  // Optional: present only when the analysis named specific lines.
+  if (analysis.specificLines.length > 0) {
+    const snippet = truncate(analysis.specificLines.join('\n'), 2900);
+    blocks.push(section(`*Problematic Code*\n\`\`\`${snippet}\`\`\``));
+  }
+
+  blocks.push(
+    section(`*Suggested Fix*\n${truncate(analysis.suggestedFix, 2900)}`),
+  );
+  // Footer links to the job log and the test file.
+  blocks.push(
+    context(`<${jobUrl}|Job Log> | <${fileUrl}|Test File>`),
+  );
+  blocks.push({ type: 'divider' });
+
+  return blocks;
+}
+
+/**
+ * Posts the analysis into an existing Slack thread: one summary message, then
+ * one message per finding, sent sequentially in the order given.
+ *
+ * @param findings - Analyzed failures to report.
+ * @param threadTs - Thread timestamp every message is posted under.
+ * @param botToken - Token handed to the Slack WebClient.
+ * @param channelId - Channel the messages are posted to.
+ */
+export async function postSlackFindings(
+  findings: SlackFinding[],
+  threadTs: string,
+  botToken: string,
+  channelId: string,
+): Promise<void> {
+  const client = new WebClient(botToken);
+  const reply = (blocks: object[], text: string) =>
+    client.chat.postMessage({ channel: channelId, thread_ts: threadTs, blocks, text });
+
+  // The summary goes out before any per-finding message.
+  await reply(buildSummaryBlocks(findings), `AI Analysis of ${findings.length} flaky tests`);
+
+  for (const finding of findings) {
+    await reply(buildFindingBlocks(finding), `Analysis: ${finding.failure.name}`);
+  }
+}
diff --git a/.github/scripts/analyze-flaky-tests/utils/test-source-reader.ts b/.github/scripts/analyze-flaky-tests/utils/test-source-reader.ts
new file mode 100644
index 00000000..e0feaed7
--- /dev/null
+++ b/.github/scripts/analyze-flaky-tests/utils/test-source-reader.ts
@@ -0,0 +1,124 @@
+import path from 'path';
+import type { Octokit } from '@octokit/rest';
+import type { TestSourceContext } from '../types';
+
+const MAX_SEARCH_RESULTS = 20; // upper bound on the number of paths searchTestFiles returns
+
+/**
+ * Finds files under test/e2e/ whose path contains every whitespace-separated
+ * keyword of `query` (case-insensitive), returning at most MAX_SEARCH_RESULTS.
+ * A blank query yields [] rather than matching every file; `ref` picks the
+ * branch, tag or SHA to list and defaults to 'main' like fetchFileContent.
+ */
+export async function searchTestFiles(
+  octokit: Octokit,
+  owner: string,
+  repo: string,
+  query: string,
+  ref = 'main',
+): Promise<string[]> {
+  const keywords = query.toLowerCase().split(/\s+/).filter(Boolean);
+  if (keywords.length === 0) return [];
+
+  const { data: tree } = await octokit.rest.git.getTree({ owner, repo, tree_sha: ref, recursive: '1' });
+  // A truncated listing can hide matches, so make that visible in the logs.
+  if (tree.truncated) console.error(`Warning: file tree of ${owner}/${repo}@${ref} is truncated`);
+  return tree.tree
+    .map((item) => (item.type === 'blob' && item.path ? item.path : ''))
+    .filter((p) => ['test/e2e/', ...keywords].every((kw) => p.toLowerCase().includes(kw)))
+    .slice(0, MAX_SEARCH_RESULTS);
+}
+
+/**
+ * Reads one file from the repository at `ref` and returns it as UTF-8 text.
+ *
+ * @param filePath - Path passed to the contents API as `path`.
+ * @param ref - Git ref to read from; defaults to 'main'.
+ * @returns The base64-decoded text, or null when the request throws or the
+ *   response has no string `content` field.
+ */
+export async function fetchFileContent(
+  octokit: Octokit,
+  owner: string,
+  repo: string,
+  filePath: string,
+  ref = 'main',
+): Promise<string | null> {
+  try {
+    const { data } = await octokit.rest.repos.getContent({ owner, repo, path: filePath, ref });
+    if (!('content' in data) || typeof data.content !== 'string') return null;
+    return Buffer.from(data.content, 'base64').toString('utf-8');
+  } catch {
+    return null;
+  }
+}
+
+/**
+ * Collects the page-object and flow modules a test file imports.
+ *
+ * Every `from '<specifier>'` clause is inspected. A specifier is kept when it
+ * is relative (starts with ".") and contains "page-objects", "pages/" or
+ * "flows/". Kept specifiers are joined onto the test file's directory, and
+ * ".ts" is appended unless the result already ends in ".ts" or ".js".
+ *
+ * @param testFileContent - Source text of the test file.
+ * @param testFilePath - Path of the test file; its directory is the join base.
+ * @returns Joined paths in the order the imports appear.
+ */
+function parseImportedPageObjects(
+  testFileContent: string,
+  testFilePath: string,
+): string[] {
+  const baseDir = path.dirname(testFilePath);
+  const fromClause = /from\s+['"]([^'"]+)['"]/g;
+  const markers = ['page-objects', 'pages/', 'flows/'];
+  const resolvedPaths: string[] = [];
+
+  for (let m = fromClause.exec(testFileContent); m !== null; m = fromClause.exec(testFileContent)) {
+    const specifier = m[1];
+    if (!specifier || !specifier.startsWith('.')) continue;
+    if (!markers.some((marker) => specifier.includes(marker))) continue;
+
+    const joined = path.posix.join(baseDir, specifier);
+    const hasExtension = joined.endsWith('.ts') || joined.endsWith('.js');
+    resolvedPaths.push(hasExtension ? joined : `${joined}.ts`);
+  }
+
+  return resolvedPaths;
+}
+
+/**
+ * Fetches a test file plus the page-object/flow files it imports.
+ * If the test file cannot be fetched (or is empty) the result carries a
+ * "Could not fetch test file" notice and no page objects. Imports are fetched
+ * concurrently, unfetchable ones are omitted, and pageObjects keeps import
+ * order no matter which request finishes first.
+ */
+export async function fetchTestSource(
+  octokit: Octokit,
+  testFilePath: string,
+  owner: string,
+  repo: string,
+  ref = 'main',
+): Promise<TestSourceContext> {
+  const testFileContent = await fetchFileContent(octokit, owner, repo, testFilePath, ref);
+  if (!testFileContent) {
+    return {
+      testFileContent: `Could not fetch test file: ${testFilePath}`,
+      testFilePath,
+      pageObjects: [],
+    };
+  }
+  const pageObjectPaths = parseImportedPageObjects(testFileContent, testFilePath);
+  const fetched = await Promise.all(
+    pageObjectPaths.map(async (poPath) => ({
+      path: poPath,
+      content: await fetchFileContent(octokit, owner, repo, poPath, ref),
+    })),
+  );
+  const pageObjects: TestSourceContext['pageObjects'] = [];
+  for (const { path: poPath, content } of fetched) {
+    if (content) pageObjects.push({ path: poPath, content });
+  }
+  return { testFileContent, testFilePath, pageObjects };
+}
diff --git a/.github/scripts/create-flaky-test-report.mjs b/.github/scripts/create-flaky-test-report.mjs
index 7aab7582..1b08a2cb 100644
--- a/.github/scripts/create-flaky-test-report.mjs
+++ b/.github/scripts/create-flaky-test-report.mjs
@@ -2,9 +2,11 @@
 
 // Based on the original script done by @itsyoboieltr on Extension repo
 
+import fs from 'fs';
 import { Octokit } from '@octokit/rest';
 import unzipper from 'unzipper';
 import { IncomingWebhook } from '@slack/webhook';
+import { WebClient } from '@slack/web-api';
 
 const githubToken = process.env.GITHUB_TOKEN;
 if (!githubToken) throw new Error('Missing GITHUB_TOKEN env var');
@@ -18,6 +20,8 @@ const env = {
   WORKFLOW_ID: process.env.WORKFLOW_ID || 'ci.yml',
   BRANCH: process.env.BRANCH || 'main',
   SLACK_WEBHOOK_FLAKY_TESTS: process.env.SLACK_WEBHOOK_FLAKY_TESTS || '',
+  SLACK_BOT_TOKEN: process.env.SLACK_BOT_TOKEN || '',
+  SLACK_CHANNEL_ID: process.env.SLACK_CHANNEL_ID || '',
   TEST_REPORT_ARTIFACTS: process.env.TEST_REPORT_ARTIFACTS
     ? process.env.TEST_REPORT_ARTIFACTS.split(',').map(name => name.trim())
     : ['test-e2e-android-json-report', 'test-e2e-ios-json-report', 'test-e2e-chrome-report', 'test-e2e-firefox-report'],
@@ -326,27 +330,53 @@ function summarizeFailures(realFailures, flakyTests = []) {
 }
 
 async function sendSlackReport(summary, dateDisplay, workflowCount, failedCount) {
-  if (!env.SLACK_WEBHOOK_FLAKY_TESTS || !env.SLACK_WEBHOOK_FLAKY_TESTS.startsWith('https://')) {
-    console.log('Skipping Slack notification');
-    return;
+  const useBotToken = env.SLACK_BOT_TOKEN && env.SLACK_CHANNEL_ID;
+  const useWebhook = env.SLACK_WEBHOOK_FLAKY_TESTS && env.SLACK_WEBHOOK_FLAKY_TESTS.startsWith('https://');
+
+  if (!useBotToken && !useWebhook) {
+    console.log('Skipping Slack notification (no SLACK_BOT_TOKEN+SLACK_CHANNEL_ID or SLACK_WEBHOOK_FLAKY_TESTS)');
+    return null;
   }
 
   console.log('\n📤 Sending report to Slack...');
+  const blocks = createSlackBlocks(summary, dateDisplay, workflowCount, failedCount);
+  const BATCH_SIZE = 50;
+
+  if (useBotToken) {
+    try {
+      const slack = new WebClient(env.SLACK_BOT_TOKEN);
+      let threadTs = null;
+
+      for (let i = 0; i < blocks.length; i += BATCH_SIZE) {
+        const batch = blocks.slice(i, i + BATCH_SIZE);
+        const result = await slack.chat.postMessage({
+          channel: env.SLACK_CHANNEL_ID,
+          blocks: batch,
+          text: 'Flaky Test Report',
+          ...(threadTs ? { thread_ts: threadTs } : {}),
+        });
+        if (!threadTs) threadTs = result.ts;
+      }
+
+      console.log(`✅ Report sent to Slack via WebClient (thread_ts: ${threadTs})`);
+      return threadTs;
+    } catch (slackError) {
+      console.error('❌ Failed to send Slack notification via WebClient:', slackError.message);
+      return null;
+    }
+  }
+
   try {
     const webhook = new IncomingWebhook(env.SLACK_WEBHOOK_FLAKY_TESTS);
-    const blocks = createSlackBlocks(summary, dateDisplay, workflowCount, failedCount);
-
-    // Slack has a limit of 50 blocks per message
-    const BATCH_SIZE = 50;
     for (let i = 0; i < blocks.length; i += BATCH_SIZE) {
       const batch = blocks.slice(i, i + BATCH_SIZE);
       await webhook.send({ blocks: batch });
     }
-
-    console.log('✅ Report sent to Slack successfully');
+    console.log('✅ Report sent to Slack via webhook (no thread_ts available)');
   } catch (slackError) {
-    console.error('❌ Failed to send Slack notification:', slackError.message);
+    console.error('❌ Failed to send Slack notification via webhook:', slackError.message);
   }
+  return null;
 }
 
 function createSlackBlocks(summary, dateDisplay, workflowCount = 0, failedCount = 0) {
@@ -602,6 +632,37 @@ function displayResults(summary, dateDisplay) {
   }
 }
 
+/**
+ * Publishes a step output so later workflow steps can read it via
+ * `steps.<id>.outputs.<name>`.
+ *
+ * When `GITHUB_OUTPUT` is set, the value is appended to that file using the
+ * `name<<delimiter` heredoc form, which keeps multi-line values intact. The
+ * deprecated `::set-output` workflow command is used only as a fallback when
+ * `GITHUB_OUTPUT` is missing; printing it unconditionally would set the output
+ * a second time and trigger the runner's deprecation warning.
+ *
+ * @param {string} name - Output name.
+ * @param {string} value - Output value; it is interpolated into a string as-is.
+ */
+function setGitHubOutput(name, value) {
+  const outputFile = process.env.GITHUB_OUTPUT;
+  if (outputFile) {
+    // Fresh delimiter per call so the value is very unlikely to contain the terminator.
+    const delimiter = `ghadelimiter_${crypto.randomUUID?.() || Date.now()}`;
+    fs.appendFileSync(outputFile, `${name}<<${delimiter}\n${value}\n${delimiter}\n`);
+    return;
+  }
+
+  // Workflow commands are line-based: escape '%', CR and LF so a multi-line
+  // value is not cut off at its first newline.
+  const escaped = String(value)
+    .replace(/%/g, '%25')
+    .replace(/\r/g, '%0D')
+    .replace(/\n/g, '%0A');
+  console.log(`::set-output name=${name}::${escaped}`);
+}
+
 async function main() {
   const github = new Octokit({ auth: env.GITHUB_TOKEN });
 
@@ -641,7 +680,25 @@ async function main() {
 
     const summary = summarizeFailures(realFailures, flakyTests);
     displayResults(summary, dateRange.display);
-    await sendSlackReport(summary, dateRange.display, workflowRuns.length, failedRuns.length);
+    const threadTs = await sendSlackReport(summary, dateRange.display, workflowRuns.length, failedRuns.length);
+
+    const top10 = summary.slice(0, 10);
+    const hasFailures = top10.length > 0;
+    const failuresJson = JSON.stringify(top10.map(test => ({
+      name: test.name,
+      path: test.path,
+      realFailures: test.realFailures,
+      totalRetries: test.totalRetries,
+      lastError: test.lastRealFailureError || test.flakyFailureError || '',
+      jobId: test.lastRealFailureJobId || test.flakyFailureJobId,
+      runId: test.lastRealFailureRunId || test.flakyFailureRunId,
+      suite: test.suite,
+      isFlaky: test.realFailures === 0,
+    })));
+
+    setGitHubOutput('thread_ts', threadTs || '');
+    setGitHubOutput('has_failures', hasFailures ? 'true' : 'false');
+    setGitHubOutput('failures_json', failuresJson);
 
   } catch (error) {
     console.error('❌ Error:', error.message);
diff --git a/.github/workflows/flaky-test-ai-analysis.yml b/.github/workflows/flaky-test-ai-analysis.yml
new file mode 100644
index 00000000..39d4cb7a
--- /dev/null
+++ b/.github/workflows/flaky-test-ai-analysis.yml
@@ -0,0 +1,85 @@
+# Manually triggered proof of concept: generates the flaky test report for a
+# target repository, then runs the AI analyzer over the reported failures.
+name: Flaky Test AI Analysis (test)
+
+on:
+  workflow_dispatch:
+    inputs:
+      repository:
+        description: 'Target repository (e.g. metamask-extension)'
+        required: true
+        default: 'metamask-extension'
+      workflow-id:
+        description: 'Workflow ID to analyze (e.g. main.yml)'
+        required: true
+        default: 'main.yml'
+      dry-run:
+        description: 'Print analysis to stdout instead of posting to Slack'
+        type: boolean
+        default: true
+
+# Least-privilege token: read-only access to repository contents and Actions data.
+permissions:
+  contents: read
+  actions: read
+
+jobs:
+  flaky-report-and-analysis:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version-file: .nvmrc
+
+      # Corepack is enabled before the Yarn cache is configured because
+      # setup-node shells out to `yarn` to locate the cache directory.
+      - name: Enable Corepack
+        run: corepack enable
+
+      - name: Restore Yarn cache
+        uses: actions/setup-node@v4
+        with:
+          node-version-file: .nvmrc
+          cache: yarn
+          cache-dependency-path: yarn.lock
+
+      - name: Install dependencies
+        run: yarn --immutable
+
+      # Publishes `thread_ts`, `has_failures` and `failures_json` as step outputs
+      # consumed by the analysis step below.
+      - name: Generate flaky test report
+        id: report
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
+          SLACK_CHANNEL_ID: ${{ secrets.SLACK_CHANNEL_ID }}
+          OWNER: MetaMask
+          REPOSITORY: ${{ inputs.repository }}
+          WORKFLOW_ID: ${{ inputs.workflow-id }}
+          BRANCH: main
+        run: node .github/scripts/create-flaky-test-report.mjs
+
+      - name: AI analysis of flaky tests
+        if: steps.report.outputs.has_failures == 'true'
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          E2E_CLAUDE_API_KEY: ${{ secrets.E2E_CLAUDE_API_KEY }}
+          SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
+          SLACK_CHANNEL_ID: ${{ secrets.SLACK_CHANNEL_ID }}
+          SLACK_THREAD_TS: ${{ steps.report.outputs.thread_ts }}
+          TARGET_OWNER: MetaMask
+          TARGET_REPO: ${{ inputs.repository }}
+          FAILURES_JSON: ${{ steps.report.outputs.failures_json }}
+          # Passed via the environment rather than expanded inside the script,
+          # so the input value is never parsed as shell code.
+          DRY_RUN_INPUT: ${{ inputs.dry-run }}
+        run: |
+          args=()
+          if [ "$DRY_RUN_INPUT" = "true" ]; then
+            args+=(--dry-run)
+          fi
+          yarn ts-node --swc .github/scripts/analyze-flaky-tests/index.ts "${args[@]}"
diff --git a/package.json b/package.json
index d7527130..5fb3c40e 100644
--- a/package.json
+++ b/package.json
@@ -23,9 +23,11 @@
     "slack:release-testing": "node .github/scripts/slack-release-testing.mjs",
     "test": "jest && jest-it-up",
     "test:watch": "jest --watch",
+    "analyze-flaky-tests": "ts-node --swc .github/scripts/analyze-flaky-tests/index.ts",
     "update-release-sheet": "node .github/scripts/update-release-sheet.mjs"
   },
   "dependencies": {
+    "@anthropic-ai/sdk": "^0.39.0",
     "@metamask/auto-changelog": "^5.3.2",
     "@metamask/utils": "^7.1.0",
     "@octokit/graphql": "^7.0.1",
diff --git a/yarn.lock b/yarn.lock
index 5dfdb61a..cefe00b4 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -21,6 +21,21 @@ __metadata:
   languageName: node
   linkType: hard
 
+"@anthropic-ai/sdk@npm:^0.39.0":
+  version: 0.39.0
+  resolution: "@anthropic-ai/sdk@npm:0.39.0"
+  dependencies:
+    "@types/node": "npm:^18.11.18"
+    "@types/node-fetch": "npm:^2.6.4"
+    abort-controller: "npm:^3.0.0"
+    agentkeepalive: "npm:^4.2.1"
+    form-data-encoder: "npm:1.7.2"
+    formdata-node: "npm:^4.3.2"
+    node-fetch: "npm:^2.6.7"
+  checksum: 10/8f1cb2d6a797ed095503ceec4271347ba9ee101c020fe3f5080c6853a5f3a9fc874649fcd0e3ae584c33e58548368cf3fb1da167221172859a1dff1e8c3419f6
+  languageName: node
+  linkType: hard
+
 "@babel/code-frame@npm:^7.0.0, @babel/code-frame@npm:^7.12.13, @babel/code-frame@npm:^7.18.6, @babel/code-frame@npm:^7.24.6":
   version: 7.24.6
   resolution: "@babel/code-frame@npm:7.24.6"
@@ -1012,6 +1027,7 @@ __metadata:
   version: 0.0.0-use.local
   resolution: "@metamask/github-tools@workspace:."
   dependencies:
+    "@anthropic-ai/sdk": "npm:^0.39.0"
     "@lavamoat/allow-scripts": "npm:^2.3.1"
     "@lavamoat/preinstall-always-fail": "npm:^1.0.0"
     "@metamask/auto-changelog": "npm:^5.3.2"
@@ -1958,6 +1974,16 @@ __metadata:
   languageName: node
   linkType: hard
 
+"@types/node-fetch@npm:^2.6.4":
+  version: 2.6.13
+  resolution: "@types/node-fetch@npm:2.6.13"
+  dependencies:
+    "@types/node": "npm:*"
+    form-data: "npm:^4.0.4"
+  checksum: 10/944d52214791ebba482ca1393a4f0d62b0dbac5f7343ff42c128b75d5356d8bcefd4df77771b55c1acd19d118e16e9bd5d2792819c51bc13402d1c87c0975435
+  languageName: node
+  linkType: hard
+
 "@types/node@npm:*, @types/node@npm:>=12.0.0, @types/node@npm:>=18.0.0":
   version: 24.3.0
   resolution: "@types/node@npm:24.3.0"
@@ -1967,6 +1993,15 @@ __metadata:
   languageName: node
   linkType: hard
 
+"@types/node@npm:^18.11.18":
+  version: 18.19.130
+  resolution: "@types/node@npm:18.19.130"
+  dependencies:
+    undici-types: "npm:~5.26.4"
+  checksum: 10/ebb85c6edcec78df926de27d828ecbeb1b3d77c165ceef95bfc26e171edbc1924245db4eb2d7d6230206fe6b1a1f7665714fe1c70739e9f5980d8ce31af6ef82
+  languageName: node
+  linkType: hard
+
 "@types/node@npm:^20.3.2":
   version: 20.3.2
   resolution: "@types/node@npm:20.3.2"
@@ -2240,6 +2275,15 @@ __metadata:
   languageName: node
   linkType: hard
 
+"abort-controller@npm:^3.0.0":
+  version: 3.0.0
+  resolution: "abort-controller@npm:3.0.0"
+  dependencies:
+    event-target-shim: "npm:^5.0.0"
+  checksum: 10/ed84af329f1828327798229578b4fe03a4dd2596ba304083ebd2252666bdc1d7647d66d0b18704477e1f8aa315f055944aa6e859afebd341f12d0a53c37b4b40
+  languageName: node
+  linkType: hard
+
 "acorn-jsx@npm:^5.3.2":
   version: 5.3.2
   resolution: "acorn-jsx@npm:5.3.2"
@@ -4044,6 +4088,13 @@ __metadata:
   languageName: node
   linkType: hard
 
+"event-target-shim@npm:^5.0.0":
+  version: 5.0.1
+  resolution: "event-target-shim@npm:5.0.1"
+  checksum: 10/49ff46c3a7facbad3decb31f597063e761785d7fdb3920d4989d7b08c97a61c2f51183e2f3a03130c9088df88d4b489b1b79ab632219901f184f85158508f4c8
+  languageName: node
+  linkType: hard
+
 "eventemitter3@npm:^3.1.0":
   version: 3.1.2
   resolution: "eventemitter3@npm:3.1.2"
@@ -4357,6 +4408,13 @@ __metadata:
   languageName: node
   linkType: hard
 
+"form-data-encoder@npm:1.7.2":
+  version: 1.7.2
+  resolution: "form-data-encoder@npm:1.7.2"
+  checksum: 10/227bf2cea083284411fd67472ccc22f5cb354ca92c00690e11ff5ed942d993c13ac99dea365046306200f8bd71e1a7858d2d99e236de694b806b1f374a4ee341
+  languageName: node
+  linkType: hard
+
 "form-data@npm:^2.5.0":
   version: 2.5.2
   resolution: "form-data@npm:2.5.2"
@@ -4382,6 +4440,16 @@ __metadata:
   languageName: node
   linkType: hard
 
+"formdata-node@npm:^4.3.2":
+  version: 4.4.1
+  resolution: "formdata-node@npm:4.4.1"
+  dependencies:
+    node-domexception: "npm:1.0.0"
+    web-streams-polyfill: "npm:4.0.0-beta.3"
+  checksum: 10/29622f75533107c1bbcbe31fda683e6a55859af7f48ec354a9800591ce7947ed84cd3ef2b2fcb812047a884f17a1bac75ce098ffc17e23402cd373e49c1cd335
+  languageName: node
+  linkType: hard
+
 "fs-extra@npm:^11.2.0":
   version: 11.3.1
   resolution: "fs-extra@npm:11.3.1"
@@ -6414,6 +6482,13 @@ __metadata:
   languageName: node
   linkType: hard
 
+"node-domexception@npm:1.0.0":
+  version: 1.0.0
+  resolution: "node-domexception@npm:1.0.0"
+  checksum: 10/e332522f242348c511640c25a6fc7da4f30e09e580c70c6b13cb0be83c78c3e71c8d4665af2527e869fc96848924a4316ae7ec9014c091e2156f41739d4fa233
+  languageName: node
+  linkType: hard
+
 "node-fetch@npm:^2.6.7, node-fetch@npm:^2.6.9":
   version: 2.7.0
   resolution: "node-fetch@npm:2.7.0"
@@ -8245,6 +8320,13 @@ __metadata:
   languageName: node
   linkType: hard
 
+"undici-types@npm:~5.26.4":
+  version: 5.26.5
+  resolution: "undici-types@npm:5.26.5"
+  checksum: 10/0097779d94bc0fd26f0418b3a05472410408877279141ded2bd449167be1aed7ea5b76f756562cb3586a07f251b90799bab22d9019ceba49c037c76445f7cddd
+  languageName: node
+  linkType: hard
+
 "undici-types@npm:~7.10.0":
   version: 7.10.0
   resolution: "undici-types@npm:7.10.0"
@@ -8386,6 +8468,13 @@ __metadata:
   languageName: node
   linkType: hard
 
+"web-streams-polyfill@npm:4.0.0-beta.3":
+  version: 4.0.0-beta.3
+  resolution: "web-streams-polyfill@npm:4.0.0-beta.3"
+  checksum: 10/dcdef67de57d83008f9dc330662b65ba4497315555dd0e4e7bcacb132ffdf8a830eaab8f74ad40a4a44f542461f51223f406e2a446ece1cc29927859b1405853
+  languageName: node
+  linkType: hard
+
 "webidl-conversions@npm:^3.0.0":
   version: 3.0.1
   resolution: "webidl-conversions@npm:3.0.1"