Skip to content

Commit 9a6ce8b

Browse files
committed
feat: add compare_file_contents tool with semantic diffs (#1973)
Add a new compare_file_contents tool that compares a file between two git refs, producing semantic diffs for structured formats (JSON, YAML) and falling back to unified diffs for unsupported formats. The tool is feature-flagged behind the "semantic_diff" flag.
1 parent d44894e commit 9a6ce8b

File tree

6 files changed

+927
-0
lines changed

6 files changed

+927
-0
lines changed
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
{
2+
"annotations": {
3+
"readOnlyHint": true,
4+
"title": "Compare file contents"
5+
},
6+
"description": "Compare a file between two git refs, with semantic diffs for structured formats (JSON, YAML)",
7+
"inputSchema": {
8+
"properties": {
9+
"base": {
10+
"description": "Base git ref to compare from (commit SHA, branch name, or tag)",
11+
"type": "string"
12+
},
13+
"head": {
14+
"description": "Head git ref to compare to (commit SHA, branch name, or tag)",
15+
"type": "string"
16+
},
17+
"owner": {
18+
"description": "Repository owner (username or organization)",
19+
"type": "string"
20+
},
21+
"path": {
22+
"description": "Path to the file to compare",
23+
"type": "string"
24+
},
25+
"repo": {
26+
"description": "Repository name",
27+
"type": "string"
28+
}
29+
},
30+
"required": [
31+
"owner",
32+
"repo",
33+
"path",
34+
"base",
35+
"head"
36+
],
37+
"type": "object"
38+
},
39+
"name": "compare_file_contents"
40+
}

pkg/github/compare.go

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
package github
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"io"
7+
"net/http"
8+
9+
"github.com/github/github-mcp-server/pkg/inventory"
10+
"github.com/github/github-mcp-server/pkg/raw"
11+
"github.com/github/github-mcp-server/pkg/scopes"
12+
"github.com/github/github-mcp-server/pkg/translations"
13+
"github.com/github/github-mcp-server/pkg/utils"
14+
"github.com/google/jsonschema-go/jsonschema"
15+
"github.com/modelcontextprotocol/go-sdk/mcp"
16+
)
17+
18+
// CompareFileContents creates a tool to compare a file between two git refs,
19+
// producing semantic diffs for structured formats (JSON, YAML) and falling back
20+
// to unified diffs for unsupported formats.
21+
func CompareFileContents(t translations.TranslationHelperFunc) inventory.ServerTool {
22+
st := NewTool(
23+
ToolsetMetadataRepos,
24+
mcp.Tool{
25+
Name: "compare_file_contents",
26+
Description: t("TOOL_COMPARE_FILE_CONTENTS_DESCRIPTION", "Compare a file between two git refs, with semantic diffs for structured formats (JSON, YAML)"),
27+
Annotations: &mcp.ToolAnnotations{
28+
Title: t("TOOL_COMPARE_FILE_CONTENTS_USER_TITLE", "Compare file contents"),
29+
ReadOnlyHint: true,
30+
},
31+
InputSchema: &jsonschema.Schema{
32+
Type: "object",
33+
Properties: map[string]*jsonschema.Schema{
34+
"owner": {
35+
Type: "string",
36+
Description: "Repository owner (username or organization)",
37+
},
38+
"repo": {
39+
Type: "string",
40+
Description: "Repository name",
41+
},
42+
"path": {
43+
Type: "string",
44+
Description: "Path to the file to compare",
45+
},
46+
"base": {
47+
Type: "string",
48+
Description: "Base git ref to compare from (commit SHA, branch name, or tag)",
49+
},
50+
"head": {
51+
Type: "string",
52+
Description: "Head git ref to compare to (commit SHA, branch name, or tag)",
53+
},
54+
},
55+
Required: []string{"owner", "repo", "path", "base", "head"},
56+
},
57+
},
58+
[]scopes.Scope{scopes.Repo},
59+
func(ctx context.Context, deps ToolDependencies, _ *mcp.CallToolRequest, args map[string]any) (*mcp.CallToolResult, any, error) {
60+
owner, err := RequiredParam[string](args, "owner")
61+
if err != nil {
62+
return utils.NewToolResultError(err.Error()), nil, nil
63+
}
64+
repo, err := RequiredParam[string](args, "repo")
65+
if err != nil {
66+
return utils.NewToolResultError(err.Error()), nil, nil
67+
}
68+
path, err := RequiredParam[string](args, "path")
69+
if err != nil {
70+
return utils.NewToolResultError(err.Error()), nil, nil
71+
}
72+
base, err := RequiredParam[string](args, "base")
73+
if err != nil {
74+
return utils.NewToolResultError(err.Error()), nil, nil
75+
}
76+
head, err := RequiredParam[string](args, "head")
77+
if err != nil {
78+
return utils.NewToolResultError(err.Error()), nil, nil
79+
}
80+
81+
rawClient, err := deps.GetRawClient(ctx)
82+
if err != nil {
83+
return utils.NewToolResultError("failed to get raw content client"), nil, nil
84+
}
85+
86+
baseContent, err := fetchFileContent(ctx, rawClient, owner, repo, path, base)
87+
if err != nil {
88+
return utils.NewToolResultError(fmt.Sprintf("failed to fetch base file: %s", err)), nil, nil
89+
}
90+
91+
headContent, err := fetchFileContent(ctx, rawClient, owner, repo, path, head)
92+
if err != nil {
93+
return utils.NewToolResultError(fmt.Sprintf("failed to fetch head file: %s", err)), nil, nil
94+
}
95+
96+
diff, format, isFallback, err := SemanticDiff(baseContent, headContent, path)
97+
if err != nil {
98+
return utils.NewToolResultError(fmt.Sprintf("failed to compute diff: %s", err)), nil, nil
99+
}
100+
101+
var header string
102+
if isFallback {
103+
header = fmt.Sprintf("Format: %s (unified diff — no semantic diff available for this format)", format)
104+
} else {
105+
header = fmt.Sprintf("Format: %s (semantic diff)", format)
106+
}
107+
108+
result := fmt.Sprintf("%s\n\n%s", header, diff)
109+
110+
return utils.NewToolResultText(result), nil, nil
111+
},
112+
)
113+
st.FeatureFlagEnable = "semantic_diff"
114+
return st
115+
}
116+
117+
// fetchFileContent retrieves the raw content of a file at a given ref.
118+
func fetchFileContent(ctx context.Context, client *raw.Client, owner, repo, path, ref string) ([]byte, error) {
119+
resp, err := client.GetRawContent(ctx, owner, repo, path, &raw.ContentOpts{Ref: ref})
120+
if err != nil {
121+
return nil, fmt.Errorf("request failed: %w", err)
122+
}
123+
defer func() { _ = resp.Body.Close() }()
124+
125+
if resp.StatusCode == http.StatusNotFound {
126+
return nil, fmt.Errorf("file %q not found at ref %q", path, ref)
127+
}
128+
if resp.StatusCode != http.StatusOK {
129+
return nil, fmt.Errorf("unexpected status %d for %q at ref %q", resp.StatusCode, path, ref)
130+
}
131+
132+
body, err := io.ReadAll(resp.Body)
133+
if err != nil {
134+
return nil, fmt.Errorf("failed to read response body: %w", err)
135+
}
136+
137+
return body, nil
138+
}

pkg/github/compare_test.go

Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
1+
package github
2+
3+
import (
4+
"context"
5+
"net/http"
6+
"net/url"
7+
"strings"
8+
"testing"
9+
10+
"github.com/github/github-mcp-server/internal/toolsnaps"
11+
"github.com/github/github-mcp-server/pkg/raw"
12+
"github.com/github/github-mcp-server/pkg/translations"
13+
"github.com/google/go-github/v82/github"
14+
"github.com/stretchr/testify/assert"
15+
"github.com/stretchr/testify/require"
16+
)
17+
18+
func Test_CompareFileContents(t *testing.T) {
19+
// Verify tool definition and snapshot
20+
toolDef := CompareFileContents(translations.NullTranslationHelper)
21+
assert.Equal(t, "compare_file_contents", toolDef.Tool.Name)
22+
assert.True(t, toolDef.Tool.Annotations.ReadOnlyHint)
23+
assert.Equal(t, "semantic_diff", toolDef.FeatureFlagEnable)
24+
require.NoError(t, toolsnaps.Test(toolDef.Tool.Name, toolDef.Tool))
25+
26+
tests := []struct {
27+
name string
28+
mockedClient *http.Client
29+
requestArgs map[string]any
30+
expectError bool
31+
expectedErrMsg string
32+
expectContains []string
33+
}{
34+
{
35+
name: "successful JSON semantic diff",
36+
mockedClient: MockHTTPClientWithHandlers(map[string]http.HandlerFunc{
37+
"": func(w http.ResponseWriter, r *http.Request) {
38+
path := r.URL.Path
39+
switch {
40+
case containsRef(path, "abc123"):
41+
w.WriteHeader(http.StatusOK)
42+
_, _ = w.Write([]byte(`{"name":"Bob","age":30}`))
43+
case containsRef(path, "def456"):
44+
w.WriteHeader(http.StatusOK)
45+
_, _ = w.Write([]byte(`{"name":"Bobby","age":30}`))
46+
default:
47+
w.WriteHeader(http.StatusNotFound)
48+
}
49+
},
50+
}),
51+
requestArgs: map[string]any{
52+
"owner": "owner",
53+
"repo": "repo",
54+
"path": "config.json",
55+
"base": "abc123",
56+
"head": "def456",
57+
},
58+
expectError: false,
59+
expectContains: []string{
60+
"Format: json (semantic diff)",
61+
`name: "Bob" → "Bobby"`,
62+
},
63+
},
64+
{
65+
name: "successful YAML semantic diff",
66+
mockedClient: MockHTTPClientWithHandlers(map[string]http.HandlerFunc{
67+
"": func(w http.ResponseWriter, r *http.Request) {
68+
path := r.URL.Path
69+
switch {
70+
case containsRef(path, "v1.0"):
71+
w.WriteHeader(http.StatusOK)
72+
_, _ = w.Write([]byte("name: Alice\nage: 30\n"))
73+
case containsRef(path, "v2.0"):
74+
w.WriteHeader(http.StatusOK)
75+
_, _ = w.Write([]byte("name: Alice\nage: 31\n"))
76+
default:
77+
w.WriteHeader(http.StatusNotFound)
78+
}
79+
},
80+
}),
81+
requestArgs: map[string]any{
82+
"owner": "owner",
83+
"repo": "repo",
84+
"path": "config.yaml",
85+
"base": "v1.0",
86+
"head": "v2.0",
87+
},
88+
expectError: false,
89+
expectContains: []string{
90+
"Format: yaml (semantic diff)",
91+
"age: 30 → 31",
92+
},
93+
},
94+
{
95+
name: "fallback to unified diff for txt",
96+
mockedClient: MockHTTPClientWithHandlers(map[string]http.HandlerFunc{
97+
"": func(w http.ResponseWriter, r *http.Request) {
98+
path := r.URL.Path
99+
switch {
100+
case containsRef(path, "old"):
101+
w.WriteHeader(http.StatusOK)
102+
_, _ = w.Write([]byte("line1\nline2\nline3\n"))
103+
case containsRef(path, "new"):
104+
w.WriteHeader(http.StatusOK)
105+
_, _ = w.Write([]byte("line1\nmodified\nline3\n"))
106+
default:
107+
w.WriteHeader(http.StatusNotFound)
108+
}
109+
},
110+
}),
111+
requestArgs: map[string]any{
112+
"owner": "owner",
113+
"repo": "repo",
114+
"path": "readme.txt",
115+
"base": "old",
116+
"head": "new",
117+
},
118+
expectError: false,
119+
expectContains: []string{
120+
"unified diff",
121+
"-line2",
122+
"+modified",
123+
},
124+
},
125+
{
126+
name: "base file not found",
127+
mockedClient: MockHTTPClientWithHandlers(map[string]http.HandlerFunc{
128+
"": func(w http.ResponseWriter, _ *http.Request) {
129+
w.WriteHeader(http.StatusNotFound)
130+
},
131+
}),
132+
requestArgs: map[string]any{
133+
"owner": "owner",
134+
"repo": "repo",
135+
"path": "missing.json",
136+
"base": "abc123",
137+
"head": "def456",
138+
},
139+
expectError: true,
140+
expectedErrMsg: "failed to fetch base file",
141+
},
142+
{
143+
name: "missing required param owner",
144+
mockedClient: MockHTTPClientWithHandlers(map[string]http.HandlerFunc{}),
145+
requestArgs: map[string]any{
146+
"repo": "repo",
147+
"path": "file.json",
148+
"base": "abc",
149+
"head": "def",
150+
},
151+
expectError: true,
152+
expectedErrMsg: "missing required parameter: owner",
153+
},
154+
{
155+
name: "missing required param base",
156+
mockedClient: MockHTTPClientWithHandlers(map[string]http.HandlerFunc{}),
157+
requestArgs: map[string]any{
158+
"owner": "owner",
159+
"repo": "repo",
160+
"path": "file.json",
161+
"head": "def",
162+
},
163+
expectError: true,
164+
expectedErrMsg: "missing required parameter: base",
165+
},
166+
}
167+
168+
for _, tc := range tests {
169+
t.Run(tc.name, func(t *testing.T) {
170+
client := github.NewClient(tc.mockedClient)
171+
mockRawClient := raw.NewClient(client, &url.URL{Scheme: "https", Host: "raw.example.com", Path: "/"})
172+
deps := BaseDeps{
173+
Client: client,
174+
RawClient: mockRawClient,
175+
}
176+
handler := toolDef.Handler(deps)
177+
178+
request := createMCPRequest(tc.requestArgs)
179+
result, err := handler(ContextWithDeps(context.Background(), deps), &request)
180+
181+
if tc.expectError {
182+
require.NoError(t, err)
183+
textContent := getErrorResult(t, result)
184+
assert.Contains(t, textContent.Text, tc.expectedErrMsg)
185+
return
186+
}
187+
188+
require.NoError(t, err)
189+
require.False(t, result.IsError)
190+
textContent := getTextResult(t, result)
191+
192+
for _, expected := range tc.expectContains {
193+
assert.Contains(t, textContent.Text, expected)
194+
}
195+
})
196+
}
197+
}
198+
199+
// containsRef reports whether path contains ref as a slash-delimited run,
// i.e. whether "/<ref>/" occurs anywhere in path. A ref that appears only at
// the very end of path (with no trailing slash) does not match.
func containsRef(path, ref string) bool {
	segment := "/" + ref + "/"
	return strings.Contains(path, segment)
}

0 commit comments

Comments
 (0)