Skip to content

Commit

Permalink
Merge pull request #8534 from dolthub/nicktobey/json_fix
Browse files Browse the repository at this point in the history
Fix issue where JSON diff would fail under specific circumstances.
  • Loading branch information
nicktobey authored Nov 6, 2024
2 parents 7d9a638 + ad631c6 commit b241b58
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 45 deletions.
99 changes: 59 additions & 40 deletions go/store/prolly/tree/indexed_json_diff.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,61 +44,74 @@ func NewIndexedJsonDiffer(ctx context.Context, from, to IndexedJsonDocument) (*I
differ.fromStop = differ.fromStop.parent
differ.toStop = differ.toStop.parent

if differ.from == nil || differ.to == nil {
// This can happen when either document fits in a single chunk.
// We don't use the chunk differ in this case, and instead we create the cursors without it.
var currentFromCursor, currentToCursor *JsonCursor
if differ.from == nil {
// The "from" document fits inside a single chunk.
// We can't create the "from" cursor from the differ, so we create it here instead.
diffKey := []byte{byte(startOfValue)}
currentFromCursor, err := newJsonCursorAtStartOfChunk(ctx, from.m.NodeStore, from.m.Root, diffKey)
currentFromCursor, err = newJsonCursorAtStartOfChunk(ctx, from.m.NodeStore, from.m.Root, diffKey)
if err != nil {
return nil, err
}
currentToCursor, err := newJsonCursorAtStartOfChunk(ctx, to.m.NodeStore, to.m.Root, diffKey)
// Advance the cursor past the "beginning of document" location, so that it aligns with the "to" cursor no matter what.
err = advanceCursor(ctx, &currentFromCursor)
if err != nil {
return nil, err
}
}

if differ.to == nil {
// The "to" document fits inside a single chunk.
// We can't create the "from" cursor from the differ, so we create it here instead.
diffKey := []byte{byte(startOfValue)}
currentToCursor, err = newJsonCursorAtStartOfChunk(ctx, to.m.NodeStore, to.m.Root, diffKey)
if err != nil {
return nil, err
}
// Advance the cursor past the "beginning of document" location, so that it aligns with the "from" cursor no matter what.
err = advanceCursor(ctx, &currentToCursor)
if err != nil {
return nil, err
}
return &IndexedJsonDiffer{
differ: differ,
from: from,
to: to,
currentFromCursor: currentFromCursor,
currentToCursor: currentToCursor,
}, nil
}

return &IndexedJsonDiffer{
differ: differ,
from: from,
to: to,
differ: differ,
from: from,
to: to,
currentFromCursor: currentFromCursor,
currentToCursor: currentToCursor,
}, nil
}

func advanceCursor(ctx context.Context, jCur **JsonCursor) (err error) {
if (*jCur).jsonScanner.atEndOfChunk() {
err = (*jCur).cur.advance(ctx)
if err != nil {
return err
}
*jCur = nil
} else {
err = (*jCur).jsonScanner.AdvanceToNextLocation()
if err != nil {
return err
}
}
return nil
}

// Next computes the next diff between the two JSON documents.
// To accomplish this, it uses the underlying Differ to find chunks that have changed, and uses a pair of JsonCursors
// to walk corresponding chunks.
func (jd *IndexedJsonDiffer) Next(ctx context.Context) (diff JsonDiff, err error) {
// helper function to advance a JsonCursor and set it to nil if it reaches the end of a chunk
advanceCursor := func(jCur **JsonCursor) (err error) {
if (*jCur).jsonScanner.atEndOfChunk() {
err = (*jCur).cur.advance(ctx)
if err != nil {
return err
}
*jCur = nil
} else {
err = (*jCur).jsonScanner.AdvanceToNextLocation()
if err != nil {
return err
}
}
return nil
}

newAddedDiff := func(key []byte) (JsonDiff, error) {
addedValue, err := jd.currentToCursor.NextValue(ctx)
if err != nil {
return JsonDiff{}, err
}
err = advanceCursor(&jd.currentToCursor)
err = advanceCursor(ctx, &jd.currentToCursor)
if err != nil {
return JsonDiff{}, err
}
Expand All @@ -114,7 +127,7 @@ func (jd *IndexedJsonDiffer) Next(ctx context.Context) (diff JsonDiff, err error
if err != nil {
return JsonDiff{}, err
}
err = advanceCursor(&jd.currentFromCursor)
err = advanceCursor(ctx, &jd.currentFromCursor)
if err != nil {
return JsonDiff{}, err
}
Expand Down Expand Up @@ -150,29 +163,35 @@ func (jd *IndexedJsonDiffer) Next(ctx context.Context) (diff JsonDiff, err error
return JsonDiff{}, err
}
} else if jd.currentFromCursor == nil {
// We exhausted the current `from` chunk but not the `to` chunk. Since the chunk boundaries don't align on
// We exhausted the current `from` chunk but not the current `to` chunk. Since the chunk boundaries don't align on
// the same key, we need to continue into the next chunk.

// Alternatively, the "to" cursor was created during construction because the "to" document fit in a single chunk,
// and the "from" cursor hasn't been created yet.

jd.currentFromCursor, err = newJsonCursorFromCursor(ctx, jd.differ.from)
if err != nil {
return JsonDiff{}, err
}

err = advanceCursor(&jd.currentFromCursor)
err = advanceCursor(ctx, &jd.currentFromCursor)
if err != nil {
return JsonDiff{}, err
}
continue
} else if jd.currentToCursor == nil {
// We exhausted the current `to` chunk but not the `from` chunk. Since the chunk boundaries don't align on
// We exhausted the current `to` chunk but not the current `from` chunk. Since the chunk boundaries don't align on
// the same key, we need to continue into the next chunk.

// Alternatively, the "from" cursor was created during construction because the "from" document fit in a single chunk,
// and the "to" cursor hasn't been created yet.

jd.currentToCursor, err = newJsonCursorFromCursor(ctx, jd.differ.to)
if err != nil {
return JsonDiff{}, err
}

err = advanceCursor(&jd.currentToCursor)
err = advanceCursor(ctx, &jd.currentToCursor)
if err != nil {
return JsonDiff{}, err
}
Expand Down Expand Up @@ -209,11 +228,11 @@ func (jd *IndexedJsonDiffer) Next(ctx context.Context) (diff JsonDiff, err error
if compareJsonLocations(fromCurrentLocation, toCurrentLocation) != 0 {
return JsonDiff{}, jsonParseError
}
err = advanceCursor(&jd.currentFromCursor)
err = advanceCursor(ctx, &jd.currentFromCursor)
if err != nil {
return JsonDiff{}, err
}
err = advanceCursor(&jd.currentToCursor)
err = advanceCursor(ctx, &jd.currentToCursor)
if err != nil {
return JsonDiff{}, err
}
Expand All @@ -230,11 +249,11 @@ func (jd *IndexedJsonDiffer) Next(ctx context.Context) (diff JsonDiff, err error
// Otherwise, compare them and possibly return a modification.
if (fromScanner.current() == '{' && toScanner.current() == '{') ||
(fromScanner.current() == '[' && toScanner.current() == '[') {
err = advanceCursor(&jd.currentFromCursor)
err = advanceCursor(ctx, &jd.currentFromCursor)
if err != nil {
return JsonDiff{}, err
}
err = advanceCursor(&jd.currentToCursor)
err = advanceCursor(ctx, &jd.currentToCursor)
if err != nil {
return JsonDiff{}, err
}
Expand Down
25 changes: 21 additions & 4 deletions go/store/prolly/tree/json_diff_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -287,10 +287,13 @@ func largeJsonDiffTests(t *testing.T) []jsonDiffTest {
ctx := sql.NewEmptyContext()
ns := NewTestNodeStore()

emptyDocument := types.JSONDocument{Val: types.JsonObject{}}

insert := func(document types.MutableJSON, path string, val interface{}) types.MutableJSON {
jsonVal, inRange, err := types.JSON.Convert(val)
require.NoError(t, err)
require.True(t, (bool)(inRange))
document = document.Clone(ctx).(types.MutableJSON)
newDoc, changed, err := document.Insert(ctx, path, jsonVal.(sql.JSONWrapper))
require.NoError(t, err)
require.True(t, changed)
Expand Down Expand Up @@ -400,6 +403,18 @@ func largeJsonDiffTests(t *testing.T) []jsonDiffTest {
},
},
},
{
// This is a regression test.
// If:
// - One document fits in a single chunk and the other doesn't
// - The location of the chunk boundary in the larger document is also present in the smaller document
// - The chunk boundary doesn't fall at the beginning of value.
// Then the differ would fail to advance the prolly tree cursor and would incorrectly see the larger document as corrupt.
// The values in this test case are specifically chosen to meet these conditions.
name: "no error when diffing large doc with small doc",
from: largeObject,
to: insert(emptyDocument, "$.level6", insert(emptyDocument, "$.level4", lookup(largeObject, "$.level6.level4"))),
},
}
}

Expand Down Expand Up @@ -473,9 +488,11 @@ func runTest(t *testing.T, test jsonDiffTest) {

return cmp == 0
}
require.Equal(t, len(test.expectedDiffs), len(actualDiffs))
for i, expected := range test.expectedDiffs {
actual := actualDiffs[i]
require.True(t, diffsEqual(expected, actual), fmt.Sprintf("Expected: %v\nActual: %v", expected, actual))
if test.expectedDiffs != nil {
require.Equal(t, len(test.expectedDiffs), len(actualDiffs))
for i, expected := range test.expectedDiffs {
actual := actualDiffs[i]
require.True(t, diffsEqual(expected, actual), fmt.Sprintf("Expected: %v\nActual: %v", expected, actual))
}
}
}
2 changes: 1 addition & 1 deletion go/store/prolly/tree/json_scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ type JsonScanner struct {
valueOffset int
}

var jsonParseError = fmt.Errorf("an error occurred while reading or writing JSON to/from the database. This is most likely a bug, but could indicate database corruption")
var jsonParseError = fmt.Errorf("encountered invalid JSON while reading JSON from the database, or while preparing to write JSON to the database. This is most likely a bug in JSON diffing")

func (j JsonScanner) Clone() JsonScanner {
return JsonScanner{
Expand Down

0 comments on commit b241b58

Please sign in to comment.