From d7e4898e2ad0d8c025b67c18c07e1112b808a3c5 Mon Sep 17 00:00:00 2001 From: Neil Macneale IV Date: Wed, 24 Jul 2024 11:13:29 -0700 Subject: [PATCH 1/5] Don't panic when performing a GC on a shallow clone --- go/store/types/value.go | 21 +++++++++++---------- go/store/types/value_store.go | 19 +++++++++++++++++-- integration-tests/bats/shallow-clone.bats | 11 +++++++++++ 3 files changed, 39 insertions(+), 12 deletions(-) diff --git a/go/store/types/value.go b/go/store/types/value.go index 509d06d7bab..cf756ebce0e 100644 --- a/go/store/types/value.go +++ b/go/store/types/value.go @@ -309,6 +309,7 @@ func (v valueImpl) Kind() NomsKind { // GhostValue is a placeholder for a value that has not been pulled from a remote. The structure holds no information, // All methods will panic if called. type GhostValue struct { + hash hash.Hash } var _ Value = GhostValue{} @@ -317,20 +318,20 @@ func (g GhostValue) Kind() NomsKind { panic("Error: GhostValue.Kind() called.") } -func (g GhostValue) Value(ctx context.Context) (Value, error) { +func (g GhostValue) Value(_ context.Context) (Value, error) { panic("Error: GhostValue.Value() called.") } -func (g GhostValue) Less(ctx context.Context, nbf *NomsBinFormat, other LesserValuable) (bool, error) { +func (g GhostValue) Less(_ context.Context, _ *NomsBinFormat, _ LesserValuable) (bool, error) { panic("Error: GhostValue.Less() called.") } -func (g GhostValue) Equals(other Value) bool { +func (g GhostValue) Equals(_ Value) bool { panic("Error: GhostValue.Equals() called.") } -func (g GhostValue) Hash(format *NomsBinFormat) (hash.Hash, error) { - panic("Error: GhostValue.Hash() called.") +func (g GhostValue) Hash(_ *NomsBinFormat) (hash.Hash, error) { + return g.hash, nil } func (g GhostValue) isPrimitive() bool { @@ -341,22 +342,22 @@ func (g GhostValue) HumanReadableString() string { panic("Error: GhostValue.HumanReadableString() called.") } -func (g GhostValue) walkRefs(format *NomsBinFormat, callback RefCallback) error { - panic("Error: GhostValue.walkRefs() called.") +func (g GhostValue) walkRefs(_ *NomsBinFormat, _ RefCallback) error { + return nil } func (g GhostValue) typeOf() (*Type, error) { panic("Error: GhostValue.typeOf() called.") } -func (g GhostValue) writeTo(writer nomsWriter, format *NomsBinFormat) error { +func (g GhostValue) writeTo(_ nomsWriter, _ *NomsBinFormat) error { panic("Error: GhostValue.writeTo() called.") } -func (g GhostValue) readFrom(format *NomsBinFormat, reader *binaryNomsReader) (Value, error) { +func (g GhostValue) readFrom(_ *NomsBinFormat, _ *binaryNomsReader) (Value, error) { panic("Error: GhostValue.readFrom() called.") } -func (g GhostValue) skip(format *NomsBinFormat, reader *binaryNomsReader) { +func (g GhostValue) skip(_ *NomsBinFormat, _ *binaryNomsReader) { panic("Error: GhostValue.skip() called.") } diff --git a/go/store/types/value_store.go b/go/store/types/value_store.go index a28a68a752e..3e7d0e37714 100644 --- a/go/store/types/value_store.go +++ b/go/store/types/value_store.go @@ -200,7 +200,7 @@ func (lvs *ValueStore) ReadValue(ctx context.Context, h hash.Hash) (Value, error return nil, err } if chunk.IsGhost() { - return GhostValue{}, nil + return GhostValue{hash: chunk.Hash()}, nil } if chunk.IsEmpty() { @@ -233,7 +233,7 @@ func (lvs *ValueStore) ReadManyValues(ctx context.Context, hashes hash.HashSlice lvs.versOnce.Do(lvs.expectVersion) decode := func(h hash.Hash, chunk *chunks.Chunk) (Value, error) { if chunk.IsGhost() { - return GhostValue{}, nil + return GhostValue{hash: chunk.Hash()}, nil } v, ferr := DecodeValue(*chunk, lvs) @@ -739,6 +739,21 @@ func (lvs *ValueStore) gcProcessRefs(ctx context.Context, } } + // GC skips ghost values, but other ref walkers don't. Filter them out here. + realVals := make(ValueSlice, 0, len(vals)) + for _, v := range vals { + if _, ok := v.(GhostValue); !ok { + h, err := v.Hash(lvs.Format()) + if err != nil { + return err + } + visited.Insert(h) // Can't visit a ghost. That would be spooky. + } else { + realVals = append(realVals, v) + } + } + vals = realVals + hashes, err := walker.GetRefSet(visited, vals) if err != nil { return err diff --git a/integration-tests/bats/shallow-clone.bats b/integration-tests/bats/shallow-clone.bats index e307f8b17b5..44ba88e006a 100644 --- a/integration-tests/bats/shallow-clone.bats +++ b/integration-tests/bats/shallow-clone.bats @@ -120,6 +120,17 @@ seed_and_start_serial_remote() { [[ "$output" =~ "15" ]] || false # 1+2+3+4+5 = 15. } +@test "shallow-clone: dolt gc works" { + seed_and_start_serial_remote + + mkdir clones + cd clones + + dolt sql -q "call dolt_clone('--depth', '1','http://localhost:50051/test-org/test-repo')" + + cd test-repo + dolt gc +} @test "shallow-clone: push to a new remote should error" { seed_and_start_serial_remote From b948da9495da1da6258d16e02a4942869b8ddfe5 Mon Sep 17 00:00:00 2001 From: Neil Macneale IV Date: Wed, 24 Jul 2024 12:52:31 -0700 Subject: [PATCH 2/5] Remove ghost hash from GC batch as well --- go/store/types/value_store.go | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/go/store/types/value_store.go b/go/store/types/value_store.go index 3e7d0e37714..ba4b493931e 100644 --- a/go/store/types/value_store.go +++ b/go/store/types/value_store.go @@ -725,10 +725,6 @@ func (lvs *ValueStore) gcProcessRefs(ctx context.Context, toVisit = make([]hash.HashSet, len(batches)+1) toVisitCount = 0 for i, batch := range batches { - if err := keepHashes(batch); err != nil { - return err - } - vals, err := lvs.ReadManyValues(ctx, batch) if err != nil { return err @@ -741,19 +737,25 @@ func (lvs *ValueStore) gcProcessRefs(ctx context.Context, // GC skips ghost values, but other ref walkers don't. Filter them out here. realVals := make(ValueSlice, 0, len(vals)) + nonGhostBatch := make([]hash.Hash, 0, len(vals)) for _, v := range vals { - if _, ok := v.(GhostValue); !ok { - h, err := v.Hash(lvs.Format()) - if err != nil { - return err - } + h, err := v.Hash(lvs.Format()) + if err != nil { + return err + } + if _, ok := v.(GhostValue); ok { visited.Insert(h) // Can't visit a ghost. That would be spooky. } else { realVals = append(realVals, v) + nonGhostBatch = append(nonGhostBatch, h) } } vals = realVals + if err := keepHashes(nonGhostBatch); err != nil { + return err + } + hashes, err := walker.GetRefSet(visited, vals) if err != nil { return err From a78f71784ab2f73d735d929051f41c673c3cb597 Mon Sep 17 00:00:00 2001 From: Neil Macneale IV Date: Tue, 23 Jul 2024 08:14:02 -0700 Subject: [PATCH 3/5] Attempt to make shallow clone less flaky --- integration-tests/bats/shallow-clone.bats | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/integration-tests/bats/shallow-clone.bats b/integration-tests/bats/shallow-clone.bats index 44ba88e006a..d93a3bdcfd9 100644 --- a/integration-tests/bats/shallow-clone.bats +++ b/integration-tests/bats/shallow-clone.bats @@ -28,7 +28,7 @@ stop_remotesrv() { # serial repository is 7 commits: # (init) <- (table create) <- (val 1) <- (val 2) <- (val 3) <- (val 4) <- (val 5) [main] -seed_and_start_serial_remote() { +seed_local_remote() { mkdir remote cd remote dolt init @@ -42,6 +42,13 @@ seed_and_start_serial_remote() { done dolt tag nonheadtag HEAD~2 + cd .. +} + + +seed_and_start_serial_remote() { + seed_local_remote + cd remote remotesrv --http-port 1234 --repo-mode & remotesrv_pid=$! @@ -94,8 +101,7 @@ seed_and_start_serial_remote() { } @test "shallow-clone: shallow clone with a file path" { - seed_and_start_serial_remote - stop_remotesrv + seed_local_remote cd remote dolt remote add origin file://../file-remote dolt push origin main From b07e23c87468b79a5b28a9cf4c9808954cce6a54 Mon Sep 17 00:00:00 2001 From: Neil Macneale IV Date: Wed, 24 Jul 2024 13:54:26 -0700 Subject: [PATCH 4/5] Bring back panic in GhostValue.walkRefs to ensure we discover bugs in the future --- go/store/types/value.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/store/types/value.go b/go/store/types/value.go index cf756ebce0e..7d48301f65c 100644 --- a/go/store/types/value.go +++ b/go/store/types/value.go @@ -343,7 +343,7 @@ func (g GhostValue) HumanReadableString() string { } func (g GhostValue) walkRefs(_ *NomsBinFormat, _ RefCallback) error { - return nil + panic("Error: GhostValue.walkRefs() called.") } func (g GhostValue) typeOf() (*Type, error) { From 108db5281fa5a8a88e279de81c12f97c00d85548 Mon Sep 17 00:00:00 2001 From: Neil Macneale IV Date: Wed, 24 Jul 2024 13:57:35 -0700 Subject: [PATCH 5/5] Do a query on the GCed data after shallow-clone --- integration-tests/bats/shallow-clone.bats | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/integration-tests/bats/shallow-clone.bats b/integration-tests/bats/shallow-clone.bats index d93a3bdcfd9..a02d9192830 100644 --- a/integration-tests/bats/shallow-clone.bats +++ b/integration-tests/bats/shallow-clone.bats @@ -136,6 +136,11 @@ seed_and_start_serial_remote() { cd test-repo dolt gc + + # Verify that the table is complete. + run dolt sql -q "select sum(i) from vals" + [ "$status" -eq 0 ] + [[ "$output" =~ "15" ]] || false # 1+2+3+4+5 = 15. } @test "shallow-clone: push to a new remote should error" { seed_and_start_serial_remote