diff --git a/go/store/types/value.go b/go/store/types/value.go index 509d06d7bab..7d48301f65c 100644 --- a/go/store/types/value.go +++ b/go/store/types/value.go @@ -309,6 +309,7 @@ func (v valueImpl) Kind() NomsKind { // GhostValue is a placeholder for a value that has not been pulled from a remote. The structure holds no information, // All methods will panic if called. type GhostValue struct { + hash hash.Hash } var _ Value = GhostValue{} @@ -317,20 +318,20 @@ func (g GhostValue) Kind() NomsKind { panic("Error: GhostValue.Kind() called.") } -func (g GhostValue) Value(ctx context.Context) (Value, error) { +func (g GhostValue) Value(_ context.Context) (Value, error) { panic("Error: GhostValue.Value() called.") } -func (g GhostValue) Less(ctx context.Context, nbf *NomsBinFormat, other LesserValuable) (bool, error) { +func (g GhostValue) Less(_ context.Context, _ *NomsBinFormat, _ LesserValuable) (bool, error) { panic("Error: GhostValue.Less() called.") } -func (g GhostValue) Equals(other Value) bool { +func (g GhostValue) Equals(_ Value) bool { panic("Error: GhostValue.Equals() called.") } -func (g GhostValue) Hash(format *NomsBinFormat) (hash.Hash, error) { - panic("Error: GhostValue.Hash() called.") +func (g GhostValue) Hash(_ *NomsBinFormat) (hash.Hash, error) { + return g.hash, nil } func (g GhostValue) isPrimitive() bool { @@ -341,7 +342,7 @@ func (g GhostValue) HumanReadableString() string { panic("Error: GhostValue.HumanReadableString() called.") } -func (g GhostValue) walkRefs(format *NomsBinFormat, callback RefCallback) error { +func (g GhostValue) walkRefs(_ *NomsBinFormat, _ RefCallback) error { panic("Error: GhostValue.walkRefs() called.") } @@ -349,14 +350,14 @@ func (g GhostValue) typeOf() (*Type, error) { panic("Error: GhostValue.typeOf() called.") } -func (g GhostValue) writeTo(writer nomsWriter, format *NomsBinFormat) error { +func (g GhostValue) writeTo(_ nomsWriter, _ *NomsBinFormat) error { panic("Error: GhostValue.writeTo() called.") } -func (g GhostValue) readFrom(format *NomsBinFormat, reader *binaryNomsReader) (Value, error) { +func (g GhostValue) readFrom(_ *NomsBinFormat, _ *binaryNomsReader) (Value, error) { panic("Error: GhostValue.readFrom() called.") } -func (g GhostValue) skip(format *NomsBinFormat, reader *binaryNomsReader) { +func (g GhostValue) skip(_ *NomsBinFormat, _ *binaryNomsReader) { panic("Error: GhostValue.skip() called.") } diff --git a/go/store/types/value_store.go b/go/store/types/value_store.go index a28a68a752e..ba4b493931e 100644 --- a/go/store/types/value_store.go +++ b/go/store/types/value_store.go @@ -200,7 +200,7 @@ func (lvs *ValueStore) ReadValue(ctx context.Context, h hash.Hash) (Value, error return nil, err } if chunk.IsGhost() { - return GhostValue{}, nil + return GhostValue{hash: chunk.Hash()}, nil } if chunk.IsEmpty() { @@ -233,7 +233,7 @@ func (lvs *ValueStore) ReadManyValues(ctx context.Context, hashes hash.HashSlice lvs.versOnce.Do(lvs.expectVersion) decode := func(h hash.Hash, chunk *chunks.Chunk) (Value, error) { if chunk.IsGhost() { - return GhostValue{}, nil + return GhostValue{hash: chunk.Hash()}, nil } v, ferr := DecodeValue(*chunk, lvs) @@ -725,10 +725,6 @@ func (lvs *ValueStore) gcProcessRefs(ctx context.Context, toVisit = make([]hash.HashSet, len(batches)+1) toVisitCount = 0 for i, batch := range batches { - if err := keepHashes(batch); err != nil { - return err - } - vals, err := lvs.ReadManyValues(ctx, batch) if err != nil { return err @@ -739,6 +735,27 @@ func (lvs *ValueStore) gcProcessRefs(ctx context.Context, } } + // GC skips ghost values, but other ref walkers don't. Filter them out here. + realVals := make(ValueSlice, 0, len(vals)) + nonGhostBatch := make([]hash.Hash, 0, len(vals)) + for _, v := range vals { + h, err := v.Hash(lvs.Format()) + if err != nil { + return err + } + if _, ok := v.(GhostValue); ok { + visited.Insert(h) // Can't visit a ghost. That would be spooky. + } else { + realVals = append(realVals, v) + nonGhostBatch = append(nonGhostBatch, h) + } + } + vals = realVals + + if err := keepHashes(nonGhostBatch); err != nil { + return err + } + hashes, err := walker.GetRefSet(visited, vals) if err != nil { return err diff --git a/integration-tests/bats/shallow-clone.bats b/integration-tests/bats/shallow-clone.bats index e307f8b17b5..a02d9192830 100644 --- a/integration-tests/bats/shallow-clone.bats +++ b/integration-tests/bats/shallow-clone.bats @@ -28,7 +28,7 @@ stop_remotesrv() { # serial repository is 7 commits: # (init) <- (table create) <- (val 1) <- (val 2) <- (val 3) <- (val 4) <- (val 5) [main] -seed_and_start_serial_remote() { +seed_local_remote() { mkdir remote cd remote dolt init @@ -42,6 +42,13 @@ seed_and_start_serial_remote() { done dolt tag nonheadtag HEAD~2 + cd .. +} + + +seed_and_start_serial_remote() { + seed_local_remote + cd remote remotesrv --http-port 1234 --repo-mode & remotesrv_pid=$! @@ -94,8 +101,7 @@ seed_and_start_serial_remote() { } @test "shallow-clone: shallow clone with a file path" { - seed_and_start_serial_remote - stop_remotesrv + seed_local_remote cd remote dolt remote add origin file://../file-remote dolt push origin main @@ -120,6 +126,22 @@ seed_and_start_serial_remote() { [[ "$output" =~ "15" ]] || false # 1+2+3+4+5 = 15. } +@test "shallow-clone: dolt gc works" { + seed_and_start_serial_remote + + mkdir clones + cd clones + + dolt sql -q "call dolt_clone('--depth', '1','http://localhost:50051/test-org/test-repo')" + + cd test-repo + dolt gc + + # Verify that the table is complete. + run dolt sql -q "select sum(i) from vals" + [ "$status" -eq 0 ] + [[ "$output" =~ "15" ]] || false # 1+2+3+4+5 = 15. +} @test "shallow-clone: push to a new remote should error" { seed_and_start_serial_remote