Skip to content

Commit

Permalink
Eliminate delay when running report
Browse files Browse the repository at this point in the history
Prior to this patch, the full set of duplicates was materialized in
memory before anything was printed, which made for a bad user
experience when the report was very large. This patch streams duplicates
from the DB to avoid this delay.
  • Loading branch information
anishathalye committed Mar 17, 2021
1 parent 609be2c commit 00d8e66
Showing 1 changed file with 47 additions and 9 deletions.
56 changes: 47 additions & 9 deletions report.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
package periscope

import (
"container/list"
"fmt"
"sync"

"github.com/anishathalye/periscope/db"
"github.com/anishathalye/periscope/herror"

"github.com/dustin/go-humanize"
@@ -12,24 +15,59 @@ type ReportOptions struct {
}

// Report writes every duplicate set to ps.outStream. Each set is printed as
// a human-readable size line (humanize.Bytes of set.Size) followed by one
// line per entry in set.Paths, with a blank line between consecutive sets.
// It returns the error from the database query if there is one, and nil
// otherwise. The options argument is not read anywhere in this listing.
//
// NOTE(review): this listing appears to be a rendered diff without +/-
// markers, so lines from both the pre-patch and post-patch versions are
// shown together (e.g. both the AllDuplicates and AllDuplicatesC calls, and
// both forms of the blank-line separator). Confirm against the repository
// before treating it as compilable code.
func (ps *Periscope) Report(options *ReportOptions) herror.Interface {
// We could stream duplicates with AllDuplicatesC, but then if someone
// We stream duplicates with AllDuplicatesC, but we don't read directly
// from it and write results in the straightforward way. Writing to
// output may block (e.g. if the user is using a pager), so if a user
// had `psc report | less` open in one window and tried to `psc rm` in
// another, they'd get a "database is locked" error. This seems like
// it's a common enough use case that it's worth avoiding it, even if
// it increases the latency of a `psc report` to output the first
// screen of duplicates.
sets, err := ps.db.AllDuplicates()
// it's a common enough use case that it's worth avoiding it. We
// achieve this by buffering the results in memory.
sets, err := ps.db.AllDuplicatesC()
if err != nil {
return err
}
for i, set := range sets {

// buf queues sets that have been received but not yet printed; done
// records that the producer goroutine has finished. Both are only
// touched while holding mu, and cond is used to wake the printing loop.
buf := list.New()
done := false
var mu sync.Mutex
cond := sync.NewCond(&mu)
// Producer: move each set from sets into buf. This goroutine never
// writes to ps.outStream, so receiving from sets does not wait on output.
go func() {
for set := range sets {
mu.Lock()
buf.PushBack(set)
cond.Signal()
mu.Unlock()
}
// The range loop has ended (presumably sets is a channel that was
// closed); tell the consumer that no more sets are coming.
mu.Lock()
done = true
cond.Signal()
mu.Unlock()
}()

// Consumer: print sets in the order they were buffered.
first := true
for {
mu.Lock()
// Block until there is something to print or the producer is done.
// cond.Wait releases mu while waiting and re-acquires it on wake-up.
for !done && buf.Len() == 0 {
cond.Wait()
}
// Nothing buffered and nothing more to come: all sets were printed.
if done && buf.Len() == 0 {
mu.Unlock()
break
}
// Pop the oldest set and drop the lock before writing, because the
// write may block and the producer must still be able to append.
front := buf.Front()
set := front.Value.(db.DuplicateSet)
buf.Remove(front)
mu.Unlock()

// Blank line between sets, but not before the first one.
if !first {
fmt.Fprintf(ps.outStream, "\n")
}
fmt.Fprintf(ps.outStream, "%s\n", humanize.Bytes(uint64(set.Size)))
for _, info := range set.Paths {
fmt.Fprintf(ps.outStream, " %s\n", info)
}
if i != len(sets)-1 {
fmt.Fprintf(ps.outStream, "\n")
}
first = false
}

return nil
}

0 comments on commit 00d8e66

Please sign in to comment.