Skip to content

Commit

Permalink
backup: support reading changed files/dirs from a file
Browse files Browse the repository at this point in the history
For `restic backup`, support new flags
`--changed-files-from-verbatim` and `--changed-files-from-raw` to
read the files/dirs that actually have changed from a file (or
multiple files). Directories that don't (directly or indirectly)
contain any changed files/dirs will reuse the corresponding subtree
of the parent snapshot.

This option is useful for higher-level backup tools which use
restic as a backend but have their own mechanism of figuring out
which files have changed (e.g., using zfs or btrfs diff tools).
We require `--parent` to be passed explicitly as a protection
mechanism, to make sure the higher-level backup tool and restic agree
on the parent snapshot. Note that the caller can still circumvent this
protection mechanism by passing `--parent latest`.

Caveat: since device IDs are unstable (across reboots or across
different zfs/btrfs snapshots of the same subvolume), the parent
snapshot and current snapshot might have mismatching device IDs.
In this case, the feature will still reuse subtrees of the parent
snapshot (under the conditions mentioned above), so we end up with
a snapshot that contains subtrees with different `device_id`
values, even if there was only a single mountpoint in play.

For now, we could simply document this caveat and discourage users
who rely on correct restoration of hardlinks from using this
feature. When #3041 is
properly fixed in the future, this caveat will probably go
away, too.

The idea for this feature emerged here:
#1502 (comment)
  • Loading branch information
haslersn committed Sep 16, 2023
1 parent 6e586b6 commit ca0be94
Show file tree
Hide file tree
Showing 3 changed files with 147 additions and 41 deletions.
103 changes: 80 additions & 23 deletions cmd/restic/cmd_backup.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,28 +88,30 @@ Exit status is 3 if some source data could not be read (incomplete snapshot crea
type BackupOptions struct {
excludePatternOptions

Parent string
GroupBy restic.SnapshotGroupByOptions
Force bool
ExcludeOtherFS bool
ExcludeIfPresent []string
ExcludeCaches bool
ExcludeLargerThan string
Stdin bool
StdinFilename string
Tags restic.TagLists
Host string
FilesFrom []string
FilesFromVerbatim []string
FilesFromRaw []string
TimeStamp string
WithAtime bool
IgnoreInode bool
IgnoreCtime bool
UseFsSnapshot bool
DryRun bool
ReadConcurrency uint
NoScan bool
Parent string
GroupBy restic.SnapshotGroupByOptions
Force bool
ExcludeOtherFS bool
ExcludeIfPresent []string
ExcludeCaches bool
ExcludeLargerThan string
Stdin bool
StdinFilename string
Tags restic.TagLists
Host string
FilesFrom []string
FilesFromVerbatim []string
FilesFromRaw []string
ChangedFilesFromVerbatim []string
ChangedFilesFromRaw []string
TimeStamp string
WithAtime bool
IgnoreInode bool
IgnoreCtime bool
UseFsSnapshot bool
DryRun bool
ReadConcurrency uint
NoScan bool
}

var backupOptions BackupOptions
Expand Down Expand Up @@ -146,6 +148,8 @@ func init() {
f.StringArrayVar(&backupOptions.FilesFrom, "files-from", nil, "read the files to backup from `file` (can be combined with file args; can be specified multiple times)")
f.StringArrayVar(&backupOptions.FilesFromVerbatim, "files-from-verbatim", nil, "read the files to backup from `file` (can be combined with file args; can be specified multiple times)")
f.StringArrayVar(&backupOptions.FilesFromRaw, "files-from-raw", nil, "read the files to backup from `file` (can be combined with file args; can be specified multiple times)")
f.StringArrayVar(&backupOptions.ChangedFilesFromVerbatim, "changed-files-from-verbatim", nil, "read names of changed files/directories from `file` (can be combined with changed-file args; can be specified multiple times)")
f.StringArrayVar(&backupOptions.ChangedFilesFromRaw, "changed-files-from-raw", nil, "read names of changed files/directories from `file` (can be combined with changed-file args; can be specified multiple times)")
f.StringVar(&backupOptions.TimeStamp, "time", "", "`time` of the backup (ex. '2012-11-01 22:08:41') (default: now)")
f.BoolVar(&backupOptions.WithAtime, "with-atime", false, "store the atime for all files and directories")
f.BoolVar(&backupOptions.IgnoreInode, "ignore-inode", false, "ignore inode number changes when checking for modified files")
Expand Down Expand Up @@ -298,11 +302,27 @@ func (opts BackupOptions) Check(gopts GlobalOptions, args []string) error {
return errors.Fatal("--stdin and --files-from-raw cannot be used together")
}

if len(opts.ChangedFilesFromVerbatim) > 0 {
return errors.Fatal("--stdin and --changed-files-from-verbatim cannot be used together")
}
if len(opts.ChangedFilesFromRaw) > 0 {
return errors.Fatal("--stdin and --changed-files-from-raw cannot be used together")
}

if len(args) > 0 {
return errors.Fatal("--stdin was specified and files/dirs were listed as arguments")
}
}

if opts.Parent == "" {
if len(opts.ChangedFilesFromVerbatim) > 0 {
return errors.Fatal("using --changed-files-from-verbatim requires to also specify --parent")
}
if len(opts.ChangedFilesFromRaw) > 0 {
return errors.Fatal("using --changed-files-from-raw requires to also specify --parent")
}
}

return nil
}

Expand Down Expand Up @@ -431,6 +451,38 @@ func collectTargets(opts BackupOptions, args []string) (targets []string, err er
return targets, nil
}

// collectChangedFiles gathers the names of changed files/dirs from every file
// listed in opts.ChangedFilesFromVerbatim and opts.ChangedFilesFromRaw.
//
// When neither option names a file, it returns a nil pointer and a nil error.
// Otherwise it returns a pointer to a non-nil (possibly empty) slice holding
// the collected entries, verbatim files first and raw files second, each in
// the order given. The first read error aborts collection and is returned
// as-is together with a nil pointer.
func collectChangedFiles(opts BackupOptions) (changedFiles *[]string, err error) {
	if len(opts.ChangedFilesFromVerbatim)+len(opts.ChangedFilesFromRaw) == 0 {
		return nil, nil
	}

	// Start from an empty, non-nil slice so that "option given, but nothing
	// listed" is still reported as a non-nil result.
	collected := []string{}

	for _, listFile := range opts.ChangedFilesFromVerbatim {
		lines, err := readLines(listFile)
		if err != nil {
			return nil, err
		}
		for _, entry := range lines {
			// Empty lines carry no path and are dropped.
			if entry != "" {
				collected = append(collected, entry)
			}
		}
	}

	for _, listFile := range opts.ChangedFilesFromRaw {
		names, err := readFilenamesFromFileRaw(listFile)
		if err != nil {
			return nil, err
		}
		// Raw entries are appended exactly as the reader returned them.
		collected = append(collected, names...)
	}

	return &collected, nil
}

// parent returns the ID of the parent snapshot. If there is none, nil is
// returned.
func findParentSnapshot(ctx context.Context, repo restic.Repository, opts BackupOptions, targets []string, timeStampLimit time.Time) (*restic.Snapshot, error) {
Expand Down Expand Up @@ -472,6 +524,11 @@ func runBackup(ctx context.Context, opts BackupOptions, gopts GlobalOptions, ter
return err
}

changedFiles, err := collectChangedFiles(opts)
if err != nil {
return err
}

timeStamp := time.Now()
if opts.TimeStamp != "" {
timeStamp, err = time.ParseInLocation(TimeFormat, opts.TimeStamp, time.Local)
Expand Down Expand Up @@ -654,7 +711,7 @@ func runBackup(ctx context.Context, opts BackupOptions, gopts GlobalOptions, ter
if !gopts.JSON {
progressPrinter.V("start backup on %v", targets)
}
_, id, err := arch.Snapshot(ctx, targets, snapshotOpts)
_, id, err := arch.Snapshot(ctx, targets, changedFiles, snapshotOpts)

// cleanly shutdown all running goroutines
cancel()
Expand Down
83 changes: 66 additions & 17 deletions internal/archiver/archiver.go
Original file line number Diff line number Diff line change
Expand Up @@ -214,14 +214,33 @@ func (arch *Archiver) wrapLoadTreeError(id restic.ID, err error) error {

// SaveDir stores a directory in the repo and returns the node. snPath is the
// path within the current snapshot.
func (arch *Archiver) SaveDir(ctx context.Context, snPath string, dir string, fi os.FileInfo, previous *restic.Tree, complete CompleteFunc) (d FutureNode, err error) {
func (arch *Archiver) SaveDir(ctx context.Context, snPath string, dir string, absdir string, cdtree *Tree, fi os.FileInfo, previous *restic.Node, complete CompleteFunc) (d FutureNode, err error) {
debug.Log("%v %v", snPath, dir)

treeNode, err := arch.nodeFromFileInfo(snPath, dir, fi)
if err != nil {
return FutureNode{}, err
}

if cdtree != nil && previous != nil && len(cdtree.Nodes) == 0 {
debug.Log("%v doesn't contain any changed files, using existing nodes", dir)
treeNode.Subtree = previous.Subtree
fn := newFutureNodeWithResult(futureNodeResult{
snPath: snPath,
target: dir,
node: treeNode,
})
return fn, nil
}

oldSubtree, err := arch.loadSubtree(ctx, previous)
if err != nil {
err = arch.error(absdir, err)
}
if err != nil {
return FutureNode{}, err
}

names, err := readdirnames(arch.FS, dir, fs.O_NOFOLLOW)
if err != nil {
return FutureNode{}, err
Expand All @@ -237,10 +256,16 @@ func (arch *Archiver) SaveDir(ctx context.Context, snPath string, dir string, fi
return FutureNode{}, ctx.Err()
}

var subcdtree *Tree
if cdtree != nil {
tmp := cdtree.Nodes[name]
subcdtree = &tmp
}

pathname := arch.FS.Join(dir, name)
oldNode := previous.Find(name)
oldNode := oldSubtree.Find(name)
snItem := join(snPath, name)
fn, excluded, err := arch.Save(ctx, snItem, pathname, oldNode)
fn, excluded, err := arch.Save(ctx, snItem, pathname, subcdtree, oldNode)

// return error early if possible
if err != nil {
Expand Down Expand Up @@ -331,7 +356,7 @@ func (arch *Archiver) allBlobsPresent(previous *restic.Node) bool {
// Errors and completion needs to be handled by the caller.
//
// snPath is the path within the current snapshot.
func (arch *Archiver) Save(ctx context.Context, snPath, target string, previous *restic.Node) (fn FutureNode, excluded bool, err error) {
func (arch *Archiver) Save(ctx context.Context, snPath, target string, cdtree *Tree, previous *restic.Node) (fn FutureNode, excluded bool, err error) {
start := time.Now()

debug.Log("%v target %q, previous %v", snPath, target, previous)
Expand Down Expand Up @@ -444,15 +469,8 @@ func (arch *Archiver) Save(ctx context.Context, snPath, target string, previous
debug.Log(" %v dir", target)

snItem := snPath + "/"
oldSubtree, err := arch.loadSubtree(ctx, previous)
if err != nil {
err = arch.error(abstarget, err)
}
if err != nil {
return FutureNode{}, false, err
}

fn, err = arch.SaveDir(ctx, snPath, target, fi, oldSubtree,
fn, err = arch.SaveDir(ctx, snPath, target, abstarget, cdtree, fi, previous,
func(node *restic.Node, stats ItemStats) {
arch.CompleteItem(snItem, previous, node, stats, time.Since(start))
})
Expand Down Expand Up @@ -537,7 +555,7 @@ func (arch *Archiver) statDir(dir string) (os.FileInfo, error) {

// SaveTree stores a Tree in the repo, returned is the tree. snPath is the path
// within the current snapshot.
func (arch *Archiver) SaveTree(ctx context.Context, snPath string, atree *Tree, previous *restic.Tree, complete CompleteFunc) (FutureNode, int, error) {
func (arch *Archiver) SaveTree(ctx context.Context, snPath string, atree *Tree, cdtree *Tree, previous *restic.Tree, complete CompleteFunc) (FutureNode, int, error) {

var node *restic.Node
if snPath != "/" {
Expand Down Expand Up @@ -575,7 +593,21 @@ func (arch *Archiver) SaveTree(ctx context.Context, snPath string, atree *Tree,

// this is a leaf node
if subatree.Leaf() {
fn, excluded, err := arch.Save(ctx, join(snPath, name), subatree.Path, previous.Find(name))
relative_cdtree := cdtree
if relative_cdtree != nil {
abs_path, err := arch.FS.Abs(subatree.Path)
if err != nil {
return FutureNode{}, 0, err
}
pc, _ := pathComponents(arch.FS, abs_path, false)
for _, component := range pc {
tmp := relative_cdtree.Nodes[component]
relative_cdtree = &tmp
}
debug.Log("relative_cdtree for subtree path %v:\n%v", abs_path, relative_cdtree)
}

fn, excluded, err := arch.Save(ctx, join(snPath, name), subatree.Path, relative_cdtree, previous.Find(name))

if err != nil {
err = arch.error(subatree.Path, err)
Expand Down Expand Up @@ -609,7 +641,7 @@ func (arch *Archiver) SaveTree(ctx context.Context, snPath string, atree *Tree,
}

// not a leaf node, archive subtree
fn, _, err := arch.SaveTree(ctx, join(snPath, name), &subatree, oldSubtree, func(n *restic.Node, is ItemStats) {
fn, _, err := arch.SaveTree(ctx, join(snPath, name), &subatree, cdtree, oldSubtree, func(n *restic.Node, is ItemStats) {
arch.CompleteItem(snItem, oldNode, n, is, time.Since(start))
})
if err != nil {
Expand Down Expand Up @@ -728,7 +760,7 @@ func (arch *Archiver) stopWorkers() {
}

// Snapshot saves several targets and returns a snapshot.
func (arch *Archiver) Snapshot(ctx context.Context, targets []string, opts SnapshotOptions) (*restic.Snapshot, restic.ID, error) {
func (arch *Archiver) Snapshot(ctx context.Context, targets []string, changedFiles *[]string, opts SnapshotOptions) (*restic.Snapshot, restic.ID, error) {
cleanTargets, err := resolveRelativeTargets(arch.FS, targets)
if err != nil {
return nil, restic.ID{}, err
Expand All @@ -739,6 +771,23 @@ func (arch *Archiver) Snapshot(ctx context.Context, targets []string, opts Snaps
return nil, restic.ID{}, err
}

var cdtree *Tree
if changedFiles != nil {
// Make sure paths in changedFiles are absolute
for i := range *changedFiles {
(*changedFiles)[i], err = arch.FS.Abs((*changedFiles)[i])
if err != nil {
return nil, restic.ID{}, err
}
}

cdtree, err = NewTree(arch.FS, *changedFiles)
if err != nil {
return nil, restic.ID{}, err
}
debug.Log("cdtree:\n%v", cdtree)
}

var rootTreeID restic.ID

wgUp, wgUpCtx := errgroup.WithContext(ctx)
Expand All @@ -752,7 +801,7 @@ func (arch *Archiver) Snapshot(ctx context.Context, targets []string, opts Snaps
arch.runWorkers(wgCtx, wg)

debug.Log("starting snapshot")
fn, nodeCount, err := arch.SaveTree(wgCtx, "/", atree, arch.loadParentTree(wgCtx, opts.ParentSnapshot), func(n *restic.Node, is ItemStats) {
fn, nodeCount, err := arch.SaveTree(wgCtx, "/", atree, cdtree, arch.loadParentTree(wgCtx, opts.ParentSnapshot), func(n *restic.Node, is ItemStats) {
arch.CompleteItem("/", nil, nil, is, time.Since(start))
})
if err != nil {
Expand Down
2 changes: 1 addition & 1 deletion internal/archiver/testing.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ func TestSnapshot(t testing.TB, repo restic.Repository, path string, parent *res
}
opts.ParentSnapshot = sn
}
sn, _, err := arch.Snapshot(context.TODO(), []string{path}, opts)
sn, _, err := arch.Snapshot(context.TODO(), []string{path}, nil, opts)
if err != nil {
t.Fatal(err)
}
Expand Down

0 comments on commit ca0be94

Please sign in to comment.