Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[BUG]: db.Backup may see a partial write of a single transaction #2049

Open
lsytj0413 opened this issue Feb 23, 2024 · 2 comments
Open

[BUG]: db.Backup may see a partial write of a single transaction #2049

lsytj0413 opened this issue Feb 23, 2024 · 2 comments
Labels
kind/bug Something is broken.

Comments

@lsytj0413
Copy link

lsytj0413 commented Feb 23, 2024

What version of Badger are you using?

v4.2.0

What version of Go are you using?

go version go1.21.0 darwin/amd64

Have you tried reproducing the issue with the latest release?

None

What is the hardware spec (RAM, CPU, OS)?

  • 2.6 GHz 6-cores Intel Core i7
  • 32 GB 2667 MHz DDR4
  • macOS Big Sur 11.7.10

What steps will reproduce the bug?

  • Use db.Backup to back up the database content while multiple write transactions are executing concurrently

The code below will panic:

// backupWriter is an io.Writer that collects the key/value pairs decoded from
// the chunks it is handed, so a finished backup can be inspected afterwards.
type backupWriter struct {
	// kvs maps every decoded key to its value; Write fills it in.
	kvs map[string]string
	// v stores the value returned by db.Backup for this writer (set by the caller).
	v   uint64
}

// Compile-time assertion that *backupWriter satisfies io.Writer.
var _ io.Writer = (*backupWriter)(nil)

// Write implements io.Writer. It tries to decode p as a pb.KVList and records
// every key/value pair of the list in w.kvs (later pairs overwrite earlier
// ones with the same key).
//
// A chunk that does not decode is skipped and still reported as fully
// written, so Write always returns len(p) and a nil error.
// NOTE(review): presumably this is deliberate because the backup stream also
// contains non-protobuf framing (e.g. length prefixes) — confirm against
// badger's backup format.
func (w *backupWriter) Write(p []byte) (n int, err error) {
	var list pb.KVList
	if decodeErr := proto.Unmarshal(p, &list); decodeErr != nil {
		// Not a KVList payload: ignore it but keep the producer going.
		return len(p), nil
	}

	for _, item := range list.Kv {
		w.kvs[string(item.Key)] = string(item.Value)
	}

	return len(p), nil
}

// Validate checks that the captured backup is transactionally consistent.
//
// Every key containing exactly one "_" and at least one "#" is treated as a
// marker key of the form "<prefix>#<i>_key" whose value is a counter j. For
// each marker, validateKeys(i, j) must succeed. Keys of any other shape
// (a different number of "_", or no "#") are ignored.
//
// It returns the first parse or validation error encountered, or nil when all
// markers check out. On a validation failure the complete key/value set is
// printed to stdout before the error is returned.
func (w *backupWriter) Validate() error {
	for key, value := range w.kvs {
		if strings.Count(key, "_") != 1 {
			continue
		}

		// TrimSuffix strips the literal "_key" suffix; TrimRight would treat
		// "_key" as a set of characters and could eat more than intended.
		parts := strings.Split(strings.TrimSuffix(key, "_key"), "#")
		if len(parts) < 2 {
			// No "#" separator: not a marker key, so there is no index to read.
			continue
		}

		count, err := strconv.Atoi(value)
		if err != nil {
			return err
		}

		index, err := strconv.Atoi(parts[1])
		if err != nil {
			return err
		}

		if err := w.validateKeys(index, count); err != nil {
			// Dump everything that was captured to help diagnose the failure.
			fmt.Printf("%v\n", w.allKV())
			return err
		}
	}

	return nil
}

// allKV renders every captured pair as "key=value" and returns the pairs as a
// JSON array of strings. The order follows map iteration and is therefore not
// stable between calls. An empty writer yields "[]".
func (w *backupWriter) allKV() string {
	pairs := make([]string, 0, len(w.kvs))
	for k, val := range w.kvs {
		pairs = append(pairs, fmt.Sprintf("%v=%v", k, val))
	}

	// The marshal error is discarded: the input is a plain slice of strings.
	encoded, _ := json.Marshal(pairs)
	return string(encoded)
}

// validateKeys verifies that, for index i, every key "<jj>_<i>_key" with jj in
// 1..j is present in the captured backup and stores jj (in decimal) as its
// value.
//
// It returns an error naming the first missing or mismatched key, or nil when
// all j keys are consistent. When j < 1 nothing is checked and nil is returned.
func (w *backupWriter) validateKeys(i int, j int) error {
	for jj := 1; jj <= j; jj++ {
		key := fmt.Sprintf("%v_%v_key", jj, i)

		got, ok := w.kvs[key]
		if !ok {
			// Error strings carry no trailing newline; callers add their own
			// line breaks when printing.
			return fmt.Errorf("cannot found key %v, for %v %v", key, i, j)
		}

		if got != strconv.Itoa(jj) {
			return fmt.Errorf("value %v didn't match for k %v", got, key)
		}
	}

	return nil
}

// testBadgerBackup runs one round of the reproduction.
//
// It opens an in-memory badger DB, starts 10 writer goroutines that each
// commit two keys per transaction roughly every millisecond, and takes 10
// backups at random intervals while the writers are running. After the writers
// have stopped, every captured backup is validated; the function panics on any
// setup error, write error, backup error, or validation failure.
func testBadgerBackup() {
	dir, err := os.MkdirTemp("", "badger_test")
	if err != nil {
		panic(err)
	}
	// Remove the temporary directory when the round ends so repeated rounds do
	// not pile up directories. It is registered before the db.Close defer and
	// therefore runs after the DB has been closed.
	defer func() {
		// Best-effort cleanup; a failure here must not mask the test result.
		_ = os.RemoveAll(dir)
	}()
	fmt.Printf("dir: %v\n", dir)

	// The directory settings are cleared and InMemory is enabled.
	// NOTE(review): presumably dir is therefore never written to — confirm.
	o := badger.DefaultOptions(dir)
	o.Dir = ""
	o.ValueDir = ""
	o.InMemory = true
	o.Logger = nil
	db, err := badger.Open(o)
	if err != nil {
		panic(err)
	}
	defer func() {
		err := db.Close()
		if err != nil {
			panic(err)
		}
	}()

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// var l sync.RWMutex
	var wg sync.WaitGroup
	wg.Add(10)
	for i := 0; i < 10; i++ {
		go func(i int) {
			defer wg.Done()

			// Random per-goroutine prefix of the marker key "<vv>#<i>_key".
			vv := fmt.Sprintf("%v", rand.Int())

			j := 0
			for {
				// Stop on cancellation, otherwise pace the writes at ~1ms.
				select {
				case <-ctx.Done():
					return
				case <-time.After(1 * time.Millisecond):
				}

				j++
				// l.RLock()
				// One transaction writes both the marker key (value j) and the
				// per-iteration key "<j>_<i>_key"; a consistent reader must
				// never observe one without the other.
				tx := db.NewTransaction(true)
				err := tx.Set([]byte(fmt.Sprintf("%v#%v_key", vv, i)), []byte(fmt.Sprintf("%v", j)))
				if err != nil {
					panic(err)
				}

				err = tx.Set([]byte(fmt.Sprintf("%v_%v_key", j, i)), []byte(fmt.Sprintf("%v", j)))
				if err != nil {
					panic(err)
				}

				err = tx.Commit()
				if err != nil {
					panic(err)
				}
				// l.RUnlock()
			}
		}(i)
	}

	ws := []*backupWriter{}
	for i := 0; i < 10; i++ {
		// Random delay that tends to grow with i, so backups land at
		// different points of the write stream.
		time.Sleep(time.Millisecond * time.Duration(i*rand.Intn(200)+1))
		// l.Lock()
		w := backupWriter{
			kvs: map[string]string{},
		}
		// NOTE(review): the second argument 0 presumably requests a full
		// backup (badger's `since` parameter) — confirm against the badger docs.
		v, err := db.Backup(&w, 0)
		if err != nil {
			panic(err)
		}
		// l.Unlock()
		w.v = v
		ws = append(ws, &w)
	}
	cancel()

	// Wait for all writers to exit before validating.
	wg.Wait()

	for _, w := range ws {
		err := w.Validate()
		fmt.Printf("backup %v, kv %v, validate %v\n", w.v, len(w.kvs), err)
		if err != nil {
			panic(err)
		}
	}
}

// main runs the backup reproduction 50 times back to back, announcing each
// round before it starts.
func main() {
	const rounds = 50

	for round := 0; round < rounds; round++ {
		fmt.Printf("start testBadgerBackup %v.......................\n", round)
		testBadgerBackup()
	}
}

It panics intermittently (not on every run):

start testBadgerBackup 0.......................
dir: /var/folders/n_/qhczpf412h3ghl93j3z6zzx00000gp/T/badger_test1915576934
backup 4, kv 7, validate <nil>
backup 941, kv 951, validate <nil>
backup 2606, kv 2616, validate <nil>
backup 7377, kv 7387, validate <nil>
backup 11594, kv 11604, validate <nil>
backup 12826, kv 12834, validate <nil>
backup 13217, kv 13227, validate <nil>
backup 18297, kv 18307, validate <nil>
backup 18897, kv 18907, validate <nil>
backup 26360, kv 26370, validate <nil>
start testBadgerBackup 1.......................
dir: /var/folders/n_/qhczpf412h3ghl93j3z6zzx00000gp/T/badger_test2851596835
backup 7, kv 14, validate <nil>
backup 1404, kv 1414, validate <nil>
backup 3388, kv 3398, validate <nil>
backup 4631, kv 4641, validate <nil>
backup 10052, kv 10062, validate <nil>
backup 12812, kv 12822, validate <nil>
backup 15095, kv 15105, validate <nil>
backup 19277, kv 19287, validate <nil>
backup 23290, kv 23300, validate <nil>
backup 37387, kv 37397, validate <nil>
start testBadgerBackup 2.......................
dir: /var/folders/n_/qhczpf412h3ghl93j3z6zzx00000gp/T/badger_test1585819542
backup 4, kv 8, validate <nil>
backup 946, kv 956, validate <nil>
backup 2555, kv 2565, validate <nil>
backup 7035, kv 7045, validate <nil>
backup 9798, kv 9808, validate <nil>
backup 11894, kv 11904, validate <nil>
backup 15171, kv 15181, validate <nil>
backup 26245, kv 26255, validate <nil>
backup 35281, kv 35291, validate <nil>
backup 47292, kv 47302, validate <nil>
start testBadgerBackup 3.......................
dir: /var/folders/n_/qhczpf412h3ghl93j3z6zzx00000gp/T/badger_test980017924
backup 1, kv 2, validate <nil>
backup 1480, kv 1490, validate <nil>
backup 1659, kv 1669, validate <nil>
backup 2525, kv 2535, validate <nil>
backup 3601, kv 3611, validate <nil>
backup 4782, kv 4792, validate <nil>
backup 8462, kv 8472, validate <nil>
backup 19804, kv 19814, validate <nil>
backup 22443, kv 22450, validate <nil>
backup 24836, kv 24845, validate <nil>
start testBadgerBackup 4.......................
dir: /var/folders/n_/qhczpf412h3ghl93j3z6zzx00000gp/T/badger_test3176089424
backup 3, kv 6, validate <nil>
backup 292, kv 302, validate <nil>
backup 3567, kv 3577, validate <nil>
backup 6927, kv 6937, validate <nil>
backup 9510, kv 9520, validate <nil>
backup 10756, kv 10766, validate <nil>
backup 15206, kv 15216, validate <nil>
backup 19658, kv 19668, validate <nil>
backup 31317, kv 31327, validate <nil>
backup 42476, kv 42486, validate <nil>
start testBadgerBackup 5.......................
dir: /var/folders/n_/qhczpf412h3ghl93j3z6zzx00000gp/T/badger_test3885150962
backup 4, kv 8, validate <nil>
backup 1461, kv 1471, validate <nil>
backup 3923, kv 3933, validate <nil>
backup 4953, kv 4963, validate <nil>
backup 7174, kv 7184, validate <nil>
backup 12662, kv 12672, validate <nil>
backup 19332, kv 19342, validate <nil>
backup 26153, kv 26163, validate <nil>
backup 38206, kv 38216, validate <nil>
backup 42134, kv 42144, validate <nil>
start testBadgerBackup 6.......................
dir: /var/folders/n_/qhczpf412h3ghl93j3z6zzx00000gp/T/badger_test450010457
backup 1, kv 2, validate <nil>
backup 561, kv 571, validate <nil>
backup 1818, kv 1828, validate <nil>
backup 2865, kv 2875, validate <nil>
backup 7769, kv 7779, validate <nil>
backup 14750, kv 14760, validate <nil>
backup 23341, kv 23351, validate <nil>
backup 28885, kv 28895, validate <nil>
backup 31406, kv 31416, validate <nil>
backup 40905, kv 40915, validate <nil>
start testBadgerBackup 7.......................
dir: /var/folders/n_/qhczpf412h3ghl93j3z6zzx00000gp/T/badger_test1032548137
backup 0, kv 0, validate <nil>
backup 972, kv 982, validate <nil>
backup 3746, kv 3756, validate <nil>
backup 7321, kv 7331, validate <nil>
backup 11969, kv 11979, validate <nil>
backup 20365, kv 20375, validate <nil>
backup 30548, kv 30558, validate <nil>
backup 33235, kv 33244, validate <nil>
backup 37813, kv 37823, validate <nil>
backup 43277, kv 43286, validate <nil>
start testBadgerBackup 8.......................
dir: /var/folders/n_/qhczpf412h3ghl93j3z6zzx00000gp/T/badger_test4139450864
backup 2, kv 4, validate <nil>
backup 384, kv 394, validate <nil>
backup 3309, kv 3319, validate <nil>
backup 8010, kv 8020, validate <nil>
backup 10379, kv 10389, validate <nil>
backup 14109, kv 14119, validate <nil>
backup 21834, kv 21844, validate <nil>
backup 26754, kv 26764, validate <nil>
backup 29789, kv 29799, validate <nil>
backup 36156, kv 36166, validate <nil>
start testBadgerBackup 9.......................
dir: /var/folders/n_/qhczpf412h3ghl93j3z6zzx00000gp/T/badger_test1737783656
backup 1, kv 2, validate <nil>
backup 670, kv 680, validate <nil>
backup 3558, kv 3568, validate <nil>
backup 6943, kv 6953, validate <nil>
backup 11369, kv 11379, validate <nil>
backup 15086, kv 15096, validate <nil>
backup 18627, kv 18637, validate <nil>
backup 22028, kv 22038, validate <nil>
backup 34868, kv 34878, validate <nil>
backup 36728, kv 36738, validate <nil>
start testBadgerBackup 10.......................
dir: /var/folders/n_/qhczpf412h3ghl93j3z6zzx00000gp/T/badger_test3894248576
["1_4_key=1","1_5_key=1","1_6_key=1","237070578879453429#4_key=1","3127585945138148385#6_key=1","5142114080917786158#5_key=1","8919094391511790126#3_key=1"]
backup 4, kv 7, validate cannot found key 1_3_key, for 3 1

panic: cannot found key 1_3_key, for 3 1

The application writes two keys in one transaction, for example:

  • Set rand#3_key to 1, set 1_3_key to 1 in the same transaction
  • So a reader should never see rand#3_key=1 while 1_3_key does not exist

If the commented-out lock (l) is enabled around both the backup and the write transactions, the panic never happens.

Expected behavior and actual result.

It should never panic: when db.Backup is called, it should behave like a snapshot read.

Additional information

No response

@lsytj0413 lsytj0413 added the kind/bug Something is broken. label Feb 23, 2024
Copy link

This issue has been stale for 60 days and will be closed automatically in 7 days. Comment to keep it open.

@github-actions github-actions bot added the Stale label Jul 12, 2024
@lsytj0413
Copy link
Author

/open

@github-actions github-actions bot removed the Stale label Nov 12, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
kind/bug Something is broken.
Development

No branches or pull requests

1 participant