Skip to content

Commit 793a11c

Browse files
author
Anis Elleuch
committed
scanner: Scan buckets asynchronously
- Scan buckets in all erasure sets asynchronously - No data format is changed - Cycle concept moved to be bucket-centric; the cycle is incremented each time a bucket is successfully or unsuccessfully scanned - Next bucket in each erasure set is chosen based on the oldest last scan timestamp
1 parent a25a831 commit 793a11c

14 files changed

+574
-390
lines changed

cmd/admin-handlers.go

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -696,6 +696,47 @@ func (a adminAPIHandlers) StorageInfoHandler(w http.ResponseWriter, r *http.Requ
696696
writeSuccessResponseJSON(w, jsonBytes)
697697
}
698698

699+
// BucketScanInfoHandler - GET /minio/admin/v3/scanner/status/{bucket}
700+
func (a adminAPIHandlers) BucketScanInfoHandler(w http.ResponseWriter, r *http.Request) {
701+
ctx := r.Context()
702+
objectAPI, _ := validateAdminReq(ctx, w, r, policy.StorageInfoAdminAction)
703+
if objectAPI == nil {
704+
return
705+
}
706+
707+
vars := mux.Vars(r)
708+
bucket := vars["bucket"]
709+
710+
cycles := globalScannerMetrics.getCycles()
711+
712+
l, ok := cycles[bucket]
713+
if !ok {
714+
writeErrorResponseJSON(ctx, w, toAdminAPIErr(ctx, errors.New("no scan status found")), r.URL)
715+
return
716+
}
717+
718+
output := make([]madmin.BucketScanInfo, 0, len(l))
719+
for _, elem := range l {
720+
output = append(output, madmin.BucketScanInfo{
721+
Pool: elem.pool,
722+
Set: elem.set,
723+
Cycle: elem.current,
724+
LastStarted: elem.lastStarted,
725+
LastUpdate: elem.lastUpdate,
726+
Completed: elem.cycleCompleted,
727+
})
728+
}
729+
730+
// Marshal API response
731+
jsonBytes, err := json.Marshal(output)
732+
if err != nil {
733+
writeErrorResponseJSON(ctx, w, toAdminAPIErr(ctx, err), r.URL)
734+
return
735+
}
736+
737+
writeSuccessResponseJSON(w, jsonBytes)
738+
}
739+
699740
// MetricsHandler - GET /minio/admin/v3/metrics
700741
// ----------
701742
// Get realtime server metrics

cmd/admin-router.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,8 @@ func registerAdminRouter(router *mux.Router, enableConfigOps bool) {
163163

164164
// StorageInfo operations
165165
adminRouter.Methods(http.MethodGet).Path(adminVersion + "/storageinfo").HandlerFunc(adminMiddleware(adminAPI.StorageInfoHandler, traceAllFlag))
166+
// Bucket scanner status info
167+
adminRouter.Methods(http.MethodGet).Path(adminVersion + "/scanner/status/{bucket}").HandlerFunc(adminMiddleware(adminAPI.BucketScanInfoHandler, traceAllFlag))
166168
// DataUsageInfo operations
167169
adminRouter.Methods(http.MethodGet).Path(adminVersion + "/datausageinfo").HandlerFunc(adminMiddleware(adminAPI.DataUsageInfoHandler, traceAllFlag))
168170
// Metrics operation

cmd/buckets-scan-mgr.go

Lines changed: 263 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,263 @@
1+
// Copyright (c) 2015-2023 MinIO, Inc.
2+
//
3+
// This file is part of MinIO Object Storage stack
4+
//
5+
// This program is free software: you can redistribute it and/or modify
6+
// it under the terms of the GNU Affero General Public License as published by
7+
// the Free Software Foundation, either version 3 of the License, or
8+
// (at your option) any later version.
9+
//
10+
// This program is distributed in the hope that it will be useful
11+
// but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13+
// GNU Affero General Public License for more details.
14+
//
15+
// You should have received a copy of the GNU Affero General Public License
16+
// along with this program. If not, see <http://www.gnu.org/licenses/>.
17+
18+
package cmd
19+
20+
import (
21+
"context"
22+
"sync"
23+
"time"
24+
)
25+
26+
// bucketsListInterval is the period between two bucket listings; it bounds
// how long a bucket newly created in the cluster can stay undiscovered.
const bucketsListInterval = time.Minute
30+
31+
// setID identifies an erasure set by its pool number and its set number.
// It is comparable and is used as a map key by bucketsScanMgr.
type setID struct {
	pool int
	set  int
}
34+
35+
// bucketScanStat is the scan bookkeeping kept for one bucket in one erasure set.
type bucketScanStat struct {
	// ongoing is true while the bucket is being scanned: it is set by
	// markBucketScanStarted and cleared by markBucketScanDone.
	ongoing bool
	// lastFinished is the time at which markBucketScanDone was last called
	// for this bucket; the zero value means no scan has finished yet.
	lastFinished time.Time
	// lastUpdate is the last known update time handed to
	// markBucketScanStarted when the current or latest scan began.
	lastUpdate time.Time
	// cycle is the cycle number handed to markBucketScanStarted when the
	// current or latest scan began.
	cycle uint32
	// lifecycle is true when the bucket has active lifecycle rules.
	lifecycle bool
}
42+
43+
type bucketsScanMgr struct {
44+
ctx context.Context
45+
46+
// A registered function which knows how to list buckets
47+
bucketsLister func(context.Context, BucketOptions) ([]BucketInfo, error)
48+
49+
mu sync.RWMutex
50+
knownBuckets map[string]struct{} // Current buckets in the S3 namespace
51+
bucketsCh map[setID]chan string // A map of an erasure set identifier and a channel of buckets to scan
52+
internal map[setID]map[string]bucketScanStat // A map of an erasure set identifier and bucket scan stats
53+
}
54+
55+
func newBucketsScanMgr(s3 ObjectLayer) *bucketsScanMgr {
56+
mgr := &bucketsScanMgr{
57+
ctx: GlobalContext,
58+
bucketsLister: s3.ListBuckets,
59+
internal: make(map[setID]map[string]bucketScanStat),
60+
bucketsCh: make(map[setID]chan string),
61+
}
62+
return mgr
63+
}
64+
65+
func (mgr *bucketsScanMgr) getKnownBuckets() []string {
66+
mgr.mu.Lock()
67+
defer mgr.mu.Unlock()
68+
69+
ret := make([]string, 0, len(mgr.knownBuckets))
70+
for k := range mgr.knownBuckets {
71+
ret = append(ret, k)
72+
}
73+
return ret
74+
}
75+
76+
func (mgr *bucketsScanMgr) isKnownBucket(bucket string) bool {
77+
mgr.mu.Lock()
78+
defer mgr.mu.Unlock()
79+
80+
_, ok := mgr.knownBuckets[bucket]
81+
return ok
82+
}
83+
84+
func (mgr *bucketsScanMgr) start() {
85+
86+
m := &sync.Mutex{}
87+
c := sync.NewCond(m)
88+
89+
// A routine that discovers new buckets and initialize scan stats for each new bucket
90+
go func() {
91+
t := time.NewTimer(bucketsListInterval)
92+
defer t.Stop()
93+
94+
for {
95+
select {
96+
case <-t.C:
97+
buckets, err := mgr.bucketsLister(mgr.ctx, BucketOptions{})
98+
if err == nil {
99+
mgr.mu.Lock()
100+
mgr.knownBuckets = make(map[string]struct{}, len(buckets))
101+
for _, b := range buckets {
102+
mgr.knownBuckets[b.Name] = struct{}{}
103+
}
104+
for bucket := range mgr.knownBuckets {
105+
for _, set := range mgr.internal {
106+
st := set[bucket]
107+
if l, err := globalLifecycleSys.Get(bucket); err == nil && l.HasActiveRules("") {
108+
st.lifecycle = true
109+
}
110+
set[bucket] = st
111+
}
112+
}
113+
mgr.mu.Unlock()
114+
115+
m.Lock()
116+
c.Broadcast()
117+
m.Unlock()
118+
}
119+
t.Reset(bucketsListInterval)
120+
case <-mgr.ctx.Done():
121+
return
122+
}
123+
}
124+
}()
125+
126+
// Wait until first buckets listing is successful
127+
m.Lock()
128+
c.Wait() // Unlocks m, waits, then locks m again
129+
m.Unlock()
130+
131+
// Clean up internal data when a deleted bucket is found
132+
go func() {
133+
const cleanInterval = 30 * time.Second
134+
135+
t := time.NewTimer(cleanInterval)
136+
defer t.Stop()
137+
138+
for {
139+
select {
140+
case <-t.C:
141+
mgr.mu.Lock()
142+
for _, set := range mgr.internal {
143+
for bkt := range set {
144+
if _, ok := mgr.knownBuckets[bkt]; !ok {
145+
delete(set, bkt)
146+
}
147+
}
148+
}
149+
mgr.mu.Unlock()
150+
151+
t.Reset(cleanInterval)
152+
case <-mgr.ctx.Done():
153+
return
154+
}
155+
}
156+
}()
157+
158+
// A routine that sends the next bucket to scan for each erasure set listener
159+
go func() {
160+
tick := 10 * time.Second
161+
162+
t := time.NewTimer(tick)
163+
defer t.Stop()
164+
165+
for {
166+
select {
167+
case <-t.C:
168+
mgr.mu.RLock()
169+
for id, ch := range mgr.bucketsCh {
170+
if len(ch) == 0 {
171+
b := mgr.unsafeGetNextBucket(id)
172+
if b != "" {
173+
select {
174+
case ch <- b:
175+
default:
176+
}
177+
}
178+
}
179+
}
180+
mgr.mu.RUnlock()
181+
182+
t.Reset(tick)
183+
case <-mgr.ctx.Done():
184+
return
185+
}
186+
}
187+
}()
188+
}
189+
190+
// Return a channel of buckets names to scan a given erasure set identifier
191+
func (mgr *bucketsScanMgr) getBucketCh(id setID) chan string {
192+
mgr.mu.Lock()
193+
defer mgr.mu.Unlock()
194+
195+
mgr.internal[id] = make(map[string]bucketScanStat)
196+
mgr.bucketsCh[id] = make(chan string, 1)
197+
198+
return mgr.bucketsCh[id]
199+
}
200+
201+
func scanBefore(st1, st2 bucketScanStat) bool {
202+
if st1.ongoing != st2.ongoing {
203+
return st1.ongoing == false
204+
}
205+
if st1.lastFinished.Before(st2.lastFinished) {
206+
return true
207+
}
208+
if st1.lifecycle != st2.lifecycle {
209+
return st1.lifecycle == true
210+
}
211+
return false
212+
}
213+
214+
// Return the next bucket name to scan of a given erasure set identifier
215+
// If all buckets are in a scanning state, return empty result
216+
func (mgr *bucketsScanMgr) unsafeGetNextBucket(id setID) string {
217+
var (
218+
nextBucketStat = bucketScanStat{}
219+
nextBucketName = ""
220+
)
221+
222+
for bucket, stat := range mgr.internal[id] {
223+
if stat.ongoing {
224+
continue
225+
}
226+
if nextBucketName == "" {
227+
nextBucketName = bucket
228+
nextBucketStat = stat
229+
continue
230+
}
231+
if nextBucketName == "" || scanBefore(stat, nextBucketStat) {
232+
nextBucketStat = stat
233+
nextBucketName = bucket
234+
}
235+
}
236+
237+
return nextBucketName
238+
}
239+
240+
// Mark a bucket as done in a specific erasure set - returns true if successful,
241+
// false if the bucket is already in a scanning phase
242+
func (mgr *bucketsScanMgr) markBucketScanStarted(id setID, bucket string, cycle uint32, lastKnownUpdate time.Time) {
243+
mgr.mu.Lock()
244+
defer mgr.mu.Unlock()
245+
246+
m, _ := mgr.internal[id][bucket]
247+
m.ongoing = true
248+
m.cycle = cycle
249+
m.lastUpdate = lastKnownUpdate
250+
mgr.internal[id][bucket] = m
251+
return
252+
}
253+
254+
// Mark a bucket as done in a specific erasure set
255+
func (mgr *bucketsScanMgr) markBucketScanDone(id setID, bucket string) {
256+
mgr.mu.Lock()
257+
defer mgr.mu.Unlock()
258+
259+
m, _ := mgr.internal[id][bucket]
260+
m.ongoing = false
261+
m.lastFinished = time.Now()
262+
mgr.internal[id][bucket] = m
263+
}

0 commit comments

Comments
 (0)