Skip to content

Commit 7642e5a

Browse files
fix(scheduler): fix data race (#2085)
* fix(scheduler): data race when pushing new tasks. The problem is that the scheduler can be closed in two ways: (1) by canceling the context given as an argument to scheduler.RunScheduler(), or (2) by calling scheduler.Shutdown(). Because of this, a shutdown can trigger a data race between the call to scheduler.inShutdown() and the actual pushing of tasks into the pool workers. This is solved by keeping a quit channel, listening on both the quit channel and ctx.Done(), and closing the worker channel and the scheduler afterwards. Signed-off-by: Petu Eusebiu <[email protected]> * refactor(scheduler): refactor into a single shutdown. Before this change, the scheduler could be stopped either by canceling the context provided to RunScheduler(ctx) or by calling Shutdown(). Simplify things by getting rid of the external context in RunScheduler(): keep an internal context in the scheduler itself and pass it down to all tasks. Signed-off-by: Petu Eusebiu <[email protected]> --------- Signed-off-by: Petu Eusebiu <[email protected]>
1 parent d71a1f4 commit 7642e5a

31 files changed

+495
-327
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,7 @@ test-prereq: check-skopeo $(TESTDATA) $(ORAS)
195195
.PHONY: test-extended
196196
test-extended: $(if $(findstring ui,$(BUILD_LABELS)), ui)
197197
test-extended: test-prereq
198-
go test -failfast -tags $(BUILD_LABELS),containers_image_openpgp -trimpath -race -timeout 15m -cover -coverpkg ./... -coverprofile=coverage-extended.txt -covermode=atomic ./...
198+
go test -failfast -tags $(BUILD_LABELS),containers_image_openpgp -trimpath -race -timeout 20m -cover -coverpkg ./... -coverprofile=coverage-extended.txt -covermode=atomic ./...
199199
rm -rf /tmp/getter*; rm -rf /tmp/trivy*
200200

201201
.PHONY: test-minimal

pkg/api/authn_test.go

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
"net/http"
1212
"net/http/httptest"
1313
"os"
14+
"path"
1415
"testing"
1516
"time"
1617

@@ -23,9 +24,14 @@ import (
2324
"zotregistry.io/zot/pkg/api/config"
2425
"zotregistry.io/zot/pkg/api/constants"
2526
extconf "zotregistry.io/zot/pkg/extensions/config"
27+
"zotregistry.io/zot/pkg/extensions/monitoring"
2628
"zotregistry.io/zot/pkg/log"
2729
mTypes "zotregistry.io/zot/pkg/meta/types"
2830
reqCtx "zotregistry.io/zot/pkg/requestcontext"
31+
"zotregistry.io/zot/pkg/scheduler"
32+
"zotregistry.io/zot/pkg/storage"
33+
storageConstants "zotregistry.io/zot/pkg/storage/constants"
34+
"zotregistry.io/zot/pkg/storage/local"
2935
authutils "zotregistry.io/zot/pkg/test/auth"
3036
test "zotregistry.io/zot/pkg/test/common"
3137
"zotregistry.io/zot/pkg/test/mocks"
@@ -922,6 +928,88 @@ func TestAPIKeysGeneratorErrors(t *testing.T) {
922928
})
923929
}
924930

931+
func TestCookiestoreCleanup(t *testing.T) {
932+
log := log.Logger{}
933+
metrics := monitoring.NewMetricsServer(true, log)
934+
935+
Convey("Test cookiestore cleanup works", t, func() {
936+
taskScheduler := scheduler.NewScheduler(config.New(), metrics, log)
937+
taskScheduler.RateLimit = 50 * time.Millisecond
938+
taskScheduler.RunScheduler()
939+
940+
rootDir := t.TempDir()
941+
942+
err := os.MkdirAll(path.Join(rootDir, "_sessions"), storageConstants.DefaultDirPerms)
943+
So(err, ShouldBeNil)
944+
945+
sessionPath := path.Join(rootDir, "_sessions", "session_1234")
946+
947+
err = os.WriteFile(sessionPath, []byte("session"), storageConstants.DefaultFilePerms)
948+
So(err, ShouldBeNil)
949+
950+
err = os.Chtimes(sessionPath, time.Time{}, time.Time{})
951+
So(err, ShouldBeNil)
952+
953+
imgStore := local.NewImageStore(rootDir, false, false, log, metrics, nil, nil)
954+
955+
storeController := storage.StoreController{
956+
DefaultStore: imgStore,
957+
}
958+
959+
cookieStore, err := api.NewCookieStore(storeController)
960+
So(err, ShouldBeNil)
961+
962+
cookieStore.RunSessionCleaner(taskScheduler)
963+
964+
time.Sleep(2 * time.Second)
965+
966+
taskScheduler.Shutdown()
967+
968+
// make sure session is removed
969+
_, err = os.Stat(sessionPath)
970+
So(err, ShouldNotBeNil)
971+
})
972+
973+
Convey("Test cookiestore cleanup without permissions on rootDir", t, func() {
974+
taskScheduler := scheduler.NewScheduler(config.New(), metrics, log)
975+
taskScheduler.RateLimit = 50 * time.Millisecond
976+
taskScheduler.RunScheduler()
977+
978+
rootDir := t.TempDir()
979+
980+
err := os.MkdirAll(path.Join(rootDir, "_sessions"), storageConstants.DefaultDirPerms)
981+
So(err, ShouldBeNil)
982+
983+
sessionPath := path.Join(rootDir, "_sessions", "session_1234")
984+
985+
err = os.WriteFile(sessionPath, []byte("session"), storageConstants.DefaultFilePerms)
986+
So(err, ShouldBeNil)
987+
988+
imgStore := local.NewImageStore(rootDir, false, false, log, metrics, nil, nil)
989+
990+
storeController := storage.StoreController{
991+
DefaultStore: imgStore,
992+
}
993+
994+
cookieStore, err := api.NewCookieStore(storeController)
995+
So(err, ShouldBeNil)
996+
997+
err = os.Chmod(rootDir, 0o000)
998+
So(err, ShouldBeNil)
999+
1000+
defer func() {
1001+
err = os.Chmod(rootDir, storageConstants.DefaultDirPerms)
1002+
So(err, ShouldBeNil)
1003+
}()
1004+
1005+
cookieStore.RunSessionCleaner(taskScheduler)
1006+
1007+
time.Sleep(1 * time.Second)
1008+
1009+
taskScheduler.Shutdown()
1010+
})
1011+
}
1012+
9251013
type mockUUIDGenerator struct {
9261014
guuid.Generator
9271015
succeedAttempts int

pkg/api/controller.go

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -94,12 +94,12 @@ func (c *Controller) GetPort() int {
9494
return c.chosenPort
9595
}
9696

97-
func (c *Controller) Run(reloadCtx context.Context) error {
97+
func (c *Controller) Run() error {
9898
if err := c.initCookieStore(); err != nil {
9999
return err
100100
}
101101

102-
c.StartBackgroundTasks(reloadCtx)
102+
c.StartBackgroundTasks()
103103

104104
// setup HTTP API router
105105
engine := mux.NewRouter()
@@ -216,7 +216,7 @@ func (c *Controller) Run(reloadCtx context.Context) error {
216216
return server.Serve(listener)
217217
}
218218

219-
func (c *Controller) Init(reloadCtx context.Context) error {
219+
func (c *Controller) Init() error {
220220
// print the current configuration, but strip secrets
221221
c.Log.Info().Interface("params", c.Config.Sanitize()).Msg("configuration settings")
222222

@@ -237,7 +237,7 @@ func (c *Controller) Init(reloadCtx context.Context) error {
237237
return err
238238
}
239239

240-
if err := c.InitMetaDB(reloadCtx); err != nil {
240+
if err := c.InitMetaDB(); err != nil {
241241
return err
242242
}
243243

@@ -280,7 +280,7 @@ func (c *Controller) initCookieStore() error {
280280
return nil
281281
}
282282

283-
func (c *Controller) InitMetaDB(reloadCtx context.Context) error {
283+
func (c *Controller) InitMetaDB() error {
284284
// init metaDB if search is enabled or we need to store user profiles, api keys or signatures
285285
if c.Config.IsSearchEnabled() || c.Config.IsBasicAuthnEnabled() || c.Config.IsImageTrustEnabled() ||
286286
c.Config.IsRetentionEnabled() {
@@ -310,7 +310,7 @@ func (c *Controller) InitMetaDB(reloadCtx context.Context) error {
310310
return nil
311311
}
312312

313-
func (c *Controller) LoadNewConfig(reloadCtx context.Context, newConfig *config.Config) {
313+
func (c *Controller) LoadNewConfig(newConfig *config.Config) {
314314
// reload access control config
315315
c.Config.HTTP.AccessControl = newConfig.HTTP.AccessControl
316316

@@ -364,21 +364,24 @@ func (c *Controller) LoadNewConfig(reloadCtx context.Context, newConfig *config.
364364

365365
c.InitCVEInfo()
366366

367-
c.StartBackgroundTasks(reloadCtx)
368-
369367
c.Log.Info().Interface("reloaded params", c.Config.Sanitize()).
370368
Msg("loaded new configuration settings")
371369
}
372370

373371
func (c *Controller) Shutdown() {
374-
c.taskScheduler.Shutdown()
372+
c.StopBackgroundTasks()
375373
ctx := context.Background()
376374
_ = c.Server.Shutdown(ctx)
377375
}
378376

379-
func (c *Controller) StartBackgroundTasks(reloadCtx context.Context) {
377+
// StopBackgroundTasks stops the scheduler and waits for all tasks to finish their work.
378+
func (c *Controller) StopBackgroundTasks() {
379+
c.taskScheduler.Shutdown()
380+
}
381+
382+
func (c *Controller) StartBackgroundTasks() {
380383
c.taskScheduler = scheduler.NewScheduler(c.Config, c.Metrics, c.Log)
381-
c.taskScheduler.RunScheduler(reloadCtx)
384+
c.taskScheduler.RunScheduler()
382385

383386
// Enable running garbage-collect periodically for DefaultStore
384387
if c.Config.Storage.GC {

pkg/api/controller_test.go

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -300,10 +300,10 @@ func TestRunAlreadyRunningServer(t *testing.T) {
300300
cm.StartAndWait(port)
301301
defer cm.StopServer()
302302

303-
err := ctlr.Init(context.Background())
303+
err := ctlr.Init()
304304
So(err, ShouldNotBeNil)
305305

306-
err = ctlr.Run(context.Background())
306+
err = ctlr.Run()
307307
So(err, ShouldNotBeNil)
308308
})
309309
}
@@ -377,7 +377,7 @@ func TestObjectStorageController(t *testing.T) {
377377
ctlr := makeController(conf, tmp)
378378
So(ctlr, ShouldNotBeNil)
379379

380-
err := ctlr.Init(context.Background())
380+
err := ctlr.Init()
381381
So(err, ShouldNotBeNil)
382382
})
383383

@@ -1218,7 +1218,7 @@ func TestMultipleInstance(t *testing.T) {
12181218
}
12191219
ctlr := api.NewController(conf)
12201220
ctlr.Log.Info().Int64("seedUser", seedUser).Int64("seedPass", seedPass).Msg("random seed for username & password")
1221-
err := ctlr.Init(context.Background())
1221+
err := ctlr.Init()
12221222
So(err, ShouldEqual, errors.ErrImgStoreNotFound)
12231223

12241224
globalDir := t.TempDir()
@@ -1311,7 +1311,7 @@ func TestMultipleInstance(t *testing.T) {
13111311

13121312
ctlr.Config.Storage.SubPaths = subPathMap
13131313

1314-
err := ctlr.Init(context.Background())
1314+
err := ctlr.Init()
13151315
So(err, ShouldNotBeNil)
13161316

13171317
// subpath root directory does not exist.
@@ -1320,15 +1320,15 @@ func TestMultipleInstance(t *testing.T) {
13201320

13211321
ctlr.Config.Storage.SubPaths = subPathMap
13221322

1323-
err = ctlr.Init(context.Background())
1323+
err = ctlr.Init()
13241324
So(err, ShouldNotBeNil)
13251325

13261326
subPathMap["/a"] = config.StorageConfig{RootDirectory: subDir, Dedupe: true, GC: true}
13271327
subPathMap["/b"] = config.StorageConfig{RootDirectory: subDir, Dedupe: true, GC: true}
13281328

13291329
ctlr.Config.Storage.SubPaths = subPathMap
13301330

1331-
err = ctlr.Init(context.Background())
1331+
err = ctlr.Init()
13321332
So(err, ShouldNotBeNil)
13331333
})
13341334
}
@@ -1826,12 +1826,12 @@ func TestTSLFailedReadingOfCACert(t *testing.T) {
18261826
defer cancel()
18271827
ctlr := makeController(conf, t.TempDir())
18281828

1829-
err = ctlr.Init(ctx)
1829+
err = ctlr.Init()
18301830
So(err, ShouldBeNil)
18311831

18321832
errChan := make(chan error, 1)
18331833
go func() {
1834-
err = ctlr.Run(ctx)
1834+
err = ctlr.Run()
18351835
errChan <- err
18361836
}()
18371837

@@ -1866,12 +1866,12 @@ func TestTSLFailedReadingOfCACert(t *testing.T) {
18661866
defer cancel()
18671867
ctlr := makeController(conf, t.TempDir())
18681868

1869-
err = ctlr.Init(ctx)
1869+
err = ctlr.Init()
18701870
So(err, ShouldBeNil)
18711871

18721872
errChan := make(chan error, 1)
18731873
go func() {
1874-
err = ctlr.Run(ctx)
1874+
err = ctlr.Run()
18751875
errChan <- err
18761876
}()
18771877

pkg/api/cookiestore.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,9 @@ type CleanTask struct {
152152
func (cleanTask *CleanTask) DoWork(ctx context.Context) error {
153153
for _, session := range cleanTask.sessions {
154154
if err := os.Remove(session); err != nil {
155-
return err
155+
if !os.IsNotExist(err) {
156+
return err
157+
}
156158
}
157159
}
158160

pkg/cli/client/cve_cmd_test.go

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -163,16 +163,14 @@ func TestNegativeServerResponse(t *testing.T) {
163163
ctlr := api.NewController(conf)
164164
ctlr.Log.Logger = ctlr.Log.Output(writers)
165165

166-
ctx := context.Background()
167-
168-
if err := ctlr.Init(ctx); err != nil {
166+
if err := ctlr.Init(); err != nil {
169167
panic(err)
170168
}
171169

172170
ctlr.CveScanner = getMockCveScanner(ctlr.MetaDB)
173171

174172
go func() {
175-
if err := ctlr.Run(ctx); !errors.Is(err, http.ErrServerClosed) {
173+
if err := ctlr.Run(); !errors.Is(err, http.ErrServerClosed) {
176174
panic(err)
177175
}
178176
}()
@@ -239,16 +237,14 @@ func TestServerCVEResponse(t *testing.T) {
239237
ctlr := api.NewController(conf)
240238
ctlr.Log.Logger = ctlr.Log.Output(writers)
241239

242-
ctx := context.Background()
243-
244-
if err := ctlr.Init(ctx); err != nil {
240+
if err := ctlr.Init(); err != nil {
245241
panic(err)
246242
}
247243

248244
ctlr.CveScanner = getMockCveScanner(ctlr.MetaDB)
249245

250246
go func() {
251-
if err := ctlr.Run(ctx); !errors.Is(err, http.ErrServerClosed) {
247+
if err := ctlr.Run(); !errors.Is(err, http.ErrServerClosed) {
252248
panic(err)
253249
}
254250
}()
@@ -578,9 +574,7 @@ func TestCVESort(t *testing.T) {
578574
t.FailNow()
579575
}
580576

581-
ctx := context.Background()
582-
583-
if err := ctlr.Init(ctx); err != nil {
577+
if err := ctlr.Init(); err != nil {
584578
panic(err)
585579
}
586580

@@ -617,7 +611,7 @@ func TestCVESort(t *testing.T) {
617611
}
618612

619613
go func() {
620-
if err := ctlr.Run(ctx); !errors.Is(err, http.ErrServerClosed) {
614+
if err := ctlr.Run(); !errors.Is(err, http.ErrServerClosed) {
621615
panic(err)
622616
}
623617
}()

pkg/cli/client/image_cmd_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -866,7 +866,7 @@ func TestServerResponseGQLWithoutPermissions(t *testing.T) {
866866
}
867867

868868
ctlr := api.NewController(conf)
869-
if err := ctlr.Init(context.Background()); err != nil {
869+
if err := ctlr.Init(); err != nil {
870870
So(err, ShouldNotBeNil)
871871
}
872872
})

0 commit comments

Comments
 (0)