From 86a4917df3afffa6333d221e6aad64ba2d632bb5 Mon Sep 17 00:00:00 2001 From: "Jeffrey N. Johnson" Date: Thu, 16 Jan 2025 15:11:51 -0800 Subject: [PATCH 1/8] Reworked KBase user federation logic. Not quite working yet. --- auth/kbase_auth_server.go | 20 +-- auth/kbase_user_federation.go | 112 ------------- auth/kbase_user_federation_test.go | 73 --------- databases/jdp/database_test.go | 6 +- databases/kbase/database.go | 7 +- databases/kbase/database_test.go | 99 ++++++++++++ databases/kbase/errors.go | 35 ++++ databases/kbase/user_federation.go | 247 +++++++++++++++++++++++++++++ services/prototype.go | 2 +- 9 files changed, 389 insertions(+), 212 deletions(-) delete mode 100644 auth/kbase_user_federation.go delete mode 100644 auth/kbase_user_federation_test.go create mode 100644 databases/kbase/database_test.go create mode 100644 databases/kbase/errors.go create mode 100644 databases/kbase/user_federation.go diff --git a/auth/kbase_auth_server.go b/auth/kbase_auth_server.go index 9d184a9c..66873f43 100644 --- a/auth/kbase_auth_server.go +++ b/auth/kbase_auth_server.go @@ -59,27 +59,13 @@ func NewKBaseAuthServer(accessToken string) (*KBaseAuthServer, error) { } // verify that the access token works (i.e. that the client is logged in) - kbaseUser, err := server.kbaseUser() + _, err := server.kbaseUser() if err != nil { return nil, err } - // register the local username under all its ORCIDs with our KBase user - // federation mechanism - for _, pid := range kbaseUser.Idents { - if pid.Provider == "OrcID" { - orcid := pid.UserName - err = SetKBaseLocalUsernameForOrcid(orcid, kbaseUser.Username) - if err != nil { - break - } - } - } - - if err == nil { - // register this instance of the auth server - instances[accessToken] = &server - } + // register this instance of the auth server + instances[accessToken] = &server return &server, err } } diff --git a/auth/kbase_user_federation.go b/auth/kbase_user_federation.go deleted file mode 100644 index c8b6e7d0..00000000 --- a/auth/kbase_user_federation.go +++ /dev/null @@ -1,112 +0,0 @@ -// Copyright (c) 2023 The KBase Project and its Contributors -// Copyright (c) 2023 Cohere Consulting, LLC -// -// Permission is hereby granted, free of charge, to any person obtaining a copy of -// this software and associated documentation files (the "Software"), to deal in -// the Software without restriction, including without limitation the rights to -// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -// of the Software, and to permit persons to whom the Software is furnished to do -// so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -package auth - -import ( - "fmt" -) - -//======================= -// KBase user federation -//======================= - -// Because the DTS uses KBase's auth server for its own authentication, we can -// create and maintain an ORCID -> username mapping that stores entries for all -// users who have made requests to the DTS. This prevents us from having to -// rely on a secondary data source for this information. - -// associates the given KBase username with the given ORCID -func SetKBaseLocalUsernameForOrcid(orcid, username string) error { - if !kbaseUserFederationStarted { - // fire it up! - started := make(chan struct{}) - go kbaseUserFederation(started) - - // wait for it to start - <-started - } - kbaseOrcidUserChan <- [2]string{orcid, username} - err := <-kbaseErrorChan - return err -} - -// returns the local KBase username associated with the given ORCID -func KBaseLocalUsernameForOrcid(orcid string) (string, error) { - if !kbaseUserFederationStarted { // no one's logged in! - return "", fmt.Errorf("KBase federated user table not available!") - } - kbaseOrcidChan <- orcid - username := <-kbaseUserChan - err := <-kbaseErrorChan - return username, err -} - -//----------- -// Internals -//----------- - -var kbaseUserFederationStarted = false -var kbaseOrcidChan chan string // passes ORCIDs in -var kbaseOrcidUserChan chan [2]string // passes (ORCIDs, username) pairs in -var kbaseUserChan chan string // passes usernames out -var kbaseErrorChan chan error // passes errors out - -// This goroutine maintains a mapping or ORCIDs to local KBase users, -// fielding requests to update and retrieve usernames by ORCID. -func kbaseUserFederation(started chan struct{}) { - // channels - kbaseOrcidChan = make(chan string) - kbaseOrcidUserChan = make(chan [2]string) - kbaseUserChan = make(chan string) - kbaseErrorChan = make(chan error) - - // mapping of ORCIDs to local KBase users - kbaseUserTable := make(map[string]string) - - // we're ready - started <- struct{}{} - kbaseUserFederationStarted = true - - for { - select { - case orcidAndUsername := <-kbaseOrcidUserChan: // setting username for orcid - if username, found := kbaseUserTable[orcidAndUsername[0]]; found { - if username != orcidAndUsername[1] { - kbaseErrorChan <- fmt.Errorf("KBase user mismatch for ORCID %s!", orcidAndUsername[0]) - } else { - kbaseErrorChan <- nil - } - } else { - kbaseUserTable[orcidAndUsername[0]] = orcidAndUsername[1] - kbaseErrorChan <- nil - } - case orcid := <-kbaseOrcidChan: // fetching username for orcid - if username, found := kbaseUserTable[orcid]; found { - kbaseUserChan <- username - kbaseErrorChan <- nil - } else { - kbaseUserChan <- "" - kbaseErrorChan <- fmt.Errorf("KBase user not found for ORCID %s!", orcid) - } - } - } -} diff --git a/auth/kbase_user_federation_test.go b/auth/kbase_user_federation_test.go deleted file mode 100644 index 945beaaa..00000000 --- a/auth/kbase_user_federation_test.go +++ /dev/null @@ -1,73 +0,0 @@ -// Copyright (c) 2023 The KBase Project and its Contributors -// Copyright (c) 2023 Cohere Consulting, LLC -// -// Permission is hereby granted, free of charge, to any person obtaining a copy of -// this software and associated documentation files (the "Software"), to deal in -// the Software without restriction, including without limitation the rights to -// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -// of the Software, and to permit persons to whom the Software is furnished to do -// so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -// These tests verify that we can connect to the KBase authentication server -// and access a user's ORCID credential(s). The tests require the following -// environment variables to be set: -// -// * DTS_KBASE_DEV_TOKEN: a valid unencoded KBase developer token -// * DTS_KBASE_TEST_ORCID: a valid ORCID identifier for a KBase user -package auth - -import ( - "testing" - - "github.com/stretchr/testify/assert" -) - -// tests whether a proxy for the KBase authentication server can be -// constructed -func TestSetKBaseLocalUsernameForOrcid(t *testing.T) { - assert := assert.New(t) - err := SetKBaseLocalUsernameForOrcid("my-fake-orcid", "my-fake-username") - assert.Nil(err, "Unable to set KBase local username for ORCID!") - - // doing the same thing again should be fine - err = SetKBaseLocalUsernameForOrcid("my-fake-orcid", "my-fake-username") - assert.Nil(err, "Setting KBase local username is not idempotent!") -} - -// tests whether inconsistently reset ORCID/user registrations trigger errors -func TestResetKBaseLocalUsernameForOrcid(t *testing.T) { - assert := assert.New(t) - err := SetKBaseLocalUsernameForOrcid("my-fake-orcid", "my-fake-username") - err = SetKBaseLocalUsernameForOrcid("my-fake-orcid", "your-fake-username") - assert.NotNil(err) -} - -// tests whether the authentication server can return the username for the -// for the owner of the developer token -func TestKBaseLocalUsername(t *testing.T) { - assert := assert.New(t) - err := SetKBaseLocalUsernameForOrcid("my-fake-orcid", "my-fake-username") - username, err := KBaseLocalUsernameForOrcid("my-fake-orcid") - assert.Nil(err) - assert.Equal("my-fake-username", username) -} - -// tests for fetching a kbase username for an unregistered ORCID -func TestUnregisteredKBaseLocalUsername(t *testing.T) { - assert := assert.New(t) - err := SetKBaseLocalUsernameForOrcid("my-fake-orcid", "my-fake-username") - username, err := KBaseLocalUsernameForOrcid("your-fake-orcid") - assert.NotNil(err) - assert.Equal("", username) -} diff --git a/databases/jdp/database_test.go b/databases/jdp/database_test.go index c5fc1224..32cbd0fb 100644 --- a/databases/jdp/database_test.go +++ b/databases/jdp/database_test.go @@ -18,11 +18,7 @@ databases: jdp: name: JGI Data Portal organization: Joint Genome Institue - url: https://files.jgi.doe.gov endpoint: globus-jdp - auth: - client_id: ${JGI_CLIENT_ID} - client_secret: ${JGI_CLIENT_SECRET} endpoints: globus-jdp: name: Globus NERSC DTN @@ -105,7 +101,7 @@ func TestResources(t *testing.T) { assert.Nil(err, "JDP resource query encountered an error") assert.Equal(10, len(resources), "JDP resource query didn't return requested number of results") - for i, _ := range resources { + for i := range resources { jdpSearchResult := results.Resources[i] resource := resources[i] assert.Equal(jdpSearchResult.Id, resource.Id, "Resource ID mismatch") diff --git a/databases/kbase/database.go b/databases/kbase/database.go index 32beb622..52245c2b 100644 --- a/databases/kbase/database.go +++ b/databases/kbase/database.go @@ -26,7 +26,6 @@ import ( "github.com/google/uuid" - "github.com/kbase/dts/auth" "github.com/kbase/dts/databases" "github.com/kbase/dts/frictionless" ) @@ -43,6 +42,8 @@ func NewDatabase(orcid string) (databases.Database, error) { return nil, fmt.Errorf("No ORCID was given") } + startUserFederation() + return &Database{ Id: "kbase", }, nil @@ -73,9 +74,7 @@ func (db *Database) StagingStatus(id uuid.UUID) (databases.StagingStatus, error) } func (db *Database) LocalUser(orcid string) (string, error) { - // for KBase user federation, we rely on a table maintained by our KBase - // auth server proxy - return auth.KBaseLocalUsernameForOrcid(orcid) + return usernameForOrcid(orcid) } func (db Database) Save() (databases.DatabaseSaveState, error) { diff --git a/databases/kbase/database_test.go b/databases/kbase/database_test.go new file mode 100644 index 00000000..3fecf890 --- /dev/null +++ b/databases/kbase/database_test.go @@ -0,0 +1,99 @@ +package kbase + +import ( + "os" + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/kbase/dts/config" + "github.com/kbase/dts/databases" + "github.com/kbase/dts/dtstest" + "github.com/kbase/dts/endpoints" + "github.com/kbase/dts/endpoints/globus" +) + +const kbaseConfig string = ` +databases: + kbase: + name: KBase Workspace Service (KSS) + organization: KBase + endpoint: globus-kbase +endpoints: + globus-kbase: + name: KBase + id: ${DTS_GLOBUS_TEST_ENDPOINT} + provider: globus + auth: + client_id: ${DTS_GLOBUS_CLIENT_ID} + client_secret: ${DTS_GLOBUS_CLIENT_SECRET} +` + +// this function gets called at the begіnning of a test session +func setup() { + dtstest.EnableDebugLogging() + config.Init([]byte(kbaseConfig)) + databases.RegisterDatabase("kbase", NewDatabase) + endpoints.RegisterEndpointProvider("globus", globus.NewEndpoint) +} + +// this function gets called after all tests have been run +func breakdown() { +} + +func TestNewDatabase(t *testing.T) { + assert := assert.New(t) + orcid := os.Getenv("DTS_KBASE_TEST_ORCID") + db, err := NewDatabase(orcid) + assert.NotNil(db, "KBase database not created") + assert.Nil(err, "KBase database creation encountered an error") +} + +func TestNewDatabaseWithoutOrcid(t *testing.T) { + assert := assert.New(t) + db, err := NewDatabase("") + assert.Nil(db, "Invalid KBase database somehow created") + assert.NotNil(err, "KBase database creation without ORCID encountered no error") +} + +func TestSearch(t *testing.T) { + assert := assert.New(t) + orcid := os.Getenv("DTS_KBASE_TEST_ORCID") + db, _ := NewDatabase(orcid) + params := databases.SearchParameters{ + Query: "prochlorococcus", + Pagination: struct { + Offset, MaxNum int + }{ + Offset: 1, + MaxNum: 50, + }, + } + _, err := db.Search(params) + assert.NotNil(err, "Search not implemented for kbase database!") +} + +func TestResources(t *testing.T) { + assert := assert.New(t) + orcid := os.Getenv("DTS_KBASE_TEST_ORCID") + db, _ := NewDatabase(orcid) + _, err := db.Resources(nil) + assert.NotNil(err, "Resources not implemented for kbase database!") +} + +func TestLocalUser(t *testing.T) { + assert := assert.New(t) + orcid := os.Getenv("DTS_KBASE_TEST_ORCID") + db, _ := NewDatabase(orcid) + username, err := db.LocalUser(orcid) + assert.Nil(err) + assert.True(len(username) > 0) +} + +// this runs setup, runs all tests, and does breakdown +func TestMain(m *testing.M) { + setup() + status := m.Run() + breakdown() + os.Exit(status) +} diff --git a/databases/kbase/errors.go b/databases/kbase/errors.go new file mode 100644 index 00000000..76c6204d --- /dev/null +++ b/databases/kbase/errors.go @@ -0,0 +1,35 @@ +// Copyright (c) 2023 The KBase Project and its Contributors +// Copyright (c) 2023 Cohere Consulting, LLC +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +package kbase + +import ( + "fmt" +) + +// This error type is returned when a KBase user/ORCID spreadsheet file is invalid. +type InvalidKBaseUserSpreadsheet struct { + File, Message string +} + +func (e InvalidKBaseUserSpreadsheet) Error() string { + return fmt.Sprintf("KBase user spreadsheet file %s is invalid: %s", e.File, e.Message) +} diff --git a/databases/kbase/user_federation.go b/databases/kbase/user_federation.go new file mode 100644 index 00000000..c45089ba --- /dev/null +++ b/databases/kbase/user_federation.go @@ -0,0 +1,247 @@ +// Copyright (c) 2023 The KBase Project and its Contributors +// Copyright (c) 2023 Cohere Consulting, LLC +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +package kbase + +import ( + "bufio" + "fmt" + "os" + "path/filepath" + "strings" + "time" + "unicode" + + "github.com/kbase/dts/config" +) + +//======================= +// KBase user federation +//======================= + +// In order to map an ORCID to a KBase username, we maintain a mapping that +// stores entries for all KBase users with ORCIDs. This mapping currently lives +// a 2-column spreadsheet (kbase_user_orcids.csv) in the DTS data directory. +// The data in this spreadsheet is reloaded every hour on the top of the hour +// so a new file can be dropped into the data directory with predictable results. + +// starts up the user federation machinery if it hasn't yet been started +func startUserFederation() { + // fire up the user federation goroutine if needed + if !kbaseUserFederationStarted { + started := make(chan struct{}) + go kbaseUserFederation(started) + <-started // wait for it to start + + // start a pulse that reloads the user table from a file at the top of every hour + go func() { + for { + kbaseUpdateChan <- struct{}{} + t := time.Now() + topOfHour := time.Date(t.Year(), t.Month(), t.Day(), t.Hour()+1, 0, 0, 0, t.Location()) + time.Sleep(time.Until(topOfHour)) + } + }() + } +} + +// returns the KBase username associated with the given ORCID +func usernameForOrcid(orcid string) (string, error) { + if !kbaseUserFederationStarted { + return "", fmt.Errorf("KBase federated user table not available!") + } + kbaseOrcidChan <- orcid + username := <-kbaseUserChan + err := <-kbaseErrorChan + return username, err +} + +//----------- +// Internals +//----------- + +var kbaseUserFederationStarted = false +var kbaseUpdateChan chan struct{} // triggers updates to the ORCID/user table +var kbaseOrcidChan chan string // passes ORCIDs in for lookup +var kbaseUserChan chan string // passes usernames out +var kbaseErrorChan chan error // passes errors out + +// This goroutine maintains a table that associates ORCIDs with KBase users. +// It fields requests for usernames given ORCIDs, and can also update the table +// by reading a file. +func kbaseUserFederation(started chan struct{}) { + // channels + kbaseOrcidChan = make(chan string) + kbaseUserChan = make(chan string) + kbaseErrorChan = make(chan error) + kbaseUpdateChan = make(chan struct{}) + + // mapping of ORCIDs to KBase users + kbaseUserTable := make(map[string]string) + + // we're ready + kbaseUserFederationStarted = true + started <- struct{}{} + + for { + select { + case orcid := <-kbaseOrcidChan: // fetching username for orcid + if username, found := kbaseUserTable[orcid]; found { + kbaseUserChan <- username + kbaseErrorChan <- nil + } else { + kbaseUserChan <- "" + kbaseErrorChan <- fmt.Errorf("KBase user not found for ORCID %s!", orcid) + } + case <-kbaseUpdateChan: // update ORCID/user table + var err error + newUserTable, err := readUserTable("kbase_user_orcids.csv") + if err == nil { + kbaseUserTable = newUserTable + } + kbaseErrorChan <- err + } + } +} + +// reads the specified .csv file within the DTS data directory, returning a map +// with ORCID keys associated with username values +func readUserTable(csvFile string) (map[string]string, error) { + // open the CVS file containing the user mapping + filename := filepath.Join(config.Service.DataDirectory, csvFile) + file, err := os.Open(filename) + if err != nil { + return nil, InvalidKBaseUserSpreadsheet{ + File: csvFile, + Message: "nonexistent file", + } + } + defer file.Close() + + // Scan the file line by line. Each line should contain 2 cells separated + // by a comma. The first line is almost certainly a header with column names, + // but we can't be sure, so we simply read every line, checking that + // + // * there are 2 entries separated by exactly one comma + // * exactly one of the entries is a well-formed ORCID (xxxx-xxxx-xxxx-xxxx) + // * the other entry is a non-empty string with no special characters + // + // Finally, the structure of all the lines in the file must agree. Every line + // that doesn't conform to these requirements is ignored. If there's at least + // one valid line, we clear the existing KBase user table and add each + // (ORCID, user) pair to the user table. + orcidColumn := -1 + userColumn := -1 + orcidsForUsers := make(map[string]string) + usersForOrcids := make(map[string]string) + scanner := bufio.NewScanner(file) + for scanner.Scan() { + cells := strings.Split(scanner.Text(), ",") + if len(cells) != 2 { + return nil, InvalidKBaseUserSpreadsheet{ + File: csvFile, + Message: fmt.Sprintf("%d comma-separated columns found (2 expected)", len(cells)), + } + } + + if orcidColumn == -1 { // find the column with an ORCID + for i := 0; i < 2; i++ { + if isOrcid(cells[i]) { + orcidColumn = i + userColumn = (i + 1) % 2 // user column's the other one + } + } + } else if !isOrcid(cells[orcidColumn]) { + // we've already established the ORCID column, but this line disagrees, + // so discard it + continue + } + + if orcidColumn != -1 { + orcid := cells[orcidColumn] + // ORCID column's okay, but what about the user column? + if !isUsername(cells[userColumn]) { + continue + } + username := cells[userColumn] + + // have we seen this ORCID or username before? It's okay, as long as everything + // is consistent + if existingUser, found := usersForOrcids[orcid]; found { + if existingUser != orcid { + return nil, InvalidKBaseUserSpreadsheet{ + File: csvFile, + Message: fmt.Sprintf("ORCID %s is associated with multiple users", orcid), + } + } + } else { + usersForOrcids[orcid] = username + } + if existingOrcid, found := orcidsForUsers[username]; found { + if existingOrcid != orcid { + return nil, InvalidKBaseUserSpreadsheet{ + File: csvFile, + Message: fmt.Sprintf("User %s has multiple ORCIDs", username), + } + } + } else { + orcidsForUsers[username] = orcid + } + } + } + + if len(usersForOrcids) == 0 { + return nil, InvalidKBaseUserSpreadsheet{ + File: csvFile, + Message: "No valid username/ORCID pairs found", + } + } + + return usersForOrcids, nil +} + +// returns true iff s contains a valid username +func isUsername(s string) bool { + return len(s) > 0 && !strings.ContainsFunc(s, func(c rune) bool { + return !unicode.IsLetter(c) || !unicode.IsDigit(c) || c != '_' + }) +} + +// returns true iff s contains a valid ORCID +func isOrcid(s string) bool { + if len(s) != 19 { + return false + } + if s[4] != '-' || s[9] != '-' || s[14] != '-' { + return false + } + isAllDigits := func(s string) bool { + return !strings.ContainsFunc(s, func(c rune) bool { return !unicode.IsDigit(c) }) + } + if !isAllDigits(s[:4]) || !isAllDigits(s[5:9]) || !isAllDigits(s[10:14]) || !isAllDigits(s[15:18]) { + return false + } + // the last character can be either a digit or X (representing a checksum of 10) + if !unicode.IsDigit(rune(s[18])) && s[18] != 'X' { + return false + } + return true +} diff --git a/services/prototype.go b/services/prototype.go index 31292997..9ed3f6a6 100644 --- a/services/prototype.go +++ b/services/prototype.go @@ -141,7 +141,7 @@ func (service *prototype) Close() { // Version numbers var majorVersion = 0 -var minorVersion = 2 +var minorVersion = 3 var patchVersion = 0 // Version string From be4968f437b1a476d14ed850914881e52cb53dd5 Mon Sep 17 00:00:00 2001 From: "Jeffrey N. Johnson" Date: Fri, 17 Jan 2025 11:44:08 -0800 Subject: [PATCH 2/8] More work on user federation tests. --- databases/kbase/database_test.go | 215 +++++++++++++++++++++++------ databases/kbase/user_federation.go | 19 ++- 2 files changed, 192 insertions(+), 42 deletions(-) diff --git a/databases/kbase/database_test.go b/databases/kbase/database_test.go index 3fecf890..b290d57b 100644 --- a/databases/kbase/database_test.go +++ b/databases/kbase/database_test.go @@ -1,7 +1,12 @@ package kbase import ( + "fmt" + "io" + "log" "os" + "path/filepath" + "strings" "testing" "github.com/stretchr/testify/assert" @@ -13,51 +18,66 @@ import ( "github.com/kbase/dts/endpoints/globus" ) -const kbaseConfig string = ` -databases: - kbase: - name: KBase Workspace Service (KSS) - organization: KBase - endpoint: globus-kbase -endpoints: - globus-kbase: - name: KBase - id: ${DTS_GLOBUS_TEST_ENDPOINT} - provider: globus - auth: - client_id: ${DTS_GLOBUS_CLIENT_ID} - client_secret: ${DTS_GLOBUS_CLIENT_SECRET} -` - -// this function gets called at the begіnning of a test session -func setup() { - dtstest.EnableDebugLogging() - config.Init([]byte(kbaseConfig)) - databases.RegisterDatabase("kbase", NewDatabase) - endpoints.RegisterEndpointProvider("globus", globus.NewEndpoint) +// this runs setup, runs all tests, and does breakdown +func TestMain(m *testing.M) { + setup() + status := m.Run() + breakdown() + os.Exit(status) } -// this function gets called after all tests have been run -func breakdown() { +// runs all tests serially (so we can swap out KBase user tables) +func TestRunner(t *testing.T) { + tester := SerialTests{Test: t} + tester.TestNewDatabase() + tester.TestNewDatabaseWithoutOrcid() + tester.TestUserFederation() + tester.TestSearch() + tester.TestResources() + tester.TestLocalUser() } -func TestNewDatabase(t *testing.T) { - assert := assert.New(t) +func (t *SerialTests) TestNewDatabase() { + assert := assert.New(t.Test) orcid := os.Getenv("DTS_KBASE_TEST_ORCID") db, err := NewDatabase(orcid) assert.NotNil(db, "KBase database not created") assert.Nil(err, "KBase database creation encountered an error") } -func TestNewDatabaseWithoutOrcid(t *testing.T) { - assert := assert.New(t) +func (t *SerialTests) TestNewDatabaseWithoutOrcid() { + assert := assert.New(t.Test) db, err := NewDatabase("") assert.Nil(db, "Invalid KBase database somehow created") assert.NotNil(err, "KBase database creation without ORCID encountered no error") } -func TestSearch(t *testing.T) { - assert := assert.New(t) +func (t *SerialTests) TestUserFederation() { + assert := assert.New(t.Test) + orcid := os.Getenv("DTS_KBASE_TEST_ORCID") + + // make sure we can create a db with good user tables + for i := range goodUserTables { + copyDataFile(fmt.Sprintf("good_user_table_%d", i), "kbase_user_orcids.csv") + db, err := NewDatabase(orcid) + assert.NotNil(db, fmt.Sprintf("KBase database not created with good_user_table_%d", i)) + assert.Nil(err, "KBase database creation encountered an error") + } + + // make sure we CAN'T create a db with bad user tables + for i := range badUserTables { + copyDataFile(fmt.Sprintf("bad_user_table_%d", i), "kbase_user_orcids.csv") + db, err := NewDatabase(orcid) + assert.Nil(db, fmt.Sprintf("KBase database created with bad_user_table_%d", i)) + assert.NotNil(err, "KBase database creation with bad user table didn't encounter an error") + } + + // copy a good user table back into place + copyDataFile("good_user_table_0", "kbase_user_orcids.csv") +} + +func (t *SerialTests) TestSearch() { + assert := assert.New(t.Test) orcid := os.Getenv("DTS_KBASE_TEST_ORCID") db, _ := NewDatabase(orcid) params := databases.SearchParameters{ @@ -73,16 +93,16 @@ func TestSearch(t *testing.T) { assert.NotNil(err, "Search not implemented for kbase database!") } -func TestResources(t *testing.T) { - assert := assert.New(t) +func (t *SerialTests) TestResources() { + assert := assert.New(t.Test) orcid := os.Getenv("DTS_KBASE_TEST_ORCID") db, _ := NewDatabase(orcid) _, err := db.Resources(nil) assert.NotNil(err, "Resources not implemented for kbase database!") } -func TestLocalUser(t *testing.T) { - assert := assert.New(t) +func (t *SerialTests) TestLocalUser() { + assert := assert.New(t.Test) orcid := os.Getenv("DTS_KBASE_TEST_ORCID") db, _ := NewDatabase(orcid) username, err := db.LocalUser(orcid) @@ -90,10 +110,125 @@ func TestLocalUser(t *testing.T) { assert.True(len(username) > 0) } -// this runs setup, runs all tests, and does breakdown -func TestMain(m *testing.M) { - setup() - status := m.Run() - breakdown() - os.Exit(status) +var CWD string +var TESTING_DIR string + +const kbaseConfig string = ` +service: + data_dir: TESTING_DIR/data +databases: + kbase: + name: KBase Workspace Service (KSS) + organization: KBase + endpoint: globus-kbase +endpoints: + globus-kbase: + name: KBase + id: ${DTS_GLOBUS_TEST_ENDPOINT} + provider: globus + auth: + client_id: ${DTS_GLOBUS_CLIENT_ID} + client_secret: ${DTS_GLOBUS_CLIENT_SECRET} +` + +// test user/ORCID spreadsheets +var goodUserTables = []string{ + `username,orcid +Alice,1234-5678-9101-112X +Bob,1234-5678-9101-1121 +`, + `orcid,username +1234-5678-9101-112X,Alice +1234-5678-9101-1121,Bob +`, +} + +var badUserTables = []string{ + `nocommas +1234-5678-9101-1121,1234-5678-9101-1121 +`, + `username,orcid +1234-5678-9101-1121,Bob +Bob,1234-5678-9101-1121 +`, + `username,orcid +Bob,1234-5678-9101-1121 +Bob,1234-5678-9101-1122 +`, + `username,orcid +Bob,1234-5678-9101-1121 +Boberto,1234-5678-9101-1121 +`, +} + +// this function gets called at the begіnning of a test session +func setup() { + dtstest.EnableDebugLogging() + + // jot down our CWD, create a temporary directory, and change to it + var err error + CWD, err = os.Getwd() + if err != nil { + log.Panicf("Couldn't get current working directory: %s", err) + } + log.Print("Creating testing directory...\n") + TESTING_DIR, err = os.MkdirTemp(os.TempDir(), "kbase-database-tests-") + if err != nil { + log.Panicf("Couldn't create testing directory: %s", err) + } + os.Chdir(TESTING_DIR) + + // read the config file with TESTING_DIR replaced + myConfig := strings.ReplaceAll(kbaseConfig, "TESTING_DIR", TESTING_DIR) + config.Init([]byte(myConfig)) + + // create the data directory and populate it with our test spreadsheets + os.Mkdir(config.Service.DataDirectory, 0755) + for i, userTable := range goodUserTables { + filename := filepath.Join(config.Service.DataDirectory, fmt.Sprintf("good_user_table_%d.csv", i)) + file, _ := os.Create(filename) + io.WriteString(file, userTable) + file.Close() + } + for i, userTable := range badUserTables { + filename := filepath.Join(config.Service.DataDirectory, fmt.Sprintf("bad_user_table_%d.csv", i)) + file, _ := os.Create(filename) + io.WriteString(file, userTable) + file.Close() + } + + // copy a good user table into place + copyDataFile("good_user_table_0.csv", "kbase_user_orcids.csv") + + databases.RegisterDatabase("kbase", NewDatabase) + endpoints.RegisterEndpointProvider("globus", globus.NewEndpoint) +} + +// copies a file from a source to a destination file within the DTS data directory +func copyDataFile(src, dst string) error { + srcFile, err := os.Open(filepath.Join(config.Service.DataDirectory, src)) + if err != nil { + return err + } + defer srcFile.Close() + dstFile, err := os.Create(filepath.Join(config.Service.DataDirectory, dst)) + if err != nil { + return err + } + defer dstFile.Close() + _, err = io.Copy(dstFile, srcFile) + return err } + +// this function gets called after all tests have been run +func breakdown() { + if TESTING_DIR != "" { + // Remove the testing directory and its contents. + log.Printf("Deleting testing directory %s...\n", TESTING_DIR) + os.RemoveAll(TESTING_DIR) + } +} + +// To run the tests serially, we attach them to a SerialTests type and +// have them run by a a single test runner. +type SerialTests struct{ Test *testing.T } diff --git a/databases/kbase/user_federation.go b/databases/kbase/user_federation.go index c45089ba..2b1df50c 100644 --- a/databases/kbase/user_federation.go +++ b/databases/kbase/user_federation.go @@ -24,6 +24,7 @@ package kbase import ( "bufio" "fmt" + "log/slog" "os" "path/filepath" "strings" @@ -44,23 +45,37 @@ import ( // so a new file can be dropped into the data directory with predictable results. // starts up the user federation machinery if it hasn't yet been started -func startUserFederation() { +func startUserFederation() error { // fire up the user federation goroutine if needed if !kbaseUserFederationStarted { started := make(chan struct{}) go kbaseUserFederation(started) <-started // wait for it to start + // load the user table + kbaseUpdateChan <- struct{}{} + err := <-kbaseErrorChan + if err != nil { + return err + } + // start a pulse that reloads the user table from a file at the top of every hour go func() { for { - kbaseUpdateChan <- struct{}{} t := time.Now() topOfHour := time.Date(t.Year(), t.Month(), t.Day(), t.Hour()+1, 0, 0, 0, t.Location()) time.Sleep(time.Until(topOfHour)) + kbaseUpdateChan <- struct{}{} + + // reloading errors are logged, not propagated + err := <-kbaseErrorChan + if err != nil { + slog.Warn(err.Error()) + } } }() } + return nil } // returns the KBase username associated with the given ORCID From 4a2fcb3bccc6fcdf1b92f04247534365f1e55750 Mon Sep 17 00:00:00 2001 From: "Jeffrey N. Johnson" Date: Sun, 19 Jan 2025 10:41:47 -0800 Subject: [PATCH 3/8] Fixed up user federation (and tests). --- databases/kbase/database.go | 5 ++++- databases/kbase/database_test.go | 24 +++++++++++++++++------- databases/kbase/user_federation.go | 28 ++++++++++++++++++++++++---- 3 files changed, 45 insertions(+), 12 deletions(-) diff --git a/databases/kbase/database.go b/databases/kbase/database.go index 52245c2b..ba19977c 100644 --- a/databases/kbase/database.go +++ b/databases/kbase/database.go @@ -42,7 +42,10 @@ func NewDatabase(orcid string) (databases.Database, error) { return nil, fmt.Errorf("No ORCID was given") } - startUserFederation() + err := startUserFederation() + if err != nil { + return nil, err + } return &Database{ Id: "kbase", diff --git a/databases/kbase/database_test.go b/databases/kbase/database_test.go index b290d57b..120393b8 100644 --- a/databases/kbase/database_test.go +++ b/databases/kbase/database_test.go @@ -58,22 +58,28 @@ func (t *SerialTests) TestUserFederation() { // make sure we can create a db with good user tables for i := range goodUserTables { - copyDataFile(fmt.Sprintf("good_user_table_%d", i), "kbase_user_orcids.csv") + err := copyDataFile(fmt.Sprintf("good_user_table_%d.csv", i), "kbase_user_orcids.csv") + assert.Nil(err, "Couldn't copy good_user_table_%d.csv into place.") db, err := NewDatabase(orcid) assert.NotNil(db, fmt.Sprintf("KBase database not created with good_user_table_%d", i)) assert.Nil(err, "KBase database creation encountered an error") + err = stopUserFederation() + assert.Nil(err, "Couldn't stop user federation subsystem.") } // make sure we CAN'T create a db with bad user tables for i := range badUserTables { - copyDataFile(fmt.Sprintf("bad_user_table_%d", i), "kbase_user_orcids.csv") + err := copyDataFile(fmt.Sprintf("bad_user_table_%d.csv", i), "kbase_user_orcids.csv") + assert.Nil(err, "Couldn't copy bad_user_table_%d.csv into place.") db, err := NewDatabase(orcid) - assert.Nil(db, fmt.Sprintf("KBase database created with bad_user_table_%d", i)) + assert.Nil(db, fmt.Sprintf("KBase database created with bad_user_table_%d.csv", i)) assert.NotNil(err, "KBase database creation with bad user table didn't encounter an error") + err = stopUserFederation() + assert.Nil(err, "Couldn't stop user federation subsystem.") } // copy a good user table back into place - copyDataFile("good_user_table_0", "kbase_user_orcids.csv") + copyDataFile("good_user_table_0.csv", "kbase_user_orcids.csv") } func (t *SerialTests) TestSearch() { @@ -105,9 +111,12 @@ func (t *SerialTests) TestLocalUser() { assert := assert.New(t.Test) orcid := os.Getenv("DTS_KBASE_TEST_ORCID") db, _ := NewDatabase(orcid) - username, err := db.LocalUser(orcid) + username, err := db.LocalUser("1234-5678-9101-112X") assert.Nil(err) - assert.True(len(username) > 0) + assert.Equal("Alice", username) + username, err = db.LocalUser("1235-5678-9101-112X") + assert.NotNil(err) + assert.Equal("", username) } var CWD string @@ -144,7 +153,8 @@ Bob,1234-5678-9101-1121 } var badUserTables = []string{ - `nocommas + `nocommas`, + `orcid,orcid 1234-5678-9101-1121,1234-5678-9101-1121 `, `username,orcid diff --git a/databases/kbase/user_federation.go b/databases/kbase/user_federation.go index 2b1df50c..722581e8 100644 --- a/databases/kbase/user_federation.go +++ b/databases/kbase/user_federation.go @@ -89,12 +89,23 @@ func usernameForOrcid(orcid string) (string, error) { return username, err } +// stops the user federation machinery +func stopUserFederation() error { + if !kbaseUserFederationStarted { + return fmt.Errorf("KBase user federation not started!") + } + kbaseStopChan <- struct{}{} + err := <-kbaseErrorChan + return err +} + //----------- // Internals //----------- var kbaseUserFederationStarted = false var kbaseUpdateChan chan struct{} // triggers updates to the ORCID/user table +var kbaseStopChan chan struct{} // stops the user federation subsystem var kbaseOrcidChan chan string // passes ORCIDs in for lookup var kbaseUserChan chan string // passes usernames out var kbaseErrorChan chan error // passes errors out @@ -108,6 +119,7 @@ func kbaseUserFederation(started chan struct{}) { kbaseUserChan = make(chan string) kbaseErrorChan = make(chan error) kbaseUpdateChan = make(chan struct{}) + kbaseStopChan = make(chan struct{}) // mapping of ORCIDs to KBase users kbaseUserTable := make(map[string]string) @@ -133,6 +145,10 @@ func kbaseUserFederation(started chan struct{}) { kbaseUserTable = newUserTable } kbaseErrorChan <- err + case <-kbaseStopChan: // stop the subsystem + kbaseUserFederationStarted = false + kbaseErrorChan <- nil + break } } } @@ -169,7 +185,8 @@ func readUserTable(csvFile string) (map[string]string, error) { usersForOrcids := make(map[string]string) scanner := bufio.NewScanner(file) for scanner.Scan() { - cells := strings.Split(scanner.Text(), ",") + line := scanner.Text() + cells := strings.Split(line, ",") if len(cells) != 2 { return nil, InvalidKBaseUserSpreadsheet{ File: csvFile, @@ -186,8 +203,11 @@ func readUserTable(csvFile string) (map[string]string, error) { } } else if !isOrcid(cells[orcidColumn]) { // we've already established the ORCID column, but this line disagrees, - // so discard it - continue + // so the whole file is suspect + return nil, InvalidKBaseUserSpreadsheet{ + File: csvFile, + Message: "Different lines list username, ORCID data in different columns", + } } if orcidColumn != -1 { @@ -236,7 +256,7 @@ func readUserTable(csvFile string) (map[string]string, error) { // returns true iff s contains a valid username func isUsername(s string) bool { return len(s) > 0 && !strings.ContainsFunc(s, func(c rune) bool { - return !unicode.IsLetter(c) || !unicode.IsDigit(c) || c != '_' + return !unicode.IsLetter(c) && !unicode.IsDigit(c) && c != '_' }) } From db3eb70ed0055db0902c63008ae36128b7693615 Mon Sep 17 00:00:00 2001 From: "Jeffrey N. Johnson" Date: Mon, 20 Jan 2025 16:10:36 -0800 Subject: [PATCH 4/8] A bit of cleanup. --- databases/kbase/user_federation.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/databases/kbase/user_federation.go b/databases/kbase/user_federation.go index 722581e8..a0c87207 100644 --- a/databases/kbase/user_federation.go +++ b/databases/kbase/user_federation.go @@ -65,10 +65,9 @@ func startUserFederation() error { t := time.Now() topOfHour := time.Date(t.Year(), t.Month(), t.Day(), t.Hour()+1, 0, 0, 0, t.Location()) time.Sleep(time.Until(topOfHour)) - kbaseUpdateChan <- struct{}{} + err := reloadUserTable() // reloading errors are logged, not propagated - err := <-kbaseErrorChan if err != nil { slog.Warn(err.Error()) } @@ -89,6 +88,11 @@ func usernameForOrcid(orcid string) (string, error) { return username, err } +func reloadUserTable() error { + kbaseUpdateChan <- struct{}{} + return <-kbaseErrorChan +} + // stops the user federation machinery func stopUserFederation() error { if !kbaseUserFederationStarted { From cf833297f2faf96ab823a358b697a4d58cdf839b Mon Sep 17 00:00:00 2001 From: "Jeffrey N. Johnson" Date: Tue, 21 Jan 2025 10:36:17 -0800 Subject: [PATCH 5/8] Added a section about the DTS data directory in the Admin Guide. --- docs/admin/deployment.md | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/docs/admin/deployment.md b/docs/admin/deployment.md index 33ac3e5b..e1e627e4 100644 --- a/docs/admin/deployment.md +++ b/docs/admin/deployment.md @@ -96,7 +96,7 @@ navigate to the `dts` deployment. 3. If needed, navigate to the `Volumes` section and edit the CFS directory for the volume mounted at `/data`. Usually, this is set to `/global/cfs/cdirs/kbase/dts/`, so you usually don't need to edit this. Similarly, check the volume mounted - at `/manifests` (usually set to `/global/cfs/cdirs/kbase/gsharing/manifests/`). + at `/manifests` (usually set to `/global/cfs/cdirs/kbase/gsharing/dts/manifests/`). 4. Edit the Docker image for the deployment, changing the tag after the colon to match the tag of the Docker image pushed by `deploy-to-spin.sh` above. 5. Make sure that the Scaling/Upgrade Policy on the Deployment is set to @@ -106,3 +106,17 @@ navigate to the `dts` deployment. 6. Click `Save` to restart the deployment with this new information. That's it! You've now updated the service with new features and bugfixes. + +## Maintaining the DTS Data Directory + +The DTS uses its data directory to manage its own state, as well as its +interactions with other services. Currently, the data directory contains the +following files: + +* `dts.gob` - a file containing information about pending and recently finished + file transfers, along with any related database-specific state information +* `kbase_user_orcids.csv` - a comma-separated variable file associating ORCID + identifiers with KBase users. This file is a temporary mechanism that allows + the DTS to obtain the username of a KBase user given their ORCID. It is + re-read at the top of the hour, making it easy to replace without restarting + a deployment. From 8bfdc3b816fdd9f01f9bbef26cd0ffeb5da58e87 Mon Sep 17 00:00:00 2001 From: "Jeffrey N. Johnson" Date: Tue, 21 Jan 2025 11:02:40 -0800 Subject: [PATCH 6/8] Added a log message and fixed the name of the KBase user file. --- databases/kbase/database_test.go | 8 ++++---- databases/kbase/user_federation.go | 29 ++++++++++++++++------------- 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/databases/kbase/database_test.go b/databases/kbase/database_test.go index 120393b8..9e7eb1c4 100644 --- a/databases/kbase/database_test.go +++ b/databases/kbase/database_test.go @@ -58,7 +58,7 @@ func (t *SerialTests) TestUserFederation() { // make sure we can create a db with good user tables for i := range goodUserTables { - err := copyDataFile(fmt.Sprintf("good_user_table_%d.csv", i), "kbase_user_orcids.csv") + err := copyDataFile(fmt.Sprintf("good_user_table_%d.csv", i), kbaseUserTableFile) assert.Nil(err, "Couldn't copy good_user_table_%d.csv into place.") db, err := NewDatabase(orcid) assert.NotNil(db, fmt.Sprintf("KBase database not created with good_user_table_%d", i)) @@ -69,7 +69,7 @@ func (t *SerialTests) TestUserFederation() { // make sure we CAN'T create a db with bad user tables for i := range badUserTables { - err := copyDataFile(fmt.Sprintf("bad_user_table_%d.csv", i), "kbase_user_orcids.csv") + err := copyDataFile(fmt.Sprintf("bad_user_table_%d.csv", i), kbaseUserTableFile) assert.Nil(err, "Couldn't copy bad_user_table_%d.csv into place.") db, err := NewDatabase(orcid) assert.Nil(db, fmt.Sprintf("KBase database created with bad_user_table_%d.csv", i)) @@ -79,7 +79,7 @@ func (t *SerialTests) TestUserFederation() { } // copy a good user table back into place - copyDataFile("good_user_table_0.csv", "kbase_user_orcids.csv") + copyDataFile("good_user_table_0.csv", kbaseUserTableFile) } func (t *SerialTests) TestSearch() { @@ -208,7 +208,7 @@ func setup() { } // copy a good user table into place - copyDataFile("good_user_table_0.csv", "kbase_user_orcids.csv") + copyDataFile("good_user_table_0.csv", kbaseUserTableFile) databases.RegisterDatabase("kbase", NewDatabase) endpoints.RegisterEndpointProvider("globus", globus.NewEndpoint) diff --git a/databases/kbase/user_federation.go b/databases/kbase/user_federation.go index a0c87207..e2119752 100644 --- a/databases/kbase/user_federation.go +++ b/databases/kbase/user_federation.go @@ -40,9 +40,9 @@ import ( // In order to map an ORCID to a KBase username, we maintain a mapping that // stores entries for all KBase users with ORCIDs. This mapping currently lives -// a 2-column spreadsheet (kbase_user_orcids.csv) in the DTS data directory. -// The data in this spreadsheet is reloaded every hour on the top of the hour -// so a new file can be dropped into the data directory with predictable results. +// a 2-column spreadsheet (CSV) in the DTS data directory. The data in this +// spreadsheet is reloaded every hour on the top of the hour so a new file can +// be dropped into the data directory with predictable results. // starts up the user federation machinery if it hasn't yet been started func startUserFederation() error { @@ -107,6 +107,8 @@ func stopUserFederation() error { // Internals //----------- +const kbaseUserTableFile = "kbase_user_orcids.csv" + var kbaseUserFederationStarted = false var kbaseUpdateChan chan struct{} // triggers updates to the ORCID/user table var kbaseStopChan chan struct{} // stops the user federation subsystem @@ -144,7 +146,7 @@ func kbaseUserFederation(started chan struct{}) { } case <-kbaseUpdateChan: // update ORCID/user table var err error - newUserTable, err := readUserTable("kbase_user_orcids.csv") + newUserTable, err := readUserTable() if err == nil { kbaseUserTable = newUserTable } @@ -157,15 +159,16 @@ func kbaseUserFederation(started chan struct{}) { } } -// reads the specified .csv file within the DTS data directory, returning a map +// reads the user table file within the DTS data directory, returning a map // with ORCID keys associated with username values -func readUserTable(csvFile string) (map[string]string, error) { +func readUserTable() (map[string]string, error) { // open the CVS file containing the user mapping - filename := filepath.Join(config.Service.DataDirectory, csvFile) + filename := filepath.Join(config.Service.DataDirectory, kbaseUserTableFile) + slog.Info(fmt.Sprintf("Reading KBase user table from %s", filename)) file, err := os.Open(filename) if err != nil { return nil, InvalidKBaseUserSpreadsheet{ - File: csvFile, + File: kbaseUserTableFile, Message: "nonexistent file", } } @@ -193,7 +196,7 @@ func readUserTable(csvFile string) (map[string]string, error) { cells := strings.Split(line, ",") if len(cells) != 2 { return nil, InvalidKBaseUserSpreadsheet{ - File: csvFile, + File: kbaseUserTableFile, Message: fmt.Sprintf("%d comma-separated columns found (2 expected)", len(cells)), } } @@ -209,7 +212,7 @@ func readUserTable(csvFile string) (map[string]string, error) { // we've already established the ORCID column, but this line disagrees, // so the whole file is suspect return nil, InvalidKBaseUserSpreadsheet{ - File: csvFile, + File: kbaseUserTableFile, Message: "Different lines list username, ORCID data in different columns", } } @@ -227,7 +230,7 @@ func readUserTable(csvFile string) (map[string]string, error) { if existingUser, found := usersForOrcids[orcid]; found { if existingUser != orcid { return nil, InvalidKBaseUserSpreadsheet{ - File: csvFile, + File: kbaseUserTableFile, Message: fmt.Sprintf("ORCID %s is associated with multiple users", orcid), } } @@ -237,7 +240,7 @@ func readUserTable(csvFile string) (map[string]string, error) { if existingOrcid, found := orcidsForUsers[username]; found { if existingOrcid != orcid { return nil, InvalidKBaseUserSpreadsheet{ - File: csvFile, + File: kbaseUserTableFile, Message: fmt.Sprintf("User %s has multiple ORCIDs", username), } } @@ -249,7 +252,7 @@ func readUserTable(csvFile string) (map[string]string, error) { if len(usersForOrcids) == 0 { return nil, InvalidKBaseUserSpreadsheet{ - File: csvFile, + File: kbaseUserTableFile, Message: "No valid username/ORCID pairs found", } } From 1b4145f468f61fd340afd7c8b8459f9783388975 Mon Sep 17 00:00:00 2001 From: "Jeffrey N. Johnson" Date: Tue, 21 Jan 2025 14:24:34 -0800 Subject: [PATCH 7/8] Added csv and regexp parser. --- databases/kbase/user_federation.go | 45 ++++++++++-------------------- 1 file changed, 15 insertions(+), 30 deletions(-) diff --git a/databases/kbase/user_federation.go b/databases/kbase/user_federation.go index e2119752..11000725 100644 --- a/databases/kbase/user_federation.go +++ b/databases/kbase/user_federation.go @@ -22,11 +22,12 @@ package kbase import ( - "bufio" + "encoding/csv" "fmt" "log/slog" "os" "path/filepath" + "regexp" "strings" "time" "unicode" @@ -190,25 +191,24 @@ func readUserTable() (map[string]string, error) { userColumn := -1 orcidsForUsers := make(map[string]string) usersForOrcids := make(map[string]string) - scanner := bufio.NewScanner(file) - for scanner.Scan() { - line := scanner.Text() - cells := strings.Split(line, ",") - if len(cells) != 2 { + reader := csv.NewReader(file) + records, err := reader.ReadAll() + for _, record := range records { + if len(record) != 2 { return nil, InvalidKBaseUserSpreadsheet{ File: kbaseUserTableFile, - Message: fmt.Sprintf("%d comma-separated columns found (2 expected)", len(cells)), + Message: fmt.Sprintf("%d comma-separated columns found (2 expected)", len(record)), } } if orcidColumn == -1 { // find the column with an ORCID for i := 0; i < 2; i++ { - if isOrcid(cells[i]) { + if isOrcid(record[i]) { orcidColumn = i userColumn = (i + 1) % 2 // user column's the other one } } - } else if !isOrcid(cells[orcidColumn]) { + } else if !isOrcid(record[orcidColumn]) { // we've already established the ORCID column, but this line disagrees, // so the whole file is suspect return nil, InvalidKBaseUserSpreadsheet{ @@ -218,12 +218,12 @@ func readUserTable() (map[string]string, error) { } if orcidColumn != -1 { - orcid := cells[orcidColumn] + orcid := record[orcidColumn] // ORCID column's okay, but what about the user column? - if !isUsername(cells[userColumn]) { + if !isUsername(record[userColumn]) { continue } - username := cells[userColumn] + username := record[userColumn] // have we seen this ORCID or username before? It's okay, as long as everything // is consistent @@ -267,23 +267,8 @@ func isUsername(s string) bool { }) } -// returns true iff s contains a valid ORCID +// returns true iff s contains a valid ORCID (nnnn-nnnn-nnnn-nnn[nX]) func isOrcid(s string) bool { - if len(s) != 19 { - return false - } - if s[4] != '-' || s[9] != '-' || s[14] != '-' { - return false - } - isAllDigits := func(s string) bool { - return !strings.ContainsFunc(s, func(c rune) bool { return !unicode.IsDigit(c) }) - } - if !isAllDigits(s[:4]) || !isAllDigits(s[5:9]) || !isAllDigits(s[10:14]) || !isAllDigits(s[15:18]) { - return false - } - // the last character can be either a digit or X (representing a checksum of 10) - if !unicode.IsDigit(rune(s[18])) && s[18] != 'X' { - return false - } - return true + matched, err := regexp.MatchString(`^(\d{4}-){3}\d{3}[\dX]$`, s) + return err == nil && matched } From bda781b9174601ee52d88322c4d070941e0c25ec Mon Sep 17 00:00:00 2001 From: "Jeffrey N. Johnson" Date: Tue, 21 Jan 2025 14:26:41 -0800 Subject: [PATCH 8/8] Added one more error check. --- databases/kbase/user_federation.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/databases/kbase/user_federation.go b/databases/kbase/user_federation.go index 11000725..53d18d2f 100644 --- a/databases/kbase/user_federation.go +++ b/databases/kbase/user_federation.go @@ -193,6 +193,12 @@ func readUserTable() (map[string]string, error) { usersForOrcids := make(map[string]string) reader := csv.NewReader(file) records, err := reader.ReadAll() + if err != nil { + return nil, InvalidKBaseUserSpreadsheet{ + File: kbaseUserTableFile, + Message: "Couldn't parse CVS file", + } + } for _, record := range records { if len(record) != 2 { return nil, InvalidKBaseUserSpreadsheet{