Skip to content

Commit

Permalink
syntax: ParseDID performance (bluesky-social#853)
Browse files Browse the repository at this point in the history
We are apparently burning a bunch of CPU on this regex in prod, so let's
bypass it for the most common case (a valid did:plc).

Would be nice to have a benchmark demonstrating perf gains (or not).

I did some manual testing with the CLI helper and inserted `panic()`
statements to confirm that valid DID PLC stays on fast path, and other
DIDs don't.

Feedback welcome if there is a more idiomatic/performant golang way to
do this... this is still doing unicode stuff, I assume, and maybe
casting to bytes would be faster? though probably doesn't impact the
actual hot path much. `strings.ContainsAny` or `strings.ContainsFunc`
could be used here I guess but 🤷
  • Loading branch information
bnewbold authored Nov 29, 2024
2 parents feceb36 + 1e2f969 commit de950cc
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 0 deletions.
21 changes: 21 additions & 0 deletions atproto/syntax/cmd/atp-syntax/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@ func main() {
ArgsUsage: "<tid>",
Action: runParseTID,
},
&cli.Command{
Name: "parse-did",
Usage: "parse a DID",
ArgsUsage: "<did>",
Action: runParseDID,
},
}
h := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelDebug})
slog.SetDefault(slog.New(h))
Expand All @@ -43,3 +49,18 @@ func runParseTID(cctx *cli.Context) error {

return nil
}

// runParseDID is the CLI action behind the "parse-did" command. It takes the
// first positional argument, runs it through syntax.ParseDID, and prints the
// parsed DID on its own line to stdout.
//
// It returns an error when no argument was supplied, or the parse error
// unchanged when the argument is rejected by syntax.ParseDID.
func runParseDID(cctx *cli.Context) error {
	arg := cctx.Args().First()
	if len(arg) == 0 {
		return fmt.Errorf("need to provide identifier as an argument")
	}

	parsed, parseErr := syntax.ParseDID(arg)
	if parseErr != nil {
		return parseErr
	}

	fmt.Printf("%s\n", parsed)
	return nil
}
22 changes: 22 additions & 0 deletions atproto/syntax/did.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,30 @@ import (
type DID string

// didRegex matches the generic DID shape: the literal "did:" prefix, a method
// name of one or more lowercase ASCII letters, a ':', and then an identifier
// built from ASCII letters, digits, and '.', '_', ':', '%', '-'. The identifier
// must be non-empty and its final character may not be ':' or '%'.
var didRegex = regexp.MustCompile(`^did:[a-z]+:[a-zA-Z0-9._:%-]*[a-zA-Z0-9._-]$`)
// plcChars is an empty string and is not referenced by any code visible here.
// NOTE(review): presumably a placeholder for the did:plc character set —
// confirm whether it is still needed or can be removed.
var plcChars = ""

// isASCIIAlphaNum reports whether c is an ASCII letter ('a'-'z', 'A'-'Z') or
// an ASCII decimal digit ('0'-'9'). Every other rune, including non-ASCII
// letters and digits, yields false.
func isASCIIAlphaNum(c rune) bool {
	switch {
	case 'a' <= c && c <= 'z':
		return true
	case 'A' <= c && c <= 'Z':
		return true
	case '0' <= c && c <= '9':
		return true
	default:
		return false
	}
}

func ParseDID(raw string) (DID, error) {
// fast-path for did:plc, avoiding regex
if len(raw) == 32 && strings.HasPrefix(raw, "did:plc:") {
// NOTE: this doesn't really check base32, just the broader alphanumeric set. It might pass invalid PLC DIDs, but they still have overall valid DID syntax.
isPlc := true
for _, c := range raw[8:32] {
if !isASCIIAlphaNum(c) {
isPlc = false
break
}
}
if isPlc {
return DID(raw), nil
}
}
if raw == "" {
return "", errors.New("expected DID, got empty string")
}
Expand Down

0 comments on commit de950cc

Please sign in to comment.