Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better error messages for schema validation #16097

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
changes:
- type: feat
scope: sdk/go
description: Make project validation error messages more helpful.
127 changes: 126 additions & 1 deletion sdk/go/common/workspace/project.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,14 @@ import (
"math"
"os"
"path/filepath"
"regexp"
"sort"
"strconv"
"strings"

"github.com/pulumi/esc/ast"
"github.com/pulumi/esc/eval"
"github.com/texttheater/golang-levenshtein/levenshtein"

"github.com/hashicorp/go-multierror"
"github.com/pgavlin/fx"
Expand Down Expand Up @@ -343,6 +346,95 @@ func SimplifyMarshalledProject(raw interface{}) (map[string]interface{}, error)
return obj, nil
}

// Finds the closest attribute name to a known attribute name,
//
// This uses the levenshtein distance (edit distance) metric and does case-insensitive
// comparison to find capitalization issues.
//
// In the unlikely event that a name has exactly the same edit distance from two different
// attributes, then this dependent on the map order, which isn't stable. Sorting the keys
// would solve this but seems like overkill
func findClosestKey(needle string, haystack map[string]interface{}) (string, int) {
closestDistance := len(needle)
closest := ""
for key := range haystack {
l := levenshtein.DistanceForStrings([]rune(strings.ToLower(needle)), []rune(strings.ToLower(key)),
levenshtein.DefaultOptionsWithSub)
if l == 0 {
// short circuit, there is no better match possible.
return key, 0
} else if l < closestDistance {
closestDistance = l
closest = key
}
}
return closest, closestDistance
}

// Walks a path to find the schema for the node in question, then lists
// the known attributes for that node
func getSchemaPathAttributes(path string) map[string]interface{} {
elements := strings.Split(path, "/")
isNumber := regexp.MustCompile(`^\d+$`)

curr := ProjectSchema
for len(elements) > 0 {
attr := elements[0]
elements = elements[1:]
if attr == "" {
continue
}

// If this schema node points to another
if curr.Ref != nil {
curr = curr.Ref
}

// check properties
if schema, ok := curr.Properties[attr]; ok {
curr = schema
continue
}

// check additional properties
if curr.AdditionalProperties != nil {
if additional, ok := curr.AdditionalProperties.(map[string]*jsonschema.Schema); ok {
if schema, ok := additional[attr]; ok {
curr = schema
continue
}
}
}

// check for array item
if isNumber.MatchString(attr) && curr.Items2020 != nil {
curr = curr.Items2020
continue
}

// no match found for path
return nil
}

if curr.Ref != nil {
curr = curr.Ref
}

knownProperties := make(map[string]interface{})
for k, v := range curr.Properties {
knownProperties[k] = v
}
if curr.AdditionalProperties != nil {
if additional, ok := curr.AdditionalProperties.(map[string]*jsonschema.Schema); ok {
for k, v := range additional {
knownProperties[k] = v
}
}
}

return knownProperties
}

func ValidateProject(raw interface{}) error {
project, err := SimplifyMarshalledProject(raw)
if err != nil {
Expand All @@ -352,12 +444,21 @@ func ValidateProject(raw interface{}) error {
// Couple of manual errors to match Validate
name, ok := project["name"]
if !ok {
closest, dist := findClosestKey("name", project)
if dist <= 2 {
return fmt.Errorf("project is missing a 'name' attribute, found '%s' instead", closest)
}

return errors.New("project is missing a 'name' attribute")
}
if strName, ok := name.(string); !ok || strName == "" {
return errors.New("project is missing a non-empty string 'name' attribute")
}
if _, ok := project["runtime"]; !ok {
closest, dist := findClosestKey("runtime", project)
if dist <= 2 {
return fmt.Errorf("project is missing a 'runtime' attribute, found '%s' instead", closest)
}
return errors.New("project is missing a 'runtime' attribute")
}

Expand All @@ -370,6 +471,8 @@ func ValidateProject(raw interface{}) error {
return err
}

notAllowedRe := regexp.MustCompile(`'(\w[a-zA-Z0-9_]*)' not allowed$`)

var errs *multierror.Error
var appendError func(err *jsonschema.ValidationError)
appendError = func(err *jsonschema.ValidationError) {
Expand All @@ -379,7 +482,29 @@ func ValidateProject(raw interface{}) error {
return fmt.Errorf("%s: %s", path, fmt.Sprintf(message, args...))
}

errs = multierror.Append(errs, errorf("#"+err.InstanceLocation, "%v", err.Message))
msg := err.Message

if match := notAllowedRe.FindStringSubmatch(msg); match != nil {
attrName := match[1]
attributes := getSchemaPathAttributes(err.InstanceLocation)
closest, dist := findClosestKey(attrName, attributes)
if dist <= 2 {
msg = fmt.Sprintf("%s, did you mean '%s'", msg, closest)
} else if len(attributes) > 0 {
valid := make([]string, 0, len(attributes))
for k := range attributes {
valid = append(valid, "'"+k+"'")
}
if len(valid) > 1 {
sort.StringSlice.Sort(valid)
msg = fmt.Sprintf("%s, the expected attributes are %v and %s",
msg, strings.Join(valid[:len(valid)-1], ", "), valid[len(valid)-1])
} else {
msg = fmt.Sprintf("%s, the only expected attribute is %s", msg, valid[0])
}
}
}
errs = multierror.Append(errs, errorf("#"+err.InstanceLocation, "%v", msg))
}
for _, err := range err.Causes {
appendError(err)
Expand Down
75 changes: 58 additions & 17 deletions sdk/go/common/workspace/project_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,36 +147,36 @@ func TestProjectValidationSucceedsForCorrectDefaultValueType(t *testing.T) {
assert.NoError(t, err, "There should be no validation error")
}

func writeAndLoad(t *testing.T, str string) (*Project, error) {
tmp, err := os.CreateTemp("", "*.json")
assert.NoError(t, err)
path := tmp.Name()
err = os.WriteFile(path, []byte(str), 0o600)
assert.NoError(t, err)
return LoadProject(path)
}

func TestProjectLoadJSON(t *testing.T) {
t.Parallel()

writeAndLoad := func(str string) (*Project, error) {
tmp, err := os.CreateTemp("", "*.json")
assert.NoError(t, err)
path := tmp.Name()
err = os.WriteFile(path, []byte(str), 0o600)
assert.NoError(t, err)
return LoadProject(path)
}

// Test wrong type
_, err := writeAndLoad("\"hello \"")
_, err := writeAndLoad(t, "\"hello \"")
assert.ErrorContains(t, err, "expected project to be an object, was 'string'")

// Test lack of name
_, err = writeAndLoad("{}")
_, err = writeAndLoad(t, "{}")
assert.ErrorContains(t, err, "project is missing a 'name' attribute")

// Test bad name
_, err = writeAndLoad("{\"name\": \"\"}")
_, err = writeAndLoad(t, "{\"name\": \"\"}")
assert.ErrorContains(t, err, "project is missing a non-empty string 'name' attribute")

// Test missing runtime
_, err = writeAndLoad("{\"name\": \"project\"}")
_, err = writeAndLoad(t, "{\"name\": \"project\"}")
assert.ErrorContains(t, err, "project is missing a 'runtime' attribute")

// Test other schema errors
_, err = writeAndLoad("{\"name\": \"project\", \"runtime\": 4}")
_, err = writeAndLoad(t, "{\"name\": \"project\", \"runtime\": 4}")
// These can vary in order, so contains not equals check
expected := []string{
"3 errors occurred:",
Expand All @@ -188,7 +188,7 @@ func TestProjectLoadJSON(t *testing.T) {
assert.ErrorContains(t, err, e)
}

_, err = writeAndLoad("{\"name\": \"project\", \"runtime\": \"test\", \"backend\": 4, \"main\": {}}")
_, err = writeAndLoad(t, "{\"name\": \"project\", \"runtime\": \"test\", \"backend\": 4, \"main\": {}}")
expected = []string{
"2 errors occurred:",
"* #/main: expected string or null, but got object",
Expand All @@ -199,19 +199,60 @@ func TestProjectLoadJSON(t *testing.T) {
}

// Test success
proj, err := writeAndLoad("{\"name\": \"project\", \"runtime\": \"test\"}")
proj, err := writeAndLoad(t, "{\"name\": \"project\", \"runtime\": \"test\"}")
assert.NoError(t, err)
assert.Equal(t, tokens.PackageName("project"), proj.Name)
assert.Equal(t, "test", proj.Runtime.Name())

// Test null optionals should work
proj, err = writeAndLoad("{\"name\": \"project\", \"runtime\": \"test\", " +
proj, err = writeAndLoad(t, "{\"name\": \"project\", \"runtime\": \"test\", "+
"\"description\": null, \"main\": null, \"backend\": null}")
assert.NoError(t, err)
assert.Nil(t, proj.Description)
assert.Equal(t, "", proj.Main)
}

func TestProjectLoadJSONInformativeErrors(t *testing.T) {
t.Parallel()

_, err := writeAndLoad(t, "{\"Name\": \"project\", \"runtime\": \"test\"}")
assert.ErrorContains(t, err, "project is missing a 'name' attribute")
assert.ErrorContains(t, err, "found 'Name' instead")

_, err = writeAndLoad(t, "{\"name\": \"project\", \"rutnime\": \"test\"}")
assert.ErrorContains(t, err, "project is missing a 'runtime' attribute")
assert.ErrorContains(t, err, "found 'rutnime' instead")

// Minor spelling mistake deeper in the schema is identified and a more helpful error created
_, err = writeAndLoad(t, "{\"name\": \"project\", \"runtime\": \"test\", \"template\":{\"displatName\":\"foo\"}}")
assert.ErrorContains(t, err, "'displatName' not allowed, did you mean 'displayName'")

// Major spelling mistake deeper in the schema is identified
_, err = writeAndLoad(t, "{\"name\": \"project\", \"runtime\": \"test\", "+
"\"template\":{\"displayNameDisplayName\":\"foo\"}}")
assert.ErrorContains(t, err, "'displayNameDisplayName' not allowed")
assert.ErrorContains(t, err, "'displayNameDisplayName' not allowed, the expected attributes are "+
"'config', 'description', 'displayName', 'important', 'metadata' and 'quickstart'")

// Error message is different when there is only one choice
_, err = writeAndLoad(t, "{\"name\": \"project\", \"runtime\": \"test\", "+
"\"backend\": {\"url\": \"https://pulumi.com\", \"name\": \"test\"}}")
assert.ErrorContains(t, err, "'name' not allowed")
assert.ErrorContains(t, err, "'name' not allowed, the only expected attribute is 'url'")

// Minor spelling mistake even deeper in the schema is identified and a more helpful error created
_, err = writeAndLoad(t, "{\"name\": \"project\", \"runtime\": \"test\", "+
"\"plugins\":{\"providers\": [{\"nome\": \"test\"}]}}")
assert.ErrorContains(t, err, "'nome' not allowed, did you mean 'name'")

// Major spelling mistake event deeper in the schema is identified
_, err = writeAndLoad(t, "{\"name\": \"project\", \"runtime\": \"test\", "+
"\"plugins\":{\"providers\": [{\"displayName\": \"test\"}]}}")
assert.ErrorContains(t, err, "'displayName' not allowed")
assert.ErrorContains(t, err, "'displayName' not allowed, the expected attributes are "+
"'name', 'path' and 'version'")
}

func deleteFile(t *testing.T, file *os.File) {
if file != nil {
err := os.Remove(file.Name())
Expand Down