diff --git a/go.mod b/go.mod index dd182afe..397c5331 100644 --- a/go.mod +++ b/go.mod @@ -7,9 +7,9 @@ require ( github.com/go-playground/locales v0.14.1 github.com/go-playground/universal-translator v0.18.1 github.com/go-playground/validator/v10 v10.18.0 + github.com/goccy/go-yaml v1.18.0 github.com/italia/httpclient-lib-go v0.0.2 github.com/rivo/uniseg v0.4.2 - gopkg.in/yaml.v3 v3.0.1 ) require ( diff --git a/go.sum b/go.sum index a988d79e..f38ec5a2 100644 --- a/go.sum +++ b/go.sum @@ -19,12 +19,12 @@ github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJn github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY= github.com/go-playground/validator/v10 v10.18.0 h1:BvolUXjp4zuvkZ5YN5t7ebzbhlUtPsPm2S9NAZ5nl9U= github.com/go-playground/validator/v10 v10.18.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM= +github.com/goccy/go-yaml v1.18.0 h1:8W7wMFS12Pcas7KU+VVkaiCng+kG8QiFeFwzFb+rwuw= +github.com/goccy/go-yaml v1.18.0/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA= github.com/italia/httpclient-lib-go v0.0.2 h1:4bJLywTVd7qHPdKxJXvvhlXp436JTC4KA6dLhIl5a6c= github.com/italia/httpclient-lib-go v0.0.2/go.mod h1:b0/D3ULsBw8X+zEl7j/kSZmiMlUdj+agppneOvSq6eA= -github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= -github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ= github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI= @@ -55,7 +55,6 @@ golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY= 
golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= diff --git a/parser.go b/parser.go index 8bebe185..2e51b2e4 100644 --- a/parser.go +++ b/parser.go @@ -10,7 +10,6 @@ import ( "path/filepath" "regexp" "slices" - "strconv" "strings" "unicode/utf8" @@ -19,9 +18,10 @@ import ( ut "github.com/go-playground/universal-translator" "github.com/go-playground/validator/v10" en_translations "github.com/go-playground/validator/v10/translations/en" + "github.com/goccy/go-yaml" + "github.com/goccy/go-yaml/ast" urlutil "github.com/italia/publiccode-parser-go/v4/internal" publiccodeValidator "github.com/italia/publiccode-parser-go/v4/validators" - "gopkg.in/yaml.v3" ) type ParserConfig struct { @@ -94,61 +94,53 @@ func (p *Parser) ParseStream(in io.Reader) (PublicCode, error) { // First, decode the YAML into yaml.Node so we can access line and column // numbers. 
- var node yaml.Node - - d := yaml.NewDecoder(bytes.NewReader(b)) - d.KnownFields(true) - err = d.Decode(&node) - - if err == nil && len(node.Content) > 0 { - node = *node.Content[0] - } else { - // YAML is malformed - return nil, ValidationResults{toValidationError(err.Error(), nil)} - } + // var node yaml.Node + // + // d := yaml.NewDecoder(bytes.NewReader(b)) + // d.KnownFields(true) + // err = d.Decode(&node) + // + // if err == nil && len(node.Content) > 0 { + // node = *node.Content[0] + // } else { + // // YAML is malformed + // return nil, ValidationResults{toValidationError(err.Error(), nil)} + // } + // + path, _ := yaml.PathString("$.publiccodeYmlVersion") - _, version := getNodes("publiccodeYmlVersion", &node) - if version == nil { + var node ast.Node + if node, err = path.ReadNode(bytes.NewReader(b)); err != nil { return nil, ValidationResults{newValidationError("publiccodeYmlVersion", "publiccodeYmlVersion is a required field")} } + version := node.GetToken().Value - if version.ShortTag() != "!!str" { - line, column := getPositionInFile("publiccodeYmlVersion", node) - - return nil, ValidationResults{ValidationError{ - Key: "publiccodeYmlVersion", - Description: "wrong type for this field", - Line: line, - Column: column, - }} - } + var ve ValidationResults - if !slices.Contains(SupportedVersions, version.Value) { + if !slices.Contains(SupportedVersions, version) { return nil, ValidationResults{ newValidationError("publiccodeYmlVersion", fmt.Sprintf( "unsupported version: '%s'. 
Supported versions: %s", - version.Value, + version, strings.Join(SupportedVersions, ", "), )), } } - var ve ValidationResults - - if slices.Contains(SupportedVersions, version.Value) && !strings.HasPrefix(version.Value, "0.4") { + if slices.Contains(SupportedVersions, version) && !strings.HasPrefix(version, "0.4") { + position := node.GetToken().Position latestVersion := SupportedVersions[len(SupportedVersions)-1] - line, column := getPositionInFile("publiccodeYmlVersion", node) ve = append(ve, ValidationWarning{ Key: "publiccodeYmlVersion", Description: fmt.Sprintf( "v%s is not the latest version, use '%s'. Parsing this file as v%s.", - version.Value, + version, latestVersion, latestVersion, ), - Line: line, - Column: column, + Line: position.Line, + Column: position.Column, }) } @@ -158,11 +150,11 @@ func (p *Parser) ParseStream(in io.Reader) (PublicCode, error) { var decodeResults ValidationResults - if version.Value[0] == '0' { + if version[0] == '0' { v0 := PublicCodeV0{} validateFields = validateFieldsV0 - decodeResults = decode(b, &v0, node) + decodeResults = decode(b, &v0) publiccode = v0 } @@ -191,6 +183,10 @@ func (p *Parser) ParseStream(in io.Reader) (PublicCode, error) { err = validate.Struct(publiccode) if err != nil { + var ast ast.Node + _ = yaml.UnmarshalWithOptions(b, &ast, yaml.DisallowUnknownField()) + // TODO: err + for _, err := range err.(validator.ValidationErrors) { var sb strings.Builder @@ -206,13 +202,20 @@ func (p *Parser) ParseStream(in io.Reader) (PublicCode, error) { m := regexp.MustCompile(`\[([[:alpha:]]+)\]`) key = m.ReplaceAllString(key, ".$1") - line, column := getPositionInFile(key, node) + path, e := yaml.PathString("$." 
+ key) + // TODO: err + + if node, e = path.FilterNode(ast); e != nil { + return nil, ValidationResults{newValidationError("XXX", "Xx")} + } + + position := node.GetToken().Position ve = append(ve, ValidationError{ Key: key, Description: sb.String(), - Line: line, - Column: column, + Line: position.Line, + Column: position.Column, }) } } @@ -290,127 +293,61 @@ func (p *Parser) Parse(uri string) (PublicCode, error) { return p.ParseStream(stream) } -func getNodes(key string, node *yaml.Node) (*yaml.Node, *yaml.Node) { - for i := 0; i < len(node.Content); i += 2 { - childNode := *node.Content[i] - - if childNode.Value == key { - return &childNode, node.Content[i+1] - } - } - - return nil, nil -} - -func getPositionInFile(key string, node yaml.Node) (int, int) { - n := &node +// decode unmarshals data into the struct publiccode points to (unknown fields rejected) and reports failures as ValidationResults +func decode[T any](data []byte, publiccode *T) ValidationResults { + var ve ValidationResults - keys := strings.Split(key, ".") - for _, path := range keys[:len(keys)-1] { - _, n = getNodes(path, n) + if err := yaml.UnmarshalWithOptions(data, publiccode, yaml.DisallowUnknownField()); err != nil { + switch err := err.(type) { + case *yaml.TypeError: + token := err.Token - // This should not happen, but let's be defensive - if n == nil { - return 0, 0 + ve = append(ve, ValidationError{ + // token is the wrong type token, + // token.Prev is ":" + // token.Prev.Prev is the actual key + Key: token.Prev.Prev.Value, + Description: "wrong type for this field", + Line: token.Position.Line, + Column: token.Position.Column, + }) + default: + ve = append(ve, newValidationError("", err.Error())) } } - parentNode := n - - n, _ = getNodes(keys[len(keys)-1], n) - - if n != nil { - return n.Line, n.Column - } else { - return parentNode.Line, parentNode.Column - } + return ve } -// getKeyAtLine returns the key name at line "line" for the YAML document -// represented at parentNode.
-func getKeyAtLine(parentNode yaml.Node, line int, path string) string { - key := path - - for i, currNode := range parentNode.Content { - // If this node is a mapping and the index is odd it means - // we are not looking at a key, but at its value. Skip it. - if parentNode.Kind == yaml.MappingNode && i%2 != 0 && currNode.Kind == yaml.ScalarNode { - continue - } - - // This node is a key of a mapping type - if parentNode.Kind == yaml.MappingNode && i%2 == 0 { - if path == "" { - key = currNode.Value - } else { - key = fmt.Sprintf("%s.%s", path, currNode.Value) - } - } - - // We want the scalar node (ie. key) not the mapping node which - // doesn't have a tag name even if it has the same line number - if currNode.Line == line && parentNode.Kind == yaml.MappingNode && currNode.Kind == yaml.ScalarNode { - return key - } +type keyFinder struct { + key string + result *ast.Node +} - if currNode.Kind != yaml.ScalarNode { - if k := getKeyAtLine(*currNode, line, key); k != "" { - return k - } - } +func (v *keyFinder) Visit(node ast.Node) ast.Visitor { + if node.GetToken().Value == v.key { + v.result = &node // pointer receiver: the match must be stored on the caller's finder, not on a copy + return nil } - return "" + return v } -func toValidationError(errorText string, node *yaml.Node) ValidationError { - r := regexp.MustCompile(`(line ([0-9]+): )`) - matches := r.FindStringSubmatch(errorText) +func getPositionInFile(key string, node ast.Node) (int, int) { + finder := &keyFinder{key: key} - line := 0 - if len(matches) > 1 { - line, _ = strconv.Atoi(matches[2]) - errorText = strings.ReplaceAll(errorText, matches[1], "") - } + ast.Walk(finder, node) - // Transform unmarshalling errors messages to a user friendlier message - r = regexp.MustCompile("^cannot unmarshal") - if r.MatchString(errorText) { - errorText = "wrong type for this field" + // This should not happen, but let's be defensive + if finder.result == nil { + return 0, 0 } - var key string - if node != nil { - key = getKeyAtLine(*node, line, "") - } - - return ValidationError{ - Key: key, -
Description: errorText, - Line: line, - Column: 1, - } -} - -// Decode the YAML into a PublicCode structure, so we get type errors -func decode[T any](data []byte, publiccode *T, node yaml.Node) ValidationResults { - var ve ValidationResults - - d := yaml.NewDecoder(bytes.NewReader(data)) - d.KnownFields(true) + n := *finder.result + position := n.GetToken().Position - if err := d.Decode(&publiccode); err != nil { - switch err := err.(type) { - case *yaml.TypeError: - for _, errorText := range err.Errors { - ve = append(ve, toValidationError(errorText, &node)) - } - default: - ve = append(ve, newValidationError("", err.Error())) - } - } + return position.Line, position.Column - return ve } func toURL(file string) (*url.URL, error) { diff --git a/testdata/v0/valid_with_warnings/valid.minimal.v0.2.yml b/testdata/v0/valid_with_warnings/valid.minimal.v0.2.yml index 0d19e687..d6b96078 100644 --- a/testdata/v0/valid_with_warnings/valid.minimal.v0.2.yml +++ b/testdata/v0/valid_with_warnings/valid.minimal.v0.2.yml @@ -1,4 +1,3 @@ -publiccodeYmlVersion: "0.2" name: Medusa url: "https://github.com/italia/developers.italia.it.git" @@ -36,6 +35,7 @@ description: features: - Just one feature +publiccodeYmlVersion: "0.2" legal: license: AGPL-3.0-or-later diff --git a/v0.go b/v0.go index 5d4d0dd8..a619f766 100644 --- a/v0.go +++ b/v0.go @@ -1,8 +1,8 @@ package publiccode import ( + "github.com/goccy/go-yaml" urlutil "github.com/italia/publiccode-parser-go/v4/internal" - "gopkg.in/yaml.v3" ) // PublicCodeV0 defines how a publiccode.yml v0.x is structured diff --git a/v1.go b/v1.go index c4ebe8eb..022195bb 100644 --- a/v1.go +++ b/v1.go @@ -1,8 +1,8 @@ package publiccode import ( + "github.com/goccy/go-yaml" urlutil "github.com/italia/publiccode-parser-go/v4/internal" - "gopkg.in/yaml.v3" ) // There's no v1 yet, this is just an unexported placeholder type