-
Notifications
You must be signed in to change notification settings - Fork 50
update code to honor the canonical purl encoding #83
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
92c64b7
3eaa3ff
9554a32
261cfc1
d3b478f
e60e373
2377e2c
f39d8a8
e126e7d
68ac947
6effe6f
efbf958
4d14de0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -29,6 +29,7 @@ import ( | |
| "net/url" | ||
| "path" | ||
| "regexp" | ||
| "slices" | ||
| "sort" | ||
| "strings" | ||
| ) | ||
|
|
@@ -258,24 +259,18 @@ type Qualifier struct { | |
| Value string | ||
| } | ||
|
|
||
| // String returns a canonical string representation of the qualifier according to [SPEC]. | ||
| // | ||
| // [SPEC] https://github.com/package-url/purl-spec/blob/main/PURL-SPECIFICATION.rst#rules-for-each-purl-component | ||
| func (q Qualifier) String() string { | ||
| // A value must be a percent-encoded string | ||
| return fmt.Sprintf("%s=%s", q.Key, url.PathEscape(q.Value)) | ||
| return fmt.Sprintf("%s=%s", q.Key, percentEncode(q.Value)) | ||
| } | ||
|
|
||
| // Qualifiers is a slice of key=value pairs, with order preserved as it appears | ||
| // in the package URL. | ||
| type Qualifiers []Qualifier | ||
|
|
||
| // urlQuery returns a raw URL query with all the qualifiers as keys + values. | ||
| func (q Qualifiers) urlQuery() (rawQuery string) { | ||
| v := make(url.Values) | ||
| for _, qq := range q { | ||
| v.Add(qq.Key, qq.Value) | ||
| } | ||
| return v.Encode() | ||
| } | ||
|
|
||
| // QualifiersFromMap constructs a Qualifiers slice from a string map. To get a | ||
| // deterministic qualifier order (despite maps not providing any iteration order | ||
| // guarantees) the returned Qualifiers are sorted in increasing order of key. | ||
|
|
@@ -305,8 +300,13 @@ func (qq Qualifiers) Map() map[string]string { | |
| return m | ||
| } | ||
|
|
||
| // String returns a canonical string representation of the qualifiers according to [SPEC]. | ||
| // | ||
| // [SPEC] https://github.com/package-url/purl-spec/blob/main/PURL-SPECIFICATION.rst#rules-for-each-purl-component | ||
| func (qq Qualifiers) String() string { | ||
| var kvPairs []string | ||
| // Canonical form requires qualifier keys to be lexicographically ordered. | ||
| slices.SortFunc(qq, func(a, b Qualifier) int { return strings.Compare(a.Key, b.Key) }) | ||
| for _, q := range qq { | ||
| kvPairs = append(kvPairs, q.String()) | ||
| } | ||
|
|
@@ -364,40 +364,55 @@ func NewPackageURL(purlType, namespace, name, version string, | |
| } | ||
| } | ||
|
|
||
| // ToString returns the human-readable instance of the PackageURL structure. | ||
| // This is the literal purl as defined by the spec. | ||
| // ToString returns a canonical string representation of the qualifier according to [SPEC]. | ||
| // | ||
| // [SPEC] https://github.com/package-url/purl-spec/blob/main/PURL-SPECIFICATION.rst#rules-for-each-purl-component | ||
| func (p *PackageURL) ToString() string { | ||
| u := &url.URL{ | ||
| Scheme: "pkg", | ||
| RawQuery: p.Qualifiers.urlQuery(), | ||
| Fragment: p.Subpath, | ||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Note: we cannot use Instead we "manually" add any fragment part at the end. |
||
| RawQuery: p.Qualifiers.String(), | ||
| } | ||
|
|
||
| paths := []string{p.Type} | ||
| // we need to escape each segment by itself, so that we don't escape "/" in the namespace. | ||
| // Each namespace segment MUST be a percent-encoded string. | ||
| // We need to escape each segment by itself, so that we don't escape "/" in the namespace. | ||
| for _, segment := range strings.Split(p.Namespace, "/") { | ||
| if segment == "" { | ||
| continue | ||
| } | ||
| paths = append(paths, escape(segment)) | ||
| paths = append(paths, percentEncode(segment)) | ||
| } | ||
|
|
||
| nameWithVersion := escape(p.Name) | ||
| // A name MUST be a percent-encoded string. | ||
| nameWithVersion := percentEncode(p.Name) | ||
| if p.Version != "" { | ||
| nameWithVersion += "@" + escape(p.Version) | ||
| // A version MUST be a percent-encoded string. | ||
| nameWithVersion += "@" + percentEncode(p.Version) | ||
| } | ||
|
|
||
| paths = append(paths, nameWithVersion) | ||
|
|
||
| u.Opaque = strings.Join(paths, "/") | ||
| return u.String() | ||
| if p.Subpath == "" { | ||
| return u.String() | ||
| } | ||
|
|
||
| // Each subpath segment MUST be a percent-encoded string. | ||
| var subpathSegments []string | ||
| for _, segment := range strings.Split(p.Subpath, "/") { | ||
| if segment == "" { | ||
| continue | ||
| } | ||
| subpathSegments = append(subpathSegments, percentEncode(segment)) | ||
| } | ||
| return u.String() + "#" + strings.Join(subpathSegments, "/") | ||
| } | ||
|
|
||
| func (p PackageURL) String() string { | ||
| return p.ToString() | ||
| } | ||
|
|
||
| // FromString parses a valid package url string into a PackageURL structure | ||
| // FromString parses a valid package url string into a [PackageURL]. | ||
| func FromString(purl string) (PackageURL, error) { | ||
| u, err := url.Parse(purl) | ||
| if err != nil { | ||
|
|
@@ -473,15 +488,18 @@ func (p *PackageURL) Normalize() error { | |
| return validCustomRules(*p) | ||
| } | ||
|
|
||
| // escape the given string in a purl-compatible way. | ||
| func escape(s string) string { | ||
| // for compatibility with other implementations and the purl-spec, we want to escape all | ||
| // characters, which is what "QueryEscape" does. The issue with QueryEscape is that it encodes | ||
| // " " (space) as "+", which is valid in a query, but invalid in a path (see | ||
| // https://stackoverflow.com/questions/2678551/when-should-space-be-encoded-to-plus-or-20) for | ||
| // context). | ||
| // To work around that, we replace the "+" signs with the path-compatible "%20". | ||
| return strings.ReplaceAll(url.QueryEscape(s), "+", "%20") | ||
| // percentEncode percent-encodes a purl component according to [Encoding]. | ||
| // | ||
| // [Encoding] https://github.com/package-url/purl-spec/blob/main/PURL-SPECIFICATION.rst#character-encoding | ||
| func percentEncode(s string) string { | ||
| // [url.QueryEscape] gets us most of the way. | ||
| s = url.QueryEscape(s) | ||
| // ... but we need to correct its output to conform to the purl spec. | ||
| replacer := strings.NewReplacer( | ||
| "%3A", ":", // Spec says colon MUST NOT be encoded. | ||
| "+", "%20", // A space must be percent-encoded, not turned to a '+'. | ||
| ) | ||
| return replacer.Replace(s) | ||
| } | ||
|
|
||
| func separateNamespaceNameVersion(path string) (ns, name, version string, err error) { | ||
|
|
@@ -637,41 +655,29 @@ func validType(typ string) bool { | |
| // validCustomRules evaluates additional rules for each package url type, as specified in the package-url specification. | ||
| // On success, it returns nil. On failure, a descriptive error will be returned. | ||
| func validCustomRules(p PackageURL) error { | ||
| q := p.Qualifiers.Map() | ||
| switch p.Type { | ||
| case TypeConan: | ||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The conan spec has been relaxed. |
||
| if p.Namespace != "" { | ||
| if val, ok := q["channel"]; ok { | ||
| if val == "" { | ||
| return errors.New("the qualifier channel must be not empty if namespace is present") | ||
| } | ||
| } else { | ||
| return errors.New("channel qualifier does not exist") | ||
| } | ||
| } else { | ||
| if val, ok := q["channel"]; ok { | ||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The conan spec has been relaxed. |
||
| if val != "" { | ||
| return errors.New("namespace is required if channel is non empty") | ||
| } | ||
| } | ||
| } | ||
| case TypeCpan: | ||
| // It MUST be written uppercase and is required. | ||
| if p.Namespace == "" { | ||
| return errors.New("a cpan purl must have a namespace") | ||
| } | ||
| if strings.ToUpper(p.Namespace) != p.Namespace { | ||
| return errors.New("a cpan purl namespace must use uppercase characters") | ||
| } | ||
|
|
||
| // A distribution name MUST NOT contain the string '::'. | ||
| distName := p.Name | ||
| if strings.Contains(distName, "::") { | ||
| return errors.New("a cpan distribution name must not contain '::'") | ||
| } | ||
| case TypeJulia: | ||
| // The spec prohibits a namespace. | ||
| if p.Namespace != "" { | ||
| // The purl refers to a CPAN distribution. | ||
| publisher := p.Namespace | ||
| if publisher != strings.ToUpper(publisher) { | ||
| return errors.New("a cpan distribution namespace must be all uppercase") | ||
| } | ||
| distName := p.Name | ||
| if strings.Contains(distName, "::") { | ||
| return errors.New("a cpan distribution name must not contain '::'") | ||
| } | ||
| } else { | ||
| // The purl refers to a CPAN module. | ||
| moduleName := p.Name | ||
| if strings.Contains(moduleName, "-") { | ||
| return errors.New("a cpan module name may not contain dashes") | ||
| } | ||
| return errors.New("a julia purl must not have a namespace") | ||
| } | ||
| // The spec requires the presence of a uuid qualifier. | ||
| if _, ok := p.Qualifiers.Map()["uuid"]; !ok { | ||
| return errors.New("a julia purl must have a uuid qualifier") | ||
| } | ||
| case TypeSwift: | ||
| if p.Namespace == "" { | ||
|
|
@@ -680,10 +686,6 @@ func validCustomRules(p PackageURL) error { | |
| if p.Version == "" { | ||
| return errors.New("version is required") | ||
| } | ||
| case TypeCran: | ||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
| if p.Version == "" { | ||
| return errors.New("version is required") | ||
| } | ||
| } | ||
| return nil | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,161 @@ | ||
| package packageurl | ||
|
|
||
| import "testing" | ||
|
|
||
| // Verifies that qualifier values are properly percent-encoded. | ||
| func TestQualifierValueEncoding(t *testing.T) { | ||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added some explicit testing of qualifier encoding (outside of purl-spec test suite) since this was a problematic spot which had poor coverage in the upstream tests. |
||
| tests := []struct { | ||
| name string | ||
| given PackageURL | ||
| want string | ||
| }{ | ||
| { | ||
| name: "must percent-encode a qualifier value", | ||
| given: PackageURL{ | ||
| Type: "generic", | ||
| Name: "openssl", | ||
| Version: "1.2.3", | ||
| Qualifiers: Qualifiers{ | ||
| {Key: "download_url", Value: "dl.site.org/openssl"}, | ||
| }, | ||
| }, | ||
| want: "pkg:generic/[email protected]?download_url=dl.site.org%2Fopenssl", | ||
| }, | ||
|
|
||
| { | ||
| name: "must not percent-encode colons in qualifier values", | ||
| given: PackageURL{ | ||
| Type: "generic", | ||
| Name: "openssl", | ||
| Version: "1.2.3", | ||
| Qualifiers: Qualifiers{ | ||
| {Key: "download_url", Value: "dl.site.org:443/openssl"}, | ||
| }, | ||
| }, | ||
| want: "pkg:generic/[email protected]?download_url=dl.site.org:443%2Fopenssl", | ||
| }, | ||
|
|
||
| // Note: checks that the use of [url.QueryEscape] does not lead to a space being | ||
| // encoded as "+". | ||
| { | ||
| name: "must properly percent-encode a plus", | ||
| given: PackageURL{ | ||
| Type: "generic", | ||
| Name: "openssl", | ||
| Version: "1.2.3", | ||
| Qualifiers: Qualifiers{ | ||
| {Key: "download_url", Value: "dl.site.org:443/openssl secure"}, | ||
| }, | ||
| }, | ||
| want: "pkg:generic/[email protected]?download_url=dl.site.org:443%2Fopenssl%20secure", | ||
| }, | ||
|
|
||
| { | ||
| name: "must order qualifier values lexicographically by key", | ||
| given: PackageURL{ | ||
| Type: "generic", | ||
| Name: "openssl", | ||
| Version: "1.2.3", | ||
| Qualifiers: Qualifiers{ | ||
| {Key: "download_url", Value: "dl.site.org"}, | ||
| {Key: "checksum", Value: "sha256:abc123"}, | ||
| }, | ||
| }, | ||
| want: "pkg:generic/[email protected]?checksum=sha256:abc123&download_url=dl.site.org", | ||
| }, | ||
|
|
||
| { | ||
| name: "must percent-encode separators in qualifier value", | ||
| given: PackageURL{ | ||
| Type: "generic", | ||
| Name: "openssl", | ||
| Version: "1.2.3", | ||
| Qualifiers: Qualifiers{ | ||
| {Key: "download_url", Value: "https://dl.site.org:443/openssl+secure?type=zip&fast=yes"}, | ||
| {Key: "checksum", Value: "sha256:abc123"}, | ||
| }, | ||
| }, | ||
| want: "pkg:generic/[email protected]?checksum=sha256:abc123&download_url=https:%2F%2Fdl.site.org:443%2Fopenssl%2Bsecure%3Ftype%3Dzip%26fast%3Dyes", | ||
| }, | ||
|
|
||
| { | ||
| name: "must escape a qualifier value", | ||
| given: PackageURL{ | ||
| Type: "generic", | ||
| Name: "openssl", | ||
| Version: "1.2.3", | ||
| Qualifiers: Qualifiers{ | ||
| {Key: "download_url", Value: "dl.site.org/openssl"}, | ||
| }, | ||
| }, | ||
| want: "pkg:generic/[email protected]?download_url=dl.site.org%2Fopenssl", | ||
| }, | ||
| } | ||
|
|
||
| for _, tc := range tests { | ||
| t.Run(tc.name, func(t *testing.T) { | ||
| got := tc.given.String() | ||
| if tc.want != got { | ||
| t.Logf("'%s' test failed: wanted: '%s', got '%s'", tc.name, tc.want, got) | ||
| t.Fail() | ||
| } | ||
| }) | ||
| } | ||
| } | ||
|
|
||
| // Exercise the [percentEncode] function. | ||
| func TestPercentEncode(t *testing.T) { | ||
| tests := []struct { | ||
| name string | ||
| givenValue string | ||
| want string | ||
| }{ | ||
| { | ||
| name: "must not percent-encode alphanumeric characters", | ||
| givenValue: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789", | ||
| want: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789", | ||
| }, | ||
|
|
||
| { | ||
| name: "must not percent-encode punctuation characters", | ||
| givenValue: ".-_~", | ||
| want: ".-_~", | ||
| }, | ||
|
|
||
| { | ||
| name: "must not percent-encode colon", | ||
| givenValue: ":", | ||
| want: ":", | ||
| }, | ||
|
|
||
| { | ||
| name: "must percent-encode separator characters other than colon", | ||
| givenValue: "/@?=&#", | ||
| // slash: %2F, at: %40, question mark: %3F, equal: %3D, ampersand: %26, | ||
| // pound-sign: %23 | ||
| want: "%2F%40%3F%3D%26%23", | ||
| }, | ||
|
|
||
| { | ||
| name: "must percent-encode a plus", | ||
| givenValue: "+", | ||
| want: "%2B", | ||
| }, | ||
|
|
||
| { | ||
| name: "must percent-encode a whitespace", | ||
| givenValue: " ", | ||
| want: "%20", | ||
| }, | ||
| } | ||
|
|
||
| for _, tc := range tests { | ||
| t.Run(tc.name, func(t *testing.T) { | ||
| got := percentEncode(tc.givenValue) | ||
| if tc.want != got { | ||
| t.Logf("'%s' test failed: wanted: '%s', got '%s'", tc.name, tc.want, got) | ||
| t.Fail() | ||
| } | ||
| }) | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We cannot use
Values.Encode()since it uses https://pkg.go.dev/net/url#QueryEscape under the hood to encode the value. This works in many cases but has caveats (does not work according to spec for whitespaceand colon:).