diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..e69de29 diff --git a/packageurl.go b/packageurl.go index 0c882ef..e1297ca 100644 --- a/packageurl.go +++ b/packageurl.go @@ -29,6 +29,7 @@ import ( "net/url" "path" "regexp" + "slices" "sort" "strings" ) @@ -258,24 +259,18 @@ type Qualifier struct { Value string } +// String returns a canonical string representation of the qualifier according to [SPEC]. +// +// [SPEC] https://github.com/package-url/purl-spec/blob/main/PURL-SPECIFICATION.rst#rules-for-each-purl-component func (q Qualifier) String() string { // A value must be a percent-encoded string - return fmt.Sprintf("%s=%s", q.Key, url.PathEscape(q.Value)) + return fmt.Sprintf("%s=%s", q.Key, percentEncode(q.Value)) } // Qualifiers is a slice of key=value pairs, with order preserved as it appears // in the package URL. type Qualifiers []Qualifier -// urlQuery returns a raw URL query with all the qualifiers as keys + values. -func (q Qualifiers) urlQuery() (rawQuery string) { - v := make(url.Values) - for _, qq := range q { - v.Add(qq.Key, qq.Value) - } - return v.Encode() -} - // QualifiersFromMap constructs a Qualifiers slice from a string map. To get a // deterministic qualifier order (despite maps not providing any iteration order // guarantees) the returned Qualifiers are sorted in increasing order of key. @@ -305,8 +300,13 @@ func (qq Qualifiers) Map() map[string]string { return m } +// String returns a canonical string representation of the qualifiers according to [SPEC]. +// +// [SPEC] https://github.com/package-url/purl-spec/blob/main/PURL-SPECIFICATION.rst#rules-for-each-purl-component func (qq Qualifiers) String() string { var kvPairs []string + // Canonical form requires qualifier keys to be lexicographically ordered. + slices.SortFunc(qq, func(a, b Qualifier) int { return strings.Compare(a.Key, b.Key) }) for _, q := range qq { kvPairs = append(kvPairs, q.String()) } @@ -364,40 +364,55 @@ func NewPackageURL(purlType, namespace, name, version string, } } -// ToString returns the human-readable instance of the PackageURL structure. -// This is the literal purl as defined by the spec. +// ToString returns a canonical string representation of the qualifier according to [SPEC]. +// +// [SPEC] https://github.com/package-url/purl-spec/blob/main/PURL-SPECIFICATION.rst#rules-for-each-purl-component func (p *PackageURL) ToString() string { u := &url.URL{ Scheme: "pkg", - RawQuery: p.Qualifiers.urlQuery(), - Fragment: p.Subpath, + RawQuery: p.Qualifiers.String(), } paths := []string{p.Type} - // we need to escape each segment by itself, so that we don't escape "/" in the namespace. + // Each namespace segment MUST be a percent-encoded string. + // We need to escape each segment by itself, so that we don't escape "/" in the namespace. for _, segment := range strings.Split(p.Namespace, "/") { if segment == "" { continue } - paths = append(paths, escape(segment)) + paths = append(paths, percentEncode(segment)) } - nameWithVersion := escape(p.Name) + // A name MUST be a percent-encoded string. + nameWithVersion := percentEncode(p.Name) if p.Version != "" { - nameWithVersion += "@" + escape(p.Version) + // A version MUST be a percent-encoded string. + nameWithVersion += "@" + percentEncode(p.Version) } paths = append(paths, nameWithVersion) u.Opaque = strings.Join(paths, "/") - return u.String() + if p.Subpath == "" { + return u.String() + } + + // Each subpath segment MUST be a percent-encoded string. + var subpathSegments []string + for _, segment := range strings.Split(p.Subpath, "/") { + if segment == "" { + continue + } + subpathSegments = append(subpathSegments, percentEncode(segment)) + } + return u.String() + "#" + strings.Join(subpathSegments, "/") } func (p PackageURL) String() string { return p.ToString() } -// FromString parses a valid package url string into a PackageURL structure +// FromString parses a valid package url string into a [PackageURL]. func FromString(purl string) (PackageURL, error) { u, err := url.Parse(purl) if err != nil { @@ -473,15 +488,18 @@ func (p *PackageURL) Normalize() error { return validCustomRules(*p) } -// escape the given string in a purl-compatible way. -func escape(s string) string { - // for compatibility with other implementations and the purl-spec, we want to escape all - // characters, which is what "QueryEscape" does. The issue with QueryEscape is that it encodes - // " " (space) as "+", which is valid in a query, but invalid in a path (see - // https://stackoverflow.com/questions/2678551/when-should-space-be-encoded-to-plus-or-20) for - // context). - // To work around that, we replace the "+" signs with the path-compatible "%20". - return strings.ReplaceAll(url.QueryEscape(s), "+", "%20") +// percentEncode percent-encodes a purl component according to [Encoding]. +// +// [Encoding] https://github.com/package-url/purl-spec/blob/main/PURL-SPECIFICATION.rst#character-encoding +func percentEncode(s string) string { + // [url.QueryEscape] gets us most of the way. + s = url.QueryEscape(s) + // ... but we need to correct its output to conform to the purl spec. + replacer := strings.NewReplacer( + "%3A", ":", // Spec says colon MUST NOT be encoded. + "+", "%20", // A space must be percent-encoded, not turned to a '+'. + ) + return replacer.Replace(s) } func separateNamespaceNameVersion(path string) (ns, name, version string, err error) { @@ -637,41 +655,29 @@ func validType(typ string) bool { // validCustomRules evaluates additional rules for each package url type, as specified in the package-url specification. // On success, it returns nil. On failure, a descriptive error will be returned. func validCustomRules(p PackageURL) error { - q := p.Qualifiers.Map() switch p.Type { - case TypeConan: - if p.Namespace != "" { - if val, ok := q["channel"]; ok { - if val == "" { - return errors.New("the qualifier channel must be not empty if namespace is present") - } - } else { - return errors.New("channel qualifier does not exist") - } - } else { - if val, ok := q["channel"]; ok { - if val != "" { - return errors.New("namespace is required if channel is non empty") - } - } - } case TypeCpan: + // It MUST be written uppercase and is required. + if p.Namespace == "" { + return errors.New("a cpan purl must have a namespace") + } + if strings.ToUpper(p.Namespace) != p.Namespace { + return errors.New("a cpan purl namespace must use uppercase characters") + } + + // A distribution name MUST NOT contain the string '::'. + distName := p.Name + if strings.Contains(distName, "::") { + return errors.New("a cpan distribution name must not contain '::'") + } + case TypeJulia: + // The spec prohibits a namespace. if p.Namespace != "" { - // The purl refers to a CPAN distribution. - publisher := p.Namespace - if publisher != strings.ToUpper(publisher) { - return errors.New("a cpan distribution namespace must be all uppercase") - } - distName := p.Name - if strings.Contains(distName, "::") { - return errors.New("a cpan distribution name must not contain '::'") - } - } else { - // The purl refers to a CPAN module. - moduleName := p.Name - if strings.Contains(moduleName, "-") { - return errors.New("a cpan module name may not contain dashes") - } + return errors.New("a julia purl must not have a namespace") + } + // The spec requires the presence of a uuid qualifier. + if _, ok := p.Qualifiers.Map()["uuid"]; !ok { + return errors.New("a julia purl must have a uuid qualifier") } case TypeSwift: if p.Namespace == "" { @@ -680,10 +686,6 @@ func validCustomRules(p PackageURL) error { if p.Version == "" { return errors.New("version is required") } - case TypeCran: - if p.Version == "" { - return errors.New("version is required") - } } return nil } diff --git a/qualifier_test.go b/qualifier_test.go new file mode 100644 index 0000000..aaf5ee1 --- /dev/null +++ b/qualifier_test.go @@ -0,0 +1,161 @@ +package packageurl + +import "testing" + +// Verifies that qualifier values are properly percent-encoded. +func TestQualifierValueEncoding(t *testing.T) { + tests := []struct { + name string + given PackageURL + want string + }{ + { + name: "must percent-encode a qualifier value", + given: PackageURL{ + Type: "generic", + Name: "openssl", + Version: "1.2.3", + Qualifiers: Qualifiers{ + {Key: "download_url", Value: "dl.site.org/openssl"}, + }, + }, + want: "pkg:generic/openssl@1.2.3?download_url=dl.site.org%2Fopenssl", + }, + + { + name: "must not percent-encode colons in qualifier values", + given: PackageURL{ + Type: "generic", + Name: "openssl", + Version: "1.2.3", + Qualifiers: Qualifiers{ + {Key: "download_url", Value: "dl.site.org:443/openssl"}, + }, + }, + want: "pkg:generic/openssl@1.2.3?download_url=dl.site.org:443%2Fopenssl", + }, + + // Note: checks that the use of [url.QueryEscape] does not lead to a space being + // encoded as "+". + { + name: "must properly percent-encode a plus", + given: PackageURL{ + Type: "generic", + Name: "openssl", + Version: "1.2.3", + Qualifiers: Qualifiers{ + {Key: "download_url", Value: "dl.site.org:443/openssl secure"}, + }, + }, + want: "pkg:generic/openssl@1.2.3?download_url=dl.site.org:443%2Fopenssl%20secure", + }, + + { + name: "must order qualifier values lexicographically by key", + given: PackageURL{ + Type: "generic", + Name: "openssl", + Version: "1.2.3", + Qualifiers: Qualifiers{ + {Key: "download_url", Value: "dl.site.org"}, + {Key: "checksum", Value: "sha256:abc123"}, + }, + }, + want: "pkg:generic/openssl@1.2.3?checksum=sha256:abc123&download_url=dl.site.org", + }, + + { + name: "must percent-encode separators in qualifier value", + given: PackageURL{ + Type: "generic", + Name: "openssl", + Version: "1.2.3", + Qualifiers: Qualifiers{ + {Key: "download_url", Value: "https://dl.site.org:443/openssl+secure?type=zip&fast=yes"}, + {Key: "checksum", Value: "sha256:abc123"}, + }, + }, + want: "pkg:generic/openssl@1.2.3?checksum=sha256:abc123&download_url=https:%2F%2Fdl.site.org:443%2Fopenssl%2Bsecure%3Ftype%3Dzip%26fast%3Dyes", + }, + + { + name: "must escape a qualifier value", + given: PackageURL{ + Type: "generic", + Name: "openssl", + Version: "1.2.3", + Qualifiers: Qualifiers{ + {Key: "download_url", Value: "dl.site.org/openssl"}, + }, + }, + want: "pkg:generic/openssl@1.2.3?download_url=dl.site.org%2Fopenssl", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got := tc.given.String() + if tc.want != got { + t.Logf("'%s' test failed: wanted: '%s', got '%s'", tc.name, tc.want, got) + t.Fail() + } + }) + } +} + +// Exercise the [percentEncode] function. +func TestPercentEncode(t *testing.T) { + tests := []struct { + name string + givenValue string + want string + }{ + { + name: "must not percent-encode alphanumeric characters", + givenValue: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789", + want: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789", + }, + + { + name: "must not percent-encode punctuation characters", + givenValue: ".-_~", + want: ".-_~", + }, + + { + name: "must not percent-encode colon", + givenValue: ":", + want: ":", + }, + + { + name: "must percent-encode separator characters other than colon", + givenValue: "/@?=&#", + // slash: %2F, at: %40, question mark: %3F, equal: %3D, ampersand: %26, + // pound-sign: %23 + want: "%2F%40%3F%3D%26%23", + }, + + { + name: "must percent-encode a plus", + givenValue: "+", + want: "%2B", + }, + + { + name: "must percent-encode a whitespace", + givenValue: " ", + want: "%20", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got := percentEncode(tc.givenValue) + if tc.want != got { + t.Logf("'%s' test failed: wanted: '%s', got '%s'", tc.name, tc.want, got) + t.Fail() + } + }) + } +} diff --git a/testdata/purl-spec b/testdata/purl-spec index 9041aa7..0c3bc11 160000 --- a/testdata/purl-spec +++ b/testdata/purl-spec @@ -1 +1 @@ -Subproject commit 9041aa74236686b23153652f8cd3862eef8c33a9 +Subproject commit 0c3bc118ac5c001e067ba42fba8501405514f1a9