Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file added go.sum
Empty file.
132 changes: 67 additions & 65 deletions packageurl.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
"net/url"
"path"
"regexp"
"slices"
"sort"
"strings"
)
Expand Down Expand Up @@ -258,24 +259,18 @@ type Qualifier struct {
Value string
}

// String returns a canonical string representation of the qualifier according to [SPEC].
//
// [SPEC] https://github.com/package-url/purl-spec/blob/main/PURL-SPECIFICATION.rst#rules-for-each-purl-component
func (q Qualifier) String() string {
// A value must be a percent-encoded string
return fmt.Sprintf("%s=%s", q.Key, url.PathEscape(q.Value))
return fmt.Sprintf("%s=%s", q.Key, percentEncode(q.Value))
}

// Qualifiers is a slice of key=value pairs, with order preserved as it appears
// in the package URL.
type Qualifiers []Qualifier

// urlQuery returns a raw URL query with all the qualifiers as keys + values.
func (q Qualifiers) urlQuery() (rawQuery string) {
v := make(url.Values)
for _, qq := range q {
v.Add(qq.Key, qq.Value)
}
return v.Encode()
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We cannot use Values.Encode() since it uses https://pkg.go.dev/net/url#QueryEscape under the hood to encode the value. This works in many cases but has caveats (does not work according to spec for whitespace and colon :).

}

// QualifiersFromMap constructs a Qualifiers slice from a string map. To get a
// deterministic qualifier order (despite maps not providing any iteration order
// guarantees) the returned Qualifiers are sorted in increasing order of key.
Expand Down Expand Up @@ -305,8 +300,13 @@ func (qq Qualifiers) Map() map[string]string {
return m
}

// String returns a canonical string representation of the qualifiers according to [SPEC].
//
// [SPEC] https://github.com/package-url/purl-spec/blob/main/PURL-SPECIFICATION.rst#rules-for-each-purl-component
func (qq Qualifiers) String() string {
var kvPairs []string
// Canonical form requires qualifier keys to be lexicographically ordered.
slices.SortFunc(qq, func(a, b Qualifier) int { return strings.Compare(a.Key, b.Key) })
for _, q := range qq {
kvPairs = append(kvPairs, q.String())
}
Expand Down Expand Up @@ -364,40 +364,55 @@ func NewPackageURL(purlType, namespace, name, version string,
}
}

// ToString returns the human-readable instance of the PackageURL structure.
// This is the literal purl as defined by the spec.
// ToString returns a canonical string representation of the qualifier according to [SPEC].
//
// [SPEC] https://github.com/package-url/purl-spec/blob/main/PURL-SPECIFICATION.rst#rules-for-each-purl-component
func (p *PackageURL) ToString() string {
u := &url.URL{
Scheme: "pkg",
RawQuery: p.Qualifiers.urlQuery(),
Fragment: p.Subpath,
Copy link
Collaborator Author

@petergardfjall petergardfjall Aug 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note: we cannot use url.Fragment since url.String will call u.EscapedFragment() and we end up with an incorrect escaping (similar to url.QueryEscape I believe).

Instead we "manually" add any fragment part at the end.

RawQuery: p.Qualifiers.String(),
}

paths := []string{p.Type}
// we need to escape each segment by itself, so that we don't escape "/" in the namespace.
// Each namespace segment MUST be a percent-encoded string.
// We need to escape each segment by itself, so that we don't escape "/" in the namespace.
for _, segment := range strings.Split(p.Namespace, "/") {
if segment == "" {
continue
}
paths = append(paths, escape(segment))
paths = append(paths, percentEncode(segment))
}

nameWithVersion := escape(p.Name)
// A name MUST be a percent-encoded string.
nameWithVersion := percentEncode(p.Name)
if p.Version != "" {
nameWithVersion += "@" + escape(p.Version)
// A version MUST be a percent-encoded string.
nameWithVersion += "@" + percentEncode(p.Version)
}

paths = append(paths, nameWithVersion)

u.Opaque = strings.Join(paths, "/")
return u.String()
if p.Subpath == "" {
return u.String()
}

// Each subpath segment MUST be a percent-encoded string.
var subpathSegments []string
for _, segment := range strings.Split(p.Subpath, "/") {
if segment == "" {
continue
}
subpathSegments = append(subpathSegments, percentEncode(segment))
}
return u.String() + "#" + strings.Join(subpathSegments, "/")
}

func (p PackageURL) String() string {
return p.ToString()
}

// FromString parses a valid package url string into a PackageURL structure
// FromString parses a valid package url string into a [PackageURL].
func FromString(purl string) (PackageURL, error) {
u, err := url.Parse(purl)
if err != nil {
Expand Down Expand Up @@ -473,15 +488,18 @@ func (p *PackageURL) Normalize() error {
return validCustomRules(*p)
}

// escape the given string in a purl-compatible way.
func escape(s string) string {
// for compatibility with other implementations and the purl-spec, we want to escape all
// characters, which is what "QueryEscape" does. The issue with QueryEscape is that it encodes
// " " (space) as "+", which is valid in a query, but invalid in a path (see
// https://stackoverflow.com/questions/2678551/when-should-space-be-encoded-to-plus-or-20) for
// context).
// To work around that, we replace the "+" signs with the path-compatible "%20".
return strings.ReplaceAll(url.QueryEscape(s), "+", "%20")
// percentEncode percent-encodes a purl component according to [Encoding].
//
// [Encoding] https://github.com/package-url/purl-spec/blob/main/PURL-SPECIFICATION.rst#character-encoding
func percentEncode(s string) string {
// [url.QueryEscape] gets us most of the way.
s = url.QueryEscape(s)
// ... but we need to correct its output to conform to the purl spec.
replacer := strings.NewReplacer(
"%3A", ":", // Spec says colon MUST NOT be encoded.
"+", "%20", // A space must be percent-encoded, not turned to a '+'.
)
return replacer.Replace(s)
}

func separateNamespaceNameVersion(path string) (ns, name, version string, err error) {
Expand Down Expand Up @@ -637,41 +655,29 @@ func validType(typ string) bool {
// validCustomRules evaluates additional rules for each package url type, as specified in the package-url specification.
// On success, it returns nil. On failure, a descriptive error will be returned.
func validCustomRules(p PackageURL) error {
q := p.Qualifiers.Map()
switch p.Type {
case TypeConan:
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The conan spec has been relaxed.

if p.Namespace != "" {
if val, ok := q["channel"]; ok {
if val == "" {
return errors.New("the qualifier channel must be not empty if namespace is present")
}
} else {
return errors.New("channel qualifier does not exist")
}
} else {
if val, ok := q["channel"]; ok {
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The conan spec has been relaxed.

if val != "" {
return errors.New("namespace is required if channel is non empty")
}
}
}
case TypeCpan:
// It MUST be written uppercase and is required.
if p.Namespace == "" {
return errors.New("a cpan purl must have a namespace")
}
if strings.ToUpper(p.Namespace) != p.Namespace {
return errors.New("a cpan purl namespace must use uppercase characters")
}

// A distribution name MUST NOT contain the string '::'.
distName := p.Name
if strings.Contains(distName, "::") {
return errors.New("a cpan distribution name must not contain '::'")
}
case TypeJulia:
// The spec prohibits a namespace.
if p.Namespace != "" {
// The purl refers to a CPAN distribution.
publisher := p.Namespace
if publisher != strings.ToUpper(publisher) {
return errors.New("a cpan distribution namespace must be all uppercase")
}
distName := p.Name
if strings.Contains(distName, "::") {
return errors.New("a cpan distribution name must not contain '::'")
}
} else {
// The purl refers to a CPAN module.
moduleName := p.Name
if strings.Contains(moduleName, "-") {
return errors.New("a cpan module name may not contain dashes")
}
return errors.New("a julia purl must not have a namespace")
}
// The spec requires the presence of a uuid qualifier.
if _, ok := p.Qualifiers.Map()["uuid"]; !ok {
return errors.New("a julia purl must have a uuid qualifier")
}
case TypeSwift:
if p.Namespace == "" {
Expand All @@ -680,10 +686,6 @@ func validCustomRules(p PackageURL) error {
if p.Version == "" {
return errors.New("version is required")
}
case TypeCran:
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if p.Version == "" {
return errors.New("version is required")
}
}
return nil
}
161 changes: 161 additions & 0 deletions qualifier_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
package packageurl

import "testing"

// Verifies that qualifier values are properly percent-encoded.
func TestQualifierValueEncoding(t *testing.T) {
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added some explicit testing of qualifier encoding (outside of purl-spec test suite) since this was a problematic spot which had poor coverage in the upstream tests.

tests := []struct {
name string
given PackageURL
want string
}{
{
name: "must percent-encode a qualifier value",
given: PackageURL{
Type: "generic",
Name: "openssl",
Version: "1.2.3",
Qualifiers: Qualifiers{
{Key: "download_url", Value: "dl.site.org/openssl"},
},
},
want: "pkg:generic/[email protected]?download_url=dl.site.org%2Fopenssl",
},

{
name: "must not percent-encode colons in qualifier values",
given: PackageURL{
Type: "generic",
Name: "openssl",
Version: "1.2.3",
Qualifiers: Qualifiers{
{Key: "download_url", Value: "dl.site.org:443/openssl"},
},
},
want: "pkg:generic/[email protected]?download_url=dl.site.org:443%2Fopenssl",
},

// Note: checks that the use of [url.QueryEscape] does not lead to a space being
// encoded as "+".
{
name: "must properly percent-encode a plus",
given: PackageURL{
Type: "generic",
Name: "openssl",
Version: "1.2.3",
Qualifiers: Qualifiers{
{Key: "download_url", Value: "dl.site.org:443/openssl secure"},
},
},
want: "pkg:generic/[email protected]?download_url=dl.site.org:443%2Fopenssl%20secure",
},

{
name: "must order qualifier values lexicographically by key",
given: PackageURL{
Type: "generic",
Name: "openssl",
Version: "1.2.3",
Qualifiers: Qualifiers{
{Key: "download_url", Value: "dl.site.org"},
{Key: "checksum", Value: "sha256:abc123"},
},
},
want: "pkg:generic/[email protected]?checksum=sha256:abc123&download_url=dl.site.org",
},

{
name: "must percent-encode separators in qualifier value",
given: PackageURL{
Type: "generic",
Name: "openssl",
Version: "1.2.3",
Qualifiers: Qualifiers{
{Key: "download_url", Value: "https://dl.site.org:443/openssl+secure?type=zip&fast=yes"},
{Key: "checksum", Value: "sha256:abc123"},
},
},
want: "pkg:generic/[email protected]?checksum=sha256:abc123&download_url=https:%2F%2Fdl.site.org:443%2Fopenssl%2Bsecure%3Ftype%3Dzip%26fast%3Dyes",
},

{
name: "must escape a qualifier value",
given: PackageURL{
Type: "generic",
Name: "openssl",
Version: "1.2.3",
Qualifiers: Qualifiers{
{Key: "download_url", Value: "dl.site.org/openssl"},
},
},
want: "pkg:generic/[email protected]?download_url=dl.site.org%2Fopenssl",
},
}

for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
got := tc.given.String()
if tc.want != got {
t.Logf("'%s' test failed: wanted: '%s', got '%s'", tc.name, tc.want, got)
t.Fail()
}
})
}
}

// Exercise the [percentEncode] function.
func TestPercentEncode(t *testing.T) {
tests := []struct {
name string
givenValue string
want string
}{
{
name: "must not percent-encode alphanumeric characters",
givenValue: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789",
want: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789",
},

{
name: "must not percent-encode punctuation characters",
givenValue: ".-_~",
want: ".-_~",
},

{
name: "must not percent-encode colon",
givenValue: ":",
want: ":",
},

{
name: "must percent-encode separator characters other than colon",
givenValue: "/@?=&#",
// slash: %2F, at: %40, question mark: %3F, equal: %3D, ampersand: %26,
// pound-sign: %23
want: "%2F%40%3F%3D%26%23",
},

{
name: "must percent-encode a plus",
givenValue: "+",
want: "%2B",
},

{
name: "must percent-encode a whitespace",
givenValue: " ",
want: "%20",
},
}

for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
got := percentEncode(tc.givenValue)
if tc.want != got {
t.Logf("'%s' test failed: wanted: '%s', got '%s'", tc.name, tc.want, got)
t.Fail()
}
})
}
}
2 changes: 1 addition & 1 deletion testdata/purl-spec
Loading