Skip to content

Commit bb6ad0a

Browse files
resolve host detection using url
1 parent 042c9f5 commit bb6ad0a

File tree

4 files changed

+49
-216
lines changed

4 files changed

+49
-216
lines changed

detect_gcs.go

Lines changed: 11 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,7 @@ package getter
66
import (
77
"fmt"
88
"net/url"
9-
"path"
10-
"regexp"
119
"strings"
12-
"unicode"
1310
)
1411

1512
// GCSDetector implements Detector to detect GCS URLs and turn
@@ -21,39 +18,32 @@ func (d *GCSDetector) Detect(src, _ string) (string, bool, error) {
2118
return "", false, nil
2219
}
2320

24-
if strings.Contains(src, ".googleapis.com/") {
25-
return d.detectHTTP(src)
21+
if !strings.HasPrefix(src, "http://") && !strings.HasPrefix(src, "https://") {
22+
src = "https://" + src
23+
}
24+
25+
parsedURL, err := url.Parse(src)
26+
if err != nil {
27+
return "", false, fmt.Errorf("error parsing GCS URL")
28+
}
29+
30+
if strings.HasSuffix(parsedURL.Host, ".googleapis.com") {
31+
return d.detectHTTP(strings.ReplaceAll(src, "https://", ""))
2632
}
2733

2834
return "", false, nil
2935
}
3036

3137
func (d *GCSDetector) detectHTTP(src string) (string, bool, error) {
32-
src = path.Clean(src)
3338

3439
parts := strings.Split(src, "/")
3540
if len(parts) < 5 {
3641
return "", false, fmt.Errorf(
3742
"URL is not a valid GCS URL")
3843
}
39-
4044
version := parts[2]
41-
if !isValidGCSVersion(version) {
42-
return "", false, fmt.Errorf(
43-
"GCS URL version is not valid")
44-
}
45-
4645
bucket := parts[3]
47-
if !isValidGCSBucketName(bucket) {
48-
return "", false, fmt.Errorf(
49-
"GCS URL bucket name is not valid")
50-
}
51-
5246
object := strings.Join(parts[4:], "/")
53-
if !isValidGCSObjectName(object) {
54-
return "", false, fmt.Errorf(
55-
"GCS URL object name is not valid")
56-
}
5747

5848
url, err := url.Parse(fmt.Sprintf("https://www.googleapis.com/storage/%s/%s/%s",
5949
version, bucket, object))
@@ -63,92 +53,3 @@ func (d *GCSDetector) detectHTTP(src string) (string, bool, error) {
6353

6454
return "gcs::" + url.String(), true, nil
6555
}
66-
67-
func isValidGCSVersion(version string) bool {
68-
versionPattern := `^v\d+$`
69-
if matched, _ := regexp.MatchString(versionPattern, version); !matched {
70-
return false
71-
}
72-
return true
73-
}
74-
75-
// Validate the bucket name using the following rules: https://cloud.google.com/storage/docs/naming-buckets
76-
func isValidGCSBucketName(bucket string) bool {
77-
// Rule 1: Must be between 3 and 63 characters (or up to 222 if it contains dots, each component up to 63 chars)
78-
if len(bucket) < 3 || len(bucket) > 63 {
79-
if len(bucket) > 63 && len(bucket) <= 222 {
80-
// If it contains dots, each segment between dots must be <= 63 chars
81-
components := strings.Split(bucket, ".")
82-
for _, component := range components {
83-
if len(component) > 63 {
84-
return false
85-
}
86-
}
87-
} else {
88-
return false
89-
}
90-
}
91-
92-
// Rule 2: Bucket name cannot start or end with a hyphen, dot, or underscore
93-
if bucket[0] == '-' || bucket[0] == '.' || bucket[len(bucket)-1] == '-' || bucket[len(bucket)-1] == '.' || bucket[len(bucket)-1] == '_' {
94-
return false
95-
}
96-
97-
// Rule 3: Bucket name cannot contain spaces
98-
if strings.Contains(bucket, " ") {
99-
return false
100-
}
101-
102-
// Rule 4: Bucket name cannot be an IP address (only digits and dots, e.g., 192.168.5.4)
103-
ipPattern := `^(\d{1,3}\.){3}\d{1,3}$`
104-
if matched, _ := regexp.MatchString(ipPattern, bucket); matched {
105-
return false
106-
}
107-
108-
// Rule 5: Bucket name cannot start with "goog"
109-
if strings.HasPrefix(bucket, "goog") {
110-
return false
111-
}
112-
113-
// Rule 6: Bucket name cannot contain "google" or common misspellings like "g00gle"
114-
googlePattern := `google|g00gle`
115-
if matched, _ := regexp.MatchString(googlePattern, bucket); matched {
116-
return false
117-
}
118-
119-
// Rule 7: Bucket name can only contain lowercase letters, digits, dashes, underscores, and dots
120-
bucketPattern := `^[a-z0-9\-_\.]+$`
121-
if matched, _ := regexp.MatchString(bucketPattern, bucket); !matched {
122-
return false
123-
}
124-
125-
return true
126-
}
127-
128-
// Validate the object name using the following rules: https://cloud.google.com/storage/docs/naming-objects
129-
func isValidGCSObjectName(object string) bool {
130-
// Rule 1: Object names cannot contain Carriage Return (\r) or Line Feed (\n) characters
131-
if strings.Contains(object, "\r") || strings.Contains(object, "\n") {
132-
return false
133-
}
134-
135-
// Rule 2: Object names cannot start with '.well-known/acme-challenge/'
136-
if strings.HasPrefix(object, ".well-known/acme-challenge/") {
137-
return false
138-
}
139-
140-
// Rule 3: Object names cannot be exactly '.' or '..'
141-
if object == "." || object == ".." {
142-
return false
143-
}
144-
145-
// Rule 4: Ensure that the object name contains only valid Unicode characters
146-
// (for simplicity, let's ensure it's not empty and does not contain any forbidden control characters)
147-
for _, r := range object {
148-
if !unicode.IsPrint(r) && !unicode.IsSpace(r) && r != '.' && r != '-' && r != '/' {
149-
return false
150-
}
151-
}
152-
153-
return true
154-
}

detect_gcs_test.go

Lines changed: 3 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -73,31 +73,8 @@ func TestGCSDetector_MalformedDetectHTTP(t *testing.T) {
7373
"",
7474
},
7575
{
76-
"not valid version",
77-
"www.googleapis.com/storage/invalid-version/my-bucket/foo",
78-
"GCS URL version is not valid",
79-
"",
80-
},
81-
{
82-
"not valid bucket",
83-
"www.googleapis.com/storage/v1/127.0.0.1/foo",
84-
"GCS URL bucket name is not valid",
85-
"",
86-
},
87-
{
88-
"not valid object",
89-
"www.googleapis.com/storage/v1/my-bucket/.well-known/acme-challenge/foo",
90-
"GCS URL object name is not valid",
91-
"",
92-
},
93-
{
94-
"path traversal on bucket",
95-
"www.googleapis.com/storage/v1/../foo/bar",
96-
"URL is not a valid GCS URL",
97-
"",
98-
}, {
99-
"path traversal on object",
100-
"www.googleapis.com/storage/v1/my-bucket/../../../foo/bar",
76+
"not valid url length",
77+
"www.invalid.com/storage/v1",
10178
"URL is not a valid GCS URL",
10279
"",
10380
},
@@ -114,85 +91,7 @@ func TestGCSDetector_MalformedDetectHTTP(t *testing.T) {
11491
}
11592

11693
if output != tc.Output {
117-
t.Fatalf("expected %s, got %s", tc.Output, output)
118-
}
119-
}
120-
}
121-
122-
func TestIsValidGCSVersion(t *testing.T) {
123-
cases := []struct {
124-
Name string
125-
Input string
126-
Expected bool
127-
}{
128-
{
129-
"valid version",
130-
"v1",
131-
true,
132-
},
133-
{
134-
"invalid version",
135-
"invalid1",
136-
false,
137-
},
138-
}
139-
140-
for _, tc := range cases {
141-
output := isValidGCSVersion(tc.Input)
142-
if output != tc.Expected {
143-
t.Fatalf("expected %t, got %t for test %s", tc.Expected, output, tc.Name)
144-
}
145-
}
146-
}
147-
148-
func TestIsValidGCSBucketName(t *testing.T) {
149-
cases := []struct {
150-
Name string
151-
Input string
152-
Expected bool
153-
}{
154-
{
155-
"valid bucket name",
156-
"my-bucket",
157-
true,
158-
},
159-
{
160-
"invalid bucket name",
161-
"..",
162-
false,
163-
},
164-
}
165-
166-
for _, tc := range cases {
167-
output := isValidGCSBucketName(tc.Input)
168-
if output != tc.Expected {
169-
t.Fatalf("expected %t, got %t for test %s", tc.Expected, output, tc.Name)
170-
}
171-
}
172-
}
173-
174-
func TestIsValidGCSObjectName(t *testing.T) {
175-
cases := []struct {
176-
Name string
177-
Input string
178-
Expected bool
179-
}{
180-
{
181-
"valid object name",
182-
"my-object",
183-
true,
184-
},
185-
{
186-
"invalid object name",
187-
"..",
188-
false,
189-
},
190-
}
191-
192-
for _, tc := range cases {
193-
output := isValidGCSObjectName(tc.Input)
194-
if output != tc.Expected {
195-
t.Fatalf("expected %t, got %t for test %s", tc.Expected, output, tc.Name)
94+
t.Fatalf("expected %s, got %s for %s", tc.Output, output, tc.Name)
19695
}
19796
}
19897
}

detect_s3.go

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,17 @@ func (d *S3Detector) Detect(src, _ string) (string, bool, error) {
1818
return "", false, nil
1919
}
2020

21-
if strings.Contains(src, ".amazonaws.com/") {
22-
return d.detectHTTP(src)
21+
if !strings.HasPrefix(src, "http://") && !strings.HasPrefix(src, "https://") {
22+
src = "https://" + src
23+
}
24+
25+
parsedURL, err := url.Parse(src)
26+
if err != nil {
27+
return "", false, fmt.Errorf("error parsing S3 URL")
28+
}
29+
30+
if strings.HasSuffix(parsedURL.Host, ".amazonaws.com") {
31+
return d.detectHTTP(strings.ReplaceAll(src, "https://", ""))
2332
}
2433

2534
return "", false, nil

get_gcs_test.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,30 @@ func TestGCSGetter_GetFile_OAuthAccessToken(t *testing.T) {
234234
assertContents(t, dst, "# Main\n")
235235
}
236236

237+
func Test_GCSGetter_ParseUrl(t *testing.T) {
238+
tests := []struct {
239+
name string
240+
url string
241+
}{
242+
{
243+
name: "valid host",
244+
url: "https://www.googleapis.com/storage/v1/hc-go-getter-test/go-getter/foobar",
245+
},
246+
}
247+
for _, tt := range tests {
248+
t.Run(tt.name, func(t *testing.T) {
249+
g := new(GCSGetter)
250+
u, err := url.Parse(tt.url)
251+
if err != nil {
252+
t.Fatalf("unexpected error: %s", err)
253+
}
254+
_, _, _, err = g.parseURL(u)
255+
if err != nil {
256+
t.Fatalf("wasn't expecting error, got %s", err)
257+
}
258+
})
259+
}
260+
}
237261
func Test_GCSGetter_ParseUrl_Malformed(t *testing.T) {
238262
tests := []struct {
239263
name string

0 commit comments

Comments
 (0)