@@ -6,10 +6,7 @@ package getter
66import (
77 "fmt"
88 "net/url"
9- "path"
10- "regexp"
119 "strings"
12- "unicode"
1310)
1411
1512// GCSDetector implements Detector to detect GCS URLs and turn
@@ -21,39 +18,32 @@ func (d *GCSDetector) Detect(src, _ string) (string, bool, error) {
2118 return "" , false , nil
2219 }
2320
24- if strings .Contains (src , ".googleapis.com/" ) {
25- return d .detectHTTP (src )
21+ if ! strings .HasPrefix (src , "http://" ) && ! strings .HasPrefix (src , "https://" ) {
22+ src = "https://" + src
23+ }
24+
25+ parsedURL , err := url .Parse (src )
26+ if err != nil {
27+ return "" , false , fmt .Errorf ("error parsing GCS URL" )
28+ }
29+
30+ if strings .HasSuffix (parsedURL .Host , ".googleapis.com" ) {
31+ return d .detectHTTP (strings .ReplaceAll (src , "https://" , "" ))
2632 }
2733
2834 return "" , false , nil
2935}
3036
3137func (d * GCSDetector ) detectHTTP (src string ) (string , bool , error ) {
32- src = path .Clean (src )
3338
3439 parts := strings .Split (src , "/" )
3540 if len (parts ) < 5 {
3641 return "" , false , fmt .Errorf (
3742 "URL is not a valid GCS URL" )
3843 }
39-
4044 version := parts [2 ]
41- if ! isValidGCSVersion (version ) {
42- return "" , false , fmt .Errorf (
43- "GCS URL version is not valid" )
44- }
45-
4645 bucket := parts [3 ]
47- if ! isValidGCSBucketName (bucket ) {
48- return "" , false , fmt .Errorf (
49- "GCS URL bucket name is not valid" )
50- }
51-
5246 object := strings .Join (parts [4 :], "/" )
53- if ! isValidGCSObjectName (object ) {
54- return "" , false , fmt .Errorf (
55- "GCS URL object name is not valid" )
56- }
5747
5848 url , err := url .Parse (fmt .Sprintf ("https://www.googleapis.com/storage/%s/%s/%s" ,
5949 version , bucket , object ))
@@ -63,92 +53,3 @@ func (d *GCSDetector) detectHTTP(src string) (string, bool, error) {
6353
6454 return "gcs::" + url .String (), true , nil
6555}
66-
// gcsVersionPattern matches a GCS API version path segment: a lowercase "v"
// followed by one or more decimal digits (for example "v1").
// It is compiled once at package initialisation instead of on every call.
var gcsVersionPattern = regexp.MustCompile(`^v\d+$`)

// isValidGCSVersion reports whether version looks like a GCS API version
// segment such as "v1" or "v12". The whole string must match; an empty
// string, a bare "v", an upper-case "V", or any trailing characters are
// rejected.
func isValidGCSVersion(version string) bool {
	return gcsVersionPattern.MatchString(version)
}
74-
// Patterns used by isValidGCSBucketName, compiled once at package
// initialisation rather than on every call.
var (
	// gcsBucketIPPattern matches names shaped like a dotted-decimal IPv4
	// address (four groups of one to three digits), e.g. "192.168.5.4".
	gcsBucketIPPattern = regexp.MustCompile(`^(\d{1,3}\.){3}\d{1,3}$`)

	// gcsBucketCharsPattern matches names made up solely of lowercase
	// letters, digits, dashes, underscores, and dots.
	gcsBucketCharsPattern = regexp.MustCompile(`^[a-z0-9._-]+$`)
)

// isValidGCSBucketName reports whether bucket satisfies the bucket naming
// rules described at https://cloud.google.com/storage/docs/naming-buckets.
//
// A name is accepted only if all of the following hold:
//   - it is 3 to 63 bytes long, or up to 222 bytes when every dot-separated
//     component is at most 63 bytes;
//   - it starts and ends with a lowercase letter or a digit;
//   - it contains only lowercase letters, digits, dashes, underscores, and
//     dots (which also rules out spaces and upper-case letters);
//   - it is not shaped like a dotted-decimal IP address;
//   - it does not start with "goog";
//   - it does not contain "google" or the misspelling "g00gle".
func isValidGCSBucketName(bucket string) bool {
	n := len(bucket)
	switch {
	case n < 3 || n > 222:
		return false
	case n > 63:
		// Names longer than 63 bytes are only allowed when split by dots
		// into components of at most 63 bytes each. A long name without
		// dots is a single oversized component and is rejected here.
		for _, component := range strings.Split(bucket, ".") {
			if len(component) > 63 {
				return false
			}
		}
	}

	// The first and last byte must be a letter or digit. n >= 3 here, so
	// both index expressions are in range. This rejects a leading
	// underscore as well as a trailing one.
	if !isGCSLowerAlnum(bucket[0]) || !isGCSLowerAlnum(bucket[n-1]) {
		return false
	}

	// Restrict the character set; this also excludes spaces.
	if !gcsBucketCharsPattern.MatchString(bucket) {
		return false
	}

	if gcsBucketIPPattern.MatchString(bucket) {
		return false
	}

	if strings.HasPrefix(bucket, "goog") {
		return false
	}

	if strings.Contains(bucket, "google") || strings.Contains(bucket, "g00gle") {
		return false
	}

	return true
}

// isGCSLowerAlnum reports whether c is an ASCII lowercase letter or digit.
func isGCSLowerAlnum(c byte) bool {
	return (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9')
}
127-
// gcsMaxObjectNameBytes is the maximum UTF-8 encoded length of an object
// name per https://cloud.google.com/storage/docs/naming-objects.
const gcsMaxObjectNameBytes = 1024

// isValidGCSObjectName reports whether object satisfies the object naming
// rules described at https://cloud.google.com/storage/docs/naming-objects.
//
// A name is accepted only if all of the following hold:
//   - it is non-empty and at most 1024 bytes long;
//   - it contains no carriage return or line feed character;
//   - it does not start with ".well-known/acme-challenge/";
//   - it is not exactly "." or "..";
//   - every rune is either printable or white space, which excludes other
//     control characters such as NUL.
func isValidGCSObjectName(object string) bool {
	if object == "" || len(object) > gcsMaxObjectNameBytes {
		return false
	}

	// CR and LF are checked explicitly because the rune loop below lets
	// white space through, and both count as white space.
	if strings.ContainsAny(object, "\r\n") {
		return false
	}

	if strings.HasPrefix(object, ".well-known/acme-challenge/") {
		return false
	}

	if object == "." || object == ".." {
		return false
	}

	for _, r := range object {
		if !unicode.IsPrint(r) && !unicode.IsSpace(r) {
			return false
		}
	}

	return true
}
0 commit comments