@@ -163,7 +163,7 @@ pub fn expanded_from_single_directory(addrs: &[PlPath], expanded_addrs: &[PlPath
163163pub fn expand_paths (
164164 paths : & [ PlPath ] ,
165165 glob : bool ,
166- #[ allow( unused_variables) ] cloud_options : Option < & CloudOptions > ,
166+ #[ allow( unused_variables) ] cloud_options : & mut Option < CloudOptions > ,
167167) -> PolarsResult < Arc < [ PlPath ] > > {
168168 expand_paths_hive ( paths, glob, cloud_options, false ) . map ( |x| x. 0 )
169169}
@@ -204,7 +204,7 @@ impl HiveIdxTracker<'_> {
204204pub fn expand_paths_hive (
205205 paths : & [ PlPath ] ,
206206 glob : bool ,
207- #[ allow( unused_variables) ] cloud_options : Option < & CloudOptions > ,
207+ #[ allow( unused_variables) ] cloud_options : & mut Option < CloudOptions > ,
208208 check_directory_level : bool ,
209209) -> PolarsResult < ( Arc < [ PlPath ] > , usize ) > {
210210 let Some ( first_path) = paths. first ( ) else {
@@ -398,13 +398,65 @@ pub fn expand_paths_hive(
398398 } ;
399399
400400 for ( path_idx, path) in paths. iter ( ) . enumerate ( ) {
401+ use std:: borrow:: Cow ;
402+
403+ let mut path = Cow :: Borrowed ( path) ;
404+
401405 if matches ! (
402406 path. cloud_scheme( ) ,
403407 Some ( CloudScheme :: Http | CloudScheme :: Https )
404408 ) {
405- out_paths. push ( path. clone ( ) ) ;
406- hive_idx_tracker. update ( 0 , path_idx) ?;
407- continue ;
409+ let mut rewrite_aws = false ;
410+
411+ #[ cfg( feature = "aws" ) ]
412+ {
413+ // See https://docs.aws.amazon.com/AmazonS3/latest/userguide/VirtualHosting.html#virtual-hosted-style-access
414+ // Path format: https://bucket-name.s3.region-code.amazonaws.com/key-name
415+ let p = path. as_ref ( ) . as_ref ( ) ;
416+ let after_scheme = p. strip_scheme ( ) ;
417+ if let Some ( bucket_end) = after_scheme. find ( ".s3." ) {
418+ if let Some ( region_end) = after_scheme. find ( ".amazonaws.com/" ) {
419+ if bucket_end < region_end
420+ && region_end < after_scheme. find ( "/" ) . unwrap ( )
421+ {
422+ use crate :: cloud:: CloudConfig ;
423+
424+ rewrite_aws = true ;
425+
426+ let bucket = & after_scheme[ ..bucket_end] ;
427+ let region = & after_scheme[ bucket_end + 4 ..region_end] ;
428+ let key = & after_scheme[ region_end + 15 ..] ;
429+
430+ if let CloudConfig :: Aws ( configs) = cloud_options
431+ . get_or_insert_default ( )
432+ . config
433+ . get_or_insert_with ( || {
434+ CloudConfig :: Aws ( Vec :: with_capacity ( 1 ) )
435+ } )
436+ {
437+ use object_store:: aws:: AmazonS3ConfigKey ;
438+
439+ if !matches ! (
440+ configs. last( ) ,
441+ Some ( ( AmazonS3ConfigKey :: Region , _) )
442+ ) {
443+ configs. push ( ( AmazonS3ConfigKey :: Region , region. into ( ) ) )
444+ }
445+ }
446+
447+ path = Cow :: Owned ( PlPath :: from_string ( format ! (
448+ "s3://{bucket}/{key}"
449+ ) ) )
450+ }
451+ }
452+ }
453+ }
454+
455+ if !rewrite_aws {
456+ out_paths. push ( path. into_owned ( ) ) ;
457+ hive_idx_tracker. update ( 0 , path_idx) ?;
458+ continue ;
459+ }
408460 }
409461
410462 let glob_start_idx = get_glob_start_idx ( path. to_str ( ) . as_bytes ( ) ) ;
@@ -413,7 +465,7 @@ pub fn expand_paths_hive(
413465 path. clone ( )
414466 } else {
415467 let ( expand_start_idx, paths) =
416- expand_path_cloud ( path. to_str ( ) , cloud_options) ?;
468+ expand_path_cloud ( path. to_str ( ) , cloud_options. as_ref ( ) ) ?;
417469 out_paths. extend_from_slice ( & paths) ;
418470 hive_idx_tracker. update ( expand_start_idx, path_idx) ?;
419471 continue ;
@@ -422,7 +474,7 @@ pub fn expand_paths_hive(
422474 hive_idx_tracker. update ( 0 , path_idx) ?;
423475
424476 let iter = crate :: pl_async:: get_runtime ( )
425- . block_in_place_on ( crate :: async_glob ( path. to_str ( ) , cloud_options) ) ?;
477+ . block_in_place_on ( crate :: async_glob ( path. to_str ( ) , cloud_options. as_ref ( ) ) ) ?;
426478
427479 if is_cloud {
428480 out_paths. extend ( iter. into_iter ( ) . map ( PlPath :: from_string) ) ;
@@ -577,7 +629,7 @@ mod tests {
577629
578630 let path = "https://pola.rs/test.csv?token=bear" ;
579631 let paths = & [ PlPath :: new ( path) ] ;
580- let out = expand_paths ( paths, true , None ) . unwrap ( ) ;
632+ let out = expand_paths ( paths, true , & mut None ) . unwrap ( ) ;
581633 assert_eq ! ( out. as_ref( ) , paths) ;
582634 }
583635}
0 commit comments