1818from typing import (
1919 Any ,
2020 Callable ,
21+ Iterator ,
2122 List ,
2223 Optional ,
2324 Union ,
@@ -208,18 +209,20 @@ def toPandas(self) -> "PandasDataFrameLike":
208209
209210 # Below is toPandas without Arrow optimization.
210211 rows = self .collect ()
211- if len (rows ) > 0 :
212- pdf = pd .DataFrame .from_records (
213- rows , index = range (len (rows )), columns = self .columns # type: ignore[arg-type]
214- )
215- else :
216- pdf = pd .DataFrame (columns = self .columns )
217212
218- if len (pdf .columns ) > 0 :
213+ if len (self .columns ) > 0 :
219214 timezone = sessionLocalTimeZone
220215 struct_in_pandas = pandasStructHandlingMode
221216
222- return pd .concat (
217+ # Extract columns from rows and apply converters
218+ if len (rows ) > 0 :
219+ # Use iterator to avoid materializing intermediate data structure
220+ columns_data : Iterator [Any ] = iter (zip (* rows ))
221+ else :
222+ columns_data = iter ([] for _ in self .schema .fields )
223+
224+ # Build DataFrame from columns
225+ pdf = pd .concat (
223226 [
224227 _create_converter_to_pandas (
225228 field .dataType ,
@@ -230,13 +233,15 @@ def toPandas(self) -> "PandasDataFrameLike":
230233 ),
231234 error_on_duplicated_field_names = False ,
232235 timestamp_utc_localized = False ,
233- )(pser )
234- for ( _ , pser ), field in zip (pdf . items () , self .schema .fields )
236+ )(pd . Series ( col_data , dtype = object ) )
237+ for col_data , field in zip (columns_data , self .schema .fields )
235238 ],
236- axis = "columns" ,
239+ axis = 1 ,
240+ keys = self .columns ,
237241 )
238- else :
239242 return pdf
243+ else :
244+ return pd .DataFrame (columns = [], index = range (len (rows )))
240245
241246 def toArrow (self ) -> "pa.Table" :
242247 from pyspark .sql .dataframe import DataFrame
0 commit comments