2222
2323from pandas ._libs import lib
2424from pandas ._typing import (
25- Axis ,
25+ ArrayLike ,
2626 DtypeObj ,
2727 Manager ,
28- Scalar ,
2928)
3029
3130from pandas .core .dtypes .cast import (
@@ -339,7 +338,7 @@ def nested_data_to_arrays(
339338 # By the time we get here we have already checked treat_as_nested(data)
340339
341340 if is_named_tuple (data [0 ]) and columns is None :
342- columns = data [0 ]._fields
341+ columns = ensure_index ( data [0 ]._fields )
343342
344343 arrays , columns = to_arrays (data , columns , dtype = dtype )
345344 columns = ensure_index (columns )
@@ -577,7 +576,7 @@ def dataclasses_to_dicts(data):
577576# Conversion of Inputs to Arrays
578577
579578
580- def to_arrays (data , columns , dtype : Optional [DtypeObj ] = None ):
579+ def to_arrays (data , columns : Optional [ Index ] , dtype : Optional [DtypeObj ] = None ):
581580 """
582581 Return list of arrays, columns.
583582 """
@@ -608,48 +607,48 @@ def to_arrays(data, columns, dtype: Optional[DtypeObj] = None):
608607
609608 elif isinstance (data , np .ndarray ) and data .dtype .names is not None :
610609 # e.g. recarray
611- columns = list (data .dtype .names )
610+ columns = Index ( list (data .dtype .names ) )
612611 arrays = [data [k ] for k in columns ]
613612 return arrays , columns
614613
615614 if isinstance (data [0 ], (list , tuple )):
616- content , columns = _list_to_arrays (data , columns )
615+ content = _list_to_arrays (data )
617616 elif isinstance (data [0 ], abc .Mapping ):
618617 content , columns = _list_of_dict_to_arrays (data , columns )
619618 elif isinstance (data [0 ], ABCSeries ):
620619 content , columns = _list_of_series_to_arrays (data , columns )
621620 else :
622621 # last ditch effort
623622 data = [tuple (x ) for x in data ]
624- content , columns = _list_to_arrays (data , columns )
623+ content = _list_to_arrays (data )
625624
626625 content , columns = _finalize_columns_and_data (content , columns , dtype )
627626 return content , columns
628627
629628
630- def _list_to_arrays (
631- data : List [Scalar ],
632- columns : Union [Index , List ],
633- ) -> Tuple [List [Scalar ], Union [Index , List [Axis ]]]:
629+ def _list_to_arrays (data : List [Union [Tuple , List ]]) -> np .ndarray :
630+ # Returned np.ndarray has ndim = 2
634631 # Note: we already check len(data) > 0 before getting here
635632 if isinstance (data [0 ], tuple ):
636633 content = lib .to_object_array_tuples (data )
637634 else :
638635 # list of lists
639636 content = lib .to_object_array (data )
640- return content , columns
637+ return content
641638
642639
643640def _list_of_series_to_arrays (
644641 data : List ,
645- columns : Union [Index , List ],
646- ) -> Tuple [List [Scalar ], Union [Index , List [Axis ]]]:
642+ columns : Optional [Index ],
643+ ) -> Tuple [np .ndarray , Index ]:
644+ # returned np.ndarray has ndim == 2
645+
647646 if columns is None :
648647 # We know pass_data is non-empty because data[0] is a Series
649648 pass_data = [x for x in data if isinstance (x , (ABCSeries , ABCDataFrame ))]
650649 columns = get_objs_combined_axis (pass_data , sort = False )
651650
652- indexer_cache : Dict [int , Scalar ] = {}
651+ indexer_cache : Dict [int , np . ndarray ] = {}
653652
654653 aligned_values = []
655654 for s in data :
@@ -672,8 +671,8 @@ def _list_of_series_to_arrays(
672671
673672def _list_of_dict_to_arrays (
674673 data : List [Dict ],
675- columns : Union [Index , List ],
676- ) -> Tuple [List [ Scalar ], Union [ Index , List [ Axis ]] ]:
674+ columns : Optional [Index ],
675+ ) -> Tuple [np . ndarray , Index ]:
677676 """
678677 Convert list of dicts to numpy arrays
679678
@@ -690,13 +689,14 @@ def _list_of_dict_to_arrays(
690689
691690 Returns
692691 -------
693- tuple
694- arrays, columns
692+ content : np.ndarray[object, ndim=2]
693+ columns : Index
695694 """
696695 if columns is None :
697696 gen = (list (x .keys ()) for x in data )
698697 sort = not any (isinstance (d , dict ) for d in data )
699698 columns = lib .fast_unique_multiple_list_gen (gen , sort = sort )
699+ columns = ensure_index (columns )
700700
701701 # assure that they are of the base dict class and not of derived
702702 # classes
@@ -707,10 +707,10 @@ def _list_of_dict_to_arrays(
707707
708708
709709def _finalize_columns_and_data (
710- content : np .ndarray ,
711- columns : Optional [Union [ Index , List ] ],
710+ content : np .ndarray , # ndim == 2
711+ columns : Optional [Index ],
712712 dtype : Optional [DtypeObj ],
713- ) -> Tuple [List [np .ndarray ], Union [ Index , List [ Axis ]] ]:
713+ ) -> Tuple [List [np .ndarray ], Index ]:
714714 """
715715 Ensure we have valid columns, cast object dtypes if possible.
716716 """
@@ -728,21 +728,21 @@ def _finalize_columns_and_data(
728728
729729
730730def _validate_or_indexify_columns (
731- content : List , columns : Optional [Union [ Index , List ] ]
732- ) -> Union [ Index , List [ Axis ]] :
731+ content : List [ np . ndarray ] , columns : Optional [Index ]
732+ ) -> Index :
733733 """
734734 If columns is None, make numbers as column names; Otherwise, validate that
735735 columns have valid length.
736736
737737 Parameters
738738 ----------
739- content: list of data
740- columns: Iterable or None
739+ content : list of np.ndarrays
740+ columns : Index or None
741741
742742 Returns
743743 -------
744- columns: If columns is Iterable, return as is; If columns is None, assign
745- positional column index value as columns.
744+ Index
745+ If columns is None, assign positional column index value as columns.
746746
747747 Raises
748748 ------
@@ -786,19 +786,19 @@ def _validate_or_indexify_columns(
786786
787787
788788def _convert_object_array (
789- content : List [Scalar ], dtype : Optional [DtypeObj ] = None
790- ) -> List [Scalar ]:
789+ content : List [np . ndarray ], dtype : Optional [DtypeObj ]
790+ ) -> List [ArrayLike ]:
791791 """
792792 Internal function to convert object array.
793793
794794 Parameters
795795 ----------
796- content: list of processed data records
797- dtype: np.dtype, default is None
796+ content: List[np.ndarray]
797+ dtype: np.dtype or ExtensionDtype
798798
799799 Returns
800800 -------
801- arrays: casted content if not object dtype, otherwise return as is in list.
801+ List[ArrayLike]
802802 """
803803 # provide soft conversion of object dtypes
804804 def convert (arr ):
0 commit comments