44
55 The lower level xls/xlsm file format handler using xlrd
66
7- :copyright: (c) 2016-2017 by Onni Software Ltd
7+ :copyright: (c) 2016-2020 by Onni Software Ltd
88 :license: New BSD License
99"""
1010import datetime
1111
1212import xlrd
13-
14- from pyexcel_io .book import BookReader
15- from pyexcel_io .sheet import SheetReader
1613from pyexcel_io .service import has_no_digits_in_float
17- from pyexcel_io ._compact import OrderedDict , irange
14+ from pyexcel_io ._compact import irange
15+ from pyexcel_io .plugin_api .abstract_sheet import ISheet
16+ from pyexcel_io .plugin_api .abstract_reader import IReader
1817
1918XLS_KEYWORDS = [
2019 "filename" ,
@@ -45,20 +44,21 @@ def register_cells(self, registry):
4544 registry [key ] = self
4645
4746
48- class XLSheet (SheetReader ):
47+ class XLSheet (ISheet ):
4948 """
5049 xls, xlsx, xlsm sheet reader
5150
5251 Currently only support first sheet in the file
5352 """
5453
5554 def __init__ (self , sheet , auto_detect_int = True , date_mode = 0 , ** keywords ):
56- SheetReader .__init__ (self , sheet , ** keywords )
5755 self .__auto_detect_int = auto_detect_int
5856 self .__hidden_cols = []
5957 self .__hidden_rows = []
6058 self .__merged_cells = {}
6159 self ._book_date_mode = date_mode
60+ self ._native_sheet = sheet
61+ self ._keywords = keywords
6262 if keywords .get ("detect_merged_cells" ) is True :
6363 for merged_cell_ranges in sheet .merged_cells :
6464 merged_cells = MergedCell (* merged_cell_ranges )
@@ -75,17 +75,14 @@ def __init__(self, sheet, auto_detect_int=True, date_mode=0, **keywords):
def name(self):
    """
    Name of the wrapped native sheet
    """
    native_sheet = self._native_sheet
    return native_sheet.name
7777
78- def number_of_rows (self ):
79- """
80- Number of rows in the xls sheet
81- """
82- return self ._native_sheet .nrows - len (self .__hidden_rows )
def row_iterator(self):
    """
    Row indices to read from the sheet

    :returns: ``range`` whose length is the native row count minus
        the number of hidden rows recorded on this sheet
    """
    hidden_count = len(self.__hidden_rows)
    return range(self._native_sheet.nrows - hidden_count)
8381
84- def number_of_columns (self ):
85- """
86- Number of columns in the xls sheet
87- """
88- return self ._native_sheet .ncols - len (self .__hidden_cols )
def column_iterator(self, row):
    """
    Lazily yield the cell values of one row

    :param row: row index handed to ``cell_value``
    :returns: generator of ``cell_value(row, column)`` for every column
        index below the native column count minus the hidden column count
    """
    visible_columns = self._native_sheet.ncols - len(self.__hidden_cols)
    yield from (
        self.cell_value(row, index) for index in range(visible_columns)
    )
8986
9087 def cell_value (self , row , column ):
9188 """
@@ -127,93 +124,48 @@ def calculate_offsets(incoming_index, hidden_indices):
127124 return incoming_index + offset
128125
129126
130- class XLSBook ( BookReader ):
127+ class XLSReader ( IReader ):
131128 """
132129 XLSBook reader
133130
134131 It reads xls, xlsm, xlsx work book
135132 """
136133
137- def __init__ (self ):
138- BookReader .__init__ (self )
139- self ._file_content = None
140- self .__skip_hidden_sheets = True
141- self .__skip_hidden_row_column = True
142- self .__detect_merged_cells = False
143-
144- def open (self , file_name , ** keywords ):
145- self .__parse_keywords (** keywords )
146- BookReader .open (self , file_name , ** keywords )
147-
148- def open_stream (self , file_stream , ** keywords ):
149- self .__parse_keywords (** keywords )
150- BookReader .open_stream (self , file_stream , ** keywords )
151-
152- def open_content (self , file_content , ** keywords ):
153- self .__parse_keywords (** keywords )
154- self ._keywords = keywords
155- self ._file_content = file_content
156-
157- def __parse_keywords (self , ** keywords ):
def __init__(self, file_type, **keywords):
    """
    Open the work book and collect the sheets to be read

    :param file_type: file type name; only compared against "xls" here
    :param keywords: reader options. ``skip_hidden_sheets`` (default True),
        ``skip_hidden_row_and_column`` (default True) and
        ``detect_merged_cells`` (default False) are read here; the whole
        dictionary is kept on ``self._keywords``.
    """
    self._keywords = keywords
    self.__skip_hidden_sheets = keywords.get("skip_hidden_sheets", True)
    self.__skip_hidden_row_column = keywords.get(
        "skip_hidden_row_and_column", True
    )
    self.__detect_merged_cells = keywords.get("detect_merged_cells", False)

    xlrd_params = self._extract_xlrd_params()
    # formatting_info is switched on for merged cell detection, and for
    # hidden row/column skipping when the file type is "xls"
    hidden_info_wanted = self.__skip_hidden_row_column and file_type == "xls"
    if hidden_info_wanted or self.__detect_merged_cells:
        xlrd_params["formatting_info"] = True

    self.content_array = []
    self._native_book = self.get_xls_book(**xlrd_params)
    for sheet in self._native_book.sheets():
        # keep a sheet unless hidden sheets are skipped and its
        # visibility is non-zero
        if not self.__skip_hidden_sheets or sheet.visibility == 0:
            self.content_array.append(sheet)
191153
192- def read_sheet (self , native_sheet ):
def read_sheet(self, index):
    """
    Wrap one collected native sheet in an XLSheet

    :param index: position of the sheet in ``self.content_array``
    :returns: XLSheet created with the book's ``datemode`` and the
        keywords stored on this reader
    """
    return XLSheet(
        self.content_array[index],
        date_mode=self._native_book.datemode,
        **self._keywords
    )
199162
200- def _get_book (self , on_demand = False ):
201- xlrd_params = self ._extract_xlrd_params ()
202- xlrd_params ["on_demand" ] = on_demand
203-
204- if self ._file_name :
205- xlrd_params ["filename" ] = self ._file_name
206- elif self ._file_stream :
207- file_content = self ._file_stream .read ()
208- xlrd_params ["file_contents" ] = file_content
209- elif self ._file_content is not None :
210- xlrd_params ["file_contents" ] = self ._file_content
211- else :
212- raise IOError ("No valid file name or file content found." )
213- if self .__skip_hidden_row_column and self ._file_type == "xls" :
214- xlrd_params ["formatting_info" ] = True
215- if self .__detect_merged_cells :
216- xlrd_params ["formatting_info" ] = True
def close(self):
    """
    Release the resources held by the native book, if there is one

    Safe to call more than once, and safe to call on an instance whose
    ``_native_book`` attribute was never assigned.
    """
    # getattr: the attribute does not exist yet if opening the work book
    # raised before it could be assigned
    native_book = getattr(self, "_native_book", None)
    if native_book:
        native_book.release_resources()
        self._native_book = None
167+
def get_xls_book(self, **xlrd_params):
    """
    Open a work book with xlrd

    :param xlrd_params: keyword arguments passed unchanged to
        ``xlrd.open_workbook``
    :returns: the object returned by ``xlrd.open_workbook``
    """
    return xlrd.open_workbook(**xlrd_params)
219171
@@ -226,6 +178,24 @@ def _extract_xlrd_params(self):
226178 return params
227179
228180
class XLSInFile(XLSReader):
    """
    XLSReader that is given a file name
    """

    def __init__(self, file_name, file_type, **keywords):
        # the file name travels to the base class as the "filename" keyword
        super(XLSInFile, self).__init__(
            file_type, filename=file_name, **keywords
        )
184+
185+
class XLSInContent(XLSReader):
    """
    XLSReader that is given the file content directly
    """

    def __init__(self, file_content, file_type, **keywords):
        # the content travels to the base class as the "file_contents"
        # keyword
        super(XLSInContent, self).__init__(
            file_type, file_contents=file_content, **keywords
        )
189+
190+
class XLSInMemory(XLSReader):
    """
    XLSReader that is given a file stream
    """

    def __init__(self, file_stream, file_type, **keywords):
        # rewind first so the whole stream is read, then pass what was
        # read to the base class as the "file_contents" keyword
        file_stream.seek(0)
        stream_content = file_stream.read()
        super(XLSInMemory, self).__init__(
            file_type, file_contents=stream_content, **keywords
        )
197+
198+
229199def xldate_to_python_date (value , date_mode ):
230200 """
231201 convert xl date to python date
0 commit comments