1111from .pandas_vb_common import tm
1212
1313
class Dtypes:
    """Shared base for string benchmarks parametrized over ``dtype``.

    ``setup`` stores a ``Series`` built from ``tm.makeStringIndex(10 ** 5)``
    on ``self.s`` using the requested dtype.
    """

    params = ["str", "string", "arrow_string"]
    param_names = ["dtype"]

    def setup(self, dtype):
        """Build ``self.s`` for ``dtype``.

        Raises
        ------
        NotImplementedError
            If constructing the Series raises ``ImportError``.
            NOTE(review): presumably this is how the benchmark runner is told
            to skip the parameter combination -- confirm against asv docs.
        """
        # The imported name is never used (hence the noqa); presumably the
        # import is needed so the "arrow_string" dtype alias can be
        # resolved -- TODO confirm.
        from pandas.core.arrays.string_arrow import ArrowStringDtype  # noqa: F401

        try:
            values = tm.makeStringIndex(10 ** 5)
            self.s = Series(values, dtype=dtype)
        except ImportError:
            raise NotImplementedError
25+
26+
1427class Construction :
1528
1629 params = ["str" , "string" ]
@@ -49,18 +62,7 @@ def peakmem_cat_frame_construction(self, dtype):
4962 DataFrame (self .frame_cat_arr , dtype = dtype )
5063
5164
52- class Methods :
53- params = ["str" , "string" , "arrow_string" ]
54- param_names = ["dtype" ]
55-
56- def setup (self , dtype ):
57- from pandas .core .arrays .string_arrow import ArrowStringDtype # noqa: F401
58-
59- try :
60- self .s = Series (tm .makeStringIndex (10 ** 5 ), dtype = dtype )
61- except ImportError :
62- raise NotImplementedError
63-
65+ class Methods (Dtypes ):
6466 def time_center (self , dtype ):
6567 self .s .str .center (100 )
6668
@@ -83,6 +85,9 @@ def time_find(self, dtype):
8385 def time_rfind (self , dtype ):
8486 self .s .str .rfind ("[A-Z]+" )
8587
88+ def time_fullmatch (self , dtype ):
89+ self .s .str .fullmatch ("A" )
90+
8691 def time_get (self , dtype ):
8792 self .s .str .get (0 )
8893
@@ -211,35 +216,26 @@ def time_cat(self, other_cols, sep, na_rep, na_frac):
211216 self .s .str .cat (others = self .others , sep = sep , na_rep = na_rep )
212217
213218
class Contains(Dtypes):
    """Benchmark ``Series.str.contains`` for every dtype, with ``regex`` on and off."""

    # First axis reuses the dtype list from Dtypes; second toggles ``regex``.
    params = (Dtypes.params, [True, False])
    param_names = ["dtype", "regex"]

    def setup(self, dtype, regex):
        # ``regex`` only matters to the timed call; data setup depends on
        # ``dtype`` alone.
        super().setup(dtype)

    def time_contains(self, dtype, regex):
        """Time searching ``self.s`` for the pattern "A"."""
        self.s.str.contains("A", regex=regex)
229229
230230
class Split(Dtypes):
    """Benchmark ``Series.str.split`` / ``rsplit`` for every dtype and ``expand`` flag."""

    # First axis reuses the dtype list from Dtypes; second toggles ``expand``.
    params = (Dtypes.params, [True, False])
    param_names = ["dtype", "expand"]

    def setup(self, dtype, expand):
        super().setup(dtype)
        # Rewrite each value with ``str.join("--")`` so the "--" separator
        # used by the timed methods is present in the data.
        joined = self.s.str.join("--")
        self.s = joined

    def time_split(self, dtype, expand):
        """Time splitting on "--" from the left."""
        self.s.str.split("--", expand=expand)

    def time_rsplit(self, dtype, expand):
        """Time splitting on "--" from the right."""
        self.s.str.rsplit("--", expand=expand)
249245
250246
251- class Dummies :
252- params = ["str" , "string" , "arrow_string" ]
253- param_names = ["dtype" ]
class Extract(Dtypes):
    """Benchmark ``Series.str.extract`` for every dtype and ``expand`` flag."""

    # First axis reuses the dtype list from Dtypes; second toggles ``expand``.
    params = (Dtypes.params, [True, False])
    param_names = ["dtype", "expand"]

    def setup(self, dtype, expand):
        # ``expand`` only matters to the timed call; data setup depends on
        # ``dtype`` alone.
        super().setup(dtype)

    def time_extract_single_group(self, dtype, expand):
        """Time extraction with a pattern containing one capture group."""
        # Warnings raised during the call are recorded and discarded so they
        # do not reach the benchmark output.
        with warnings.catch_warnings(record=True):
            # Raw string: ``\w*`` must reach the regex engine as a
            # word-character class, with no stray whitespace in the pattern.
            self.s.str.extract(r"(\w*)A", expand=expand)
258+
259+
260+ class Dummies (Dtypes ):
261+ def setup (self , dtype ):
262+ super ().setup (dtype )
263+ self .s = self .s .str .join ("|" )
262264
263265 def time_get_dummies (self , dtype ):
264266 self .s .str .get_dummies ("|" )
@@ -279,3 +281,9 @@ def setup(self):
279281 def time_vector_slice (self ):
280282 # GH 2602
281283 self .s .str [:5 ]
284+
285+
class Iter(Dtypes):
    """Benchmark plain Python iteration over ``self.s`` for every dtype."""

    def time_iter(self, dtype):
        """Walk every element of ``self.s`` without using the values."""
        for _ in self.s:
            pass
0 commit comments