@@ -76,11 +76,12 @@ get_binascii_state(PyObject *module)
7676}
7777
7878
79- static const unsigned char table_a2b_base64 [] = {
79+ /* Align to 64 bytes for L1 cache line friendliness */
80+ static const unsigned char table_a2b_base64 [] Py_ALIGNED (64 ) = {
8081 -1 ,-1 ,-1 ,-1 , -1 ,-1 ,-1 ,-1 , -1 ,-1 ,-1 ,-1 , -1 ,-1 ,-1 ,-1 ,
8182 -1 ,-1 ,-1 ,-1 , -1 ,-1 ,-1 ,-1 , -1 ,-1 ,-1 ,-1 , -1 ,-1 ,-1 ,-1 ,
8283 -1 ,-1 ,-1 ,-1 , -1 ,-1 ,-1 ,-1 , -1 ,-1 ,-1 ,62 , -1 ,-1 ,-1 ,63 ,
83- 52 ,53 ,54 ,55 , 56 ,57 ,58 ,59 , 60 ,61 ,-1 ,-1 , -1 , 0 ,-1 ,-1 , /* Note PAD->0 */
84+ 52 ,53 ,54 ,55 , 56 ,57 ,58 ,59 , 60 ,61 ,-1 ,-1 , -1 ,64 ,-1 ,-1 , /* PAD->64 detected by fast path */
8485 -1 , 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 ,10 , 11 ,12 ,13 ,14 ,
8586 15 ,16 ,17 ,18 , 19 ,20 ,21 ,22 , 23 ,24 ,25 ,-1 , -1 ,-1 ,-1 ,-1 ,
8687 -1 ,26 ,27 ,28 , 29 ,30 ,31 ,32 , 33 ,34 ,35 ,36 , 37 ,38 ,39 ,40 ,
@@ -101,9 +102,91 @@ static const unsigned char table_a2b_base64[] = {
101102/* Max binary chunk size; limited only by available memory */
102103#define BASE64_MAXBIN ((PY_SSIZE_T_MAX - 3) / 2)
103104
104- static const unsigned char table_b2a_base64 [] =
105+ /*
106+ * Fast base64 encoding/decoding helpers.
107+ *
108+ * Process complete groups without loop-carried dependencies.
109+ */
110+
111+ /* Align to 64 bytes for L1 cache line friendliness */
112+ static const unsigned char table_b2a_base64 [] Py_ALIGNED (64 ) =
105113"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" ;
106114
/* Encode one 3-byte group as 4 base64 characters.
 *
 * The 24 input bits (in[0] first, most significant) are cut into four
 * 6-bit values; each value indexes `table` to produce one output character.
 * Reads in[0..2] and writes out[0..3].
 */
static inline void
base64_encode_trio(const unsigned char *in, unsigned char *out,
                   const unsigned char *table)
{
    const unsigned int first = in[0];
    const unsigned int second = in[1];
    const unsigned int third = in[2];

    /* Top 6 bits of the first byte. */
    out[0] = table[first >> 2];
    /* Low 2 bits of the first byte, then top 4 bits of the second. */
    out[1] = table[((first & 0x03) << 4) | (second >> 4)];
    /* Low 4 bits of the second byte, then top 2 bits of the third. */
    out[2] = table[((second & 0x0f) << 2) | (third >> 6)];
    /* Low 6 bits of the third byte. */
    out[3] = table[third & 0x3f];
}
128+
129+ /* Encode multiple complete 3-byte groups.
130+ * Returns the number of input bytes processed (always a multiple of 3).
131+ */
132+ static inline Py_ssize_t
133+ base64_encode_fast (const unsigned char * in , Py_ssize_t in_len ,
134+ unsigned char * out , const unsigned char * table )
135+ {
136+ Py_ssize_t n_trios = in_len / 3 ;
137+ const unsigned char * in_end = in + n_trios * 3 ;
138+
139+ while (in < in_end ) {
140+ base64_encode_trio (in , out , table );
141+ in += 3 ;
142+ out += 4 ;
143+ }
144+
145+ return n_trios * 3 ;
146+ }
147+
/* Decode one group of 4 base64 characters into 3 bytes.
 *
 * Each input character is mapped through `table`.  A mapped value with
 * either of its two top bits set (that is, anything above 63) means the
 * group cannot be decoded here; `out` is then left unmodified.
 *
 * Returns 1 if out[0..2] were written, 0 if any character was rejected.
 */
static inline int
base64_decode_quad(const unsigned char *in, unsigned char *out,
                   const unsigned char *table)
{
    const unsigned int a = table[in[0]];
    const unsigned int b = table[in[1]];
    const unsigned int c = table[in[2]];
    const unsigned int d = table[in[3]];

    /* A single test covers all four values: valid sextets fit in 6 bits. */
    if (((a | b | c | d) & 0xc0) != 0) {
        return 0;
    }

    /* Reassemble 4 x 6 bits into 3 x 8 bits, keeping only the low byte. */
    out[0] = (unsigned char)((a << 2) | (b >> 4));
    out[1] = (unsigned char)((b << 4) | (c >> 2));
    out[2] = (unsigned char)((c << 6) | d);
    return 1;
}
169+
170+ /* Decode multiple complete 4-character groups (no padding allowed).
171+ * Returns the number of input characters processed.
172+ * Stops at the first invalid character, padding, or incomplete group.
173+ */
174+ static inline Py_ssize_t
175+ base64_decode_fast (const unsigned char * in , Py_ssize_t in_len ,
176+ unsigned char * out , const unsigned char * table )
177+ {
178+ Py_ssize_t n_quads = in_len / 4 ;
179+ Py_ssize_t i ;
180+
181+ for (i = 0 ; i < n_quads ; i ++ ) {
182+ if (!base64_decode_quad (in + i * 4 , out + i * 3 , table )) {
183+ break ;
184+ }
185+ }
186+
187+ return i * 4 ;
188+ }
189+
107190
108191static const unsigned short crctab_hqx [256 ] = {
109192 0x0000 , 0x1021 , 0x2042 , 0x3063 , 0x4084 , 0x50a5 , 0x60c6 , 0x70e7 ,
@@ -403,10 +486,26 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode)
403486 goto error_end ;
404487 }
405488
489+ size_t i = 0 ; /* Current position in input */
490+
491+ /* Fast path: use optimized decoder for complete quads.
492+ * This works for both strict and non-strict mode for valid input.
493+ * The fast path stops at padding, invalid chars, or incomplete groups.
494+ */
495+ if (ascii_len >= 4 ) {
496+ Py_ssize_t fast_chars = base64_decode_fast (ascii_data , (Py_ssize_t )ascii_len ,
497+ bin_data , table_a2b_base64 );
498+ if (fast_chars > 0 ) {
499+ i = (size_t )fast_chars ;
500+ bin_data += (fast_chars / 4 ) * 3 ;
501+ }
502+ }
503+
504+ /* Slow path: handle remaining input (padding, invalid chars, partial groups) */
406505 int quad_pos = 0 ;
407506 unsigned char leftchar = 0 ;
408507 int pads = 0 ;
409- for (size_t i = 0 ; i < ascii_len ; i ++ ) {
508+ for (; i < ascii_len ; i ++ ) {
410509 unsigned char this_ch = ascii_data [i ];
411510
412511 /* Check for pad sequences and ignore
@@ -533,9 +632,6 @@ binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline)
533632/*[clinic end generated code: output=4ad62c8e8485d3b3 input=0e20ff59c5f2e3e1]*/
534633{
535634 const unsigned char * bin_data ;
536- int leftbits = 0 ;
537- unsigned char this_ch ;
538- unsigned int leftchar = 0 ;
539635 Py_ssize_t bin_len ;
540636 binascii_state * state ;
541637
@@ -566,26 +662,31 @@ binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline)
566662 }
567663 unsigned char * ascii_data = PyBytesWriter_GetData (writer );
568664
569- for ( ; bin_len > 0 ; bin_len -- , bin_data ++ ) {
570- /* Shift the data into our buffer */
571- leftchar = (leftchar << 8 ) | * bin_data ;
572- leftbits += 8 ;
573-
574- /* See if there are 6-bit groups ready */
575- while ( leftbits >= 6 ) {
576- this_ch = (leftchar >> (leftbits - 6 )) & 0x3f ;
577- leftbits -= 6 ;
578- * ascii_data ++ = table_b2a_base64 [this_ch ];
579- }
580- }
581- if ( leftbits == 2 ) {
582- * ascii_data ++ = table_b2a_base64 [(leftchar & 3 ) << 4 ];
665+ /* Use the optimized fast path for complete 3-byte groups */
666+ Py_ssize_t fast_bytes = base64_encode_fast (bin_data , bin_len , ascii_data ,
667+ table_b2a_base64 );
668+ bin_data += fast_bytes ;
669+ ascii_data += (fast_bytes / 3 ) * 4 ;
670+ bin_len -= fast_bytes ;
671+
672+ /* Handle remaining 0-2 bytes */
673+ if (bin_len == 1 ) {
674+ /* 1 byte remaining: produces 2 base64 chars + 2 padding */
675+ unsigned int val = bin_data [0 ];
676+ * ascii_data ++ = table_b2a_base64 [(val >> 2 ) & 0x3f ];
677+ * ascii_data ++ = table_b2a_base64 [(val << 4 ) & 0x3f ];
583678 * ascii_data ++ = BASE64_PAD ;
584679 * ascii_data ++ = BASE64_PAD ;
585- } else if ( leftbits == 4 ) {
586- * ascii_data ++ = table_b2a_base64 [(leftchar & 0xf ) << 2 ];
680+ }
681+ else if (bin_len == 2 ) {
682+ /* 2 bytes remaining: produces 3 base64 chars + 1 padding */
683+ unsigned int val = ((unsigned int )bin_data [0 ] << 8 ) | bin_data [1 ];
684+ * ascii_data ++ = table_b2a_base64 [(val >> 10 ) & 0x3f ];
685+ * ascii_data ++ = table_b2a_base64 [(val >> 4 ) & 0x3f ];
686+ * ascii_data ++ = table_b2a_base64 [(val << 2 ) & 0x3f ];
587687 * ascii_data ++ = BASE64_PAD ;
588688 }
689+
589690 if (newline )
590691 * ascii_data ++ = '\n' ; /* Append a courtesy newline */
591692
0 commit comments