@@ -133,7 +133,7 @@ namespace utf8
133133 /* *
134134 * @brief Check the validity of a given string in UTF8
135135 * @param str
136- * @return true if the given string is a valid UTF88 string
136+ * @return true if the given string is a valid UTF8 string
137137 */
138138 inline bool isValid (const char * str)
139139 {
@@ -183,46 +183,69 @@ namespace utf8
183183 return true ;
184184 }
185185
/**
 * @brief Count the UTF8 encoded characters of a given string
 * @param str NUL terminated string, may be nullptr
 * @return number of well-formed characters found before the terminator or
 *         before the first malformed or truncated sequence, 0 if str is nullptr
 */
inline std::size_t length (const char * str)
{
    if (str == nullptr)
        return 0;

    std::size_t count = 0;
    // Work on unsigned bytes so the bit tests do not depend on the signedness of char.
    const unsigned char * s = reinterpret_cast<const unsigned char *> (str);

    while (*s != 0)
    {
        std::size_t width = 0;

        // The lead byte announces how many bytes the character occupies.
        if (0xf0 == (0xf8 & *s))
            width = 4;
        else if (0xe0 == (0xf0 & *s))
            width = 3;
        else if (0xc0 == (0xe0 & *s))
            width = 2;
        else if (0x00 == (0x80 & *s))
            width = 1;
        else
            break; // stray continuation byte or invalid lead byte

        // Every following byte must look like 10xxxxxx. The terminator does not
        // match, so a truncated sequence stops the scan here instead of moving
        // the pointer past the end of the string.
        bool complete = true;
        for (std::size_t i = 1; i < width; ++i)
        {
            if (0x80 != (0xc0 & s[i]))
            {
                complete = false;
                break;
            }
        }

        if (!complete)
            break;

        ++count;
        s += width;
    }

    return count;
}
222+
186223 /* *
187224 * @brief Compute the UTF8 codepoint for a given UTF8 char
188225 * @param str
189226 * @return UTF8 codepoint if valid, -1 otherwise
190227 */
191228 inline int32_t codepoint (const char * str)
192229 {
193- int32_t codepoint = 0 ;
194230 const char * s = str;
195231
196232 if (isValid (str))
197233 {
198- while (*s != 0 )
199- {
200- if (0xf0 == (0xf8 & *s))
201- {
202- codepoint = ((0x07 & s[0 ]) << 18 ) | ((0x3f & s[1 ]) << 12 ) | ((0x3f & s[2 ]) << 6 ) | (0x3f & s[3 ]);
203- s += 4 ;
204- }
205- else if (0xe0 == (0xf0 & *s))
206- {
207- codepoint = ((0x0f & s[0 ]) << 12 ) | ((0x3f & s[1 ]) << 6 ) | (0x3f & s[2 ]);
208- s += 3 ;
209- }
210- else if (0xc0 == (0xe0 & *s))
211- {
212- codepoint = ((0x1f & s[0 ]) << 6 ) | (0x3f & s[1 ]);
213- s += 2 ;
214- }
215- else if (0x00 == (0x80 & *s))
216- {
217- codepoint = s[0 ];
218- ++s;
219- }
220- else
221- return -1 ;
222- }
223- }
234+ int32_t c = 0 ;
224235
225- return codepoint;
236+ if (0xf0 == (0xf8 & *s))
237+ c = ((0x07 & s[0 ]) << 18 ) | ((0x3f & s[1 ]) << 12 ) | ((0x3f & s[2 ]) << 6 ) | (0x3f & s[3 ]);
238+ else if (0xe0 == (0xf0 & *s))
239+ c = ((0x0f & s[0 ]) << 12 ) | ((0x3f & s[1 ]) << 6 ) | (0x3f & s[2 ]);
240+ else if (0xc0 == (0xe0 & *s))
241+ c = ((0x1f & s[0 ]) << 6 ) | (0x3f & s[1 ]);
242+ else if (0x00 == (0x80 & *s))
243+ c = s[0 ];
244+ else
245+ return -1 ;
246+ return c;
247+ }
248+ return -1 ;
226249 }
227250
228251 /* *
0 commit comments