forked from miketang84/lglib
-
Notifications
You must be signed in to change notification settings - Fork 0
/
string.lua
535 lines (456 loc) · 15 KB
/
string.lua
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
local require, getmetatable = require, getmetatable
local string, table, unpack, select, debug, error, loadstring, assert = string, table, unpack, select, debug, error, loadstring, assert
local type, tostring, pairs, io, error, print = type, tostring, pairs, io, error, print
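-- If the line below were enabled, the functions in lglib.table would be unpacked into the global table _G, because require loads and executes the file and places the results of that execution into _G.
-- That file contains no module() statement, and this environment has no _G of its own, so after execution the functions it defines end up in the _G
-- reached through the inherited metatable, i.e. in the global environment.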
--local lgtable = require 'lglib.table'
-- The same applies to this line.
--dofile('./lglib/table.lua')
local lgtable = {}
import(lgtable, 'table')
------------------------------------------------------------------------
-- Python-style named string interpolation through the `%` operator.
-- Every placeholder of the form %(name)<format> is looked up in `tab` by
-- name and rendered with string.format using "%<format>". A placeholder
-- whose value is nil or false is left in the text unchanged.
-- @param self template string
-- @param tab  table mapping placeholder names to values
-- @usage 'I love %(who)s' % { who = 'you' }
-- @return the string with the placeholders substituted
------------------------------------------------------------------------
getmetatable("").__mod = function (self, tab)
    -- Renders one placeholder; `key` is the name, `spec` the format suffix.
    local function substitute(key, spec)
        local value = tab[key]
        if value then
            return ("%"..spec):format(value)
        end
        -- No usable value: rebuild the placeholder exactly as it was written.
        return "%("..key..")"..spec
    end
    -- gsub also returns a match count; keep only the resulting string.
    local rendered = self:gsub("%%%((%a%w*)%)([-0-9%.]*[cdeEfgGiouxXsq])", substitute)
    return rendered
end
------------------------------------------------------------------------
-- Makes the `+` operator concatenate when it is dispatched to the string
-- metatable.
-- @param self left operand
-- @param astr right operand
-- @return self .. astr
------------------------------------------------------------------------
getmetatable("").__add = function (self, astr)
    local joined = self .. astr
    return joined
end
------------------------------------------------------------------------
-- Length of a string.
-- When the global flag UTF8_FULLSUPPORT is truthy the result comes from
-- utf8len(self); otherwise it is the byte length.
-- @param self string to measure
-- @return the length as a number
------------------------------------------------------------------------
function length(self)
    if not UTF8_FULLSUPPORT then
        return self:len()
    end
    return utf8len(self)
end
------------------------------------------------------------------------
-- Upper-case the first byte of a word.
-- @param self the word
-- @usage 'example':cap()
-- @return the word with its first byte upper-cased; an empty string is
--         returned as is
------------------------------------------------------------------------
function cap(self)
    if #self > 0 then
        local head = self:sub(1, 1):upper()
        local rest = self:sub(2)
        return ('%s%s'):format(head, rest)
    end
    return self
end
------------------------------------------------------------------------
-- Test whether a string contains a substring (plain search, no patterns).
-- @param self   string to search in
-- @param substr substring to look for
-- @return true if substr occurs in self, false otherwise
------------------------------------------------------------------------
function contains(self, substr)
    -- string.find yields nil when there is no occurrence.
    return self:find(substr, 1, true) ~= nil
end
------------------------------------------------------------------------
-- Test whether a string begins with a given prefix (plain comparison).
-- @param self string to inspect
-- @param beg  prefix
-- @return true if self starts with beg, false otherwise
------------------------------------------------------------------------
function startsWith(self, beg)
    -- The first plain occurrence is at position 1 exactly when beg is a prefix.
    return self:find(beg, 1, true) == 1
end
------------------------------------------------------------------------
-- Test whether a string ends with a given suffix (plain comparison).
-- @param self string to inspect
-- @param tail suffix
-- @return true if self ends with tail, false otherwise; an empty tail is
--         a suffix of every string
------------------------------------------------------------------------
function endsWith(self, tail)
    -- self:sub(-0) is self:sub(0), i.e. the whole string, so the empty suffix
    -- must be handled separately or it would only match the empty string.
    if tail == '' then
        return true
    end
    return self:sub(-#tail) == tail
end
------------------------------------------------------------------------
-- Split a string on a delimiter.
-- @param self        string to split
-- @param delim       delimiter; a Lua pattern unless no_patterns is true
-- @param count       maximum number of splits to perform; nil or 0 means
--                    no limit
-- @param no_patterns true to treat delim as plain text, false/nil to treat
--                    it as a Lua pattern
-- @return rlist      array of the pieces; the last entry is whatever
--                    follows the final delimiter that was consumed
-- Raises 'invalid delimiter' when delim is empty or matches an empty
-- string, because such a delimiter can never advance through the input.
------------------------------------------------------------------------
function split(self, delim, count, no_patterns)
    if delim == '' then error('invalid delimiter', 2) end
    count = count or 0
    local rlist = {}
    local next_delim = 1
    local i = 1
    repeat
        local start, finish = self:find(delim, next_delim, no_patterns)
        if not (start and finish) then
            break
        end
        -- A zero-width match (finish == start - 1) would leave next_delim
        -- unchanged and repeat forever; reject it like the empty delimiter.
        if finish < start then
            error('invalid delimiter', 2)
        end
        rlist[i] = self:sub(next_delim, start - 1)
        next_delim = finish + 1
        i = i + 1
    until i == count + 1
    rlist[i] = self:sub(next_delim)
    return rlist
end
------------------------------------------------------------------------
-- Split a string on a delimiter and return the pieces as multiple values.
-- Takes the same arguments as split and unpacks its result list.
-- @param self        string to split
-- @param delim       delimiter
-- @param count       maximum number of splits
-- @param no_patterns true|false|nil, whether pattern matching in delim is
--                    switched off
-- @return the pieces, as multiple return values
------------------------------------------------------------------------
function splitOut(self, delim, count, no_patterns)
    local pieces = split(self, delim, count, no_patterns)
    return unpack(pieces)
end
------------------------------------------------------------------------
-- Cut a string at a sequence of plain-text delimiters, applied in order.
-- Each delimiter is searched for in what remains of the string; when it is
-- found, the text before it becomes one result and the search continues
-- after it. A delimiter that is not found is skipped.
-- @param self string to cut
-- @param ...  the delimiters, in the order they are expected to appear
-- @return the pieces followed by the remaining tail, as multiple values
------------------------------------------------------------------------
function splitBy(self, ...)
    local delimiters = {...}
    local pieces, remainder = {}, self
    local position, total = 1, select("#", ...)
    while position <= total and remainder do
        local from, to = remainder:find(delimiters[position], 1, true)
        if from then
            pieces[#pieces + 1] = remainder:sub(1, from - 1)
            remainder = remainder:sub(to + 1)
        end
        position = position + 1
    end
    pieces[#pieces + 1] = remainder
    return unpack(pieces)
end
------------------------------------------------------------------------
-- Find the last occurrence of a plain substring.
-- @param self   string to search in
-- @param substr substring to look for (plain text, no patterns)
-- @return lastBegPos start position of the last occurrence, or nil
--         lastEndPos end position of the last occurrence, or nil
-- An empty substr matches at the very end: #self + 1, #self.
-- @note scans forward through every occurrence, so it is not efficient
------------------------------------------------------------------------
function rfind(self, substr)
    -- On Lua 5.1 / LuaJIT string.find clamps an out-of-range start position,
    -- so an empty needle would keep matching at the end forever. Answer it
    -- directly with the value the forward scan produces on Lua 5.2+.
    if substr == '' then
        return #self + 1, #self
    end
    local lastBegPos, lastEndPos
    local begPos, endPos = self:find(substr, 1, true)
    while begPos do
        lastBegPos, lastEndPos = begPos, endPos
        -- Restart one byte after the match start so overlapping matches count.
        begPos, endPos = self:find(substr, begPos + 1, true)
    end
    return lastBegPos, lastEndPos
end
-- Byte values treated as blank by the trimming functions: space, tab,
-- vertical tab, carriage return, newline and NUL (0).
local TRIM_CHARS = {(" "):byte();("\t"):byte();("\v"):byte();("\r"):byte();("\n"):byte();0}
------------------------------------------------------------------------
-- Remove leading blank bytes from a string.
-- A byte is blank when lgtable.isIn(TRIM_CHARS, byte) is truthy
-- (presumably a membership test on the TRIM_CHARS list; confirm against
-- lglib.table).
-- @param self string to trim
-- @return the string without its leading blanks; '' when every byte is
--         blank
------------------------------------------------------------------------
function ltrim(self)
    -- Start past the end so that a string made only of blanks (or an empty
    -- one) trims to '' instead of being returned unchanged.
    local first = #self + 1
    for i = 1, #self do
        if not lgtable.isIn(TRIM_CHARS, self:byte(i)) then
            first = i
            break
        end
    end
    return self:sub(first)
end
------------------------------------------------------------------------
-- Remove trailing blank bytes from a string.
-- A byte is blank when lgtable.isIn(TRIM_CHARS, byte) is truthy
-- (presumably a membership test on the TRIM_CHARS list; confirm against
-- lglib.table).
-- @param self string to trim
-- @return the string without its trailing blanks; '' when every byte is
--         blank
------------------------------------------------------------------------
function rtrim(self)
    -- Start before the beginning so that a string made only of blanks trims
    -- to '' instead of keeping its first byte.
    local last = 0
    for i = #self, 1, -1 do
        if not lgtable.isIn(TRIM_CHARS, self:byte(i)) then
            last = i
            break
        end
    end
    return self:sub(1, last)
end
------------------------------------------------------------------------
-- Remove blanks from both ends of a string by applying the ltrim method
-- and then the rtrim method.
-- @param self string to trim
-- @return the string without leading and trailing blanks
------------------------------------------------------------------------
function trim(self)
    local left_trimmed = self:ltrim()
    return left_trimmed:rtrim()
end
------------------------------------------------------------------------
-- Replace occurrences of a pattern in a string (wrapper over string.gsub).
-- @param self string to process
-- @param ori  what to replace; interpreted as a Lua pattern
-- @param new  replacement, in any form string.gsub accepts
-- @param n    optional; maximum number of replacements
-- @return the resulting string, followed by the number of replacements
--         made (both results of string.gsub are passed through)
------------------------------------------------------------------------
function replace(self, ori, new, n)
    local result, replaced = self:gsub(ori, new, n)
    return result, replaced
end
------------------------------------------------------------------------
-- Mapped replacement: apply several substitutions to one string.
-- @param self    string to process
-- @param mapping substitution table, e.g. {['ori'] = 'new', ['foo'] = 'bar'};
--                each key is used as a Lua pattern, each value as its
--                replacement
-- @param n       optional; maximum number of replacements per key
-- @return the string after all substitutions
-- The keys are visited with pairs, so their order is unspecified; results
-- are only predictable when the substitutions do not affect one another.
------------------------------------------------------------------------
function mapreplace (self, mapping, n)
    local result = self
    for k, v in pairs(mapping) do
        -- Strings are immutable: keep what gsub returns. The assignment
        -- drops gsub's second result (the match count).
        result = result:gsub(k, v, n)
    end
    return result
end
------------------------------------------------------------------------
-- Fetch the single byte at a position.
-- @param self string to read from
-- @param i    position, with the same meaning as in string.sub
-- @return a one-byte string, or '' when i lies outside the string
------------------------------------------------------------------------
function index(self, i)
    local ch = self:sub(i, i)
    return ch
end
------------------------------------------------------------------------
-- Fetch the bytes between two positions (wrapper over string.sub).
-- @param self string to read from
-- @param i    first position, as understood by string.sub
-- @param j    last position, as understood by string.sub
-- @return the substring from i to j
------------------------------------------------------------------------
function slice(self, i, j)
    local piece = self:sub(i, j)
    return piece
end
------------------------------------------------------------------------
-- ABNF from RFC 3629
--
-- UTF8-octets = *( UTF8-char )
-- UTF8-char = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4
-- UTF8-1 = %x00-7F
-- UTF8-2 = %xC2-DF UTF8-tail
-- UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
-- %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
-- UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
-- %xF4 %x80-8F 2( UTF8-tail )
-- UTF8-tail = %x80-BF
------------------------------------------------------------------------
------------------------------------------------------------------------
-- Byte length of the UTF-8 character that starts at a given position,
-- validated against the RFC 3629 grammar.
-- @param self string to inspect
-- @param i    byte position of the character's first byte (default 1)
-- @return 1|2|3|4 the number of bytes the character occupies
-- Raises an error (it never returns nil) when an argument has the wrong
-- type, when i points outside the string, when the sequence is cut short
-- by the end of the string, or when a byte breaks the grammar.
------------------------------------------------------------------------
local function utf8charbytes(self, i)
    -- argument defaults
    i = i or 1
    -- argument checking
    if type(self) ~= "string" then
        error("bad argument #1 to 'utf8charbytes' (string expected, got ".. type(self).. ")")
    end
    if type(i) ~= "number" then
        error("bad argument #2 to 'utf8charbytes' (number expected, got ".. type(i).. ")")
    end
    local c = self:byte(i)
    if c == nil then
        -- Without this check the comparisons below fail with an unhelpful
        -- "attempt to compare number with nil".
        error("bad argument #2 to 'utf8charbytes' (position out of range)")
    end

    -- UTF8-1 = %x00-7F: NUL is a valid one-byte character too.
    if c <= 127 then
        return 1
    end

    -- Work out how many continuation bytes follow the lead byte and which
    -- range the FIRST continuation byte may take; the grammar narrows that
    -- range for lead bytes E0, ED, F0 and F4.
    local tail_count
    local lo, hi = 128, 191
    if c >= 194 and c <= 223 then
        -- UTF8-2
        tail_count = 1
    elseif c >= 224 and c <= 239 then
        -- UTF8-3
        tail_count = 2
        if c == 224 then
            lo = 160
        elseif c == 237 then
            hi = 159
        end
    elseif c >= 240 and c <= 244 then
        -- UTF8-4
        tail_count = 3
        if c == 240 then
            lo = 144
        elseif c == 244 then
            hi = 143
        end
    else
        -- 80-C1 and F5-FF can never start a character
        error("Invalid UTF-8 character")
    end

    -- Every continuation byte has to exist before any of them is validated.
    for k = 1, tail_count do
        if not self:byte(i + k) then
            error("UTF-8 string terminated early")
        end
    end
    for k = 1, tail_count do
        local b = self:byte(i + k)
        if b < lo or b > hi then
            error("Invalid UTF-8 character")
        end
        -- Only the first continuation byte has a narrowed range.
        lo, hi = 128, 191
    end
    return tail_count + 1
end
------------------------------------------------------------------------
-- Count the UTF-8 characters in a string (character length rather than
-- byte length), stepping through it with utf8charbytes.
-- @param self string to measure
-- @return len number of characters
------------------------------------------------------------------------
function utf8len(self)
    local total = #self
    local count, cursor = 0, 1
    while cursor <= total do
        cursor = cursor + utf8charbytes(self, cursor)
        count = count + 1
    end
    return count
end
------------------------------------------------------------------------
-- Fetch the i-th UTF-8 character of a string.
-- @param self string to read from
-- @param i    1-based character index
-- @return the character as a string, or nil when the string holds fewer
--         than i characters
------------------------------------------------------------------------
function utf8index(self, i)
    local total = #self
    local cursor, seen = 1, 0
    while cursor <= total do
        local width = utf8charbytes(self, cursor)
        seen = seen + 1
        if seen == i then
            return self:sub(cursor, cursor + width - 1)
        end
        cursor = cursor + width
    end
    return nil
end
------------------------------------------------------------------------
-- Take the characters i..j of a UTF-8 string.
-- @param self string to read from
-- @param i    1-based index of the first character wanted
-- @param j    1-based index of the last character wanted (j >= i)
-- @return the characters i through j as a string; when j lies past the
--         end the result runs to the end of the string; nil when i > j,
--         when i < 1, or when the string holds fewer than i characters
------------------------------------------------------------------------
function utf8slice(self, i, j)
    if i > j or i < 1 then
        return nil
    end
    local bytes = #self
    local pos, len = 1, 0

    -- Walk to character i, remembering the byte where it starts.
    local first
    while pos <= bytes do
        local start = pos
        pos = pos + utf8charbytes(self, pos)
        len = len + 1
        if len == i then
            first = start
            break
        end
    end
    if not first then
        -- the string ended before character i
        return nil
    end

    -- pos now sits just after character i. Keep consuming characters until
    -- character j has been taken or the string runs out, so the end byte is
    -- always defined, including when i is the last character.
    while len < j and pos <= bytes do
        pos = pos + utf8charbytes(self, pos)
        len = len + 1
    end
    return self:sub(first, pos - 1)
end
------------------------------------------------------------------------
-- Reverse a string character by character; like string.reverse except
-- that multi-byte UTF-8 characters are kept intact.
-- @param self string to reverse
-- @return the reversed string
-- Raises an error when the input is not valid UTF-8.
------------------------------------------------------------------------
function utf8reverse (self)
    local pos = #self
    -- Collect the characters and join them once at the end; appending to a
    -- string inside the loop would copy the partial result every time.
    local parts, n = {}, 0
    while pos > 0 do
        local c = self:byte(pos)
        -- Step back over continuation bytes (80-BF) to reach the lead byte.
        while c >= 128 and c <= 191 do
            pos = pos - 1
            if pos < 1 then
                -- continuation bytes with no lead byte in front of them
                error("Invalid UTF-8 character")
            end
            c = self:byte(pos)
        end
        local charbytes = utf8charbytes(self, pos)
        n = n + 1
        parts[n] = self:sub(pos, pos + charbytes - 1)
        pos = pos - 1
    end
    return table.concat(parts)
end
------------------------------------------------------------------------
-- Extract a run of space-separated parts from a string.
-- The string is trimmed with its trim method first; parts are then the
-- pieces between single space characters, numbered from 1 (two adjacent
-- spaces therefore enclose an empty part).
-- @param str      string to read from
-- @param start    number of the first part wanted
-- @param endwhich number of the last part wanted
-- @return parts start..endwhich joined by the spaces that separated them;
--         when the string has fewer than endwhich parts the result runs to
--         the end; '' when start or endwhich is not positive, when
--         endwhich < start, or when the string has fewer than start parts
------------------------------------------------------------------------
function findpart(str, start, endwhich)
    if endwhich < start then return '' end
    if endwhich <= 0 or start <= 0 then return '' end
    local text = str:trim()

    -- Position of the n-th space in s, or nil when s has fewer than n spaces.
    local function nth_space(s, n)
        local at = 0
        for _ = 1, n do
            at = s:find(' ', at + 1, true)
            if not at then
                return nil
            end
        end
        return at
    end

    -- Part 1 begins at the first byte; part k > 1 begins right after the
    -- (k-1)-th space. Handling part 1 separately is what makes start == 1
    -- usable at all.
    local first = 1
    if start > 1 then
        local separator = nth_space(text, start - 1)
        if not separator then
            return ''
        end
        first = separator + 1
    end

    local rest = text:sub(first)
    -- The wanted run stops just before the space that closes part endwhich.
    -- Testing the position for nil (rather than a 0 sentinel) keeps a space
    -- at position 1 distinct from "no such space".
    local stop = nth_space(rest, endwhich - start + 1)
    if stop then
        return rest:sub(1, stop - 1)
    end
    return rest
end