diff --git a/reader/utils_test.go b/reader/utils_test.go index f11f576fc..3eabf64d2 100644 --- a/reader/utils_test.go +++ b/reader/utils_test.go @@ -136,11 +136,11 @@ func TestGetTags(t *testing.T) { assert.Equal(t, exp, tags) } -func TestSetMapValueWithPrefix(t *testing.T) { +func TestSetMapValueExistWithPrefix(t *testing.T) { data1 := map[string]interface{}{ "a": "b", } - err1 := SetMapValueWithPrefix(data1, "newVal", "prefix", false, "a") + err1 := SetMapValueExistWithPrefix(data1, "newVal", "prefix", "a") assert.NoError(t, err1) exp1 := map[string]interface{}{ "a": "b", @@ -154,7 +154,7 @@ func TestSetMapValueWithPrefix(t *testing.T) { "age": 45, }, } - err2 := SetMapValueWithPrefix(data2, "newVal", "prefix", false, []string{"a", "name"}...) + err2 := SetMapValueExistWithPrefix(data2, "newVal", "prefix", []string{"a", "name"}...) assert.NoError(t, err2) exp2 := map[string]interface{}{ "a": map[string]interface{}{ @@ -165,10 +165,10 @@ func TestSetMapValueWithPrefix(t *testing.T) { } assert.Equal(t, exp2, data2) - err3 := SetMapValueWithPrefix(data2, "newVal", "prefix", false, []string{"xy", "name"}...) + err3 := SetMapValueExistWithPrefix(data2, "newVal", "prefix", []string{"xy", "name"}...) assert.Error(t, err3) - err4 := SetMapValueWithPrefix(data2, "newVal", "prefix", false, []string{"a", "hello"}...) + err4 := SetMapValueExistWithPrefix(data2, "newVal", "prefix", []string{"a", "hello"}...) assert.NoError(t, err4) exp4 := map[string]interface{}{ "a": map[string]interface{}{ @@ -180,11 +180,4 @@ func TestSetMapValueWithPrefix(t *testing.T) { } assert.Equal(t, exp4, data2) - data5 := map[string]interface{}{} - err5 := SetMapValueWithPrefix(data5, "newVal", "prefix", true, "a") - assert.NoError(t, err5) - exp5 := map[string]interface{}{ - "prefix_a": "newVal", - } - assert.Equal(t, exp5, data5) } diff --git a/transforms/ip/ip.go b/transforms/ip/ip.go index bcbe38317..b4dea7072 100644 --- a/transforms/ip/ip.go +++ b/transforms/ip/ip.go @@ -33,8 +33,18 @@ type Transformer struct { DataPath string `json:"data_path"` KeyAsPrefix bool `json:"key_as_prefix"` - loc Locator - stats StatsInfo + loc Locator + keys []string + lastEleKey string + keysRegion []string + keysCity []string + keysCountry []string + keysIsp []string + keysCountryCode []string + keysLatitude []string + keysLongitude []string + keysDistrictCode []string + stats StatsInfo } func (t *Transformer) Init() error { @@ -43,9 +53,32 @@ func (t *Transformer) Init() error { return fmt.Errorf("new locator: %v", err) } t.loc = loc + t.keys = GetKeys(t.Key) + + newKeys := make([]string, len(t.keys)) + copy(newKeys, t.keys) + t.lastEleKey = t.keys[len(t.keys)-1] + t.keysRegion = generateKeys(t.keys, Region, t.KeyAsPrefix) + t.keysCity = generateKeys(t.keys, City, t.KeyAsPrefix) + t.keysCountry = generateKeys(t.keys, Country, t.KeyAsPrefix) + t.keysIsp = generateKeys(t.keys, Isp, t.KeyAsPrefix) + t.keysCountryCode = generateKeys(t.keys, CountryCode, t.KeyAsPrefix) + t.keysLatitude = generateKeys(t.keys, Latitude, t.KeyAsPrefix) + t.keysLongitude = generateKeys(t.keys, Longitude, t.KeyAsPrefix) + t.keysDistrictCode = generateKeys(t.keys, DistrictCode, t.KeyAsPrefix) return nil } +func generateKeys(keys []string, lastEle string, keyAsPrefix bool) []string { + newKeys := make([]string, len(keys)) + copy(newKeys, keys) + if keyAsPrefix { + lastEle = keys[len(keys)-1] + "_" + lastEle + } + newKeys[len(keys)-1] = lastEle + return newKeys +} + func (_ *Transformer) RawTransform(datas []string) ([]string, error) { return datas, errors.New("IP transformer not support rawTransform") } @@ -54,18 +87,15 @@ func (t *Transformer) Transform(datas []Data) ([]Data, error) { var err, fmtErr error errNum := 0 if t.loc == nil { - loc, err := NewLocator(t.DataPath) + err := t.Init() if err != nil { - t.stats, _ = transforms.SetStatsInfo(err, t.stats, int64(errNum), int64(len(datas)), t.Type()) return datas, err } - t.loc = loc } - keys := GetKeys(t.Key) - newKeys := make([]string, len(keys)) + newKeys := make([]string, len(t.keys)) for i := range datas { - copy(newKeys, keys) - val, getErr := GetMapValue(datas[i], keys...) + copy(newKeys, t.keys) + val, getErr := GetMapValue(datas[i], t.keys...) if getErr != nil { errNum, err = transforms.SetError(errNum, getErr, transforms.GetErr, t.Key) continue @@ -81,29 +111,45 @@ func (t *Transformer) Transform(datas []Data) ([]Data, error) { errNum, err = transforms.SetError(errNum, findErr, transforms.General, "") continue } - newKeys[len(newKeys)-1] = Region - SetMapValueWithPrefix(datas[i], info.Region, keys[len(keys)-1], t.KeyAsPrefix, newKeys...) - newKeys[len(newKeys)-1] = City - SetMapValueWithPrefix(datas[i], info.City, keys[len(keys)-1], t.KeyAsPrefix, newKeys...) - newKeys[len(newKeys)-1] = Country - SetMapValueWithPrefix(datas[i], info.Country, keys[len(keys)-1], t.KeyAsPrefix, newKeys...) - newKeys[len(newKeys)-1] = Isp - SetMapValueWithPrefix(datas[i], info.Isp, keys[len(keys)-1], t.KeyAsPrefix, newKeys...) + findErr = t.SetMapValue(datas[i], info.Region, t.keysRegion...) + if findErr != nil { + errNum, err = transforms.SetError(errNum, findErr, transforms.General, "") + } + findErr = t.SetMapValue(datas[i], info.City, t.keysCity...) + if findErr != nil { + errNum, err = transforms.SetError(errNum, findErr, transforms.General, "") + } + findErr = t.SetMapValue(datas[i], info.Country, t.keysCountry...) + if findErr != nil { + errNum, err = transforms.SetError(errNum, findErr, transforms.General, "") + } + findErr = t.SetMapValue(datas[i], info.Isp, t.keysIsp...) + if findErr != nil { + errNum, err = transforms.SetError(errNum, findErr, transforms.General, "") + } if info.CountryCode != "" { - newKeys[len(newKeys)-1] = CountryCode - SetMapValueWithPrefix(datas[i], info.CountryCode, keys[len(keys)-1], t.KeyAsPrefix, newKeys...) + findErr = t.SetMapValue(datas[i], info.CountryCode, t.keysCountryCode...) + if findErr != nil { + errNum, err = transforms.SetError(errNum, findErr, transforms.General, "") + } } if info.Latitude != "" { - newKeys[len(newKeys)-1] = Latitude - SetMapValueWithPrefix(datas[i], info.Latitude, keys[len(keys)-1], t.KeyAsPrefix, newKeys...) + findErr = t.SetMapValue(datas[i], info.Latitude, t.keysLatitude...) + if findErr != nil { + errNum, err = transforms.SetError(errNum, findErr, transforms.General, "") + } } if info.Longitude != "" { - newKeys[len(newKeys)-1] = Longitude - SetMapValueWithPrefix(datas[i], info.Longitude, keys[len(keys)-1], t.KeyAsPrefix, newKeys...) + findErr = t.SetMapValue(datas[i], info.Longitude, t.keysLongitude...) + if findErr != nil { + errNum, err = transforms.SetError(errNum, findErr, transforms.General, "") + } } if info.DistrictCode != "" { - newKeys[len(newKeys)-1] = DistrictCode - SetMapValueWithPrefix(datas[i], info.DistrictCode, keys[len(keys)-1], t.KeyAsPrefix, newKeys...) + findErr = t.SetMapValue(datas[i], info.DistrictCode, t.keysDistrictCode...) + if findErr != nil { + errNum, err = transforms.SetError(errNum, findErr, transforms.General, "") + } } } @@ -111,6 +157,37 @@ func (t *Transformer) Transform(datas []Data) ([]Data, error) { return datas, fmtErr } +//通过层级key设置value值, 如果keys不存在则不加前缀,否则加前缀 +func (t *Transformer) SetMapValue(m map[string]interface{}, val interface{}, keys ...string) error { + if len(keys) == 0 { + return nil + } + var curr map[string]interface{} + curr = m + for _, k := range keys[0 : len(keys)-1] { + finalVal, ok := curr[k] + if !ok { + n := make(map[string]interface{}) + curr[k] = n + curr = n + continue + } + //判断val是否为map[string]interface{}类型 + if curr, ok = finalVal.(map[string]interface{}); ok { + continue + } + return fmt.Errorf("SetMapValueWithPrefix failed, %v is not the type of map[string]interface{}", keys) + } + //判断val(k)是否存在 + _, exist := curr[keys[len(keys)-1]] + if exist { + curr[t.lastEleKey+"_"+keys[len(keys)-1]] = val + } else { + curr[keys[len(keys)-1]] = val + } + return nil +} + func (_ *Transformer) Description() string { //return "transform ip to country region and isp" return "获取IP的区域、国家、城市和运营商信息" diff --git a/transforms/ip/ip_test.go b/transforms/ip/ip_test.go index b256f14eb..d8d75eae1 100644 --- a/transforms/ip/ip_test.go +++ b/transforms/ip/ip_test.go @@ -16,6 +16,7 @@ func TestTransformer(t *testing.T) { Key: "ip", DataPath: "./test_data/17monipdb.dat", } + ipt.Init() data, err := ipt.Transform([]Data{{"ip": "111.2.3.4"}, {"ip": "x.x.x.x"}}) assert.Error(t, err) exp := []Data{{ @@ -65,6 +66,7 @@ func TestTransformer(t *testing.T) { Key: "multi.ip", DataPath: "./test_data/17monipdb.dat", } + ipt2.Init() data2, err2 := ipt2.Transform([]Data{{"multi": map[string]interface{}{"ip": "111.2.3.4"}}, {"multi": map[string]interface{}{"ip": "x.x.x.x"}}}) assert.Error(t, err2) exp2 := []Data{{ @@ -173,6 +175,48 @@ func TestTransformer(t *testing.T) { assert.Len(t, locatorStore.locators, 2) } +var dttest []Data + +//old: 1000000 1152 ns/op 432 B/op 16 allocs/op +//new: 2000000 621 ns/op 232 B/op 7 allocs/op +func BenchmarkIpTrans(b *testing.B) { + b.ReportAllocs() + ipt4 := &Transformer{ + Key: "multi.ip2", + DataPath: "./test_data/17monipdb.dat", + KeyAsPrefix: true, + } + ipt4.Init() + data := []Data{ + { + "multi": map[string]interface{}{ + "ip": "111.2.3.4", + "Region": "浙江", + "City": "宁波", + "Country": "中国", + "Isp": "N/A", + "ip2": "183.251.28.250", + }, + }, + //{ + // "multi": map[string]interface{}{ + // "ip2": "183.251.28.250", + // }, + //}, + //{ + // "multi": map[string]interface{}{ + // "ip2": "183.251.28.250", + // }, + //}, + //{ + // "ip2": "183.251.28.250", + //}, + } + for i := 0; i < b.N; i++ { + dttest, _ = ipt4.Transform(data) + } +} + func Test_badData(t *testing.T) { ipt := &Transformer{ Key: "ip", diff --git a/utils/models/utils.go b/utils/models/utils.go index a1bbd4d26..8d73cc471 100644 --- a/utils/models/utils.go +++ b/utils/models/utils.go @@ -425,31 +425,30 @@ func SetMapValue(m map[string]interface{}, val interface{}, coercive bool, keys return nil } -//通过层级key设置value值, 如果keys不存在则不加前缀,否则加前缀,forceSet为true时无论原来的值存不存在,都加前缀. -func SetMapValueWithPrefix(m map[string]interface{}, val interface{}, prefix string, forceAdd bool, keys ...string) error { +//通过层级key设置value值, 如果keys不存在则不加前缀,否则加前缀 +func SetMapValueExistWithPrefix(m map[string]interface{}, val interface{}, prefix string, keys ...string) error { if len(keys) == 0 { return nil } var curr map[string]interface{} curr = m - var exist bool - for i, k := range keys { - if i < len(keys)-1 { - finalVal, ok := curr[k] - if !ok { - return fmt.Errorf("SetMapValueWithPrefix failed, keys %v are non-existent", val) - } - //判断val是否为map[string]interface{}类型 - if curr, ok = finalVal.(map[string]interface{}); ok { - continue - } - return fmt.Errorf("SetMapValueWithPrefix failed, %v is not the type of map[string]interface{}", keys) + for _, k := range keys[0 : len(keys)-1] { + finalVal, ok := curr[k] + if !ok { + n := make(map[string]interface{}) + curr[k] = n + curr = n + continue } - - //判断val(k)是否存在 - _, exist = curr[k] + //判断val是否为map[string]interface{}类型 + if curr, ok = finalVal.(map[string]interface{}); ok { + continue + } + return fmt.Errorf("SetMapValueWithPrefix failed, %v is not the type of map[string]interface{}", keys) } - if exist || forceAdd { + //判断val(k)是否存在 + _, exist := curr[keys[len(keys)-1]] + if exist { curr[prefix+"_"+keys[len(keys)-1]] = val } else { curr[keys[len(keys)-1]] = val @@ -752,11 +751,18 @@ func GetMapList(data string) map[string]string { return ret } -// 判断时只有数字和字母为合法字符,规则: -// 1. 首字符为数字时,增加首字符 "K" -// 2. 首字符为非法字符时,去掉首字符(例如,如果字符串全为非法字符,则转换后为空) -// 3. 非首字符并且为非法字符时,使用 "_" 替代非法字符 -func PandoraKey(key string) (string, bool) { +func CheckPandoraKey(key string) bool { + for _, c := range key { + if (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') { + continue + } + return false + } + return true +} + +// 与PandoraKey相同,返回[]byte,期望提升性能 +func PandoraKeyBytes(key string) ([]byte, bool) { // check valid := true size := 0 @@ -780,11 +786,11 @@ func PandoraKey(key string) (string, bool) { valid = false } if valid { - return key, true + return []byte(key), true } if size <= 0 { - return "", false + return nil, false } // set @@ -793,27 +799,63 @@ func PandoraKey(key string) (string, bool) { for idx, c := range key { if c >= '0' && c <= '9' { if idx == 0 { - bp += copy(bytes, "K") + bytes[bp] = 'K' + bp++ } - bp += copy(bytes[bp:], string(c)) + bytes[bp] = byte(c) + bp++ continue } if (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') { - bp += copy(bytes[bp:], string(c)) + bytes[bp] = byte(c) + bp++ continue } if idx > 0 { - bp += copy(bytes[bp:], "_") + bytes[bp] = '_' + bp++ } } - return string(bytes), false + return bytes, false +} + +// 判断时只有数字和字母为合法字符,规则: +// 1. 首字符为数字时,增加首字符 "K" +// 2. 首字符为非法字符时,去掉首字符(例如,如果字符串全为非法字符,则转换后为空) +// 3. 非首字符并且为非法字符时,使用 "_" 替代非法字符 +func PandoraKey(key string) (string, bool) { + bytes, valid := PandoraKeyBytes(key) + if valid { + return key, true + } + return string(bytes), valid } +// 1000000 1449 ns/op 32 B/op 2 allocs/op +func DeepConvertKey(data map[string]interface{}) map[string]interface{} { + for k, v := range data { + nv, ok := v.(map[string]interface{}) + if ok { + v = DeepConvertKey(nv) + } + valid := CheckPandoraKey(k) + if !valid { + delete(data, k) + knew, _ := PandoraKeyBytes(k) + data[string(knew)] = v + } + } + return data +} + +/* +//1000000 1584 ns/op 32 B/op 2 allocs/op func DeepConvertKey(data map[string]interface{}) map[string]interface{} { for k, v := range data { nk, valid := PandoraKey(k) - if nv, ok := v.(map[string]interface{}); ok { + nv, ok := v.(map[string]interface{}) + if ok { v = DeepConvertKey(nv) } if !valid { @@ -823,3 +865,5 @@ func DeepConvertKey(data map[string]interface{}) map[string]interface{} { } return data } + +*/ diff --git a/utils/models/utils_test.go b/utils/models/utils_test.go index b22e2b321..f4d945f32 100644 --- a/utils/models/utils_test.go +++ b/utils/models/utils_test.go @@ -626,9 +626,19 @@ func TestPandoraKey(t *testing.T) { } } +func TestCheckPandoraKey(t *testing.T) { + testKeys := []string{"@timestamp", ".dot", "percent%100", "^^^^^^^^^^", "timestamp"} + expectValid := []bool{false, false, false, false, true} + for idx, key := range testKeys { + valid := CheckPandoraKey(key) + assert.Equal(t, expectValid[idx], valid) + } +} + func BenchmarkPandoraKey(b *testing.B) { b.ReportAllocs() testKeys := []string{"@timestamp", ".dot", "percent%100", "^^^^^^^^^^", "timestamp", "aaa"} + //testKeys := []string{"timestamp", "aaa"} for i := 0; i < b.N; i++ { for _, key := range testKeys { PandoraKey(key) @@ -636,6 +646,17 @@ func BenchmarkPandoraKey(b *testing.B) { } } +func BenchmarkCheckPandoraKey(b *testing.B) { + b.ReportAllocs() + testKeys := []string{"@timestamp", ".dot", "percent%100", "^^^^^^^^^^", "timestamp", "aaa"} + //testKeys := []string{"timestamp", "aaa"} + for i := 0; i < b.N; i++ { + for _, key := range testKeys { + CheckPandoraKey(key) + } + } +} + func BenchmarkDeepConvertKey(b *testing.B) { b.ReportAllocs() testDatas := []map[string]interface{}{ @@ -651,6 +672,42 @@ func BenchmarkDeepConvertKey(b *testing.B) { "percent%100": 100, "^^^^^^^^^^": "mytest", }, + { + "timestamp": "2018-07-18T10:17:36.549054846+08:00", + //"timestamp": "2018-07-19T10:17:36.549054846+08:00", + }, + { + "dot": map[string]interface{}{"dot2": "dot"}, + }, + { + "dot": map[string]interface{}{"dot2": "dot"}, + "percent100": 100, + "axsxs": "mytest", + }, + { + "timestamp": "2018-07-18T10:17:36.549054846+08:00", + //"timestamp": "2018-07-19T10:17:36.549054846+08:00", + }, + { + "dot": map[string]interface{}{"dot2": "dot"}, + }, + { + "dot": map[string]interface{}{"dot2": "dot"}, + "percent100": 100, + "axsxs": "mytest", + }, + { + "timestamp": "2018-07-18T10:17:36.549054846+08:00", + //"timestamp": "2018-07-19T10:17:36.549054846+08:00", + }, + { + "dot": map[string]interface{}{"dot2": "dot"}, + }, + { + "dot": map[string]interface{}{"dot2": "dot"}, + "percent100": 100, + "axsxs": "mytest", + }, } for i := 0; i < b.N; i++ { for _, data := range testDatas {