-
Notifications
You must be signed in to change notification settings - Fork 40
/
codeparser.pl
143 lines (131 loc) · 4.27 KB
/
codeparser.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#! /usr/bin/perl
use Thread;
use warnings;
use strict;
# Command line: codeparser.pl <path> [<log_prefix>]
#   <path>        starting point handed to find(1)
#   <log_prefix>  optional regex fragment that must directly precede the
#                 log keyword; defaults to the empty string
my $argc = scalar(@ARGV);
our $path;
our $log_prefix;
if ($argc == 1) {
    ($path) = @ARGV;
    $log_prefix = "";
} elsif ($argc == 2) {
    ($path, $log_prefix) = @ARGV;
} else {
    die "wrong argument";
}

# Alternation of the log keywords to look for (used as a regex fragment,
# both by grep below and by parse_file).
our $type="_WARNING|_FATAL|log.warning|LOG.warning|LOG.fatal|log.fatal|log.error|log.critical|CRITICAL|LOG.warn|WARNING_LOG|FATAL_LOG|logger.warn|log.warn|DSTREAM_WARN|DSTREAM_ERROR";
# find(1) -regex pattern selecting the source-file extensions to scan.
our $file_type=".*\\.\\(c\\|h\\|pl\\|cpp\\|php\\|py\\|java\\|cc\\)";

# Wrap a string in single quotes for /bin/sh so that whitespace and shell
# metacharacters in the command-line arguments reach find/grep unchanged.
my $shell_quote = sub {
    my ($arg) = @_;
    $arg =~ s/'/'\\''/g;
    return "'$arg'";
};
my $quoted_path      = $shell_quote->($path);
my $quoted_file_type = $shell_quote->($file_type);
my $quoted_pattern   = $shell_quote->("${log_prefix}($type)");

# List every candidate file that contains at least one log keyword.
# -print0 / -0 keep file names with whitespace intact, and "grep -l" prints
# each matching file name once, so names containing ':' are not truncated.
my @fileList = `find $quoted_path -regex $quoted_file_type -print0 | xargs -0 grep -E -l -e $quoted_pattern | sort -u`;
#our $type="WARNING";

# Called with a leading & so that the call ignores any prototype on main().
&main(@fileList);
# Scan one source file for logging calls and print one record per call.
#
# Each record is printed to the currently selected output handle as
#     <file basename>%%<line number>%%<log keyword>%%<message text>
# For a call that fits on one line the line number is that line; for a call
# spread over several lines it is the line on which the call ends, and the
# message text is the concatenation of the string fragments collected on the
# way.
#
# Reads the package globals $log_prefix (regex fragment expected directly in
# front of the keyword) and $type (alternation of log keywords).
#
# Arguments: $single_file - path of the file to scan.
# Returns:   nothing.  If the file cannot be opened, "err open file ..." is
#            printed and the file is skipped.
sub parse_file
{
    my ($single_file) = @_;
    our ($log_prefix, $type);

    # Python statements do not end with ';', so the end-of-call patterns
    # only require the semicolon for the other languages.
    my $tail = ($single_file =~ /\.py$/) ? "" : ";";

    my $input;
    if (!open($input, "<", $single_file))
    {
        print "err open file $single_file \n";
        return;
    }

    # The base name is the same for every line, so compute it once, and
    # without going through the shell.
    require File::Basename;
    my $baseName = File::Basename::basename($single_file);

    my $lineNo    = 0;
    my $log_buf   = "";   # message fragments collected so far
    my $log_type  = "";   # keyword of the call being collected
    my $log_quote;        # quote character of that call's message; undef = not seen yet
    my $log_count = 0;    # lines consumed by the current multi-line call, 0 = none pending

    while (my $line = <$input>)
    {
        chomp $line;
        $lineNo++;

        # Outside a pending call only lines containing a keyword matter.
        if ($log_count == 0 && $line !~ /${log_prefix}($type)/)
        {
            next;
        }

        # --- messages written with double quotes ---
        # Whole call on one line: keyword, "(" or ",", "text", ")" + terminator.
        if ($line =~ /${log_prefix}($type)\s*(,\s*|\().*\s*"(.*)".*\)$tail\s*(\s*|\\\s*)$/)
        {
            print $baseName,"%%",$lineNo,"%%",$1,"%%",$3,"\n";
            next;
        }
        # Call starts here and its first string fragment is on this line.
        elsif ($line =~ /${log_prefix}($type)\s*(,\s*|\()(\s*|.*)\s*"(.*?)(".*|"\s*,.*|"\),.*|"\s*,\s*\\|\\)\s*$/)
        {
            $log_buf   = $4;
            $log_count = 1;
            $log_type  = $1;
            $log_quote = '"';
            next;
        }
        # Call starts here but the line ends right after "(" or ",":
        # start with an empty buffer; the quote style is not known yet.
        elsif ($line =~ /${log_prefix}($type)\s*(,\s*|\()(\s*|\s*\\)\s*$/)
        {
            $log_buf   = "";
            $log_count = 1;
            $log_type  = $1;
            $log_quote = undef;
            next;
        }
        # Continuation line of a pending call: append its string fragment.
        elsif ($log_count > 0)
        {
            # Use the quote style the call started with; when the call
            # opened without a string, the first fragment found decides
            # (double quotes are tried first).
            if ((!defined $log_quote || $log_quote eq '"') && $line =~ /("|\\)(.*)"/)
            {
                $log_buf   = $log_buf . $2;
                $log_quote = '"';
            }
            elsif ((!defined $log_quote || $log_quote eq "'") && $line =~ /('|\\)(.*)'/)
            {
                $log_buf   = $log_buf . $2;
                $log_quote = "'";
            }
            $log_count += 1;

            # Closing ")" + terminator: the call is complete, emit it.
            if ($line =~ /\)\s*$tail\s*(\\\s*|\s*)$/)
            {
                print $baseName,"%%",$lineNo,"%%",$log_type,"%%",$log_buf,"\n";
                $log_count = 0;
                $log_type  = "";
                $log_buf   = "";
                $log_quote = undef;
            }
            next;
        }

        # --- messages written with single quotes ---
        # Only reached when no call is pending; same shapes as above.
        if ($line =~ /${log_prefix}($type)\s*(,\s*|\().*\s*'(.*)'.*\)$tail\s*(\s*|\\\s*)$/)
        {
            print $baseName,"%%",$lineNo,"%%",$1,"%%",$3,"\n";
        }
        elsif ($line =~ /${log_prefix}($type)\s*(,\s*|\()(\s*|.*)\s*'(.*?)('.*|'\s*,.*|'\),.*|'\s*,\s*\\|\\)\s*$/)
        {
            $log_buf   = $4;
            $log_count = 1;
            $log_type  = $1;
            $log_quote = "'";
        }
    }
    close $input;
    return;
}
# Run parse_file on every file in the argument list using worker threads.
#
# Arguments: list of file names; a trailing newline on each name is removed
#            before use.
# Returns:   nothing; the output is whatever parse_file prints.
sub main
{
    my @files = @_;

    # Upper bound on threads alive at the same time.  Starting one thread
    # per file without a limit can exhaust memory or the thread limit on
    # large trees.  The value is an arbitrary, conservative choice.
    my $max_threads = 16;
    my @threads;

    foreach my $single_file (@files)
    {
        chomp $single_file;

        my $thread = Thread->new(\&parse_file, $single_file);
        if (!defined $thread)
        {
            # Thread creation failed: parse the file in the current thread
            # rather than dropping it.  The & call form ignores any
            # prototype on parse_file.
            &parse_file($single_file);
            next;
        }
        push @threads, $thread;

        # Batch is full: wait for it to finish before starting more.
        if (scalar(@threads) >= $max_threads)
        {
            foreach my $running (@threads)
            {
                $running->join();
            }
            @threads = ();
        }
    }

    # Wait for the last, possibly partial, batch.
    foreach my $thread (@threads)
    {
        $thread->join();
    }
    return;
}