Linux(Ubuntu 20.04) CLI를 활용한 Weblog Analysis
- Datetime / SIP / Method / Payload / Version / ResponseCode / ResponseByte
- About 154 MB; includes 60 million lines of session data
# Plot the daily HTTP session series from srv1_access_daily.tsv as boxes,
# with the first column parsed as a %Y-%m-%d date.
feedgnuplot --domain --timefmt "%Y-%m-%d" --with "boxes lt -1" --legend 0 "daily HTTP Session" \
  < srv1_access_daily.tsv
- Daily SESSION, SIPCNT, SESS/SIPCNT visualization
# Keep only rows whose 3rd column exceeds 100, then plot column 4 against the
# date in column 1.
awk '$3 > 100 {print $1 "\t" $4}' sess_ovr_sipcnt.tsv \
  | feedgnuplot --domain --timefmt '%Y-%m-%d' --lines --points --legend 0 "SESS/SIPCNT"
- upper 400
- SESS/SIPCNT > 400 인 일자를 ddos_event.tsv로 생성
# For every date listed in column 1 of ddos_event.tsv, copy that day's records
# (rows whose first field equals the date) out of the access log into
# "<date>_ddos_evt.tsv".
# The output name must be built from the loop variable; the previous "$DATE"
# was never set, so every day was written to the same "_ddos_evt.tsv" file and
# each iteration overwrote the one before it.
awk 'NF {print $1}' ddos_event.tsv | while read -r d
do
  zcat ../srv1_accesslog.gz | awk -v date="$d" '$1 == date' > "${d}_ddos_evt.tsv"
done
-
# Count records per distinct value of field 2 (parsed by the plot as
# %H:%M:%S) in the 2017-07-03 event file and draw the counts as boxes.
zcat 2017-07-03_ddos_evt.tsv.gz \
  | awk '{print $2}' \
  | sort \
  | uniq -c \
  | awk '{print $2 "\t" $1}' \
  | feedgnuplot --domain --timefmt "%H:%M:%S" --with 'boxes lt -1' --legend 0 "2017-07-03 sps"
-
# Number of distinct field-3 values per field-2 value:
#   1) reduce the log to unique (field 2, field 3) pairs,
#   2) count how many pairs share each field-2 value,
#   3) write "<field 2>\t<count>" rows to 2017-07-03.sipsec.tsv.
zcat 2017-07-03_ddos_evt.tsv.gz \
  | awk '{print $2 "\t" $3}' \
  | sort -u \
  | awk '{print $1}' \
  | sort \
  | uniq -c \
  | awk '{print $2 "\t" $1}' \
  > 2017-07-03.sipsec.tsv
-
# Draw the rows of 2017-07-03.sipsec.tsv as boxes on a %H:%M:%S time axis.
feedgnuplot --domain --timefmt "%H:%M:%S" --with 'boxes lt 3' --legend 0 "SIP per Second" \
  < 2017-07-03.sipsec.tsv
-
# Line-and-point plot of spm.tsv on a %H:%M time axis.
feedgnuplot --domain --timefmt "%H:%M" --lines --points --legend 0 "SESSION PER MINUTE" \
  < spm.tsv
# For each key listed in the file `min`, look up column 2 of the matching row
# in spm.tsv and in sipmain.tsv, emit "key spm sip", and append the ratio
# (spm+1)/(sip+1) as a tab-separated last column. The +1 on both terms keeps
# the denominator non-zero when a count is 0 or missing.
for minute in $(cat min)
do
  sess=$(awk -v min="$minute" '$1 == min {print $2}' spm.tsv)
  sips=$(awk -v min="$minute" '$1 == min {print $2}' sipmain.tsv)
  echo $minute $sess $sips
done \
  | awk '{print $0 "\t" ($2+1)/($3+1)}' \
  > ddos_min.tsv
# Plot the three value columns of ddos_min.tsv against the %H:%M key; the
# third series (index 2, the ratio) is drawn on the secondary y axis.
feedgnuplot --domain --timefmt "%H:%M" --lines --points --y2 2 \
    --legend 0 "Session per Minute" \
    --legend 1 "Nr. of SIP per Minute" \
    --legend 2 "SPM / SIPMIN" \
  < ddos_min.tsv
- 2017-07-03 15:00 ~ 19:00 / 20:30 ~ 21:00
-
Revisiting attack IP
# Group the values in column 2 of ts_min_sip by how many lines they occur on:
# a value seen N times is appended to the file "N.revisit".
# (presumably ts_min_sip holds one minute/IP pair per line - verify)
# The loop was previously collapsed onto a single line with no separators
# between `read line`, `do`, the assignments and `done`, which is a syntax
# error; `read` now splits the "count value" pair itself instead of spawning
# two awk processes per line.
# Note: files are opened in append mode, so re-running adds duplicate entries.
awk '{print $2}' ts_min_sip | sort | uniq -c | while read -r count ip
do
  echo "$ip" >> "${count}.revisit"
done
# Ten most frequent field-3 values (source IPs) in the 2017-07-03 event file.
# The recorded output used to be fused onto the command line, where it would
# have been passed to `head` as file arguments; it is kept below as comments.
zcat 2017-07-03_ddos_evt.tsv.gz | awk '{print $3}' | sort | uniq -c | sort -rn | head
# 200465 IP0041058
# 118835 IP0040986
#  70076 IP0001113
#  68682 IP0040922
#  29853 IP0084544
#  22450 IP1062364
#  18371 IP0000782
#  17692 IP0001719
#  11529 IP0173656
#   7691 IP0001227
# Count responses per (field 1, field 7) pair - date and response code - and
# keep only the 4XX rows, written as "date\tcode\tcount" to 400_rcode.tsv.
# Fixes: the final awk program was missing its closing quote (the redirection
# was swallowed into the unterminated string), and the lower bound compared
# against the string "400", which forces a string comparison; both bounds are
# now numeric.
zcat srv1_accesslog.gz \
  | awk '$7 ~ /^[12345]/ {print $1 "\t" $7}' \
  | sort \
  | uniq -c \
  | awk '{print $2 "\t" $3 "\t" $1}' \
  | awk '$2 >= 400 && $2 < 500' \
  > 400_rcode.tsv
# Show the ten largest values of column 3 (the count) in 400_rcode.tsv.
sort -rnk 3 400_rcode.tsv | head | awk '{print $3}'
13851
13574
5228
3583
3166
3109
2976
2760
2698
2665
- Response code 4XX record visualization
# Histogram of the count column (column 3) of 400_rcode.tsv.
# Fixes: typographic quotes and en-dashes (left by a word processor) replaced
# with ASCII quotes and `--` so the shell and feedgnuplot can parse the
# command, and the input name corrected to 400_rcode.tsv, the file the
# neighbouring commands write and read.
sort -rnk 3 400_rcode.tsv | awk '{print $3}' | feedgnuplot --histogram 0 --ymax 5
# Scatter the count column (3) of 400_rcode.tsv against the date column (1).
sort -rnk 3 400_rcode.tsv \
  | awk '{print $1 "\t" $3}' \
  | feedgnuplot --domain --timefmt '%Y-%m-%d' --points
- Response Code 4XX SIP
# Count responses per (field 3, field 7) pair - source IP and response code -
# and keep the rows whose code is in [400, 500), written as
# "ip\tcode\tcount" to sip_rcode.tsv.
zcat srv1_accesslog.gz \
  | awk '$7 ~ /^[12345]/ {print $3 "\t" $7}' \
  | sort \
  | uniq -c \
  | awk '$3 >= 400 && $3 < 500 {print $2 "\t" $3 "\t" $1}' \
  > sip_rcode.tsv
# Ten rows of sip_rcode.tsv with the largest count (column 3).
sort -rnk 3 sip_rcode.tsv | head
IP1093735 404 17010
IP0053005 404 11867
IP0008180 404 11730
IP0013767 404 11297
IP0099074 404 10495
IP0002194 404 10088
IP1056836 404 10049
IP1086971 404 9032
IP1087023 404 7962
IP1056822 404 6520
- 4XX
# Save the 100 rows of sip_rcode.tsv with the largest count (column 3) as
# top_sip.ip; rows keep all three columns (ip, code, count).
sort -rnk 3 sip_rcode.tsv | head -n 100 > top_sip.ip
# Write the complete access log of every IP listed in top_sip.ip to "<ip>.log".
# top_sip.ip rows are "ip<TAB>code<TAB>count", and one IP can appear on
# several rows (one per response code), so only column 1 is taken and
# de-duplicated. Iterating over every word of the file, as before, also
# scanned the whole log for each code and count value and produced junk files
# such as "404.log".
# The leading "> " continuation prompts pasted from the terminal were removed;
# inside a script they are output redirections, not part of the loop.
awk 'NF {print $1}' top_sip.ip | sort -u | while read -r ip
do
  zcat srv1_accesslog.gz | awk -v sip="$ip" '$3 == sip' > "${ip}.log"
done
# From all files matching IP*, keep "field1\tfield7" for records whose 7th
# field is non-empty and lies in [200, 300) or [400, 500).
awk '$7 != "" && (($7 >= 200 && $7 < 300) || ($7 >= 400 && $7 < 500)) {print $1 "\t" $7}' IP* \
  > 2XX_4XX.log
# Per-date counts of 2xx and 4xx responses from 2XX_4XX.log ("date\tcode"),
# plotted as two point series.
# Fixes relative to the previous pipeline:
#   * int(code/200)-int(code/400) evaluates to 1 for 4xx codes as well as 2xx
#     codes, so the "2XX" series also counted every 4xx response; the classes
#     are now tested with explicit ranges.
#   * the streaming accumulator printed an empty first row and never emitted
#     the final date (no END block).
#   * 2XX_4XX.log is a concatenation of per-IP logs, so rows for one date are
#     not adjacent; totals are now kept per date in arrays instead of relying
#     on adjacency.
awk '{
       day[$1]
       if ($2 >= 200 && $2 < 300)      ok[$1]++
       else if ($2 >= 400 && $2 < 500) bad[$1]++
     }
     END {
       for (d in day) print d "\t" (ok[d] + 0) "\t" (bad[d] + 0)
     }' 2XX_4XX.log \
  | sort \
  | feedgnuplot --domain --points --timefmt "%Y-%m-%d" --title "2xx AND 4xx Response Frequency" --legend 0 "2XX" --legend 1 "4XX"
# Twenty most frequent "field1_field3_class" combinations across all IP*
# files, where class is field 7 rounded down to a multiple of 100.
awk '$7 != "" {print $1 "\t" $3 "\t" $7}' IP* \
  | sort \
  | awk '{print $1 "_" $2 "_" int($3/100)*100}' \
  | uniq -c \
  | sort -rn \
  | head -n 20
- IP0053005
# Per-date response profile for IP0053005: "date\tc1\tc2\tc3\tc4".
# With the response code rounded down to its class (100..500), the four
# counters are cumulative rather than per-class: c1 counts responses with
# class >= 200, c2 >= 300, c3 >= 400 and c4 >= 500 (e.g. 4xx only = c3 - c4).
# The column arithmetic is unchanged; what is fixed is the accumulator, which
# printed an empty first row and never emitted the last date because it only
# flushed when the date changed.
awk '$7 != "" {print $1 "\t" $7}' IP0053005.log \
  | sort \
  | awk '{
      cls = int($2 / 100) * 100
      if (NR > 1 && $1 != prv) {
        print prv "\t" r2 "\t" r3 "\t" r4 "\t" r5
        r2 = r3 = r4 = r5 = 0
      }
      prv = $1
      r2 += int(cls / 200) - int(cls / 400)
      r3 += int(cls / 300)
      r4 += int(cls / 400)
      r5 += int(cls / 500)
    }
    END {
      # flush the final date, which no later row triggers
      if (NR > 0) print prv "\t" r2 "\t" r3 "\t" r4 "\t" r5
    }'
>=200 >=300 >=400 >=500 (columns are cumulative: responses whose status class is at or above the label)
2017-07-04 58 3 3 0
2017-08-01 13664 11944 11837 4
2017-09-21 63 4 4 0
2017-10-10 44 2 2 0
2017-10-11 318 21 20 1
2017-10-13 3 2 2 0
2017-10-20 36 8 3 0
2018-01-03 80 2 1 0
- IP0008180
# Per-date response profile for IP0008180: "date\tc1\tc2\tc3\tc4".
# With the response code rounded down to its class (100..500), the four
# counters are cumulative rather than per-class: c1 counts responses with
# class >= 200, c2 >= 300, c3 >= 400 and c4 >= 500 (e.g. 4xx only = c3 - c4).
# The column arithmetic is unchanged; what is fixed is the accumulator, which
# printed an empty first row and never emitted the last date because it only
# flushed when the date changed.
awk '$7 != "" {print $1 "\t" $7}' IP0008180.log \
  | sort \
  | awk '{
      cls = int($2 / 100) * 100
      if (NR > 1 && $1 != prv) {
        print prv "\t" r2 "\t" r3 "\t" r4 "\t" r5
        r2 = r3 = r4 = r5 = 0
      }
      prv = $1
      r2 += int(cls / 200) - int(cls / 400)
      r3 += int(cls / 300)
      r4 += int(cls / 400)
      r5 += int(cls / 500)
    }
    END {
      # flush the final date, which no later row triggers
      if (NR > 0) print prv "\t" r2 "\t" r3 "\t" r4 "\t" r5
    }'
>=200 >=300 >=400 >=500 (columns are cumulative: responses whose status class is at or above the label)
2017-03-13 270 74 28 0
2017-03-14 12688 11837 11680 6