Skip to content

Add support for more queries for SigLens #361

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 4 additions & 12 deletions siglens/benchmark.sh
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
#!/bin/bash

# Requires at least 700GB of free disk space on the main partition for the dataset, intermediate files, and SigLens data.
# Requires at least 300GB of free disk space on the main partition for the dataset, intermediate files, and SigLens data.

echo "Install prerequisites"
sudo apt-get install --yes git golang

echo "Get and build SigLens"
git clone https://github.com/siglens/siglens.git --branch 1.0.25
git clone https://github.com/siglens/siglens.git --branch 1.0.41
cd siglens
go mod tidy
go build -o siglens cmd/siglens/main.go
Expand All @@ -15,18 +15,10 @@ cd ..

echo "Download and unzip dataset"
wget --continue https://datasets.clickhouse.com/hits_compatible/hits.json.gz
gzip -d -f hits.json.gz

# Add the _index line, convert UserID from a string to a number, and preprocess the dataset for loading
python3 fix_hits.py

echo "Split into 10 files to increase parallelism"
rm hits.json
split -l 20000000 sighits.json splithits_
rm sighits.json
gzip -d hits.json.gz

echo "Load data into SigLens, this can take a few hours"
time python3 send_data.py
time python3 send_datawithactionline.py

echo "Run queries"
./run.sh
59 changes: 0 additions & 59 deletions siglens/fix_hits.py

This file was deleted.

60 changes: 30 additions & 30 deletions siglens/queries.spl
Original file line number Diff line number Diff line change
Expand Up @@ -6,38 +6,38 @@ AdvEngineID != 0 | stats count
* | stats dc(SearchPhrase)
* | stats min(EventDate), max(EventDate)
AdvEngineID != 0 | stats count as cnt by AdvEngineID | sort -cnt
* | stats dc(UserID) as u BY RegionID | sort -u | head 10
* | stats sum(AdvEngineID), count as c, avg(ResolutionWidth), dc(UserID) by RegionID | sort -c | head 10
MobilePhoneModel != \"\" | stats dc(UserID) as u by MobilePhoneModel | sort -u | head 10
MobilePhoneModel != \"\" | stats dc(UserID) as u by MobilePhone, MobilePhoneModel | sort -u | head 10
SearchPhrase != \"\" | stats count as c by SearchPhrase | sort -c | head 10
SearchPhrase != \"\" | stats dc(UserID) as u by SearchPhrase | sort -u | head 10
SearchPhrase != \"\" | stats count as c by SearchEngineID, SearchPhrase | sort -c | head 10
* | stats count as cnt by UserID | sort -cnt | head 10
* | stats count as cnt by UserID, SearchPhrase | sort -cnt | head 10
* | stats dc(UserID) as u BY RegionID | sort 10 -u
* | stats sum(AdvEngineID), count as c, avg(ResolutionWidth), dc(UserID) by RegionID | sort 10 -c
MobilePhoneModel != \"\" | stats dc(UserID) as u by MobilePhoneModel | sort 10 -u
MobilePhoneModel != \"\" | stats dc(UserID) as u by MobilePhone, MobilePhoneModel | sort 10 -u
SearchPhrase != \"\" | stats count as c by SearchPhrase | sort 10 -c
SearchPhrase != \"\" | stats dc(UserID) as u by SearchPhrase | sort 10 -u
SearchPhrase != \"\" | stats count as c by SearchEngineID, SearchPhrase | sort 10 -c
* | stats count as cnt by UserID | sort 10 -cnt
* | stats count as cnt by UserID, SearchPhrase | sort 10 -cnt
* | stats count by UserID, SearchPhrase | head 10
null
UserID=435090932899640449 | fields UserID
* | regex URL = \".*google.*\" | stats count
SearchPhrase != \"\" | regex URL = \".*google.*\" | stats count as c, min(eval(URL)) by SearchPhrase | sort -c | head 10
SearchPhrase != \"\" | regex Title = \".*Google.*\" | regex URL != \".*\\.google\\..*\" | stats count as c, min(eval(URL)), min(eval(Title)), dc(UserID) by SearchPhrase | sort -c | head 10
* | regex URL = \".*google.*\" | sort str(EventTime) | head 10
SearchPhrase != \"\" | sort str(EventTime) | head 10 | fields SearchPhrase
SearchPhrase != \"\" | sort str(SearchPhrase) | head 10 | fields SearchPhrase
SearchPhrase != \"\" | sort str(EventTime), str(SearchPhrase) | head 10 | fields SearchPhrase
URL != \"\" | stats avg(eval(len(URL))) as l, count as c by CounterID | where c > 100000 | sort -l | head 25
null
SearchPhrase != \"\" | regex URL = \".*google.*\" | stats count as c, min(eval(URL)) by SearchPhrase | sort 10 -c
SearchPhrase != \"\" | regex Title = \".*Google.*\" | regex URL != \".*\\.google\\..*\" | stats count as c, min(eval(URL)), min(eval(Title)), dc(UserID) by SearchPhrase | sort 10 -c
* | regex URL = \".*google.*\" | sort 10 str(EventTime)
SearchPhrase != \"\" | sort 10 str(EventTime) | fields SearchPhrase
SearchPhrase != \"\" | sort 10 str(SearchPhrase) | fields SearchPhrase
SearchPhrase != \"\" | sort 10 str(EventTime), str(SearchPhrase) | fields SearchPhrase
URL != \"\" | stats avg(eval(len(URL))) as l, count as c by CounterID | where c > 100000 | sort 25 -l
Referer != \"\" | rex field=Referer \"^https?://(?:www\\.)?(?<k>[^/]+)\" | stats avg(eval(len(Referer))) as l, count as c, min(eval(Referer)) by k | where c > 100000 | sort\n 25 -l
* | stats sum(ResolutionWidth) as sum, count as cnt | eval sum2 = sum + cnt*2, sum3 = sum + cnt*3, sum4 = sum + cnt*4, sum5 = sum + cnt*5, sum6 = sum + cnt*6, sum7 = sum + cnt*7, sum8 = sum + cnt*8, sum9 = sum + cnt*9, sum10 = sum + cnt*10, sum11 = sum + cnt*11, sum12 = sum + cnt*12, sum13 = sum + cnt*13, sum14 = sum + cnt*14, sum15 = sum + cnt*15, sum16 = sum + cnt*16, sum17 = sum + cnt*17, sum18 = sum + cnt*18, sum19 = sum + cnt*19, sum20 = sum + cnt*20, sum21 = sum + cnt*21, sum22 = sum + cnt*22, sum23 = sum + cnt*23, sum24 = sum + cnt*24, sum25 = sum + cnt*25, sum26 = sum + cnt*26, sum27 = sum + cnt*27, sum28 = sum + cnt*28, sum29 = sum + cnt*29, sum30 = sum + cnt*30, sum31 = sum + cnt*31, sum32 = sum + cnt*32, sum33 = sum + cnt*33, sum34 = sum + cnt*34, sum34 = sum + cnt*35, sum36 = sum + cnt*36, sum37 = sum + cnt*37, sum38 = sum + cnt*38, sum39 = sum + cnt*39, sum40 = sum + cnt*40, sum41 = sum + cnt*41, sum42 = sum + cnt*42, sum43 = sum + cnt*43, sum44 = sum + cnt*44, sum45 = sum + cnt*45, sum46 = sum + cnt*46, sum47 = sum + cnt*47, sum48 = sum + cnt*48, sum49 = sum + cnt*49, sum50 = sum + cnt*50, sum51 = sum + cnt*51, sum52 = sum + cnt*52, sum53 = sum + cnt*53, sum54 = sum + cnt*54, sum55 = sum + cnt*55, sum56 = sum + cnt*56, sum57 = sum + cnt*57, sum58 = sum + cnt*58, sum59 = sum + cnt*59, sum60 = sum + cnt*60, sum61 = sum + cnt*61, sum62 = sum + cnt*62, sum63 = sum + cnt*63, sum64 = sum + cnt*64, sum65 = sum + cnt*65, sum66 = sum + cnt*66, sum67 = sum + cnt*67, sum68 = sum + cnt*68, sum69 = sum + cnt*69, sum70 = sum + cnt*70, sum71 = sum + cnt*71, sum72 = sum + cnt*72, sum73 = sum + cnt*73, sum74 = sum + cnt*74, sum75 = sum + cnt*75, sum76 = sum + cnt*76, sum77 = sum + cnt*77, sum78 = sum + cnt*78, sum79 = sum + cnt*79, sum80 = sum + cnt*80, sum81 = sum + cnt*81, sum82 = sum + cnt*82, sum83 = sum + cnt*83, sum84 = sum + cnt*84, sum85 = sum + cnt*85, sum86 = sum + cnt*86, sum87 = sum + cnt*87, sum88 = sum + cnt*88, sum89 = sum + cnt*89 | fields sum, sum2, 
sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18, sum19, sum20, sum21, sum22, sum23, sum24, sum25, sum26, sum27, sum28, sum29, sum30, sum31, sum32, sum33, sum34, sum34, sum36, sum37, sum38, sum39, sum40, sum41, sum42, sum43, sum44, sum45, sum46, sum47, sum48, sum49, sum50, sum51, sum52, sum53, sum54, sum55, sum56, sum57, sum58, sum59, sum60, sum61, sum62, sum63, sum64, sum65, sum66, sum67, sum68, sum69, sum70, sum71, sum72, sum73, sum74, sum75, sum76, sum77, sum78, sum79, sum80, sum81, sum82, sum83, sum84, sum85, sum86, sum87, sum88, sum89
SearchPhrase != \"\" | stats count as c, sum(IsRefresh), avg(ResolutionWidth) by SearchEngineID, ClientIP | sort -c | head 10
SearchPhrase != \"\" | stats count as c, sum(IsRefresh), avg(ResolutionWidth) by WatchID, ClientIP | sort -c | head 10
* | stats count as c, sum(IsRefresh), avg(ResolutionWidth) by WatchID, ClientIP | sort -c | head 10
* | stats count as c by URL | sort -c | head 10
* | eval n = 1 | stats count as c by n, URL | sort -c | head 10
* | eval cp = ClientIP-1, cp2 = ClientIP-2, cp3 = ClientIP-3 | stats count as c by ClientIP, cp, cp2, cp3 | sort -c | head 10
CounterID = 62 AND DontCountHits = 0 AND IsRefresh = 0 AND URL != \"\" | where strptime(EventDate,\"%Y-%m-%d\") >= strptime(\"2013-07-01\", \"%Y-%m-%d\") AND strptime(EventDate,\"%Y-%m-%d\") <= strptime(\"2013-07-31\", \"%Y-%m-%d\") | stats count as PageViews by URL | sort -PageViews | head 10
CounterID = 62 AND DontCountHits = 0 AND IsRefresh = 0 AND Title != \"\" | where strptime(EventDate,\"%Y-%m-%d\") >= strptime(\"2013-07-01\", \"%Y-%m-%d\") AND strptime(EventDate,\"%Y-%m-%d\") <= strptime(\"2013-07-31\", \"%Y-%m-%d\") | stats count as PageViews by Title | sort -PageViews | head 10
CounterID = 62 AND IsRefresh = 0 AND IsLink != 0 AND IsDownload = 0 | where strptime(EventDate,\"%Y-%m-%d\") >= strptime(\"2013-07-01\", \"%Y-%m-%d\") AND strptime(EventDate,\"%Y-%m-%d\") <= strptime(\"2013-07-31\", \"%Y-%m-%d\") | stats count as PageViews by URL | sort -PageViews | head 1010 | tail 10 | tail 10
CounterID = 62 AND IsRefresh = 0 | where strptime(EventDate,\"%Y-%m-%d\") >= strptime(\"2013-07-01\", \"%Y-%m-%d\") AND strptime(EventDate,\"%Y-%m-%d\") <= strptime(\"2013-07-31\", \"%Y-%m-%d\") | eval Src=if(SearchEngineID=0 AND AdvEngineID=0, Referer, \"\") | rename URL as Dst | stats count as PageViews by TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst | sort -PageViews | head 1010 | tail 10 | tail 10
CounterID = 62 AND IsRefresh = 0 AND RefererHash = \"3594120000172545465\" | where TraficSourceID in(-1, 6) | where strptime(EventDate,\"%Y-%m-%d\") >= strptime(\"2013-07-01\", \"%Y-%m-%d\") AND strptime(EventDate,\"%Y-%m-%d\") <= strptime(\"2013-07-31\", \"%Y-%m-%d\") | stats count as PageViews by URLHash, EventDate | sort -PageViews | head 110 | tail 10 | tail 10
CounterID = 62 AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = \"2868770270353813622\" | where strptime(EventDate,\"%Y-%m-%d\") >= strptime(\"2013-07-01\", \"%Y-%m-%d\") AND strptime(EventDate,\"%Y-%m-%d\") <= strptime(\"2013-07-31\", \"%Y-%m-%d\") | stats count as PageViews by WindowClientWidth, WindowClientHeight | sort -PageViews | head 10010 | tail 10 | tail 10
null
SearchPhrase != \"\" | stats count as c, sum(IsRefresh), avg(ResolutionWidth) by SearchEngineID, ClientIP | sort 10 -c
SearchPhrase != \"\" | stats count as c, sum(IsRefresh), avg(ResolutionWidth) by WatchID, ClientIP | sort 10 -c
* | stats count as c, sum(IsRefresh), avg(ResolutionWidth) by WatchID, ClientIP | sort 10 -c
* | stats count as c by URL | sort 10 -c
* | stats count AS c by URL | sort 10 -c | eval n=1 | fields n, URL, c
* | stats count as c by ClientIP | sort 10 -c | eval cp = ClientIP-1, cp2 = ClientIP-2, cp3 = ClientIP-3
CounterID = 62 AND DontCountHits = 0 AND IsRefresh = 0 AND URL != \"\" | eval ptime = strptime(EventDate,\"%Y-%m-%d\") | where ptime >= strptime(\"2013-07-01\", \"%Y-%m-%d\") AND ptime <= strptime(\"2013-07-31\", \"%Y-%m-%d\") | stats count as PageViews by URL | sort 10 -PageViews
CounterID = 62 AND DontCountHits = 0 AND IsRefresh = 0 AND Title != \"\" | eval ptime = strptime(EventDate,\"%Y-%m-%d\") | where ptime >= strptime(\"2013-07-01\", \"%Y-%m-%d\") AND ptime <= strptime(\"2013-07-31\", \"%Y-%m-%d\") | stats count as PageViews by Title | sort 10 -PageViews
CounterID = 62 AND IsRefresh = 0 AND IsLink != 0 AND IsDownload = 0 | eval ptime = strptime(EventDate,\"%Y-%m-%d\") | where ptime >= strptime(\"2013-07-01\", \"%Y-%m-%d\") AND ptime <= strptime(\"2013-07-31\", \"%Y-%m-%d\") | stats count as PageViews by URL | sort 1010 -PageViews | tail 10 | tail 10
CounterID = 62 AND IsRefresh = 0 | eval ptime = strptime(EventDate,\"%Y-%m-%d\") | where ptime >= strptime(\"2013-07-01\", \"%Y-%m-%d\") AND ptime <= strptime(\"2013-07-31\", \"%Y-%m-%d\") | eval Src=if(SearchEngineID=0 AND AdvEngineID=0, Referer, \"\") | rename URL as Dst | stats count as PageViews by TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst | sort 1010 -PageViews | tail 10 | tail 10
CounterID = 62 AND IsRefresh = 0 AND RefererHash = \"3594120000172545465\" | where TraficSourceID in(-1, 6) | eval ptime = strptime(EventDate,\"%Y-%m-%d\") | where ptime >= strptime(\"2013-07-01\", \"%Y-%m-%d\") AND ptime <= strptime(\"2013-07-31\", \"%Y-%m-%d\") | stats count as PageViews by URLHash, EventDate | sort 110 -PageViews | tail 10 | tail 10
CounterID = 62 AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = \"2868770270353813622\" | eval ptime = strptime(EventDate,\"%Y-%m-%d\") | where ptime >= strptime(\"2013-07-01\", \"%Y-%m-%d\") AND ptime <= strptime(\"2013-07-31\", \"%Y-%m-%d\") | stats count as PageViews by WindowClientWidth, WindowClientHeight | sort 1010 -PageViews | tail 10 | tail 10
CounterID=62 DontCountHits = 0 IsRefresh = 0 | eval ptime = strptime(EventDate,\"%Y-%m-%d\") | where ptime >= strptime(\"2013-07-01\", \"%Y-%m-%d\") AND ptime <= strptime(\"2013-07-31\", \"%Y-%m-%d\") | eval truncTime = strftime(tonumber(strptime(EventTime, \"%Y-%m-%d %H:%M:%S\")), \"%Y-%m-%d %H:%M:00\") | stats count as PageViews by truncTime | sort 1010 -truncTime | tail 10 | tail 10
90 changes: 45 additions & 45 deletions siglens/results/c6a.4xlarge.json
Original file line number Diff line number Diff line change
@@ -1,54 +1,54 @@
{
"system": "SigLens",
"date": "2025-03-07",
"machine": "c6a.4xlarge, 700gb gp2",
"date": "2025-05-05",
"machine": "c6a.4xlarge, 300gb gp2",
"cluster_size": 1,
"tags": ["Go", "logs", "search", "SigLens", "observability"],
"load_time": 6345.886,
"load_time": 5198.83,
"data_size": 28396387,
"result": [
[0.166, 0.079, 0.090],
[0.730, 0.423, 0.411],
[0.076, 0.081, 0.082],
[0.081, 0.082, 0.080],
[0.079, 0.083, 0.081],
[0.081, 0.081, 0.082],
[0.083, 0.081, 0.085],
[0.449, 0.903, 0.284],
[6.341, 5.590, 5.617],
[7.673, 7.360, 7.446],
[1.265, 0.673, 0.653],
[1.272, 0.690, 0.700],
[0.257, 0.222, 0.244],
[0.272, 0.307, 0.346],
[0.276, 0.266, 0.244],
[0.159, 0.165, 0.273],
[0.247, 0.229, 0.220],
[0.241, 0.327, 0.308],
[0.209, 0.082, 0.082],
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My local measurements on c6a.4xlarge were fairly close to yours:

[0.210, 0.077, 0.080],
[0.722, 0.320, 0.319],
[0.215, 0.088, 0.083],
[0.212, 0.080, 0.083],
[0.215, 0.081, 0.081],
[0.216, 0.083, 0.082],
[0.216, 0.081, 0.081],
[0.739, 0.945, 0.182],
[5.812, 4.923, 4.884],
[8.323, 6.673, 6.704],
[2.273, 1.440, 0.657],
[8.465, 0.683, 0.724],
[0.360, 0.231, 0.240],
[0.490, 0.289, 0.279],
[0.434, 0.237, 0.251],
[0.328, 0.186, 0.197],
[0.425, 0.236, 0.229],
[0.425, 0.252, 0.241],
[null, null, null, ],
[1.076, 1.149, 0.273],
[4.706, 2.247, 2.271],
[6.278, 2.486, 1.685],
[11.507, 4.128, 2.961],
[8.062, 2.238, 1.460],
[13.266, 8.391, 8.317],
[10.481, 8.028, 8.040],
[13.583, 10.607, 10.656],
[9.358, 6.569, 5.751],
[134.868, 131.365, 131.101],
[0.230, 0.100, 0.090],
[0.595, 0.279, 0.290],
[0.731, 0.357, 0.353],
[0.784, 0.480, 0.475],
[0.568, 0.336, 0.340],
[0.575, 0.328, 0.329],
[0.308, 0.200, 0.196],
[10.447, 6.684, 6.661],
[10.971, 6.532, 6.528],
[2.435, 1.171, 1.174],
[7.489, 5.798, 5.781],
[2.093, 1.113, 1.114],
[5.113, 2.551, 2.550],
[10.464, 8.093, 8.051],

[0.602, 0.308, 0.306],
[0.162, 0.082, 0.079],
[0.182, 0.074, 0.080],
[0.172, 0.081, 0.086],
[0.155, 0.075, 0.086],
[0.168, 0.083, 0.080],
[0.625, 0.886, 0.178],
[5.415, 4.735, 4.679],
[7.755, 6.464, 6.525],
[2.299, 1.369, 0.639],
[9.720, 0.687, 0.683],
[0.318, 0.223, 0.243],
[0.407, 0.264, 0.259],
[0.372, 0.243, 0.240],
[0.262, 0.181, 0.181],
[0.350, 0.233, 0.229],
[0.349, 0.218, 0.218],
[null, null, null],
[1.251, 0.478, 0.479],
[32.722, 30.116, 29.918],
[6.781, 2.027, 1.952],
[33.342, 5.674, 5.526],
[44.928, 7.360, 7.137],
[26.121, 23.556, 23.550],
[22.963, 23.171, 23.025],
[30.059, 30.029, 29.992],
[14.537, 12.449, 12.391],
[null, null, null],
[0.141, 0.131, 0.123],
[0.314, 0.270, 0.273],
[0.407, 0.352, 0.381],
[0.532, 0.528, 0.554],
[0.346, 0.333, 0.419],
[65.397, 65.075, 66.119],
[95.028, 94.013, 93.870],
[10.960, 9.286, 9.354],
[10.594, 9.325, 9.323],
[3.163, 2.150, 2.136],
[68.890, 68.193, 68.379],
[2.111, 1.500, 1.496],
[2.427, 1.745, 1.738],
[null, null, null]
[1.020, 1.066, 0.256],
[10.224, 2.242, 2.237],
[13.308, 2.421, 1.683],
[23.581, 3.912, 2.874],
[14.173, 1.890, 1.362],
[12.512, 8.067, 8.055],
[10.056, 7.677, 7.659],
[12.969, 10.032, 10.020],
[12.546, 6.239, 5.523],
[131.366, 129.492, 128.019],
[0.177, 0.095, 0.094],
[0.464, 0.283, 0.273],
[0.651, 0.355, 0.325],
[0.673, 0.475, 0.448],
[0.525, 0.328, 0.333],
[0.536, 0.344, 0.317],
[0.261, 0.183, 0.189],
[12.303, 6.702, 6.646],
[11.801, 6.439, 6.456],
[2.272, 1.155, 1.140],
[7.274, 5.705, 5.726],
[2.006, 1.113, 1.106],
[4.910, 2.525, 2.540],
[10.214, 7.921, 7.923]
]
}
72 changes: 0 additions & 72 deletions siglens/send_data.py

This file was deleted.

Loading