
Commit cae2169

Merge pull request #15 from databrickslabs/add-app-enhancements
Add app enhancements
2 parents: 3e73e58 + c714cd1

17 files changed: 671 additions, 4 deletions

app/README/alert_actions.conf.spec

Lines changed: 8 additions & 0 deletions
@@ -0,0 +1,8 @@
[notebook]
python.version = python3
param._cam = <json> Active response parameters.
param.notebook = <string> Notebook
param.paramOne = <string> Field Name for Parameter One
param.paramTwo = <string> Field Name for Parameter Two
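For context, these parameters reach the action script in the JSON payload splunkd writes to its stdin when the alert fires (see `payload_format = json` and the `sys.stdin.read()` in app/bin/notebook.py below). A minimal sketch of the relevant fragment, with all values hypothetical; note that notebook.py reads the param keys lowercased (`paramone`, `paramtwo`):

import json

# Hypothetical stdin payload fragment for the [notebook] action; the real
# payload also carries sid/rid, the results file path, and more.
payload = json.loads("""
{
  "session_key": "hypothetical-session-key",
  "configuration": {
    "notebook": "/Users/user@example.com/Investigation Notebook",
    "paramone": "dest_ip",
    "paramtwo": "host"
  }
}
""")
assert payload["configuration"]["paramone"] == "dest_ip"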
Binary file added: 2.27 KB

Binary file added: 509 KB
Lines changed: 47 additions & 0 deletions
@@ -0,0 +1,47 @@
require([
    "splunkjs/mvc"
], function(mvc) {
    // Register a "dbquote" token filter that backslash-escapes double and
    // single quotes, so field values can be embedded in quoted query strings
    // via SimpleXML token substitution (e.g. $mytoken|dbquote$).
    mvc.setFilter("dbquote", function(inputValue) {
        return inputValue.replace(/"/g, '\\"').replace(/'/g, "\\'");
    });
});

require(
    [
        'jquery',
        'underscore',
        'backbone',
        "splunk.util"
    ],
    function(
        $,
        _,
        Backbone,
        splunkUtil
    ) {
        // Re-run the notebook search when the user clicks Retry.
        $("#retryButton").click(function() {
            splunkjs.mvc.Components.getInstance("notebookrun").startSearch();
        });

        // Redirect to the notebook's result URL once both the "url" and
        // "autoforward" tokens are set, whichever arrives last.
        splunkjs.mvc.Components.getInstance("submitted").on("change", function(changeEvent) {
            var tokens = splunkjs.mvc.Components.getInstance("submitted").toJSON();
            if (typeof changeEvent.changed.url != "undefined") {
                if (tokens['autoforward'] && tokens['autoforward'] == "Yes") {
                    window.location.href = changeEvent.changed.url;
                }
            }
            if (typeof changeEvent.changed.autoforward != "undefined") {
                if (tokens['url'] && tokens['url'] != "") {
                    // Read the URL from the token model: this event carries the
                    // autoforward change, so changeEvent.changed.url would be
                    // undefined here.
                    window.location.href = tokens['url'];
                }
            }
        });
    });
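For reference, the dbquote filter performs the following escaping, mirrored here in Python for comparison with the action script (a sketch, not part of the app):

def dbquote(value: str) -> str:
    # Same transformation as the JS token filter above: backslash-escape
    # double and single quotes so a field value can sit inside a quoted
    # query string without terminating it.
    return value.replace('"', '\\"').replace("'", "\\'")

print(dbquote('it\'s a "test"'))  # -> it\'s a \"test\"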

app/bin/notebook.py

Lines changed: 233 additions & 0 deletions
@@ -0,0 +1,233 @@
## Minimal set of standard modules to import
import csv      ## Result set is in CSV format
import gzip     ## Result set is gzipped
import json     ## Payload comes in JSON format
import logging  ## For specifying log levels
import sys      ## For appending the library path

## Standard modules specific to this action
import requests ## For making HTTP-based API calls
import urllib   ## For URL encoding
import time     ## For rate limiting

import re, os
import traceback

## Importing the cim_actions.py library
## A. Import make_splunkhome_path
## B. Append your library path to sys.path
## C. Import ModularAction from cim_actions
## D. Import ModularActionTimer from cim_actions
from splunk.clilib.bundle_paths import make_splunkhome_path
sys.path.append(make_splunkhome_path(["etc", "apps", "Splunk_SA_CIM", "lib"]))
from cim_actions import ModularAction, ModularActionTimer

## Retrieve a logging instance from ModularAction.
## It is required that this name ends with _modalert
logger = ModularAction.setup_logger('databricks_modalert')

## Subclass ModularAction for purposes of implementing
## a script-specific dowork() method
class NotebookModularAction(ModularAction):

    ## Initialize NotebookModularAction
    def __init__(self, settings, logger, action_name=None):
        ## Call ModularAction.__init__
        super(NotebookModularAction, self).__init__(settings, logger, action_name)
        ## Initialize param.limit
        try:
            self.limit = int(self.configuration.get('limit', 1))
            if self.limit < 1 or self.limit > 30:
                self.limit = 30
        except Exception:
            self.limit = 1

    ## Handle validation
    def validate(self, result):
        ## outer validation
        pass
        # if len(self.rids) <= 1:
        #     ## Validate param.url
        #     if not self.configuration.get('url'):
        #         raise Exception('Invalid URL requested')
        #     ## Validate param.service
        #     if (self.configuration.get('service', '')
        #             not in NotebookModularAction.VALID_SERVICES):
        #         raise Exception('Invalid service requested')
        #     ## Validate param.parameter_field
        #     if self.configuration.get('parameter_field', '') not in result:
        #         raise Exception('Parameter field does not exist in result')

    ## Do the actual work itself
    def dowork(self, result):
        ## build sourcetype
        sourcetype = 'databricks:notebook'
        self.message('Successfully started Databricks Notebook Action', status='success')

        ## Resolve the notebook path and the (optional) parameter field names
        paramOne = self.configuration.get('paramone')
        paramTwo = self.configuration.get('paramtwo')
        notebook = self.configuration.get('notebook')
        params = {}
        if paramOne in result:
            params[paramOne] = result[paramOne]
        if paramTwo and paramTwo != "" and paramTwo in result:
            params[paramTwo] = result[paramTwo]

        ## Resolve the result ID (rid), preferring the originating result
        rid = ""
        if "orig_rid" in result:
            rid = result['orig_rid']
        elif "rid" in result:
            rid = result['rid']
        elif self.settings.get('rid'):
            rid = self.settings.get('rid')
        elif self.settings.get('orig_rid'):
            rid = self.settings.get('orig_rid')

        ## Resolve the search ID (sid) the same way
        sid = ""
        if "orig_sid" in result:
            sid = result['orig_sid']
        elif "sid" in result:
            sid = result['sid']
        elif self.settings.get('sid'):
            sid = self.settings.get('sid')
        elif self.settings.get('orig_sid'):
            sid = self.settings.get('orig_sid')

        try:
            cluster_id = com.get_cluster_id(self.session_key, self.cluster_name)
            self.message("Cluster ID received: {}".format(cluster_id), status="working")

            ## Request to submit the run
            self.message("Preparing request body for execution", status="working")
            notebook_task = {"notebook_path": notebook}
            notebook_task["base_parameters"] = params

            payload = {
                # "run_name": self.run_name,
                "existing_cluster_id": cluster_id,
                "notebook_task": notebook_task,
            }

            self.message("Submitting the run", status="working")
            response = com.databricks_api(
                "post", const.RUN_SUBMIT_ENDPOINT, self.session_key, data=payload
            )

            run_id = response["run_id"]
            self.message("Successfully submitted the run with ID: {}".format(run_id))

            ## Request the run_id details to recover the drilldown URLs
            args = {"run_id": run_id}
            response = com.databricks_api("get", const.GET_RUN_ENDPOINT, self.session_key, args=args)
            result_url = ""
            output_url = response.get("run_page_url")
            if output_url:
                result_url = output_url.rstrip("/") + "/resultsOnly"
            self.message(f"Start result_url=\"{result_url}\" output_url=\"{output_url}\" End", status="success")
            ## Index an event tying this run back to the originating sid/rid
            if sid != "" and rid != "":
                self.addevent(
                    json.dumps({"_time": time.time(), "sid": sid, "rid": rid, "result_url": result_url, "output_url": output_url, "response": response, "request_params": params, "databricks_instance": self.databricks_instance, "cluster_name": self.cluster_name, "notebook": notebook}),
                    sourcetype=sourcetype)
        except Exception:
            self.message(f"Failure during job submission: {traceback.format_exc()}", status='failure', level=logging.CRITICAL)


if __name__ == "__main__":
    ## This is standard chrome for validating that
    ## the script is being executed by splunkd accordingly
    if len(sys.argv) < 2 or sys.argv[1] != "--execute":
        print("FATAL Unsupported execution mode (expected --execute flag)", file=sys.stderr)
        sys.exit(1)

    ## The entire execution is wrapped in an outer try/except
    try:
        ## Retrieve an instance of NotebookModularAction named modaction;
        ## pass the payload (sys.stdin) and logging instance
        stdindata = sys.stdin.read()
        modaction = NotebookModularAction(stdindata, logger, 'notebook')
        logger.debug(modaction.settings)
        modaction.message("About to start trying to import the Databricks code", status="starting", level=logging.CRITICAL)
        try:
            import databricks_com as com
            import databricks_const as const
            import databricks_common_utils as utils
        except Exception:
            modaction.message(f"Failure on importing Databricks libs: {traceback.format_exc()}", status='failure', level=logging.CRITICAL)

        try:
            modaction.cluster_name = utils.get_databricks_configs().get('cluster_name')
            modaction.databricks_instance = utils.get_databricks_configs().get('databricks_instance')
        except Exception:
            modaction.message(f"Failure getting cluster name config: {traceback.format_exc()}", status='failure', level=logging.CRITICAL)
        modaction.session_key = json.loads(stdindata)['session_key']

        ## Add a duration message for the "main" component using
        ## modaction.start_timer as the start time
        with ModularActionTimer(modaction, 'main', modaction.start_timer):
            ## Process the result set by opening results_file with gzip
            with gzip.open(modaction.results_file, 'rt') as fh:
                ## Iterate the result set using a dictionary reader.
                ## enumerate provides "num", which can be used as the
                ## result ID (rid)
                modaction.message("Got a file: {}".format(modaction.results_file), status="working", level=logging.CRITICAL)
                for num, result in enumerate(csv.DictReader(fh)):
                    ## results limiting
                    if num >= modaction.limit:
                        break
                    ## Set rid to row # (0->n) if unset
                    result.setdefault('rid', str(num))
                    ## Update the ModularAction instance with the current
                    ## result. This sets orig_sid/rid/orig_rid accordingly.
                    modaction.update(result)
                    ## Generate an invocation message for each result.
                    ## Tells splunkd that we are about to perform the
                    ## action on said result.
                    modaction.invoke()
                    ## Validate the invocation
                    modaction.validate(result)
                    ## This is where we do the actual work. In this case
                    ## we are calling out to an external API and creating
                    ## events based on the information returned
                    modaction.dowork(result)
                    ## rate limiting
                    time.sleep(1.6)

            ## Once we're done iterating the result set and making the
            ## appropriate API calls, write out the events
            modaction.writeevents(index="cim_modactions", source='databricks:modalert')

    ## This is standard chrome for outer exception handling
    except Exception as e:
        ## additional logging since adhoc search invocations do not write to stderr
        try:
            modaction.message(traceback.format_exc(), status='failure', level=logging.CRITICAL)
        except Exception:
            logger.critical(e)
        print("ERROR: %s" % e, file=sys.stderr)
        sys.exit(3)
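For orientation, the com.databricks_api calls above wrap the Databricks Jobs REST API. A rough sketch of the equivalent raw requests, assuming the Jobs 2.0 runs/submit and runs/get endpoints sit behind RUN_SUBMIT_ENDPOINT and GET_RUN_ENDPOINT (instance URL, token, cluster ID, and notebook path below are all placeholders):

import requests

INSTANCE = "https://adb-1234567890123456.7.azuredatabricks.net"  # hypothetical
HEADERS = {"Authorization": "Bearer <personal-access-token>"}    # hypothetical

# Submit a one-time notebook run on an existing cluster
# (the same shape dowork() builds as `payload`).
submit = requests.post(
    f"{INSTANCE}/api/2.0/jobs/runs/submit",
    headers=HEADERS,
    json={
        "existing_cluster_id": "0123-456789-abcde123",
        "notebook_task": {
            "notebook_path": "/Users/user@example.com/Investigation Notebook",
            "base_parameters": {"dest_ip": "10.1.2.3"},
        },
    },
).json()

# Fetch the run details to recover the drilldown URL, as dowork() does.
run = requests.get(
    f"{INSTANCE}/api/2.0/jobs/runs/get",
    headers=HEADERS,
    params={"run_id": submit["run_id"]},
).json()
print(run.get("run_page_url"))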

app/default/alert_actions.conf

Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
[notebook]
python.version = python3
description = Run a parametrized notebook in Databricks based on data in this event
label = Run Notebook
param._cam = {"drilldown_uri": "../TA-Databricks/launch_notebook_AR?form.orig_sid=$sid$&form.orig_rid=$rid$", "technology": [{"vendor": "Databricks", "product": "Databricks", "version": ["1.0"]}], "supports_adhoc": true, "subject": ["splunk.event"], "category": ["Information Conveyance"], "task": ["create"]}
is_custom = 1
payload_format = json
icon_path = img/alert_run_notebook.png
param.notebook =
param.paramOne =
param.paramTwo =
command = sendalert $action_name$ results_file="$results.file$" results_link="$results.url$" param.action_name=$action_name$

app/default/app.conf

Lines changed: 3 additions & 3 deletions
@@ -3,11 +3,11 @@
 state_change_requires_restart = true
 is_configured = false
 state = enabled
-build = 31
+build = 35
 
 [launcher]
 author = Databricks, Inc.
-version = 1.0.0
+version = 1.1.0
 description = The Databricks Add-on for Splunk is used to query Databricks data, and execute Databricks notebooks from Splunk.
 
 [ui]
@@ -22,4 +22,4 @@ id = TA-Databricks
 reload.addon_builder = simple
 reload.ta_databricks_account = simple
 reload.ta_databricks_settings = simple
-reload.passwords = simple
+reload.passwords = simple
Lines changed: 25 additions & 0 deletions
@@ -0,0 +1,25 @@
<form class="form-horizontal form-complex">
  <div class="control-group">
    <label class="control-label" for="notebook_notebook">Notebook</label>
    <div class="controls">
      <input type="text" name="action.notebook.param.notebook" id="notebook_notebook"/>
      <span class="help-block">
        Full path, such as: /Users/user@gmail.com/My Notebooks/Investigation Notebook
      </span>
    </div>
    <label class="control-label" for="notebook_paramOne">Param One</label>
    <div class="controls">
      <input type="text" name="action.notebook.param.paramOne" id="notebook_paramOne"/>
      <span class="help-block">
        Examples: dest_ip or host. Accepts any field; multi-value fields become semicolon-separated.
      </span>
    </div>
    <label class="control-label" for="notebook_paramTwo">Param Two</label>
    <div class="controls">
      <input type="text" name="action.notebook.param.paramTwo" id="notebook_paramTwo"/>
      <span class="help-block">
        (Optional)
      </span>
    </div>
  </div>
</form>

app/default/data/ui/nav/default.xml

Lines changed: 4 additions & 1 deletion
@@ -1,5 +1,8 @@
 <nav search_view="search" color="#1B3139">
+  <view name="databricks-intro" default="true" />
   <view name="configuration"/>
   <view name="databricks_job_execution_details"/>
+  <view name="databricks-sample-dashboard" />
+  <view name="databricks-launch-notebook" />
   <view name="search" label="Search"/>
-</nav>
+</nav>
Lines changed: 32 additions & 0 deletions
@@ -0,0 +1,32 @@
<dashboard>
  <label>Intro</label>
  <row>
    <panel>
      <title>Overview</title>
      <html>
        <img style="float: right;" src="/splunkd/__raw/servicesNS/nobody/TA-Databricks/static/appIconAlt_2x.png"></img>
        <p>The Databricks Add-on for Splunk lets Splunk teams take advantage of the cost-effective model of Databricks along with the power of AI, without asking users to leave the comfort of their Splunk interface.</p>
        <p>With the add-on, users can run ad-hoc queries against Databricks from a Splunk dashboard or search bar. Those who have notebooks or jobs in Databricks can launch them through a Splunk dashboard or in response to a Splunk search. The integration is also bi-directional, letting customers summarize noisy data or run detections in Databricks that surface in Splunk Enterprise Security. Customers can even run Splunk searches from within a Databricks notebook, so they don’t need to duplicate all of their data to get the job done.</p>
        <p>Together, the Splunk and Databricks integration lets customers reduce cost, expand the data sources they analyze, and apply a more robust analytics engine, all without changing the tools their staff use every day.</p>
      </html>
    </panel>
  </row>
  <row>
    <panel>
      <title>Integration Points</title>
      <html>
        <div>
          <img style="width: 100%; max-width: 1496px !important;" src="/static/app/TA-Databricks/img/slide-splunk-databricks-integration.png" title="Screenshot of slide showing the integration methods"></img>
        </div>
        <p>There are three main integration points, as shown in the slide above:</p>
        <ol>
          <li>This app enables running queries from Splunk against Databricks by configuring a personal access token for a service account within Databricks (<a href="databricks-sample-dashboard">example</a>). Additionally, you can launch ephemeral notebook runs or jobs. See the <a href="https://splunkbase.splunk.com/app/5416/#/details" target="_blank">app docs</a> for more detail.</li>
          <li>You can also configure the Splunk DB Connect app to run searches against Databricks via JDBC. The API used by this add-on is limited to 1,000 results per simple query, whereas JDBC can pull back a nearly unlimited amount of data. Additionally, since DB Connect supports multiple profiles, you can configure multiple connections with different levels of access. See our <a href="https://github.com/databrickslabs/splunk-integration/blob/master/docs/markdown/Splunk%20DB%20Connect%20guide%20for%20Databricks.md" target="_blank">integration docs</a> for configuration instructions.</li>
          <li>You can also send data from Databricks to Splunk via Splunk's HTTP Event Collector. This could be small sets of data, such as security alerts detected via AI on Databricks, or large sets of data such as aggregated or filtered high-volume datasets. You can also use the Splunk REST API to run queries from Databricks against data stored in Splunk.</li>
        </ol>
      </html>
    </panel>
  </row>
</dashboard>
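As a sketch of integration point 3 above, pushing an event from a Databricks notebook into Splunk via the HTTP Event Collector looks roughly like this (host, port, token, and event fields are all placeholders):

import requests

resp = requests.post(
    "https://splunk.example.com:8088/services/collector/event",  # hypothetical HEC endpoint
    headers={"Authorization": "Splunk <hec-token>"},             # hypothetical token
    json={
        "event": {"alert": "suspicious_login", "score": 0.97},   # illustrative detection output
        "sourcetype": "databricks:detection",                    # illustrative sourcetype
    },
)
resp.raise_for_status()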
