
Commit cae2169

Merge pull request #15 from databrickslabs/add-app-enhancements
Add app enhancements
2 parents: 3e73e58 + c714cd1

17 files changed: 671 additions, 4 deletions

app/README/alert_actions.conf.spec

Lines changed: 8 additions & 0 deletions
@@ -0,0 +1,8 @@
[notebook]
python.version = python3
param._cam = <json> Active response parameters.
param.notebook = <string> Notebook
param.paramOne = <string> Field Name for Parameter One
param.paramTwo = <string> Field Name for Parameter Two
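For context, these parameters reach the action script in the JSON payload splunkd writes to its stdin when the alert fires (see `payload_format = json` and the `sys.stdin.read()` in app/bin/notebook.py below). A minimal sketch of the relevant fragment, with all values hypothetical; note that notebook.py reads the param keys lowercased (`paramone`, `paramtwo`):

import json

# Hypothetical stdin payload fragment for the [notebook] action; the real
# payload also carries sid/rid, the results file path, and more.
payload = json.loads("""
{
  "session_key": "hypothetical-session-key",
  "configuration": {
    "notebook": "/Users/user@example.com/Investigation Notebook",
    "paramone": "dest_ip",
    "paramtwo": "host"
  }
}
""")
assert payload["configuration"]["paramone"] == "dest_ip"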
Binary file added: 2.27 KB

Binary file added: 509 KB
Lines changed: 47 additions & 0 deletions
@@ -0,0 +1,47 @@
require([
    "splunkjs/mvc"
], function(mvc) {
    // Register a "dbquote" token filter that backslash-escapes double and
    // single quotes, so field values can be embedded in quoted query strings
    // via SimpleXML token substitution (e.g. $mytoken|dbquote$).
    mvc.setFilter("dbquote", function(inputValue) {
        return inputValue.replace(/"/g, '\\"').replace(/'/g, "\\'");
    });
});

require(
    [
        'jquery',
        'underscore',
        'backbone',
        "splunk.util"
    ],
    function(
        $,
        _,
        Backbone,
        splunkUtil
    ) {
        // Re-run the notebook search when the user clicks Retry.
        $("#retryButton").click(function() {
            splunkjs.mvc.Components.getInstance("notebookrun").startSearch();
        });

        // Redirect to the notebook's result URL once both the "url" and
        // "autoforward" tokens are set, whichever arrives last.
        splunkjs.mvc.Components.getInstance("submitted").on("change", function(changeEvent) {
            var tokens = splunkjs.mvc.Components.getInstance("submitted").toJSON();
            if (typeof changeEvent.changed.url != "undefined") {
                if (tokens['autoforward'] && tokens['autoforward'] == "Yes") {
                    window.location.href = changeEvent.changed.url;
                }
            }
            if (typeof changeEvent.changed.autoforward != "undefined") {
                if (tokens['url'] && tokens['url'] != "") {
                    // Read the URL from the token model: this event carries the
                    // autoforward change, so changeEvent.changed.url would be
                    // undefined here.
                    window.location.href = tokens['url'];
                }
            }
        });
    });
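For reference, the dbquote filter performs the following escaping, mirrored here in Python for comparison with the action script (a sketch, not part of the app):

def dbquote(value: str) -> str:
    # Same transformation as the JS token filter above: backslash-escape
    # double and single quotes so a field value can sit inside a quoted
    # query string without terminating it.
    return value.replace('"', '\\"').replace("'", "\\'")

print(dbquote('it\'s a "test"'))  # -> it\'s a \"test\"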

app/bin/notebook.py

Lines changed: 233 additions & 0 deletions
@@ -0,0 +1,233 @@
## Minimal set of standard modules to import
import csv      ## Result set is in CSV format
import gzip     ## Result set is gzipped
import json     ## Payload comes in JSON format
import logging  ## For specifying log levels
import sys      ## For appending the library path

## Standard modules specific to this action
import requests ## For making HTTP-based API calls
import urllib   ## For URL encoding
import time     ## For rate limiting

import re, os
import traceback

## Importing the cim_actions.py library
## A. Import make_splunkhome_path
## B. Append your library path to sys.path
## C. Import ModularAction from cim_actions
## D. Import ModularActionTimer from cim_actions
from splunk.clilib.bundle_paths import make_splunkhome_path
sys.path.append(make_splunkhome_path(["etc", "apps", "Splunk_SA_CIM", "lib"]))
from cim_actions import ModularAction, ModularActionTimer

## Retrieve a logging instance from ModularAction.
## It is required that this name ends with _modalert
logger = ModularAction.setup_logger('databricks_modalert')

## Subclass ModularAction for purposes of implementing
## a script-specific dowork() method
class NotebookModularAction(ModularAction):

    ## Initialize NotebookModularAction
    def __init__(self, settings, logger, action_name=None):
        ## Call ModularAction.__init__
        super(NotebookModularAction, self).__init__(settings, logger, action_name)
        ## Initialize param.limit
        try:
            self.limit = int(self.configuration.get('limit', 1))
            if self.limit < 1 or self.limit > 30:
                self.limit = 30
        except Exception:
            self.limit = 1

    ## Handle validation
    def validate(self, result):
        ## outer validation
        pass
        # if len(self.rids) <= 1:
        #     ## Validate param.url
        #     if not self.configuration.get('url'):
        #         raise Exception('Invalid URL requested')
        #     ## Validate param.service
        #     if (self.configuration.get('service', '')
        #             not in NotebookModularAction.VALID_SERVICES):
        #         raise Exception('Invalid service requested')
        #     ## Validate param.parameter_field
        #     if self.configuration.get('parameter_field', '') not in result:
        #         raise Exception('Parameter field does not exist in result')

    ## Do the actual work itself
    def dowork(self, result):
        ## build sourcetype
        sourcetype = 'databricks:notebook'
        self.message('Successfully started Databricks Notebook Action', status='success')

        ## Resolve the notebook path and the (optional) parameter field names
        paramOne = self.configuration.get('paramone')
        paramTwo = self.configuration.get('paramtwo')
        notebook = self.configuration.get('notebook')
        params = {}
        if paramOne in result:
            params[paramOne] = result[paramOne]
        if paramTwo and paramTwo != "" and paramTwo in result:
            params[paramTwo] = result[paramTwo]

        ## Resolve the result ID (rid), preferring the originating result
        rid = ""
        if "orig_rid" in result:
            rid = result['orig_rid']
        elif "rid" in result:
            rid = result['rid']
        elif self.settings.get('rid'):
            rid = self.settings.get('rid')
        elif self.settings.get('orig_rid'):
            rid = self.settings.get('orig_rid')

        ## Resolve the search ID (sid) the same way
        sid = ""
        if "orig_sid" in result:
            sid = result['orig_sid']
        elif "sid" in result:
            sid = result['sid']
        elif self.settings.get('sid'):
            sid = self.settings.get('sid')
        elif self.settings.get('orig_sid'):
            sid = self.settings.get('orig_sid')

        try:
            cluster_id = com.get_cluster_id(self.session_key, self.cluster_name)
            self.message("Cluster ID received: {}".format(cluster_id), status="working")

            ## Request to submit the run
            self.message("Preparing request body for execution", status="working")
            notebook_task = {"notebook_path": notebook}
            notebook_task["base_parameters"] = params

            payload = {
                # "run_name": self.run_name,
                "existing_cluster_id": cluster_id,
                "notebook_task": notebook_task,
            }

            self.message("Submitting the run", status="working")
            response = com.databricks_api(
                "post", const.RUN_SUBMIT_ENDPOINT, self.session_key, data=payload
            )

            run_id = response["run_id"]
            self.message("Successfully submitted the run with ID: {}".format(run_id))

            ## Request the run_id details to recover the drilldown URLs
            args = {"run_id": run_id}
            response = com.databricks_api("get", const.GET_RUN_ENDPOINT, self.session_key, args=args)
            result_url = ""
            output_url = response.get("run_page_url")
            if output_url:
                result_url = output_url.rstrip("/") + "/resultsOnly"
            self.message(f"Start result_url=\"{result_url}\" output_url=\"{output_url}\" End", status="success")
            ## Index an event tying this run back to the originating sid/rid
            if sid != "" and rid != "":
                self.addevent(
                    json.dumps({"_time": time.time(), "sid": sid, "rid": rid, "result_url": result_url, "output_url": output_url, "response": response, "request_params": params, "databricks_instance": self.databricks_instance, "cluster_name": self.cluster_name, "notebook": notebook}),
                    sourcetype=sourcetype)
        except Exception:
            self.message(f"Failure during job submission: {traceback.format_exc()}", status='failure', level=logging.CRITICAL)


if __name__ == "__main__":
    ## This is standard chrome for validating that
    ## the script is being executed by splunkd accordingly
    if len(sys.argv) < 2 or sys.argv[1] != "--execute":
        print("FATAL Unsupported execution mode (expected --execute flag)", file=sys.stderr)
        sys.exit(1)

    ## The entire execution is wrapped in an outer try/except
    try:
        ## Retrieve an instance of NotebookModularAction named modaction;
        ## pass the payload (sys.stdin) and logging instance
        stdindata = sys.stdin.read()
        modaction = NotebookModularAction(stdindata, logger, 'notebook')
        logger.debug(modaction.settings)
        modaction.message("About to start trying to import the Databricks code", status="starting", level=logging.CRITICAL)
        try:
            import databricks_com as com
            import databricks_const as const
            import databricks_common_utils as utils
        except Exception:
            modaction.message(f"Failure on importing Databricks libs: {traceback.format_exc()}", status='failure', level=logging.CRITICAL)

        try:
            modaction.cluster_name = utils.get_databricks_configs().get('cluster_name')
            modaction.databricks_instance = utils.get_databricks_configs().get('databricks_instance')
        except Exception:
            modaction.message(f"Failure getting cluster name config: {traceback.format_exc()}", status='failure', level=logging.CRITICAL)
        modaction.session_key = json.loads(stdindata)['session_key']

        ## Add a duration message for the "main" component using
        ## modaction.start_timer as the start time
        with ModularActionTimer(modaction, 'main', modaction.start_timer):
            ## Process the result set by opening results_file with gzip
            with gzip.open(modaction.results_file, 'rt') as fh:
                ## Iterate the result set using a dictionary reader.
                ## enumerate provides "num", which can be used as the
                ## result ID (rid)
                modaction.message("Got a file: {}".format(modaction.results_file), status="working", level=logging.CRITICAL)
                for num, result in enumerate(csv.DictReader(fh)):
                    ## results limiting
                    if num >= modaction.limit:
                        break
                    ## Set rid to row # (0->n) if unset
                    result.setdefault('rid', str(num))
                    ## Update the ModularAction instance with the current
                    ## result. This sets orig_sid/rid/orig_rid accordingly.
                    modaction.update(result)
                    ## Generate an invocation message for each result.
                    ## Tells splunkd that we are about to perform the
                    ## action on said result.
                    modaction.invoke()
                    ## Validate the invocation
                    modaction.validate(result)
                    ## This is where we do the actual work. In this case
                    ## we are calling out to an external API and creating
                    ## events based on the information returned
                    modaction.dowork(result)
                    ## rate limiting
                    time.sleep(1.6)

            ## Once we're done iterating the result set and making the
            ## appropriate API calls, write out the events
            modaction.writeevents(index="cim_modactions", source='databricks:modalert')

    ## This is standard chrome for outer exception handling
    except Exception as e:
        ## additional logging since adhoc search invocations do not write to stderr
        try:
            modaction.message(traceback.format_exc(), status='failure', level=logging.CRITICAL)
        except Exception:
            logger.critical(e)
        print("ERROR: %s" % e, file=sys.stderr)
        sys.exit(3)
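For orientation, the com.databricks_api calls above wrap the Databricks Jobs REST API. A rough sketch of the equivalent raw requests, assuming the Jobs 2.0 runs/submit and runs/get endpoints sit behind RUN_SUBMIT_ENDPOINT and GET_RUN_ENDPOINT (instance URL, token, cluster ID, and notebook path below are all placeholders):

import requests

INSTANCE = "https://adb-1234567890123456.7.azuredatabricks.net"  # hypothetical
HEADERS = {"Authorization": "Bearer <personal-access-token>"}    # hypothetical

# Submit a one-time notebook run on an existing cluster
# (the same shape dowork() builds as `payload`).
submit = requests.post(
    f"{INSTANCE}/api/2.0/jobs/runs/submit",
    headers=HEADERS,
    json={
        "existing_cluster_id": "0123-456789-abcde123",
        "notebook_task": {
            "notebook_path": "/Users/user@example.com/Investigation Notebook",
            "base_parameters": {"dest_ip": "10.1.2.3"},
        },
    },
).json()

# Fetch the run details to recover the drilldown URL, as dowork() does.
run = requests.get(
    f"{INSTANCE}/api/2.0/jobs/runs/get",
    headers=HEADERS,
    params={"run_id": submit["run_id"]},
).json()
print(run.get("run_page_url"))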

app/default/alert_actions.conf

Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
[notebook]
python.version = python3
description = Run a parametrized notebook in Databricks based on data in this event
label = Run Notebook
param._cam = {"drilldown_uri": "../TA-Databricks/launch_notebook_AR?form.orig_sid=$sid$&form.orig_rid=$rid$", "technology": [{"vendor": "Databricks", "product": "Databricks", "version": ["1.0"]}], "supports_adhoc": true, "subject": ["splunk.event"], "category": ["Information Conveyance"], "task": ["create"]}
is_custom = 1
payload_format = json
icon_path = img/alert_run_notebook.png
param.notebook =
param.paramOne =
param.paramTwo =
command = sendalert $action_name$ results_file="$results.file$" results_link="$results.url$" param.action_name=$action_name$

app/default/app.conf

Lines changed: 3 additions & 3 deletions
@@ -3,11 +3,11 @@
 state_change_requires_restart = true
 is_configured = false
 state = enabled
-build = 31
+build = 35
 
 [launcher]
 author = Databricks, Inc.
-version = 1.0.0
+version = 1.1.0
 description = The Databricks Add-on for Splunk is used to query Databricks data, and execute Databricks notebooks from Splunk.
 
 [ui]
@@ -22,4 +22,4 @@ id = TA-Databricks
 reload.addon_builder = simple
 reload.ta_databricks_account = simple
 reload.ta_databricks_settings = simple
-reload.passwords = simple
+reload.passwords = simple
Lines changed: 25 additions & 0 deletions
@@ -0,0 +1,25 @@
<form class="form-horizontal form-complex">
  <div class="control-group">
    <label class="control-label" for="notebook_notebook">Notebook</label>
    <div class="controls">
      <input type="text" name="action.notebook.param.notebook" id="notebook_notebook"/>
      <span class="help-block">
        Full path, such as: /Users/user@gmail.com/My Notebooks/Investigation Notebook
      </span>
    </div>
    <label class="control-label" for="notebook_paramOne">Param One</label>
    <div class="controls">
      <input type="text" name="action.notebook.param.paramOne" id="notebook_paramOne"/>
      <span class="help-block">
        Examples: dest_ip or host. Accepts any field; multi-value fields become semicolon-separated.
      </span>
    </div>
    <label class="control-label" for="notebook_paramTwo">Param Two</label>
    <div class="controls">
      <input type="text" name="action.notebook.param.paramTwo" id="notebook_paramTwo"/>
      <span class="help-block">
        (Optional)
      </span>
    </div>
  </div>
</form>

app/default/data/ui/nav/default.xml

Lines changed: 4 additions & 1 deletion
@@ -1,5 +1,8 @@
 <nav search_view="search" color="#1B3139">
+  <view name="databricks-intro" default="true" />
   <view name="configuration"/>
   <view name="databricks_job_execution_details"/>
+  <view name="databricks-sample-dashboard" />
+  <view name="databricks-launch-notebook" />
   <view name="search" label="Search"/>
-</nav>
+</nav>
Lines changed: 32 additions & 0 deletions
@@ -0,0 +1,32 @@
<dashboard>
  <label>Intro</label>
  <row>
    <panel>
      <title>Overview</title>
      <html>
        <img style="float: right;" src="/splunkd/__raw/servicesNS/nobody/TA-Databricks/static/appIconAlt_2x.png"></img>
        <p>The Databricks Add-on for Splunk lets Splunk teams take advantage of the cost-effective model of Databricks along with the power of AI, without asking users to leave the comfort of their Splunk interface.</p>
        <p>With the add-on, users can run ad-hoc queries against Databricks from a Splunk dashboard or search bar. Those who have notebooks or jobs in Databricks can launch them through a Splunk dashboard or in response to a Splunk search. The integration is also bi-directional, letting customers summarize noisy data or run detections in Databricks that surface in Splunk Enterprise Security. Customers can even run Splunk searches from within a Databricks notebook, so they don’t need to duplicate all of their data to get the job done.</p>
        <p>Together, the Splunk and Databricks integration lets customers reduce cost, expand the data sources they analyze, and apply a more robust analytics engine, all without changing the tools their staff use every day.</p>
      </html>
    </panel>
  </row>
  <row>
    <panel>
      <title>Integration Points</title>
      <html>
        <div>
          <img style="width: 100%; max-width: 1496px !important;" src="/static/app/TA-Databricks/img/slide-splunk-databricks-integration.png" title="Screenshot of slide showing the integration methods"></img>
        </div>
        <p>There are three main integration points, as shown in the slide above:</p>
        <ol>
          <li>This app enables running queries from Splunk against Databricks by configuring a personal access token for a service account within Databricks (<a href="databricks-sample-dashboard">example</a>). Additionally, you can launch ephemeral notebook runs or jobs. See the <a href="https://splunkbase.splunk.com/app/5416/#/details" target="_blank">app docs</a> for more detail.</li>
          <li>You can also configure the Splunk DB Connect app to run searches against Databricks via JDBC. The API used by this add-on is limited to 1,000 results per simple query, whereas JDBC can pull back a nearly unlimited amount of data. Additionally, since DB Connect supports multiple profiles, you can configure multiple connections with different levels of access. See our <a href="https://github.com/databrickslabs/splunk-integration/blob/master/docs/markdown/Splunk%20DB%20Connect%20guide%20for%20Databricks.md" target="_blank">integration docs</a> for configuration instructions.</li>
          <li>You can also send data from Databricks to Splunk via Splunk's HTTP Event Collector. This could be small sets of data, such as security alerts detected via AI on Databricks, or large sets of data such as aggregated or filtered high-volume datasets. You can also use the Splunk REST API to run queries from Databricks against data stored in Splunk.</li>
        </ol>
      </html>
    </panel>
  </row>
</dashboard>
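As a sketch of integration point 3 above, pushing an event from a Databricks notebook into Splunk via the HTTP Event Collector looks roughly like this (host, port, token, and event fields are all placeholders):

import requests

resp = requests.post(
    "https://splunk.example.com:8088/services/collector/event",  # hypothetical HEC endpoint
    headers={"Authorization": "Splunk <hec-token>"},             # hypothetical token
    json={
        "event": {"alert": "suspicious_login", "score": 0.97},   # illustrative detection output
        "sourcetype": "databricks:detection",                    # illustrative sourcetype
    },
)
resp.raise_for_status()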
