Skip to content

[SPARK-17243] [Web UI] Spark 2.0 History Server won't load with very large application history #14835

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@
* limitations under the License.
*/

// Upper bound on how many applications the summary page requests from the
// REST endpoint. -1 means "no limit": the server-side take() on a negative
// count returns the entire application list.
var appLimit = -1;

// Invoked by the server-rendered history page (via an inline <script> tag)
// to pass down the configured spark.history.ui.maxApplications value before
// the application list is fetched.
function setAppLimit(newLimit) {
  appLimit = newLimit;
}

function makeIdNumeric(id) {
var strs = id.split("_");
if (strs.length < 3) {
Expand Down Expand Up @@ -89,7 +95,7 @@ $(document).ready(function() {
requestedIncomplete = getParameterByName("showIncomplete", searchString);
requestedIncomplete = (requestedIncomplete == "true" ? true : false);

$.getJSON("api/v1/applications", function(response,status,jqXHR) {
$.getJSON("api/v1/applications?limit=" + appLimit, function(response,status,jqXHR) {
var array = [];
var hasMultipleAttempts = false;
for (i in response) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ private[history] class HistoryPage(parent: HistoryServer) extends WebUIPage("")
<script src={UIUtils.prependBaseUri("/static/dataTables.rowsGroup.js")}></script> ++
<div id="history-summary" class="span12 pagination"></div> ++
<script src={UIUtils.prependBaseUri("/static/utils.js")}></script> ++
<script src={UIUtils.prependBaseUri("/static/historypage.js")}></script>
<script src={UIUtils.prependBaseUri("/static/historypage.js")}></script> ++
<script>setAppLimit({parent.maxApplications})</script>
} else if (requestedIncomplete) {
<h4>No incomplete applications found!</h4>
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import org.eclipse.jetty.servlet.{ServletContextHandler, ServletHolder}
import org.apache.spark.{SecurityManager, SparkConf}
import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.internal.Logging
import org.apache.spark.internal.config._
import org.apache.spark.status.api.v1.{ApiRootResource, ApplicationInfo, ApplicationsListResource, UIRoot}
import org.apache.spark.ui.{SparkUI, UIUtils, WebUI}
import org.apache.spark.ui.JettyUtils._
Expand Down Expand Up @@ -55,6 +56,9 @@ class HistoryServer(
// How many applications to retain
private val retainedApplications = conf.getInt("spark.history.retainedApplications", 50)

// How many applications the summary ui displays
private[history] val maxApplications = conf.get(HISTORY_UI_MAX_APPS);

// application
private val appCache = new ApplicationCache(this, retainedApplications, new SystemClock())

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,10 @@ package object config {
.intConf
.createWithDefault(100000)

// To limit how many applications are shown in the History Server summary ui
private[spark] val HISTORY_UI_MAX_APPS =
ConfigBuilder("spark.history.ui.maxApplications").intConf.createWithDefault(Integer.MAX_VALUE)

private[spark] val IO_ENCRYPTION_ENABLED = ConfigBuilder("spark.io.encryption.enabled")
.booleanConf
.createWithDefault(false)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ private[v1] class ApplicationListResource(uiRoot: UIRoot) {
def appList(
@QueryParam("status") status: JList[ApplicationStatus],
@DefaultValue("2010-01-01") @QueryParam("minDate") minDate: SimpleDateParam,
@DefaultValue("3000-01-01") @QueryParam("maxDate") maxDate: SimpleDateParam)
@DefaultValue("3000-01-01") @QueryParam("maxDate") maxDate: SimpleDateParam,
@QueryParam("limit") limit: Integer)
: Iterator[ApplicationInfo] = {
val allApps = uiRoot.getApplicationInfoList
val adjStatus = {
Expand All @@ -41,7 +42,7 @@ private[v1] class ApplicationListResource(uiRoot: UIRoot) {
}
val includeCompleted = adjStatus.contains(ApplicationStatus.COMPLETED)
val includeRunning = adjStatus.contains(ApplicationStatus.RUNNING)
allApps.filter { app =>
val appList = allApps.filter { app =>
val anyRunning = app.attempts.exists(!_.completed)
// if any attempt is still running, we consider the app to also still be running
val statusOk = (!anyRunning && includeCompleted) ||
Expand All @@ -53,6 +54,11 @@ private[v1] class ApplicationListResource(uiRoot: UIRoot) {
}
statusOk && dateOk
}
if (limit != null) {
appList.take(limit)
} else {
appList
}
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
[ {
"id" : "local-1430917381534",
"name" : "Spark shell",
"attempts" : [ {
"startTime" : "2015-05-06T13:03:00.893GMT",
"endTime" : "2015-05-06T13:03:11.398GMT",
"lastUpdated" : "",
"duration" : 10505,
"sparkUser" : "irashid",
"completed" : true,
"startTimeEpoch" : 1430917380893,
"endTimeEpoch" : 1430917391398,
"lastUpdatedEpoch" : 0
} ]
}, {
"id" : "local-1430917381535",
"name" : "Spark shell",
"attempts" : [ {
"attemptId" : "2",
"startTime" : "2015-05-06T13:03:00.893GMT",
"endTime" : "2015-05-06T13:03:00.950GMT",
"lastUpdated" : "",
"duration" : 57,
"sparkUser" : "irashid",
"completed" : true,
"startTimeEpoch" : 1430917380893,
"endTimeEpoch" : 1430917380950,
"lastUpdatedEpoch" : 0
}, {
"attemptId" : "1",
"startTime" : "2015-05-06T13:03:00.880GMT",
"endTime" : "2015-05-06T13:03:00.890GMT",
"lastUpdated" : "",
"duration" : 10,
"sparkUser" : "irashid",
"completed" : true,
"startTimeEpoch" : 1430917380880,
"endTimeEpoch" : 1430917380890,
"lastUpdatedEpoch" : 0
} ]
}, {
"id" : "local-1426533911241",
"name" : "Spark shell",
"attempts" : [ {
"attemptId" : "2",
"startTime" : "2015-03-17T23:11:50.242GMT",
"endTime" : "2015-03-17T23:12:25.177GMT",
"lastUpdated" : "",
"duration" : 34935,
"sparkUser" : "irashid",
"completed" : true,
"startTimeEpoch" : 1426633910242,
"endTimeEpoch" : 1426633945177,
"lastUpdatedEpoch" : 0
}, {
"attemptId" : "1",
"startTime" : "2015-03-16T19:25:10.242GMT",
"endTime" : "2015-03-16T19:25:45.177GMT",
"lastUpdated" : "",
"duration" : 34935,
"sparkUser" : "irashid",
"completed" : true,
"startTimeEpoch" : 1426533910242,
"endTimeEpoch" : 1426533945177,
"lastUpdatedEpoch" : 0
} ]
} ]
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with Matchers
"minDate app list json" -> "applications?minDate=2015-02-10",
"maxDate app list json" -> "applications?maxDate=2015-02-10",
"maxDate2 app list json" -> "applications?maxDate=2015-02-03T16:42:40.000GMT",
"limit app list json" -> "applications?limit=3",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what happens with limit=-1? I assume the take will just return an empty list, but it would be good to try it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

limit=-1 will actually return the whole list; I just pass the int to take, and that's how that function works. I thought this was a good default value (which is why it's the default value in the js file).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, sounds fine.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was worried about what would happen if limit was unset —but the existing tests (lines 180-182) all test that

"one app json" -> "applications/local-1422981780767",
"one app multi-attempt json" -> "applications/local-1426533911241",
"job list json" -> "applications/local-1422981780767/jobs",
Expand Down
16 changes: 13 additions & 3 deletions docs/monitoring.md
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,17 @@ The history server can be configured as follows:
<td>spark.history.retainedApplications</td>
<td>50</td>
<td>
The number of application UIs to retain. If this cap is exceeded, then the oldest
applications will be removed.
The number of applications to retain UI data for in the cache. If this cap is exceeded, then
the oldest applications will be removed from the cache. If an application is not in the cache,
it will have to be loaded from disk if it is accessed from the UI.
</td>
</tr>
<tr>
<td>spark.history.ui.maxApplications</td>
<td>Int.MaxValue</td>
<td>
The number of applications to display on the history summary page. Application UIs are still
available by accessing their URLs directly even if they are not displayed on the history summary page.
</td>
</tr>
<tr>
Expand Down Expand Up @@ -242,7 +251,8 @@ can be identified by their `[attempt-id]`. In the API listed below, when running
<br>Examples:
<br><code>?minDate=2015-02-10</code>
<br><code>?minDate=2015-02-03T16:42:40.000GMT</code>
<br><code>?maxDate=[date]</code> latest date/time to list; uses same format as <code>minDate</code>.</td>
<br><code>?maxDate=[date]</code> latest date/time to list; uses same format as <code>minDate</code>.
<br><code>?limit=[limit]</code> limits the number of applications listed.</td>
</tr>
<tr>
<td><code>/applications/[app-id]/jobs</code></td>
Expand Down