-
Notifications
You must be signed in to change notification settings - Fork 28.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
SPARK-1064 #102
SPARK-1064 #102
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -29,8 +29,10 @@ import org.apache.hadoop.fs._ | |
import org.apache.hadoop.fs.permission.FsPermission; | ||
import org.apache.hadoop.io.DataOutputBuffer | ||
import org.apache.hadoop.mapred.Master | ||
import org.apache.hadoop.mapreduce.MRJobConfig | ||
import org.apache.hadoop.net.NetUtils | ||
import org.apache.hadoop.security.UserGroupInformation | ||
import org.apache.hadoop.util.StringUtils | ||
import org.apache.hadoop.yarn.api._ | ||
import org.apache.hadoop.yarn.api.ApplicationConstants.Environment | ||
import org.apache.hadoop.yarn.api.protocolrecords._ | ||
|
@@ -379,9 +381,48 @@ object ClientBase { | |
|
||
/**
 * Appends the YARN and MapReduce application classpath entries to the CLASSPATH
 * variable held in `env`.
 *
 * Based on code from org.apache.hadoop.mapreduce.v2.util.MRApps.
 *
 * For each of the two classpaths the value configured in `conf` is used when present;
 * otherwise the Hadoop default obtained via reflection is used. When neither is
 * available nothing is added for that classpath.
 *
 * @param conf Hadoop configuration consulted for the configured classpath entries
 * @param env  environment map whose CLASSPATH entry is extended in place
 */
def populateHadoopClasspath(conf: Configuration, env: HashMap[String, String]): Unit = {
  // Both the configured value and the reflective default may be null, so each is wrapped
  // in Option before use; the default is by-name so it is only computed when needed.
  def resolve(configured: Array[String], default: => Array[String]): Array[String] =
    Option(configured).orElse(Option(default)).getOrElse(Array.empty[String])

  val yarnEntries = resolve(
    conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH),
    getDefaultYarnApplicationClasspath())
  val mrEntries = resolve(
    conf.getStrings("mapreduce.application.classpath"),
    getDefaultMRApplicationClasspath())

  // YARN entries first, then MapReduce entries, matching the original ordering.
  for (c <- yarnEntries ++ mrEntries) {
    Apps.addToEnvironment(env, Environment.CLASSPATH.name, c.trim)
  }
}
|
||
/**
 * Reads the default YARN application classpath from the
 * `DEFAULT_YARN_APPLICATION_CLASSPATH` constant of `MRJobConfig` via reflection.
 *
 * @return the default classpath entries, or null when `MRJobConfig` does not declare
 *         the constant in the Hadoop version on the classpath
 */
def getDefaultYarnApplicationClasspath(): Array[String] = {
  try {
    // Reflection is used instead of separate yarn-alpha / yarn-stable implementations:
    // per the review discussion, the alpha/stable distinction does not capture the
    // differences, because the APIs differ between the Hadoop 0.23 line and the 2.0 line,
    // both of which fall under yarn-alpha.
    val field = classOf[MRJobConfig].getField("DEFAULT_YARN_APPLICATION_CLASSPATH")
    field.get(null).asInstanceOf[Array[String]]
  } catch {
    // Class.getField reports a missing field with the checked NoSuchFieldException.
    // NoSuchFieldError is still handled so the previous behaviour is a strict subset.
    case _: NoSuchFieldException | _: NoSuchFieldError => null
  }
}
|
||
/**
 * Reads the default MapReduce application classpath from the
 * `DEFAULT_MAPREDUCE_APPLICATION_CLASSPATH` constant of `MRJobConfig`.
 *
 * In Hadoop 0.23, the MR application classpath comes with the YARN application
 * classpath. In Hadoop 2.0, it's an array of Strings, and in 2.2+ it's a String.
 * So we need to use reflection to retrieve it.
 *
 * @return the default classpath entries, or null when the constant is absent or holds
 *         neither a String nor an array of Strings
 */
def getDefaultMRApplicationClasspath(): Array[String] = {
  try {
    val field = classOf[MRJobConfig].getField("DEFAULT_MAPREDUCE_APPLICATION_CLASSPATH")
    // Dispatch on the runtime value: a single String is split by Hadoop's StringUtils,
    // an array is returned as-is.
    field.get(null) match {
      case joined: String => StringUtils.getStrings(joined)
      case entries: Array[String] => entries
      case _ => null
    }
  } catch {
    // Class.getField reports a missing field with the checked NoSuchFieldException.
    // NoSuchFieldError is still handled so the previous behaviour is a strict subset.
    case _: NoSuchFieldException | _: NoSuchFieldError => null
  }
}
|
||
def populateClasspath(conf: Configuration, sparkConf: SparkConf, addLog4j: Boolean, env: HashMap[String, String]) { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hey, just so I understand here: how are YARN_APPLICATION_CLASSPATH and MAPREDUCE_APPLICATION_CLASSPATH used? Is this just designed to point to the locally installed YARN/MR code? Or do users ever go and include their own application code at these locations as well?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It points to the location on machines in the cluster of the HDFS/YARN/MR code. An admin might add a library like LZO to this, but users should instead be using the distributed cache if there's a jar specific to their application that they want.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Okay sounds good. Just wanted to make sure this wasn't the main path for user application code.