@@ -160,6 +160,9 @@ def main(args):
     if args.spark2_version:
         _install_service_from_local_repo(cluster, product='SPARK2')
 
+    if args.sdc_version:
+        _install_service_from_local_repo(cluster, product='STREAMSETS_DATACOLLECTOR')
+
     if args.kerberos:
         cluster.kdc_node = kdc_node
         _configure_kdc(cluster, args.kerberos_principals, args.kerberos_ticket_lifetime, quiet=quiet)
@@ -273,15 +276,6 @@ def cm_server_not_dead(primary_node):
     deployment.update_cm_config(configs={'manages_parcels': True})
 
     if args.sdc_version:
-        # We install StreamSets DataCollector using local repo /opt/cloudera/parcel-repo.
-        # Set file and folder permissions correctly.
-        commands = ['chown cloudera-scm:cloudera-scm /opt/cloudera/csd',
-                    'chown cloudera-scm:cloudera-scm /opt/cloudera/parcel-repo',
-                    'chown cloudera-scm:cloudera-scm /opt/cloudera/csd/STREAMSETS*.jar',
-                    'chmod 644 /opt/cloudera/csd/STREAMSETS*.jar',
-                    'chown cloudera-scm:cloudera-scm /opt/cloudera/parcel-repo/STREAMSETS_*']
-        primary_node.execute(' && '.join(commands))
-
         # The parcel is already present. Hence just distribute and activate it after refreshing parcel repos.
         product = 'STREAMSETS_DATACOLLECTOR'
         deployment.refresh_parcel_repos()
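For context, the distribute-and-activate sequence the comment above describes maps onto the standard Cloudera Manager parcel lifecycle. A rough sketch using the cm_api Python client, where the hostname, credentials, and parcel version are placeholders and the script's own `deployment` wrapper presumably drives the same endpoints:

import time

from cm_api.api_client import ApiResource

api = ApiResource('node-1.cluster', username='admin', password='admin')  # placeholder host/credentials
cm_cluster = api.get_cluster('cluster')  # i.e. DEFAULT_CLUSTER_NAME
parcel = cm_cluster.get_parcel('STREAMSETS_DATACOLLECTOR', '3.0.0.0')  # placeholder version
parcel.start_distribution()  # the parcel itself is already in /opt/cloudera/parcel-repo
while cm_cluster.get_parcel(parcel.product, parcel.version).stage != 'DISTRIBUTED':
    time.sleep(5)
parcel.activate()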
@@ -357,7 +351,7 @@ def cm_server_not_dead(primary_node):
 
     if args.sdc_version:
         logger.info('Configuring StreamSets Data Collector ...')
-        _configure_sdc(deployment, cluster, is_kerberos_enabled=args.kerberos)
+        _configure_sdc(deployment, cluster, args)
 
     if args.kerberos:
         logger.info('Configure Cloudera Manager for Kerberos ...')
@@ -899,7 +893,7 @@ def _setup_ssl_encryption_authentication(cluster, service):
     ]
     cluster.primary_node.execute(' && '.join(ssl_authentication_commands))
 
-def _configure_sdc(deployment, cluster, is_kerberos_enabled):
+def _configure_sdc(deployment, cluster, args):
     logger.info('Adding StreamSets service to cluster (%s) ...', DEFAULT_CLUSTER_NAME)
     datacollector_role = {'type': 'DATACOLLECTOR',
                           'hostRef': {'hostId': cluster.primary_node.host_id}}
@@ -908,18 +902,21 @@ def _configure_sdc(deployment, cluster, is_kerberos_enabled):
                                                  'type': 'STREAMSETS',
                                                  'displayName': 'StreamSets',
                                                  'roles': [datacollector_role]}])
-    # When running an application with Spark2, the following
-    # environment variables must be set before starting StreamSets Data Collector.
-    environment_variables = {'SPARK_SUBMIT_YARN_COMMAND': '/usr/bin/spark2-submit',
-                             'SPARK_KAFKA_VERSION': '0.10'}
+    if args.spark2_version:
+        # When running an application with Spark2, the following
+        # environment variables must be set before starting StreamSets Data Collector.
+        environment_variables = {'SPARK_SUBMIT_YARN_COMMAND': '/usr/bin/spark2-submit',
+                                 'SPARK_KAFKA_VERSION': '0.10',
+                                 'SPARK_HOME': '/opt/cloudera/parcels/SPARK2/lib/spark2'}
+    else:
+        # When running an application on YARN, the Spark executor requires access to the spark-submit
+        # script in the Spark installation directory, which defaults to the directory specified by the
+        # SPARK_HOME environment variable. Hence SPARK_HOME must be set before starting
+        # StreamSets Data Collector.
+        environment_variables = {'SPARK_HOME': '/opt/cloudera/parcels/CDH/lib/spark'}
     configs = {'sdc-env.sh_role_safety_valve': '\n'.join('export {}={}'.format(key, value)
                                                          for key, value in environment_variables.items())}
-    # When running an application on YARN, the Spark executor requires access to the spark-submit script located in
-    # the Spark installation directory. Default is directory specified by SPARK_HOME environment variable.
-    # Hence SPARK_HOME environment variable must be set before starting StreamSets Data Collector.
-    configs = {'sdc-env.sh_role_safety_valve': 'export SPARK_HOME=/opt/cloudera/parcels/CDH/lib/spark'}
 
-    if is_kerberos_enabled:
+    if args.kerberos:
         # Create JAAS config file on node-1. Needed to access kerberized Kafka.
         primary_node = cluster.primary_node
         sdc_principal = 'sdc/{kafka_node_name}@{realm}'.format(kafka_node_name=primary_node.fqdn,
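The hunk ends before the JAAS file is actually written, but a config of the kind this branch creates typically looks like the sketch below; the keytab and config-file paths are assumptions:

# A sketch, under assumed paths, of materializing the JAAS config on node-1 so the
# Data Collector's Kafka client can authenticate as the sdc principal via its keytab.
jaas_config = ('KafkaClient {{\n'
               '    com.sun.security.auth.module.Krb5LoginModule required\n'
               '    useKeyTab=true\n'
               '    keyTab="/etc/sdc/sdc.keytab"\n'
               '    principal="{principal}";\n'
               '}};\n').format(principal=sdc_principal)
primary_node.execute("echo '{}' > /etc/sdc/kafka_client_jaas.conf".format(jaas_config))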