Commit dd87a3f

Add env. variables for spark2 (#27)
1 parent e39aaf6

File tree

3 files changed: 21 additions, 58 deletions

images/build_images

Lines changed: 4 additions & 1 deletion
@@ -30,6 +30,8 @@ CSD_URL_TEMPLATE = 'http://{s3_bucket}/datacollector/{build}/csd/STREAMSETS-{0}.
 IMAGE_NAME_TEMPLATE = 'streamsets/clusterdock:topology_cdh-streamsets_datacollector-{}'
 PARCEL_URL_TEMPLATE = 'http://{s3_bucket}/datacollector/{build}/parcel/STREAMSETS_DATACOLLECTOR-{0}-el6.parcel'
 PARCEL_MANIFEST_URL_TEMPLATE = 'http://{s3_bucket}/datacollector/{build}/parcel/manifest.json'
+# Name of service in CDH cluster.
+SDC_PRODUCT_NAME = 'STREAMSETS_DATACOLLECTOR'


 def main():
@@ -50,7 +52,7 @@ def main():
     parser.add_argument('-p', '--push', help='Push Docker images after building', action='store_true')
     args = parser.parse_args()

-    image_folder = Path(Path(__file__).parent, 'sdc').resolve()
+    image_folder = Path(Path(__file__).parent, 'cloudera_service').resolve()

     if args.dry_run:
         logger.info('Doing dry-run of tool ...')
@@ -63,6 +65,7 @@ def main():
         cmd_elements = ['docker build -t {}'.format(image_name),
                         '--build-arg CSD_URL={}'.format(csd_url),
                         '--build-arg PARCEL_URL={}'.format(parcel_url),
+                        '--build-arg PRODUCT={}'.format(SDC_PRODUCT_NAME),
                         str(image_folder)]
         cmd = ' '.join(cmd_elements)
         logger.debug('Running Docker build command (%s) ...', cmd)
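For context, the loop above joins cmd_elements into a single docker build invocation. A minimal sketch of the resulting command string, using hypothetical placeholder values (the image name, URLs, and path below are not from this commit):

# Hypothetical values; the real image_name, csd_url, and parcel_url come from
# the URL templates at the top of build_images.
cmd = ' '.join(['docker build -t <image_name>',
                '--build-arg CSD_URL=<csd_url>',
                '--build-arg PARCEL_URL=<parcel_url>',
                '--build-arg PRODUCT=STREAMSETS_DATACOLLECTOR',  # new in this commit
                '<path>/images/cloudera_service'])

For --build-arg PRODUCT to take effect, the (assumed) cloudera_service Dockerfile would need to declare a matching ARG PRODUCT.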

images/sdc/Dockerfile

Lines changed: 0 additions & 37 deletions
This file was deleted.

start.py

Lines changed: 17 additions & 20 deletions
@@ -160,6 +160,9 @@ def main(args):
     if args.spark2_version:
         _install_service_from_local_repo(cluster, product='SPARK2')

+    if args.sdc_version:
+        _install_service_from_local_repo(cluster, product='STREAMSETS_DATACOLLECTOR')
+
     if args.kerberos:
         cluster.kdc_node = kdc_node
         _configure_kdc(cluster, args.kerberos_principals, args.kerberos_ticket_lifetime, quiet=quiet)
@@ -273,15 +276,6 @@ def cm_server_not_dead(primary_node):
     deployment.update_cm_config(configs={'manages_parcels': True})

     if args.sdc_version:
-        # We install StreamSets DataCollector using local repo /opt/cloudera/parcel-repo.
-        # Set file and folder permissions correctly.
-        commands = ['chown cloudera-scm:cloudera-scm /opt/cloudera/csd',
-                    'chown cloudera-scm:cloudera-scm /opt/cloudera/parcel-repo',
-                    'chown cloudera-scm:cloudera-scm /opt/cloudera/csd/STREAMSETS*.jar',
-                    'chmod 644 /opt/cloudera/csd/STREAMSETS*.jar',
-                    'chown cloudera-scm:cloudera-scm /opt/cloudera/parcel-repo/STREAMSETS_*']
-        primary_node.execute(' && '.join(commands))
-
         # The parcel is already present. Hence just distribute and activate it after refreshing parcel repos.
         product = 'STREAMSETS_DATACOLLECTOR'
         deployment.refresh_parcel_repos()
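The deleted chown/chmod block is what _install_service_from_local_repo (now called for both SPARK2 and SDC in the first hunk) covers. Its body is not shown in this diff; the following is a hypothetical sketch only, assuming the helper simply generalizes the removed hard-coded commands over a product-name prefix:

# Hypothetical sketch; the real helper lives elsewhere in start.py and is
# not part of this commit's diff.
def _install_service_from_local_repo(cluster, product):
    # Give cloudera-scm ownership of the CSD jar and parcel-repo contents
    # baked into the image, so Cloudera Manager can pick them up.
    commands = ['chown cloudera-scm:cloudera-scm /opt/cloudera/csd',
                'chown cloudera-scm:cloudera-scm /opt/cloudera/parcel-repo',
                'chown cloudera-scm:cloudera-scm /opt/cloudera/csd/{}*.jar'.format(product),
                'chmod 644 /opt/cloudera/csd/{}*.jar'.format(product),
                'chown cloudera-scm:cloudera-scm /opt/cloudera/parcel-repo/{}*'.format(product)]
    cluster.primary_node.execute(' && '.join(commands))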
@@ -357,7 +351,7 @@ def cm_server_not_dead(primary_node):

     if args.sdc_version:
         logger.info('Configuring StreamSets Data Collector ...')
-        _configure_sdc(deployment, cluster, is_kerberos_enabled=args.kerberos)
+        _configure_sdc(deployment, cluster, args)

     if args.kerberos:
         logger.info('Configuring Cloudera Manager for Kerberos ...')
@@ -899,7 +893,7 @@ def _setup_ssl_encryption_authentication(cluster, service):
     ]
     cluster.primary_node.execute(' && '.join(ssl_authentication_commands))

-def _configure_sdc(deployment, cluster, is_kerberos_enabled):
+def _configure_sdc(deployment, cluster, args):
     logger.info('Adding StreamSets service to cluster (%s) ...', DEFAULT_CLUSTER_NAME)
     datacollector_role = {'type': 'DATACOLLECTOR',
                           'hostRef': {'hostId': cluster.primary_node.host_id}}
@@ -908,18 +902,21 @@ def _configure_sdc(deployment, cluster, is_kerberos_enabled):
                                          'type': 'STREAMSETS',
                                          'displayName': 'StreamSets',
                                          'roles': [datacollector_role]}])
-    # When running an application with Spark2, the following
-    # environment variables must be set before starting StreamSets Data Collector.
-    environment_variables = {'SPARK_SUBMIT_YARN_COMMAND': '/usr/bin/spark2-submit',
-                             'SPARK_KAFKA_VERSION': '0.10'}
+    if args.spark2_version:
+        # When running an application with Spark2, the following environment
+        # variables must be set before starting StreamSets Data Collector.
+        environment_variables = {'SPARK_SUBMIT_YARN_COMMAND': '/usr/bin/spark2-submit',
+                                 'SPARK_KAFKA_VERSION': '0.10',
+                                 'SPARK_HOME': '/opt/cloudera/parcels/SPARK2/lib/spark2'}
+    else:
+        # When running an application on YARN, the Spark executor needs the spark-submit
+        # script in the Spark installation directory, which defaults to the directory given by
+        # the SPARK_HOME environment variable. Hence SPARK_HOME must be set before starting StreamSets Data Collector.
+        environment_variables = {'SPARK_HOME': '/opt/cloudera/parcels/CDH/lib/spark'}
     configs = {'sdc-env.sh_role_safety_valve': '\n'.join('export {}={}'.format(key, value)
                                                          for key, value in environment_variables.items())}
-    # When running an application on YARN, the Spark executor requires access to the spark-submit script located in
-    # the Spark installation directory. Default is directory specified by SPARK_HOME environment variable.
-    # Hence SPARK_HOME environment variable must be set before starting StreamSets Data Collector.
-    configs = {'sdc-env.sh_role_safety_valve': 'export SPARK_HOME=/opt/cloudera/parcels/CDH/lib/spark'}

-    if is_kerberos_enabled:
+    if args.kerberos:
         # Create JAAS config file on node-1. Needed to access kerberized Kafka.
         primary_node = cluster.primary_node
         sdc_principal = 'sdc/{kafka_node_name}@{realm}'.format(kafka_node_name=primary_node.fqdn,
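To make the safety-valve rendering concrete, here is a minimal standalone sketch of the '\n'.join(...) above for the spark2 branch. The values are copied from the diff; output order can vary on Python versions without insertion-ordered dicts:

environment_variables = {'SPARK_SUBMIT_YARN_COMMAND': '/usr/bin/spark2-submit',
                         'SPARK_KAFKA_VERSION': '0.10',
                         'SPARK_HOME': '/opt/cloudera/parcels/SPARK2/lib/spark2'}
print('\n'.join('export {}={}'.format(key, value)
                for key, value in environment_variables.items()))
# Expected output (one export line per variable):
# export SPARK_SUBMIT_YARN_COMMAND=/usr/bin/spark2-submit
# export SPARK_KAFKA_VERSION=0.10
# export SPARK_HOME=/opt/cloudera/parcels/SPARK2/lib/spark2

This rendered script is what Cloudera Manager injects via the sdc-env.sh_role_safety_valve config before the Data Collector role starts.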
