@@ -79,17 +79,20 @@ def identify_changed_files_from_git_commits(patch_sha, target_branch=None, targe
             identify_changed_files_from_git_commits("50a0496a43", target_ref="6765ef9"))]
     True
     """
-    if target_branch is None and target_ref is None:
-        raise AttributeError("must specify either target_branch or target_ref")
-    elif target_branch is not None and target_ref is not None:
+    if target_branch is not None and target_ref is not None:
         raise AttributeError("must specify either target_branch or target_ref, not both")
     if target_branch is not None:
-        diff_target = target_branch
+        diff_target = [target_branch]
         run_cmd(['git', 'fetch', 'origin', str(target_branch + ':' + target_branch)])
+    elif target_ref is not None:
+        diff_target = [target_ref]
     else:
-        diff_target = target_ref
-    raw_output = subprocess.check_output(['git', 'diff', '--name-only', patch_sha, diff_target],
-                                         universal_newlines=True)
+        # If neither is specified, just show the diff for the commit itself.
+        diff_target = []
+    raw_output = subprocess.check_output(
+        ['git', 'diff', '--name-only', patch_sha] + diff_target,
+        universal_newlines=True)
+    print(raw_output)
    # Remove any empty strings
    return [f for f in raw_output.split('\n') if f]

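Making `diff_target` a list rather than a bare string is what lets the no-target case fall through to an empty argument instead of raising. A minimal standalone sketch of the resulting command construction (`build_diff_command` is a hypothetical name; the `git fetch` step is omitted):

```python
def build_diff_command(patch_sha, target_branch=None, target_ref=None):
    if target_branch is not None and target_ref is not None:
        raise AttributeError("must specify either target_branch or target_ref, not both")
    if target_branch is not None:
        diff_target = [target_branch]
    elif target_ref is not None:
        diff_target = [target_ref]
    else:
        # No explicit target: `git diff <sha>` compares the working tree
        # against that commit.
        diff_target = []
    return ['git', 'diff', '--name-only', patch_sha] + diff_target

assert build_diff_command("abc123") == ['git', 'diff', '--name-only', 'abc123']
assert build_diff_command("abc123", target_ref="v3.0.0") == [
    'git', 'diff', '--name-only', 'abc123', 'v3.0.0']
```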
@@ -539,6 +542,24 @@ def parse_opts():
         "-p", "--parallelism", type=int, default=8,
         help="The number of suites to test in parallel (default %(default)d)"
     )
+    parser.add_argument(
+        "-m", "--modules", type=str,
+        default=None,
+        help="A comma-separated list of modules to test "
+             "(default: %s)" % ",".join(sorted([m.name for m in modules.all_modules]))
+    )
+    parser.add_argument(
+        "-e", "--excluded-tags", type=str,
+        default=None,
+        help="A comma-separated list of tags to exclude in the tests, "
+             "e.g., org.apache.spark.tags.ExtendedHiveTest "
+    )
+    parser.add_argument(
+        "-i", "--included-tags", type=str,
+        default=None,
+        help="A comma-separated list of tags to include in the tests, "
+             "e.g., org.apache.spark.tags.ExtendedHiveTest "
+    )

     args, unknown = parser.parse_known_args()
     if unknown:
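These flags replace the `TEST_ONLY_MODULES` / `TEST_ONLY_EXCLUDED_TAGS` / `TEST_ONLY_INCLUDED_TAGS` environment variables that `main()` used to read (see the hunks below). A quick sanity check of how the values parse, using a hypothetical standalone parser with the same three flags:

```python
import argparse

# Hypothetical stand-in for parse_opts(), just to show how the
# comma-separated values are consumed downstream in main().
parser = argparse.ArgumentParser()
parser.add_argument("-m", "--modules", type=str, default=None)
parser.add_argument("-e", "--excluded-tags", type=str, default=None)
parser.add_argument("-i", "--included-tags", type=str, default=None)

opts = parser.parse_args(
    ["--modules", "pyspark-sql, sql",
     "--excluded-tags", "org.apache.spark.tags.ExtendedHiveTest"])
print([m.strip() for m in opts.modules.split(",")])        # ['pyspark-sql', 'sql']
print([t.strip() for t in opts.excluded_tags.split(",")])  # ['org.apache.spark.tags.ExtendedHiveTest']
```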
@@ -589,43 +610,64 @@ def main():
         # /home/jenkins/anaconda2/envs/py36/bin
         os.environ["PATH"] = "/home/anaconda/envs/py36/bin:" + os.environ.get("PATH")
     else:
-        # else we're running locally and can use local settings
+        # else we're running locally or on GitHub Actions.
         build_tool = "sbt"
         hadoop_version = os.environ.get("HADOOP_PROFILE", "hadoop2.7")
         hive_version = os.environ.get("HIVE_PROFILE", "hive2.3")
-        test_env = "local"
+        if "GITHUB_ACTIONS" in os.environ:
+            test_env = "github_actions"
+        else:
+            test_env = "local"

     print("[info] Using build tool", build_tool, "with Hadoop profile", hadoop_version,
           "and Hive profile", hive_version, "under environment", test_env)
     extra_profiles = get_hadoop_profiles(hadoop_version) + get_hive_profiles(hive_version)

     changed_modules = None
+    test_modules = None
     changed_files = None
-    should_only_test_modules = "TEST_ONLY_MODULES" in os.environ
+    should_only_test_modules = opts.modules is not None
     included_tags = []
+    excluded_tags = []
     if should_only_test_modules:
-        str_test_modules = [m.strip() for m in os.environ.get("TEST_ONLY_MODULES").split(",")]
+        str_test_modules = [m.strip() for m in opts.modules.split(",")]
         test_modules = [m for m in modules.all_modules if m.name in str_test_modules]
-        # Directly uses test_modules as changed modules to apply tags and environments
-        # as if all specified test modules are changed.
+
+        # If we're running the tests on GitHub Actions, attempt to detect and test
+        # only the affected modules.
+        if test_env == "github_actions":
+            base_ref = os.environ["GITHUB_BASE_REF"]
+            changed_files = identify_changed_files_from_git_commits(
+                os.environ["GITHUB_SHA"], target_branch=None if base_ref == "" else base_ref)
+            print("changed_files: %s" % changed_files)
+            test_modules = list(set(determine_modules_to_test(
+                determine_modules_for_files(changed_files))).intersection(test_modules))
+            print("test_modules: %s" % test_modules)
+
         changed_modules = test_modules
-        str_excluded_tags = os.environ.get("TEST_ONLY_EXCLUDED_TAGS", None)
-        str_included_tags = os.environ.get("TEST_ONLY_INCLUDED_TAGS", None)
-        excluded_tags = []
-        if str_excluded_tags:
-            excluded_tags = [t.strip() for t in str_excluded_tags.split(",")]
-        included_tags = []
-        if str_included_tags:
-            included_tags = [t.strip() for t in str_included_tags.split(",")]
+
+    # If we're running the tests on AMPLab Jenkins, calculate the diff from the
+    # target branch and detect the modules to test.
     elif test_env == "amplab_jenkins" and os.environ.get("AMP_JENKINS_PRB"):
         target_branch = os.environ["ghprbTargetBranch"]
         changed_files = identify_changed_files_from_git_commits("HEAD", target_branch=target_branch)
         changed_modules = determine_modules_for_files(changed_files)
+        test_modules = determine_modules_to_test(changed_modules)
         excluded_tags = determine_tags_to_exclude(changed_modules)

+    # If no changed modules were found, test everything.
     if not changed_modules:
         changed_modules = [modules.root]
-        excluded_tags = []
+    if not test_modules:
+        test_modules = determine_modules_to_test(changed_modules)
+
+    str_excluded_tags = opts.excluded_tags
+    str_included_tags = opts.included_tags
+    if str_excluded_tags:
+        excluded_tags.extend([t.strip() for t in str_excluded_tags.split(",")])
+    if str_included_tags:
+        included_tags.extend([t.strip() for t in str_included_tags.split(",")])
+
     print("[info] Found the following changed modules:",
           ", ".join(x.name for x in changed_modules))

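The net effect of the GitHub Actions branch above: a module runs only if it is both requested via `--modules` and detected as affected by the diff. A toy illustration, with plain strings standing in for the real module objects (hypothetical values):

```python
# Hypothetical module names; the real code intersects module objects.
requested = {"sql", "hive", "pyspark-sql"}  # from --modules
affected = {"sql", "catalyst"}              # derived from the changed files
test_modules = list(requested & affected)
print(test_modules)                         # ['sql'] -- only the overlap runs
```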
@@ -640,8 +682,6 @@ def main():

     should_run_java_style_checks = False
     if not should_only_test_modules:
-        test_modules = determine_modules_to_test(changed_modules)
-
         # license checks
         run_apache_rat_checks()

@@ -672,40 +712,43 @@ def main():
     # if "DOCS" in changed_modules and test_env == "amplab_jenkins":
     #    build_spark_documentation()

-    if any(m.should_run_build_tests for m in test_modules) and test_env != "amplab_jenkins":
-        run_build_tests()
-
-    # spark build
-    build_apache_spark(build_tool, extra_profiles)
-
-    # backwards compatibility checks
-    if build_tool == "sbt":
-        # Note: compatibility tests only supported in sbt for now
-        detect_binary_inop_with_mima(extra_profiles)
-        # Since we did not build assembly/package before running dev/mima, we need to
-        # do it here because the tests still rely on it; see SPARK-13294 for details.
-        build_spark_assembly_sbt(extra_profiles, should_run_java_style_checks)
-
-    # run the test suites
-    run_scala_tests(build_tool, extra_profiles, test_modules, excluded_tags, included_tags)
-
-    modules_with_python_tests = [m for m in test_modules if m.python_test_goals]
-    if modules_with_python_tests:
-        # We only run PySpark tests with coverage report in one specific job with
-        # Spark master with SBT in Jenkins.
-        is_sbt_master_job = "SPARK_MASTER_SBT_HADOOP_2_7" in os.environ
-        run_python_tests(
-            modules_with_python_tests, opts.parallelism, with_coverage=is_sbt_master_job)
-        run_python_packaging_tests()
-    if any(m.should_run_r_tests for m in test_modules):
-        run_sparkr_tests()
+    print(changed_modules)
+    print(test_modules)
+    print([m for m in test_modules if m.python_test_goals])
+    print([m.should_run_r_tests for m in test_modules])
+    print(excluded_tags)
+    print(included_tags)
+
+    # if any(m.should_run_build_tests for m in test_modules) and test_env != "amplab_jenkins":
+    #     run_build_tests()
+    #
+    # # spark build
+    # build_apache_spark(build_tool, extra_profiles)
+    #
+    # # backwards compatibility checks
+    # if build_tool == "sbt":
+    #     # Note: compatibility tests only supported in sbt for now
+    #     detect_binary_inop_with_mima(extra_profiles)
+    #     # Since we did not build assembly/package before running dev/mima, we need to
+    #     # do it here because the tests still rely on it; see SPARK-13294 for details.
+    #     build_spark_assembly_sbt(extra_profiles, should_run_java_style_checks)
+    #
+    # # run the test suites
+    # run_scala_tests(build_tool, extra_profiles, test_modules, excluded_tags, included_tags)
+    #
+    # modules_with_python_tests = [m for m in test_modules if m.python_test_goals]
+    # if modules_with_python_tests:
+    #     # We only run PySpark tests with coverage report in one specific job with
+    #     # Spark master with SBT in Jenkins.
+    #     is_sbt_master_job = "SPARK_MASTER_SBT_HADOOP_2_7" in os.environ
+    #     run_python_tests(
+    #         modules_with_python_tests, opts.parallelism, with_coverage=is_sbt_master_job)
+    #     run_python_packaging_tests()
+    # if any(m.should_run_r_tests for m in test_modules):
+    #     run_sparkr_tests()


 def _test():
-    if "TEST_ONLY_MODULES" in os.environ:
-        # TODO(SPARK-32252): Enable doctests back in Github Actions.
-        return
-
     import doctest
     failure_count = doctest.testmod()[0]
     if failure_count:
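With the early return gone, `_test()` always runs the module's doctests. For reference, a minimal self-contained example (not from the patch) of what `doctest.testmod()` returns and why indexing `[0]` gives the failure count:

```python
import doctest

def add(a, b):
    """
    >>> add(1, 2)
    3
    """
    return a + b

# testmod() returns TestResults(failed, attempted); index 0 is the number
# of failed examples, which is what _test() checks against.
failures, attempted = doctest.testmod()
print(failures, attempted)  # 0 1
```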