@@ -136,7 +136,11 @@ void jl_dump_llvm_opt_impl(void *s)
136136 **jl_ExecutionEngine->get_dump_llvm_opt_stream () = (JL_STREAM*)s;
137137}
138138
139- static void jl_add_to_ee (orc::ThreadSafeModule &M, StringMap<orc::ThreadSafeModule*> &NewExports);
139+ static int jl_add_to_ee (
140+ orc::ThreadSafeModule &M,
141+ const StringMap<orc::ThreadSafeModule*> &NewExports, // looked up by global name to find the module an external declaration refers to
142+ DenseMap<orc::ThreadSafeModule*, int > &Queued, // stack id of every module currently on Stack
143+ std::vector<orc::ThreadSafeModule*> &Stack); // modules visited but not yet added to the execution engine
140144static void jl_decorate_module (Module &M);
141145static uint64_t getAddressForFunction (StringRef fname);
142146
@@ -228,10 +232,13 @@ static jl_callptr_t _jl_compile_codeinst(
228232 }
229233 }
230234 }
235+ DenseMap<orc::ThreadSafeModule*, int > Queued;
236+ std::vector<orc::ThreadSafeModule*> Stack;
231237 for (auto &def : emitted) {
232238 // Add the results to the execution engine now
233239 orc::ThreadSafeModule &M = std::get<0 >(def.second );
234- jl_add_to_ee (M, NewExports);
240+ jl_add_to_ee (M, NewExports, Queued, Stack);
241+ assert (Queued.empty () && Stack.empty () && !M);
235242 }
236243 ++CompiledCodeinsts;
237244 MaxWorkqueueSize.updateMax (emitted.size ());
@@ -1704,76 +1711,72 @@ static void jl_decorate_module(Module &M) {
17041711#endif
17051712}
17061713
1714+ // Implements Tarjan's SCC (strongly connected components) algorithm, simplified to remove the count variable: a module's id is its 1-based position on Stack. Returns 0 when M is empty or has just been added to the execution engine; otherwise returns a nonzero stack id (M's own id if M was already on Stack, else the lowest id reachable from M) and leaves M on Stack for the module with that id to merge.
17071715static int jl_add_to_ee (
17081716 orc::ThreadSafeModule &M,
1709- StringMap<orc::ThreadSafeModule*> &NewExports,
1717+ const StringMap<orc::ThreadSafeModule*> &NewExports,
17101718 DenseMap<orc::ThreadSafeModule*, int > &Queued,
1711- std::vector<std::vector<orc::ThreadSafeModule*>> &ToMerge,
1712- int depth)
1719+ std::vector<orc::ThreadSafeModule*> &Stack)
17131720{
1714- // DAG-sort (post-dominator) the compile to compute the minimum
1715- // merge-module sets for linkage
1721+ // First check if the TSM is empty (already compiled)
17161722 if (!M)
17171723 return 0 ;
1718- // First check and record if it's on the stack somewhere
1724+ // Next check and record if it is on the stack somewhere (a nonzero entry in Queued means M is already on Stack)
17191725 {
1720- auto &Cycle = Queued[&M];
1721- if (Cycle )
1722- return Cycle ;
1723- ToMerge .push_back ({} );
1724- Cycle = depth ;
1726+ auto &Id = Queued[&M];
1727+ if (Id )
1728+ return Id ;
1729+ Stack .push_back (&M );
1730+ Id = Stack. size () ; // ids are 1-based stack positions, so 0 can mean "not queued"
17251731 }
1732+ // Finally work out the SCC: MergeUp tracks the lowest stack id reachable from M, starting at M's own id
1733+ int depth = Stack.size ();
17261734 int MergeUp = depth;
1727- // Compute the cycle-id
1735+ std::vector<orc::ThreadSafeModule*> Children; // referenced modules not yet on Stack; visited after withModuleDo returns
17281736 M.withModuleDo ([&](Module &m) {
17291737 for (auto &F : m.global_objects ()) {
17301738 if (F.isDeclaration () && F.getLinkage () == GlobalValue::ExternalLinkage) {
17311739 auto Callee = NewExports.find (F.getName ());
17321740 if (Callee != NewExports.end ()) {
1733- auto &CM = Callee->second ;
1734- int Down = jl_add_to_ee (*CM, NewExports, Queued, ToMerge, depth + 1 );
1735- assert (Down <= depth);
1736- if (Down && Down < MergeUp)
1737- MergeUp = Down;
1741+ auto *CM = Callee->second ;
1742+ if (*CM && CM != &M) { // ignore empty (already compiled) modules and references back to M itself
1743+ auto Down = Queued.find (CM);
1744+ if (Down != Queued.end ())
1745+ MergeUp = std::min (MergeUp, Down->second ); // CM is already on Stack: M belongs to a cycle reaching down to CM's id
1746+ else
1747+ Children.push_back (CM);
1748+ }
17381749 }
17391750 }
17401751 }
17411752 });
1742- if (MergeUp == depth) {
1743- // Not in a cycle (or at the top of it)
1744- Queued.erase (&M);
1745- for (auto &CM : ToMerge.at (depth - 1 )) {
1746- assert (Queued.find (CM)->second == depth);
1747- Queued.erase (CM);
1748- jl_merge_module (M, std::move (*CM));
1749- }
1750- jl_ExecutionEngine->addModule (std::move (M));
1751- MergeUp = 0 ;
1753+ assert (MergeUp > 0 );
1754+ for (auto *CM : Children) {
1755+ int Down = jl_add_to_ee (*CM, NewExports, Queued, Stack);
1756+ assert (Down <= (int )Stack.size ());
1757+ if (Down) // 0 means CM was empty or has been added to the engine, so it does not constrain M
1758+ MergeUp = std::min (MergeUp, Down);
17521759 }
1753- else {
1754- // Add our frame(s) to the top of the cycle
1755- Queued[&M] = MergeUp;
1756- auto &Top = ToMerge.at (MergeUp - 1 );
1757- Top.push_back (&M);
1758- for (auto &CM : ToMerge.at (depth - 1 )) {
1759- assert (Queued.find (CM)->second == depth);
1760- Queued[CM] = MergeUp;
1761- Top.push_back (CM);
1760+ if (MergeUp < depth) // M reaches a module lower on Stack: keep M queued and let that module do the merge
1761+ return MergeUp;
1762+ while (1 ) {
1763+ // Not in a cycle (or at the top of it)
1764+ // remove SCC state and merge every CM from the cycle into M (everything above M's slot on Stack, popped top-down)
1765+ orc::ThreadSafeModule *CM = Stack.back ();
1766+ auto it = Queued.find (CM);
1767+ assert (it->second == (int )Stack.size ());
1768+ Queued.erase (it);
1769+ Stack.pop_back ();
1770+ if ((int )Stack.size () < depth) { // the entry just popped was M's own slot, so nothing is left to merge
1771+ assert (&M == CM);
1772+ break ;
17621773 }
1774+ jl_merge_module (M, std::move (*CM));
17631775 }
1764- ToMerge.pop_back ();
1765- return MergeUp;
1766- }
1767-
1768- static void jl_add_to_ee (orc::ThreadSafeModule &M, StringMap<orc::ThreadSafeModule*> &NewExports)
1769- {
1770- DenseMap<orc::ThreadSafeModule*, int > Queued;
1771- std::vector<std::vector<orc::ThreadSafeModule*>> ToMerge;
1772- jl_add_to_ee (M, NewExports, Queued, ToMerge, 1 );
1773- assert (!M);
1776+ jl_ExecutionEngine->addModule (std::move (M)); // M is moved from here, so later visits see it as empty and return 0
1777+ return 0 ;
17741778}
17751779
1776-
17771780static uint64_t getAddressForFunction (StringRef fname)
17781781{
17791782 auto addr = jl_ExecutionEngine->getFunctionAddress (fname);
0 commit comments