@@ -630,93 +630,60 @@ int acl_update_queue(cl_command_queue command_queue) {
630
630
}
631
631
}
632
632
633
- void acl_try_FastKernelRelaunch_ooo_queue_event_dependents (cl_event parent) {
633
+ // Try to submit a kernel even if it has unfinished dependences using fast
634
+ // kernel relaunch
635
+ // Returns true on success, false on failure
636
+ bool acl_fast_relaunch_kernel (cl_event event) {
637
+ if (!(event->command_queue ->properties &
638
+ CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE))
639
+ return false ;
640
+
641
+ if (event->depend_on .size () != 1 )
642
+ return false ;
643
+
644
+ cl_event parent = *(event->depend_on .begin ());
645
+
634
646
if (!(parent->command_queue ->properties &
635
647
CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE))
636
- return ;
637
- if (parent->depend_on_me .empty ())
638
- return ;
648
+ return false ;
649
+
639
650
if (parent->cmd .type != CL_COMMAND_TASK &&
640
651
parent->cmd .type != CL_COMMAND_NDRANGE_KERNEL)
641
- return ;
652
+ return false ;
653
+
642
654
if (parent->execution_status > CL_SUBMITTED ||
643
655
parent->last_device_op ->status > CL_SUBMITTED)
644
- return ;
645
-
646
- // Check if fast kernel relaunch is safe to use, and we can ignore
647
- // the explicit dependency
648
- for (auto dependent_it = parent->depend_on_me .begin ();
649
- dependent_it != parent->depend_on_me .end (); dependent_it++) {
650
- cl_event dependent = *dependent_it;
651
- // Currently we do not handle the case of FKR for mixed queue types
652
- if (!(dependent->command_queue ->properties &
653
- CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE))
654
- continue ;
655
- // can only FKR if one unresolved dependency
656
- if (dependent->depend_on .size () > 1 )
657
- continue ;
658
- // can happen if this function gets called twice on same parent
659
- // once during submission and once during completion
660
- if (dependent->is_on_device_op_queue )
661
- continue ;
662
-
663
- if (!l_is_same_kernel_event (parent, dependent)) {
664
- // dependent on a different kernel than parent,
665
- // must wait for dependency to be resolved
666
- // OR the dependent is not on the same device,
667
- // not safe to preemptively push dependent to device_op_queue
668
- continue ;
669
- }
670
-
671
- // Special case: if subbuffers are present they may(!) cause a
672
- // migration while another kernel is using that data.
673
- if (acl_kernel_has_unmapped_subbuffers (
674
- &(dependent->cmd .info .ndrange_kernel .memory_migration ))) {
675
- continue ;
676
- }
677
-
678
- // Fast Kernel Relaunch: submitting is safe even though has dependency
679
- // Prior to submitting remove dependency
680
- int local_updates = acl_submit_command (dependent);
681
- if (local_updates) {
682
- dependent->depend_on .erase (parent);
683
- dependent_it = parent->depend_on_me .erase (dependent_it);
684
- dependent_it--; // decrement it otherwise we will skip an element
685
- dependent->command_queue ->num_commands_submitted ++;
686
- }
687
- }
656
+ return false ;
657
+
658
+ if (!l_is_same_kernel_event (parent, event)) {
659
+ // dependent on a different kernel than parent,
660
+ // must wait for dependency to be resolved
661
+ // OR the dependent is not on the same device,
662
+ // not safe to preemptively push dependent to device_op_queue
663
+ return false ;
664
+ }
665
+
666
+ // Special case: if subbuffers are present they may(!) cause a
667
+ // migration while another kernel is using that data.
668
+ if (acl_kernel_has_unmapped_subbuffers (
669
+ &(event->cmd .info .ndrange_kernel .memory_migration )))
670
+ return false ;
671
+
672
+ // Fast Kernel Relaunch: submitting is safe even though has dependency
673
+ // If submission succeeds, remove dependency
674
+ bool success = acl_submit_command (event);
675
+ if (!success)
676
+ return false ;
677
+ event->depend_on .erase (parent);
678
+ parent->depend_on_me .remove (event);
679
+ return true ;
688
680
}
689
681
690
682
int acl_update_ooo_queue (cl_command_queue command_queue) {
691
683
int num_updates = 0 ;
692
684
693
- // Directly submit the event if it has no dependencies
694
- // unless it is a user_event queue which never submits events
695
- while (!command_queue->new_commands .empty ()) {
696
- int success = 1 ;
697
- cl_event event = command_queue->new_commands .front ();
698
- if (command_queue->submits_commands &&
699
- event->execution_status == CL_QUEUED) {
700
- if (event->depend_on .empty ()) {
701
- command_queue->num_commands_submitted ++;
702
- success = acl_submit_command (event);
703
- } else {
704
- // This is allowed to fail, so no need to mark success as false
705
- // dependent events that fail to be FKRd will still be picked up when
706
- // their parent event finishes
707
- acl_try_FastKernelRelaunch_ooo_queue_event_dependents (
708
- *(event->depend_on .begin ()));
709
- }
710
- }
711
-
712
- if (success) {
713
- // safe to pop as there is a master copy in command_queue->commands
714
- command_queue->new_commands .pop_front ();
715
- }
716
- }
717
-
718
- // Remove dependencies on completed events, and launch any events
719
- // that no longer have dependencies.
685
+ // First, remove dependencies on completed events,
686
+ // as this may unblock other events
720
687
// Completed events should be returned to the free pool
721
688
while (!command_queue->completed_commands .empty ()) {
722
689
cl_event event = command_queue->completed_commands .front ();
@@ -735,16 +702,6 @@ int acl_update_ooo_queue(cl_command_queue command_queue) {
735
702
dependent->command_queue ->completed_commands .push_back (
736
703
dependent); // dependent might be on another queue
737
704
}
738
- } else if (dependent->depend_on .empty ()) {
739
- // dependent has no dependencies safe to submit if in OOO queue
740
- if ((dependent->command_queue ->properties &
741
- CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) &&
742
- dependent->cmd .type != CL_COMMAND_USER) {
743
- int local_updates = acl_submit_command (dependent);
744
- dependent->command_queue ->num_commands_submitted +=
745
- local_updates; // dependent might be on another queue
746
- num_updates += local_updates;
747
- }
748
705
}
749
706
}
750
707
@@ -772,10 +729,39 @@ int acl_update_ooo_queue(cl_command_queue command_queue) {
772
729
command_queue->commands .erase (event);
773
730
}
774
731
event->not_popped = false ;
732
+ num_updates++;
775
733
command_queue->num_commands --;
776
734
acl_release (command_queue);
777
735
}
778
736
737
+ // Next try to submit any events with no dependencies
738
+ // or whose only dependences can be handled by fast kernel relaunch
739
+ // unless they are on a user_event queue which never submits events
740
+ for (auto event_iter = command_queue->new_commands .begin ();
741
+ event_iter != command_queue->new_commands .end ();) {
742
+ cl_event event = *event_iter;
743
+ int success = 0 ;
744
+ if (!command_queue->submits_commands )
745
+ success = 1 ;
746
+ else {
747
+ if (event->depend_on .empty ()) {
748
+ success = acl_submit_command (event);
749
+ } else {
750
+ success = acl_fast_relaunch_kernel (event);
751
+ }
752
+ }
753
+
754
+ // Increment before removal so we don't invalidate the iterator
755
+ event_iter++;
756
+ if (success) {
757
+ // num_commands_submitted isn't used for ooo queues today
758
+ // but keep it up-to-date in case someone wants to use it in the future
759
+ command_queue->num_commands_submitted ++;
760
+ command_queue->new_commands .remove (event);
761
+ num_updates++;
762
+ }
763
+ }
764
+
779
765
return num_updates;
780
766
}
781
767
0 commit comments