@@ -2739,7 +2739,7 @@ def lower_parfor_sequential(typingctx, func_ir, typemap, calltypes, openmp):
2739
2739
new_blocks = {}
2740
2740
for (block_label , block ) in func_ir .blocks .items ():
2741
2741
block_label , parfor_found = _lower_parfor_sequential_block (
2742
- block_label , block , new_blocks , typemap , calltypes , parfor_found , openmp , typingctx )
2742
+ block_label , block , new_blocks , typemap , calltypes , parfor_found , openmp , typingctx , func_ir )
2743
2743
# old block stays either way
2744
2744
new_blocks [block_label ] = block
2745
2745
func_ir .blocks = new_blocks
@@ -2761,11 +2761,10 @@ def lower_parfor_sequential(typingctx, func_ir, typemap, calltypes, openmp):
2761
2761
next_region_end_index = 0
2762
2762
for i , inst in enumerate (block .body ):
2763
2763
if inst in openmp_ends :
2764
- block .body [next_region_end_index = 1 :i + 1 ] = block .body [next_region_end_index :i ]
2764
+ block .body [next_region_end_index + 1 :i + 1 ] = block .body [next_region_end_index :i ]
2765
2765
block .body [next_region_end_index ] = inst
2766
2766
next_region_end_index += 1
2767
2767
dprint ("Found openmp end" )
2768
- break
2769
2768
dprint_func_ir (func_ir , "after elevating openmp end" )
2770
2769
return
2771
2770
@@ -2914,6 +2913,14 @@ def add_prints(block, typemap, calltypes):
2914
2913
2915
2914
return new_block
2916
2915
2916
def get_next_region_number(func_ir):
    """Return the next unused region number for ``func_ir``.

    The counter is kept on the IR object itself in the ``offload_number``
    attribute. Each call returns the current counter value and then
    advances the stored counter by one, so successive calls on the same
    object yield 0, 1, 2, ...

    Args:
        func_ir: Object on which the ``offload_number`` attribute is read
            and written. If the attribute is absent it is treated as 0.

    Returns:
        The counter value prior to this call's increment.
    """
    # A missing attribute means no region has been numbered yet, so start at 0.
    cur = getattr(func_ir, 'offload_number', 0)
    func_ir.offload_number = cur + 1
    return cur
2923
+
2917
2924
def _lower_parfor_openmp (
2918
2925
block_label ,
2919
2926
block ,
@@ -2922,7 +2929,8 @@ def _lower_parfor_openmp(
2922
2929
calltypes ,
2923
2930
parfor_found ,
2924
2931
openmp ,
2925
- typingctx ):
2932
+ typingctx ,
2933
+ func_ir ):
2926
2934
add_directives = True
2927
2935
2928
2936
global omp_count
@@ -3023,8 +3031,12 @@ def _lower_parfor_openmp(
3023
3031
ompubvar , ompubvar_assign = create_assign_var ("ompub" , types .uintp , subexpr , scope , loc , typemap )
3024
3032
inst .init_block .body .append (ompubvar_assign )
3025
3033
3034
+ region_number = get_next_region_number (func_ir )
3035
+
3026
3036
# ----------------------------------------------------------------
3027
3037
3038
+ print ("PyArg_UnpackTuple symbol" , llvmlite .binding .address_of_symbol ('PyArg_UnpackTuple' ))
3039
+ # if openmp == True or openmp == 'cpu' or openmp == 'target':
3028
3040
if openmp == True or openmp == 'cpu' :
3029
3041
openmp_start_tags = [ openmp_tag ("DIR.OMP.PARALLEL.LOOP" ) ]
3030
3042
openmp_end_tags = [ openmp_tag ("DIR.OMP.END.PARALLEL.LOOP" ) ]
@@ -3046,7 +3058,7 @@ def _lower_parfor_openmp(
3046
3058
openmp_start_tags .append (openmp_tag ("QUAL.OMP.NORMALIZED.UB" , ompubvar ))
3047
3059
3048
3060
# Add OpenMP LLVM directives.
3049
- or_start = numba .npyufunc .parfor .openmp_region_start (openmp_start_tags , loc )
3061
+ or_start = numba .npyufunc .parfor .openmp_region_start (openmp_start_tags , region_number , loc )
3050
3062
# Append OpenMP directive right to the block right before the loop.
3051
3063
3052
3064
if omp_count != - 1 :
@@ -3067,12 +3079,24 @@ def _lower_parfor_openmp(
3067
3079
block .body .insert (0 , or_end )
3068
3080
elif openmp == 'target' :
3069
3081
openmp_target_start_tags = [ openmp_tag ("DIR.OMP.TARGET" ),
3070
- openmp_tag ("QUAL.OMP.OFFLOAD.ENTRY.IDX" , 0 ) ]
3082
+ openmp_tag ("QUAL.OMP.OFFLOAD.ENTRY.IDX" , region_number ),
3083
+ openmp_tag ("QUAL.OMP.PRIVATE" , index_variable ),
3084
+ openmp_tag ("QUAL.OMP.FIRSTPRIVATE" , omplbvar ),
3085
+ openmp_tag ("QUAL.OMP.FIRSTPRIVATE" , ompubvar )]
3071
3086
openmp_target_end_tags = [ openmp_tag ("DIR.OMP.END.TARGET" ) ]
3072
- openmp_teams_start_tags = [ openmp_tag ("DIR.OMP.TEAMS" ) ]
3087
+
3088
+ openmp_teams_start_tags = [ openmp_tag ("DIR.OMP.TEAMS" ),
3089
+ openmp_tag ("QUAL.OMP.PRIVATE" , index_variable ),
3090
+ openmp_tag ("QUAL.OMP.SHARED" , omplbvar ),
3091
+ openmp_tag ("QUAL.OMP.SHARED" , ompubvar )]
3073
3092
openmp_teams_end_tags = [ openmp_tag ("DIR.OMP.END.TEAMS" ) ]
3093
+
3074
3094
openmp_distparloop_start_tags = [ openmp_tag ("DIR.OMP.DISTRIBUTE.PARLOOP" ),
3075
- openmp_tag ("QUAL.OMP.SCHEDULE.STATIC" , 1 ) ]
3095
+ openmp_tag ("QUAL.OMP.SCHEDULE.STATIC" , 1 ),
3096
+ openmp_tag ("QUAL.OMP.PRIVATE" , index_variable ),
3097
+ openmp_tag ("QUAL.OMP.FIRSTPRIVATE" , omplbvar ),
3098
+ openmp_tag ("QUAL.OMP.NORMALIZED.IV" , index_variable ),
3099
+ openmp_tag ("QUAL.OMP.NORMALIZED.UB" , ompubvar )]
3076
3100
openmp_distparloop_end_tags = [ openmp_tag ("DIR.OMP.END.DISTRIBUTE.PARLOOP" ) ]
3077
3101
3078
3102
# for i in range(ndims):
@@ -3097,35 +3121,28 @@ def _lower_parfor_openmp(
3097
3121
openmp_start_tags .append (openmp_tag ("QUAL.OMP.COLLAPSE" , ndims ))
3098
3122
3099
3123
# Add OpenMP LLVM directives.
3100
- target_start = numba .npyufunc .parfor .openmp_region_start (openmp_target_start_tags , loc )
3101
- # Append OpenMP directive right to the block right before the loop.
3102
- if add_directives :
3103
- inst .init_block .body .append (target_start )
3124
+ target_start = numba .npyufunc .parfor .openmp_region_start (openmp_target_start_tags , region_number , loc )
3125
+ teams_start = numba .npyufunc .parfor .openmp_region_start (openmp_teams_start_tags , region_number , loc )
3126
+ distparloop_start = numba .npyufunc .parfor .openmp_region_start (openmp_distparloop_start_tags , region_number , loc )
3104
3127
3105
- teams_start = numba .npyufunc .parfor .openmp_region_start (openmp_teams_start_tags , loc )
3106
- # Append OpenMP directive right to the block right before the loop.
3128
+ # Append OpenMP directives to the block right before the loop.
3107
3129
if add_directives :
3130
+ inst .init_block .body .append (target_start )
3108
3131
inst .init_block .body .append (teams_start )
3109
-
3110
- distparloop_start = numba .npyufunc .parfor .openmp_region_start (openmp_distparloop_start_tags , loc )
3111
- # Append OpenMP directive right to the block right before the loop.
3112
- if add_directives :
3113
3132
inst .init_block .body .append (distparloop_start )
3114
3133
3115
3134
distparloop_end = numba .npyufunc .parfor .openmp_region_end (distparloop_start , openmp_distparloop_end_tags , loc )
3116
- # Prepend OpenMP directive right to the block right after the loop.
3117
- if add_directives :
3118
- block .body .insert (0 , distparloop_end )
3119
-
3120
3135
teams_end = numba .npyufunc .parfor .openmp_region_end (teams_start , openmp_teams_end_tags , loc )
3121
- # Prepend OpenMP directive right to the block right after the loop.
3122
- if add_directives :
3123
- block .body .insert (0 , teams_end )
3124
-
3125
3136
target_end = numba .npyufunc .parfor .openmp_region_end (target_start , openmp_target_end_tags , loc )
3126
- # Prepend OpenMP directive right to the block right after the loop.
3137
+
3138
+ # Prepend OpenMP directives to the block right after the loop.
3127
3139
if add_directives :
3140
+ # Because we always insert at position 0, we insert in the reverse
3141
+ # order we want them to be in the IR (which is itself the reverse
3142
+ # order that the region.entry directives were inserted).
3128
3143
block .body .insert (0 , target_end )
3144
+ block .body .insert (0 , teams_end )
3145
+ block .body .insert (0 , distparloop_end )
3129
3146
3130
3147
inst .init_block .body .append (pi_assign )
3131
3148
inst .init_block .body .append (omplbvar_assign )
@@ -3166,6 +3183,7 @@ def _lower_parfor_openmp(
3166
3183
3167
3184
# Make init block jump to top test.
3168
3185
inst .init_block .body .append (ir .Jump (top_test_label , loc ))
3186
+ #inst.init_block.body.append(ir.Jump(jump_to_body_block_label, loc))
3169
3187
if not collapse and ndims > 1 :
3170
3188
parfor_copy = copy .copy (inst )
3171
3189
parfor_copy .init_block = ir .Block (scope , loc )
@@ -3191,7 +3209,7 @@ def _lower_parfor_openmp(
3191
3209
# Add parfor body to blocks
3192
3210
for (l , b ) in inst .loop_body .items ():
3193
3211
l , parfor_found = _lower_parfor_sequential_block (
3194
- l , b , new_blocks , typemap , calltypes , parfor_found , False , typingctx )
3212
+ l , b , new_blocks , typemap , calltypes , parfor_found , False , typingctx , func_ir )
3195
3213
#new_blocks[l] = add_prints(b, typemap, calltypes)
3196
3214
new_blocks [l ] = b
3197
3215
@@ -3296,10 +3314,11 @@ def _lower_parfor_sequential_block(
3296
3314
calltypes ,
3297
3315
parfor_found ,
3298
3316
openmp ,
3299
- typingctx ):
3317
+ typingctx ,
3318
+ func_ir ):
3300
3319
if openmp != False :
3301
3320
return _lower_parfor_openmp (block_label , block , new_blocks , typemap ,
3302
- calltypes , parfor_found , openmp , typingctx )
3321
+ calltypes , parfor_found , openmp , typingctx , func_ir )
3303
3322
3304
3323
scope = block .scope
3305
3324
i = _find_first_parfor (block .body )
0 commit comments