Skip to content

Commit 6df75c0

Browse files
committed
feat: extra simd kernel branches
1 parent 8c87d34 commit 6df75c0

File tree

3 files changed

+583
-1
lines changed

3 files changed

+583
-1
lines changed

ext/DynamicExpressionsLoopVectorizationExt.jl

Lines changed: 219 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@ import DynamicExpressions.EvaluateModule:
1212
deg1_l1_ll0_eval,
1313
deg2_l0_r0_eval,
1414
deg2_l0_eval,
15-
deg2_r0_eval
15+
deg2_r0_eval,
16+
deg2_l2_ll0_lr0_r0_eval,
17+
deg2_l0_r2_rl0_rr0_eval
1618
import DynamicExpressions.ExtensionInterfaceModule:
1719
_is_loopvectorization_loaded, bumper_kern1!, bumper_kern2!
1820

@@ -231,4 +233,220 @@ function bumper_kern2!(
231233
return cumulator1
232234
end
233235

236+
# Fused `@turbo` kernel for the tree shape `op(op_l(ll, lr), r)`, where `ll`, `lr`
# and `r` are leaves. Each leaf is either a constant (`.constant == true`, value in
# `.val`) or a feature (row index into `cX` stored in `.feature`).
#
# Arguments:
# - `tree`: root node; only `tree.l.l`, `tree.l.r` and `tree.r` are read.
# - `cX`: data matrix; feature leaves select a row, the loop runs over `axes(cX, 2)`.
# - `op`: binary operator applied at the root.
# - `op_l`: binary operator applied at `tree.l`.
# - `eval_options`: this method only applies to `EvalOptions{true}`
#   (presumably the turbo-enabled variant; confirm against `EvalOptions`).
#
# Returns `ResultOk(array, true)` with one entry per column of `cX`, unless one of the
# `@return_on_nonfinite_val` checks returns early (macro defined outside this block;
# presumably it yields a failed result when early exit is enabled).
#
# There is one branch per constant/feature combination, so constants are read once
# outside the loop and each loop body only loads the feature rows it needs.
function deg2_l2_ll0_lr0_r0_eval(
    tree::AbstractExpressionNode{T},
    cX::AbstractMatrix{T},
    op::F,
    op_l::F2,
    eval_options::EvalOptions{true},
) where {T<:Number,F,F2}
    if tree.l.l.constant && tree.l.r.constant && tree.r.constant
        # Everything is constant: fold to a single scalar and fill the output.
        val_ll = tree.l.l.val
        val_lr = tree.l.r.val
        val_r = tree.r.val
        @return_on_nonfinite_val(eval_options, val_ll, cX)
        @return_on_nonfinite_val(eval_options, val_lr, cX)
        @return_on_nonfinite_val(eval_options, val_r, cX)
        x_l = op_l(val_ll, val_lr)::T
        @return_on_nonfinite_val(eval_options, x_l, cX)
        x = op(x_l, val_r)::T
        @return_on_nonfinite_val(eval_options, x, cX)
        return ResultOk(get_filled_array(eval_options.buffer, x, cX, axes(cX, 2)), true)
    elseif tree.l.l.constant && tree.l.r.constant
        # Left subtree is constant: fold it once, outside the loop.
        val_ll = tree.l.l.val
        val_lr = tree.l.r.val
        @return_on_nonfinite_val(eval_options, val_ll, cX)
        @return_on_nonfinite_val(eval_options, val_lr, cX)
        x_l = op_l(val_ll, val_lr)::T
        # Validate the folded constant exactly as the all-constant branch does, and
        # do so before requesting the output array.
        @return_on_nonfinite_val(eval_options, x_l, cX)
        feature_r = tree.r.feature
        cumulator = get_array(eval_options.buffer, cX, axes(cX, 2))
        @turbo for j in axes(cX, 2)
            x = op(x_l, cX[feature_r, j])
            cumulator[j] = x
        end
        return ResultOk(cumulator, true)
    elseif tree.l.l.constant && tree.r.constant
        # `ll` and `r` constant, `lr` is a feature.
        val_ll = tree.l.l.val
        val_r = tree.r.val
        @return_on_nonfinite_val(eval_options, val_ll, cX)
        @return_on_nonfinite_val(eval_options, val_r, cX)
        feature_lr = tree.l.r.feature
        cumulator = get_array(eval_options.buffer, cX, axes(cX, 2))
        @turbo for j in axes(cX, 2)
            x_l = op_l(val_ll, cX[feature_lr, j])
            x = op(x_l, val_r)
            cumulator[j] = x
        end
        return ResultOk(cumulator, true)
    elseif tree.l.r.constant && tree.r.constant
        # `lr` and `r` constant, `ll` is a feature.
        val_lr = tree.l.r.val
        val_r = tree.r.val
        @return_on_nonfinite_val(eval_options, val_lr, cX)
        @return_on_nonfinite_val(eval_options, val_r, cX)
        feature_ll = tree.l.l.feature
        cumulator = get_array(eval_options.buffer, cX, axes(cX, 2))
        @turbo for j in axes(cX, 2)
            x_l = op_l(cX[feature_ll, j], val_lr)
            x = op(x_l, val_r)
            cumulator[j] = x
        end
        return ResultOk(cumulator, true)
    elseif tree.l.l.constant
        # Only `ll` constant.
        val_ll = tree.l.l.val
        @return_on_nonfinite_val(eval_options, val_ll, cX)
        feature_lr = tree.l.r.feature
        feature_r = tree.r.feature
        cumulator = get_array(eval_options.buffer, cX, axes(cX, 2))
        @turbo for j in axes(cX, 2)
            x_l = op_l(val_ll, cX[feature_lr, j])
            x = op(x_l, cX[feature_r, j])
            cumulator[j] = x
        end
        return ResultOk(cumulator, true)
    elseif tree.l.r.constant
        # Only `lr` constant.
        val_lr = tree.l.r.val
        @return_on_nonfinite_val(eval_options, val_lr, cX)
        feature_ll = tree.l.l.feature
        feature_r = tree.r.feature
        cumulator = get_array(eval_options.buffer, cX, axes(cX, 2))
        @turbo for j in axes(cX, 2)
            x_l = op_l(cX[feature_ll, j], val_lr)
            x = op(x_l, cX[feature_r, j])
            cumulator[j] = x
        end
        return ResultOk(cumulator, true)
    elseif tree.r.constant
        # Only `r` constant.
        val_r = tree.r.val
        @return_on_nonfinite_val(eval_options, val_r, cX)
        feature_ll = tree.l.l.feature
        feature_lr = tree.l.r.feature
        cumulator = get_array(eval_options.buffer, cX, axes(cX, 2))
        @turbo for j in axes(cX, 2)
            x_l = op_l(cX[feature_ll, j], cX[feature_lr, j])
            x = op(x_l, val_r)
            cumulator[j] = x
        end
        return ResultOk(cumulator, true)
    else
        # All three leaves are features.
        feature_ll = tree.l.l.feature
        feature_lr = tree.l.r.feature
        feature_r = tree.r.feature
        cumulator = get_array(eval_options.buffer, cX, axes(cX, 2))
        @turbo for j in axes(cX, 2)
            x_l = op_l(cX[feature_ll, j], cX[feature_lr, j])
            x = op(x_l, cX[feature_r, j])
            cumulator[j] = x
        end
        return ResultOk(cumulator, true)
    end
end
343+
344+
# Fused `@turbo` kernel for the tree shape `op(l, op_r(rl, rr))`, where `l`, `rl`
# and `rr` are leaves. Each leaf is either a constant (`.constant == true`, value in
# `.val`) or a feature (row index into `cX` stored in `.feature`).
#
# Arguments:
# - `tree`: root node; only `tree.l`, `tree.r.l` and `tree.r.r` are read.
# - `cX`: data matrix; feature leaves select a row, the loop runs over `axes(cX, 2)`.
# - `op`: binary operator applied at the root.
# - `op_r`: binary operator applied at `tree.r`.
# - `eval_options`: this method only applies to `EvalOptions{true}`
#   (presumably the turbo-enabled variant; confirm against `EvalOptions`).
#
# Returns `ResultOk(array, true)` with one entry per column of `cX`, unless one of the
# `@return_on_nonfinite_val` checks returns early (macro defined outside this block;
# presumably it yields a failed result when early exit is enabled).
#
# There is one branch per constant/feature combination, so constants are read once
# outside the loop and each loop body only loads the feature rows it needs.
function deg2_l0_r2_rl0_rr0_eval(
    tree::AbstractExpressionNode{T},
    cX::AbstractMatrix{T},
    op::F,
    op_r::F2,
    eval_options::EvalOptions{true},
) where {T<:Number,F,F2}
    if tree.l.constant && tree.r.l.constant && tree.r.r.constant
        # Everything is constant: fold to a single scalar and fill the output.
        val_l = tree.l.val
        val_rl = tree.r.l.val
        val_rr = tree.r.r.val
        @return_on_nonfinite_val(eval_options, val_l, cX)
        @return_on_nonfinite_val(eval_options, val_rl, cX)
        @return_on_nonfinite_val(eval_options, val_rr, cX)
        x_r = op_r(val_rl, val_rr)::T
        @return_on_nonfinite_val(eval_options, x_r, cX)
        x = op(val_l, x_r)::T
        @return_on_nonfinite_val(eval_options, x, cX)
        return ResultOk(get_filled_array(eval_options.buffer, x, cX, axes(cX, 2)), true)
    elseif tree.r.l.constant && tree.r.r.constant
        # Right subtree is constant: fold it once, outside the loop.
        val_rl = tree.r.l.val
        val_rr = tree.r.r.val
        @return_on_nonfinite_val(eval_options, val_rl, cX)
        @return_on_nonfinite_val(eval_options, val_rr, cX)
        x_r = op_r(val_rl, val_rr)::T
        # Validate the folded constant exactly as the all-constant branch does, and
        # do so before requesting the output array.
        @return_on_nonfinite_val(eval_options, x_r, cX)
        feature_l = tree.l.feature
        cumulator = get_array(eval_options.buffer, cX, axes(cX, 2))
        @turbo for j in axes(cX, 2)
            x = op(cX[feature_l, j], x_r)
            cumulator[j] = x
        end
        return ResultOk(cumulator, true)
    elseif tree.l.constant && tree.r.r.constant
        # `l` and `rr` constant, `rl` is a feature.
        val_l = tree.l.val
        val_rr = tree.r.r.val
        @return_on_nonfinite_val(eval_options, val_l, cX)
        @return_on_nonfinite_val(eval_options, val_rr, cX)
        feature_rl = tree.r.l.feature
        cumulator = get_array(eval_options.buffer, cX, axes(cX, 2))
        @turbo for j in axes(cX, 2)
            x_r = op_r(cX[feature_rl, j], val_rr)
            x = op(val_l, x_r)
            cumulator[j] = x
        end
        return ResultOk(cumulator, true)
    elseif tree.l.constant && tree.r.l.constant
        # `l` and `rl` constant, `rr` is a feature.
        val_l = tree.l.val
        val_rl = tree.r.l.val
        @return_on_nonfinite_val(eval_options, val_l, cX)
        @return_on_nonfinite_val(eval_options, val_rl, cX)
        feature_rr = tree.r.r.feature
        cumulator = get_array(eval_options.buffer, cX, axes(cX, 2))
        @turbo for j in axes(cX, 2)
            x_r = op_r(val_rl, cX[feature_rr, j])
            x = op(val_l, x_r)
            cumulator[j] = x
        end
        return ResultOk(cumulator, true)
    elseif tree.l.constant
        # Only `l` constant.
        val_l = tree.l.val
        @return_on_nonfinite_val(eval_options, val_l, cX)
        feature_rl = tree.r.l.feature
        feature_rr = tree.r.r.feature
        cumulator = get_array(eval_options.buffer, cX, axes(cX, 2))
        @turbo for j in axes(cX, 2)
            x_r = op_r(cX[feature_rl, j], cX[feature_rr, j])
            x = op(val_l, x_r)
            cumulator[j] = x
        end
        return ResultOk(cumulator, true)
    elseif tree.r.l.constant
        # Only `rl` constant.
        val_rl = tree.r.l.val
        @return_on_nonfinite_val(eval_options, val_rl, cX)
        feature_l = tree.l.feature
        feature_rr = tree.r.r.feature
        cumulator = get_array(eval_options.buffer, cX, axes(cX, 2))
        @turbo for j in axes(cX, 2)
            x_r = op_r(val_rl, cX[feature_rr, j])
            x = op(cX[feature_l, j], x_r)
            cumulator[j] = x
        end
        return ResultOk(cumulator, true)
    elseif tree.r.r.constant
        # Only `rr` constant.
        val_rr = tree.r.r.val
        @return_on_nonfinite_val(eval_options, val_rr, cX)
        feature_l = tree.l.feature
        feature_rl = tree.r.l.feature
        cumulator = get_array(eval_options.buffer, cX, axes(cX, 2))
        @turbo for j in axes(cX, 2)
            x_r = op_r(cX[feature_rl, j], val_rr)
            x = op(cX[feature_l, j], x_r)
            cumulator[j] = x
        end
        return ResultOk(cumulator, true)
    else
        # All three leaves are features.
        feature_l = tree.l.feature
        feature_rl = tree.r.l.feature
        feature_rr = tree.r.r.feature
        cumulator = get_array(eval_options.buffer, cX, axes(cX, 2))
        @turbo for j in axes(cX, 2)
            x_r = op_r(cX[feature_rl, j], cX[feature_rr, j])
            x = op(cX[feature_l, j], x_r)
            cumulator[j] = x
        end
        return ResultOk(cumulator, true)
    end
end
451+
234452
end

0 commit comments

Comments
 (0)