Skip to content

Reassociate pass serializes vector comparison results #64840

Closed
@RKSimon

Description

@RKSimon

https://godbolt.org/z/Kh7667Yxo

Pulled out of Issue #63946

While the scalar code for an accumulated `or`-chain of comparison results stays as a binary tree:

; Scalar reference case: tests whether %a equals any of %b0..%b7.
; The eight i1 comparison results are combined with `or` as a balanced
; binary tree, so the longest dependency chain through the ORs is 3 deep.
define i1 @scalar(i32 %a, i32 %b0, i32 %b1, i32 %b2, i32 %b3, i32 %b4, i32 %b5, i32 %b6, i32 %b7) local_unnamed_addr {
entry:
  ; Eight independent equality comparisons against %a.
  %cmp0 = icmp eq i32 %b0, %a
  %cmp1 = icmp eq i32 %b1, %a
  %cmp2 = icmp eq i32 %b2, %a
  %cmp3 = icmp eq i32 %b3, %a
  %cmp4 = icmp eq i32 %b4, %a
  %cmp5 = icmp eq i32 %b5, %a
  %cmp6 = icmp eq i32 %b6, %a
  %cmp7 = icmp eq i32 %b7, %a
  ; Tree level 1: OR adjacent pairs of comparisons (four independent ops).
  %or01 = or i1 %cmp0, %cmp1
  %or23 = or i1 %cmp2, %cmp3
  %or45 = or i1 %cmp4, %cmp5
  %or67 = or i1 %cmp6, %cmp7
  ; Tree level 2: OR the pair results (two independent ops).
  %or0123 = or i1 %or01, %or23
  %or4567 = or i1 %or45, %or67
  ; Tree level 3: root of the tree, the final result.
  %or01234567 = or i1 %or0123, %or4567
  ret i1 %or01234567
}

The vector code equivalent gets linearised, resulting in a much deeper serial chain, affecting IPC and making it more likely we will hit value tracking recursion depth limits:

; Vector input case: same shape as a balanced OR tree over eight equality
; comparisons, but operating on <8 x i32> operands and producing an
; <8 x i1> mask. As written here the OR dependency chain is 3 deep.
define <8 x i1> @vector(<8 x i32> %a, <8 x i32> %b0, <8 x i32> %b1, <8 x i32> %b2, <8 x i32> %b3, <8 x i32> %b4, <8 x i32> %b5, <8 x i32> %b6, <8 x i32> %b7) local_unnamed_addr {
entry:
  ; Eight independent vector equality comparisons against %a.
  %cmp0 = icmp eq <8 x i32> %b0, %a
  %cmp1 = icmp eq <8 x i32> %b1, %a
  %cmp2 = icmp eq <8 x i32> %b2, %a
  %cmp3 = icmp eq <8 x i32> %b3, %a
  %cmp4 = icmp eq <8 x i32> %b4, %a
  %cmp5 = icmp eq <8 x i32> %b5, %a
  %cmp6 = icmp eq <8 x i32> %b6, %a
  %cmp7 = icmp eq <8 x i32> %b7, %a
  ; Tree level 1: OR adjacent pairs of comparison masks (four independent ops).
  %or01 = or <8 x i1> %cmp0, %cmp1
  %or23 = or <8 x i1> %cmp2, %cmp3
  %or45 = or <8 x i1> %cmp4, %cmp5
  %or67 = or <8 x i1> %cmp6, %cmp7
  ; Tree level 2: OR the pair results (two independent ops).
  %or0123 = or <8 x i1> %or01, %or23
  %or4567 = or <8 x i1> %or45, %or67
  ; Tree level 3: root of the tree, the final mask.
  %or01234567 = or <8 x i1> %or0123, %or4567
  ret <8 x i1> %or01234567
}

-> (after optimization; the Reassociate pass linearises the `or` tree):

; Transformed form of @vector as quoted in the report: the eight compares
; are unchanged, but the ORs now form a single serial chain in which each
; `or` consumes the result of the previous one (7 dependent ops deep).
; NOTE: the value names (%or67, %or45, ...) no longer describe their
; operands; e.g. %or67 below is the OR of %cmp1 and %cmp0.
define <8 x i1> @vector(<8 x i32> %a, <8 x i32> %b0, <8 x i32> %b1, <8 x i32> %b2, <8 x i32> %b3, <8 x i32> %b4, <8 x i32> %b5, <8 x i32> %b6, <8 x i32> %b7) local_unnamed_addr {
  ; Eight independent vector equality comparisons against %a.
  %cmp0 = icmp eq <8 x i32> %b0, %a
  %cmp1 = icmp eq <8 x i32> %b1, %a
  %cmp2 = icmp eq <8 x i32> %b2, %a
  %cmp3 = icmp eq <8 x i32> %b3, %a
  %cmp4 = icmp eq <8 x i32> %b4, %a
  %cmp5 = icmp eq <8 x i32> %b5, %a
  %cmp6 = icmp eq <8 x i32> %b6, %a
  %cmp7 = icmp eq <8 x i32> %b7, %a
  ; Linear accumulation: start with %cmp1 | %cmp0, then fold in %cmp2..%cmp7
  ; one at a time; every step depends on the step before it.
  %or67 = or <8 x i1> %cmp1, %cmp0
  %or45 = or <8 x i1> %or67, %cmp2
  %or4567 = or <8 x i1> %or45, %cmp3
  %or23 = or <8 x i1> %or4567, %cmp4
  %or01 = or <8 x i1> %or23, %cmp5
  %or0123 = or <8 x i1> %or01, %cmp6
  ; Final link of the chain, the returned mask.
  %or01234567 = or <8 x i1> %or0123, %cmp7
  ret <8 x i1> %or01234567
}

Metadata

Metadata

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions