Skip to content

Commit

Permalink
(*)Rearrange calc_kappa_shear_vertex for FMAs
Browse files Browse the repository at this point in the history
  Added mathematically equivalent rearrangements of the code in
calc_kappa_shear_vertex that interpolates velocities, temperatures and
salinities to the vertices to expose the mask variables while ensuring that the
other multiplications occur within parentheses so that they will exhibit
rotational symmetry when fused-multiply-adds are enabled.  FMAs can still occur,
but it will be multiplication by the 0-or-1 masks that are fused with an
addition.  Also added parentheses to 3 expressions calculating the squared shear
in calculate_projected_state for rotational symmetry with FMAs.  All answers are
bitwise identical in cases without FMAs, but answers could change when FMAs are
enabled.
  • Loading branch information
Hallberg-NOAA committed Jul 29, 2024
1 parent c0bef18 commit 0b50a15
Showing 1 changed file with 17 additions and 17 deletions.
34 changes: 17 additions & 17 deletions src/parameterizations/vertical/MOM_kappa_shear.F90
Original file line number Diff line number Diff line change
Expand Up @@ -442,26 +442,26 @@ subroutine Calc_kappa_shear_vertex(u_in, v_in, h, T_in, S_in, tv, p_surf, kappa_

! Interpolate the various quantities to the corners, using masks.
do k=1,nz ; do I=IsB,IeB
u_2d(I,k) = (u_in(I,j,k) * (G%mask2dCu(I,j) * (h(i,j,k) + h(i+1,j,k))) + &
u_in(I,j+1,k) * (G%mask2dCu(I,j+1) * (h(i,j+1,k) + h(i+1,j+1,k))) ) / &
u_2d(I,k) = (G%mask2dCu(I,j) * (u_in(I,j,k) * (h(i,j,k) + h(i+1,j,k))) + &
G%mask2dCu(I,j+1) * (u_in(I,j+1,k) * (h(i,j+1,k) + h(i+1,j+1,k))) ) / &
((G%mask2dCu(I,j) * (h(i,j,k) + h(i+1,j,k)) + &
G%mask2dCu(I,j+1) * (h(i,j+1,k) + h(i+1,j+1,k))) + GV%H_subroundoff)
v_2d(I,k) = (v_in(i,J,k) * (G%mask2dCv(i,J) * (h(i,j,k) + h(i,j+1,k))) + &
v_in(i+1,J,k) * (G%mask2dCv(i+1,J) * (h(i+1,j,k) + h(i+1,j+1,k))) ) / &
v_2d(I,k) = (G%mask2dCv(i,J) * (v_in(i,J,k) * (h(i,j,k) + h(i,j+1,k))) + &
G%mask2dCv(i+1,J) * (v_in(i+1,J,k) * (h(i+1,j,k) + h(i+1,j+1,k))) ) / &
((G%mask2dCv(i,J) * (h(i,j,k) + h(i,j+1,k)) + &
G%mask2dCv(i+1,J) * (h(i+1,j,k) + h(i+1,j+1,k))) + GV%H_subroundoff)
I_hwt = 1.0 / (((G%mask2dT(i,j) * h(i,j,k) + G%mask2dT(i+1,j+1) * h(i+1,j+1,k)) + &
(G%mask2dT(i+1,j) * h(i+1,j,k) + G%mask2dT(i,j+1) * h(i,j+1,k))) + &
GV%H_subroundoff)
if (use_temperature) then
T_2d(I,k) = ( ((G%mask2dT(i,j) * h(i,j,k)) * T_in(i,j,k) + &
(G%mask2dT(i+1,j+1) * h(i+1,j+1,k)) * T_in(i+1,j+1,k)) + &
((G%mask2dT(i+1,j) * h(i+1,j,k)) * T_in(i+1,j,k) + &
(G%mask2dT(i,j+1) * h(i,j+1,k)) * T_in(i,j+1,k)) ) * I_hwt
S_2d(I,k) = ( ((G%mask2dT(i,j) * h(i,j,k)) * S_in(i,j,k) + &
(G%mask2dT(i+1,j+1) * h(i+1,j+1,k)) * S_in(i+1,j+1,k)) + &
((G%mask2dT(i+1,j) * h(i+1,j,k)) * S_in(i+1,j,k) + &
(G%mask2dT(i,j+1) * h(i,j+1,k)) * S_in(i,j+1,k)) ) * I_hwt
T_2d(I,k) = ( (G%mask2dT(i,j) * (h(i,j,k) * T_in(i,j,k)) + &
G%mask2dT(i+1,j+1) * (h(i+1,j+1,k) * T_in(i+1,j+1,k))) + &
(G%mask2dT(i+1,j) * (h(i+1,j,k) * T_in(i+1,j,k)) + &
G%mask2dT(i,j+1) * (h(i,j+1,k) * T_in(i,j+1,k))) ) * I_hwt
S_2d(I,k) = ( (G%mask2dT(i,j) * (h(i,j,k) * S_in(i,j,k)) + &
G%mask2dT(i+1,j+1) * (h(i+1,j+1,k) * S_in(i+1,j+1,k))) + &
(G%mask2dT(i+1,j) * (h(i+1,j,k) * S_in(i+1,j,k)) + &
G%mask2dT(i,j+1) * (h(i,j+1,k) * S_in(i,j+1,k))) ) * I_hwt
endif
h_2d(I,k) = ((G%mask2dT(i,j) * h(i,j,k) + G%mask2dT(i+1,j+1) * h(i+1,j+1,k)) + &
(G%mask2dT(i+1,j) * h(i+1,j,k) + G%mask2dT(i,j+1) * h(i,j+1,k)) ) / &
Expand All @@ -472,8 +472,8 @@ subroutine Calc_kappa_shear_vertex(u_in, v_in, h, T_in, S_in, tv, p_surf, kappa_
((G%mask2dT(i,j) + G%mask2dT(i+1,j+1)) + &
(G%mask2dT(i+1,j) + G%mask2dT(i,j+1)) + 1.0e-36 )
! h_2d(I,k) = 0.25*((h(i,j,k) + h(i+1,j+1,k)) + (h(i+1,j,k) + h(i,j+1,k)))
! h_2d(I,k) = ((h(i,j,k)**2 + h(i+1,j+1,k)**2) + &
! (h(i+1,j,k)**2 + h(i,j+1,k)**2)) * I_hwt
! h_2d(I,k) = (((h(i,j,k)**2) + (h(i+1,j+1,k)**2)) + &
! ((h(i+1,j,k)**2) + (h(i,j+1,k)**2))) * I_hwt
enddo ; enddo
if (.not.use_temperature) then ; do k=1,nz ; do I=IsB,IeB
rho_2d(I,k) = GV%Rlay(k)
Expand Down Expand Up @@ -1224,12 +1224,12 @@ subroutine calculate_projected_state(kappa, u0, v0, T0, S0, dt, nz, dz, I_dz_int
! Store the squared shear at interfaces
S2(1) = 0.0 ; S2(nz+1) = 0.0
if (ks > 1) &
S2(ks) = ((u(ks)-u0(ks-1))**2 + (v(ks)-v0(ks-1))**2) * (US%L_to_Z*I_dz_int(ks))**2
S2(ks) = (((u(ks)-u0(ks-1))**2) + ((v(ks)-v0(ks-1))**2)) * (US%L_to_Z*I_dz_int(ks))**2
do K=ks+1,ke
S2(K) = ((u(k)-u(k-1))**2 + (v(k)-v(k-1))**2) * (US%L_to_Z*I_dz_int(K))**2
S2(K) = (((u(k)-u(k-1))**2) + ((v(k)-v(k-1))**2)) * (US%L_to_Z*I_dz_int(K))**2
enddo
if (ke<nz) &
S2(ke+1) = ((u0(ke+1)-u(ke))**2 + (v0(ke+1)-v(ke))**2) * (US%L_to_Z*I_dz_int(ke+1))**2
S2(ke+1) = (((u0(ke+1)-u(ke))**2) + ((v0(ke+1)-v(ke))**2)) * (US%L_to_Z*I_dz_int(ke+1))**2

! Store the buoyancy frequency at interfaces
N2(1) = 0.0 ; N2(nz+1) = 0.0
Expand Down

0 comments on commit 0b50a15

Please sign in to comment.