From 0b50a155aace08a159882ae7442c1499bbdc833d Mon Sep 17 00:00:00 2001 From: Robert Hallberg Date: Fri, 1 Mar 2024 12:45:21 -0500 Subject: [PATCH] (*)Rearrange calc_kappa_shear_vertex for FMAs Added mathematically equivalent rearrangements of the code in calc_kappa_shear_vertex that interpolates velocities, temperatures and salinities to the vertices to expose the mask variables while ensuring that the other multiplications occur within parentheses so that they will exhibit rotational symmetry when fused-multiply-adds are enabled. FMAs can still occur, but it will be multiplication by the 0-or-1 masks that are fused with an addition. Also added parentheses to 3 expressions calculating the squared shear in calculate_projected_state for rotational symmetry with FMAs. All answers are bitwise identical in cases without FMAs, but answers could change when FMAs are enabled. --- .../vertical/MOM_kappa_shear.F90 | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/src/parameterizations/vertical/MOM_kappa_shear.F90 b/src/parameterizations/vertical/MOM_kappa_shear.F90 index d44ab79d1b..536d25e595 100644 --- a/src/parameterizations/vertical/MOM_kappa_shear.F90 +++ b/src/parameterizations/vertical/MOM_kappa_shear.F90 @@ -442,26 +442,26 @@ subroutine Calc_kappa_shear_vertex(u_in, v_in, h, T_in, S_in, tv, p_surf, kappa_ ! Interpolate the various quantities to the corners, using masks. do k=1,nz ; do I=IsB,IeB - u_2d(I,k) = (u_in(I,j,k) * (G%mask2dCu(I,j) * (h(i,j,k) + h(i+1,j,k))) + & - u_in(I,j+1,k) * (G%mask2dCu(I,j+1) * (h(i,j+1,k) + h(i+1,j+1,k))) ) / & + u_2d(I,k) = (G%mask2dCu(I,j) * (u_in(I,j,k) * (h(i,j,k) + h(i+1,j,k))) + & + G%mask2dCu(I,j+1) * (u_in(I,j+1,k) * (h(i,j+1,k) + h(i+1,j+1,k))) ) / & ((G%mask2dCu(I,j) * (h(i,j,k) + h(i+1,j,k)) + & G%mask2dCu(I,j+1) * (h(i,j+1,k) + h(i+1,j+1,k))) + GV%H_subroundoff) - v_2d(I,k) = (v_in(i,J,k) * (G%mask2dCv(i,J) * (h(i,j,k) + h(i,j+1,k))) + & - v_in(i+1,J,k) * (G%mask2dCv(i+1,J) * (h(i+1,j,k) + h(i+1,j+1,k))) ) / & + v_2d(I,k) = (G%mask2dCv(i,J) * (v_in(i,J,k) * (h(i,j,k) + h(i,j+1,k))) + & + G%mask2dCv(i+1,J) * (v_in(i+1,J,k) * (h(i+1,j,k) + h(i+1,j+1,k))) ) / & ((G%mask2dCv(i,J) * (h(i,j,k) + h(i,j+1,k)) + & G%mask2dCv(i+1,J) * (h(i+1,j,k) + h(i+1,j+1,k))) + GV%H_subroundoff) I_hwt = 1.0 / (((G%mask2dT(i,j) * h(i,j,k) + G%mask2dT(i+1,j+1) * h(i+1,j+1,k)) + & (G%mask2dT(i+1,j) * h(i+1,j,k) + G%mask2dT(i,j+1) * h(i,j+1,k))) + & GV%H_subroundoff) if (use_temperature) then - T_2d(I,k) = ( ((G%mask2dT(i,j) * h(i,j,k)) * T_in(i,j,k) + & - (G%mask2dT(i+1,j+1) * h(i+1,j+1,k)) * T_in(i+1,j+1,k)) + & - ((G%mask2dT(i+1,j) * h(i+1,j,k)) * T_in(i+1,j,k) + & - (G%mask2dT(i,j+1) * h(i,j+1,k)) * T_in(i,j+1,k)) ) * I_hwt - S_2d(I,k) = ( ((G%mask2dT(i,j) * h(i,j,k)) * S_in(i,j,k) + & - (G%mask2dT(i+1,j+1) * h(i+1,j+1,k)) * S_in(i+1,j+1,k)) + & - ((G%mask2dT(i+1,j) * h(i+1,j,k)) * S_in(i+1,j,k) + & - (G%mask2dT(i,j+1) * h(i,j+1,k)) * S_in(i,j+1,k)) ) * I_hwt + T_2d(I,k) = ( (G%mask2dT(i,j) * (h(i,j,k) * T_in(i,j,k)) + & + G%mask2dT(i+1,j+1) * (h(i+1,j+1,k) * T_in(i+1,j+1,k))) + & + (G%mask2dT(i+1,j) * (h(i+1,j,k) * T_in(i+1,j,k)) + & + G%mask2dT(i,j+1) * (h(i,j+1,k) * T_in(i,j+1,k))) ) * I_hwt + S_2d(I,k) = ( (G%mask2dT(i,j) * (h(i,j,k) * S_in(i,j,k)) + & + G%mask2dT(i+1,j+1) * (h(i+1,j+1,k) * S_in(i+1,j+1,k))) + & + (G%mask2dT(i+1,j) * (h(i+1,j,k) * S_in(i+1,j,k)) + & + G%mask2dT(i,j+1) * (h(i,j+1,k) * S_in(i,j+1,k))) ) * I_hwt endif h_2d(I,k) = ((G%mask2dT(i,j) * h(i,j,k) + G%mask2dT(i+1,j+1) * h(i+1,j+1,k)) + & (G%mask2dT(i+1,j) * h(i+1,j,k) + G%mask2dT(i,j+1) * h(i,j+1,k)) ) / & @@ -472,8 +472,8 @@ subroutine Calc_kappa_shear_vertex(u_in, v_in, h, T_in, S_in, tv, p_surf, kappa_ ((G%mask2dT(i,j) + G%mask2dT(i+1,j+1)) + & (G%mask2dT(i+1,j) + G%mask2dT(i,j+1)) + 1.0e-36 ) ! h_2d(I,k) = 0.25*((h(i,j,k) + h(i+1,j+1,k)) + (h(i+1,j,k) + h(i,j+1,k))) -! h_2d(I,k) = ((h(i,j,k)**2 + h(i+1,j+1,k)**2) + & -! (h(i+1,j,k)**2 + h(i,j+1,k)**2)) * I_hwt +! h_2d(I,k) = (((h(i,j,k)**2) + (h(i+1,j+1,k)**2)) + & +! ((h(i+1,j,k)**2) + (h(i,j+1,k)**2))) * I_hwt enddo ; enddo if (.not.use_temperature) then ; do k=1,nz ; do I=IsB,IeB rho_2d(I,k) = GV%Rlay(k) @@ -1224,12 +1224,12 @@ subroutine calculate_projected_state(kappa, u0, v0, T0, S0, dt, nz, dz, I_dz_int ! Store the squared shear at interfaces S2(1) = 0.0 ; S2(nz+1) = 0.0 if (ks > 1) & - S2(ks) = ((u(ks)-u0(ks-1))**2 + (v(ks)-v0(ks-1))**2) * (US%L_to_Z*I_dz_int(ks))**2 + S2(ks) = (((u(ks)-u0(ks-1))**2) + ((v(ks)-v0(ks-1))**2)) * (US%L_to_Z*I_dz_int(ks))**2 do K=ks+1,ke - S2(K) = ((u(k)-u(k-1))**2 + (v(k)-v(k-1))**2) * (US%L_to_Z*I_dz_int(K))**2 + S2(K) = (((u(k)-u(k-1))**2) + ((v(k)-v(k-1))**2)) * (US%L_to_Z*I_dz_int(K))**2 enddo if (ke