@@ -68,23 +68,23 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp
6868make[1]: Nothing to be done for `all.512z_d_inl0_hrd0_hasCurand'.
6969make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx'
7070
71- DATE: 2022-01-11_20:54:44
71+ DATE: 2022-01-17_16:39:57
7272
7373On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
7474=========================================================================
7575runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP=
7676Process = SIGMA_SM_GG_TTX_CUDA [nvcc 11.1.105 (gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0]
7777Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAREF
7878FP precision = DOUBLE (NaN/abnormal=0, zero=0)
79- EvtsPerSec[Rmb+ME] (23) = ( 4.474940e +07 ) sec^-1
80- EvtsPerSec[MatrixElems] (3) = ( 1.285687e +08 ) sec^-1
81- EvtsPerSec[MECalcOnly] (3a) = ( 1.428806e +08 ) sec^-1
79+ EvtsPerSec[Rmb+ME] (23) = ( 4.102362e +07 ) sec^-1
80+ EvtsPerSec[MatrixElems] (3) = ( 1.254413e +08 ) sec^-1
81+ EvtsPerSec[MECalcOnly] (3a) = ( 1.421700e +08 ) sec^-1
8282MeanMatrixElemValue = ( 2.085623e+00 +- 4.835084e-03 ) GeV^0
83- TOTAL : 0.804412 sec
84- 1,241,084,391 cycles:u # 1.639 GHz
85- 2,415,012,227 instructions:u # 1.95 insn per cycle
86- 0.877208910 seconds time elapsed
87- ==PROF== Profiling "sigmaKin": launch__registers_per_thread 172
83+ TOTAL : 1.460359 sec
84+ 1,750,368,254 cycles:u # 1.148 GHz
85+ 3,462,065,234 instructions:u # 1.98 insn per cycle
86+ 1.767628095 seconds time elapsed
87+ ==PROF== Profiling "sigmaKin": launch__registers_per_thread 170
8888==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
8989.........................................................................
9090=========================================================================
@@ -93,14 +93,14 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 10.2.0] [inlineHel=0] [ha
9393Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAREF
9494FP precision = DOUBLE (NaN/abnormal=0, zero=0)
9595Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD)
96- EvtsPerSec[Rmb+ME] (23) = ( 1.885695e +05 ) sec^-1
97- EvtsPerSec[MatrixElems] (3) = ( 2.008159e +05 ) sec^-1
98- EvtsPerSec[MECalcOnly] (3a) = ( 2.008159e +05 ) sec^-1
96+ EvtsPerSec[Rmb+ME] (23) = ( 1.889508e +05 ) sec^-1
97+ EvtsPerSec[MatrixElems] (3) = ( 2.010785e +05 ) sec^-1
98+ EvtsPerSec[MECalcOnly] (3a) = ( 2.010785e +05 ) sec^-1
9999MeanMatrixElemValue = ( 2.085623e+00 +- 4.835084e-03 ) GeV^0
100- TOTAL : 2.888760 sec
101- 7,553,516,361 cycles:u # 2.604 GHz
102- 22,020,957,578 instructions:u # 2.92 insn per cycle
103- 2.905195425 seconds time elapsed
100+ TOTAL : 2.883996 sec
101+ 7,536,098,356 cycles:u # 2.601 GHz
102+ 22,020,934,454 instructions:u # 2.92 insn per cycle
103+ 2.901081838 seconds time elapsed
104104=Symbols in CPPProcess.o= (~sse4: 456) (avx2: 0) (512y: 0) (512z: 0)
105105-------------------------------------------------------------------------
106106runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/build.none_d_inl0_hrd0/runTest.exe
@@ -111,14 +111,14 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 10.2.0] [inlineHel=0] [ha
111111Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXREF
112112FP precision = DOUBLE (NaN/abnormal=0, zero=0)
113113Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
114- EvtsPerSec[Rmb+ME] (23) = ( 2.859668e +05 ) sec^-1
115- EvtsPerSec[MatrixElems] (3) = ( 3.155640e +05 ) sec^-1
116- EvtsPerSec[MECalcOnly] (3a) = ( 3.155640e +05 ) sec^-1
114+ EvtsPerSec[Rmb+ME] (23) = ( 2.856574e +05 ) sec^-1
115+ EvtsPerSec[MatrixElems] (3) = ( 3.155833e +05 ) sec^-1
116+ EvtsPerSec[MECalcOnly] (3a) = ( 3.155833e +05 ) sec^-1
117117MeanMatrixElemValue = ( 2.085623e+00 +- 4.835084e-03 ) GeV^0
118- TOTAL : 1.939524 sec
119- 5,017,269,787 cycles:u # 2.569 GHz
120- 12,885,087,717 instructions:u # 2.57 insn per cycle
121- 1.955618653 seconds time elapsed
118+ TOTAL : 1.943074 sec
119+ 5,020,219,382 cycles:u # 2.566 GHz
120+ 12,885,064,423 instructions:u # 2.57 insn per cycle
121+ 1.960308285 seconds time elapsed
122122=Symbols in CPPProcess.o= (~sse4: 2353) (avx2: 0) (512y: 0) (512z: 0)
123123-------------------------------------------------------------------------
124124runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe
@@ -129,14 +129,14 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 10.2.0] [inlineHel=0] [ha
129129Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXREF
130130FP precision = DOUBLE (NaN/abnormal=0, zero=0)
131131Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
132- EvtsPerSec[Rmb+ME] (23) = ( 4.837448e +05 ) sec^-1
133- EvtsPerSec[MatrixElems] (3) = ( 5.708976e +05 ) sec^-1
134- EvtsPerSec[MECalcOnly] (3a) = ( 5.708976e +05 ) sec^-1
132+ EvtsPerSec[Rmb+ME] (23) = ( 4.838378e +05 ) sec^-1
133+ EvtsPerSec[MatrixElems] (3) = ( 5.715632e +05 ) sec^-1
134+ EvtsPerSec[MECalcOnly] (3a) = ( 5.715632e +05 ) sec^-1
135135MeanMatrixElemValue = ( 2.085623e+00 +- 4.835084e-03 ) GeV^0
136- TOTAL : 1.191260 sec
137- 2,653,373,365 cycles:u # 2.204 GHz
138- 5,512,790,923 instructions:u # 2.08 insn per cycle
139- 1.207423210 seconds time elapsed
136+ TOTAL : 1.191525 sec
137+ 2,654,274,999 cycles:u # 2.202 GHz
138+ 5,512,768,344 instructions:u # 2.08 insn per cycle
139+ 1.208573724 seconds time elapsed
140140=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2185) (512y: 0) (512z: 0)
141141-------------------------------------------------------------------------
142142runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe
@@ -147,14 +147,14 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 10.2.0] [inlineHel=0] [ha
147147Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXREF
148148FP precision = DOUBLE (NaN/abnormal=0, zero=0)
149149Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
150- EvtsPerSec[Rmb+ME] (23) = ( 5.161384e +05 ) sec^-1
151- EvtsPerSec[MatrixElems] (3) = ( 6.170124e +05 ) sec^-1
152- EvtsPerSec[MECalcOnly] (3a) = ( 6.170124e +05 ) sec^-1
150+ EvtsPerSec[Rmb+ME] (23) = ( 5.161483e +05 ) sec^-1
151+ EvtsPerSec[MatrixElems] (3) = ( 6.175564e +05 ) sec^-1
152+ EvtsPerSec[MECalcOnly] (3a) = ( 6.175564e +05 ) sec^-1
153153MeanMatrixElemValue = ( 2.085623e+00 +- 4.835084e-03 ) GeV^0
154- TOTAL : 1.121818 sec
155- 2,500,360,700 cycles:u # 2.203 GHz
156- 5,346,587,691 instructions:u # 2.14 insn per cycle
157- 1.137968023 seconds time elapsed
154+ TOTAL : 1.123995 sec
155+ 2,498,424,655 cycles:u # 2.196 GHz
156+ 5,346,564,797 instructions:u # 2.14 insn per cycle
157+ 1.141241097 seconds time elapsed
158158=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2024) (512y: 115) (512z: 0)
159159-------------------------------------------------------------------------
160160runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe
@@ -165,14 +165,14 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 10.2.0] [inlineHel=0] [ha
165165Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXREF
166166FP precision = DOUBLE (NaN/abnormal=0, zero=0)
167167Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
168- EvtsPerSec[Rmb+ME] (23) = ( 3.361240e +05 ) sec^-1
169- EvtsPerSec[MatrixElems] (3) = ( 3.763054e +05 ) sec^-1
170- EvtsPerSec[MECalcOnly] (3a) = ( 3.763054e +05 ) sec^-1
168+ EvtsPerSec[Rmb+ME] (23) = ( 3.370261e +05 ) sec^-1
169+ EvtsPerSec[MatrixElems] (3) = ( 3.778267e +05 ) sec^-1
170+ EvtsPerSec[MECalcOnly] (3a) = ( 3.778267e +05 ) sec^-1
171171MeanMatrixElemValue = ( 2.085623e+00 +- 4.835084e-03 ) GeV^0
172- TOTAL : 1.668573 sec
173- 2,764,359,455 cycles:u # 1.644 GHz
174- 3,601,047,206 instructions:u # 1.30 insn per cycle
175- 1.689281266 seconds time elapsed
172+ TOTAL : 1.664563 sec
173+ 2,760,255,519 cycles:u # 1.645 GHz
174+ 3,601,024,707 instructions:u # 1.30 insn per cycle
175+ 1.681761721 seconds time elapsed
176176=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1051) (512y: 85) (512z: 1591)
177177-------------------------------------------------------------------------
178178runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe
0 commit comments