Skip to content

Commit

Permalink
Merge pull request #1 from jeffhammond/cascadelake
Browse files Browse the repository at this point in the history
Cascade Lake support
  • Loading branch information
jeffhammond authored Apr 15, 2019
2 parents 4ae7095 + 2dba574 commit c87015f
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 8 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
CC = gcc
CFLAGS = -Wall -O3 -fopenmp
CFLAGS = -Wall -O3 -fopenmp -std=c99

#CFLAGS += -DDEBUG -g3

Expand Down
42 changes: 40 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# Summary

The intended user of this project is the developer of a numerical library that needs to know the throughput of floating-point FMA (fused multiply-add) on Xeon processors that support AVX-512. *This project does not attempt to address the throughput of other operations, such as shuffles, permutations, or non-floating-point instructions.*
The intended user of this project is the developer of a numerical library that needs to know the throughput of floating-point FMA (fused multiply-add) on Intel® processors that support AVX-512. *This project does not attempt to address the throughput of other operations, such as shuffles, permutations, or non-floating-point instructions.*

The project provides example code to show a user how to determine the number of AVX-512 FMAs in an Intel processor based on the Skylake microarchitecture. We encourage potential users to adapt the code to their usage, as some of the code included is rather pedantic (although this doesn't add significant runtime overhead unless debug printing is enabled).
The project provides example code that shows how to determine the number of AVX-512 FMA units in Intel® Xeon® Scalable processors. We encourage potential users to adapt the code to their own needs, as some of the included code is rather pedantic (although this doesn't add significant runtime overhead unless debug printing is enabled).

Relevant processors include:
- Intel® Xeon® Scalable Processors
Expand Down Expand Up @@ -57,6 +57,44 @@ This information is available from https://ark.intel.com/products/series/125191/
| Intel® Xeon® Bronze 3106 Processor | 1 |
| Intel® Xeon® Bronze 3104 Processor | 1 |

## 2nd Generation Intel® Xeon® Scalable Processors

This information is available from https://ark.intel.com/content/www/us/en/ark/products/series/192283/2nd-generation-intel-xeon-scalable-processors.html.

| Processor Model Name | Number of AVX-512 FMAs |
|----------------------|----------------|
| Intel® Xeon® Platinum 9282 Processor | 2 |
| Intel® Xeon® Platinum 9242 Processor | 2 |
| Intel® Xeon® Platinum 8280 Processor | 2 |
| Intel® Xeon® Platinum 8276 Processor | 2 |
| Intel® Xeon® Platinum 8270 Processor | 2 |
| Intel® Xeon® Platinum 8268 Processor | 2 |
| Intel® Xeon® Platinum 8260 Processor | 2 |
| Intel® Xeon® Platinum 8256 Processor | 2 |
| Intel® Xeon® Platinum 8253 Processor | 2 |
| Intel® Xeon® Gold 6254 Processor | 2 |
| Intel® Xeon® Gold 6252 Processor | 2 |
| Intel® Xeon® Gold 6248 Processor | 2 |
| Intel® Xeon® Gold 6246 Processor | 2 |
| Intel® Xeon® Gold 6244 Processor | 2 |
| Intel® Xeon® Gold 6242 Processor | 2 |
| Intel® Xeon® Gold 6240 Processor | 2 |
| Intel® Xeon® Gold 6238 Processor | 2 |
| Intel® Xeon® Gold 6234 Processor | 2 |
| Intel® Xeon® Gold 6230 Processor | 2 |
| Intel® Xeon® Gold 6226 Processor | 2 |
| Intel® Xeon® Gold 5222 Processor | 2 |
| Intel® Xeon® Gold 5220 Processor | 1 |
| Intel® Xeon® Gold 5218 Processor | 1 |
| Intel® Xeon® Gold 5217 Processor | 1 |
| Intel® Xeon® Gold 5215 Processor | 1 |
| Intel® Xeon® Silver 4216 Processor | 1 |
| Intel® Xeon® Silver 4215 Processor | 1 |
| Intel® Xeon® Silver 4214 Processor | 1 |
| Intel® Xeon® Silver 4210 Processor | 1 |
| Intel® Xeon® Silver 4208 Processor | 1 |
| Intel® Xeon® Bronze 3204 Processor | 1 |

## Intel® Xeon® W Processors

This information is available from https://ark.intel.com/products/series/125035/Intel-Xeon-Processor-W-Family.
Expand Down
14 changes: 9 additions & 5 deletions vpu-count.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@
#endif

#ifdef DEBUG
#define PDEBUG(fmt, ...) do { printf(fmt, __VA_ARGS__); } while (0)
#define PDEBUG(...) printf(__VA_ARGS__)
#else
#define PDEBUG(fmt, ...)
#define PDEBUG(...)
#endif

void get_cpu_name32(char cpu_name[32])
Expand Down Expand Up @@ -122,7 +122,7 @@ void get_leaf1(uint32_t leaf1[4], bool * skylake_avx512)
model += (xmodel << 4);
//family += xfamily;
}
*skylake_avx512 = (model == 0x55);
*skylake_avx512 = (model == 0x55); /* 85 in decimal */

PDEBUG("signature: %#08x\n", (leaf1[0]) );
//PDEBUG("stepping: %#04x=%d\n", stepping, stepping);
Expand Down Expand Up @@ -194,7 +194,7 @@ int vpu_count(void)
PDEBUG("cpu_name[9] = %c\n", cpu_name[9]);
PDEBUG("cpu_name[17] = %c\n", cpu_name[17]);

/* Skylake-X series: * "Intel(R) Core (TM)..." */
/* Skylake-X series: "Intel(R) Core (TM)..." */
if (cpu_name[9] == 'C') {
return 2;
}
Expand All @@ -217,7 +217,7 @@ int vpu_count(void)
if (cpu_name[22] == '6') {
return 2;
/* 5122 */
} else if (cpu_name[22] == 5 && cpu_name[24] == 2 && cpu_name[25] == 2) {
} else if (cpu_name[22] == '5' && cpu_name[24] == '2' && cpu_name[25] == '2') {
return 2;
/* 51xx */
} else {
Expand All @@ -235,6 +235,10 @@ int vpu_count(void)
}
}
}
/* Pre-production parts: Genuine Intel(R) CPU 0000 */
else if (cpu_name[0] == 'G' && cpu_name[21] == '0' && cpu_name[22] == '0' && cpu_name[23] == '0' && cpu_name[24] == '0') {
return 2;
}
/* If we get here, the part is not supported by the SKX logic */
return -1;
}
Expand Down

0 comments on commit c87015f

Please sign in to comment.