|
| 1 | +#include <stdlib.h> |
| 2 | +#include <string.h> |
| 3 | +#include <stdio.h> |
| 4 | +#include <omp.h> |
| 5 | + |
| 6 | +// bin start size: 130 |
#if !defined(__x86_64__)
#error "The access kernel below is written in x86-64 AT&T-syntax inline assembly."
#endif

/*
 * Buffer geometry.
 *
 * Every kernel store is 4 bytes wide but the cursor only advances by one
 * byte, so the last store of a sweep reaches STORE_SLACK bytes beyond the
 * requested length. The option limits in main() reserve that slack so a
 * sweep can never leave its buffer.
 */
enum {
    KERNEL_COUNT  = 5,
    STORE_SLACK   = 3,
    SHARED_BYTES  = 4000000,
    ARRAY1_BYTES  = 100000,
    ARRAY2_BYTES  = 200000,
    ARRAY3_BYTES  = 400000,
    ARRAY4_BYTES  = 800000,
    ARRAY5_BYTES  = 1600000,
    PRIVATE_BYTES = ARRAY1_BYTES + ARRAY2_BYTES + ARRAY3_BYTES
                  + ARRAY4_BYTES + ARRAY5_BYTES,
    MAX_COUNT     = 2147483647
};

/* One command-line option: its exact spelling, where the parsed value goes,
 * and the constraints the value must satisfy. */
struct option_spec {
    const char *flag;
    int *target;
    int min;
    int max;
    int even_only;
};

/*
 * Parse `text` as a base-10 integer and store it through spec->target.
 *
 * Returns 1 on success. Returns 0 (leaving the target untouched) when the
 * text is empty, has trailing characters, falls outside [spec->min,
 * spec->max], or is odd while spec->even_only is set.
 */
static int parse_count(const char *text, const struct option_spec *spec)
{
    char *end = NULL;
    /* long long is at least 64 bits, so an out-of-range input saturates far
     * outside the int-sized limits and is rejected by the range check. */
    long long value = strtoll(text, &end, 10);

    if (end == text || *end != '\0') {
        return 0;
    }
    if (value < spec->min || value > spec->max) {
        return 0;
    }
    if (spec->even_only && value % 2 != 0) {
        return 0;
    }
    *spec->target = (int)value;
    return 1;
}

/*
 * Run one access kernel `reps` times. Each repetition:
 *   1. issues `shared_stores` 4-byte stores into `shared`, advancing one
 *      byte per store;
 *   2. walks `priv` two bytes at a time for `priv_bytes` bytes: a 1-byte
 *      load at the even offset, then a 4-byte store at the following odd
 *      offset.
 * The value stored is the current countdown of `reps`.
 *
 * Both buffers must be at least (length + STORE_SLACK) bytes long.
 * The loops count down to exactly zero, so a non-positive count or an odd
 * `priv_bytes` would never terminate inside the buffer; such calls are
 * ignored.
 */
static void run_access_kernel(char *priv, char *shared,
                              int priv_bytes, int shared_stores, int reps)
{
    char *cursor;   /* scratch pointer, written by the asm before any read */
    int remaining;  /* scratch counter, written by the asm before any read */

    if (reps < 1 || shared_stores < 1 || priv_bytes < 2 || priv_bytes % 2 != 0) {
        return;
    }

    /* Numeric local labels keep the asm valid if the compiler duplicates it.
     * `reps` is read-write ("+r") because the asm decrements it; the two
     * scratch outputs are early-clobber because they are written while the
     * inputs are still live. r9 receives the discarded byte load. */
    __asm__ __volatile__ (
        "1:\n\t"
        "movl %k[nshared], %k[cnt]\n\t"
        "movq %q[shared], %q[ptr]\n\t"
        "2:\n\t"
        "movl %k[reps], (%q[ptr])\n\t"
        "addq $1, %q[ptr]\n\t"
        "decl %k[cnt]\n\t"
        "jnz 2b\n\t"
        "movl %k[npriv], %k[cnt]\n\t"
        "movq %q[priv], %q[ptr]\n\t"
        "3:\n\t"
        "movb (%q[ptr]), %%r9b\n\t"
        "addq $1, %q[ptr]\n\t"
        "movl %k[reps], (%q[ptr])\n\t"
        "addq $1, %q[ptr]\n\t"
        "subl $2, %k[cnt]\n\t"
        "jnz 3b\n\t"
        "decl %k[reps]\n\t"
        "jnz 1b\n\t"
        : [reps] "+r" (reps), [cnt] "=&r" (remaining), [ptr] "=&r" (cursor)
        : [priv] "r" (priv), [shared] "r" (shared),
          [npriv] "r" (priv_bytes), [nshared] "r" (shared_stores)
        : "r9", "memory", "cc"
    );
}

/*
 * Memory-access pattern generator.
 *
 * Options (each takes one integer argument):
 *   -outer N        number of passes over the five kernels (>= 0, default 10)
 *   -a .. -e N      repetitions of kernel 1..5 per pass (>= 1)
 *   -a1 .. -e1 N    bytes of the per-thread buffer swept by kernel 1..5
 *                   (even, >= 2, at most buffer size - STORE_SLACK)
 *   -inv N          4-byte stores issued to the shared buffer per repetition
 *                   (>= 1, at most SHARED_BYTES - STORE_SLACK)
 *
 * Every thread of the parallel region runs all kernels against its own
 * scratch memory and against the single shared buffer.
 * NOTE(review): the threads store to the shared buffer with no
 * synchronization; this looks like the intended behaviour of the
 * benchmark - confirm.
 *
 * Returns 0 on success, 1 on a command-line or allocation error.
 */
int main(int argc, char *argv[])
{
    /* Static rather than automatic: 4 MB is too large to place on the stack
     * safely. */
    static char shared_array[SHARED_BYTES];

    int outer = 10;
    int inv = 1;
    int a = 100, a1 = 1000;
    int b = 30,  b1 = 2000;
    int c = 24,  c1 = 4000;
    int d = 8,   d1 = 8000;
    int e = 7,   e1 = 16000;

    /* Table position + 1 is the number reported in the "missing value"
     * message, so the order here must not change. */
    const struct option_spec options[] = {
        { "-outer", &outer, 0, MAX_COUNT,                  0 },
        { "-a",     &a,     1, MAX_COUNT,                  0 },
        { "-a1",    &a1,    2, ARRAY1_BYTES - STORE_SLACK, 1 },
        { "-b",     &b,     1, MAX_COUNT,                  0 },
        { "-b1",    &b1,    2, ARRAY2_BYTES - STORE_SLACK, 1 },
        { "-c",     &c,     1, MAX_COUNT,                  0 },
        { "-c1",    &c1,    2, ARRAY3_BYTES - STORE_SLACK, 1 },
        { "-d",     &d,     1, MAX_COUNT,                  0 },
        { "-d1",    &d1,    2, ARRAY4_BYTES - STORE_SLACK, 1 },
        { "-e",     &e,     1, MAX_COUNT,                  0 },
        { "-e1",    &e1,    2, ARRAY5_BYTES - STORE_SLACK, 1 },
        { "-inv",   &inv,   1, SHARED_BYTES - STORE_SLACK, 0 },
    };
    const int option_count = (int)(sizeof options / sizeof options[0]);

    for (int i = 1; i < argc; ++i) {
        /* Exact match: a prefix match would let "-a" swallow "-a1". */
        int which = 0;
        while (which < option_count && strcmp(argv[i], options[which].flag) != 0) {
            ++which;
        }
        if (which == option_count) {
            fprintf(stderr, "There is an error in parsing the command line argument 13\n");
            fprintf(stderr, "%s\n", argv[i]);
            return 1;
        }
        if (i + 1 >= argc) {
            /* The option is the last word on the command line. */
            fprintf(stderr, "There is an error in parsing the command line argument %d\n",
                    which + 1);
            return 1;
        }

        const struct option_spec *spec = &options[which];
        const char *text = argv[++i];   /* consume the value as well */

        if (which == 0) {
            /* Only -outer echoes its raw argument. */
            fprintf(stderr, "argv: %s\n", text);
        }
        if (!parse_count(text, spec)) {
            fprintf(stderr, "Invalid value '%s' for %s: expected %s integer in [%d, %d]\n",
                    text, spec->flag, spec->even_only ? "an even" : "an",
                    spec->min, spec->max);
            return 1;
        }
    }

    fprintf(stderr, "outer: %d, a: %d, a1: %d, b: %d, b1: %d, c: %d, c1: %d, d: %d, d1: %d, e: %d, e1: %d, inv: %d\n", outer, a, a1, b, b1, c, c1, d, d1, e, e1, inv);

    /* Per-kernel parameters, in execution order. Notes carried over from the
     * original source (meaning not established by the code itself):
     *   kernel 1: time distance 200000,  frequency 10 M
     *   kernel 3: time distance 1000000, frequency 40 M
     *   kernel 4: time distance 2000000, frequency 20 M
     *   kernel 5: time distance 4000000, frequency 8 M */
    const int reps[KERNEL_COUNT]  = { a,  b,  c,  d,  e  };
    const int sweep[KERNEL_COUNT] = { a1, b1, c1, d1, e1 };
    const int offset[KERNEL_COUNT] = {
        0,
        ARRAY1_BYTES,
        ARRAY1_BYTES + ARRAY2_BYTES,
        ARRAY1_BYTES + ARRAY2_BYTES + ARRAY3_BYTES,
        ARRAY1_BYTES + ARRAY2_BYTES + ARRAY3_BYTES + ARRAY4_BYTES
    };

    int alloc_failed = 0;

    #pragma omp parallel
    {
        /* Each thread gets its own scratch block, carved into the five
         * kernel buffers; the heap is used because ~3 MB per thread may
         * exceed a worker thread's stack. */
        char *scratch = malloc(PRIVATE_BYTES);

        if (scratch == NULL) {
            /* Cannot return from inside the parallel region; flag it. */
            #pragma omp atomic write
            alloc_failed = 1;
        } else {
            for (int pass = 0; pass < outer; ++pass) {
                for (int k = 0; k < KERNEL_COUNT; ++k) {
                    run_access_kernel(scratch + offset[k], shared_array,
                                      sweep[k], inv, reps[k]);
                }
            }
            free(scratch);
        }
    }

    if (alloc_failed) {
        fprintf(stderr, "Failed to allocate %d bytes of per-thread scratch memory\n",
                PRIVATE_BYTES);
        return 1;
    }
    return 0;
}
0 commit comments