forked from halide/Halide
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlesson_11_cross_compilation.cpp
154 lines (128 loc) · 5.9 KB
/
lesson_11_cross_compilation.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
// Halide tutorial lesson 11: Cross-compilation
// This lesson demonstrates how to use Halide as a cross-compiler to
// generate code for any platform from any platform.
// On linux, you can compile and run it like so:
// g++ lesson_11*.cpp -g -std=c++11 -I ../include -L ../bin -lHalide -lpthread -ldl -o lesson_11
// LD_LIBRARY_PATH=../bin ./lesson_11
// On os x:
// g++ lesson_11*.cpp -g -std=c++11 -I ../include -L ../bin -lHalide -o lesson_11
// DYLD_LIBRARY_PATH=../bin ./lesson_11
// If you have the entire Halide source tree, you can also build it by
// running:
// make tutorial_lesson_11_cross_compilation
// in a shell with the current directory at the top of the halide
// source tree.
#include "Halide.h"
#include <stdio.h>
using namespace Halide;
int main(int argc, char **argv) {
// We'll define the simple one-stage pipeline that we used in lesson 10.
Func brighter;
Var x, y;
// Declare the arguments.
Param<uint8_t> offset;
ImageParam input(type_of<uint8_t>(), 2);
std::vector<Argument> args(2);
args[0] = input;
args[1] = offset;
// Define the Func.
brighter(x, y) = input(x, y) + offset;
// Schedule it.
brighter.vectorize(x, 16).parallel(y);
// The following line is what we did in lesson 10. It compiles an
// object file suitable for the system that you're running this
// program on. For example, if you compile and run this file on
// 64-bit linux on an x86 cpu with sse4.1, then the generated code
// will be suitable for 64-bit linux on x86 with sse4.1.
brighter.compile_to_file("lesson_11_host", args, "brighter");
// We can also compile object files suitable for other cpus and
// operating systems. You do this with an optional third argument
// to compile_to_file which specifies the target to compile for.
// Let's use this to compile a 32-bit arm android version of this code:
Target target;
target.os = Target::Android; // The operating system
target.arch = Target::ARM; // The CPU architecture
target.bits = 32; // The bit-width of the architecture
std::vector<Target::Feature> arm_features; // A list of features to set
target.set_features(arm_features);
// We then pass the target as the last argument to compile_to_file.
brighter.compile_to_file("lesson_11_arm_32_android", args, "brighter", target);
// And now a Windows object file for 64-bit x86 with AVX and SSE 4.1:
target.os = Target::Windows;
target.arch = Target::X86;
target.bits = 64;
std::vector<Target::Feature> x86_features;
x86_features.push_back(Target::AVX);
x86_features.push_back(Target::SSE41);
target.set_features(x86_features);
brighter.compile_to_file("lesson_11_x86_64_windows", args, "brighter", target);
// And finally an iOS mach-o object file for one of Apple's 32-bit
// ARM processors - the A6. It's used in the iPhone 5. The A6 uses
// a slightly modified ARM architecture called ARMv7s. We specify
// this using the target features field. Support for Apple's
// 64-bit ARM processors is very new in llvm, and still somewhat
// flaky.
target.os = Target::IOS;
target.arch = Target::ARM;
target.bits = 32;
std::vector<Target::Feature> armv7s_features;
armv7s_features.push_back(Target::ARMv7s);
target.set_features(armv7s_features);
brighter.compile_to_file("lesson_11_arm_32_ios", args, "brighter", target);
// Now let's check these files are what they claim, by examining
// their first few bytes.
// 32-arm android object files start with the magic bytes:
uint8_t arm_32_android_magic[] = {0x7f, 'E', 'L', 'F', // ELF format
1, // 32-bit
1, // 2's complement little-endian
1}; // Current version of elf
FILE *f = fopen("lesson_11_arm_32_android.o", "rb");
uint8_t header[32];
if (!f || fread(header, 32, 1, f) != 1) {
printf("Object file not generated\n");
return -1;
}
fclose(f);
if (memcmp(header, arm_32_android_magic, sizeof(arm_32_android_magic))) {
printf("Unexpected header bytes in 32-bit arm object file.\n");
return -1;
}
// 64-bit windows object files start with the magic 16-bit value 0x8664
// (presumably referring to x86-64)
uint8_t win_64_magic[] = {0x64, 0x86};
f = fopen("lesson_11_x86_64_windows.obj", "rb");
if (!f || fread(header, 32, 1, f) != 1) {
printf("Object file not generated\n");
return -1;
}
fclose(f);
if (memcmp(header, win_64_magic, sizeof(win_64_magic))) {
printf("Unexpected header bytes in 64-bit windows object file.\n");
return -1;
}
// 32-bit arm iOS mach-o files start with the following magic bytes:
uint32_t arm_32_ios_magic[] = {0xfeedface, // Mach-o magic bytes
12, // CPU type is ARM
11, // CPU subtype is ARMv7s
1}; // It's a relocatable object file.
f = fopen("lesson_11_arm_32_ios.o", "rb");
if (!f || fread(header, 32, 1, f) != 1) {
printf("Object file not generated\n");
return -1;
}
fclose(f);
if (memcmp(header, arm_32_ios_magic, sizeof(arm_32_ios_magic))) {
printf("Unexpected header bytes in 32-bit arm ios object file.\n");
return -1;
}
// It looks like the object files we produced are plausible for
// those targets. We'll count that as a success for the purposes
// of this tutorial. For a real application you'd then need to
// figure out how to integrate Halide into your cross-compilation
// toolchain. There are several small examples of this in the
// Halide repository under the apps folder. See HelloAndroid and
// HelloiOS here:
// https://github.com/halide/Halide/tree/master/apps/
printf("Success!\n");
return 0;
}