|
10 | 10 |
|
11 | 11 | import android.app.Activity; |
12 | 12 | import android.content.Intent; |
13 | | -import android.os.AsyncTask; |
14 | 13 | import android.os.Bundle; |
15 | | -import android.os.Debug; |
| 14 | +import android.os.Handler; |
| 15 | +import android.os.HandlerThread; |
| 16 | +import android.os.Looper; |
16 | 17 | import android.system.ErrnoException; |
17 | 18 | import android.system.Os; |
| 19 | + |
18 | 20 | import com.google.gson.Gson; |
| 21 | + |
19 | 22 | import java.io.File; |
20 | 23 | import java.io.FileWriter; |
21 | 24 | import java.io.IOException; |
22 | 25 | import java.util.ArrayList; |
23 | 26 | import java.util.Arrays; |
24 | | -import java.util.Collections; |
25 | 27 | import java.util.List; |
26 | | -import java.util.stream.Collectors; |
27 | | -import org.pytorch.executorch.Module; |
28 | 28 |
|
29 | 29 | public class BenchmarkActivity extends Activity { |
30 | | - @Override |
31 | | - protected void onCreate(Bundle savedInstanceState) { |
32 | | - super.onCreate(savedInstanceState); |
33 | | - |
34 | | - try { |
35 | | - Os.setenv("ADSP_LIBRARY_PATH", getApplicationInfo().nativeLibraryDir, true); |
36 | | - } catch (ErrnoException e) { |
37 | | - finish(); |
38 | | - } |
39 | | - |
40 | | - Intent intent = getIntent(); |
41 | | - File modelDir = new File(intent.getStringExtra("model_dir")); |
42 | | - File model = |
43 | | - Arrays.stream(modelDir.listFiles()) |
44 | | - .filter(file -> file.getName().endsWith(".pte")) |
45 | | - .findFirst() |
46 | | - .get(); |
47 | 30 |
|
48 | | - int numIter = intent.getIntExtra("num_iter", 50); |
49 | | - int numWarmupIter = intent.getIntExtra("num_warm_up_iter", 10); |
| 31 | + File mModel; |
| 32 | + int mNumIter; |
| 33 | + int mNumWarmupIter; |
| 34 | + String mTokenizerPath; |
| 35 | + float mTemperature; |
| 36 | + String mPrompt; |
50 | 37 |
|
51 | | - long pssIdle = Debug.getPss(); |
| 38 | + HandlerThread mHandlerThread; |
| 39 | + BenchmarkHandler mHandler; |
52 | 40 |
|
53 | | - // TODO: Format the string with a parsable format |
54 | | - Stats stats = new Stats(); |
| 41 | + List<BenchmarkMetric> mResult; |
55 | 42 |
|
56 | | - new AsyncTask<Void, Void, Void>() { |
57 | | - @Override |
58 | | - protected Void doInBackground(Void... voids) { |
| 43 | + @Override |
| 44 | + protected void onCreate(Bundle savedInstanceState) { |
| 45 | + super.onCreate(savedInstanceState); |
59 | 46 |
|
60 | | - // Record the time it takes to load the model and the forward method |
61 | | - stats.loadStart = System.nanoTime(); |
62 | | - Module module = Module.load(model.getPath()); |
63 | | - stats.errorCode = module.loadMethod("forward"); |
64 | | - stats.loadEnd = System.nanoTime(); |
65 | | - |
66 | | - for (int i = 0; i < numWarmupIter; i++) { |
67 | | - module.forward(); |
| 47 | + try { |
| 48 | + Os.setenv("ADSP_LIBRARY_PATH", getApplicationInfo().nativeLibraryDir, true); |
| 49 | + } catch (ErrnoException e) { |
| 50 | + finish(); |
68 | 51 | } |
69 | 52 |
|
70 | | - for (int i = 0; i < numIter; i++) { |
71 | | - long start = System.nanoTime(); |
72 | | - module.forward(); |
73 | | - double forwardMs = (System.nanoTime() - start) * 1e-6; |
74 | | - stats.latency.add(forwardMs); |
| 53 | + Intent intent = getIntent(); |
| 54 | + File modelDir = new File(intent.getStringExtra("model_dir")); |
| 55 | + File model = |
| 56 | + Arrays.stream(modelDir.listFiles()) |
| 57 | + .filter(file -> file.getName().endsWith(".pte")) |
| 58 | + .findFirst() |
| 59 | + .get(); |
| 60 | + |
| 61 | + int numIter = intent.getIntExtra("num_iter", 50); |
| 62 | + int numWarmupIter = intent.getIntExtra("num_warm_up_iter", 10); |
| 63 | + String tokenizerPath = intent.getStringExtra("tokenizer_path"); |
| 64 | + float temperature = intent.getFloatExtra("temperature", 0.8f); |
| 65 | + String prompt = intent.getStringExtra("prompt"); |
| 66 | + |
| 67 | + mModel = model; |
| 68 | + mNumIter = numIter; |
| 69 | + mNumWarmupIter = numWarmupIter; |
| 70 | + mTokenizerPath = tokenizerPath; |
| 71 | + mTemperature = temperature; |
| 72 | + mPrompt = prompt; |
| 73 | + if (mPrompt == null) { |
| 74 | + mPrompt = "The ultimate answer"; |
75 | 75 | } |
76 | | - return null; |
77 | | - } |
78 | | - |
79 | | - @Override |
80 | | - protected void onPostExecute(Void aVoid) { |
81 | | - |
82 | | - final BenchmarkMetric.BenchmarkModel benchmarkModel = |
83 | | - BenchmarkMetric.extractBackendAndQuantization(model.getName().replace(".pte", "")); |
84 | | - final List<BenchmarkMetric> results = new ArrayList<>(); |
85 | | - // The list of metrics we have atm includes: |
86 | | - // Avg inference latency after N iterations |
87 | | - // Currently the result has large variance from outliers, so only use |
88 | | - // 80% samples in the middle (trimmean 0.2) |
89 | | - Collections.sort(stats.latency); |
90 | | - int resultSize = stats.latency.size(); |
91 | | - List<Double> usedLatencyResults = |
92 | | - stats.latency.subList(resultSize / 10, resultSize * 9 / 10); |
93 | | - |
94 | | - results.add( |
95 | | - new BenchmarkMetric( |
96 | | - benchmarkModel, |
97 | | - "avg_inference_latency(ms)", |
98 | | - stats.latency.stream().mapToDouble(l -> l).average().orElse(0.0f), |
99 | | - 0.0f)); |
100 | | - results.add( |
101 | | - new BenchmarkMetric( |
102 | | - benchmarkModel, |
103 | | - "trimmean_inference_latency(ms)", |
104 | | - usedLatencyResults.stream().mapToDouble(l -> l).average().orElse(0.0f), |
105 | | - 0.0f)); |
106 | | - // Model load time |
107 | | - results.add( |
108 | | - new BenchmarkMetric( |
109 | | - benchmarkModel, |
110 | | - "model_load_time(ms)", |
111 | | - (stats.loadEnd - stats.loadStart) * 1e-6, |
112 | | - 0.0f)); |
113 | | - // Load status |
114 | | - results.add(new BenchmarkMetric(benchmarkModel, "load_status", stats.errorCode, 0)); |
115 | | - // RAM PSS usage |
116 | | - results.add( |
117 | | - new BenchmarkMetric( |
118 | | - benchmarkModel, "ram_pss_usage(mb)", (Debug.getPss() - pssIdle) / 1024, 0)); |
| 76 | + mResult = new ArrayList<>(); |
| 77 | + |
| 78 | + mHandlerThread = new HandlerThread("ModelRunner"); |
| 79 | + mHandlerThread.start(); |
| 80 | + mHandler = new BenchmarkHandler(mHandlerThread.getLooper(), this); |
| 81 | + |
| 82 | + mHandler.sendEmptyMessage(BenchmarkHandler.MESSAGE_RUN_BENCHMARK); |
| 83 | + } |
119 | 84 |
|
| 85 | + void writeResult() { |
120 | 86 | try (FileWriter writer = new FileWriter(getFilesDir() + "/benchmark_results.json")) { |
121 | | - Gson gson = new Gson(); |
122 | | - writer.write(gson.toJson(results)); |
| 87 | + Gson gson = new Gson(); |
| 88 | + writer.write(gson.toJson(mResult)); |
123 | 89 | } catch (IOException e) { |
124 | | - e.printStackTrace(); |
| 90 | + e.printStackTrace(); |
| 91 | + } finally { |
| 92 | + finish(); |
125 | 93 | } |
126 | | - } |
127 | | - }.execute(); |
128 | | - } |
| 94 | + } |
129 | 95 | } |
130 | 96 |
|
131 | | -class Stats { |
132 | | - long loadStart; |
133 | | - long loadEnd; |
134 | | - List<Double> latency = new ArrayList<>(); |
135 | | - int errorCode = 0; |
| 97 | +class BenchmarkHandler extends Handler { |
| 98 | + public static int MESSAGE_RUN_BENCHMARK = 1; |
| 99 | + public static int MESSAGE_LLM_RUN_BENCHMARK = 2; |
| 100 | + |
| 101 | + ModelRunner mModelRunner; |
| 102 | + BenchmarkActivity mBenchmarkActivity; |
136 | 103 |
|
137 | | - @Override |
138 | | - public String toString() { |
139 | | - return "latency: " + latency.stream().map(Object::toString).collect(Collectors.joining("")); |
140 | | - } |
| 104 | + LlmModelRunner mLlmModelRunner; |
| 105 | + LlmBenchmark mLlmBenchmark; |
| 106 | + |
| 107 | + public BenchmarkHandler(Looper looper, BenchmarkActivity benchmarkActivity) { |
| 108 | + super(looper); |
| 109 | + mModelRunner = new ModelRunner(); |
| 110 | + mBenchmarkActivity = benchmarkActivity; |
| 111 | + } |
| 112 | + |
| 113 | + @Override |
| 114 | + public void handleMessage(android.os.Message msg) { |
| 115 | + if (msg.what == MESSAGE_RUN_BENCHMARK) { |
| 116 | + mModelRunner.runBenchmark(mBenchmarkActivity.mModel, mBenchmarkActivity.mNumWarmupIter, mBenchmarkActivity.mNumIter, mBenchmarkActivity.mResult); |
| 117 | + |
| 118 | + if (mBenchmarkActivity.mTokenizerPath == null) { |
| 119 | + mBenchmarkActivity.writeResult(); |
| 120 | + } else { |
| 121 | + this.sendEmptyMessage(MESSAGE_LLM_RUN_BENCHMARK); |
| 122 | + } |
| 123 | + } else if (msg.what == MESSAGE_LLM_RUN_BENCHMARK) { |
| 124 | + mLlmBenchmark = new LlmBenchmark(mBenchmarkActivity, mBenchmarkActivity.mModel.getPath(), mBenchmarkActivity.mTokenizerPath, mBenchmarkActivity.mPrompt, mBenchmarkActivity.mTemperature, mBenchmarkActivity.mResult); |
| 125 | + } |
| 126 | + } |
141 | 127 | } |