-
Notifications
You must be signed in to change notification settings - Fork 358
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
move code under executorch/example (#3176)
Summary: Pull Request resolved: #3176 This diff moves llm manual code from outside github (Dave's and Georgey's) to executorch codebase for better pointing to. After this diff. //executorch/examples/llm_maunal will become the only source of truth of our llm manual code. Reviewed By: byjlw, dbort Differential Revision: D56365058 fbshipit-source-id: 97280fc0ca955caabb6056cddbb72102ed711f2c
- Loading branch information
1 parent
45fd796
commit b6e54d0
Showing
7 changed files
with
489 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
# Copyright (c) Meta Platforms, Inc. and affiliates. | ||
# All rights reserved. | ||
# | ||
# This source code is licensed under the BSD-style license found in the | ||
# LICENSE file in the root directory of this source tree. | ||
|
||
cmake_minimum_required(VERSION 3.19)
project(nanogpt_runner)

# Build the runner with the C++17 standard (required, not optional).
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED True)

# Set options for executorch build. These enable the extensions and the
# backend that nanogpt_runner links against below.
option(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER "" ON)
option(EXECUTORCH_BUILD_EXTENSION_MODULE "" ON)
option(EXECUTORCH_BUILD_OPTIMIZED "" ON)
option(EXECUTORCH_BUILD_XNNPACK "" ON) # Build with Xnnpack backend

# Include the executorch subdirectory (expected as a checkout/submodule
# under third-party/executorch — TODO confirm how it is vendored).
add_subdirectory(
  ${CMAKE_CURRENT_SOURCE_DIR}/third-party/executorch
  ${CMAKE_BINARY_DIR}/executorch)

# include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src)

add_executable(nanogpt_runner main.cpp)
target_link_libraries(
  nanogpt_runner
  PRIVATE
  executorch
  extension_module_static # Provides the Module class
  optimized_native_cpu_ops_lib # Provides baseline cross-platform kernels
  xnnpack_backend) # Provides the XNNPACK CPU acceleration backend
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# LLM Manual | ||
|
||
This repository stores the files that the [LLM Manual](https://pytorch.org/executorch/main/llm/getting-started.html) needs. Please refer to the documentation website for more information.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
/* | ||
* Copyright (c) Meta Platforms, Inc. and affiliates. | ||
* All rights reserved. | ||
* | ||
* This source code is licensed under the BSD-style license found in the | ||
* LICENSE file in the root directory of this source tree. | ||
*/ | ||
|
||
#include <algorithm> | ||
#include <vector> | ||
class BasicSampler {
 public:
  BasicSampler() {}

  // Greedy (argmax) sampling: returns the index of the largest logit.
  //
  // Takes the logits by const reference: the vector is vocab-sized and is
  // only read, so copying it on every sampled token is wasted work. Passing
  // a vector by value at existing call sites still compiles unchanged.
  //
  // Returns 0 for an empty `logits` (max_element(begin, end) == end).
  int64_t sample(const std::vector<float>& logits) {
    // Find the token with the highest log probability.
    int64_t max_index =
        std::max_element(logits.begin(), logits.end()) - logits.begin();
    return max_index;
  }
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,192 @@ | ||
/* | ||
* Copyright (c) Meta Platforms, Inc. and affiliates. | ||
* All rights reserved. | ||
* | ||
* This source code is licensed under the BSD-style license found in the | ||
* LICENSE file in the root directory of this source tree. | ||
*/ | ||
|
||
#include <fstream> | ||
#include <iostream> | ||
#include <sstream> | ||
#include <string> | ||
#include <unordered_map> | ||
#include <vector> | ||
|
||
class BasicTokenizer {
 public:
  // Loads a vocabulary from a JSON file shaped like {"token": id, ...} and
  // builds the token->id map (encode_) plus its inverse (decode_).
  // Calls exit() on an unopenable file or a malformed vocabulary.
  BasicTokenizer(const std::string& filePath) {
    std::ifstream file(filePath);

    if (!file) {
      std::cerr << "Unable to open file";
      exit(9); // return with error code
    }
    // Slurp the entire file into one string; vocab files are small enough.
    std::string str(
        (std::istreambuf_iterator<char>(file)),
        std::istreambuf_iterator<char>());

    size_t i = 0u;
    i = consume_whitespace(str, i);
    i = expect(str, i, '{');

    // Parse "key": value fields until the closing brace.
    while (i < str.size() && str[i] != '}') {
      i = consume_field(str, i);
    }

    // Build decode map as inverse of encode. (Loop variable renamed so it
    // no longer shadows the parse index `i` above.)
    for (auto& entry : encode_) {
      decode_[entry.second] = entry.first;
    }
  }

  // Tokenizes `prompt` (see parse_prompt) and maps each piece to its id.
  // NOTE: operator[] default-inserts id 0 for words missing from the
  // vocabulary, growing encode_ as a side effect.
  std::vector<int64_t> encode(const std::string& prompt) {
    std::vector<std::string> words = parse_prompt(prompt);
    std::vector<int64_t> result;
    result.reserve(words.size());
    // Iterate by const reference instead of copying each token string.
    for (const auto& word : words) {
      result.push_back(encode_[word]);
    }
    return result;
  }

  // Concatenates the token strings for `indices`. Unknown ids contribute an
  // empty string (operator[] default-inserts into decode_).
  std::string decode(const std::vector<int64_t>& indices) {
    std::string result;
    for (const auto& index : indices) {
      result += decode_[index];
    }
    return result;
  }

 private:
  std::unordered_map<std::string, int64_t> encode_;
  std::unordered_map<int64_t, std::string> decode_;

  // Advance the input string index until a non-whitespace character is found
  // or it reaches the end of string.
  size_t consume_whitespace(const std::string& data, size_t i) {
    // Cast to unsigned char: passing a negative char (e.g. a UTF-8 byte) to
    // std::isspace is undefined behavior.
    while (i < data.size() &&
           std::isspace(static_cast<unsigned char>(data[i]))) {
      i++;
    }

    return i;
  }

  // Consumes a JSON field of the form
  //   "str": id,
  // records it in encode_, and returns the index just past the field.
  size_t consume_field(const std::string& data, size_t i) {
    i = consume_whitespace(data, i);

    // Parse the key literal.
    i = expect(data, i, '"');

    auto in_escape = false;
    std::string key = "";
    while (i < data.size()) {
      if (in_escape) {
        key += data[i];
        i++;
        in_escape = false;
      } else { // !in_escape
        if (data[i] == '"') { // End of string literal
          i++;
          break;
        } else if (data[i] == '\\') { // Escaped code point
          in_escape = true;
        }
        // Deliberately also appends the backslash that starts an escape;
        // post_process_key() collapses duplicate backslashes afterwards.
        key += data[i];
        i++;
      }
    }

    key = post_process_key(key);

    i = expect(data, i, ':');
    i = consume_whitespace(data, i);

    // Read unsigned integer value.
    auto value_start = i;
    while (i < data.size() &&
           std::isdigit(static_cast<unsigned char>(data[i]))) {
      i++;
    }
    // std::stol throws (uncaught -> terminate) on an empty or out-of-range
    // number; acceptable failure mode for this demo tool.
    auto value = static_cast<int64_t>(
        std::stol(data.substr(value_start, i - value_start)));

    encode_[key] = value;

    // Skip the trailing comma, if any.
    i = consume_whitespace(data, i);
    if (i < data.size() && data[i] == ',') {
      i++;
    }

    return i;
  }

  // Assert that the next character in the input string is equal to c. Increment
  // the input string index by one.
  size_t expect(const std::string& data, size_t i, char c) {
    if (i >= data.size() || data[i] != c) {
      std::cerr << "Invalid tokenizer vocabulary file. Expected '" << c
                << "' at index " << i << std::endl;
      exit(1);
    }

    return i + 1;
  }

  // Rewrites a raw JSON key into the actual token text.
  std::string post_process_key(std::string key) {
    // Replace the unicode characters with the corresponding byte encoding
    // TODO: adopt byte encoder to handle unicode characters in json file.
    std::unordered_map<std::string, std::string> replacements = {
        {"\\u0120", " "},
        {"\\u010a", "\n"},
    };

    for (const auto& replacement : replacements) {
      size_t pos = 0;
      // While loop through all instances of the substring in the string
      while ((pos = key.find(replacement.first, pos)) != std::string::npos) {
        key.replace(pos, replacement.first.length(), replacement.second);
        pos += replacement.second.length();
      }
    }

    // Remove duplicate backslashes: consume_field keeps the escape
    // character, so an escaped "\\x" arrives here as two characters.
    for (size_t idx = 0; idx < key.length(); idx++) {
      if (key[idx] == '\\') {
        key.erase(idx, 1);
        if (key[idx] == '\\') {
          // If there are two backslashes, keep the second one
          idx += 1;
        }
      }
    }

    return key;
  }

  // Splits a prompt into words and punctuation. A space is kept as the
  // leading character of the following word (matching GPT-2 style vocabulary
  // entries such as " world"); each punctuation character becomes its own
  // token.
  std::vector<std::string> parse_prompt(const std::string& prompt) {
    std::vector<std::string> result;
    std::string word;
    for (char c : prompt) {
      if (c == ' ') {
        if (!word.empty()) {
          result.push_back(word);
          word.clear();
        }
        word += c;
      } else if (ispunct(static_cast<unsigned char>(c))) {
        if (!word.empty()) {
          result.push_back(word);
          word.clear();
        }
        result.push_back(std::string(1, c));
      } else {
        word += c;
      }
    }
    if (!word.empty()) {
      result.push_back(word);
    }
    return result;
  }
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
# Copyright (c) Meta Platforms, Inc. and affiliates. | ||
# All rights reserved. | ||
# | ||
# This source code is licensed under the BSD-style license found in the | ||
# LICENSE file in the root directory of this source tree. | ||
|
||
# export_nanogpt.py | ||
|
||
# Load partitioner for Xnnpack backend | ||
import torch | ||
from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner | ||
|
||
# Model to be delegated to specific backend should use specific edge compile config | ||
from executorch.backends.xnnpack.utils.configs import get_xnnpack_edge_compile_config | ||
from executorch.exir import to_edge | ||
|
||
from model import GPT | ||
from torch._export import capture_pre_autograd_graph | ||
from torch.export import export | ||
from torch.nn.attention import sdpa_kernel, SDPBackend | ||
|
||
# Instantiate nanoGPT with pretrained GPT-2 weights.
nanogpt = GPT.from_pretrained("gpt2")

# One example batch of random token ids at the maximum context length,
# used to trace the model below.
max_seq_len = nanogpt.config.block_size
example_inputs = (torch.randint(0, 100, (1, max_seq_len), dtype=torch.long),)

# Let the token (sequence) dimension of the first input vary, up to the
# model's block size.
dynamic_shape = ({1: torch.export.Dim("token_dim", max=max_seq_len)},)

# Trace the model into a portable intermediate representation. The math-only
# SDPA backend keeps attention export-friendly, and torch.no_grad() excludes
# training-specific logic from the captured graph.
with sdpa_kernel([SDPBackend.MATH]), torch.no_grad():
    pre_autograd_graph = capture_pre_autograd_graph(
        nanogpt, example_inputs, dynamic_shapes=dynamic_shape
    )
    exported_program = export(
        pre_autograd_graph, example_inputs, dynamic_shapes=dynamic_shape
    )

# Lower to the edge dialect. Delegating to the Xnnpack backend requires the
# xnnpack-specific edge compile config.
edge_manager = to_edge(
    exported_program, compile_config=get_xnnpack_edge_compile_config()
)

# Hand the partitions the Xnnpack partitioner claims over to that backend,
# then emit the final ExecuTorch program.
et_program = edge_manager.to_backend(XnnpackPartitioner()).to_executorch()

# Save the Xnnpack-delegated ExecuTorch program to a file.
with open("nanogpt.pte", "wb") as output_file:
    output_file.write(et_program.buffer)
Oops, something went wrong.