move code under executorch/example #3176

Closed · wants to merge 1 commit into from
33 changes: 33 additions & 0 deletions examples/llm_manual/CMakeLists.txt
@@ -0,0 +1,33 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

cmake_minimum_required(VERSION 3.19)
project(nanogpt_runner)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED True)

# Set options for executorch build.
option(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER "" ON)
option(EXECUTORCH_BUILD_EXTENSION_MODULE "" ON)
option(EXECUTORCH_BUILD_OPTIMIZED "" ON)
option(EXECUTORCH_BUILD_XNNPACK "" ON) # Build with the XNNPACK backend

# Include the executorch subdirectory.
add_subdirectory(
  ${CMAKE_CURRENT_SOURCE_DIR}/third-party/executorch
  ${CMAKE_BINARY_DIR}/executorch)

# include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src)

add_executable(nanogpt_runner main.cpp)
target_link_libraries(
  nanogpt_runner
  PRIVATE
  executorch
  extension_module_static # Provides the Module class
  optimized_native_cpu_ops_lib # Provides baseline cross-platform kernels
  xnnpack_backend) # Provides the XNNPACK CPU acceleration backend
3 changes: 3 additions & 0 deletions examples/llm_manual/README.md
@@ -0,0 +1,3 @@
# LLM Manual

This directory contains the files used by the [LLM Manual](https://pytorch.org/executorch/main/llm/getting-started.html). Please refer to the documentation website for more information.
20 changes: 20 additions & 0 deletions examples/llm_manual/basic_sampler.h
@@ -0,0 +1,20 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include <algorithm>
#include <cstdint>
#include <vector>

class BasicSampler {
 public:
  BasicSampler() {}
  int64_t sample(std::vector<float> logits) {
    // Find the token with the highest log probability.
    int64_t max_index =
        std::max_element(logits.begin(), logits.end()) - logits.begin();
    return max_index;
  }
};
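For illustration, here is a minimal sketch of how this greedy sampler could be exercised on its own. The logits values below are made up for the example; in the actual runner the logits come from the model's output.

#include <cstdint>
#include <iostream>
#include <vector>

#include "basic_sampler.h"

int main() {
  // Made-up logits for a five-token vocabulary; index 3 holds the largest value.
  std::vector<float> logits = {0.1f, 0.4f, 0.2f, 2.5f, 0.9f};

  BasicSampler sampler;
  int64_t next_token = sampler.sample(logits);

  std::cout << "Sampled token id: " << next_token << std::endl; // prints 3
  return 0;
}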
192 changes: 192 additions & 0 deletions examples/llm_manual/basic_tokenizer.h
@@ -0,0 +1,192 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include <cctype>
#include <cstdint>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <unordered_map>
#include <vector>

class BasicTokenizer {
 public:
  BasicTokenizer(const std::string& filePath) {
    std::ifstream file(filePath);

    if (!file) {
      std::cerr << "Unable to open file";
      exit(9); // return with error code
    }
    std::string str(
        (std::istreambuf_iterator<char>(file)),
        std::istreambuf_iterator<char>());

    size_t i = 0u;
    i = consume_whitespace(str, i);
    i = expect(str, i, '{');

    while (i < str.size() && str[i] != '}') {
      i = consume_field(str, i);
    }

    // Build decode map as inverse of encode.
    for (auto& i : encode_) {
      decode_[i.second] = i.first;
    }
  }

  std::vector<int64_t> encode(const std::string& prompt) {
    std::vector<std::string> words = parse_prompt(prompt);
    std::vector<int64_t> result;
    for (auto word : words) {
      result.push_back(encode_[word]);
    }
    return result;
  }

  std::string decode(const std::vector<int64_t>& indices) {
    std::string result;
    for (const auto& index : indices) {
      result += decode_[index];
    }
    return result;
  }

 private:
  std::unordered_map<std::string, int64_t> encode_;
  std::unordered_map<int64_t, std::string> decode_;

  // Advance the input string index until a non-whitespace character is found
  // or it reaches the end of the string.
  size_t consume_whitespace(const std::string& data, size_t i) {
    while (i < data.size() && std::isspace(data[i])) {
      i++;
    }

    return i;
  }

  // Consumes a JSON field of the form
  // "str": id,
  size_t consume_field(const std::string& data, size_t i) {
    i = consume_whitespace(data, i);

    // Parse the key literal.
    i = expect(data, i, '"');

    auto in_escape = false;
    std::string key = "";
    while (i < data.size()) {
      if (in_escape) {
        key += data[i];
        i++;
        in_escape = false;
      } else { // !in_escape
        if (data[i] == '"') { // End of string literal
          i++;
          break;
        } else if (data[i] == '\\') { // Escaped code point
          in_escape = true;
        }
        key += data[i];
        i++;
      }
    }

    key = post_process_key(key);

    i = expect(data, i, ':');
    i = consume_whitespace(data, i);

    // Read the unsigned integer value.
    auto value_start = i;
    while (i < data.size() && std::isdigit(data[i])) {
      i++;
    }
    auto value = static_cast<int64_t>(
        std::stol(data.substr(value_start, i - value_start)));

    encode_[key] = value;

    i = consume_whitespace(data, i);
    if (i < data.size() && data[i] == ',') {
      i++;
    }

    return i;
  }

  // Assert that the next character in the input string is equal to c.
  // Increment the input string index by one.
  size_t expect(const std::string& data, size_t i, char c) {
    if (i >= data.size() || data[i] != c) {
      std::cerr << "Invalid tokenizer vocabulary file. Expected '" << c
                << "' at index " << i << std::endl;
      exit(1);
    }

    return i + 1;
  }

  std::string post_process_key(std::string key) {
    // Replace the escaped unicode sequences with the corresponding byte encoding.
    // TODO: adopt a byte encoder to handle unicode characters in the json file.

    std::unordered_map<std::string, std::string> replacements = {
        {"\\u0120", " "},
        {"\\u010a", "\n"},
    };

    for (const auto& replacement : replacements) {
      size_t pos = 0;
      // Loop through all instances of the substring in the string.
      while ((pos = key.find(replacement.first, pos)) != std::string::npos) {
        key.replace(pos, replacement.first.length(), replacement.second);
        pos += replacement.second.length();
      }
    }

    // Remove duplicate backslashes.
    for (size_t idx = 0; idx < key.length(); idx++) {
      if (key[idx] == '\\') {
        key.erase(idx, 1);
        if (key[idx] == '\\') {
          // If there are two backslashes, keep the second one.
          idx += 1;
        }
      }
    }

    return key;
  }
  std::vector<std::string> parse_prompt(const std::string& prompt) {
    std::vector<std::string> result;
    std::string word;
    for (char c : prompt) {
      if (c == ' ') {
        if (!word.empty()) {
          result.push_back(word);
          word.clear();
        }
        word += c;
      } else if (ispunct(c)) {
        if (!word.empty()) {
          result.push_back(word);
          word.clear();
        }
        result.push_back(std::string(1, c));
      } else {
        word += c;
      }
    }
    if (!word.empty()) {
      result.push_back(word);
    }
    return result;
  }
};
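For illustration, here is a minimal sketch of an encode/decode round trip with this tokenizer; the "vocab.json" path is a placeholder for whatever GPT-2-style vocabulary JSON file is actually in use.

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

#include "basic_tokenizer.h"

int main() {
  // Placeholder path; point this at the vocabulary JSON file used by the model.
  BasicTokenizer tokenizer("vocab.json");

  std::vector<int64_t> ids = tokenizer.encode("Hello world!");
  std::string text = tokenizer.decode(ids);

  std::cout << text << std::endl;
  return 0;
}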
45 changes: 45 additions & 0 deletions examples/llm_manual/export_nanogpt.py
@@ -0,0 +1,45 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# export_nanogpt.py

# Load the partitioner for the XNNPACK backend.
import torch
from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner

# A model delegated to a specific backend should use that backend's edge compile config.
from executorch.backends.xnnpack.utils.configs import get_xnnpack_edge_compile_config
from executorch.exir import to_edge

from model import GPT
from torch._export import capture_pre_autograd_graph
from torch.export import export
from torch.nn.attention import sdpa_kernel, SDPBackend

model = GPT.from_pretrained("gpt2")  # use the pretrained GPT-2 weights
example_inputs = (
    torch.randint(0, 100, (1, model.config.block_size), dtype=torch.long),
)
dynamic_shape = ({1: torch.export.Dim("token_dim", max=model.config.block_size)},)

# Trace the model, converting it to a portable intermediate representation.
# The torch.no_grad() call tells PyTorch to exclude training-specific logic.
with sdpa_kernel([SDPBackend.MATH]), torch.no_grad():
    m = capture_pre_autograd_graph(model, example_inputs, dynamic_shapes=dynamic_shape)
    traced_model = export(m, example_inputs, dynamic_shapes=dynamic_shape)

# Convert the model into a runnable ExecuTorch program.
# To be further lowered to the XNNPACK backend, `traced_model` needs the XNNPACK-specific edge compile config.
edge_config = get_xnnpack_edge_compile_config()
edge_manager = to_edge(traced_model, compile_config=edge_config)

# Delegate the exported model to the XNNPACK backend by invoking `to_backend` with the XNNPACK partitioner.
edge_manager = edge_manager.to_backend(XnnpackPartitioner())
et_program = edge_manager.to_executorch()

# Save the XNNPACK-delegated ExecuTorch program to a file.
with open("nanogpt.pte", "wb") as file:
    file.write(et_program.buffer)