Skip to content

Add strings encryption "module" pass #15

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ add_library(LLVMObfuscator SHARED Plugin.cpp)
target_include_directories(LLVMObfuscator PRIVATE ${CMAKE_SOURCE_DIR})
target_include_directories(LLVMObfuscator PRIVATE ${CMAKE_CURRENT_BINARY_DIR})

target_link_libraries(LLVMObfuscator LLVMCore LLVMSupport)
#Add if needed
#target_link_libraries(LLVMObfuscator LLVMCore LLVMSupport)

option(BUILD_DUMMY "Build dummy plugin" OFF)
if(BUILD_DUMMY)
Expand All @@ -55,3 +56,5 @@ add_subdirectory(bogus)
add_subdirectory(flattening)
add_subdirectory(split)
add_subdirectory(substitution)

add_subdirectory(string)
10 changes: 9 additions & 1 deletion Plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
#include "substitution/Substitution.h"
#include "utils/CryptoUtils.h"

#include "string/StringObfuscation.h"

static const char PassesDelimiter = ',';
static const std::string EnvVarPrefix = "LLVM_OBF_";

Expand Down Expand Up @@ -38,7 +40,13 @@ bool addPassWithName(FunctionPassManager &FPM, StringRef &passName) {
}

// Adds the module pass called passName to MPM.
//
// Returns true when passName names a known module pass (currently only
// "string-encryption"), false otherwise so the caller can report it.
bool addPassWithName(ModulePassManager &MPM, StringRef &passName) {
  // The former unconditional `return false;` made the branch below
  // unreachable, so the string encryption pass could never be scheduled.
  if (passName == "string-encryption") {
    MPM.addPass(StringObfuscatorPass());
    return true;
  }

  return false;
}

template <class T>
Expand Down
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ You can chose to insert passes in the optimization pipeline by setting the follo
For instance if you want to run the flattening, bogus and substitution passes in that order, you can do:
`export LLVM_OBF_SCALAROPTIMIZERLATE_PASSES="flattening, bogus, substitution, split-basic-blocks"`

Or you can run the string encryption pass with:
`export LLVM_OBF_OPTIMIZERLASTEP_PASSES="string-encryption"`

Refer to the llvm::PassBuilder documentation for more information on each insertion point.

### With opt
Expand Down
26 changes: 26 additions & 0 deletions string/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# xxd turns the compiled bitcode into a C array; fail at configure time
# instead of letting the header generation break in the middle of the build.
find_program(XXD xxd)
if (NOT XXD)
  message(FATAL_ERROR "xxd is required to embed the string decode bitcode")
endif()

# If defined, use the target compiler, otherwise the clang shipped with LLVM.
# For cross-compilation the LLVM-IR generated is still arch specific.
# The variable is tested by name: expanding it inside if() made the previous
# condition hinge on how CMake interprets the expanded path.
if (TARGET_C_COMPILER)
  set(CLANG ${TARGET_C_COMPILER})
else()
  set(CLANG "${LLVM_TOOLS_BINARY_DIR}/clang")
endif()

# Compile the string decode function into IR bitcode and embed it as a C array.
# The generator script is listed as a dependency so that editing it
# regenerates decode.h.
add_custom_command(
  OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/decode.h
  COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/generate_ir_header.sh
          ${CLANG} ${XXD}
          ${CMAKE_CURRENT_SOURCE_DIR}/decode.c
          ${CMAKE_CURRENT_BINARY_DIR} decode.h
  DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/decode.c
          ${CMAKE_CURRENT_SOURCE_DIR}/generate_ir_header.sh
)

add_custom_target(generateDecodeIRHeader ALL
  DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/decode.h
)

# decode.h must exist before StringObfuscation.cpp is compiled.
add_dependencies(LLVMObfuscator generateDecodeIRHeader)
target_sources(LLVMObfuscator PRIVATE StringObfuscation.cpp)
249 changes: 249 additions & 0 deletions string/StringObfuscation.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,249 @@
#include "StringObfuscation.h"
#include "string/decode.h"
#include "utils/Utils.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Passes/PassPlugin.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include <llvm/IRReader/IRReader.h>
#include <llvm/Transforms/Utils/ModuleUtils.h>

#include <cstring>
#include <string>

#include "utils/CryptoUtils.h"

static const unsigned int RandomNameMinSize = 5;
static const unsigned int RandomMaxNameSize = 15;
static const char ALPHANUM[] =
"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";

using namespace llvm;

namespace llvm {
// Builds the XOR-encoded counterpart of a string constant.
//
// ctx  - context in which the encoded constant is created.
// str  - first byte of the original string data.
// size - number of bytes in str, including the trailing NUL when present.
// key  - byte XOR-ed into every byte of the string.
//
// Returns the encoded array, or nullptr when the string must be left
// untouched: the input is empty, it holds a NUL byte anywhere but in the last
// position, or the encoded bytes do not produce a ConstantDataArray.
ConstantDataArray *StringObfuscatorPass::encodeStringDataArray(LLVMContext &ctx,
                                                               const char *str,
                                                               size_t size,
                                                               uint8_t key) {
  // Nothing to encode, and str[size - 1] below would read out of bounds.
  if (size == 0)
    return nullptr;

  // Check this is a valid string: the only NUL allowed is a final terminator.
  const size_t expectedLength = (str[size - 1] == '\0') ? size - 1 : size;
  if (strnlen(str, size) != expectedLength)
    return nullptr;

  // Encode into a self-releasing buffer; the previous malloc'ed buffer was
  // never freed.
  std::string encoded(str, size);
  for (char &byte : encoded)
    byte ^= key;

  // getString hands back a plain Constant*, which is not guaranteed to be a
  // ConstantDataArray (e.g. when every encoded byte is zero). dyn_cast turns
  // that case into nullptr, which callers already treat as "skip".
  return dyn_cast<ConstantDataArray>(
      ConstantDataArray::getString(ctx, StringRef(encoded), false));
}

// Encodes a global variable whose initializer is a plain string array.
//
// ctx   - context used to create the encoded constant.
// gv    - global variable holding the string.
// array - current initializer of gv.
//
// On success the initializer is replaced by its encoded form, gv is made
// writable (it is decoded in place at startup) and the string is recorded in
// globalStrings. Strings rejected by encodeStringDataArray are left as is.
void StringObfuscatorPass::encodeGlobalString(LLVMContext &ctx,
                                              GlobalVariable *gv,
                                              ConstantDataArray *array) {
  const StringRef plain = array->getAsString();
  const size_t size = plain.size();

  // XOR with 0 is the identity: redraw until the key is non-zero so the
  // string is never emitted in clear text.
  uint8_t key = 0;
  while (key == 0)
    key = cryptoutils->get_uint8_t();

  ConstantDataArray *encodedArray =
      encodeStringDataArray(ctx, plain.data(), size, key);
  if (encodedArray == nullptr)
    return;

  gv->setInitializer(encodedArray);
  gv->setConstant(false);
  this->globalStrings.push_back(GlobalStringVariable(gv, size, 0, false, key));
}

// Encodes a string stored as a field of a constant struct initializer.
//
// ctx   - context used to create the encoded constant.
// gv    - global variable whose initializer is cs.
// cs    - struct initializer of gv.
// array - string operand of cs found at position index.
// index - field index of array inside cs.
//
// On success the operand is replaced by its encoded form, gv is made writable
// (it is decoded in place at startup) and the field is recorded in
// globalStrings. Strings rejected by encodeStringDataArray are left as is.
void StringObfuscatorPass::encodeStructString(LLVMContext &ctx,
                                              GlobalVariable *gv,
                                              ConstantStruct *cs,
                                              ConstantDataArray *array,
                                              unsigned int index) {
  const StringRef plain = array->getAsString();
  const size_t size = plain.size();

  // XOR with 0 is the identity: redraw until the key is non-zero so the
  // string is never emitted in clear text.
  uint8_t key = 0;
  while (key == 0)
    key = llvm::cryptoutils->get_uint8_t();

  ConstantDataArray *encodedArray =
      encodeStringDataArray(ctx, plain.data(), size, key);
  if (encodedArray == nullptr)
    return;

  // NOTE(review): this rewrites the operand of an existing constant in place.
  // If the same struct constant is shared with another user, that user sees
  // the encoded bytes too - confirm this cannot happen, or rebuild the struct
  // and call gv->setInitializer instead.
  cs->setOperand(index, encodedArray);
  gv->setConstant(false);
  this->globalStrings.push_back(
      GlobalStringVariable(gv, size, index, true, key));
}

// Nothing to set up: globalStrings starts out empty.
StringObfuscatorPass::StringObfuscatorPass() = default;

// Walks every global variable of M and encodes the strings it finds, either
// as the whole initializer or as a field of a struct initializer.
//
// Returns true when globalStrings holds at least one entry afterwards.
bool StringObfuscatorPass::encodeAllStrings(Module &M) {
  LLVMContext &context = M.getContext();

  for (GlobalVariable &global : M.globals()) {
    // Only initialized, constant, non-external globals outside the
    // "llvm.metadata" section (intrinsic global variables) are candidates.
    // TODO: is skipping the "__objc_methname" section necessary as well?
    const bool eligible = global.isConstant() && global.hasInitializer() &&
                          !global.hasExternalLinkage() &&
                          global.getSection() != "llvm.metadata";
    if (!eligible)
      continue;

    Constant *init = global.getInitializer();

    // Case 1: the initializer itself is the string.
    if (auto *plainArray = dyn_cast<ConstantDataArray>(init)) {
      if (plainArray->isString())
        encodeGlobalString(context, &global, plainArray);
      continue;
    }

    // Case 2: the initializer is a struct that may carry string fields.
    auto *aggregate = dyn_cast<ConstantStruct>(init);
    if (aggregate == nullptr)
      continue;

    for (unsigned int field = 0; field < init->getNumOperands(); field++) {
      auto *member = dyn_cast<ConstantDataArray>(aggregate->getOperand(field));
      if (member != nullptr && member->isString())
        encodeStructString(context, &global, aggregate, member, field);
    }
  }

  return !this->globalStrings.empty();
}

// Produces a random identifier made of ALPHANUM characters, used to name the
// functions this pass inserts.
//
// The length is drawn from [RandomNameMinSize, RandomMaxNameSize]. The former
// MIN(random byte + min, max) clamped almost every draw to the maximum, so
// nearly all names had the same length.
std::string StringObfuscatorPass::generateRandomName() {
  // NOTE(review): the "- 1" keeps the last ALPHANUM character out of the range
  // handed to get_range; if get_range's bound is exclusive that character is
  // never picked - confirm against CryptoUtils before changing it.
  const auto charsetSize = strlen(ALPHANUM) - 1;

  const unsigned int lengthSpan = RandomMaxNameSize - RandomNameMinSize + 1;
  const unsigned int length =
      RandomNameMinSize + cryptoutils->get_uint8_t() % lengthSpan;

  std::string name;
  name.reserve(length);
  for (unsigned int i = 0; i < length; i++)
    name += ALPHANUM[cryptoutils->get_range(charsetSize)];

  return name;
}

// Copies the decode routine embedded in decode.h into M.
//
// The embedded bitcode is parsed into a temporary module, its "decodeString"
// function is declared in M under a random name and its body is cloned into
// that declaration.
//
// Returns the function created in M. Aborts with a fatal error when the
// embedded bitcode cannot be parsed or does not define decodeString: the
// strings of M are already encoded at this point, so continuing would only
// crash later or emit a broken module.
Function *StringObfuscatorPass::addDecodeFunction(Module &M) {
  auto &ctx = M.getContext();

  // Parse the bitcode from the header (creates a new module which contains
  // the decode function)
  SMDiagnostic err;
  auto buf = MemoryBuffer::getMemBuffer(
      StringRef(reinterpret_cast<const char *>(decode_c_bc), decode_c_bc_len),
      "", false);
  std::unique_ptr<Module> decodeModule =
      parseIR(buf->getMemBufferRef(), err, ctx);
  if (!decodeModule)
    report_fatal_error(
        "string-encryption: cannot parse the embedded decode bitcode");

  Function *loadedFunction = decodeModule->getFunction("decodeString");
  if (loadedFunction == nullptr || loadedFunction->isDeclaration())
    report_fatal_error(
        "string-encryption: decodeString is missing from the embedded bitcode");

  // Declare the decode function in M with the same signature as the loaded
  // function
  const std::string functionName = generateRandomName();
  M.getOrInsertFunction(functionName, loadedFunction->getFunctionType());
  Function *declaredFunction = M.getFunction(functionName);

  // Map each argument of the loaded function to the matching argument of the
  // declaration (both share the same function type, hence the same arity).
  ValueToValueMapTy vmap;
  auto declaredArg = declaredFunction->arg_begin();
  for (Argument &loadedArg : loadedFunction->args()) {
    vmap[&loadedArg] = &*declaredArg;
    ++declaredArg;
  }

  // Copy the loaded function into the empty declared function (in the proper
  // module)
  SmallVector<ReturnInst *, 8> returns;
  CloneFunctionInto(declaredFunction, loadedFunction, vmap,
#if LLVM_VERSION_MAJOR < 13
                    true,
#else
                    CloneFunctionChangeType::DifferentModule,
#endif
                    returns);

  return declaredFunction;
}

// Creates a randomly named void() function that decodes every string recorded
// in globalStrings, and registers it as a global constructor of M.
//
// M              - module receiving the new function.
// decodeFunction - routine called as (pointer to string, size, key) for each
//                  recorded string.
void StringObfuscatorPass::addDecodeAllStringsFunction(
    Module &M, Function *decodeFunction) {
  auto &ctx = M.getContext();

  FunctionCallee callee =
      M.getOrInsertFunction(generateRandomName(), Type::getVoidTy(ctx));
  Function *decodeAllStrings = cast<Function>(callee.getCallee());

  decodeAllStrings->setCallingConv(CallingConv::C);

  BasicBlock *decodeBlock =
      BasicBlock::Create(ctx, "decodeBlock", decodeAllStrings);

  // Insert one call to decodeFunction per encoded string
  IRBuilder<> builder(decodeBlock);
  for (const GlobalStringVariable &str : this->globalStrings) {
    Value *array = str.var;
    // Type the GEP below indexes into. It must be the array type itself:
    // passing the pointer type of `array` (as done before) is not a valid
    // source element type for a two-index GEP.
    Type *arrayTy = str.var->getValueType();

    // If this is a struct we need to get a pointer to the array
    // at the field index, and index into that field's type
    if (str.isStruct) {
      array = builder.CreateStructGEP(arrayTy, str.var, str.index);
      arrayTy = arrayTy->getStructElementType(str.index);
    }

    // Get a pointer to the first element of the array (start of the string)
    Value *ptr = builder.CreateConstInBoundsGEP2_32(arrayTy, array, 0, 0);

    // Size and key recorded when the string was encoded
    auto *size = ConstantInt::get(IntegerType::getInt32Ty(ctx), str.size);
    auto *key = ConstantInt::get(IntegerType::getInt8Ty(ctx), str.key);

    // Call the decode function
    builder.CreateCall(decodeFunction, {ptr, size, key});
  }

  builder.CreateRetVoid();

  // Add the function to global constructors
  llvm::appendToGlobalCtors(M, decodeAllStrings, 0);
}

// Pass entry point: encodes the strings of M and, when at least one string is
// recorded, injects the decode routine plus the constructor that calls it.
//
// Returns PreservedAnalyses::all() when no string is recorded, and
// PreservedAnalyses::none() otherwise.
PreservedAnalyses StringObfuscatorPass::run(Module &M,
                                            ModuleAnalysisManager &MAM) {
  const bool hasEncodedStrings = encodeAllStrings(M);
  if (hasEncodedStrings) {
    // The decode routine is inserted first, then the global constructor that
    // applies it to every recorded string.
    Function *decoder = addDecodeFunction(M);
    addDecodeAllStringsFunction(M, decoder);
    return PreservedAnalyses::none();
  }

  return PreservedAnalyses::all();
}
} // namespace llvm
54 changes: 54 additions & 0 deletions string/StringObfuscation.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#ifndef _STRING_OBFUSCATION_INCLUDES_
#define _STRING_OBFUSCATION_INCLUDES_

// LLVM include
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h" // For DemoteRegToStack and DemotePHIToStack

// Bookkeeping for one encoded string, kept until the decode calls are emitted.
struct GlobalStringVariable {
  llvm::GlobalVariable *var; // global variable holding the string
  size_t size;               // number of bytes that were encoded
  unsigned int index;        // field index inside the struct (0 otherwise)
  bool isStruct;             // true when the string is a struct field
  uint8_t key;               // XOR key the string was encoded with

  GlobalStringVariable(llvm::GlobalVariable *var, size_t size,
                       unsigned int index, bool isStruct, uint8_t key)
      : var(var), size(size), index(index), isStruct(isStruct), key(key) {}
};

namespace llvm {
// Module pass that XOR-encodes the string constants of a module and inserts a
// global constructor decoding them in place.
struct StringObfuscatorPass : public PassInfoMixin<StringObfuscatorPass> {
// Strings encoded so far; read back when the decode calls are generated.
std::vector<GlobalStringVariable> globalStrings;

StringObfuscatorPass();
// Returns the bytes of str (size bytes) XOR-ed with key as a new constant
// array, or nullptr when str holds a NUL byte other than a final terminator.
ConstantDataArray *encodeStringDataArray(LLVMContext &ctx, const char *str,
size_t size, uint8_t key);
// Replaces operand `index` of the struct initializer cs with the encoded form
// of array, makes gv non-constant and records the string in globalStrings.
void encodeStructString(LLVMContext &ctx, GlobalVariable *gv,
ConstantStruct *cs, ConstantDataArray *array,
unsigned int index);
// Replaces the initializer of gv with the encoded form of array, makes gv
// non-constant and records the string in globalStrings.
void encodeGlobalString(LLVMContext &ctx, GlobalVariable *gv,
ConstantDataArray *array);
// Encodes the eligible string globals of M; returns true when globalStrings
// is not empty afterwards.
bool encodeAllStrings(Module &M);
// Returns a random name built from alphanumeric characters.
std::string generateRandomName();
// Clones the embedded decodeString function into M under a random name and
// returns the new function.
Function *addDecodeFunction(Module &M);
// Creates a function calling decodeFunction for every recorded string and
// appends it to the global constructors of M.
void addDecodeAllStringsFunction(Module &M, Function *decodeFunction);
// Runs the pass on M: encode the strings, then add the decoding code when at
// least one string is recorded.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
};

} // namespace llvm

#endif
7 changes: 7 additions & 0 deletions string/decode.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#include <string.h>

void decodeString(char *str, int length, unsigned char key) {
for (int i = 0; i < length; i++) {
str[i] ^= key;
}
}
Loading