Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/FUNDING.yml
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
github: []
github: [LunaStev]
ko_fi: lunasev
7 changes: 0 additions & 7 deletions Cargo.lock

This file was deleted.

6 changes: 6 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
[workspace]
members = [
"assembler",
]

[package]
name = "whale"
version = "0.1.0"
edition = "2021"

[dependencies]
assembler = { path = "assembler" }
6 changes: 6 additions & 0 deletions assembler/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[package]
name = "assembler"
version = "0.1.0"
edition = "2021"

[dependencies]
34 changes: 34 additions & 0 deletions assembler/src/assembler.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
use crate::tokens::tokenize;
use crate::traits::ISA;
use crate::error::{AsmError};
use crate::ast::AST;

/// Everything produced by assembling one source unit.
///
/// Derives `Debug` and `Clone` like the other public types in this crate so
/// callers can log, snapshot, and duplicate results.
#[derive(Debug, Clone)]
pub struct AssemblerOutput {
    /// Encoded instruction and data bytes, in emission order.
    pub bytes: Vec<u8>,
    /// Label name paired with the length of `bytes` at the moment the label
    /// was encountered (i.e. the offset of whatever is emitted next).
    pub symbols: Vec<(String, usize)>,
    /// Symbol references inside `bytes` that were emitted as placeholders.
    pub relocations: Vec<Relocation>,
}

/// A reference from the assembled bytes to a named symbol.
///
/// The amd64 `mov reg, label` encoder records one of these immediately
/// before it appends an eight-byte zero placeholder to the output.
#[derive(Debug, Clone)]
pub struct Relocation {
/// Length of the output byte vector when the relocation was recorded,
/// i.e. the offset of the first placeholder byte.
pub offset: usize,
/// Name of the referenced symbol, copied from the label operand.
pub symbol: String,
/// How the symbol reference is meant to be interpreted.
pub kind: RelocKind,
}

/// Kind of symbol reference described by a [`Relocation`].
///
/// This is a plain tag with no payload, so it is `Copy` and comparable;
/// that lets callers write `reloc.kind == RelocKind::Absolute` directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RelocKind {
    /// Recorded by the amd64 encoder for `mov reg, label`, where the
    /// placeholder is a full 8-byte immediate.
    Absolute,
    /// Not produced by the amd64 encoder yet.
    // NOTE(review): presumably a PC-relative reference — confirm once a
    // producer exists.
    Relative,
}

/// Runs the full pipeline on `source`: tokenize, parse with `isa`, then
/// encode with `isa`.
///
/// # Errors
/// A tokenizer failure is rendered to text and wrapped in
/// `AsmError::LexerError`; errors from `isa.parse` and `isa.encode` are
/// propagated with `?`.
pub fn assemble(source: &str, isa: &impl ISA) -> Result<AssemblerOutput, AsmError> {
    let tokens = match tokenize(source) {
        Ok(tokens) => tokens,
        Err(lex_err) => return Err(AsmError::LexerError(lex_err.to_string())),
    };

    let program: AST = isa.parse(&tokens)?;
    let output = isa.encode(&program)?;

    Ok(output)
}
46 changes: 46 additions & 0 deletions assembler/src/ast.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/// A parsed assembly program: a flat list of nodes.
#[derive(Debug, Clone)]
pub struct AST {
/// Nodes of the program; the amd64 encoder walks them front to back.
pub items: Vec<ASTNode>,
}

/// One top-level element of an assembly program.
#[derive(Debug, Clone)]
pub enum ASTNode {
/// A machine instruction (mnemonic plus operands).
Instruction(Instruction),
/// A named directive with its values (the amd64 encoder handles
/// `db`, `dw`, `dd` and `dq`).
Directive(Directive),
/// A label definition; the amd64 encoder records its name together with
/// the current output length.
Label(String),
}

/// A single instruction as written in the source.
#[derive(Debug, Clone)]
pub struct Instruction {
/// Instruction name; the amd64 encoder compares it against lowercase
/// literals such as "mov" and "nop".
pub mnemonic: String,
/// Operands in the order they are stored; the amd64 `mov` encoder treats
/// index 0 as the destination and index 1 as the source.
pub operands: Vec<Operand>,
}

/// A directive and the values that follow it.
#[derive(Debug, Clone)]
pub struct Directive {
/// Directive name; the amd64 encoder matches it against "db", "dw",
/// "dd" and "dq".
pub name: String,
/// Values attached to the directive, encoded one after another.
pub values: Vec<DirectiveValue>,
}

/// A single value attached to a directive.
#[derive(Debug, Clone)]
pub enum DirectiveValue {
/// Numeric value; the data directives emit it little-endian at the
/// directive's width.
Number(i64),
/// String value; `db` emits its bytes verbatim, the wider data
/// directives reject it.
StringLiteral(String),
/// Bare identifier; currently rejected by every data directive in the
/// amd64 encoder.
Identifier(String),
}

/// An instruction operand.
#[derive(Debug, Clone)]
pub enum Operand {
/// Register referred to by name; the amd64 encoder resolves the name
/// through its register tables.
Register(String),
/// Immediate integer value.
Immediate(i64),
/// Reference to a named symbol.
Label(String),
/// Memory operand; no form visible in the amd64 encoder handles it yet.
Memory(MemoryOperand)
}

/// Components of a memory operand.
// NOTE(review): the field names suggest the usual x86 form
// `[base + index * scale + disp]`, but no visible code consumes this
// type — confirm against the parser before relying on that.
#[derive(Debug, Clone)]
pub struct MemoryOperand {
/// Optional base register name.
pub base: Option<String>,
/// Optional index register name.
pub index: Option<String>,
/// Scale factor (presumably applied to `index` — confirm).
pub scale: u8,
/// Displacement value.
pub disp: i64,
}
24 changes: 24 additions & 0 deletions assembler/src/error.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
use std::fmt;

/// Errors reported by the assembler pipeline.
///
/// Each variant carries a human-readable message. The type is comparable
/// (`PartialEq`/`Eq`) so callers and tests can assert on a specific error
/// with `assert_eq!` instead of pattern-matching by hand.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AsmError {
    /// Tokenizing the source text failed.
    LexerError(String),
    /// Turning tokens into an AST failed.
    ParserError(String),
    /// Turning the AST into bytes failed.
    EncodeError(String),
    /// A problem related to symbols.
    SymbolError(String),
    /// A token appeared where it was not expected.
    UnexpectedToken(String),
}

impl fmt::Display for AsmError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
AsmError::LexerError(s) => write!(f, "Lexer error: {}", s),
AsmError::ParserError(s) => write!(f, "Parser error: {}", s),
AsmError::EncodeError(s) => write!(f, "Encode error: {}", s),
AsmError::SymbolError(s) => write!(f, "Symbol error: {}", s),
AsmError::UnexpectedToken(s) => write!(f, "Unexpected token: {}", s),
}
}
}

// Empty impl: `AsmError` derives `Debug` and implements `Display`, so the
// trait's default methods are used as-is.
impl std::error::Error for AsmError {}
217 changes: 217 additions & 0 deletions assembler/src/isa/amd64/encoder.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
use crate::ast::*;
use crate::error::AsmError;
use crate::assembler::{AssemblerOutput, Relocation, RelocKind};
use crate::error::AsmError::ParserError;
use crate::isa::amd64::encoding::{ModRM, REX};
use crate::isa::amd64::tables::*;

/// Encodes every node of `ast` in order and returns the collected bytes,
/// symbols and relocations.
///
/// * Labels are recorded as `(name, current byte count)`.
/// * Instructions and directives are handed to `encode_instruction` and
///   `encode_directive`, which append to the shared byte buffer.
///
/// # Errors
/// Stops at the first node that fails to encode and returns that error.
pub fn encode(ast: &AST) -> Result<AssemblerOutput, AsmError> {
    let mut output = AssemblerOutput {
        bytes: Vec::new(),
        symbols: Vec::new(),
        relocations: Vec::new(),
    };

    for item in ast.items.iter() {
        match item {
            ASTNode::Instruction(instruction) => {
                encode_instruction(instruction, &mut output.bytes, &mut output.relocations)?;
            }
            ASTNode::Directive(directive) => {
                encode_directive(directive, &mut output.bytes)?;
            }
            ASTNode::Label(label) => {
                // A label marks the offset of whatever gets emitted next.
                let offset = output.bytes.len();
                output.symbols.push((label.clone(), offset));
            }
        }
    }

    Ok(output)
}

/// Dispatches one instruction to its encoder based on the mnemonic.
///
/// `mov` is delegated to `encode_mov`; `nop` appends the single byte
/// `0x90`.
///
/// # Errors
/// Returns `AsmError::EncodeError` for any other mnemonic, or whatever
/// `encode_mov` reports.
fn encode_instruction(
    ins: &Instruction,
    bytes: &mut Vec<u8>,
    relocs: &mut Vec<Relocation>,
) -> Result<(), AsmError> {
    let mnemonic = ins.mnemonic.as_str();

    if mnemonic == "mov" {
        return encode_mov(ins, bytes, relocs);
    }

    if mnemonic == "nop" {
        bytes.push(0x90);
        return Ok(());
    }

    let message = format!("Unknown mnemonic {}", ins.mnemonic);
    Err(AsmError::EncodeError(message))
}

/// Encodes a `mov` instruction, appending its machine code to `bytes`.
///
/// Supported forms (64-bit registers only):
///
/// * `mov r64, imm64` — `REX.W` (plus `REX.B` for r8–r15), opcode
///   `B8 + low three bits of the register`, then the immediate as eight
///   little-endian bytes.
/// * `mov r64, label` — the same prefix and opcode followed by eight zero
///   bytes; an absolute relocation for `label` is pushed onto `relocs`
///   with the offset of the first placeholder byte.
/// * `mov r64, r64` — `REX.W 89 /r` with a register-direct ModRM byte
///   (source in the `reg` field, destination in `rm`).
///
/// For the immediate and label forms the destination may arrive as either
/// `Operand::Register` or `Operand::Label` (as long as the name resolves to
/// a 64-bit register); the register-to-register form requires both operands
/// to be `Operand::Register`.
///
/// # Errors
/// Returns `AsmError::EncodeError` when the operand count is not two, when
/// a register name in the register-to-register form is unknown, or when
/// the operand combination is not one of the forms above.
fn encode_mov(
    ins: &Instruction,
    bytes: &mut Vec<u8>,
    relocs: &mut Vec<Relocation>,
) -> Result<(), AsmError> {
    let (dst, src) = match ins.operands.as_slice() {
        [dst, src] => (dst, src),
        _ => return Err(AsmError::EncodeError("mov expects 2 operands".into())),
    };

    // mov r64, imm64 / mov r64, label
    let dst_name = match dst {
        Operand::Register(name) | Operand::Label(name) => Some(name),
        Operand::Immediate(_) | Operand::Memory(_) => None,
    };
    if let Some(reg) = dst_name.and_then(|name| lookup_reg(name, 64)) {
        // Register codes 8..=15 do not fit in the opcode's three register
        // bits: the high bit travels in REX.B (0x01) and only the low three
        // bits are merged into the opcode.
        let prefix_and_opcode = [0x48 | u8::from(reg >= 8), 0xB8 | (reg & 7)];

        match src {
            Operand::Immediate(val) => {
                bytes.extend_from_slice(&prefix_and_opcode);
                bytes.extend_from_slice(&val.to_le_bytes());
                return Ok(());
            }

            Operand::Label(label) => {
                bytes.extend_from_slice(&prefix_and_opcode);
                // The relocation points at the placeholder, not the opcode.
                relocs.push(Relocation {
                    offset: bytes.len(),
                    symbol: label.clone(),
                    kind: RelocKind::Absolute,
                });
                bytes.extend_from_slice(&0i64.to_le_bytes());
                return Ok(());
            }

            // Not an immediate form; fall through to the other encodings.
            Operand::Register(_) | Operand::Memory(_) => {}
        }
    }

    // mov r64, r64
    if let (Operand::Register(dst_reg_name), Operand::Register(src_reg_name)) = (dst, src) {
        let dst_code = lookup_reg(dst_reg_name, 64)
            .ok_or_else(|| AsmError::EncodeError("Invalid dst register".into()))?;

        let src_code = lookup_reg(src_reg_name, 64)
            .ok_or_else(|| AsmError::EncodeError("Invalid src register".into()))?;

        // Opcode 0x89 stores the source in ModRM.reg (extended by REX.R)
        // and the destination in ModRM.rm (extended by REX.B).
        bytes.push(
            REX {
                w: true,
                r: src_code >= 8,
                x: false,
                b: dst_code >= 8,
            }
            .encode(),
        );
        bytes.push(0x89);
        bytes.push(ModRM::new(0b11, src_code & 7, dst_code & 7).encode());
        return Ok(());
    }

    Err(AsmError::EncodeError("Unsupported mov form".into()))
}

/// Looks up the numeric code of the register called `name`.
///
/// `mode` selects the table: `64` searches `REGISTERS_64`, `32` searches
/// `REGISTERS_32`. Any other mode, or a name missing from the selected
/// table, yields `None`. The comparison is exact (case-sensitive).
fn lookup_reg(name: &str, mode: u8) -> Option<u8> {
    let table = if mode == 64 {
        REGISTERS_64
    } else if mode == 32 {
        REGISTERS_32
    } else {
        return None;
    };

    for (reg_name, code) in table.iter() {
        if *reg_name == name {
            return Some(*code);
        }
    }
    None
}

fn encode_directive(dir: &Directive, bytes: &mut Vec<u8>) -> Result<(), AsmError> {
match dir.name.as_str() {
"db" => encode_db(dir, bytes),
"dw" => encode_dw(dir, bytes),
"dd" => encode_dd(dir, bytes),
"dq" => encode_dq(dir, bytes),
_ => Err(AsmError::EncodeError(format!("Unknown directive {}", dir.name))),
}
}

/// Encodes a `db` directive: one byte per number, raw bytes per string.
///
/// Numbers must fit in eight bits, read as either signed or unsigned
/// (`-128..=255`); negative values are emitted in two's complement.
/// String literals are appended byte for byte.
///
/// # Errors
/// Returns `AsmError::EncodeError` when a number is outside `-128..=255`
/// (instead of silently truncating it) or when a value is an identifier.
/// Values encoded before the failing one remain in `bytes`.
fn encode_db(dir: &Directive, bytes: &mut Vec<u8>) -> Result<(), AsmError> {
    for value in &dir.values {
        match value {
            DirectiveValue::Number(n) => {
                if !(i64::from(i8::MIN)..=i64::from(u8::MAX)).contains(n) {
                    return Err(AsmError::EncodeError(format!(
                        "db value {} does not fit in 8 bits",
                        n
                    )));
                }
                // In range, so the lowest little-endian byte is the value.
                bytes.push(n.to_le_bytes()[0]);
            }
            DirectiveValue::StringLiteral(s) => {
                bytes.extend_from_slice(s.as_bytes());
            }
            DirectiveValue::Identifier(_) => {
                return Err(AsmError::EncodeError(
                    "db does not support identifier".into(),
                ));
            }
        }
    }
    Ok(())
}


/// Encodes a `dw` directive: each number as two little-endian bytes.
///
/// Numbers must fit in sixteen bits, read as either signed or unsigned
/// (`-32768..=65535`); negative values are emitted in two's complement.
///
/// # Errors
/// Returns `AsmError::EncodeError` when a number is out of range (instead
/// of silently truncating it) or when a value is not a number. Values
/// encoded before the failing one remain in `bytes`.
fn encode_dw(dir: &Directive, bytes: &mut Vec<u8>) -> Result<(), AsmError> {
    for value in &dir.values {
        match value {
            DirectiveValue::Number(n) => {
                if !(i64::from(i16::MIN)..=i64::from(u16::MAX)).contains(n) {
                    return Err(AsmError::EncodeError(format!(
                        "dw value {} does not fit in 16 bits",
                        n
                    )));
                }
                // In range, so the two lowest little-endian bytes are the value.
                bytes.extend_from_slice(&n.to_le_bytes()[..2]);
            }
            DirectiveValue::StringLiteral(_) | DirectiveValue::Identifier(_) => {
                return Err(AsmError::EncodeError("dw supports only numbers".into()));
            }
        }
    }
    Ok(())
}

/// Encodes a `dd` directive: each number as four little-endian bytes.
///
/// Numbers must fit in thirty-two bits, read as either signed or unsigned
/// (`-2147483648..=4294967295`); negative values are emitted in two's
/// complement.
///
/// # Errors
/// Returns `AsmError::EncodeError` when a number is out of range (instead
/// of silently truncating it), when a value is an identifier (relocatable
/// data is not implemented yet), or when a value is a string literal.
/// Values encoded before the failing one remain in `bytes`.
fn encode_dd(dir: &Directive, bytes: &mut Vec<u8>) -> Result<(), AsmError> {
    for value in &dir.values {
        match value {
            DirectiveValue::Number(n) => {
                if !(i64::from(i32::MIN)..=i64::from(u32::MAX)).contains(n) {
                    return Err(AsmError::EncodeError(format!(
                        "dd value {} does not fit in 32 bits",
                        n
                    )));
                }
                // In range, so the four lowest little-endian bytes are the value.
                bytes.extend_from_slice(&n.to_le_bytes()[..4]);
            }
            DirectiveValue::Identifier(_) => {
                // `dd label` would reserve 4 bytes and record a relocation;
                // that is not implemented yet.
                return Err(AsmError::EncodeError(
                    "dd identifier reloc not implemented yet".into(),
                ));
            }
            DirectiveValue::StringLiteral(_) => {
                return Err(AsmError::EncodeError("dd supports only numbers".into()));
            }
        }
    }
    Ok(())
}

/// Encodes a `dq` directive: each number as eight little-endian bytes.
///
/// Every `i64` fits, so no range check is needed.
///
/// # Errors
/// Returns `AsmError::EncodeError` when a value is an identifier
/// (relocatable data is not implemented yet) or a string literal. Values
/// encoded before the failing one remain in `bytes`.
fn encode_dq(dir: &Directive, bytes: &mut Vec<u8>) -> Result<(), AsmError> {
    for value in &dir.values {
        match value {
            DirectiveValue::Number(n) => {
                bytes.extend_from_slice(&n.to_le_bytes());
            }
            DirectiveValue::Identifier(_) => {
                // `dq label` would reserve 8 bytes and record a relocation;
                // that is not implemented yet.
                return Err(AsmError::EncodeError(
                    "dq identifier reloc not implemented yet".into(),
                ));
            }
            DirectiveValue::StringLiteral(_) => {
                return Err(AsmError::EncodeError("dq supports only numbers".into()));
            }
        }
    }
    Ok(())
}
Loading