Skip to content

Commit f8d778a

Browse files
authored
Merge pull request #128 from cobalt-language/symbols
Add symbols
2 parents 89aa3c7 + f32a3e9 commit f8d778a

File tree

7 files changed

+320
-63
lines changed

7 files changed

+320
-63
lines changed

cobalt-ast/src/ast/misc.rs

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -321,3 +321,33 @@ impl<'src> AST<'src> for ParenAST<'src> {
321321
self.base.print_impl(f, pre, file)
322322
}
323323
}
324+
#[derive(Debug, Clone)]
325+
pub struct SymbolAST<'src> {
326+
pub loc: SourceSpan,
327+
pub val: Cow<'src, [u8]>,
328+
}
329+
impl<'src> SymbolAST<'src> {
330+
pub fn new(loc: SourceSpan, val: Cow<'src, [u8]>) -> Self {
331+
SymbolAST { loc, val }
332+
}
333+
}
334+
impl<'src> AST<'src> for SymbolAST<'src> {
335+
fn loc(&self) -> SourceSpan {
336+
self.loc
337+
}
338+
fn codegen_impl<'ctx>(
339+
&self,
340+
_ctx: &CompCtx<'src, 'ctx>,
341+
_errs: &mut Vec<CobaltError<'src>>,
342+
) -> Value<'src, 'ctx> {
343+
Value::new(None, None, types::Symbol::new_ref(&self.val))
344+
}
345+
fn print_impl(
346+
&self,
347+
f: &mut std::fmt::Formatter,
348+
_pre: &mut TreePrefix,
349+
_file: Option<CobaltFile>,
350+
) -> std::fmt::Result {
351+
writeln!(f, "symbol: {:?}", bstr::BStr::new(&self.val))
352+
}
353+
}

cobalt-ast/src/types.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#![allow(unused_variables)]
1+
#![allow(unused_variables, ambiguous_glob_reexports)]
22
use crate::*;
33
use inkwell::types::{BasicType, BasicTypeEnum};
44
use inkwell::values::{BasicValueEnum, FunctionValue};
@@ -776,6 +776,6 @@ pub use func::*;
776776
pub use int::*;
777777
pub use intrinsic::*;
778778
pub use mem::*;
779-
pub use meta::*;
779+
pub use meta::{Symbol, *};
780780

781781
use ref_cast::*;

cobalt-ast/src/types/meta.rs

Lines changed: 121 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
use super::*;
2+
use bstr::ByteSlice;
3+
use std::str::Utf8Error;
4+
25
#[derive(Debug, Display)]
36
#[display(fmt = "type")]
47
pub struct TypeData(());
@@ -109,6 +112,7 @@ impl Type for TypeData {
109112
Ok(Self::new())
110113
}
111114
}
115+
112116
#[derive(Debug, Display)]
113117
#[display(fmt = "module")]
114118
pub struct Module(());
@@ -134,6 +138,7 @@ impl Type for Module {
134138
attr: (Cow<'src, str>, SourceSpan),
135139
ctx: &CompCtx<'src, 'ctx>,
136140
) -> Result<Value<'src, 'ctx>, CobaltError<'src>> {
141+
use varmap::Symbol;
137142
if let Some(InterData::Module(s, i, n)) = &val.inter_val {
138143
ctx.with_vars(|v| VarMap::lookup_in_mod((s, i), &attr.0, v.root()))
139144
.map_or_else(
@@ -172,6 +177,7 @@ impl Type for Module {
172177
Ok(Self::new())
173178
}
174179
}
180+
175181
#[derive(Debug, Display)]
176182
#[display(fmt = "<error>")]
177183
pub struct Error(());
@@ -307,6 +313,7 @@ impl Type for Error {
307313
Ok(Self::new())
308314
}
309315
}
316+
310317
#[derive(Debug, Display)]
311318
#[display(fmt = "null")]
312319
pub struct Null(());
@@ -424,4 +431,117 @@ impl Type for Null {
424431
}
425432
}
426433

427-
submit_types!(TypeData, Module, Error, Null);
434+
#[derive(Debug, RefCastCustom)]
435+
#[repr(transparent)]
436+
pub struct Symbol(Box<[u8]>);
437+
impl Symbol {
438+
#[ref_cast_custom]
439+
#[allow(clippy::borrowed_box)]
440+
fn from_ref(this: &Box<[u8]>) -> &Self;
441+
pub fn new_ref<S: AsRef<[u8]>>(name: S) -> &'static Self {
442+
Self::from_ref(SYMBOL_INTERN.intern_ref(name.as_ref()))
443+
}
444+
pub fn new<S: Into<Box<[u8]>>>(name: S) -> &'static Self {
445+
Self::from_ref(SYMBOL_INTERN.intern(name.into()))
446+
}
447+
pub fn value(&self) -> &[u8] {
448+
&self.0
449+
}
450+
pub fn value_str(&self) -> Result<&str, Utf8Error> {
451+
std::str::from_utf8(&self.0)
452+
}
453+
}
454+
impl Display for Symbol {
455+
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
456+
f.write_str("$")?;
457+
if let Ok(s) = self.0.to_str() {
458+
if s.chars().all(|c| c == '_' || c.is_alphanumeric()) {
459+
f.write_str(s)?;
460+
return Ok(());
461+
}
462+
}
463+
Debug::fmt(self.0.as_bstr(), f)
464+
}
465+
}
466+
// Gives `Symbol` the kind identifier required by `ConcreteType`.
impl ConcreteType for Symbol {
467+
// Built by `make_id` from the byte tag `b"symbol"`.
const KIND: NonZeroU64 = make_id(b"symbol");
468+
}
469+
impl Type for Symbol {
470+
fn size(&'static self) -> SizeType {
471+
SizeType::Static(0)
472+
}
473+
fn align(&'static self) -> u16 {
474+
1
475+
}
476+
fn save(&'static self, out: &mut dyn Write) -> io::Result<()> {
477+
out.write_all(&(self.0.len() as u64).to_be_bytes())?;
478+
out.write_all(&self.0)
479+
}
480+
fn load(buf: &mut dyn BufRead) -> io::Result<TypeRef>
481+
where
482+
Self: ConcreteType,
483+
{
484+
let mut arr = [0; 8];
485+
buf.read_exact(&mut arr)?;
486+
let len = u64::from_be_bytes(arr);
487+
let mut vec = vec![0; len as _];
488+
buf.read_exact(&mut vec)?;
489+
Ok(Self::new(vec))
490+
}
491+
fn _can_iconv_to(&'static self, other: TypeRef, ctx: &CompCtx) -> bool {
492+
other.is_and::<types::Reference>(|r| {
493+
r.base()
494+
.is_and::<types::UnsizedArray>(|a| a.elem() == types::Int::unsigned(8))
495+
}) || other.is_and::<types::Pointer>(|p| p.base() == types::Int::unsigned(8))
496+
}
497+
fn _iconv_to<'src, 'ctx>(
498+
&'static self,
499+
val: Value<'src, 'ctx>,
500+
target: (TypeRef, Option<SourceSpan>),
501+
ctx: &CompCtx<'src, 'ctx>,
502+
) -> Result<Value<'src, 'ctx>, CobaltError<'src>> {
503+
if target.0.is_and::<types::Reference>(|r| {
504+
r.base()
505+
.is_and::<types::UnsizedArray>(|a| a.elem() == types::Int::unsigned(8))
506+
}) {
507+
let arr = ctx.context.const_string(self.value(), true);
508+
let gv = ctx.module.add_global(arr.get_type(), None, "cobalt.str");
509+
gv.set_initializer(&arr);
510+
gv.set_linkage(inkwell::module::Linkage::Private);
511+
let sv = ctx.context.const_struct(
512+
&[
513+
gv.as_pointer_value().into(),
514+
ctx.context
515+
.i64_type()
516+
.const_int(self.value().len() as _, false)
517+
.into(),
518+
],
519+
false,
520+
);
521+
Ok(Value::interpreted(
522+
sv.into(),
523+
InterData::Array(
524+
self.value()
525+
.iter()
526+
.map(|&v| InterData::Int(v as _))
527+
.collect(),
528+
),
529+
target.0,
530+
))
531+
} else if target
532+
.0
533+
.is_and::<types::Pointer>(|p| p.base() == types::Int::unsigned(8))
534+
{
535+
let arr = ctx.context.const_string(self.value(), true);
536+
let gv = ctx.module.add_global(arr.get_type(), None, "cobalt.str");
537+
gv.set_initializer(&arr);
538+
gv.set_linkage(inkwell::module::Linkage::Private);
539+
Ok(Value::compiled(gv.as_pointer_value().into(), target.0))
540+
} else {
541+
Err(cant_iconv(&val, target.0, target.1))
542+
}
543+
}
544+
}
545+
// Interner through which `Symbol::new` and `Symbol::new_ref` obtain the
// `'static` boxed byte strings they wrap.
static SYMBOL_INTERN: Interner<Box<[u8]>> = Interner::new();
546+
547+
submit_types!(TypeData, Module, Error, Null, Symbol);

cobalt-parser/src/lexer/tokenizer.rs

Lines changed: 107 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use std::{iter::Peekable, rc::Rc, str::Chars};
1+
use std::{iter::Peekable, ops::Range, rc::Rc, str::Chars};
22

33
use super::{
44
tokens::{
@@ -86,7 +86,7 @@ impl<'src> SourceReader<'src> {
8686

8787
while let Some(&c) = self.peek() {
8888
match c {
89-
' ' | '\n' | '\t' => {
89+
' ' | '\r' | '\n' | '\t' => {
9090
self.next_char();
9191
}
9292

@@ -96,6 +96,71 @@ impl<'src> SourceReader<'src> {
9696
}
9797
}
9898

99+
'$' => {
100+
let start = self.index;
101+
self.next_char();
102+
let mut add_err = true;
103+
loop {
104+
match self.peek() {
105+
None => {
106+
if add_err {
107+
errors.push(CobaltError::ExpectedFound {
108+
ex: "symbol name",
109+
found: None,
110+
loc: self.index.into(),
111+
})
112+
}
113+
}
114+
Some('"') => {
115+
let mut span = self.eat_string(&mut errors);
116+
span.start -= 1;
117+
tokens.push(Token {
118+
kind: TokenKind::Literal(LiteralToken::Symbol(
119+
&self.source[span.clone()],
120+
)),
121+
span: span.into(),
122+
});
123+
}
124+
Some(' ' | '\r' | '\n' | '\t') => {
125+
self.next_char();
126+
if add_err {
127+
add_err = false;
128+
errors.push(CobaltError::ExpectedFound {
129+
ex: "symbol name",
130+
found: Some("whitespace".into()),
131+
loc: self.index.into(),
132+
});
133+
}
134+
continue;
135+
}
136+
Some(&c) => {
137+
if is_ident_start(c) {
138+
loop {
139+
self.next_char();
140+
let Some(&c) = self.peek() else { break };
141+
if !is_xid_continue(c) {
142+
break;
143+
}
144+
}
145+
tokens.push(Token {
146+
kind: TokenKind::Literal(LiteralToken::Symbol(
147+
&self.source[start..self.index],
148+
)),
149+
span: (start..self.index).into(),
150+
});
151+
} else if add_err {
152+
errors.push(CobaltError::ExpectedFound {
153+
ex: "symbol name",
154+
found: Some(format!("{c:?}").into()),
155+
loc: self.index.into(),
156+
});
157+
}
158+
}
159+
}
160+
break;
161+
}
162+
}
163+
99164
// An identifier is an xid_start followed by zero or more xid_continue.
100165
// Match an identifier.
101166
// TODO: include check for underscore, but then there must be at least
@@ -619,45 +684,10 @@ impl<'src> SourceReader<'src> {
619684

620685
// --- String literal.
621686
'"' => {
622-
let span_start = self.index;
623-
self.next_char();
624-
625-
let mut last_was_escape = false;
626-
loop {
627-
let c = self.peek();
628-
if c.is_none() {
629-
errors.push(CobaltError::ExpectedFound {
630-
ex: "rest of string literal",
631-
found: None,
632-
loc: SourceSpan::from((self.source.len(), 0)),
633-
});
634-
continue;
635-
}
636-
let c = c.unwrap();
637-
638-
if c == &'\\' {
639-
last_was_escape = !last_was_escape;
640-
self.next_char();
641-
continue;
642-
}
643-
644-
if c == &'"' && !last_was_escape {
645-
break;
646-
}
647-
648-
last_was_escape = false;
649-
650-
self.next_char();
651-
}
652-
653-
self.next_char();
654-
let span_end = self.index;
655-
687+
let Range { start, end } = self.eat_string(&mut errors);
656688
tokens.push(Token {
657-
kind: TokenKind::Literal(LiteralToken::Str(
658-
&self.source[span_start..span_end],
659-
)),
660-
span: SourceSpan::from((span_start, span_end - span_start)),
689+
kind: TokenKind::Literal(LiteralToken::Str(&self.source[start..end])),
690+
span: SourceSpan::from((start, end - start)),
661691
});
662692
}
663693

@@ -824,4 +854,41 @@ impl<'src> SourceReader<'src> {
824854
}
825855
}
826856
}
857+
858+
fn eat_string(&mut self, errors: &mut Vec<CobaltError<'src>>) -> Range<usize> {
859+
let span_start = self.index;
860+
self.next_char();
861+
862+
let mut last_was_escape = false;
863+
loop {
864+
let c = self.peek();
865+
if c.is_none() {
866+
errors.push(CobaltError::ExpectedFound {
867+
ex: "rest of string literal",
868+
found: None,
869+
loc: SourceSpan::from((self.source.len(), 0)),
870+
});
871+
continue;
872+
}
873+
let c = c.unwrap();
874+
875+
if c == &'\\' {
876+
last_was_escape = !last_was_escape;
877+
self.next_char();
878+
continue;
879+
}
880+
881+
if c == &'"' && !last_was_escape {
882+
break;
883+
}
884+
885+
last_was_escape = false;
886+
887+
self.next_char();
888+
}
889+
890+
self.next_char();
891+
892+
span_start..self.index
893+
}
827894
}

0 commit comments

Comments
 (0)