Compare commits

..

No commits in common. "1798baa9d3bf25d999b31360ed34aec440da270b" and "220d827dbbbf354a80ab5adeb4b9a8be01834e48" have entirely different histories.

10 changed files with 154 additions and 341 deletions

View File

@ -1,4 +1,4 @@
use crate::frontend::types::TypeStruct; use crate::frontend::types::{Type, TypeStruct};
use crate::frontend::Callable; use crate::frontend::Callable;
use crate::rtlil; use crate::rtlil;
use crate::rtlil::SigSpec; use crate::rtlil::SigSpec;
@ -39,7 +39,7 @@ fn instantiate_binop(celltype: &str, id: &str, args: &[SigSpec], ret: &SigSpec)
cell cell
} }
fn make_binop_callable<'ctx>(name: &str, _celltype: &'static str) -> Callable<'ctx> { fn make_binop_callable<'ctx>(name: &str, celltype: &'static str) -> Callable<'ctx> {
// FIXME: CRIMES CRIMES CRIMES // FIXME: CRIMES CRIMES CRIMES
let logic_type: &'static TypeStruct = Box::leak(Box::new(TypeStruct::logic_infer())); let logic_type: &'static TypeStruct = Box::leak(Box::new(TypeStruct::logic_infer()));
let args = vec![ let args = vec![
@ -53,7 +53,7 @@ fn make_binop_callable<'ctx>(name: &str, _celltype: &'static str) -> Callable<'c
} }
} }
fn make_unnop_callable<'ctx>(name: &str, _celltype: &'static str) -> Callable<'ctx> { fn make_unnop_callable<'ctx>(name: &str, celltype: &'static str) -> Callable<'ctx> {
// FIXME: CRIMES CRIMES CRIMES // FIXME: CRIMES CRIMES CRIMES
let logic_type: &'static TypeStruct = Box::leak(Box::new(TypeStruct::logic_infer())); let logic_type: &'static TypeStruct = Box::leak(Box::new(TypeStruct::logic_infer()));
let args = vec![(Some("A".to_owned()), logic_type)]; let args = vec![(Some("A".to_owned()), logic_type)];

View File

@ -8,8 +8,8 @@ pub use callable::Callable;
pub use types::{Type, TypeStruct}; pub use types::{Type, TypeStruct};
mod callable; mod callable;
pub mod typed_ir;
pub mod types; pub mod types;
pub mod typed_ir;
/// lots of code is still not width-aware, this constant keeps track of that /// lots of code is still not width-aware, this constant keeps track of that
const TODO_WIDTH: u32 = 1; const TODO_WIDTH: u32 = 1;
@ -255,7 +255,7 @@ fn lower_expression(
})); }));
} }
let cell_id = module.make_genid(callable.name()); let cell_id = module.make_genid(&callable.name());
let output_gen_id = format!("{}$out", &cell_id); let output_gen_id = format!("{}$out", &cell_id);
module.add_wire(rtlil::Wire::new(&output_gen_id, TODO_WIDTH, None)); module.add_wire(rtlil::Wire::new(&output_gen_id, TODO_WIDTH, None));
@ -316,7 +316,11 @@ pub fn lower_module(pa_module: parser::Module) -> Result<String, CompileError> {
parser::PortDirection::Input => rtlil::PortOption::Input(idx as i32 + 1), parser::PortDirection::Input => rtlil::PortOption::Input(idx as i32 + 1),
parser::PortDirection::Output => rtlil::PortOption::Output(idx as i32 + 1), parser::PortDirection::Output => rtlil::PortOption::Output(idx as i32 + 1),
}; };
let wire = rtlil::Wire::new(sig.il_id.to_owned(), TODO_WIDTH, Some(dir_option)); let wire = rtlil::Wire::new(
sig.il_id.to_owned(),
TODO_WIDTH,
Some(dir_option),
);
ir_module.add_wire(wire); ir_module.add_wire(wire);
} }
for item in pa_module.items { for item in pa_module.items {

View File

@ -9,7 +9,8 @@ struct Element<'ty> {
struct Signal<'ty> { struct Signal<'ty> {
pub id: u32, pub id: u32,
pub typ: Type<'ty>, pub typ: Type<'ty>
} }
struct Expression {} struct Expression {
}

View File

@ -9,6 +9,8 @@ use std::io::prelude::*;
use std::path::PathBuf; use std::path::PathBuf;
use structopt::StructOpt; use structopt::StructOpt;
use nom_greedyerror::convert_error;
use ariadne::Source;
#[derive(Debug, StructOpt)] #[derive(Debug, StructOpt)]
#[structopt(name = "example", about = "An example of StructOpt usage.")] #[structopt(name = "example", about = "An example of StructOpt usage.")]
@ -40,19 +42,10 @@ fn main() {
.expect("error reading file"); .expect("error reading file");
let input: &str = input.as_str(); let input: &str = input.as_str();
let input = parser::Span::new(input); let input = parser::Span::new(input);
let lexed = parser::tokens::lex(input).unwrap(); let parsed = parser::parse(input);
let tokens = parser::tokens::TokenSpan::new(&lexed.1);
let parsed = parser::parse(tokens);
match parsed { match parsed {
Err(nom::Err::Error(err) | nom::Err::Failure(err)) => { Err(nom::Err::Error(err) | nom::Err::Failure(err)) => {
if opt.debug { parser::error::convert_error(input, err).eprint(Source::from(input.fragment())).unwrap();
println!("{err:#?}");
}
/*
parser::error::convert_error(input, err)
.eprint(Source::from(input.fragment()))
.unwrap();
*/
} }
Err(_) => ( unreachable!() ), Err(_) => ( unreachable!() ),
Ok(res) => { Ok(res) => {

View File

@ -1,17 +1,16 @@
pub mod error;
mod literals;
pub mod module; pub mod module;
pub mod proc; pub mod proc;
pub mod tokens; mod literals;
pub mod error;
use nom::{ use nom::{
branch::alt, branch::alt,
bytes::complete::tag, bytes::complete::tag,
character::complete::{alpha1, alphanumeric1, multispace0}, character::complete::{alpha1, alphanumeric1, char, multispace0, u64 as decimal},
combinator::{map, opt, recognize}, combinator::{map, opt, recognize},
error::{ErrorKind, ParseError}, error::{ParseError, ErrorKind},
multi::{many0, separated_list0}, multi::{many0, separated_list0},
sequence::{delimited, pair, preceded, separated_pair, tuple}, sequence::{delimited, pair, preceded, separated_pair, terminated, tuple},
}; };
use nom_greedyerror::GreedyError; use nom_greedyerror::GreedyError;
use nom_locate::LocatedSpan; use nom_locate::LocatedSpan;
@ -23,8 +22,8 @@ pub type IErr<I> = GreedyError<I, ErrorKind>;
// custom IResult type for VerboseError // custom IResult type for VerboseError
pub type IResult<I, O, E = IErr<I>> = nom::IResult<I, O, E>; pub type IResult<I, O, E = IErr<I>> = nom::IResult<I, O, E>;
use literals::hexadecimal;
pub use crate::parser::module::{module, Module, ModuleItem, PortDirection}; pub use crate::parser::module::{module, Module, ModuleItem, PortDirection};
use crate::parser::tokens::{token, TokenKind as tk, TokenSpan};
fn ws0<'a, F: 'a, O, E: ParseError<Span<'a>>>( fn ws0<'a, F: 'a, O, E: ParseError<Span<'a>>>(
inner: F, inner: F,
@ -43,20 +42,29 @@ fn identifier(input: Span) -> IResult<Span, Span> {
} }
// TODO: allow recursive generics // TODO: allow recursive generics
// TODO: allow expressions again fn typename(input: Span) -> IResult<Span, TypeName> {
fn typename(input: TokenSpan) -> IResult<TokenSpan, TypeName> {
map( map(
tuple(( tuple((
token(tk::Ident), identifier,
opt(delimited(token(tk::LAngle), expression, token(tk::RAngle))), opt(delimited(char('<'), ws0(expression), char('>')))
)), )),
|(ident, _)| TypeName { |(ident, _)| {
name: ident.span(), TypeName {
generics: (), name: ident,
}, generics: ()
}
}
)(input) )(input)
} }
fn widthspec(input: Span) -> IResult<Span, u64> {
delimited(char('['), ws0(decimal), char(']'))(input)
}
fn intliteral(input: Span) -> IResult<Span, (u64, u64)> {
tuple((terminated(decimal, char('\'')), alt((decimal, hexadecimal))))(input)
}
#[derive(Debug)] #[derive(Debug)]
pub struct TypeName<'a> { pub struct TypeName<'a> {
name: Span<'a>, name: Span<'a>,
@ -67,7 +75,7 @@ pub struct TypeName<'a> {
pub struct NetDecl<'a> { pub struct NetDecl<'a> {
pub name: Span<'a>, pub name: Span<'a>,
pub typ: TypeName<'a>, pub typ: TypeName<'a>,
pub value: Option<Span<'a>>, pub value: Option<(u64, u64)>,
} }
#[derive(Debug)] #[derive(Debug)]
@ -107,91 +115,87 @@ pub enum Expression<'a> {
Operation(Box<Operation<'a>>), Operation(Box<Operation<'a>>),
} }
// TODO: reallow assignments fn declaration(i: Span) -> IResult<Span, NetDecl> {
fn declaration(i: TokenSpan) -> IResult<TokenSpan, NetDecl> {
map( map(
tuple(( tuple((
separated_pair(token(tk::Ident), token(tk::Colon), typename), separated_pair(identifier, ws0(char(':')), typename),
opt(preceded(token(tk::Assign), token(tk::Number))), opt(preceded(ws0(char('=')), intliteral)),
)), )),
|((ident, typ), _value)| NetDecl { |((ident, typ), value)| NetDecl {
name: ident.span(), name: ident,
typ, typ,
value: None, value,
}, },
)(i) )(i)
} }
fn operation(input: TokenSpan) -> IResult<TokenSpan, Operation> { fn operation(input: Span) -> IResult<Span, Operation> {
// temporarily given up on before I learn the shunting yard algorithm // temporarily given up on before I learn the shunting yard algorithm
alt(( alt((
map( map(
separated_pair(expression_nonrecurse, token(tk::BitAnd), expression), separated_pair(ws0(expression_nonrecurse), char('&'), ws0(expression)),
|(a, b)| Operation::And { a, b }, |(a, b)| Operation::And { a, b },
), ),
map( map(
separated_pair(expression_nonrecurse, token(tk::BitOr), expression), separated_pair(ws0(expression_nonrecurse), char('|'), ws0(expression)),
|(a, b)| Operation::Or { a, b }, |(a, b)| Operation::Or { a, b },
), ),
map( map(
separated_pair(expression_nonrecurse, token(tk::BitXor), expression), separated_pair(ws0(expression_nonrecurse), char('^'), ws0(expression)),
|(a, b)| Operation::Xor { a, b }, |(a, b)| Operation::Xor { a, b },
), ),
map(preceded(token(tk::BitNot), expression), Operation::Not), map(preceded(char('~'), expression), Operation::Not),
))(input) ))(input)
} }
fn call_item(input: TokenSpan) -> IResult<TokenSpan, Call> { fn call_item(input: Span) -> IResult<Span, Call> {
map( map(
tuple(( tuple((
token(tk::Ident), ws0(identifier),
delimited( delimited(
token(tk::LParen), char('('),
separated_list0(token(tk::Comma), expression), ws0(separated_list0(char(','), expression)),
token(tk::RParen), char(')'),
), ),
)), )),
|(name, args)| Call { |(name, args)| Call { name, args },
name: name.span(),
args,
},
)(input) )(input)
} }
/// parser combinators can not parse left-recursive grammars. To work around this, we split /// parser combinators can not parse left-recursive grammars. To work around this, we split
/// expressions into a recursive and non-recursive portion. /// expressions into a recursive and non-recursive portion.
/// Parsers reachable from this point must call expression_nonrecurse instead /// Parsers reachable from this point must call expression_nonrecurse instead
fn expression(input: TokenSpan) -> IResult<TokenSpan, Expression> { fn expression(input: Span) -> IResult<Span, Expression> {
alt(( alt((
map(operation, |op| Expression::Operation(Box::new(op))), map(ws0(operation), |op| Expression::Operation(Box::new(op))),
expression_nonrecurse, expression_nonrecurse,
))(input) ))(input)
} }
/// the portion of the expression grammar that can be parsed without left recursion /// the portion of the expression grammar that can be parsed without left recursion
fn expression_nonrecurse(input: TokenSpan) -> IResult<TokenSpan, Expression> { fn expression_nonrecurse(input: Span) -> IResult<Span, Expression> {
alt(( alt((
map(token(tk::Number), |_| Expression::Literal(42)), map(ws0(decimal), Expression::Literal),
map(call_item, |call| Expression::Call(Box::new(call))), map(ws0(call_item), |call| Expression::Call(Box::new(call))),
map(token(tk::Ident), |ident| { map(ws0(identifier), |ident| {
Expression::Ident(*ident.span().fragment()) Expression::Ident(*ident.fragment())
}), }),
delimited(token(tk::LParen), expression, token(tk::RParen)), delimited(char('('), expression, char(')')),
))(input) ))(input)
} }
fn assign_statement(input: TokenSpan) -> IResult<TokenSpan, Assign> { fn assign_statement(input: Span) -> IResult<Span, Assign> {
map( map(
separated_pair(token(tk::Ident), token(tk::EqAssign), expression), separated_pair(ws0(identifier), char('='), ws0(expression)),
|(lhs, expr)| Assign { |(lhs, expr)| Assign {
lhs: (*lhs.span().fragment()), lhs: (*lhs.fragment()),
expr, expr,
}, },
)(input) )(input)
} }
pub fn parse(input: TokenSpan) -> IResult<TokenSpan, Module> { pub fn parse(input: Span) -> IResult<Span, Module> {
module(input) ws0(module)(input)
} }
#[cfg(test)] #[cfg(test)]

View File

@ -1,26 +1,36 @@
use super::{IErr, Span}; use std::fmt::Debug;
use ariadne::{Label, Report, ReportKind}; use std::ops::Deref;
use nom_greedyerror::{GreedyErrorKind, Position};
use super::{Span, IErr};
use nom::error::ErrorKind;
use nom_greedyerror::{Position, GreedyErrorKind};
use ariadne::{Report, ReportKind, Label};
fn span_to_range(input: Span) -> std::ops::Range<usize> { fn span_to_range(input: Span) -> std::ops::Range<usize> {
input.position()..(input.position() + input.len()) input.position()..(input.position() + input.len())
} }
pub fn convert_error(_input: Span, e: IErr<Span>) -> Report { pub fn convert_error(
input: Span,
e: IErr<Span>,
) -> Report {
let mut labels = Vec::new(); let mut labels = Vec::new();
for err in e.errors { for err in e.errors {
let label = match err.1 { let label = match err.1 {
GreedyErrorKind::Context(ctx) => { GreedyErrorKind::Context(ctx) => {
Label::new(span_to_range(err.0)).with_message(format!("in {ctx}")) Label::new(span_to_range(err.0))
} .with_message(format!("in {ctx}"))
GreedyErrorKind::Char(c) => Label::new(err.0.position()..err.0.position()) },
.with_message(format!("expected {c:?}")), GreedyErrorKind::Char(c) => {
GreedyErrorKind::Nom(kind) => Label::new(err.0.position()..err.0.position()) Label::new(err.0.position()..err.0.position())
.with_message(format!("nom error {kind:?}")), .with_message(format!("expected {c:?}"))
},
GreedyErrorKind::Nom(_) => todo!(),
}; };
labels.push(label); labels.push(label);
} }
let mut rep = Report::build(ReportKind::Error, (), 0).with_message("Parse Error"); let mut rep = Report::build(ReportKind::Error, (), 0)
.with_message("Parse Error");
for lbl in labels { for lbl in labels {
rep = rep.with_label(lbl) rep = rep.with_label(lbl)
} }

View File

@ -17,8 +17,7 @@ pub fn hexadecimal(input: Span) -> IResult<Span, u64> {
))), ))),
), ),
|out: Span| { |out: Span| {
u64::from_str_radix(&str::replace(out.fragment(), "_", ""), 16) u64::from_str_radix(&str::replace(out.fragment(), "_", ""), 16).expect("error parsing literal")
.expect("error parsing literal")
}, },
)(input) )(input)
} }

View File

@ -1,16 +1,17 @@
use nom::{ use nom::{
branch::alt, branch::alt,
combinator::map, bytes::complete::tag,
character::complete::{char, multispace1},
combinator::{consumed, map},
error::context, error::context,
multi::{many0, separated_list0}, multi::{many0, separated_list0},
sequence::{delimited, preceded, tuple}, sequence::{delimited, terminated, tuple, preceded},
}; };
use crate::parser::{ use crate::parser::{
assign_statement, declaration, assign_statement, declaration, identifier,
proc::{proc_block, ProcBlock}, proc::{proc_block, ProcBlock},
tokens::{token, TokenKind as tk, TokenSpan}, ws0, Assign, IResult, NetDecl, Span, typename
typename, Assign, IResult, NetDecl, Span,
}; };
#[derive(Debug)] #[derive(Debug)]
@ -21,6 +22,7 @@ pub enum PortDirection {
#[derive(Debug)] #[derive(Debug)]
pub struct PortDecl<'a> { pub struct PortDecl<'a> {
pub pos: Span<'a>,
pub direction: PortDirection, pub direction: PortDirection,
pub net: NetDecl<'a>, pub net: NetDecl<'a>,
} }
@ -38,45 +40,56 @@ pub enum ModuleItem<'a> {
Proc(ProcBlock<'a>), Proc(ProcBlock<'a>),
} }
fn port_decl(i: TokenSpan) -> IResult<TokenSpan, PortDecl> { fn port_decl(i: Span) -> IResult<Span, PortDecl> {
map(declaration, |net| PortDecl { map(
consumed(
declaration,
),
|(pos, net)| PortDecl {
pos,
direction: PortDirection::Input, direction: PortDirection::Input,
net, net,
})(i) },
)(i)
} }
fn inputs_list(input: TokenSpan) -> IResult<TokenSpan, Vec<PortDecl>> { fn inputs_list(input: Span) -> IResult<Span, Vec<PortDecl>> {
separated_list0(token(tk::Comma), port_decl)(input) separated_list0(ws0(char(',')), ws0(port_decl))(input)
} }
fn assign_item(input: TokenSpan) -> IResult<TokenSpan, Assign> { fn assign_item(input: Span) -> IResult<Span, Assign> {
context( context(
"assignment", "assignment",
delimited(token(tk::Assign), assign_statement, token(tk::Semicolon)), delimited(
ws0(terminated(tag("assign"), multispace1)),
ws0(assign_statement),
ws0(char(';')),
),
)(input) )(input)
} }
fn module_item(input: TokenSpan) -> IResult<TokenSpan, ModuleItem> { fn module_item(input: Span) -> IResult<Span, ModuleItem> {
alt(( alt((
map(assign_item, ModuleItem::Assign), map(assign_item, ModuleItem::Assign),
map(proc_block, ModuleItem::Proc), map(proc_block, ModuleItem::Proc),
))(input) ))(input)
} }
pub fn module(input: TokenSpan) -> IResult<TokenSpan, Module> { /// parse a top-level module declaration
pub fn module(input: Span) -> IResult<Span, Module> {
context( context(
"module", "module",
map( map(
tuple(( tuple((
token(tk::Module), tag("module"),
token(tk::Ident), ws0(identifier),
delimited(token(tk::LParen), inputs_list, token(tk::RParen)), ws0(delimited(char('('), ws0(inputs_list), char(')'))),
preceded(token(tk::RArrow), typename), ws0(preceded(tag("->"), ws0(typename))),
delimited(token(tk::LBrace), many0(module_item), token(tk::RBrace)), ws0(delimited(char('{'), ws0(many0(ws0(module_item))), char('}'))),
)), )),
|(_, name, inputs, _ret, items)| Module { |(_, name, inputs, ret, items)| Module {
// TODO: bring back returns name,
name: name.span(), // TODO: add back in returns
ports: inputs, ports: inputs,
items, items,
}, },

View File

@ -1,5 +1,7 @@
use nom::{ use nom::{
branch::alt, branch::alt,
bytes::complete::tag,
character::complete::char,
combinator::map, combinator::map,
error::context, error::context,
multi::{many1, separated_list1}, multi::{many1, separated_list1},
@ -7,9 +9,7 @@ use nom::{
}; };
use crate::parser::{ use crate::parser::{
assign_statement, expression, assign_statement, expression, identifier, ws0, Assign, Expression, IResult, Span,
tokens::{token, TokenKind as tk, TokenSpan},
Assign, Expression, IResult, Span,
}; };
#[derive(Debug)] #[derive(Debug)]
@ -36,38 +36,38 @@ pub struct MatchBlock<'a> {
pub arms: Vec<(Expression<'a>, ProcStatement<'a>)>, pub arms: Vec<(Expression<'a>, ProcStatement<'a>)>,
} }
fn match_arm(input: TokenSpan) -> IResult<TokenSpan, (Expression, ProcStatement)> { fn match_arm(input: Span) -> IResult<Span, (Expression, ProcStatement)> {
separated_pair(expression, token(tk::FatArrow), proc_statement)(input) separated_pair(ws0(expression), tag("=>"), ws0(proc_statement))(input)
} }
fn match_block(input: TokenSpan) -> IResult<TokenSpan, MatchBlock> { fn match_block(input: Span) -> IResult<Span, MatchBlock> {
context( context(
"match block", "match block",
map( map(
tuple(( tuple((
token(tk::Match), ws0(tag("match")),
delimited(token(tk::LParen), expression, token(tk::RParen)), ws0(delimited(char('('), ws0(expression), char(')'))),
delimited( ws0(delimited(
token(tk::LBrace), char('{'),
separated_list1(token(tk::Comma), match_arm), separated_list1(char(','), ws0(match_arm)),
token(tk::RBrace), char('}'),
), )),
)), )),
|(_, expr, arms)| MatchBlock { expr, arms }, |(_, expr, arms)| MatchBlock { expr, arms },
), ),
)(input) )(input)
} }
fn statement_block(input: TokenSpan) -> IResult<TokenSpan, Vec<ProcStatement>> { fn statement_block(input: Span) -> IResult<Span, Vec<ProcStatement>> {
delimited( delimited(
token(tk::LBrace), char('{'),
separated_list1(token(tk::Semicolon), proc_statement), separated_list1(char(';'), ws0(proc_statement)),
token(tk::RBrace), char('}'),
)(input) )(input)
} }
/// parse a statement that is valid inside a proc block /// parse a statement that is valid inside a proc block
fn proc_statement(input: TokenSpan) -> IResult<TokenSpan, ProcStatement> { fn proc_statement(input: Span) -> IResult<Span, ProcStatement> {
alt(( alt((
map(match_block, ProcStatement::Match), map(match_block, ProcStatement::Match),
map(statement_block, ProcStatement::Block), map(statement_block, ProcStatement::Block),
@ -75,19 +75,16 @@ fn proc_statement(input: TokenSpan) -> IResult<TokenSpan, ProcStatement> {
))(input) ))(input)
} }
pub fn proc_block(input: TokenSpan) -> IResult<TokenSpan, ProcBlock> { pub fn proc_block(input: Span) -> IResult<Span, ProcBlock> {
context( context(
"proc block", "proc block",
map( map(
tuple(( tuple((
token(tk::Proc), ws0(tag("proc")),
delimited(token(tk::LParen), token(tk::Ident), token(tk::RParen)), ws0(delimited(char('('), ws0(identifier), char(')'))),
delimited(token(tk::LBrace), many1(proc_statement), token(tk::RBrace)), ws0(delimited(char('{'), many1(ws0(proc_statement)), char('}'))),
)), )),
|(_, net, items)| ProcBlock { |(_, net, items)| ProcBlock { net, items },
net: net.span(),
items,
},
), ),
)(input) )(input)
} }

View File

@ -1,208 +0,0 @@
//! convert text into a token stream
use super::{identifier, ws0, IResult, Span};
use nom::{
branch::alt,
bytes::complete::tag,
character::complete::{anychar, digit1},
combinator::{consumed, map, recognize},
error::ParseError,
multi::many0,
};
use std::fmt;
/// A single lexed token: its classification plus the exact source text it
/// was produced from.
pub struct Token<'a> {
    // source slice this token covers; carries the offset used in Debug output
    span: Span<'a>,
    // what kind of token this is (keyword, punctuation, literal, ...)
    kind: TokenKind,
}
impl fmt::Debug for Token<'_> {
    /// Render the token as `Kind @offset "text"`, e.g. `Module @0 "module"`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Return the `write!` result directly; the old `…?; Ok(())` pattern
        // was a redundant re-wrap of the same value.
        write!(
            f,
            "{:?} @{} {:?}",
            self.kind,
            self.span.location_offset(),
            self.span.fragment()
        )
    }
}
impl<'a> Token<'a> {
    /// Build a token from its source span and classified kind.
    fn new(span: Span<'a>, kind: TokenKind) -> Self {
        Self { span, kind }
    }
    /// The source span this token was lexed from (copied out; `Span` is a
    /// located slice, so this is cheap).
    pub fn span(&self) -> Span {
        self.span
    }
}
/// Classification of a lexed token. The lexer never fails: anything it
/// cannot classify becomes an `Error` token (see `lex`).
#[derive(Debug, PartialEq, Clone)]
pub enum TokenKind {
    // no whitespace, for now
    // no token trees either, for now
    // Braces
    LParen,
    RParen,
    LAngle,
    RAngle,
    LBrace,
    RBrace,
    LSquare,
    RSquare,
    // single chars
    Colon,
    Semicolon,
    Comma,
    BitNot,
    BitAnd,
    BitOr,
    BitXor,
    EqAssign,
    // Multi Chars
    FatArrow,
    RArrow,
    // Literals
    Ident,
    Number,
    // Keywords
    Module,
    Assign,
    Match,
    Proc,
    // Error: fallback for any character no other rule matched
    Error,
}
/// Parser input over a slice of lexed tokens, tracking the absolute index
/// of the first remaining token so errors can report a stream position.
#[derive(Debug, Clone)]
pub struct TokenSpan<'a> {
    // tokens not yet consumed
    rest: &'a [Token<'a>],
    // absolute index of `rest[0]` within the originally lexed stream
    pos: usize,
}
impl<'a> TokenSpan<'a> {
    /// Wrap a freshly lexed token slice, starting at stream position 0.
    pub fn new(rest: &'a [Token<'a>]) -> Self {
        Self { rest, pos: 0 }
    }
    /// Wrap a token slice that begins at absolute stream position `pos`.
    pub fn with_pos(rest: &'a [Token<'a>], pos: usize) -> Self {
        Self { rest, pos }
    }
}
impl nom::InputTake for TokenSpan<'_> {
    /// Return the first `count` tokens as a new span.
    fn take(&self, count: usize) -> Self {
        // The prefix starts at the same absolute position as `self`. The
        // previous implementation advanced `pos` by `count` here, which
        // disagreed with `take_split` (whose head keeps `self.pos`) and
        // would misreport error positions for the taken prefix.
        TokenSpan::with_pos(&self.rest[..count], self.pos)
    }
    /// Split into (first `count` tokens, remainder), keeping positions
    /// consistent with the absolute stream index.
    fn take_split(&self, count: usize) -> (Self, Self) {
        // no `&` on the tuple: bind the two slices directly
        let (head, tail) = self.rest.split_at(count);
        (
            TokenSpan::with_pos(head, self.pos),
            TokenSpan::with_pos(tail, self.pos + count),
        )
    }
}
impl nom::InputLength for TokenSpan<'_> {
    /// Number of tokens remaining in this span.
    fn input_len(&self) -> usize {
        self.rest.len()
    }
}
impl nom_greedyerror::Position for TokenSpan<'_> {
    /// Absolute index of the next token within the original token stream;
    /// used by error reporting.
    fn position(&self) -> usize {
        self.pos
    }
}
/// combinator that matches a token kind
pub fn token<'a, E>(
kind: TokenKind,
) -> impl FnMut(TokenSpan<'a>) -> nom::IResult<TokenSpan, &Token, E>
where
E: ParseError<TokenSpan<'a>>,
{
move |input: TokenSpan| {
let next = &input.rest[0];
if next.kind == kind.clone() {
let rest = TokenSpan::with_pos(&input.rest[1..], input.pos + 1);
Ok((rest, next))
} else {
Err(nom::Err::Error(E::from_error_kind(
input,
// TODO: Proper errors here
nom::error::ErrorKind::Tag,
)))
}
}
}
/// Lex a whole source span into a token stream.
///
/// The `alt` order is load-bearing: keywords must be tried before
/// `lex_literals`, or a keyword would be consumed as an identifier.
/// Lexing never fails outright — the final arm turns any single
/// unrecognized character into a `TokenKind::Error` token.
pub fn lex(input: Span) -> IResult<Span, Vec<Token>> {
    many0(ws0(alt((
        lex_keywords,
        lex_literals,
        lex_braces,
        lex_punctuation,
        // fallback: one arbitrary character becomes an Error token
        map(recognize(anychar), |span| {
            Token::new(span, TokenKind::Error)
        }),
    ))))(input)
}
/// Lex a single bracket/brace/angle/paren character into its token.
///
/// `consumed` pairs the matched source span with the kind chosen by the
/// inner parser, so the token records exactly the text it came from.
fn lex_braces(input: Span) -> IResult<Span, Token> {
    map(
        consumed(alt((
            map(tag("("), |_| TokenKind::LParen),
            map(tag(")"), |_| TokenKind::RParen),
            map(tag("<"), |_| TokenKind::LAngle),
            map(tag(">"), |_| TokenKind::RAngle),
            map(tag("{"), |_| TokenKind::LBrace),
            map(tag("}"), |_| TokenKind::RBrace),
            map(tag("["), |_| TokenKind::LSquare),
            map(tag("]"), |_| TokenKind::RSquare),
        ))),
        |(span, kind)| Token::new(span, kind),
    )(input)
}
/// Lex an identifier or a number literal.
///
/// Identifiers are tried first; numbers are bare decimal digit runs
/// (`digit1`) — no sign, radix prefix, or width suffix is handled here.
fn lex_literals(input: Span) -> IResult<Span, Token> {
    map(
        consumed(alt((
            map(identifier, |_| TokenKind::Ident),
            map(digit1, |_| TokenKind::Number),
        ))),
        |(span, kind)| Token::new(span, kind),
    )(input)
}
/// Lex punctuation and operator tokens.
///
/// Multi-character tags (`->`, `=>`) are listed before `=`, so `=>` is
/// lexed as a FatArrow rather than being split into `=` plus an Error.
fn lex_punctuation(input: Span) -> IResult<Span, Token> {
    map(
        consumed(alt((
            map(tag(":"), |_| TokenKind::Colon),
            map(tag(";"), |_| TokenKind::Semicolon),
            map(tag(","), |_| TokenKind::Comma),
            map(tag("->"), |_| TokenKind::RArrow),
            map(tag("=>"), |_| TokenKind::FatArrow),
            map(tag("~"), |_| TokenKind::BitNot),
            map(tag("&"), |_| TokenKind::BitAnd),
            map(tag("^"), |_| TokenKind::BitXor),
            map(tag("|"), |_| TokenKind::BitOr),
            map(tag("="), |_| TokenKind::EqAssign),
        ))),
        |(span, kind)| Token::new(span, kind),
    )(input)
}
fn lex_keywords(input: Span) -> IResult<Span, Token> {
map(
consumed(alt((
map(tag("module"), |_| TokenKind::Module),
map(tag("assign"), |_| TokenKind::Assign),
map(tag("match"), |_| TokenKind::Match),
map(tag("proc"), |_| TokenKind::Proc),
))),
|(span, kind)| Token::new(span, kind),
)(input)
}