Compare commits

...

6 Commits

Author SHA1 Message Date
NotAFile 1798baa9d3 clippy fix 2022-02-02 01:03:03 +01:00
NotAFile 825703e1ce switch parser to using tokens 2022-02-02 01:00:11 +01:00
NotAFile 108297b966 cargo fmt 2022-02-01 23:24:07 +01:00
NotAFile daa4da8037 add tokenizer for new parser 2022-02-01 23:14:11 +01:00
NotAFile cec4f5fb8d add nom error handling 2022-02-01 23:11:59 +01:00
NotAFile c32da018ad cargo fmt 2022-02-01 19:46:06 +01:00
10 changed files with 341 additions and 154 deletions

View File

@ -1,4 +1,4 @@
use crate::frontend::types::{Type, TypeStruct}; use crate::frontend::types::TypeStruct;
use crate::frontend::Callable; use crate::frontend::Callable;
use crate::rtlil; use crate::rtlil;
use crate::rtlil::SigSpec; use crate::rtlil::SigSpec;
@ -39,7 +39,7 @@ fn instantiate_binop(celltype: &str, id: &str, args: &[SigSpec], ret: &SigSpec)
cell cell
} }
fn make_binop_callable<'ctx>(name: &str, celltype: &'static str) -> Callable<'ctx> { fn make_binop_callable<'ctx>(name: &str, _celltype: &'static str) -> Callable<'ctx> {
// FIXME: CRIMES CRIMES CRIMES // FIXME: CRIMES CRIMES CRIMES
let logic_type: &'static TypeStruct = Box::leak(Box::new(TypeStruct::logic_infer())); let logic_type: &'static TypeStruct = Box::leak(Box::new(TypeStruct::logic_infer()));
let args = vec![ let args = vec![
@ -53,7 +53,7 @@ fn make_binop_callable<'ctx>(name: &str, celltype: &'static str) -> Callable<'ct
} }
} }
fn make_unnop_callable<'ctx>(name: &str, celltype: &'static str) -> Callable<'ctx> { fn make_unnop_callable<'ctx>(name: &str, _celltype: &'static str) -> Callable<'ctx> {
// FIXME: CRIMES CRIMES CRIMES // FIXME: CRIMES CRIMES CRIMES
let logic_type: &'static TypeStruct = Box::leak(Box::new(TypeStruct::logic_infer())); let logic_type: &'static TypeStruct = Box::leak(Box::new(TypeStruct::logic_infer()));
let args = vec![(Some("A".to_owned()), logic_type)]; let args = vec![(Some("A".to_owned()), logic_type)];

View File

@ -8,8 +8,8 @@ pub use callable::Callable;
pub use types::{Type, TypeStruct}; pub use types::{Type, TypeStruct};
mod callable; mod callable;
pub mod types;
pub mod typed_ir; pub mod typed_ir;
pub mod types;
/// lots of code is still not width-aware, this constant keeps track of that /// lots of code is still not width-aware, this constant keeps track of that
const TODO_WIDTH: u32 = 1; const TODO_WIDTH: u32 = 1;
@ -255,7 +255,7 @@ fn lower_expression(
})); }));
} }
let cell_id = module.make_genid(&callable.name()); let cell_id = module.make_genid(callable.name());
let output_gen_id = format!("{}$out", &cell_id); let output_gen_id = format!("{}$out", &cell_id);
module.add_wire(rtlil::Wire::new(&output_gen_id, TODO_WIDTH, None)); module.add_wire(rtlil::Wire::new(&output_gen_id, TODO_WIDTH, None));
@ -316,11 +316,7 @@ pub fn lower_module(pa_module: parser::Module) -> Result<String, CompileError> {
parser::PortDirection::Input => rtlil::PortOption::Input(idx as i32 + 1), parser::PortDirection::Input => rtlil::PortOption::Input(idx as i32 + 1),
parser::PortDirection::Output => rtlil::PortOption::Output(idx as i32 + 1), parser::PortDirection::Output => rtlil::PortOption::Output(idx as i32 + 1),
}; };
let wire = rtlil::Wire::new( let wire = rtlil::Wire::new(sig.il_id.to_owned(), TODO_WIDTH, Some(dir_option));
sig.il_id.to_owned(),
TODO_WIDTH,
Some(dir_option),
);
ir_module.add_wire(wire); ir_module.add_wire(wire);
} }
for item in pa_module.items { for item in pa_module.items {

View File

@ -9,8 +9,7 @@ struct Element<'ty> {
struct Signal<'ty> { struct Signal<'ty> {
pub id: u32, pub id: u32,
pub typ: Type<'ty> pub typ: Type<'ty>,
} }
struct Expression { struct Expression {}
}

View File

@ -9,8 +9,6 @@ use std::io::prelude::*;
use std::path::PathBuf; use std::path::PathBuf;
use structopt::StructOpt; use structopt::StructOpt;
use nom_greedyerror::convert_error;
use ariadne::Source;
#[derive(Debug, StructOpt)] #[derive(Debug, StructOpt)]
#[structopt(name = "example", about = "An example of StructOpt usage.")] #[structopt(name = "example", about = "An example of StructOpt usage.")]
@ -42,12 +40,21 @@ fn main() {
.expect("error reading file"); .expect("error reading file");
let input: &str = input.as_str(); let input: &str = input.as_str();
let input = parser::Span::new(input); let input = parser::Span::new(input);
let parsed = parser::parse(input); let lexed = parser::tokens::lex(input).unwrap();
let tokens = parser::tokens::TokenSpan::new(&lexed.1);
let parsed = parser::parse(tokens);
match parsed { match parsed {
Err(nom::Err::Error(err) | nom::Err::Failure(err)) => { Err(nom::Err::Error(err) | nom::Err::Failure(err)) => {
parser::error::convert_error(input, err).eprint(Source::from(input.fragment())).unwrap(); if opt.debug {
println!("{err:#?}");
}
/*
parser::error::convert_error(input, err)
.eprint(Source::from(input.fragment()))
.unwrap();
*/
} }
Err(_) => ( unreachable!() ), Err(_) => (unreachable!()),
Ok(res) => { Ok(res) => {
if opt.debug { if opt.debug {
println!("{:#?}", res); println!("{:#?}", res);

View File

@ -1,16 +1,17 @@
pub mod error;
mod literals;
pub mod module; pub mod module;
pub mod proc; pub mod proc;
mod literals; pub mod tokens;
pub mod error;
use nom::{ use nom::{
branch::alt, branch::alt,
bytes::complete::tag, bytes::complete::tag,
character::complete::{alpha1, alphanumeric1, char, multispace0, u64 as decimal}, character::complete::{alpha1, alphanumeric1, multispace0},
combinator::{map, opt, recognize}, combinator::{map, opt, recognize},
error::{ParseError, ErrorKind}, error::{ErrorKind, ParseError},
multi::{many0, separated_list0}, multi::{many0, separated_list0},
sequence::{delimited, pair, preceded, separated_pair, terminated, tuple}, sequence::{delimited, pair, preceded, separated_pair, tuple},
}; };
use nom_greedyerror::GreedyError; use nom_greedyerror::GreedyError;
use nom_locate::LocatedSpan; use nom_locate::LocatedSpan;
@ -22,8 +23,8 @@ pub type IErr<I> = GreedyError<I, ErrorKind>;
// custom IResult type for VerboseError // custom IResult type for VerboseError
pub type IResult<I, O, E = IErr<I>> = nom::IResult<I, O, E>; pub type IResult<I, O, E = IErr<I>> = nom::IResult<I, O, E>;
use literals::hexadecimal;
pub use crate::parser::module::{module, Module, ModuleItem, PortDirection}; pub use crate::parser::module::{module, Module, ModuleItem, PortDirection};
use crate::parser::tokens::{token, TokenKind as tk, TokenSpan};
fn ws0<'a, F: 'a, O, E: ParseError<Span<'a>>>( fn ws0<'a, F: 'a, O, E: ParseError<Span<'a>>>(
inner: F, inner: F,
@ -42,29 +43,20 @@ fn identifier(input: Span) -> IResult<Span, Span> {
} }
// TODO: allow recursive generics // TODO: allow recursive generics
fn typename(input: Span) -> IResult<Span, TypeName> { // TODO: allow expressions again
fn typename(input: TokenSpan) -> IResult<TokenSpan, TypeName> {
map( map(
tuple(( tuple((
identifier, token(tk::Ident),
opt(delimited(char('<'), ws0(expression), char('>'))) opt(delimited(token(tk::LAngle), expression, token(tk::RAngle))),
)), )),
|(ident, _)| { |(ident, _)| TypeName {
TypeName { name: ident.span(),
name: ident, generics: (),
generics: () },
}
}
)(input) )(input)
} }
fn widthspec(input: Span) -> IResult<Span, u64> {
delimited(char('['), ws0(decimal), char(']'))(input)
}
fn intliteral(input: Span) -> IResult<Span, (u64, u64)> {
tuple((terminated(decimal, char('\'')), alt((decimal, hexadecimal))))(input)
}
#[derive(Debug)] #[derive(Debug)]
pub struct TypeName<'a> { pub struct TypeName<'a> {
name: Span<'a>, name: Span<'a>,
@ -75,7 +67,7 @@ pub struct TypeName<'a> {
pub struct NetDecl<'a> { pub struct NetDecl<'a> {
pub name: Span<'a>, pub name: Span<'a>,
pub typ: TypeName<'a>, pub typ: TypeName<'a>,
pub value: Option<(u64, u64)>, pub value: Option<Span<'a>>,
} }
#[derive(Debug)] #[derive(Debug)]
@ -115,87 +107,91 @@ pub enum Expression<'a> {
Operation(Box<Operation<'a>>), Operation(Box<Operation<'a>>),
} }
fn declaration(i: Span) -> IResult<Span, NetDecl> { // TODO: reallow assignments
fn declaration(i: TokenSpan) -> IResult<TokenSpan, NetDecl> {
map( map(
tuple(( tuple((
separated_pair(identifier, ws0(char(':')), typename), separated_pair(token(tk::Ident), token(tk::Colon), typename),
opt(preceded(ws0(char('=')), intliteral)), opt(preceded(token(tk::Assign), token(tk::Number))),
)), )),
|((ident, typ), value)| NetDecl { |((ident, typ), _value)| NetDecl {
name: ident, name: ident.span(),
typ, typ,
value, value: None,
}, },
)(i) )(i)
} }
fn operation(input: Span) -> IResult<Span, Operation> { fn operation(input: TokenSpan) -> IResult<TokenSpan, Operation> {
// temporarily given up on before I learn the shunting yard algorithm // temporarily given up on before I learn the shunting yard algorithm
alt(( alt((
map( map(
separated_pair(ws0(expression_nonrecurse), char('&'), ws0(expression)), separated_pair(expression_nonrecurse, token(tk::BitAnd), expression),
|(a, b)| Operation::And { a, b }, |(a, b)| Operation::And { a, b },
), ),
map( map(
separated_pair(ws0(expression_nonrecurse), char('|'), ws0(expression)), separated_pair(expression_nonrecurse, token(tk::BitOr), expression),
|(a, b)| Operation::Or { a, b }, |(a, b)| Operation::Or { a, b },
), ),
map( map(
separated_pair(ws0(expression_nonrecurse), char('^'), ws0(expression)), separated_pair(expression_nonrecurse, token(tk::BitXor), expression),
|(a, b)| Operation::Xor { a, b }, |(a, b)| Operation::Xor { a, b },
), ),
map(preceded(char('~'), expression), Operation::Not), map(preceded(token(tk::BitNot), expression), Operation::Not),
))(input) ))(input)
} }
fn call_item(input: Span) -> IResult<Span, Call> { fn call_item(input: TokenSpan) -> IResult<TokenSpan, Call> {
map( map(
tuple(( tuple((
ws0(identifier), token(tk::Ident),
delimited( delimited(
char('('), token(tk::LParen),
ws0(separated_list0(char(','), expression)), separated_list0(token(tk::Comma), expression),
char(')'), token(tk::RParen),
), ),
)), )),
|(name, args)| Call { name, args }, |(name, args)| Call {
name: name.span(),
args,
},
)(input) )(input)
} }
/// parser combinators can not parse left-recursive grammars. To work around this, we split /// parser combinators can not parse left-recursive grammars. To work around this, we split
/// expressions into a recursive and non-recursive portion. /// expressions into a recursive and non-recursive portion.
/// Parsers reachable from this point must call expression_nonrecurse instead /// Parsers reachable from this point must call expression_nonrecurse instead
fn expression(input: Span) -> IResult<Span, Expression> { fn expression(input: TokenSpan) -> IResult<TokenSpan, Expression> {
alt(( alt((
map(ws0(operation), |op| Expression::Operation(Box::new(op))), map(operation, |op| Expression::Operation(Box::new(op))),
expression_nonrecurse, expression_nonrecurse,
))(input) ))(input)
} }
/// the portion of the expression grammar that can be parsed without left recursion /// the portion of the expression grammar that can be parsed without left recursion
fn expression_nonrecurse(input: Span) -> IResult<Span, Expression> { fn expression_nonrecurse(input: TokenSpan) -> IResult<TokenSpan, Expression> {
alt(( alt((
map(ws0(decimal), Expression::Literal), map(token(tk::Number), |_| Expression::Literal(42)),
map(ws0(call_item), |call| Expression::Call(Box::new(call))), map(call_item, |call| Expression::Call(Box::new(call))),
map(ws0(identifier), |ident| { map(token(tk::Ident), |ident| {
Expression::Ident(*ident.fragment()) Expression::Ident(*ident.span().fragment())
}), }),
delimited(char('('), expression, char(')')), delimited(token(tk::LParen), expression, token(tk::RParen)),
))(input) ))(input)
} }
fn assign_statement(input: Span) -> IResult<Span, Assign> { fn assign_statement(input: TokenSpan) -> IResult<TokenSpan, Assign> {
map( map(
separated_pair(ws0(identifier), char('='), ws0(expression)), separated_pair(token(tk::Ident), token(tk::EqAssign), expression),
|(lhs, expr)| Assign { |(lhs, expr)| Assign {
lhs: (*lhs.fragment()), lhs: (*lhs.span().fragment()),
expr, expr,
}, },
)(input) )(input)
} }
pub fn parse(input: Span) -> IResult<Span, Module> { pub fn parse(input: TokenSpan) -> IResult<TokenSpan, Module> {
ws0(module)(input) module(input)
} }
#[cfg(test)] #[cfg(test)]

View File

@ -1,36 +1,26 @@
use std::fmt::Debug; use super::{IErr, Span};
use std::ops::Deref; use ariadne::{Label, Report, ReportKind};
use nom_greedyerror::{GreedyErrorKind, Position};
use super::{Span, IErr};
use nom::error::ErrorKind;
use nom_greedyerror::{Position, GreedyErrorKind};
use ariadne::{Report, ReportKind, Label};
fn span_to_range(input: Span) -> std::ops::Range<usize> { fn span_to_range(input: Span) -> std::ops::Range<usize> {
input.position()..(input.position() + input.len()) input.position()..(input.position() + input.len())
} }
pub fn convert_error( pub fn convert_error(_input: Span, e: IErr<Span>) -> Report {
input: Span,
e: IErr<Span>,
) -> Report {
let mut labels = Vec::new(); let mut labels = Vec::new();
for err in e.errors { for err in e.errors {
let label = match err.1 { let label = match err.1 {
GreedyErrorKind::Context(ctx) => { GreedyErrorKind::Context(ctx) => {
Label::new(span_to_range(err.0)) Label::new(span_to_range(err.0)).with_message(format!("in {ctx}"))
.with_message(format!("in {ctx}")) }
}, GreedyErrorKind::Char(c) => Label::new(err.0.position()..err.0.position())
GreedyErrorKind::Char(c) => { .with_message(format!("expected {c:?}")),
Label::new(err.0.position()..err.0.position()) GreedyErrorKind::Nom(kind) => Label::new(err.0.position()..err.0.position())
.with_message(format!("expected {c:?}")) .with_message(format!("nom error {kind:?}")),
},
GreedyErrorKind::Nom(_) => todo!(),
}; };
labels.push(label); labels.push(label);
} }
let mut rep = Report::build(ReportKind::Error, (), 0) let mut rep = Report::build(ReportKind::Error, (), 0).with_message("Parse Error");
.with_message("Parse Error");
for lbl in labels { for lbl in labels {
rep = rep.with_label(lbl) rep = rep.with_label(lbl)
} }

View File

@ -17,7 +17,8 @@ pub fn hexadecimal(input: Span) -> IResult<Span, u64> {
))), ))),
), ),
|out: Span| { |out: Span| {
u64::from_str_radix(&str::replace(out.fragment(), "_", ""), 16).expect("error parsing literal") u64::from_str_radix(&str::replace(out.fragment(), "_", ""), 16)
.expect("error parsing literal")
}, },
)(input) )(input)
} }

View File

@ -1,17 +1,16 @@
use nom::{ use nom::{
branch::alt, branch::alt,
bytes::complete::tag, combinator::map,
character::complete::{char, multispace1},
combinator::{consumed, map},
error::context, error::context,
multi::{many0, separated_list0}, multi::{many0, separated_list0},
sequence::{delimited, terminated, tuple, preceded}, sequence::{delimited, preceded, tuple},
}; };
use crate::parser::{ use crate::parser::{
assign_statement, declaration, identifier, assign_statement, declaration,
proc::{proc_block, ProcBlock}, proc::{proc_block, ProcBlock},
ws0, Assign, IResult, NetDecl, Span, typename tokens::{token, TokenKind as tk, TokenSpan},
typename, Assign, IResult, NetDecl, Span,
}; };
#[derive(Debug)] #[derive(Debug)]
@ -22,7 +21,6 @@ pub enum PortDirection {
#[derive(Debug)] #[derive(Debug)]
pub struct PortDecl<'a> { pub struct PortDecl<'a> {
pub pos: Span<'a>,
pub direction: PortDirection, pub direction: PortDirection,
pub net: NetDecl<'a>, pub net: NetDecl<'a>,
} }
@ -40,56 +38,45 @@ pub enum ModuleItem<'a> {
Proc(ProcBlock<'a>), Proc(ProcBlock<'a>),
} }
fn port_decl(i: Span) -> IResult<Span, PortDecl> { fn port_decl(i: TokenSpan) -> IResult<TokenSpan, PortDecl> {
map( map(declaration, |net| PortDecl {
consumed( direction: PortDirection::Input,
declaration, net,
), })(i)
|(pos, net)| PortDecl {
pos,
direction: PortDirection::Input,
net,
},
)(i)
} }
fn inputs_list(input: Span) -> IResult<Span, Vec<PortDecl>> { fn inputs_list(input: TokenSpan) -> IResult<TokenSpan, Vec<PortDecl>> {
separated_list0(ws0(char(',')), ws0(port_decl))(input) separated_list0(token(tk::Comma), port_decl)(input)
} }
fn assign_item(input: Span) -> IResult<Span, Assign> { fn assign_item(input: TokenSpan) -> IResult<TokenSpan, Assign> {
context( context(
"assignment", "assignment",
delimited( delimited(token(tk::Assign), assign_statement, token(tk::Semicolon)),
ws0(terminated(tag("assign"), multispace1)),
ws0(assign_statement),
ws0(char(';')),
),
)(input) )(input)
} }
fn module_item(input: Span) -> IResult<Span, ModuleItem> { fn module_item(input: TokenSpan) -> IResult<TokenSpan, ModuleItem> {
alt(( alt((
map(assign_item, ModuleItem::Assign), map(assign_item, ModuleItem::Assign),
map(proc_block, ModuleItem::Proc), map(proc_block, ModuleItem::Proc),
))(input) ))(input)
} }
/// parse a top-level module declaration pub fn module(input: TokenSpan) -> IResult<TokenSpan, Module> {
pub fn module(input: Span) -> IResult<Span, Module> {
context( context(
"module", "module",
map( map(
tuple(( tuple((
tag("module"), token(tk::Module),
ws0(identifier), token(tk::Ident),
ws0(delimited(char('('), ws0(inputs_list), char(')'))), delimited(token(tk::LParen), inputs_list, token(tk::RParen)),
ws0(preceded(tag("->"), ws0(typename))), preceded(token(tk::RArrow), typename),
ws0(delimited(char('{'), ws0(many0(ws0(module_item))), char('}'))), delimited(token(tk::LBrace), many0(module_item), token(tk::RBrace)),
)), )),
|(_, name, inputs, ret, items)| Module { |(_, name, inputs, _ret, items)| Module {
name, // TODO: bring back returns
// TODO: add back in returns name: name.span(),
ports: inputs, ports: inputs,
items, items,
}, },

View File

@ -1,7 +1,5 @@
use nom::{ use nom::{
branch::alt, branch::alt,
bytes::complete::tag,
character::complete::char,
combinator::map, combinator::map,
error::context, error::context,
multi::{many1, separated_list1}, multi::{many1, separated_list1},
@ -9,7 +7,9 @@ use nom::{
}; };
use crate::parser::{ use crate::parser::{
assign_statement, expression, identifier, ws0, Assign, Expression, IResult, Span, assign_statement, expression,
tokens::{token, TokenKind as tk, TokenSpan},
Assign, Expression, IResult, Span,
}; };
#[derive(Debug)] #[derive(Debug)]
@ -36,38 +36,38 @@ pub struct MatchBlock<'a> {
pub arms: Vec<(Expression<'a>, ProcStatement<'a>)>, pub arms: Vec<(Expression<'a>, ProcStatement<'a>)>,
} }
fn match_arm(input: Span) -> IResult<Span, (Expression, ProcStatement)> { fn match_arm(input: TokenSpan) -> IResult<TokenSpan, (Expression, ProcStatement)> {
separated_pair(ws0(expression), tag("=>"), ws0(proc_statement))(input) separated_pair(expression, token(tk::FatArrow), proc_statement)(input)
} }
fn match_block(input: Span) -> IResult<Span, MatchBlock> { fn match_block(input: TokenSpan) -> IResult<TokenSpan, MatchBlock> {
context( context(
"match block", "match block",
map( map(
tuple(( tuple((
ws0(tag("match")), token(tk::Match),
ws0(delimited(char('('), ws0(expression), char(')'))), delimited(token(tk::LParen), expression, token(tk::RParen)),
ws0(delimited( delimited(
char('{'), token(tk::LBrace),
separated_list1(char(','), ws0(match_arm)), separated_list1(token(tk::Comma), match_arm),
char('}'), token(tk::RBrace),
)), ),
)), )),
|(_, expr, arms)| MatchBlock { expr, arms }, |(_, expr, arms)| MatchBlock { expr, arms },
), ),
)(input) )(input)
} }
fn statement_block(input: Span) -> IResult<Span, Vec<ProcStatement>> { fn statement_block(input: TokenSpan) -> IResult<TokenSpan, Vec<ProcStatement>> {
delimited( delimited(
char('{'), token(tk::LBrace),
separated_list1(char(';'), ws0(proc_statement)), separated_list1(token(tk::Semicolon), proc_statement),
char('}'), token(tk::RBrace),
)(input) )(input)
} }
/// parse a statement that is valid inside a proc block /// parse a statement that is valid inside a proc block
fn proc_statement(input: Span) -> IResult<Span, ProcStatement> { fn proc_statement(input: TokenSpan) -> IResult<TokenSpan, ProcStatement> {
alt(( alt((
map(match_block, ProcStatement::Match), map(match_block, ProcStatement::Match),
map(statement_block, ProcStatement::Block), map(statement_block, ProcStatement::Block),
@ -75,16 +75,19 @@ fn proc_statement(input: Span) -> IResult<Span, ProcStatement> {
))(input) ))(input)
} }
pub fn proc_block(input: Span) -> IResult<Span, ProcBlock> { pub fn proc_block(input: TokenSpan) -> IResult<TokenSpan, ProcBlock> {
context( context(
"proc block", "proc block",
map( map(
tuple(( tuple((
ws0(tag("proc")), token(tk::Proc),
ws0(delimited(char('('), ws0(identifier), char(')'))), delimited(token(tk::LParen), token(tk::Ident), token(tk::RParen)),
ws0(delimited(char('{'), many1(ws0(proc_statement)), char('}'))), delimited(token(tk::LBrace), many1(proc_statement), token(tk::RBrace)),
)), )),
|(_, net, items)| ProcBlock { net, items }, |(_, net, items)| ProcBlock {
net: net.span(),
items,
},
), ),
)(input) )(input)
} }

208
src/parser/tokens.rs Normal file
View File

@ -0,0 +1,208 @@
//! convert text into a token stream
use super::{identifier, ws0, IResult, Span};
use nom::{
branch::alt,
bytes::complete::tag,
character::complete::{anychar, digit1},
combinator::{consumed, map, recognize},
error::ParseError,
multi::many0,
};
use std::fmt;
/// A single lexed token: the piece of source text it covers plus the
/// classification the lexer assigned to it.
pub struct Token<'a> {
/// the source span this token was lexed from
span: Span<'a>,
/// what kind of token the lexer recognised at that span
kind: TokenKind,
}
impl fmt::Debug for Token<'_> {
    /// Renders the token as `<kind> @<location offset> <source text>`, which
    /// keeps debug dumps of whole token streams compact.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { span, kind } = self;
        write!(
            f,
            "{:?} @{} {:?}",
            kind,
            span.location_offset(),
            span.fragment()
        )
    }
}
impl<'a> Token<'a> {
    /// Creates a token of the given `kind` covering `span`.
    fn new(span: Span<'a>, kind: TokenKind) -> Self {
        Self { span, kind }
    }

    /// Returns the source span this token was lexed from.
    ///
    /// The returned span carries the source lifetime `'a` rather than the
    /// lifetime of the `&self` borrow, so it may be kept after the borrow of
    /// the token itself has ended.
    pub fn span(&self) -> Span<'a> {
        self.span
    }
}
/// The classification the lexer assigns to a [`Token`].
#[derive(Debug, PartialEq, Clone)]
pub enum TokenKind {
// no whitespace, for now
// no token trees either, for now
// Braces
LParen, // `(`
RParen, // `)`
LAngle, // `<`
RAngle, // `>`
LBrace, // `{`
RBrace, // `}`
LSquare, // `[`
RSquare, // `]`
// single chars
Colon, // `:`
Semicolon, // `;`
Comma, // `,`
BitNot, // `~`
BitAnd, // `&`
BitOr, // `|`
BitXor, // `^`
EqAssign, // `=`
// Multi Chars
FatArrow, // `=>`
RArrow, // `->`
// Literals
Ident, // whatever the parser's `identifier` rule accepts
Number, // a run of decimal digits
// Keywords
Module, // `module`
Assign, // `assign`
Match, // `match`
Proc, // `proc`
// Error
Error, // a single character that no other lexer rule matched
}
/// A cursor over a slice of tokens; this is the input type the token-based
/// parsers operate on.
#[derive(Debug, Clone)]
pub struct TokenSpan<'a> {
/// the tokens that have not been consumed yet
rest: &'a [Token<'a>],
/// position reported to error handling; starts at 0 in `new` and is advanced
/// by one for every token consumed by `token`
pos: usize,
}
impl<'a> TokenSpan<'a> {
pub fn new(rest: &'a [Token<'a>]) -> Self {
Self { rest, pos: 0 }
}
pub fn with_pos(rest: &'a [Token<'a>], pos: usize) -> Self {
Self { rest, pos }
}
}
impl nom::InputTake for TokenSpan<'_> {
    /// Returns the first `count` tokens as their own span.
    ///
    /// The prefix starts where `self` starts, so it keeps `self.pos`.
    /// Panics if `count` exceeds the number of remaining tokens.
    fn take(&self, count: usize) -> Self {
        TokenSpan::with_pos(&self.rest[..count], self.pos)
    }

    /// Splits the span after `count` tokens.
    ///
    /// Follows nom's convention of returning `(suffix, prefix)`, i.e. the
    /// remaining input first and the taken tokens second, matching the
    /// `(rest, output)` order of an `IResult`.
    /// Panics if `count` exceeds the number of remaining tokens.
    fn take_split(&self, count: usize) -> (Self, Self) {
        let (prefix, suffix) = self.rest.split_at(count);
        (
            TokenSpan::with_pos(suffix, self.pos + count),
            TokenSpan::with_pos(prefix, self.pos),
        )
    }
}
/// Lets nom combinators query how much input is left.
impl nom::InputLength for TokenSpan<'_> {
// the length of the input is the number of tokens not yet consumed
fn input_len(&self) -> usize {
self.rest.len()
}
}
/// Exposes the span's position to nom_greedyerror.
// NOTE(review): presumably GreedyError uses this to keep the error that got
// furthest into the input; confirm against the nom_greedyerror docs.
impl nom_greedyerror::Position for TokenSpan<'_> {
// the position is counted in tokens, not in characters of the source text
fn position(&self) -> usize {
self.pos
}
}
/// combinator that matches a token kind
pub fn token<'a, E>(
kind: TokenKind,
) -> impl FnMut(TokenSpan<'a>) -> nom::IResult<TokenSpan, &Token, E>
where
E: ParseError<TokenSpan<'a>>,
{
move |input: TokenSpan| {
let next = &input.rest[0];
if next.kind == kind.clone() {
let rest = TokenSpan::with_pos(&input.rest[1..], input.pos + 1);
Ok((rest, next))
} else {
Err(nom::Err::Error(E::from_error_kind(
input,
// TODO: Proper errors here
nom::error::ErrorKind::Tag,
)))
}
}
}
/// Turns source text into a flat list of tokens.
///
/// Lexer rules are tried in order: keywords, literals, braces, punctuation.
/// A character that none of them accepts becomes a one-character
/// `TokenKind::Error` token, so a stray character never makes a rule fail.
/// Every token is wrapped in `ws0`.
pub fn lex(input: Span) -> IResult<Span, Vec<Token>> {
    // fallback rule: swallow exactly one character and flag it as an error
    let error_token = map(recognize(anychar), |span| {
        Token::new(span, TokenKind::Error)
    });
    let any_token = alt((
        lex_keywords,
        lex_literals,
        lex_braces,
        lex_punctuation,
        error_token,
    ));
    many0(ws0(any_token))(input)
}
/// Lexes one bracket-like token: `(`, `)`, `<`, `>`, `{`, `}`, `[` or `]`.
fn lex_braces(input: Span) -> IResult<Span, Token> {
    let classify = alt((
        map(tag("("), |_| TokenKind::LParen),
        map(tag(")"), |_| TokenKind::RParen),
        map(tag("<"), |_| TokenKind::LAngle),
        map(tag(">"), |_| TokenKind::RAngle),
        map(tag("{"), |_| TokenKind::LBrace),
        map(tag("}"), |_| TokenKind::RBrace),
        map(tag("["), |_| TokenKind::LSquare),
        map(tag("]"), |_| TokenKind::RSquare),
    ));
    // `consumed` hands back the matched text alongside the kind
    let (remaining, (span, kind)) = consumed(classify)(input)?;
    Ok((remaining, Token::new(span, kind)))
}
/// Lexes an identifier or a decimal number literal.
fn lex_literals(input: Span) -> IResult<Span, Token> {
    let classify = alt((
        map(identifier, |_| TokenKind::Ident),
        map(digit1, |_| TokenKind::Number),
    ));
    // `consumed` hands back the matched text alongside the kind
    let (remaining, (span, kind)) = consumed(classify)(input)?;
    Ok((remaining, Token::new(span, kind)))
}
/// Lexes one punctuation or operator token.
///
/// `=>` is listed before `=` so that the longer spelling wins.
fn lex_punctuation(input: Span) -> IResult<Span, Token> {
    let classify = alt((
        map(tag(":"), |_| TokenKind::Colon),
        map(tag(";"), |_| TokenKind::Semicolon),
        map(tag(","), |_| TokenKind::Comma),
        map(tag("->"), |_| TokenKind::RArrow),
        map(tag("=>"), |_| TokenKind::FatArrow),
        map(tag("~"), |_| TokenKind::BitNot),
        map(tag("&"), |_| TokenKind::BitAnd),
        map(tag("^"), |_| TokenKind::BitXor),
        map(tag("|"), |_| TokenKind::BitOr),
        map(tag("="), |_| TokenKind::EqAssign),
    ));
    // `consumed` hands back the matched text alongside the kind
    let (remaining, (span, kind)) = consumed(classify)(input)?;
    Ok((remaining, Token::new(span, kind)))
}
fn lex_keywords(input: Span) -> IResult<Span, Token> {
map(
consumed(alt((
map(tag("module"), |_| TokenKind::Module),
map(tag("assign"), |_| TokenKind::Assign),
map(tag("match"), |_| TokenKind::Match),
map(tag("proc"), |_| TokenKind::Proc),
))),
|(span, kind)| Token::new(span, kind),
)(input)
}