From 825703e1ce5865bd664a95acbb0c8e27cdfac2ff Mon Sep 17 00:00:00 2001 From: NotAFile Date: Wed, 2 Feb 2022 01:00:11 +0100 Subject: [PATCH] switch parser to using tokens --- src/main.rs | 6 +++- src/parser.rs | 83 +++++++++++++++++++++----------------------- src/parser/module.rs | 42 +++++++++------------- src/parser/proc.rs | 47 ++++++++++++++----------- src/parser/tokens.rs | 53 ++++++++++++++++++++-------- 5 files changed, 126 insertions(+), 105 deletions(-) diff --git a/src/main.rs b/src/main.rs index a97885e..03b7a85 100644 --- a/src/main.rs +++ b/src/main.rs @@ -41,15 +41,19 @@ fn main() { .expect("error reading file"); let input: &str = input.as_str(); let input = parser::Span::new(input); - let parsed = parser::parse(input); + let lexed = parser::tokens::lex(input).unwrap(); + let tokens = parser::tokens::TokenSpan::new(&lexed.1); + let parsed = parser::parse(tokens); match parsed { Err(nom::Err::Error(err) | nom::Err::Failure(err)) => { if opt.debug { println!("{err:#?}"); } + /* parser::error::convert_error(input, err) .eprint(Source::from(input.fragment())) .unwrap(); + */ } Err(_) => (unreachable!()), Ok(res) => { diff --git a/src/parser.rs b/src/parser.rs index 79345e4..6aa7cc7 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -24,7 +24,7 @@ pub type IErr = GreedyError; pub type IResult> = nom::IResult; pub use crate::parser::module::{module, Module, ModuleItem, PortDirection}; -use literals::hexadecimal; +use crate::parser::tokens::{token, TokenKind as tk, TokenSpan}; fn ws0<'a, F: 'a, O, E: ParseError>>( inner: F, @@ -43,27 +43,20 @@ fn identifier(input: Span) -> IResult { } // TODO: allow recursive generics -fn typename(input: Span) -> IResult { +// TODO: allow expressions again +fn typename(input: TokenSpan) -> IResult { map( tuple(( - identifier, - opt(delimited(char('<'), ws0(expression), char('>'))), + token(tk::Ident), + opt(delimited(token(tk::LAngle), expression, token(tk::RAngle))), )), |(ident, _)| TypeName { - name: ident, + name: ident.span(), generics: (), }, )(input) } -fn widthspec(input: Span) -> IResult { - delimited(char('['), ws0(decimal), char(']'))(input) -} - -fn intliteral(input: Span) -> IResult { - tuple((terminated(decimal, char('\'')), alt((decimal, hexadecimal))))(input) -} - #[derive(Debug)] pub struct TypeName<'a> { name: Span<'a>, @@ -74,7 +67,7 @@ pub struct TypeName<'a> { pub struct NetDecl<'a> { pub name: Span<'a>, pub typ: TypeName<'a>, - pub value: Option<(u64, u64)>, + pub value: Option>, } #[derive(Debug)] @@ -114,87 +107,91 @@ pub enum Expression<'a> { Operation(Box>), } -fn declaration(i: Span) -> IResult { +// TODO: reallow assignments +fn declaration(i: TokenSpan) -> IResult { map( tuple(( - separated_pair(identifier, ws0(char(':')), typename), - opt(preceded(ws0(char('=')), intliteral)), + separated_pair(token(tk::Ident), token(tk::Colon), typename), + opt(preceded(token(tk::Assign), token(tk::Number))), )), |((ident, typ), value)| NetDecl { - name: ident, + name: ident.span(), typ, - value, + value: None, }, )(i) } -fn operation(input: Span) -> IResult { +fn operation(input: TokenSpan) -> IResult { // temporarily given up on before I learn the shunting yard algorithm alt(( map( - separated_pair(ws0(expression_nonrecurse), char('&'), ws0(expression)), + separated_pair(expression_nonrecurse, token(tk::BitAnd), expression), |(a, b)| Operation::And { a, b }, ), map( - separated_pair(ws0(expression_nonrecurse), char('|'), ws0(expression)), + separated_pair(expression_nonrecurse, token(tk::BitOr), expression), |(a, b)| Operation::Or { a, b }, ), map( - separated_pair(ws0(expression_nonrecurse), char('^'), ws0(expression)), + separated_pair(expression_nonrecurse, token(tk::BitXor), expression), |(a, b)| Operation::Xor { a, b }, ), - map(preceded(char('~'), expression), Operation::Not), + map(preceded(token(tk::BitNot), expression), Operation::Not), ))(input) } -fn call_item(input: Span) -> IResult { +fn call_item(input: TokenSpan) -> IResult { map( tuple(( - ws0(identifier), + token(tk::Ident), delimited( - char('('), - ws0(separated_list0(char(','), expression)), - char(')'), + token(tk::LParen), + separated_list0(token(tk::Comma), expression), + token(tk::RParen), ), )), - |(name, args)| Call { name, args }, + |(name, args)| Call { + name: name.span(), + args, + }, )(input) } /// parser combinators can not parse left-recursive grammars. To work around this, we split /// expressions into a recursive and non-recursive portion. /// Parsers reachable from this point must call expression_nonrecurse instead -fn expression(input: Span) -> IResult { +fn expression(input: TokenSpan) -> IResult { alt(( - map(ws0(operation), |op| Expression::Operation(Box::new(op))), + map(operation, |op| Expression::Operation(Box::new(op))), expression_nonrecurse, ))(input) } /// the portion of the expression grammar that can be parsed without left recursion -fn expression_nonrecurse(input: Span) -> IResult { +fn expression_nonrecurse(input: TokenSpan) -> IResult { alt(( - map(ws0(decimal), Expression::Literal), - map(ws0(call_item), |call| Expression::Call(Box::new(call))), - map(ws0(identifier), |ident| { - Expression::Ident(*ident.fragment()) + map(token(tk::Number), |_| Expression::Literal(42)), + map(call_item, |call| Expression::Call(Box::new(call))), + map(token(tk::Ident), |ident| { + Expression::Ident(*ident.span().fragment()) }), - delimited(char('('), expression, char(')')), + delimited(token(tk::LParen), expression, token(tk::RParen)), ))(input) } -fn assign_statement(input: Span) -> IResult { +fn assign_statement(input: TokenSpan) -> IResult { map( - separated_pair(ws0(identifier), char('='), ws0(expression)), + separated_pair(token(tk::Ident), token(tk::EqAssign), expression), |(lhs, expr)| Assign { - lhs: (*lhs.fragment()), + lhs: (*lhs.span().fragment()), expr, }, )(input) } -pub fn parse(input: Span) -> IResult { - ws0(module)(input) +pub fn parse(input: TokenSpan) -> IResult { + module(input) } #[cfg(test)] diff --git a/src/parser/module.rs b/src/parser/module.rs index bae6009..005079a 100644 --- a/src/parser/module.rs +++ b/src/parser/module.rs @@ -11,6 +11,7 @@ use nom::{ use crate::parser::{ assign_statement, declaration, identifier, proc::{proc_block, ProcBlock}, + tokens::{token, Token, TokenKind as tk, TokenSpan}, typename, ws0, Assign, IResult, NetDecl, Span, }; @@ -22,7 +23,6 @@ pub enum PortDirection { #[derive(Debug)] pub struct PortDecl<'a> { - pub pos: Span<'a>, pub direction: PortDirection, pub net: NetDecl<'a>, } @@ -40,55 +40,45 @@ pub enum ModuleItem<'a> { Proc(ProcBlock<'a>), } -fn port_decl(i: Span) -> IResult { - map(consumed(declaration), |(pos, net)| PortDecl { - pos, +fn port_decl(i: TokenSpan) -> IResult { + map(declaration, |net| PortDecl { direction: PortDirection::Input, net, })(i) } -fn inputs_list(input: Span) -> IResult> { - separated_list0(ws0(char(',')), ws0(port_decl))(input) +fn inputs_list(input: TokenSpan) -> IResult> { + separated_list0(token(tk::Comma), port_decl)(input) } -fn assign_item(input: Span) -> IResult { +fn assign_item(input: TokenSpan) -> IResult { context( "assignment", - delimited( - ws0(terminated(tag("assign"), multispace1)), - ws0(assign_statement), - ws0(char(';')), - ), + delimited(token(tk::Assign), assign_statement, token(tk::Semicolon)), )(input) } -fn module_item(input: Span) -> IResult { +fn module_item(input: TokenSpan) -> IResult { alt(( map(assign_item, ModuleItem::Assign), map(proc_block, ModuleItem::Proc), ))(input) } -/// parse a top-level module declaration -pub fn module(input: Span) -> IResult { +pub fn module(input: TokenSpan) -> IResult { context( "module", map( tuple(( - tag("module"), - ws0(identifier), - ws0(delimited(char('('), ws0(inputs_list), char(')'))), - ws0(preceded(tag("->"), ws0(typename))), - ws0(delimited( - char('{'), - ws0(many0(ws0(module_item))), - char('}'), - )), + token(tk::Module), + token(tk::Ident), + delimited(token(tk::LParen), inputs_list, token(tk::RParen)), + preceded(token(tk::RArrow), typename), + delimited(token(tk::LBrace), many0(module_item), token(tk::RBrace)), )), |(_, name, inputs, ret, items)| Module { - name, - // TODO: add back in returns + // TODO: bring back returns + name: name.span(), ports: inputs, items, }, diff --git a/src/parser/proc.rs b/src/parser/proc.rs index cd7445d..1ac0796 100644 --- a/src/parser/proc.rs +++ b/src/parser/proc.rs @@ -9,7 +9,9 @@ use nom::{ }; use crate::parser::{ - assign_statement, expression, identifier, ws0, Assign, Expression, IResult, Span, + assign_statement, expression, identifier, + tokens::{token, Token, TokenKind as tk, TokenSpan}, + ws0, Assign, Expression, IResult, Span, }; #[derive(Debug)] @@ -36,38 +38,38 @@ pub struct MatchBlock<'a> { pub arms: Vec<(Expression<'a>, ProcStatement<'a>)>, } -fn match_arm(input: Span) -> IResult { - separated_pair(ws0(expression), tag("=>"), ws0(proc_statement))(input) +fn match_arm(input: TokenSpan) -> IResult { + separated_pair(expression, token(tk::FatArrow), proc_statement)(input) } -fn match_block(input: Span) -> IResult { +fn match_block(input: TokenSpan) -> IResult { context( "match block", map( tuple(( - ws0(tag("match")), - ws0(delimited(char('('), ws0(expression), char(')'))), - ws0(delimited( - char('{'), - separated_list1(char(','), ws0(match_arm)), - char('}'), - )), + token(tk::Match), + delimited(token(tk::LParen), expression, token(tk::RParen)), + delimited( + token(tk::LBrace), + separated_list1(token(tk::Comma), match_arm), + token(tk::RBrace), + ), )), |(_, expr, arms)| MatchBlock { expr, arms }, ), )(input) } -fn statement_block(input: Span) -> IResult> { +fn statement_block(input: TokenSpan) -> IResult> { delimited( - char('{'), - separated_list1(char(';'), ws0(proc_statement)), - char('}'), + token(tk::LBrace), + separated_list1(token(tk::Semicolon), proc_statement), + token(tk::RBrace), )(input) } /// parse a statement that is valid inside a proc block -fn proc_statement(input: Span) -> IResult { +fn proc_statement(input: TokenSpan) -> IResult { alt(( map(match_block, ProcStatement::Match), map(statement_block, ProcStatement::Block), @@ -75,16 +77,19 @@ fn proc_statement(input: Span) -> IResult { ))(input) } -pub fn proc_block(input: Span) -> IResult { +pub fn proc_block(input: TokenSpan) -> IResult { context( "proc block", map( tuple(( - ws0(tag("proc")), - ws0(delimited(char('('), ws0(identifier), char(')'))), - ws0(delimited(char('{'), many1(ws0(proc_statement)), char('}'))), + token(tk::Proc), + delimited(token(tk::LParen), token(tk::Ident), token(tk::RParen)), + delimited(token(tk::LBrace), many1(proc_statement), token(tk::RBrace)), )), - |(_, net, items)| ProcBlock { net, items }, + |(_, net, items)| ProcBlock { + net: net.span(), + items, + }, ), )(input) } diff --git a/src/parser/tokens.rs b/src/parser/tokens.rs index 29df0c1..f6ee473 100644 --- a/src/parser/tokens.rs +++ b/src/parser/tokens.rs @@ -34,13 +34,10 @@ impl<'a> Token<'a> { fn new(span: Span<'a>, kind: TokenKind) -> Self { Self { span, kind } } -} -pub fn pretty_tokens(mut w: impl io::Write, toks: &[Token]) -> io::Result<()> { - for tok in toks { - writeln!(w, "{:?}", tok)?; + pub fn span(&self) -> Span { + self.span } - Ok(()) } #[derive(Debug, PartialEq, Clone)] @@ -60,21 +57,27 @@ pub enum TokenKind { Colon, Semicolon, Comma, - Caret, - Tilde, - Assign, + BitNot, + BitAnd, + BitOr, + BitXor, + EqAssign, // Multi Chars + FatArrow, RArrow, // Literals Ident, Number, // Keywords Module, + Assign, + Match, + Proc, // Error Error, } -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct TokenSpan<'a> { rest: &'a [Token<'a>], pos: usize, @@ -103,6 +106,12 @@ impl nom::InputTake for TokenSpan<'_> { } } +impl nom::InputLength for TokenSpan<'_> { + fn input_len(&self) -> usize { + self.rest.len() + } +} + impl nom_greedyerror::Position for TokenSpan<'_> { fn position(&self) -> usize { self.pos @@ -119,11 +128,12 @@ where move |input: TokenSpan| { let next = &input.rest[0]; if next.kind == kind.clone() { - let rest = TokenSpan::new(&input.rest[1..]); + let rest = TokenSpan::with_pos(&input.rest[1..], input.pos + 1); Ok((rest, next)) } else { Err(nom::Err::Error(E::from_error_kind( input, + // TODO: Proper errors here nom::error::ErrorKind::Tag, ))) } @@ -132,7 +142,7 @@ where pub fn lex(input: Span) -> IResult> { many0(ws0(alt(( - map(tag("module"), |span| Token::new(span, TokenKind::Module)), + lex_keywords, lex_literals, lex_braces, lex_punctuation, @@ -174,10 +184,25 @@ fn lex_punctuation(input: Span) -> IResult { map(tag(":"), |_| TokenKind::Colon), map(tag(";"), |_| TokenKind::Semicolon), map(tag(","), |_| TokenKind::Comma), - map(tag("^"), |_| TokenKind::Caret), map(tag("->"), |_| TokenKind::RArrow), - map(tag("~"), |_| TokenKind::Tilde), - map(tag("="), |_| TokenKind::Assign), + map(tag("=>"), |_| TokenKind::FatArrow), + map(tag("~"), |_| TokenKind::BitNot), + map(tag("&"), |_| TokenKind::BitAnd), + map(tag("^"), |_| TokenKind::BitXor), + map(tag("|"), |_| TokenKind::BitOr), + map(tag("="), |_| TokenKind::EqAssign), + ))), + |(span, kind)| Token::new(span, kind), + )(input) +} + +fn lex_keywords(input: Span) -> IResult { + map( + consumed(alt(( + map(tag("module"), |_| TokenKind::Module), + map(tag("assign"), |_| TokenKind::Assign), + map(tag("match"), |_| TokenKind::Match), + map(tag("proc"), |_| TokenKind::Proc), ))), |(span, kind)| Token::new(span, kind), )(input)