switch parser to using tokens

This commit is contained in:
NotAFile 2022-02-02 01:00:11 +01:00
parent 108297b966
commit 825703e1ce
5 changed files with 126 additions and 105 deletions

View File

@ -41,15 +41,19 @@ fn main() {
.expect("error reading file"); .expect("error reading file");
let input: &str = input.as_str(); let input: &str = input.as_str();
let input = parser::Span::new(input); let input = parser::Span::new(input);
let parsed = parser::parse(input); let lexed = parser::tokens::lex(input).unwrap();
let tokens = parser::tokens::TokenSpan::new(&lexed.1);
let parsed = parser::parse(tokens);
match parsed { match parsed {
Err(nom::Err::Error(err) | nom::Err::Failure(err)) => { Err(nom::Err::Error(err) | nom::Err::Failure(err)) => {
if opt.debug { if opt.debug {
println!("{err:#?}"); println!("{err:#?}");
} }
/*
parser::error::convert_error(input, err) parser::error::convert_error(input, err)
.eprint(Source::from(input.fragment())) .eprint(Source::from(input.fragment()))
.unwrap(); .unwrap();
*/
} }
Err(_) => (unreachable!()), Err(_) => (unreachable!()),
Ok(res) => { Ok(res) => {

View File

@ -24,7 +24,7 @@ pub type IErr<I> = GreedyError<I, ErrorKind>;
pub type IResult<I, O, E = IErr<I>> = nom::IResult<I, O, E>; pub type IResult<I, O, E = IErr<I>> = nom::IResult<I, O, E>;
pub use crate::parser::module::{module, Module, ModuleItem, PortDirection}; pub use crate::parser::module::{module, Module, ModuleItem, PortDirection};
use literals::hexadecimal; use crate::parser::tokens::{token, TokenKind as tk, TokenSpan};
fn ws0<'a, F: 'a, O, E: ParseError<Span<'a>>>( fn ws0<'a, F: 'a, O, E: ParseError<Span<'a>>>(
inner: F, inner: F,
@ -43,27 +43,20 @@ fn identifier(input: Span) -> IResult<Span, Span> {
} }
// TODO: allow recursive generics // TODO: allow recursive generics
fn typename(input: Span) -> IResult<Span, TypeName> { // TODO: allow expressions again
fn typename(input: TokenSpan) -> IResult<TokenSpan, TypeName> {
map( map(
tuple(( tuple((
identifier, token(tk::Ident),
opt(delimited(char('<'), ws0(expression), char('>'))), opt(delimited(token(tk::LAngle), expression, token(tk::RAngle))),
)), )),
|(ident, _)| TypeName { |(ident, _)| TypeName {
name: ident, name: ident.span(),
generics: (), generics: (),
}, },
)(input) )(input)
} }
fn widthspec(input: Span) -> IResult<Span, u64> {
delimited(char('['), ws0(decimal), char(']'))(input)
}
fn intliteral(input: Span) -> IResult<Span, (u64, u64)> {
tuple((terminated(decimal, char('\'')), alt((decimal, hexadecimal))))(input)
}
#[derive(Debug)] #[derive(Debug)]
pub struct TypeName<'a> { pub struct TypeName<'a> {
name: Span<'a>, name: Span<'a>,
@ -74,7 +67,7 @@ pub struct TypeName<'a> {
/// A declared net: its name, its type, and an optional initial value.
pub struct NetDecl<'a> {
    /// Source span of the net's identifier.
    pub name: Span<'a>,
    /// The declared type of the net.
    pub typ: TypeName<'a>,
    /// Source span of the initial value, if any.
    /// NOTE(review): the `declaration` parser currently always stores `None` here.
    pub value: Option<Span<'a>>,
}
#[derive(Debug)] #[derive(Debug)]
@ -114,87 +107,91 @@ pub enum Expression<'a> {
Operation(Box<Operation<'a>>), Operation(Box<Operation<'a>>),
} }
fn declaration(i: Span) -> IResult<Span, NetDecl> { // TODO: reallow assignments
fn declaration(i: TokenSpan) -> IResult<TokenSpan, NetDecl> {
map( map(
tuple(( tuple((
separated_pair(identifier, ws0(char(':')), typename), separated_pair(token(tk::Ident), token(tk::Colon), typename),
opt(preceded(ws0(char('=')), intliteral)), opt(preceded(token(tk::Assign), token(tk::Number))),
)), )),
|((ident, typ), value)| NetDecl { |((ident, typ), value)| NetDecl {
name: ident, name: ident.span(),
typ, typ,
value, value: None,
}, },
)(i) )(i)
} }
fn operation(input: Span) -> IResult<Span, Operation> { fn operation(input: TokenSpan) -> IResult<TokenSpan, Operation> {
// temporarily given up on before I learn the shunting yard algorithm // temporarily given up on before I learn the shunting yard algorithm
alt(( alt((
map( map(
separated_pair(ws0(expression_nonrecurse), char('&'), ws0(expression)), separated_pair(expression_nonrecurse, token(tk::BitAnd), expression),
|(a, b)| Operation::And { a, b }, |(a, b)| Operation::And { a, b },
), ),
map( map(
separated_pair(ws0(expression_nonrecurse), char('|'), ws0(expression)), separated_pair(expression_nonrecurse, token(tk::BitOr), expression),
|(a, b)| Operation::Or { a, b }, |(a, b)| Operation::Or { a, b },
), ),
map( map(
separated_pair(ws0(expression_nonrecurse), char('^'), ws0(expression)), separated_pair(expression_nonrecurse, token(tk::BitXor), expression),
|(a, b)| Operation::Xor { a, b }, |(a, b)| Operation::Xor { a, b },
), ),
map(preceded(char('~'), expression), Operation::Not), map(preceded(token(tk::BitNot), expression), Operation::Not),
))(input) ))(input)
} }
fn call_item(input: Span) -> IResult<Span, Call> { fn call_item(input: TokenSpan) -> IResult<TokenSpan, Call> {
map( map(
tuple(( tuple((
ws0(identifier), token(tk::Ident),
delimited( delimited(
char('('), token(tk::LParen),
ws0(separated_list0(char(','), expression)), separated_list0(token(tk::Comma), expression),
char(')'), token(tk::RParen),
), ),
)), )),
|(name, args)| Call { name, args }, |(name, args)| Call {
name: name.span(),
args,
},
)(input) )(input)
} }
/// parser combinators can not parse left-recursive grammars. To work around this, we split /// parser combinators can not parse left-recursive grammars. To work around this, we split
/// expressions into a recursive and non-recursive portion. /// expressions into a recursive and non-recursive portion.
/// Parsers reachable from this point must call expression_nonrecurse instead /// Parsers reachable from this point must call expression_nonrecurse instead
fn expression(input: Span) -> IResult<Span, Expression> { fn expression(input: TokenSpan) -> IResult<TokenSpan, Expression> {
alt(( alt((
map(ws0(operation), |op| Expression::Operation(Box::new(op))), map(operation, |op| Expression::Operation(Box::new(op))),
expression_nonrecurse, expression_nonrecurse,
))(input) ))(input)
} }
/// the portion of the expression grammar that can be parsed without left recursion /// the portion of the expression grammar that can be parsed without left recursion
fn expression_nonrecurse(input: Span) -> IResult<Span, Expression> { fn expression_nonrecurse(input: TokenSpan) -> IResult<TokenSpan, Expression> {
alt(( alt((
map(ws0(decimal), Expression::Literal), map(token(tk::Number), |_| Expression::Literal(42)),
map(ws0(call_item), |call| Expression::Call(Box::new(call))), map(call_item, |call| Expression::Call(Box::new(call))),
map(ws0(identifier), |ident| { map(token(tk::Ident), |ident| {
Expression::Ident(*ident.fragment()) Expression::Ident(*ident.span().fragment())
}), }),
delimited(char('('), expression, char(')')), delimited(token(tk::LParen), expression, token(tk::RParen)),
))(input) ))(input)
} }
fn assign_statement(input: Span) -> IResult<Span, Assign> { fn assign_statement(input: TokenSpan) -> IResult<TokenSpan, Assign> {
map( map(
separated_pair(ws0(identifier), char('='), ws0(expression)), separated_pair(token(tk::Ident), token(tk::EqAssign), expression),
|(lhs, expr)| Assign { |(lhs, expr)| Assign {
lhs: (*lhs.fragment()), lhs: (*lhs.span().fragment()),
expr, expr,
}, },
)(input) )(input)
} }
/// Entry point of the parser: parses the given token stream as a single module
/// by delegating to `module`.
pub fn parse(input: TokenSpan) -> IResult<TokenSpan, Module> {
    module(input)
}
#[cfg(test)] #[cfg(test)]

View File

@ -11,6 +11,7 @@ use nom::{
use crate::parser::{ use crate::parser::{
assign_statement, declaration, identifier, assign_statement, declaration, identifier,
proc::{proc_block, ProcBlock}, proc::{proc_block, ProcBlock},
tokens::{token, Token, TokenKind as tk, TokenSpan},
typename, ws0, Assign, IResult, NetDecl, Span, typename, ws0, Assign, IResult, NetDecl, Span,
}; };
@ -22,7 +23,6 @@ pub enum PortDirection {
/// A single port of a module: a net declaration together with its direction.
#[derive(Debug)]
pub struct PortDecl<'a> {
    /// Direction of the port.
    /// NOTE(review): `port_decl` currently always produces `PortDirection::Input`.
    pub direction: PortDirection,
    /// The declared net (name, type, optional value).
    pub net: NetDecl<'a>,
}
@ -40,55 +40,45 @@ pub enum ModuleItem<'a> {
Proc(ProcBlock<'a>), Proc(ProcBlock<'a>),
} }
fn port_decl(i: Span) -> IResult<Span, PortDecl> { fn port_decl(i: TokenSpan) -> IResult<TokenSpan, PortDecl> {
map(consumed(declaration), |(pos, net)| PortDecl { map(declaration, |net| PortDecl {
pos,
direction: PortDirection::Input, direction: PortDirection::Input,
net, net,
})(i) })(i)
} }
fn inputs_list(input: Span) -> IResult<Span, Vec<PortDecl>> { fn inputs_list(input: TokenSpan) -> IResult<TokenSpan, Vec<PortDecl>> {
separated_list0(ws0(char(',')), ws0(port_decl))(input) separated_list0(token(tk::Comma), port_decl)(input)
} }
/// Parses a module-level assignment: the `assign` keyword, an assignment
/// statement, and a terminating semicolon. Errors carry the "assignment" context.
fn assign_item(input: TokenSpan) -> IResult<TokenSpan, Assign> {
    let keyword_statement_semicolon =
        delimited(token(tk::Assign), assign_statement, token(tk::Semicolon));
    context("assignment", keyword_statement_semicolon)(input)
}
fn module_item(input: Span) -> IResult<Span, ModuleItem> { fn module_item(input: TokenSpan) -> IResult<TokenSpan, ModuleItem> {
alt(( alt((
map(assign_item, ModuleItem::Assign), map(assign_item, ModuleItem::Assign),
map(proc_block, ModuleItem::Proc), map(proc_block, ModuleItem::Proc),
))(input) ))(input)
} }
/// parse a top-level module declaration pub fn module(input: TokenSpan) -> IResult<TokenSpan, Module> {
pub fn module(input: Span) -> IResult<Span, Module> {
context( context(
"module", "module",
map( map(
tuple(( tuple((
tag("module"), token(tk::Module),
ws0(identifier), token(tk::Ident),
ws0(delimited(char('('), ws0(inputs_list), char(')'))), delimited(token(tk::LParen), inputs_list, token(tk::RParen)),
ws0(preceded(tag("->"), ws0(typename))), preceded(token(tk::RArrow), typename),
ws0(delimited( delimited(token(tk::LBrace), many0(module_item), token(tk::RBrace)),
char('{'),
ws0(many0(ws0(module_item))),
char('}'),
)),
)), )),
|(_, name, inputs, ret, items)| Module { |(_, name, inputs, ret, items)| Module {
name, // TODO: bring back returns
// TODO: add back in returns name: name.span(),
ports: inputs, ports: inputs,
items, items,
}, },

View File

@ -9,7 +9,9 @@ use nom::{
}; };
use crate::parser::{ use crate::parser::{
assign_statement, expression, identifier, ws0, Assign, Expression, IResult, Span, assign_statement, expression, identifier,
tokens::{token, Token, TokenKind as tk, TokenSpan},
ws0, Assign, Expression, IResult, Span,
}; };
#[derive(Debug)] #[derive(Debug)]
@ -36,38 +38,38 @@ pub struct MatchBlock<'a> {
pub arms: Vec<(Expression<'a>, ProcStatement<'a>)>, pub arms: Vec<(Expression<'a>, ProcStatement<'a>)>,
} }
fn match_arm(input: Span) -> IResult<Span, (Expression, ProcStatement)> { fn match_arm(input: TokenSpan) -> IResult<TokenSpan, (Expression, ProcStatement)> {
separated_pair(ws0(expression), tag("=>"), ws0(proc_statement))(input) separated_pair(expression, token(tk::FatArrow), proc_statement)(input)
} }
fn match_block(input: Span) -> IResult<Span, MatchBlock> { fn match_block(input: TokenSpan) -> IResult<TokenSpan, MatchBlock> {
context( context(
"match block", "match block",
map( map(
tuple(( tuple((
ws0(tag("match")), token(tk::Match),
ws0(delimited(char('('), ws0(expression), char(')'))), delimited(token(tk::LParen), expression, token(tk::RParen)),
ws0(delimited( delimited(
char('{'), token(tk::LBrace),
separated_list1(char(','), ws0(match_arm)), separated_list1(token(tk::Comma), match_arm),
char('}'), token(tk::RBrace),
)), ),
)), )),
|(_, expr, arms)| MatchBlock { expr, arms }, |(_, expr, arms)| MatchBlock { expr, arms },
), ),
)(input) )(input)
} }
/// Parses a brace-delimited block of one or more statements separated by semicolons.
fn statement_block(input: TokenSpan) -> IResult<TokenSpan, Vec<ProcStatement>> {
    let statements = separated_list1(token(tk::Semicolon), proc_statement);
    delimited(token(tk::LBrace), statements, token(tk::RBrace))(input)
}
/// parse a statement that is valid inside a proc block /// parse a statement that is valid inside a proc block
fn proc_statement(input: Span) -> IResult<Span, ProcStatement> { fn proc_statement(input: TokenSpan) -> IResult<TokenSpan, ProcStatement> {
alt(( alt((
map(match_block, ProcStatement::Match), map(match_block, ProcStatement::Match),
map(statement_block, ProcStatement::Block), map(statement_block, ProcStatement::Block),
@ -75,16 +77,19 @@ fn proc_statement(input: Span) -> IResult<Span, ProcStatement> {
))(input) ))(input)
} }
pub fn proc_block(input: Span) -> IResult<Span, ProcBlock> { pub fn proc_block(input: TokenSpan) -> IResult<TokenSpan, ProcBlock> {
context( context(
"proc block", "proc block",
map( map(
tuple(( tuple((
ws0(tag("proc")), token(tk::Proc),
ws0(delimited(char('('), ws0(identifier), char(')'))), delimited(token(tk::LParen), token(tk::Ident), token(tk::RParen)),
ws0(delimited(char('{'), many1(ws0(proc_statement)), char('}'))), delimited(token(tk::LBrace), many1(proc_statement), token(tk::RBrace)),
)), )),
|(_, net, items)| ProcBlock { net, items }, |(_, net, items)| ProcBlock {
net: net.span(),
items,
},
), ),
)(input) )(input)
} }

View File

@ -34,13 +34,10 @@ impl<'a> Token<'a> {
fn new(span: Span<'a>, kind: TokenKind) -> Self { fn new(span: Span<'a>, kind: TokenKind) -> Self {
Self { span, kind } Self { span, kind }
} }
}
pub fn pretty_tokens(mut w: impl io::Write, toks: &[Token]) -> io::Result<()> { pub fn span(&self) -> Span {
for tok in toks { self.span
writeln!(w, "{:?}", tok)?;
} }
Ok(())
} }
#[derive(Debug, PartialEq, Clone)] #[derive(Debug, PartialEq, Clone)]
@ -60,21 +57,27 @@ pub enum TokenKind {
Colon, Colon,
Semicolon, Semicolon,
Comma, Comma,
Caret, BitNot,
Tilde, BitAnd,
Assign, BitOr,
BitXor,
EqAssign,
// Multi Chars // Multi Chars
FatArrow,
RArrow, RArrow,
// Literals // Literals
Ident, Ident,
Number, Number,
// Keywords // Keywords
Module, Module,
Assign,
Match,
Proc,
// Error // Error
Error, Error,
} }
#[derive(Debug)] #[derive(Debug, Clone)]
pub struct TokenSpan<'a> { pub struct TokenSpan<'a> {
rest: &'a [Token<'a>], rest: &'a [Token<'a>],
pos: usize, pos: usize,
@ -103,6 +106,12 @@ impl nom::InputTake for TokenSpan<'_> {
} }
} }
impl nom::InputLength for TokenSpan<'_> {
    /// Number of tokens remaining in this span.
    fn input_len(&self) -> usize {
        let Self { rest, .. } = self;
        rest.len()
    }
}
impl nom_greedyerror::Position for TokenSpan<'_> { impl nom_greedyerror::Position for TokenSpan<'_> {
fn position(&self) -> usize { fn position(&self) -> usize {
self.pos self.pos
@ -119,11 +128,12 @@ where
move |input: TokenSpan| { move |input: TokenSpan| {
let next = &input.rest[0]; let next = &input.rest[0];
if next.kind == kind.clone() { if next.kind == kind.clone() {
let rest = TokenSpan::new(&input.rest[1..]); let rest = TokenSpan::with_pos(&input.rest[1..], input.pos + 1);
Ok((rest, next)) Ok((rest, next))
} else { } else {
Err(nom::Err::Error(E::from_error_kind( Err(nom::Err::Error(E::from_error_kind(
input, input,
// TODO: Proper errors here
nom::error::ErrorKind::Tag, nom::error::ErrorKind::Tag,
))) )))
} }
@ -132,7 +142,7 @@ where
pub fn lex(input: Span) -> IResult<Span, Vec<Token>> { pub fn lex(input: Span) -> IResult<Span, Vec<Token>> {
many0(ws0(alt(( many0(ws0(alt((
map(tag("module"), |span| Token::new(span, TokenKind::Module)), lex_keywords,
lex_literals, lex_literals,
lex_braces, lex_braces,
lex_punctuation, lex_punctuation,
@ -174,10 +184,25 @@ fn lex_punctuation(input: Span) -> IResult<Span, Token> {
map(tag(":"), |_| TokenKind::Colon), map(tag(":"), |_| TokenKind::Colon),
map(tag(";"), |_| TokenKind::Semicolon), map(tag(";"), |_| TokenKind::Semicolon),
map(tag(","), |_| TokenKind::Comma), map(tag(","), |_| TokenKind::Comma),
map(tag("^"), |_| TokenKind::Caret),
map(tag("->"), |_| TokenKind::RArrow), map(tag("->"), |_| TokenKind::RArrow),
map(tag("~"), |_| TokenKind::Tilde), map(tag("=>"), |_| TokenKind::FatArrow),
map(tag("="), |_| TokenKind::Assign), map(tag("~"), |_| TokenKind::BitNot),
map(tag("&"), |_| TokenKind::BitAnd),
map(tag("^"), |_| TokenKind::BitXor),
map(tag("|"), |_| TokenKind::BitOr),
map(tag("="), |_| TokenKind::EqAssign),
))),
|(span, kind)| Token::new(span, kind),
)(input)
}
fn lex_keywords(input: Span) -> IResult<Span, Token> {
map(
consumed(alt((
map(tag("module"), |_| TokenKind::Module),
map(tag("assign"), |_| TokenKind::Assign),
map(tag("match"), |_| TokenKind::Match),
map(tag("proc"), |_| TokenKind::Proc),
))), ))),
|(span, kind)| Token::new(span, kind), |(span, kind)| Token::new(span, kind),
)(input) )(input)