switch parser to using tokens

This commit is contained in:
NotAFile 2022-02-02 01:00:11 +01:00
parent 108297b966
commit 825703e1ce
5 changed files with 126 additions and 105 deletions

View File

@ -41,15 +41,19 @@ fn main() {
.expect("error reading file");
let input: &str = input.as_str();
let input = parser::Span::new(input);
let parsed = parser::parse(input);
let lexed = parser::tokens::lex(input).unwrap();
let tokens = parser::tokens::TokenSpan::new(&lexed.1);
let parsed = parser::parse(tokens);
match parsed {
Err(nom::Err::Error(err) | nom::Err::Failure(err)) => {
if opt.debug {
println!("{err:#?}");
}
/*
parser::error::convert_error(input, err)
.eprint(Source::from(input.fragment()))
.unwrap();
*/
}
Err(_) => (unreachable!()),
Ok(res) => {

View File

@ -24,7 +24,7 @@ pub type IErr<I> = GreedyError<I, ErrorKind>;
pub type IResult<I, O, E = IErr<I>> = nom::IResult<I, O, E>;
pub use crate::parser::module::{module, Module, ModuleItem, PortDirection};
use literals::hexadecimal;
use crate::parser::tokens::{token, TokenKind as tk, TokenSpan};
fn ws0<'a, F: 'a, O, E: ParseError<Span<'a>>>(
inner: F,
@ -43,27 +43,20 @@ fn identifier(input: Span) -> IResult<Span, Span> {
}
// TODO: allow recursive generics
fn typename(input: Span) -> IResult<Span, TypeName> {
// TODO: allow expressions again
fn typename(input: TokenSpan) -> IResult<TokenSpan, TypeName> {
map(
tuple((
identifier,
opt(delimited(char('<'), ws0(expression), char('>'))),
token(tk::Ident),
opt(delimited(token(tk::LAngle), expression, token(tk::RAngle))),
)),
|(ident, _)| TypeName {
name: ident,
name: ident.span(),
generics: (),
},
)(input)
}
fn widthspec(input: Span) -> IResult<Span, u64> {
delimited(char('['), ws0(decimal), char(']'))(input)
}
fn intliteral(input: Span) -> IResult<Span, (u64, u64)> {
tuple((terminated(decimal, char('\'')), alt((decimal, hexadecimal))))(input)
}
#[derive(Debug)]
pub struct TypeName<'a> {
name: Span<'a>,
@ -74,7 +67,7 @@ pub struct TypeName<'a> {
pub struct NetDecl<'a> {
pub name: Span<'a>,
pub typ: TypeName<'a>,
pub value: Option<(u64, u64)>,
pub value: Option<Span<'a>>,
}
#[derive(Debug)]
@ -114,87 +107,91 @@ pub enum Expression<'a> {
Operation(Box<Operation<'a>>),
}
fn declaration(i: Span) -> IResult<Span, NetDecl> {
// TODO: reallow assignments
fn declaration(i: TokenSpan) -> IResult<TokenSpan, NetDecl> {
map(
tuple((
separated_pair(identifier, ws0(char(':')), typename),
opt(preceded(ws0(char('=')), intliteral)),
separated_pair(token(tk::Ident), token(tk::Colon), typename),
opt(preceded(token(tk::Assign), token(tk::Number))),
)),
|((ident, typ), value)| NetDecl {
name: ident,
name: ident.span(),
typ,
value,
value: None,
},
)(i)
}
fn operation(input: Span) -> IResult<Span, Operation> {
fn operation(input: TokenSpan) -> IResult<TokenSpan, Operation> {
// temporarily given up on until I learn the shunting-yard algorithm
alt((
map(
separated_pair(ws0(expression_nonrecurse), char('&'), ws0(expression)),
separated_pair(expression_nonrecurse, token(tk::BitAnd), expression),
|(a, b)| Operation::And { a, b },
),
map(
separated_pair(ws0(expression_nonrecurse), char('|'), ws0(expression)),
separated_pair(expression_nonrecurse, token(tk::BitOr), expression),
|(a, b)| Operation::Or { a, b },
),
map(
separated_pair(ws0(expression_nonrecurse), char('^'), ws0(expression)),
separated_pair(expression_nonrecurse, token(tk::BitXor), expression),
|(a, b)| Operation::Xor { a, b },
),
map(preceded(char('~'), expression), Operation::Not),
map(preceded(token(tk::BitNot), expression), Operation::Not),
))(input)
}
fn call_item(input: Span) -> IResult<Span, Call> {
fn call_item(input: TokenSpan) -> IResult<TokenSpan, Call> {
map(
tuple((
ws0(identifier),
token(tk::Ident),
delimited(
char('('),
ws0(separated_list0(char(','), expression)),
char(')'),
token(tk::LParen),
separated_list0(token(tk::Comma), expression),
token(tk::RParen),
),
)),
|(name, args)| Call { name, args },
|(name, args)| Call {
name: name.span(),
args,
},
)(input)
}
/// parser combinators can not parse left-recursive grammars. To work around this, we split
/// expressions into a recursive and non-recursive portion.
/// Parsers reachable from this point must call expression_nonrecurse instead
fn expression(input: Span) -> IResult<Span, Expression> {
fn expression(input: TokenSpan) -> IResult<TokenSpan, Expression> {
alt((
map(ws0(operation), |op| Expression::Operation(Box::new(op))),
map(operation, |op| Expression::Operation(Box::new(op))),
expression_nonrecurse,
))(input)
}
/// the portion of the expression grammar that can be parsed without left recursion
fn expression_nonrecurse(input: Span) -> IResult<Span, Expression> {
fn expression_nonrecurse(input: TokenSpan) -> IResult<TokenSpan, Expression> {
alt((
map(ws0(decimal), Expression::Literal),
map(ws0(call_item), |call| Expression::Call(Box::new(call))),
map(ws0(identifier), |ident| {
Expression::Ident(*ident.fragment())
map(token(tk::Number), |_| Expression::Literal(42)),
map(call_item, |call| Expression::Call(Box::new(call))),
map(token(tk::Ident), |ident| {
Expression::Ident(*ident.span().fragment())
}),
delimited(char('('), expression, char(')')),
delimited(token(tk::LParen), expression, token(tk::RParen)),
))(input)
}
fn assign_statement(input: Span) -> IResult<Span, Assign> {
fn assign_statement(input: TokenSpan) -> IResult<TokenSpan, Assign> {
map(
separated_pair(ws0(identifier), char('='), ws0(expression)),
separated_pair(token(tk::Ident), token(tk::EqAssign), expression),
|(lhs, expr)| Assign {
lhs: (*lhs.fragment()),
lhs: (*lhs.span().fragment()),
expr,
},
)(input)
}
pub fn parse(input: Span) -> IResult<Span, Module> {
ws0(module)(input)
/// Public entry point of the parser: passes `input` straight to `module`
/// and returns its result unchanged.
pub fn parse(input: TokenSpan) -> IResult<TokenSpan, Module> {
module(input)
}
#[cfg(test)]

View File

@ -11,6 +11,7 @@ use nom::{
use crate::parser::{
assign_statement, declaration, identifier,
proc::{proc_block, ProcBlock},
tokens::{token, Token, TokenKind as tk, TokenSpan},
typename, ws0, Assign, IResult, NetDecl, Span,
};
@ -22,7 +23,6 @@ pub enum PortDirection {
#[derive(Debug)]
pub struct PortDecl<'a> {
pub pos: Span<'a>,
pub direction: PortDirection,
pub net: NetDecl<'a>,
}
@ -40,55 +40,45 @@ pub enum ModuleItem<'a> {
Proc(ProcBlock<'a>),
}
fn port_decl(i: Span) -> IResult<Span, PortDecl> {
map(consumed(declaration), |(pos, net)| PortDecl {
pos,
/// Parses one port declaration by running `declaration` and wrapping the
/// resulting net in a `PortDecl`.
fn port_decl(i: TokenSpan) -> IResult<TokenSpan, PortDecl> {
map(declaration, |net| PortDecl {
// the direction is hard-coded here; nothing in the token stream is consulted for it
direction: PortDirection::Input,
net,
})(i)
}
fn inputs_list(input: Span) -> IResult<Span, Vec<PortDecl>> {
separated_list0(ws0(char(',')), ws0(port_decl))(input)
/// Parses zero or more `port_decl`s separated by `Comma` tokens and collects
/// them into a `Vec`.
fn inputs_list(input: TokenSpan) -> IResult<TokenSpan, Vec<PortDecl>> {
separated_list0(token(tk::Comma), port_decl)(input)
}
fn assign_item(input: Span) -> IResult<Span, Assign> {
fn assign_item(input: TokenSpan) -> IResult<TokenSpan, Assign> {
context(
"assignment",
delimited(
ws0(terminated(tag("assign"), multispace1)),
ws0(assign_statement),
ws0(char(';')),
),
delimited(token(tk::Assign), assign_statement, token(tk::Semicolon)),
)(input)
}
fn module_item(input: Span) -> IResult<Span, ModuleItem> {
/// Parses one module item: tries `assign_item` first, then `proc_block`, and
/// wraps whichever succeeds in the matching `ModuleItem` variant.
fn module_item(input: TokenSpan) -> IResult<TokenSpan, ModuleItem> {
alt((
map(assign_item, ModuleItem::Assign),
map(proc_block, ModuleItem::Proc),
))(input)
}
/// parse a top-level module declaration
pub fn module(input: Span) -> IResult<Span, Module> {
pub fn module(input: TokenSpan) -> IResult<TokenSpan, Module> {
context(
"module",
map(
tuple((
tag("module"),
ws0(identifier),
ws0(delimited(char('('), ws0(inputs_list), char(')'))),
ws0(preceded(tag("->"), ws0(typename))),
ws0(delimited(
char('{'),
ws0(many0(ws0(module_item))),
char('}'),
)),
token(tk::Module),
token(tk::Ident),
delimited(token(tk::LParen), inputs_list, token(tk::RParen)),
preceded(token(tk::RArrow), typename),
delimited(token(tk::LBrace), many0(module_item), token(tk::RBrace)),
)),
|(_, name, inputs, ret, items)| Module {
name,
// TODO: add back in returns
// TODO: bring back returns
name: name.span(),
ports: inputs,
items,
},

View File

@ -9,7 +9,9 @@ use nom::{
};
use crate::parser::{
assign_statement, expression, identifier, ws0, Assign, Expression, IResult, Span,
assign_statement, expression, identifier,
tokens::{token, Token, TokenKind as tk, TokenSpan},
ws0, Assign, Expression, IResult, Span,
};
#[derive(Debug)]
@ -36,38 +38,38 @@ pub struct MatchBlock<'a> {
pub arms: Vec<(Expression<'a>, ProcStatement<'a>)>,
}
fn match_arm(input: Span) -> IResult<Span, (Expression, ProcStatement)> {
separated_pair(ws0(expression), tag("=>"), ws0(proc_statement))(input)
/// Parses one match arm: an expression, a `FatArrow` token, then a proc
/// statement. The expression and statement are returned as a pair; the
/// arrow token itself is not part of the output.
fn match_arm(input: TokenSpan) -> IResult<TokenSpan, (Expression, ProcStatement)> {
separated_pair(expression, token(tk::FatArrow), proc_statement)(input)
}
fn match_block(input: Span) -> IResult<Span, MatchBlock> {
fn match_block(input: TokenSpan) -> IResult<TokenSpan, MatchBlock> {
context(
"match block",
map(
tuple((
ws0(tag("match")),
ws0(delimited(char('('), ws0(expression), char(')'))),
ws0(delimited(
char('{'),
separated_list1(char(','), ws0(match_arm)),
char('}'),
)),
token(tk::Match),
delimited(token(tk::LParen), expression, token(tk::RParen)),
delimited(
token(tk::LBrace),
separated_list1(token(tk::Comma), match_arm),
token(tk::RBrace),
),
)),
|(_, expr, arms)| MatchBlock { expr, arms },
),
)(input)
}
fn statement_block(input: Span) -> IResult<Span, Vec<ProcStatement>> {
fn statement_block(input: TokenSpan) -> IResult<TokenSpan, Vec<ProcStatement>> {
delimited(
char('{'),
separated_list1(char(';'), ws0(proc_statement)),
char('}'),
token(tk::LBrace),
separated_list1(token(tk::Semicolon), proc_statement),
token(tk::RBrace),
)(input)
}
/// parse a statement that is valid inside a proc block
fn proc_statement(input: Span) -> IResult<Span, ProcStatement> {
fn proc_statement(input: TokenSpan) -> IResult<TokenSpan, ProcStatement> {
alt((
map(match_block, ProcStatement::Match),
map(statement_block, ProcStatement::Block),
@ -75,16 +77,19 @@ fn proc_statement(input: Span) -> IResult<Span, ProcStatement> {
))(input)
}
pub fn proc_block(input: Span) -> IResult<Span, ProcBlock> {
pub fn proc_block(input: TokenSpan) -> IResult<TokenSpan, ProcBlock> {
context(
"proc block",
map(
tuple((
ws0(tag("proc")),
ws0(delimited(char('('), ws0(identifier), char(')'))),
ws0(delimited(char('{'), many1(ws0(proc_statement)), char('}'))),
token(tk::Proc),
delimited(token(tk::LParen), token(tk::Ident), token(tk::RParen)),
delimited(token(tk::LBrace), many1(proc_statement), token(tk::RBrace)),
)),
|(_, net, items)| ProcBlock { net, items },
|(_, net, items)| ProcBlock {
net: net.span(),
items,
},
),
)(input)
}

View File

@ -34,13 +34,10 @@ impl<'a> Token<'a> {
/// Creates a token from a span and the kind assigned to it.
fn new(span: Span<'a>, kind: TokenKind) -> Self {
Self { span, kind }
}
}
pub fn pretty_tokens(mut w: impl io::Write, toks: &[Token]) -> io::Result<()> {
for tok in toks {
writeln!(w, "{:?}", tok)?;
pub fn span(&self) -> Span {
self.span
}
Ok(())
}
#[derive(Debug, PartialEq, Clone)]
@ -60,21 +57,27 @@ pub enum TokenKind {
Colon,
Semicolon,
Comma,
Caret,
Tilde,
Assign,
BitNot,
BitAnd,
BitOr,
BitXor,
EqAssign,
// Multi Chars
FatArrow,
RArrow,
// Literals
Ident,
Number,
// Keywords
Module,
Assign,
Match,
Proc,
// Error
Error,
}
#[derive(Debug)]
#[derive(Debug, Clone)]
pub struct TokenSpan<'a> {
rest: &'a [Token<'a>],
pos: usize,
@ -103,6 +106,12 @@ impl nom::InputTake for TokenSpan<'_> {
}
}
/// Lets nom query how much input a `TokenSpan` still holds.
impl nom::InputLength for TokenSpan<'_> {
/// Returns the number of tokens in `rest` (a count of tokens, not bytes).
fn input_len(&self) -> usize {
self.rest.len()
}
}
impl nom_greedyerror::Position for TokenSpan<'_> {
fn position(&self) -> usize {
self.pos
@ -119,11 +128,12 @@ where
move |input: TokenSpan| {
let next = &input.rest[0];
if next.kind == kind.clone() {
let rest = TokenSpan::new(&input.rest[1..]);
let rest = TokenSpan::with_pos(&input.rest[1..], input.pos + 1);
Ok((rest, next))
} else {
Err(nom::Err::Error(E::from_error_kind(
input,
// TODO: Proper errors here
nom::error::ErrorKind::Tag,
)))
}
@ -132,7 +142,7 @@ where
pub fn lex(input: Span) -> IResult<Span, Vec<Token>> {
many0(ws0(alt((
map(tag("module"), |span| Token::new(span, TokenKind::Module)),
lex_keywords,
lex_literals,
lex_braces,
lex_punctuation,
@ -174,10 +184,25 @@ fn lex_punctuation(input: Span) -> IResult<Span, Token> {
map(tag(":"), |_| TokenKind::Colon),
map(tag(";"), |_| TokenKind::Semicolon),
map(tag(","), |_| TokenKind::Comma),
map(tag("^"), |_| TokenKind::Caret),
map(tag("->"), |_| TokenKind::RArrow),
map(tag("~"), |_| TokenKind::Tilde),
map(tag("="), |_| TokenKind::Assign),
map(tag("=>"), |_| TokenKind::FatArrow),
map(tag("~"), |_| TokenKind::BitNot),
map(tag("&"), |_| TokenKind::BitAnd),
map(tag("^"), |_| TokenKind::BitXor),
map(tag("|"), |_| TokenKind::BitOr),
map(tag("="), |_| TokenKind::EqAssign),
))),
|(span, kind)| Token::new(span, kind),
)(input)
}
fn lex_keywords(input: Span) -> IResult<Span, Token> {
map(
consumed(alt((
map(tag("module"), |_| TokenKind::Module),
map(tag("assign"), |_| TokenKind::Assign),
map(tag("match"), |_| TokenKind::Match),
map(tag("proc"), |_| TokenKind::Proc),
))),
|(span, kind)| Token::new(span, kind),
)(input)