switch parser to use tokens
This commit is contained in:
parent
108297b966
commit
825703e1ce
|
@ -41,15 +41,19 @@ fn main() {
|
|||
.expect("error reading file");
|
||||
let input: &str = input.as_str();
|
||||
let input = parser::Span::new(input);
|
||||
let parsed = parser::parse(input);
|
||||
let lexed = parser::tokens::lex(input).unwrap();
|
||||
let tokens = parser::tokens::TokenSpan::new(&lexed.1);
|
||||
let parsed = parser::parse(tokens);
|
||||
match parsed {
|
||||
Err(nom::Err::Error(err) | nom::Err::Failure(err)) => {
|
||||
if opt.debug {
|
||||
println!("{err:#?}");
|
||||
}
|
||||
/*
|
||||
parser::error::convert_error(input, err)
|
||||
.eprint(Source::from(input.fragment()))
|
||||
.unwrap();
|
||||
*/
|
||||
}
|
||||
Err(_) => (unreachable!()),
|
||||
Ok(res) => {
|
||||
|
|
|
@ -24,7 +24,7 @@ pub type IErr<I> = GreedyError<I, ErrorKind>;
|
|||
pub type IResult<I, O, E = IErr<I>> = nom::IResult<I, O, E>;
|
||||
|
||||
pub use crate::parser::module::{module, Module, ModuleItem, PortDirection};
|
||||
use literals::hexadecimal;
|
||||
use crate::parser::tokens::{token, TokenKind as tk, TokenSpan};
|
||||
|
||||
fn ws0<'a, F: 'a, O, E: ParseError<Span<'a>>>(
|
||||
inner: F,
|
||||
|
@ -43,27 +43,20 @@ fn identifier(input: Span) -> IResult<Span, Span> {
|
|||
}
|
||||
|
||||
// TODO: allow recursive generics
|
||||
fn typename(input: Span) -> IResult<Span, TypeName> {
|
||||
// TODO: allow expressions again
|
||||
fn typename(input: TokenSpan) -> IResult<TokenSpan, TypeName> {
|
||||
map(
|
||||
tuple((
|
||||
identifier,
|
||||
opt(delimited(char('<'), ws0(expression), char('>'))),
|
||||
token(tk::Ident),
|
||||
opt(delimited(token(tk::LAngle), expression, token(tk::RAngle))),
|
||||
)),
|
||||
|(ident, _)| TypeName {
|
||||
name: ident,
|
||||
name: ident.span(),
|
||||
generics: (),
|
||||
},
|
||||
)(input)
|
||||
}
|
||||
|
||||
fn widthspec(input: Span) -> IResult<Span, u64> {
|
||||
delimited(char('['), ws0(decimal), char(']'))(input)
|
||||
}
|
||||
|
||||
fn intliteral(input: Span) -> IResult<Span, (u64, u64)> {
|
||||
tuple((terminated(decimal, char('\'')), alt((decimal, hexadecimal))))(input)
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct TypeName<'a> {
|
||||
name: Span<'a>,
|
||||
|
@ -74,7 +67,7 @@ pub struct TypeName<'a> {
|
|||
pub struct NetDecl<'a> {
|
||||
pub name: Span<'a>,
|
||||
pub typ: TypeName<'a>,
|
||||
pub value: Option<(u64, u64)>,
|
||||
pub value: Option<Span<'a>>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
|
@ -114,87 +107,91 @@ pub enum Expression<'a> {
|
|||
Operation(Box<Operation<'a>>),
|
||||
}
|
||||
|
||||
fn declaration(i: Span) -> IResult<Span, NetDecl> {
|
||||
// TODO: reallow assignments
|
||||
fn declaration(i: TokenSpan) -> IResult<TokenSpan, NetDecl> {
|
||||
map(
|
||||
tuple((
|
||||
separated_pair(identifier, ws0(char(':')), typename),
|
||||
opt(preceded(ws0(char('=')), intliteral)),
|
||||
separated_pair(token(tk::Ident), token(tk::Colon), typename),
|
||||
opt(preceded(token(tk::Assign), token(tk::Number))),
|
||||
)),
|
||||
|((ident, typ), value)| NetDecl {
|
||||
name: ident,
|
||||
name: ident.span(),
|
||||
typ,
|
||||
value,
|
||||
value: None,
|
||||
},
|
||||
)(i)
|
||||
}
|
||||
|
||||
fn operation(input: Span) -> IResult<Span, Operation> {
|
||||
fn operation(input: TokenSpan) -> IResult<TokenSpan, Operation> {
|
||||
// temporarily given up on until I learn the shunting-yard algorithm
|
||||
alt((
|
||||
map(
|
||||
separated_pair(ws0(expression_nonrecurse), char('&'), ws0(expression)),
|
||||
separated_pair(expression_nonrecurse, token(tk::BitAnd), expression),
|
||||
|(a, b)| Operation::And { a, b },
|
||||
),
|
||||
map(
|
||||
separated_pair(ws0(expression_nonrecurse), char('|'), ws0(expression)),
|
||||
separated_pair(expression_nonrecurse, token(tk::BitOr), expression),
|
||||
|(a, b)| Operation::Or { a, b },
|
||||
),
|
||||
map(
|
||||
separated_pair(ws0(expression_nonrecurse), char('^'), ws0(expression)),
|
||||
separated_pair(expression_nonrecurse, token(tk::BitXor), expression),
|
||||
|(a, b)| Operation::Xor { a, b },
|
||||
),
|
||||
map(preceded(char('~'), expression), Operation::Not),
|
||||
map(preceded(token(tk::BitNot), expression), Operation::Not),
|
||||
))(input)
|
||||
}
|
||||
|
||||
fn call_item(input: Span) -> IResult<Span, Call> {
|
||||
fn call_item(input: TokenSpan) -> IResult<TokenSpan, Call> {
|
||||
map(
|
||||
tuple((
|
||||
ws0(identifier),
|
||||
token(tk::Ident),
|
||||
delimited(
|
||||
char('('),
|
||||
ws0(separated_list0(char(','), expression)),
|
||||
char(')'),
|
||||
token(tk::LParen),
|
||||
separated_list0(token(tk::Comma), expression),
|
||||
token(tk::RParen),
|
||||
),
|
||||
)),
|
||||
|(name, args)| Call { name, args },
|
||||
|(name, args)| Call {
|
||||
name: name.span(),
|
||||
args,
|
||||
},
|
||||
)(input)
|
||||
}
|
||||
|
||||
/// parser combinators can not parse left-recursive grammars. To work around this, we split
|
||||
/// expressions into a recursive and non-recursive portion.
|
||||
/// Parsers reachable from this point must call `expression_nonrecurse` instead of `expression` for their leftmost operand
|
||||
fn expression(input: Span) -> IResult<Span, Expression> {
|
||||
fn expression(input: TokenSpan) -> IResult<TokenSpan, Expression> {
|
||||
alt((
|
||||
map(ws0(operation), |op| Expression::Operation(Box::new(op))),
|
||||
map(operation, |op| Expression::Operation(Box::new(op))),
|
||||
expression_nonrecurse,
|
||||
))(input)
|
||||
}
|
||||
|
||||
/// the portion of the expression grammar that can be parsed without left recursion
|
||||
fn expression_nonrecurse(input: Span) -> IResult<Span, Expression> {
|
||||
fn expression_nonrecurse(input: TokenSpan) -> IResult<TokenSpan, Expression> {
|
||||
alt((
|
||||
map(ws0(decimal), Expression::Literal),
|
||||
map(ws0(call_item), |call| Expression::Call(Box::new(call))),
|
||||
map(ws0(identifier), |ident| {
|
||||
Expression::Ident(*ident.fragment())
|
||||
map(token(tk::Number), |_| Expression::Literal(42)),
|
||||
map(call_item, |call| Expression::Call(Box::new(call))),
|
||||
map(token(tk::Ident), |ident| {
|
||||
Expression::Ident(*ident.span().fragment())
|
||||
}),
|
||||
delimited(char('('), expression, char(')')),
|
||||
delimited(token(tk::LParen), expression, token(tk::RParen)),
|
||||
))(input)
|
||||
}
|
||||
|
||||
fn assign_statement(input: Span) -> IResult<Span, Assign> {
|
||||
fn assign_statement(input: TokenSpan) -> IResult<TokenSpan, Assign> {
|
||||
map(
|
||||
separated_pair(ws0(identifier), char('='), ws0(expression)),
|
||||
separated_pair(token(tk::Ident), token(tk::EqAssign), expression),
|
||||
|(lhs, expr)| Assign {
|
||||
lhs: (*lhs.fragment()),
|
||||
lhs: (*lhs.span().fragment()),
|
||||
expr,
|
||||
},
|
||||
)(input)
|
||||
}
|
||||
|
||||
pub fn parse(input: Span) -> IResult<Span, Module> {
|
||||
ws0(module)(input)
|
||||
pub fn parse(input: TokenSpan) -> IResult<TokenSpan, Module> {
|
||||
module(input)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
|
@ -11,6 +11,7 @@ use nom::{
|
|||
use crate::parser::{
|
||||
assign_statement, declaration, identifier,
|
||||
proc::{proc_block, ProcBlock},
|
||||
tokens::{token, Token, TokenKind as tk, TokenSpan},
|
||||
typename, ws0, Assign, IResult, NetDecl, Span,
|
||||
};
|
||||
|
||||
|
@ -22,7 +23,6 @@ pub enum PortDirection {
|
|||
|
||||
#[derive(Debug)]
|
||||
pub struct PortDecl<'a> {
|
||||
pub pos: Span<'a>,
|
||||
pub direction: PortDirection,
|
||||
pub net: NetDecl<'a>,
|
||||
}
|
||||
|
@ -40,55 +40,45 @@ pub enum ModuleItem<'a> {
|
|||
Proc(ProcBlock<'a>),
|
||||
}
|
||||
|
||||
fn port_decl(i: Span) -> IResult<Span, PortDecl> {
|
||||
map(consumed(declaration), |(pos, net)| PortDecl {
|
||||
pos,
|
||||
fn port_decl(i: TokenSpan) -> IResult<TokenSpan, PortDecl> {
|
||||
map(declaration, |net| PortDecl {
|
||||
direction: PortDirection::Input,
|
||||
net,
|
||||
})(i)
|
||||
}
|
||||
|
||||
fn inputs_list(input: Span) -> IResult<Span, Vec<PortDecl>> {
|
||||
separated_list0(ws0(char(',')), ws0(port_decl))(input)
|
||||
fn inputs_list(input: TokenSpan) -> IResult<TokenSpan, Vec<PortDecl>> {
|
||||
separated_list0(token(tk::Comma), port_decl)(input)
|
||||
}
|
||||
|
||||
fn assign_item(input: Span) -> IResult<Span, Assign> {
|
||||
fn assign_item(input: TokenSpan) -> IResult<TokenSpan, Assign> {
|
||||
context(
|
||||
"assignment",
|
||||
delimited(
|
||||
ws0(terminated(tag("assign"), multispace1)),
|
||||
ws0(assign_statement),
|
||||
ws0(char(';')),
|
||||
),
|
||||
delimited(token(tk::Assign), assign_statement, token(tk::Semicolon)),
|
||||
)(input)
|
||||
}
|
||||
|
||||
fn module_item(input: Span) -> IResult<Span, ModuleItem> {
|
||||
fn module_item(input: TokenSpan) -> IResult<TokenSpan, ModuleItem> {
|
||||
alt((
|
||||
map(assign_item, ModuleItem::Assign),
|
||||
map(proc_block, ModuleItem::Proc),
|
||||
))(input)
|
||||
}
|
||||
|
||||
/// parse a top-level module declaration
|
||||
pub fn module(input: Span) -> IResult<Span, Module> {
|
||||
pub fn module(input: TokenSpan) -> IResult<TokenSpan, Module> {
|
||||
context(
|
||||
"module",
|
||||
map(
|
||||
tuple((
|
||||
tag("module"),
|
||||
ws0(identifier),
|
||||
ws0(delimited(char('('), ws0(inputs_list), char(')'))),
|
||||
ws0(preceded(tag("->"), ws0(typename))),
|
||||
ws0(delimited(
|
||||
char('{'),
|
||||
ws0(many0(ws0(module_item))),
|
||||
char('}'),
|
||||
)),
|
||||
token(tk::Module),
|
||||
token(tk::Ident),
|
||||
delimited(token(tk::LParen), inputs_list, token(tk::RParen)),
|
||||
preceded(token(tk::RArrow), typename),
|
||||
delimited(token(tk::LBrace), many0(module_item), token(tk::RBrace)),
|
||||
)),
|
||||
|(_, name, inputs, ret, items)| Module {
|
||||
name,
|
||||
// TODO: add back in returns
|
||||
// TODO: bring back returns
|
||||
name: name.span(),
|
||||
ports: inputs,
|
||||
items,
|
||||
},
|
||||
|
|
|
@ -9,7 +9,9 @@ use nom::{
|
|||
};
|
||||
|
||||
use crate::parser::{
|
||||
assign_statement, expression, identifier, ws0, Assign, Expression, IResult, Span,
|
||||
assign_statement, expression, identifier,
|
||||
tokens::{token, Token, TokenKind as tk, TokenSpan},
|
||||
ws0, Assign, Expression, IResult, Span,
|
||||
};
|
||||
|
||||
#[derive(Debug)]
|
||||
|
@ -36,38 +38,38 @@ pub struct MatchBlock<'a> {
|
|||
pub arms: Vec<(Expression<'a>, ProcStatement<'a>)>,
|
||||
}
|
||||
|
||||
fn match_arm(input: Span) -> IResult<Span, (Expression, ProcStatement)> {
|
||||
separated_pair(ws0(expression), tag("=>"), ws0(proc_statement))(input)
|
||||
fn match_arm(input: TokenSpan) -> IResult<TokenSpan, (Expression, ProcStatement)> {
|
||||
separated_pair(expression, token(tk::FatArrow), proc_statement)(input)
|
||||
}
|
||||
|
||||
fn match_block(input: Span) -> IResult<Span, MatchBlock> {
|
||||
fn match_block(input: TokenSpan) -> IResult<TokenSpan, MatchBlock> {
|
||||
context(
|
||||
"match block",
|
||||
map(
|
||||
tuple((
|
||||
ws0(tag("match")),
|
||||
ws0(delimited(char('('), ws0(expression), char(')'))),
|
||||
ws0(delimited(
|
||||
char('{'),
|
||||
separated_list1(char(','), ws0(match_arm)),
|
||||
char('}'),
|
||||
)),
|
||||
token(tk::Match),
|
||||
delimited(token(tk::LParen), expression, token(tk::RParen)),
|
||||
delimited(
|
||||
token(tk::LBrace),
|
||||
separated_list1(token(tk::Comma), match_arm),
|
||||
token(tk::RBrace),
|
||||
),
|
||||
)),
|
||||
|(_, expr, arms)| MatchBlock { expr, arms },
|
||||
),
|
||||
)(input)
|
||||
}
|
||||
|
||||
fn statement_block(input: Span) -> IResult<Span, Vec<ProcStatement>> {
|
||||
fn statement_block(input: TokenSpan) -> IResult<TokenSpan, Vec<ProcStatement>> {
|
||||
delimited(
|
||||
char('{'),
|
||||
separated_list1(char(';'), ws0(proc_statement)),
|
||||
char('}'),
|
||||
token(tk::LBrace),
|
||||
separated_list1(token(tk::Semicolon), proc_statement),
|
||||
token(tk::RBrace),
|
||||
)(input)
|
||||
}
|
||||
|
||||
/// parse a statement that is valid inside a proc block
|
||||
fn proc_statement(input: Span) -> IResult<Span, ProcStatement> {
|
||||
fn proc_statement(input: TokenSpan) -> IResult<TokenSpan, ProcStatement> {
|
||||
alt((
|
||||
map(match_block, ProcStatement::Match),
|
||||
map(statement_block, ProcStatement::Block),
|
||||
|
@ -75,16 +77,19 @@ fn proc_statement(input: Span) -> IResult<Span, ProcStatement> {
|
|||
))(input)
|
||||
}
|
||||
|
||||
pub fn proc_block(input: Span) -> IResult<Span, ProcBlock> {
|
||||
pub fn proc_block(input: TokenSpan) -> IResult<TokenSpan, ProcBlock> {
|
||||
context(
|
||||
"proc block",
|
||||
map(
|
||||
tuple((
|
||||
ws0(tag("proc")),
|
||||
ws0(delimited(char('('), ws0(identifier), char(')'))),
|
||||
ws0(delimited(char('{'), many1(ws0(proc_statement)), char('}'))),
|
||||
token(tk::Proc),
|
||||
delimited(token(tk::LParen), token(tk::Ident), token(tk::RParen)),
|
||||
delimited(token(tk::LBrace), many1(proc_statement), token(tk::RBrace)),
|
||||
)),
|
||||
|(_, net, items)| ProcBlock { net, items },
|
||||
|(_, net, items)| ProcBlock {
|
||||
net: net.span(),
|
||||
items,
|
||||
},
|
||||
),
|
||||
)(input)
|
||||
}
|
||||
|
|
|
@ -34,13 +34,10 @@ impl<'a> Token<'a> {
|
|||
fn new(span: Span<'a>, kind: TokenKind) -> Self {
|
||||
Self { span, kind }
|
||||
}
|
||||
}
|
||||
|
||||
pub fn pretty_tokens(mut w: impl io::Write, toks: &[Token]) -> io::Result<()> {
|
||||
for tok in toks {
|
||||
writeln!(w, "{:?}", tok)?;
|
||||
pub fn span(&self) -> Span {
|
||||
self.span
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone)]
|
||||
|
@ -60,21 +57,27 @@ pub enum TokenKind {
|
|||
Colon,
|
||||
Semicolon,
|
||||
Comma,
|
||||
Caret,
|
||||
Tilde,
|
||||
Assign,
|
||||
BitNot,
|
||||
BitAnd,
|
||||
BitOr,
|
||||
BitXor,
|
||||
EqAssign,
|
||||
// Multi Chars
|
||||
FatArrow,
|
||||
RArrow,
|
||||
// Literals
|
||||
Ident,
|
||||
Number,
|
||||
// Keywords
|
||||
Module,
|
||||
Assign,
|
||||
Match,
|
||||
Proc,
|
||||
// Error
|
||||
Error,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct TokenSpan<'a> {
|
||||
rest: &'a [Token<'a>],
|
||||
pos: usize,
|
||||
|
@ -103,6 +106,12 @@ impl nom::InputTake for TokenSpan<'_> {
|
|||
}
|
||||
}
|
||||
|
||||
impl nom::InputLength for TokenSpan<'_> {
|
||||
fn input_len(&self) -> usize {
|
||||
self.rest.len()
|
||||
}
|
||||
}
|
||||
|
||||
impl nom_greedyerror::Position for TokenSpan<'_> {
|
||||
fn position(&self) -> usize {
|
||||
self.pos
|
||||
|
@ -119,11 +128,12 @@ where
|
|||
move |input: TokenSpan| {
|
||||
let next = &input.rest[0];
|
||||
if next.kind == kind.clone() {
|
||||
let rest = TokenSpan::new(&input.rest[1..]);
|
||||
let rest = TokenSpan::with_pos(&input.rest[1..], input.pos + 1);
|
||||
Ok((rest, next))
|
||||
} else {
|
||||
Err(nom::Err::Error(E::from_error_kind(
|
||||
input,
|
||||
// TODO: Proper errors here
|
||||
nom::error::ErrorKind::Tag,
|
||||
)))
|
||||
}
|
||||
|
@ -132,7 +142,7 @@ where
|
|||
|
||||
pub fn lex(input: Span) -> IResult<Span, Vec<Token>> {
|
||||
many0(ws0(alt((
|
||||
map(tag("module"), |span| Token::new(span, TokenKind::Module)),
|
||||
lex_keywords,
|
||||
lex_literals,
|
||||
lex_braces,
|
||||
lex_punctuation,
|
||||
|
@ -174,10 +184,25 @@ fn lex_punctuation(input: Span) -> IResult<Span, Token> {
|
|||
map(tag(":"), |_| TokenKind::Colon),
|
||||
map(tag(";"), |_| TokenKind::Semicolon),
|
||||
map(tag(","), |_| TokenKind::Comma),
|
||||
map(tag("^"), |_| TokenKind::Caret),
|
||||
map(tag("->"), |_| TokenKind::RArrow),
|
||||
map(tag("~"), |_| TokenKind::Tilde),
|
||||
map(tag("="), |_| TokenKind::Assign),
|
||||
map(tag("=>"), |_| TokenKind::FatArrow),
|
||||
map(tag("~"), |_| TokenKind::BitNot),
|
||||
map(tag("&"), |_| TokenKind::BitAnd),
|
||||
map(tag("^"), |_| TokenKind::BitXor),
|
||||
map(tag("|"), |_| TokenKind::BitOr),
|
||||
map(tag("="), |_| TokenKind::EqAssign),
|
||||
))),
|
||||
|(span, kind)| Token::new(span, kind),
|
||||
)(input)
|
||||
}
|
||||
|
||||
fn lex_keywords(input: Span) -> IResult<Span, Token> {
|
||||
map(
|
||||
consumed(alt((
|
||||
map(tag("module"), |_| TokenKind::Module),
|
||||
map(tag("assign"), |_| TokenKind::Assign),
|
||||
map(tag("match"), |_| TokenKind::Match),
|
||||
map(tag("proc"), |_| TokenKind::Proc),
|
||||
))),
|
||||
|(span, kind)| Token::new(span, kind),
|
||||
)(input)
|
||||
|
|
Loading…
Reference in New Issue