Compare commits

..

3 Commits

Author SHA1 Message Date
NotAFile 73580c4ff0 move parser to mod.rs 2022-02-02 01:42:17 +01:00
NotAFile a2411244f4 move rest of parser 2022-02-02 01:41:19 +01:00
NotAFile e7d881d9ed move expressions to file 2022-02-02 01:31:59 +01:00
6 changed files with 259 additions and 231 deletions

View File

@ -1,228 +0,0 @@
pub mod error;
mod literals;
pub mod module;
pub mod proc;
pub mod tokens;
use nom::{
branch::alt,
bytes::complete::tag,
character::complete::{alpha1, alphanumeric1, multispace0},
combinator::{map, opt, recognize},
error::{ErrorKind, ParseError},
multi::{many0, separated_list0},
sequence::{delimited, pair, preceded, separated_pair, tuple},
};
use nom_greedyerror::GreedyError;
use nom_locate::LocatedSpan;
/// Source span type: a `&str` fragment wrapped in nom_locate's `LocatedSpan`.
pub type Span<'a> = LocatedSpan<&'a str>;
/// Default parser error type: nom_greedyerror's `GreedyError` carrying nom's `ErrorKind`.
pub type IErr<I> = GreedyError<I, ErrorKind>;
/// `nom::IResult` whose error type defaults to `IErr` (a `GreedyError`, not nom's `VerboseError`).
pub type IResult<I, O, E = IErr<I>> = nom::IResult<I, O, E>;
pub use crate::parser::module::{module, Module, ModuleItem, PortDirection};
use crate::parser::tokens::{token, TokenKind as tk, TokenSpan};
fn ws0<'a, F: 'a, O, E: ParseError<Span<'a>>>(
inner: F,
) -> impl FnMut(Span<'a>) -> IResult<Span<'a>, O, E>
where
F: FnMut(Span<'a>) -> IResult<Span<'a>, O, E>,
{
delimited(multispace0, inner, multispace0)
}
/// Recognizes an identifier and returns the span it covers.
///
/// The identifier starts with a run of alphabetic characters or a single `_`,
/// followed by any number of alphanumeric runs or `_` characters.
fn identifier(input: Span) -> IResult<Span, Span> {
    let head = alt((alpha1, tag("_")));
    let tail = many0(alt((alphanumeric1, tag("_"))));
    recognize(pair(head, tail))(input)
}
// TODO: allow recursive generics
// TODO: allow expressions again
/// Parses a type name: an `Ident` token, optionally followed by an expression
/// enclosed in `LAngle` / `RAngle` tokens.
///
/// The bracketed expression is parsed but discarded; `generics` is always `()`.
fn typename(input: TokenSpan) -> IResult<TokenSpan, TypeName> {
    let generic_args = opt(delimited(token(tk::LAngle), expression, token(tk::RAngle)));
    let (rest, (name_token, _discarded)) = tuple((token(tk::Ident), generic_args))(input)?;
    Ok((
        rest,
        TypeName {
            name: name_token.span(),
            generics: (),
        },
    ))
}
/// A parsed type name, as produced by `typename`.
#[derive(Debug)]
pub struct TypeName<'a> {
/// Span of the identifier token that names the type.
name: Span<'a>,
/// Placeholder for generic arguments; `typename` parses them but stores nothing.
generics: (),
}
/// A net declaration (`Ident Colon typename`), as produced by `declaration`.
#[derive(Debug)]
pub struct NetDecl<'a> {
/// Span of the identifier token being declared.
pub name: Span<'a>,
/// Declared type, parsed by `typename`.
pub typ: TypeName<'a>,
/// Span of the initial value, if one is recorded.
pub value: Option<Span<'a>>,
}
/// An assignment statement, as produced by `assign_statement`.
#[derive(Debug)]
pub struct Assign<'a> {
/// Text of the identifier token on the left of the `EqAssign` token.
pub lhs: &'a str,
/// Expression on the right of the `EqAssign` token.
pub expr: Expression<'a>,
}
/// An operation node of the expression tree, as produced by `operation`.
///
/// In the binary variants `a` is the operand left of the operator token and
/// `b` is the operand right of it.
#[derive(Debug, Clone)]
pub enum Operation<'a> {
/// Operands separated by a `BitAnd` token.
And {
a: Expression<'a>,
b: Expression<'a>,
},
/// Operands separated by a `BitOr` token.
Or {
a: Expression<'a>,
b: Expression<'a>,
},
/// Operands separated by a `BitXor` token.
Xor {
a: Expression<'a>,
b: Expression<'a>,
},
/// The operand that follows a `BitNot` token.
Not(Expression<'a>),
}
/// A call expression, as produced by `call_item`.
#[derive(Debug, Clone)]
pub struct Call<'a> {
/// Span of the identifier token naming the callee.
pub name: Span<'a>,
/// Argument expressions in source order; may be empty.
pub args: Vec<Expression<'a>>,
}
/// A node of the expression tree.
#[derive(Debug, Clone)]
pub enum Expression<'a> {
/// Text of an identifier token.
Ident(&'a str),
/// Numeric literal value.
/// NOTE(review): `expression_nonrecurse` currently stores the placeholder 42 for every `Number` token.
Literal(u64),
/// A call with its arguments.
Call(Box<Call<'a>>),
/// An operation; boxed because `Operation` itself contains `Expression` values.
Operation(Box<Operation<'a>>),
}
/// Parses a net declaration: `Ident Colon typename`, optionally followed by
/// `Assign Number`.
///
/// The span of the optional `Number` token is stored in `NetDecl::value`, so
/// an initial value written in the source is no longer parsed and then
/// silently dropped.
// NOTE(review): `assign_statement` uses `tk::EqAssign` where this parser uses
// `tk::Assign` — confirm which token kind is intended for the initializer.
fn declaration(i: TokenSpan) -> IResult<TokenSpan, NetDecl> {
    map(
        tuple((
            separated_pair(token(tk::Ident), token(tk::Colon), typename),
            opt(preceded(token(tk::Assign), token(tk::Number))),
        )),
        |((ident, typ), value)| NetDecl {
            name: ident.span(),
            typ,
            // keep the literal's span instead of discarding it
            value: value.map(|number| number.span()),
        },
    )(i)
}
fn operation(input: TokenSpan) -> IResult<TokenSpan, Operation> {
// temporarily given up on before I learn the shunting yard algorithm
alt((
map(
separated_pair(expression_nonrecurse, token(tk::BitAnd), expression),
|(a, b)| Operation::And { a, b },
),
map(
separated_pair(expression_nonrecurse, token(tk::BitOr), expression),
|(a, b)| Operation::Or { a, b },
),
map(
separated_pair(expression_nonrecurse, token(tk::BitXor), expression),
|(a, b)| Operation::Xor { a, b },
),
map(preceded(token(tk::BitNot), expression), Operation::Not),
))(input)
}
/// Parses a call: an `Ident` token followed by a parenthesized, comma-separated
/// (possibly empty) list of expressions.
fn call_item(input: TokenSpan) -> IResult<TokenSpan, Call> {
    let arguments = delimited(
        token(tk::LParen),
        separated_list0(token(tk::Comma), expression),
        token(tk::RParen),
    );
    let (rest, (callee, args)) = tuple((token(tk::Ident), arguments))(input)?;
    Ok((
        rest,
        Call {
            name: callee.span(),
            args,
        },
    ))
}
/// Parses any expression.
///
/// Parser combinators cannot handle left-recursive grammars, so the grammar is
/// split into this recursive entry point and `expression_nonrecurse`. A parser
/// that can be reached from here as the *first* step must call
/// `expression_nonrecurse` rather than this function.
fn expression(input: TokenSpan) -> IResult<TokenSpan, Expression> {
    // an operation is tried first; otherwise fall back to a non-recursive expression
    let boxed_operation = map(operation, |op| Expression::Operation(Box::new(op)));
    alt((boxed_operation, expression_nonrecurse))(input)
}
/// The part of the expression grammar that can be parsed without left recursion.
///
/// Tried in order: a `Number` token, a call, a bare identifier, and a
/// parenthesized expression. The call comes before the bare identifier because
/// both start with an `Ident` token.
// NOTE(review): every `Number` token currently becomes `Literal(42)`; the
// token's actual value is not read.
fn expression_nonrecurse(input: TokenSpan) -> IResult<TokenSpan, Expression> {
    let literal = map(token(tk::Number), |_| Expression::Literal(42));
    let call = map(call_item, |call| Expression::Call(Box::new(call)));
    let ident = map(token(tk::Ident), |ident| {
        Expression::Ident(*ident.span().fragment())
    });
    let parenthesized = delimited(token(tk::LParen), expression, token(tk::RParen));
    alt((literal, call, ident, parenthesized))(input)
}
/// Parses an assignment statement: `Ident EqAssign expression`.
///
/// The text of the identifier token becomes `lhs`.
fn assign_statement(input: TokenSpan) -> IResult<TokenSpan, Assign> {
    let (rest, (target, expr)) =
        separated_pair(token(tk::Ident), token(tk::EqAssign), expression)(input)?;
    let lhs = *target.span().fragment();
    Ok((rest, Assign { lhs, expr }))
}
/// Public entry point of the parser: parses `input` into a `Module` by
/// delegating to the `module` parser.
pub fn parse(input: TokenSpan) -> IResult<TokenSpan, Module> {
module(input)
}
#[cfg(test)]
mod test {
    use super::*;
    use nom::combinator::all_consuming;

    /// Binary operations must parse.
    #[test]
    fn test_operation() {
        for source in [" a | b ", " a & b "] {
            operation(source.into()).unwrap();
        }
    }

    /// Single identifiers and operation chains must parse as expressions.
    #[test]
    fn test_expression() {
        for source in [" a ", " a | b ", " a | b | c "] {
            expression(source.into()).unwrap();
        }
    }

    /// Assignment statements must consume their entire input.
    #[test]
    fn test_assignment() {
        // TODO: make wrapper and use for all tests
        for source in [" a = b ", " a = b | c "] {
            all_consuming(assign_statement)(source.into()).unwrap();
        }
    }

    /// Calls must parse with and without arguments and whitespace.
    #[test]
    fn test_call() {
        for source in ["thing ( )", "thing ( a , b , c )", "thing(a,b,c)"] {
            call_item(source.into()).unwrap();
        }
    }
}

67
src/parser/declaration.rs Normal file
View File

@ -0,0 +1,67 @@
use nom::{
combinator::{map, opt},
sequence::{delimited, preceded, separated_pair, tuple},
};
use super::expression::{expression, Expression};
use super::tokens::{token, TokenKind as tk, TokenSpan};
use super::{IResult, Span};
// TODO: allow recursive generics
// TODO: allow expressions again
/// Parses a type name: an `Ident` token with an optional expression wrapped in
/// `LAngle` / `RAngle` tokens after it.
///
/// Whatever is parsed between the angle tokens is dropped; the resulting
/// `TypeName` always carries `generics: ()`.
pub fn typename(input: TokenSpan) -> IResult<TokenSpan, TypeName> {
    let angle_args = opt(delimited(token(tk::LAngle), expression, token(tk::RAngle)));
    let (remaining, (ident, _ignored)) = tuple((token(tk::Ident), angle_args))(input)?;
    let parsed = TypeName {
        name: ident.span(),
        generics: (),
    };
    Ok((remaining, parsed))
}
/// A parsed type name, as produced by `typename`.
#[derive(Debug)]
pub struct TypeName<'a> {
/// Span of the identifier token that names the type.
name: Span<'a>,
/// Placeholder for generic arguments; `typename` parses them but stores nothing.
generics: (),
}
/// A net declaration (`Ident Colon typename`), as produced by `declaration`.
#[derive(Debug)]
pub struct NetDecl<'a> {
/// Span of the identifier token being declared.
pub name: Span<'a>,
/// Declared type, parsed by `typename`.
pub typ: TypeName<'a>,
/// Span of the initial value, if one is recorded.
pub value: Option<Span<'a>>,
}
/// An assignment statement, as produced by `assign_statement`.
#[derive(Debug)]
pub struct Assign<'a> {
/// Text of the identifier token on the left of the `EqAssign` token.
pub lhs: &'a str,
/// Expression on the right of the `EqAssign` token.
pub expr: Expression<'a>,
}
/// Parses an assignment statement of the shape `Ident EqAssign expression`.
///
/// `lhs` is the text fragment of the identifier token; `expr` is the parsed
/// right-hand side.
pub fn assign_statement(input: TokenSpan) -> IResult<TokenSpan, Assign> {
    let (remaining, (target, expr)) =
        separated_pair(token(tk::Ident), token(tk::EqAssign), expression)(input)?;
    let lhs = *target.span().fragment();
    Ok((remaining, Assign { lhs, expr }))
}
/// Parses a net declaration: `Ident Colon typename`, optionally followed by
/// `Assign Number`.
///
/// The span of the optional `Number` token is stored in `NetDecl::value`, so
/// an initial value written in the source is no longer parsed and then
/// silently dropped.
// NOTE(review): `assign_statement` uses `tk::EqAssign` where this parser uses
// `tk::Assign` — confirm which token kind is intended for the initializer.
pub fn declaration(i: TokenSpan) -> IResult<TokenSpan, NetDecl> {
    map(
        tuple((
            separated_pair(token(tk::Ident), token(tk::Colon), typename),
            opt(preceded(token(tk::Assign), token(tk::Number))),
        )),
        |((ident, typ), value)| NetDecl {
            name: ident.span(),
            typ,
            // keep the literal's span instead of discarding it
            value: value.map(|number| number.span()),
        },
    )(i)
}

97
src/parser/expression.rs Normal file
View File

@ -0,0 +1,97 @@
use super::tokens::{token, TokenKind as tk, TokenSpan};
use super::{IResult, Span};
use nom::{
branch::alt,
combinator::map,
multi::separated_list0,
sequence::{delimited, preceded, separated_pair, tuple},
};
/// An operation node of the expression tree, as produced by `operation`.
///
/// In the binary variants `a` is the operand left of the operator token and
/// `b` is the operand right of it.
#[derive(Debug, Clone)]
pub enum Operation<'a> {
/// Operands separated by a `BitAnd` token.
And {
a: Expression<'a>,
b: Expression<'a>,
},
/// Operands separated by a `BitOr` token.
Or {
a: Expression<'a>,
b: Expression<'a>,
},
/// Operands separated by a `BitXor` token.
Xor {
a: Expression<'a>,
b: Expression<'a>,
},
/// The operand that follows a `BitNot` token.
Not(Expression<'a>),
}
/// Parses a single operation.
///
/// The alternatives are attempted in this order: `lhs BitAnd rhs`,
/// `lhs BitOr rhs`, `lhs BitXor rhs`, and finally `BitNot operand`. The left
/// operand is parsed with `expression_nonrecurse` and the right operand with
/// `expression`, so a chain like `a | b | c` groups to the right and operators
/// have no relative precedence. Every binary alternative starts over by parsing
/// the left operand again.
pub fn operation(input: TokenSpan) -> IResult<TokenSpan, Operation> {
    // temporarily given up on before I learn the shunting yard algorithm
    let and = separated_pair(expression_nonrecurse, token(tk::BitAnd), expression);
    let or = separated_pair(expression_nonrecurse, token(tk::BitOr), expression);
    let xor = separated_pair(expression_nonrecurse, token(tk::BitXor), expression);
    let not = preceded(token(tk::BitNot), expression);
    alt((
        map(and, |(left, right)| Operation::And { a: left, b: right }),
        map(or, |(left, right)| Operation::Or { a: left, b: right }),
        map(xor, |(left, right)| Operation::Xor { a: left, b: right }),
        map(not, Operation::Not),
    ))(input)
}
/// A call expression, as produced by `call_item`.
#[derive(Debug, Clone)]
pub struct Call<'a> {
/// Span of the identifier token naming the callee.
pub name: Span<'a>,
/// Argument expressions in source order; may be empty.
pub args: Vec<Expression<'a>>,
}
/// Parses a call: an `Ident` token, then `LParen`, zero or more expressions
/// separated by `Comma` tokens, then `RParen`.
pub fn call_item(input: TokenSpan) -> IResult<TokenSpan, Call> {
    let argument_list = delimited(
        token(tk::LParen),
        separated_list0(token(tk::Comma), expression),
        token(tk::RParen),
    );
    let (remaining, (callee, args)) = tuple((token(tk::Ident), argument_list))(input)?;
    let call = Call {
        name: callee.span(),
        args,
    };
    Ok((remaining, call))
}
/// A node of the expression tree.
#[derive(Debug, Clone)]
pub enum Expression<'a> {
/// Text of an identifier token.
Ident(&'a str),
/// Numeric literal value.
/// NOTE(review): `expression_nonrecurse` currently stores the placeholder 42 for every `Number` token.
Literal(u64),
/// A call with its arguments.
Call(Box<Call<'a>>),
/// An operation; boxed because `Operation` itself contains `Expression` values.
Operation(Box<Operation<'a>>),
}
/// Parses any expression.
///
/// Parser combinators cannot parse left-recursive grammars. To work around
/// that, expressions are split into this recursive entry point and the
/// non-recursive `expression_nonrecurse`; parsers reachable from here as a
/// first step must call `expression_nonrecurse` instead of this function.
pub fn expression(input: TokenSpan) -> IResult<TokenSpan, Expression> {
    // prefer an operation; fall back to the non-recursive forms
    let as_operation = map(operation, |op| Expression::Operation(Box::new(op)));
    alt((as_operation, expression_nonrecurse))(input)
}
/// The portion of the expression grammar that can be parsed without left
/// recursion.
///
/// Alternatives, in order: a `Number` token, a call, a bare identifier, and an
/// expression wrapped in `LParen` / `RParen`. Calls are tried before bare
/// identifiers since both begin with an `Ident` token.
// NOTE(review): every `Number` token currently becomes `Literal(42)`; the
// token's actual value is not read.
fn expression_nonrecurse(input: TokenSpan) -> IResult<TokenSpan, Expression> {
    let number = map(token(tk::Number), |_| Expression::Literal(42));
    let call = map(call_item, |call| Expression::Call(Box::new(call)));
    let name = map(token(tk::Ident), |ident| {
        Expression::Ident(*ident.span().fragment())
    });
    let grouped = delimited(token(tk::LParen), expression, token(tk::RParen));
    alt((number, call, name, grouped))(input)
}

View File

@ -1,8 +1,11 @@
use nom::{
character::complete::{char, one_of},
branch::alt,
bytes::complete::tag,
character::complete::{alpha1, alphanumeric1, char, multispace0, one_of},
combinator::{map, recognize},
error::ParseError,
multi::{many0, many1},
sequence::{preceded, terminated},
sequence::{delimited, pair, preceded, terminated},
};
use crate::parser::{IResult, Span};
@ -23,6 +26,22 @@ pub fn hexadecimal(input: Span) -> IResult<Span, u64> {
)(input)
}
pub fn ws0<'a, F: 'a, O, E: ParseError<Span<'a>>>(
inner: F,
) -> impl FnMut(Span<'a>) -> IResult<Span<'a>, O, E>
where
F: FnMut(Span<'a>) -> IResult<Span<'a>, O, E>,
{
delimited(multispace0, inner, multispace0)
}
/// Recognizes an identifier and returns the span it covers.
///
/// An identifier begins with a run of alphabetic characters or a single `_`
/// and continues with any number of alphanumeric runs or `_` characters.
pub fn identifier(input: Span) -> IResult<Span, Span> {
    let first = alt((alpha1, tag("_")));
    let rest = many0(alt((alphanumeric1, tag("_"))));
    recognize(pair(first, rest))(input)
}
#[cfg(test)]
mod test {
use super::*;

70
src/parser/mod.rs Normal file
View File

@ -0,0 +1,70 @@
pub mod declaration;
pub mod error;
pub mod expression;
mod literals;
pub mod module;
pub mod proc;
pub mod tokens;
use nom::{
branch::alt,
bytes::complete::tag,
character::complete::{alpha1, alphanumeric1, multispace0},
combinator::{map, opt, recognize},
error::{ErrorKind, ParseError},
multi::{many0, separated_list0},
sequence::{delimited, pair, preceded, separated_pair, tuple},
};
use nom_greedyerror::GreedyError;
use nom_locate::LocatedSpan;
/// Source span type: a `&str` fragment wrapped in nom_locate's `LocatedSpan`.
pub type Span<'a> = LocatedSpan<&'a str>;
/// Default parser error type: nom_greedyerror's `GreedyError` carrying nom's `ErrorKind`.
pub type IErr<I> = GreedyError<I, ErrorKind>;
/// `nom::IResult` whose error type defaults to `IErr` (a `GreedyError`, not nom's `VerboseError`).
pub type IResult<I, O, E = IErr<I>> = nom::IResult<I, O, E>;
pub use crate::parser::declaration::{
assign_statement, declaration, typename, Assign, NetDecl, TypeName,
};
pub use crate::parser::expression::{expression, Call, Expression, Operation};
pub use crate::parser::module::{module, Module, ModuleItem, PortDirection};
use crate::parser::tokens::{token, TokenKind as tk, TokenSpan};
/// Public entry point of the parser: parses `input` into a `Module` by
/// delegating to the `module` parser.
pub fn parse(input: TokenSpan) -> IResult<TokenSpan, Module> {
module(input)
}
#[cfg(test)]
mod test {
    // `operation` and `call_item` live in the `expression` submodule and are
    // not re-exported by this module, so `use super::*` alone does not bring
    // them into scope.
    use super::expression::{call_item, operation};
    use super::*;
    use nom::combinator::all_consuming;

    /// Binary operations must parse.
    #[test]
    fn test_operation() {
        operation(" a | b ".into()).unwrap();
        operation(" a & b ".into()).unwrap();
    }

    /// Single identifiers and operation chains must parse as expressions.
    #[test]
    fn test_expression() {
        expression(" a ".into()).unwrap();
        expression(" a | b ".into()).unwrap();
        expression(" a | b | c ".into()).unwrap();
    }

    /// Assignment statements must consume their entire input.
    #[test]
    fn test_assignment() {
        // TODO: make wrapper and use for all tests
        all_consuming(assign_statement)(" a = b ".into()).unwrap();
        all_consuming(assign_statement)(" a = b | c ".into()).unwrap();
    }

    /// Calls must parse with and without arguments and whitespace.
    #[test]
    fn test_call() {
        call_item("thing ( )".into()).unwrap();
        call_item("thing ( a , b , c )".into()).unwrap();
        call_item("thing(a,b,c)".into()).unwrap();
    }
}

View File

@ -1,6 +1,9 @@
//! convert text into a token stream
use super::{identifier, ws0, IResult, Span};
use super::{
literals::{identifier, ws0},
IResult, Span,
};
use nom::{
branch::alt,
bytes::complete::tag,