Compare commits
3 Commits
b428a6d340...73580c4ff0
| Author | SHA1 | Date |
|---|---|---|
| NotAFile | 73580c4ff0 | |
| NotAFile | a2411244f4 | |
| NotAFile | e7d881d9ed | |
src/parser.rs (228 lines)

src/parser.rs (deleted)
@@ -1,228 +0,0 @@
pub mod error;
mod literals;
pub mod module;
pub mod proc;
pub mod tokens;

use nom::{
    branch::alt,
    bytes::complete::tag,
    character::complete::{alpha1, alphanumeric1, multispace0},
    combinator::{map, opt, recognize},
    error::{ErrorKind, ParseError},
    multi::{many0, separated_list0},
    sequence::{delimited, pair, preceded, separated_pair, tuple},
};
use nom_greedyerror::GreedyError;
use nom_locate::LocatedSpan;

// custom span type for nom_locate
pub type Span<'a> = LocatedSpan<&'a str>;

pub type IErr<I> = GreedyError<I, ErrorKind>;
// custom IResult type for VerboseError
pub type IResult<I, O, E = IErr<I>> = nom::IResult<I, O, E>;

pub use crate::parser::module::{module, Module, ModuleItem, PortDirection};
use crate::parser::tokens::{token, TokenKind as tk, TokenSpan};

fn ws0<'a, F: 'a, O, E: ParseError<Span<'a>>>(
    inner: F,
) -> impl FnMut(Span<'a>) -> IResult<Span<'a>, O, E>
where
    F: FnMut(Span<'a>) -> IResult<Span<'a>, O, E>,
{
    delimited(multispace0, inner, multispace0)
}

fn identifier(input: Span) -> IResult<Span, Span> {
    recognize(pair(
        alt((alpha1, tag("_"))),
        many0(alt((alphanumeric1, tag("_")))),
    ))(input)
}

// TODO: allow recursive generics
// TODO: allow expressions again
fn typename(input: TokenSpan) -> IResult<TokenSpan, TypeName> {
    map(
        tuple((
            token(tk::Ident),
            opt(delimited(token(tk::LAngle), expression, token(tk::RAngle))),
        )),
        |(ident, _)| TypeName {
            name: ident.span(),
            generics: (),
        },
    )(input)
}

#[derive(Debug)]
pub struct TypeName<'a> {
    name: Span<'a>,
    generics: (),
}

#[derive(Debug)]
pub struct NetDecl<'a> {
    pub name: Span<'a>,
    pub typ: TypeName<'a>,
    pub value: Option<Span<'a>>,
}

#[derive(Debug)]
pub struct Assign<'a> {
    pub lhs: &'a str,
    pub expr: Expression<'a>,
}

#[derive(Debug, Clone)]
pub enum Operation<'a> {
    And {
        a: Expression<'a>,
        b: Expression<'a>,
    },
    Or {
        a: Expression<'a>,
        b: Expression<'a>,
    },
    Xor {
        a: Expression<'a>,
        b: Expression<'a>,
    },
    Not(Expression<'a>),
}

#[derive(Debug, Clone)]
pub struct Call<'a> {
    pub name: Span<'a>,
    pub args: Vec<Expression<'a>>,
}

#[derive(Debug, Clone)]
pub enum Expression<'a> {
    Ident(&'a str),
    Literal(u64),
    Call(Box<Call<'a>>),
    Operation(Box<Operation<'a>>),
}

// TODO: reallow assignments
fn declaration(i: TokenSpan) -> IResult<TokenSpan, NetDecl> {
    map(
        tuple((
            separated_pair(token(tk::Ident), token(tk::Colon), typename),
            opt(preceded(token(tk::Assign), token(tk::Number))),
        )),
        |((ident, typ), _value)| NetDecl {
            name: ident.span(),
            typ,
            value: None,
        },
    )(i)
}

fn operation(input: TokenSpan) -> IResult<TokenSpan, Operation> {
    // temporarily given up on before I learn the shunting yard algorithm
    alt((
        map(
            separated_pair(expression_nonrecurse, token(tk::BitAnd), expression),
            |(a, b)| Operation::And { a, b },
        ),
        map(
            separated_pair(expression_nonrecurse, token(tk::BitOr), expression),
            |(a, b)| Operation::Or { a, b },
        ),
        map(
            separated_pair(expression_nonrecurse, token(tk::BitXor), expression),
            |(a, b)| Operation::Xor { a, b },
        ),
        map(preceded(token(tk::BitNot), expression), Operation::Not),
    ))(input)
}

fn call_item(input: TokenSpan) -> IResult<TokenSpan, Call> {
    map(
        tuple((
            token(tk::Ident),
            delimited(
                token(tk::LParen),
                separated_list0(token(tk::Comma), expression),
                token(tk::RParen),
            ),
        )),
        |(name, args)| Call {
            name: name.span(),
            args,
        },
    )(input)
}

/// parser combinators can not parse left-recursive grammars. To work around this, we split
/// expressions into a recursive and non-recursive portion.
/// Parsers reachable from this point must call expression_nonrecurse instead
fn expression(input: TokenSpan) -> IResult<TokenSpan, Expression> {
    alt((
        map(operation, |op| Expression::Operation(Box::new(op))),
        expression_nonrecurse,
    ))(input)
}

/// the portion of the expression grammar that can be parsed without left recursion
fn expression_nonrecurse(input: TokenSpan) -> IResult<TokenSpan, Expression> {
    alt((
        map(token(tk::Number), |_| Expression::Literal(42)),
        map(call_item, |call| Expression::Call(Box::new(call))),
        map(token(tk::Ident), |ident| {
            Expression::Ident(*ident.span().fragment())
        }),
        delimited(token(tk::LParen), expression, token(tk::RParen)),
    ))(input)
}

fn assign_statement(input: TokenSpan) -> IResult<TokenSpan, Assign> {
    map(
        separated_pair(token(tk::Ident), token(tk::EqAssign), expression),
        |(lhs, expr)| Assign {
            lhs: (*lhs.span().fragment()),
            expr,
        },
    )(input)
}

pub fn parse(input: TokenSpan) -> IResult<TokenSpan, Module> {
    module(input)
}

#[cfg(test)]
mod test {
    use super::*;
    use nom::combinator::all_consuming;

    #[test]
    fn test_operation() {
        operation(" a | b ".into()).unwrap();
        operation(" a & b ".into()).unwrap();
    }

    #[test]
    fn test_expression() {
        expression(" a ".into()).unwrap();
        expression(" a | b ".into()).unwrap();
        expression(" a | b | c ".into()).unwrap();
    }

    #[test]
    fn test_assignment() {
        // TODO: make wrapper and use for all tests
        all_consuming(assign_statement)(" a = b ".into()).unwrap();
        all_consuming(assign_statement)(" a = b | c ".into()).unwrap();
    }

    #[test]
    fn test_call() {
        call_item("thing ( )".into()).unwrap();
        call_item("thing ( a , b , c )".into()).unwrap();
        call_item("thing(a,b,c)".into()).unwrap();
    }
}

src/parser/declaration.rs (new file)
@@ -0,0 +1,67 @@
use nom::{
    combinator::{map, opt},
    sequence::{delimited, preceded, separated_pair, tuple},
};

use super::expression::{expression, Expression};
use super::tokens::{token, TokenKind as tk, TokenSpan};
use super::{IResult, Span};

// TODO: allow recursive generics
// TODO: allow expressions again
pub fn typename(input: TokenSpan) -> IResult<TokenSpan, TypeName> {
    map(
        tuple((
            token(tk::Ident),
            opt(delimited(token(tk::LAngle), expression, token(tk::RAngle))),
        )),
        |(ident, _)| TypeName {
            name: ident.span(),
            generics: (),
        },
    )(input)
}

#[derive(Debug)]
pub struct TypeName<'a> {
    name: Span<'a>,
    generics: (),
}

#[derive(Debug)]
pub struct NetDecl<'a> {
    pub name: Span<'a>,
    pub typ: TypeName<'a>,
    pub value: Option<Span<'a>>,
}

#[derive(Debug)]
pub struct Assign<'a> {
    pub lhs: &'a str,
    pub expr: Expression<'a>,
}

pub fn assign_statement(input: TokenSpan) -> IResult<TokenSpan, Assign> {
    map(
        separated_pair(token(tk::Ident), token(tk::EqAssign), expression),
        |(lhs, expr)| Assign {
            lhs: (*lhs.span().fragment()),
            expr,
        },
    )(input)
}

// TODO: reallow assignments
pub fn declaration(i: TokenSpan) -> IResult<TokenSpan, NetDecl> {
    map(
        tuple((
            separated_pair(token(tk::Ident), token(tk::Colon), typename),
            opt(preceded(token(tk::Assign), token(tk::Number))),
        )),
        |((ident, typ), _value)| NetDecl {
            name: ident.span(),
            typ,
            value: None,
        },
    )(i)
}

src/parser/expression.rs (new file)
@@ -0,0 +1,97 @@
use super::tokens::{token, TokenKind as tk, TokenSpan};
use super::{IResult, Span};
use nom::{
    branch::alt,
    combinator::map,
    multi::separated_list0,
    sequence::{delimited, preceded, separated_pair, tuple},
};

#[derive(Debug, Clone)]
pub enum Operation<'a> {
    And {
        a: Expression<'a>,
        b: Expression<'a>,
    },
    Or {
        a: Expression<'a>,
        b: Expression<'a>,
    },
    Xor {
        a: Expression<'a>,
        b: Expression<'a>,
    },
    Not(Expression<'a>),
}

pub fn operation(input: TokenSpan) -> IResult<TokenSpan, Operation> {
    // temporarily given up on before I learn the shunting yard algorithm
    alt((
        map(
            separated_pair(expression_nonrecurse, token(tk::BitAnd), expression),
            |(a, b)| Operation::And { a, b },
        ),
        map(
            separated_pair(expression_nonrecurse, token(tk::BitOr), expression),
            |(a, b)| Operation::Or { a, b },
        ),
        map(
            separated_pair(expression_nonrecurse, token(tk::BitXor), expression),
            |(a, b)| Operation::Xor { a, b },
        ),
        map(preceded(token(tk::BitNot), expression), Operation::Not),
    ))(input)
}

#[derive(Debug, Clone)]
pub struct Call<'a> {
    pub name: Span<'a>,
    pub args: Vec<Expression<'a>>,
}

pub fn call_item(input: TokenSpan) -> IResult<TokenSpan, Call> {
    map(
        tuple((
            token(tk::Ident),
            delimited(
                token(tk::LParen),
                separated_list0(token(tk::Comma), expression),
                token(tk::RParen),
            ),
        )),
        |(name, args)| Call {
            name: name.span(),
            args,
        },
    )(input)
}

#[derive(Debug, Clone)]
pub enum Expression<'a> {
    Ident(&'a str),
    Literal(u64),
    Call(Box<Call<'a>>),
    Operation(Box<Operation<'a>>),
}

/// parser combinators can not parse left-recursive grammars. To work around this, we split
/// expressions into a recursive and non-recursive portion.
/// Parsers reachable from this point must call expression_nonrecurse instead
pub fn expression(input: TokenSpan) -> IResult<TokenSpan, Expression> {
    alt((
        map(operation, |op| Expression::Operation(Box::new(op))),
        expression_nonrecurse,
    ))(input)
}

/// the portion of the expression grammar that can be parsed without left recursion
fn expression_nonrecurse(input: TokenSpan) -> IResult<TokenSpan, Expression> {
    alt((
        map(token(tk::Number), |_| Expression::Literal(42)),
        map(call_item, |call| Expression::Call(Box::new(call))),
        map(token(tk::Ident), |ident| {
            Expression::Ident(*ident.span().fragment())
        }),
        delimited(token(tk::LParen), expression, token(tk::RParen)),
    ))(input)
}
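
The doc comment on expression() spells out the core trick in this file: parser combinators cannot handle left-recursive rules, so the binary-operation rule always starts with the non-recursive expression_nonrecurse and only recurses on its right-hand side. A standalone sketch of the same pattern (plain &str input and a made-up Expr type, purely illustrative rather than this crate's token-based API):

use nom::{
    branch::alt,
    bytes::complete::tag,
    character::complete::{alpha1, multispace0},
    combinator::map,
    sequence::{delimited, separated_pair},
    IResult,
};

#[derive(Debug)]
enum Expr {
    Ident(String),
    Or(Box<Expr>, Box<Expr>),
}

// non-recursive portion: identifiers and parenthesized expressions
fn term(i: &str) -> IResult<&str, Expr> {
    delimited(
        multispace0,
        alt((
            map(alpha1, |s: &str| Expr::Ident(s.to_owned())),
            delimited(tag("("), expr, tag(")")),
        )),
        multispace0,
    )(i)
}

// recursive portion: `term "|" expr`, falling back to a bare term,
// so the recursive call never sits in leftmost position
fn expr(i: &str) -> IResult<&str, Expr> {
    alt((
        map(separated_pair(term, tag("|"), expr), |(a, b)| {
            Expr::Or(Box::new(a), Box::new(b))
        }),
        term,
    ))(i)
}

fn main() {
    let (rest, tree) = expr("a | b | c").unwrap();
    assert!(rest.is_empty());
    println!("{tree:?}"); // Or(Ident("a"), Or(Ident("b"), Ident("c")))
}

This encoding also explains the shunting-yard TODO in operation(): every operator is parsed at a single precedence level and associates to the right, so a proper precedence pass remains future work.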

src/parser/literals.rs
@@ -1,8 +1,11 @@
 use nom::{
-    character::complete::{char, one_of},
+    branch::alt,
+    bytes::complete::tag,
+    character::complete::{alpha1, alphanumeric1, char, multispace0, one_of},
     combinator::{map, recognize},
+    error::ParseError,
     multi::{many0, many1},
-    sequence::{preceded, terminated},
+    sequence::{delimited, pair, preceded, terminated},
 };
 
 use crate::parser::{IResult, Span};
@@ -23,6 +26,22 @@ pub fn hexadecimal(input: Span) -> IResult<Span, u64> {
     )(input)
 }
 
+pub fn ws0<'a, F: 'a, O, E: ParseError<Span<'a>>>(
+    inner: F,
+) -> impl FnMut(Span<'a>) -> IResult<Span<'a>, O, E>
+where
+    F: FnMut(Span<'a>) -> IResult<Span<'a>, O, E>,
+{
+    delimited(multispace0, inner, multispace0)
+}
+
+pub fn identifier(input: Span) -> IResult<Span, Span> {
+    recognize(pair(
+        alt((alpha1, tag("_"))),
+        many0(alt((alphanumeric1, tag("_")))),
+    ))(input)
+}
+
 #[cfg(test)]
 mod test {
     use super::*;
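
ws0 and identifier now live in literals and are conventional nom helpers: ws0 wraps any parser so it tolerates surrounding whitespace, and identifier recognizes an underscore-or-letter start followed by alphanumerics and underscores. A minimal usage sketch of the ws0 shape, specialized to &str instead of the crate's Span type (illustrative only, not code from this diff):

use nom::{
    character::complete::{digit1, multispace0},
    error::ParseError,
    sequence::delimited,
    IResult,
};

// same shape as the ws0 added above, but over &str for the example
fn ws0<'a, F: 'a, O, E: ParseError<&'a str>>(
    inner: F,
) -> impl FnMut(&'a str) -> IResult<&'a str, O, E>
where
    F: FnMut(&'a str) -> IResult<&'a str, O, E>,
{
    delimited(multispace0, inner, multispace0)
}

fn main() {
    // the wrapped parser ignores the padding around "42"
    let mut number = ws0(digit1::<&str, nom::error::Error<&str>>);
    assert_eq!(number("  42  "), Ok(("", "42")));
}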

src/parser/mod.rs (new file)
@@ -0,0 +1,70 @@
pub mod declaration;
pub mod error;
pub mod expression;
mod literals;
pub mod module;
pub mod proc;
pub mod tokens;

use nom::{
    branch::alt,
    bytes::complete::tag,
    character::complete::{alpha1, alphanumeric1, multispace0},
    combinator::{map, opt, recognize},
    error::{ErrorKind, ParseError},
    multi::{many0, separated_list0},
    sequence::{delimited, pair, preceded, separated_pair, tuple},
};
use nom_greedyerror::GreedyError;
use nom_locate::LocatedSpan;

// custom span type for nom_locate
pub type Span<'a> = LocatedSpan<&'a str>;

pub type IErr<I> = GreedyError<I, ErrorKind>;
// custom IResult type for VerboseError
pub type IResult<I, O, E = IErr<I>> = nom::IResult<I, O, E>;

pub use crate::parser::declaration::{
    assign_statement, declaration, typename, Assign, NetDecl, TypeName,
};
pub use crate::parser::expression::{expression, Call, Expression, Operation};
pub use crate::parser::module::{module, Module, ModuleItem, PortDirection};
use crate::parser::tokens::{token, TokenKind as tk, TokenSpan};

pub fn parse(input: TokenSpan) -> IResult<TokenSpan, Module> {
    module(input)
}

#[cfg(test)]
mod test {
    use super::*;
    use nom::combinator::all_consuming;

    #[test]
    fn test_operation() {
        operation(" a | b ".into()).unwrap();
        operation(" a & b ".into()).unwrap();
    }

    #[test]
    fn test_expression() {
        expression(" a ".into()).unwrap();
        expression(" a | b ".into()).unwrap();
        expression(" a | b | c ".into()).unwrap();
    }

    #[test]
    fn test_assignment() {
        // TODO: make wrapper and use for all tests
        all_consuming(assign_statement)(" a = b ".into()).unwrap();
        all_consuming(assign_statement)(" a = b | c ".into()).unwrap();
    }

    #[test]
    fn test_call() {
        call_item("thing ( )".into()).unwrap();
        call_item("thing ( a , b , c )".into()).unwrap();
        call_item("thing(a,b,c)".into()).unwrap();
    }
}
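
mod.rs keeps the crate-wide parser plumbing: Span is nom_locate's LocatedSpan, so every fragment a parser returns remembers its position in the source, and IErr wraps nom_greedyerror's GreedyError so that when alt() backtracks, the error reported is the one from the furthest parse position. A small standalone illustration of what LocatedSpan adds (assumes nom_locate's usual API, independent of this crate):

use nom::bytes::complete::tag;
use nom_locate::LocatedSpan;

type Span<'a> = LocatedSpan<&'a str>;

// any nom parser can run over a LocatedSpan input
fn keyword(i: Span) -> nom::IResult<Span, Span> {
    tag("module")(i)
}

fn main() {
    let (rest, kw) = keyword(Span::new("module top")).unwrap();
    assert_eq!(*kw.fragment(), "module");   // the matched text
    assert_eq!(kw.location_offset(), 0);    // byte offset where the match started
    assert_eq!(rest.location_offset(), 6);  // the remainder knows its own position
    assert_eq!(rest.location_line(), 1);    // 1-based line number
}

This is the same Span that the AST structs store via ident.span(), which keeps source locations available for later error reporting.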

src/parser/tokens.rs
@@ -1,6 +1,9 @@
 //! convert text into a token stream
 
-use super::{identifier, ws0, IResult, Span};
+use super::{
+    literals::{identifier, ws0},
+    IResult, Span,
+};
 use nom::{
     branch::alt,
     bytes::complete::tag,