//! convert text into a token stream use std::io; use std::fmt; use super::{identifier, ws0, IResult, Span}; use nom::{ branch::alt, bytes::complete::tag, character::complete::{digit1, anychar}, combinator::{consumed, map, recognize}, multi::many0, error::ParseError, }; pub struct Token<'a> { span: Span<'a>, kind: TokenKind, } impl fmt::Debug for Token<'_> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{:?} @{} {:?}", self.kind, self.span.location_offset(), self.span.fragment())?; Ok(()) } } impl<'a> Token<'a> { fn new(span: Span<'a>, kind: TokenKind) -> Self { Self { span, kind } } } pub fn pretty_tokens(mut w: impl io::Write, toks: &[Token]) -> io::Result<()> { for tok in toks { writeln!(w, "{:?}", tok)?; }; Ok(()) } #[derive(Debug, PartialEq, Clone)] pub enum TokenKind { // no whitespace, for now // no token trees either, for now // Braces LParen, RParen, LAngle, RAngle, LBrace, RBrace, LSquare, RSquare, // single chars Colon, Semicolon, Comma, Caret, Tilde, Assign, // Multi Chars RArrow, // Literals Ident, Number, // Keywords Module, // Error Error, } #[derive(Debug)] pub struct TokenSpan<'a> { rest: &'a [Token<'a>], pos: usize, } impl<'a> TokenSpan<'a> { pub fn new(rest: &'a [Token<'a>]) -> Self { Self { rest, pos: 0 } } pub fn with_pos(rest: &'a [Token<'a>], pos: usize) -> Self { Self { rest, pos } } } impl nom::InputTake for TokenSpan<'_> { fn take(&self, count: usize) -> Self { TokenSpan::with_pos(&self.rest[..count], self.pos + count) } fn take_split(&self, count: usize) -> (Self, Self) { let (head, tail) = &self.rest.split_at(count); (TokenSpan::with_pos(&head, self.pos), TokenSpan::with_pos(&tail, self.pos + count)) } } impl nom_greedyerror::Position for TokenSpan<'_> { fn position(&self) -> usize { self.pos } } /// combinator that matches a token kind pub fn token<'a, E>(kind: TokenKind) -> impl FnMut(TokenSpan<'a>) -> nom::IResult where E: ParseError> { move |input: TokenSpan| { let next = &input.rest[0]; if next.kind == kind.clone() { let rest = TokenSpan::new(&input.rest[1..]); Ok((rest, next)) } else { Err(nom::Err::Error(E::from_error_kind(input, nom::error::ErrorKind::Tag))) } } } pub fn lex(input: Span) -> IResult> { many0(ws0(alt(( map(tag("module"), |span| Token::new(span, TokenKind::Module)), lex_literals, lex_braces, lex_punctuation, map(recognize(anychar), |span| Token::new(span, TokenKind::Error)), ))))(input) } fn lex_braces(input: Span) -> IResult { map( consumed(alt(( map(tag("("), |_| TokenKind::LParen), map(tag(")"), |_| TokenKind::RParen), map(tag("<"), |_| TokenKind::LAngle), map(tag(">"), |_| TokenKind::RAngle), map(tag("{"), |_| TokenKind::LBrace), map(tag("}"), |_| TokenKind::RBrace), map(tag("["), |_| TokenKind::LSquare), map(tag("]"), |_| TokenKind::RSquare), ))), |(span, kind)| Token::new(span, kind), )(input) } fn lex_literals(input: Span) -> IResult { map( consumed(alt(( map(identifier, |_| TokenKind::Ident), map(digit1, |_| TokenKind::Number), ))), |(span, kind)| Token::new(span, kind), )(input) } fn lex_punctuation(input: Span) -> IResult { map( consumed(alt(( map(tag(":"), |_| TokenKind::Colon), map(tag(";"), |_| TokenKind::Semicolon), map(tag(","), |_| TokenKind::Comma), map(tag("^"), |_| TokenKind::Caret), map(tag("->"), |_| TokenKind::RArrow), map(tag("~"), |_| TokenKind::Tilde), map(tag("="), |_| TokenKind::Assign), ))), |(span, kind)| Token::new(span, kind), )(input) }