//! convert text into a token stream use super::{ literals::{identifier, ws0}, IResult, Span, }; use nom::{ branch::alt, bytes::complete::tag, character::complete::{anychar, digit1}, combinator::{consumed, map, recognize}, error::ParseError, multi::many0, }; use std::fmt; pub struct Token<'a> { span: Span<'a>, kind: TokenKind, } impl fmt::Debug for Token<'_> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!( f, "{:?} @{} {:?}", self.kind, self.span.location_offset(), self.span.fragment() )?; Ok(()) } } impl<'a> Token<'a> { fn new(span: Span<'a>, kind: TokenKind) -> Self { Self { span, kind } } pub fn span(&self) -> Span { self.span } } #[derive(Debug, PartialEq, Clone)] pub enum TokenKind { // no whitespace, for now // no token trees either, for now // Braces LParen, RParen, LAngle, RAngle, LBrace, RBrace, LSquare, RSquare, // single chars Colon, Semicolon, Comma, BitNot, BitAnd, BitOr, BitXor, EqAssign, // Multi Chars FatArrow, RArrow, // Literals Ident, Number, // Keywords Module, Assign, Match, Proc, // Error Error, } #[derive(Debug, Clone)] pub struct TokenSpan<'a> { rest: &'a [Token<'a>], pos: usize, } impl<'a> TokenSpan<'a> { pub fn new(rest: &'a [Token<'a>]) -> Self { Self { rest, pos: 0 } } pub fn with_pos(rest: &'a [Token<'a>], pos: usize) -> Self { Self { rest, pos } } } impl nom::InputTake for TokenSpan<'_> { fn take(&self, count: usize) -> Self { TokenSpan::with_pos(&self.rest[..count], self.pos + count) } fn take_split(&self, count: usize) -> (Self, Self) { let (head, tail) = &self.rest.split_at(count); ( TokenSpan::with_pos(head, self.pos), TokenSpan::with_pos(tail, self.pos + count), ) } } impl nom::InputLength for TokenSpan<'_> { fn input_len(&self) -> usize { self.rest.len() } } impl nom_greedyerror::Position for TokenSpan<'_> { fn position(&self) -> usize { self.pos } } /// combinator that matches a token kind pub fn token<'a, E>( kind: TokenKind, ) -> impl FnMut(TokenSpan<'a>) -> nom::IResult where E: ParseError>, { move |input: TokenSpan| { let next = &input.rest[0]; if next.kind == kind.clone() { let rest = TokenSpan::with_pos(&input.rest[1..], input.pos + 1); Ok((rest, next)) } else { Err(nom::Err::Error(E::from_error_kind( input, // TODO: Proper errors here nom::error::ErrorKind::Tag, ))) } } } pub fn lex(input: Span) -> IResult> { many0(ws0(alt(( lex_keywords, lex_literals, lex_braces, lex_punctuation, map(recognize(anychar), |span| { Token::new(span, TokenKind::Error) }), ))))(input) } fn lex_braces(input: Span) -> IResult { map( consumed(alt(( map(tag("("), |_| TokenKind::LParen), map(tag(")"), |_| TokenKind::RParen), map(tag("<"), |_| TokenKind::LAngle), map(tag(">"), |_| TokenKind::RAngle), map(tag("{"), |_| TokenKind::LBrace), map(tag("}"), |_| TokenKind::RBrace), map(tag("["), |_| TokenKind::LSquare), map(tag("]"), |_| TokenKind::RSquare), ))), |(span, kind)| Token::new(span, kind), )(input) } fn lex_literals(input: Span) -> IResult { map( consumed(alt(( map(identifier, |_| TokenKind::Ident), map(digit1, |_| TokenKind::Number), ))), |(span, kind)| Token::new(span, kind), )(input) } fn lex_punctuation(input: Span) -> IResult { map( consumed(alt(( map(tag(":"), |_| TokenKind::Colon), map(tag(";"), |_| TokenKind::Semicolon), map(tag(","), |_| TokenKind::Comma), map(tag("->"), |_| TokenKind::RArrow), map(tag("=>"), |_| TokenKind::FatArrow), map(tag("~"), |_| TokenKind::BitNot), map(tag("&"), |_| TokenKind::BitAnd), map(tag("^"), |_| TokenKind::BitXor), map(tag("|"), |_| TokenKind::BitOr), map(tag("="), |_| TokenKind::EqAssign), ))), |(span, kind)| Token::new(span, kind), )(input) } fn lex_keywords(input: Span) -> IResult { map( consumed(alt(( map(tag("module"), |_| TokenKind::Module), map(tag("assign"), |_| TokenKind::Assign), map(tag("match"), |_| TokenKind::Match), map(tag("proc"), |_| TokenKind::Proc), ))), |(span, kind)| Token::new(span, kind), )(input) }