//! convert text into a token stream use super::{ error::{Error, InputPos}, literals::{const_bits, identifier, ws0}, IResult, Span, }; use nom::{ branch::alt, bytes::complete::{is_not, tag}, character::complete::{anychar, digit1, line_ending}, combinator::{consumed, map, recognize}, error::ParseError, multi::many0, sequence::tuple, InputTake, }; use std::fmt; #[derive(Clone, Copy)] pub struct Token<'a> { span: Span<'a>, kind: TokenKind, } impl fmt::Debug for Token<'_> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!( f, "{:?} @{} {:?}", self.kind, self.span.location_offset(), self.span.fragment() )?; Ok(()) } } impl<'a> Token<'a> { fn new(span: Span<'a>, kind: TokenKind) -> Self { Self { span, kind } } pub fn span(self) -> Span<'a> { self.span } pub fn kind(&self) -> TokenKind { self.kind } } #[derive(Debug, PartialEq, Copy, Clone)] pub enum TokenKind { // no whitespace, for now // no token trees either, for now // Braces LParen, RParen, LAngle, RAngle, LBrace, RBrace, LSquare, RSquare, // single chars Colon, Semicolon, Comma, BitNot, BitAnd, BitOr, BitXor, EqAssign, Not, // Multi Chars FatArrow, RArrow, // Literals Ident, Number, Constant, // Keywords Module, Assign, Match, State, Proc, Comb, // whitespace Comment, // Error Error, Eof, } #[derive(Debug, Copy, Clone)] pub struct TokenSpan<'a> { rest: &'a [Token<'a>], pos: usize, } impl<'a> TokenSpan<'a> { pub fn new(rest: &'a [Token<'a>]) -> Self { Self { rest, pos: 0 } } pub fn with_pos(rest: &'a [Token<'a>], pos: usize) -> Self { Self { rest, pos } } pub fn first(&self) -> Option<&Token> { self.rest.first() } pub fn last(&self) -> Option<&Token> { self.rest.last() } } impl InputTake for TokenSpan<'_> { fn take(&self, count: usize) -> Self { TokenSpan::with_pos(&self.rest[..count], self.pos + count) } fn take_split(&self, count: usize) -> (Self, Self) { let (head, tail) = &self.rest.split_at(count); ( TokenSpan::with_pos(head, self.pos), TokenSpan::with_pos(tail, self.pos + count), ) } } impl nom::InputLength for TokenSpan<'_> { fn input_len(&self) -> usize { self.rest.len() } } impl InputPos for TokenSpan<'_> { fn position(&self) -> usize { self.pos } } /// combinator that matches a token kind pub fn token<'a>(kind: TokenKind) -> impl FnMut(TokenSpan<'a>) -> IResult { move |input: TokenSpan| { let next = if let Some(i) = input.rest.first() { *i } else { return Err(nom::Err::Error(Error::from_error_kind( input, nom::error::ErrorKind::Eof, ))); }; // TODO: HACKS HACKS HACKS EWW if next.kind == TokenKind::Comment { let (_, tail) = input.take_split(1); return token(kind)(tail); } if next.kind == kind { let rest = TokenSpan::with_pos(&input.rest[1..], input.pos + 1); Ok((rest, next)) } else { Err(nom::Err::Error(Error::from_tokenkind(input, kind))) } } } pub fn lex(input: Span) -> IResult> { many0(ws0(alt(( lex_keywords, lex_trivials, lex_literals, lex_braces, lex_punctuation, map(recognize(anychar), |span| { Token::new(span, TokenKind::Error) }), ))))(input) } fn lex_braces(input: Span) -> IResult { map( consumed(alt(( map(tag("("), |_| TokenKind::LParen), map(tag(")"), |_| TokenKind::RParen), map(tag("<"), |_| TokenKind::LAngle), map(tag(">"), |_| TokenKind::RAngle), map(tag("{"), |_| TokenKind::LBrace), map(tag("}"), |_| TokenKind::RBrace), map(tag("["), |_| TokenKind::LSquare), map(tag("]"), |_| TokenKind::RSquare), ))), |(span, kind)| Token::new(span, kind), )(input) } fn lex_literals(input: Span) -> IResult { map( consumed(alt(( map(const_bits, |_| TokenKind::Constant), map(identifier, |_| TokenKind::Ident), map(digit1, |_| TokenKind::Number), ))), |(span, kind)| Token::new(span, kind), )(input) } fn lex_punctuation(input: Span) -> IResult { map( consumed(alt(( map(tag(":"), |_| TokenKind::Colon), map(tag(";"), |_| TokenKind::Semicolon), map(tag(","), |_| TokenKind::Comma), map(tag("->"), |_| TokenKind::RArrow), map(tag("=>"), |_| TokenKind::FatArrow), map(tag("~"), |_| TokenKind::BitNot), map(tag("&"), |_| TokenKind::BitAnd), map(tag("^"), |_| TokenKind::BitXor), map(tag("|"), |_| TokenKind::BitOr), map(tag("!"), |_| TokenKind::Not), map(tag("="), |_| TokenKind::EqAssign), ))), |(span, kind)| Token::new(span, kind), )(input) } fn lex_keywords(input: Span) -> IResult { map( consumed(alt(( map(tag("module"), |_| TokenKind::Module), map(tag("assign"), |_| TokenKind::Assign), map(tag("match"), |_| TokenKind::Match), map(tag("proc"), |_| TokenKind::Proc), map(tag("comb"), |_| TokenKind::Comb), map(tag("state"), |_| TokenKind::State), ))), |(span, kind)| Token::new(span, kind), )(input) } fn lex_trivials(input: Span) -> IResult { map( consumed(alt((map( tuple((tag("//"), is_not("\r\n"), line_ending)), |_| TokenKind::Comment, ),))), |(span, kind)| Token::new(span, kind), )(input) }