// futilehdl/src/parser/tokens.rs

//! convert text into a token stream
use super::{
    error::{Error, InputPos},
    literals::{const_bits, identifier, ws0},
    IResult, Span,
};
use nom::{
    branch::alt,
    bytes::complete::{is_not, tag, take_until},
    character::complete::{anychar, digit1, line_ending},
    combinator::{consumed, map, opt, recognize},
    error::ParseError,
    multi::many0,
    sequence::tuple,
    InputTake,
};
use std::fmt;
/// A single lexed token: its [`TokenKind`] plus the exact source
/// region it was produced from.
#[derive(Clone, Copy)]
pub struct Token<'a> {
    // source slice covered by this token; carries the offset printed
    // by the `Debug` impl for error reporting
    span: Span<'a>,
    // classification assigned by the lexer
    kind: TokenKind,
}
impl fmt::Debug for Token<'_> {
    /// Render as `Kind @offset "text"`, e.g. `Ident @4 "foo"`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // `write!` already evaluates to `fmt::Result`; return it
        // directly instead of `?`-propagating and re-wrapping in
        // `Ok(())` as before.
        write!(
            f,
            "{:?} @{} {:?}",
            self.kind,
            self.span.location_offset(),
            self.span.fragment()
        )
    }
}
impl<'a> Token<'a> {
    /// Construct a token covering `span`, classified as `kind`.
    fn new(span: Span<'a>, kind: TokenKind) -> Self {
        Self { span, kind }
    }
    /// The source region this token was lexed from.
    // takes `self` by value (Token is `Copy`), so the returned span's
    // lifetime is not tied to a borrow of the token
    pub fn span(self) -> Span<'a> {
        self.span
    }
    /// The token's classification.
    pub fn kind(&self) -> TokenKind {
        self.kind
    }
}
/// Classification of a lexed [`Token`].
#[derive(Debug, PartialEq, Copy, Clone)]
pub enum TokenKind {
    // no whitespace tokens, for now (the lexer strips it via `ws0`)
    // no token trees either, for now
    // Braces / brackets
    LParen,
    RParen,
    LAngle,
    RAngle,
    LBrace,
    RBrace,
    LSquare,
    RSquare,
    // single-character punctuation and operators
    Colon,
    Semicolon,
    Comma,
    BitNot,
    BitAnd,
    BitOr,
    BitXor,
    EqAssign,
    Not,
    // multi-character operators
    FatArrow, // `=>`
    RArrow,   // `->`
    // Literals
    Ident,
    Number,
    Constant,
    // Keywords
    Module,
    Assign,
    Match,
    State,
    Proc,
    Comb,
    // trivia: comments stay in the stream and are skipped by `token`
    Comment,
    /// a character no sub-lexer accepted; lets `lex` keep making
    /// progress instead of failing outright
    Error,
}
/// A slice of the token stream; input type for the token-level
/// parsers (the stage after lexing).
#[derive(Debug, Copy, Clone)]
pub struct TokenSpan<'a> {
    // remaining tokens
    rest: &'a [Token<'a>],
    // absolute index of `rest[0]` in the original token list;
    // exposed through `InputPos` for error positions
    pos: usize,
}
impl<'a> TokenSpan<'a> {
    /// Wrap a freshly lexed token slice, starting at position 0.
    pub fn new(rest: &'a [Token<'a>]) -> Self {
        Self { rest, pos: 0 }
    }
    /// Wrap a token slice whose first token sits at absolute index
    /// `pos` of the original stream.
    pub fn with_pos(rest: &'a [Token<'a>], pos: usize) -> Self {
        Self { rest, pos }
    }
    /// First remaining token, if any.
    pub fn first(&self) -> Option<&Token> {
        self.rest.first()
    }
    /// Last remaining token, if any.
    pub fn last(&self) -> Option<&Token> {
        self.rest.last()
    }
}
/// nom integration: lets nom combinators split the token stream.
impl InputTake for TokenSpan<'_> {
    /// Return the first `count` tokens as a new span.
    ///
    /// The prefix still starts at this span's position, so it keeps
    /// `self.pos`. (Previously `pos` was advanced by `count` here,
    /// which disagreed with the `head` half of `take_split` and
    /// reported wrong positions for the taken tokens.)
    fn take(&self, count: usize) -> Self {
        TokenSpan::with_pos(&self.rest[..count], self.pos)
    }
    /// Split at `count`: `head` keeps the current position, `tail`
    /// starts `count` tokens later.
    fn take_split(&self, count: usize) -> (Self, Self) {
        // no `&` on the tuple: the previous code borrowed the
        // temporary and relied on deref coercion to pass `&&[Token]`
        let (head, tail) = self.rest.split_at(count);
        (
            TokenSpan::with_pos(head, self.pos),
            TokenSpan::with_pos(tail, self.pos + count),
        )
    }
}
impl nom::InputLength for TokenSpan<'_> {
    /// Number of tokens remaining in this span.
    fn input_len(&self) -> usize {
        self.rest.len()
    }
}
impl InputPos for TokenSpan<'_> {
    /// Absolute index of this span's first token in the full stream.
    fn position(&self) -> usize {
        self.pos
    }
}
/// combinator that matches a token kind
pub fn token<'a>(kind: TokenKind) -> impl FnMut(TokenSpan<'a>) -> IResult<TokenSpan, Token> {
move |input: TokenSpan| {
let next = if let Some(i) = input.rest.first() {
*i
} else {
return Err(nom::Err::Error(Error::from_error_kind(
input,
nom::error::ErrorKind::Eof,
)));
};
// TODO: HACKS HACKS HACKS EWW
if next.kind == TokenKind::Comment {
let (_, tail) = input.take_split(1);
return token(kind)(tail);
}
if next.kind == kind {
let rest = TokenSpan::with_pos(&input.rest[1..], input.pos + 1);
Ok((rest, next))
} else {
Err(nom::Err::Error(Error::from_tokenkind(input, kind)))
}
}
}
/// First stage: turn raw source text into a flat token list.
///
/// Each round strips leading whitespace (`ws0`) and tries the
/// sub-lexers in order; keywords are tried before literals so an
/// identifier cannot shadow a keyword.
pub fn lex(input: Span) -> IResult<Span, Vec<Token>> {
    many0(ws0(alt((
        lex_keywords,
        lex_trivials,
        lex_literals,
        lex_braces,
        lex_punctuation,
        // fallback: consume one char as an explicit `Error` token so
        // lexing always makes progress instead of aborting
        map(recognize(anychar), |span| {
            Token::new(span, TokenKind::Error)
        }),
    ))))(input)
}
/// Lex a single bracket character into its bracket token kind.
fn lex_braces(input: Span) -> IResult<Span, Token> {
    // classify the bracket character...
    let bracket = alt((
        map(tag("("), |_| TokenKind::LParen),
        map(tag(")"), |_| TokenKind::RParen),
        map(tag("<"), |_| TokenKind::LAngle),
        map(tag(">"), |_| TokenKind::RAngle),
        map(tag("{"), |_| TokenKind::LBrace),
        map(tag("}"), |_| TokenKind::RBrace),
        map(tag("["), |_| TokenKind::LSquare),
        map(tag("]"), |_| TokenKind::RSquare),
    ));
    // ...and keep the consumed span so the token points at the source
    let (rest, (span, kind)) = consumed(bracket)(input)?;
    Ok((rest, Token::new(span, kind)))
}
/// Lex constants, identifiers and plain numbers.
///
/// Tried in order: `const_bits` has priority over identifiers, which
/// have priority over bare digit runs.
fn lex_literals(input: Span) -> IResult<Span, Token> {
    let literal = alt((
        map(const_bits, |_| TokenKind::Constant),
        map(identifier, |_| TokenKind::Ident),
        map(digit1, |_| TokenKind::Number),
    ));
    let (rest, (span, kind)) = consumed(literal)(input)?;
    Ok((rest, Token::new(span, kind)))
}
fn lex_punctuation(input: Span) -> IResult<Span, Token> {
map(
consumed(alt((
map(tag(":"), |_| TokenKind::Colon),
map(tag(";"), |_| TokenKind::Semicolon),
map(tag(","), |_| TokenKind::Comma),
map(tag("->"), |_| TokenKind::RArrow),
map(tag("=>"), |_| TokenKind::FatArrow),
map(tag("~"), |_| TokenKind::BitNot),
map(tag("&"), |_| TokenKind::BitAnd),
map(tag("^"), |_| TokenKind::BitXor),
map(tag("|"), |_| TokenKind::BitOr),
map(tag("!"), |_| TokenKind::Not),
map(tag("="), |_| TokenKind::EqAssign),
))),
|(span, kind)| Token::new(span, kind),
)(input)
}
fn lex_keywords(input: Span) -> IResult<Span, Token> {
map(
consumed(alt((
map(tag("module"), |_| TokenKind::Module),
map(tag("assign"), |_| TokenKind::Assign),
map(tag("match"), |_| TokenKind::Match),
map(tag("proc"), |_| TokenKind::Proc),
map(tag("comb"), |_| TokenKind::Comb),
map(tag("state"), |_| TokenKind::State),
))),
|(span, kind)| Token::new(span, kind),
)(input)
}
fn lex_trivials(input: Span) -> IResult<Span, Token> {
map(
consumed(alt((
map(tuple((tag("//"), is_not("\r\n"), line_ending)), |_| {
TokenKind::Comment
}),
map(tuple((tag("/*"), take_until("*/"), tag("*/"))), |_| {
TokenKind::Comment
}),
))),
|(span, kind)| Token::new(span, kind),
)(input)
}