228 lines
5.4 KiB
Rust
228 lines
5.4 KiB
Rust
//! Convert source text into a token stream.
|
|
|
|
use super::{
    error::{Error, InputPos},
    literals::{identifier, ws0},
    IResult, Span,
};
use nom::{
    branch::alt,
    bytes::complete::tag,
    character::complete::{anychar, digit1},
    combinator::{consumed, map, not, recognize, verify},
    error::ParseError,
    multi::many0,
    sequence::terminated,
    InputTake,
};
use std::fmt;
|
|
|
|
/// A single lexed token: the source region it covers plus its classification.
#[derive(Clone, Copy)]
pub struct Token<'a> {
    // region of the source text this token was lexed from
    span: Span<'a>,
    // classification of the token (brace, punctuation, literal, keyword, …)
    kind: TokenKind,
}
|
|
|
|
impl fmt::Debug for Token<'_> {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
write!(
|
|
f,
|
|
"{:?} @{} {:?}",
|
|
self.kind,
|
|
self.span.location_offset(),
|
|
self.span.fragment()
|
|
)?;
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
impl<'a> Token<'a> {
|
|
fn new(span: Span<'a>, kind: TokenKind) -> Self {
|
|
Self { span, kind }
|
|
}
|
|
|
|
pub fn span(self) -> Span<'a> {
|
|
self.span
|
|
}
|
|
pub fn kind(&self) -> TokenKind {
|
|
self.kind
|
|
}
|
|
}
|
|
|
|
/// Classification of a lexed [`Token`].
#[derive(Debug, PartialEq, Copy, Clone)]
pub enum TokenKind {
    // no whitespace, for now
    // no token trees either, for now
    // Braces
    LParen,
    RParen,
    LAngle,
    RAngle,
    LBrace,
    RBrace,
    LSquare,
    RSquare,
    // single chars
    Colon,
    Semicolon,
    Comma,
    BitNot,
    BitAnd,
    BitOr,
    BitXor,
    EqAssign,
    Not,
    // Multi Chars
    FatArrow,
    RArrow,
    // Literals
    Ident,
    Number,
    // Keywords
    Module,
    Assign,
    Match,
    State,
    Proc,
    Comb,
    // Error
    /// Produced by the lexer's single-char fallback rule for input no other
    /// rule matched, so lexing itself never stalls.
    Error,
}
|
|
|
|
/// A slice of lexed tokens, used as the input type for the token-level parser.
#[derive(Debug, Copy, Clone)]
pub struct TokenSpan<'a> {
    // tokens not yet consumed
    rest: &'a [Token<'a>],
    // offset (in tokens) of `rest[0]` within the original token stream
    pos: usize,
}
|
|
|
|
impl<'a> TokenSpan<'a> {
|
|
pub fn new(rest: &'a [Token<'a>]) -> Self {
|
|
Self { rest, pos: 0 }
|
|
}
|
|
pub fn with_pos(rest: &'a [Token<'a>], pos: usize) -> Self {
|
|
Self { rest, pos }
|
|
}
|
|
pub fn first(&self) -> Option<&Token> {
|
|
self.rest.first()
|
|
}
|
|
pub fn last(&self) -> Option<&Token> {
|
|
self.rest.last()
|
|
}
|
|
}
|
|
|
|
impl InputTake for TokenSpan<'_> {
|
|
fn take(&self, count: usize) -> Self {
|
|
TokenSpan::with_pos(&self.rest[..count], self.pos + count)
|
|
}
|
|
|
|
fn take_split(&self, count: usize) -> (Self, Self) {
|
|
let (head, tail) = &self.rest.split_at(count);
|
|
(
|
|
TokenSpan::with_pos(head, self.pos),
|
|
TokenSpan::with_pos(tail, self.pos + count),
|
|
)
|
|
}
|
|
}
|
|
|
|
impl nom::InputLength for TokenSpan<'_> {
    /// Number of tokens remaining in this span.
    fn input_len(&self) -> usize {
        self.rest.len()
    }
}
|
|
|
|
impl InputPos for TokenSpan<'_> {
    /// Absolute offset (in tokens) of this span within the original stream.
    fn position(&self) -> usize {
        self.pos
    }
}
|
|
|
|
/// combinator that matches a token kind
|
|
pub fn token<'a>(kind: TokenKind) -> impl FnMut(TokenSpan<'a>) -> IResult<TokenSpan, Token> {
|
|
move |input: TokenSpan| {
|
|
let next = if let Some(i) = input.rest.first() {
|
|
*i
|
|
} else {
|
|
return Err(nom::Err::Error(Error::from_error_kind(
|
|
input,
|
|
nom::error::ErrorKind::Eof,
|
|
)));
|
|
};
|
|
if next.kind == kind {
|
|
let rest = TokenSpan::with_pos(&input.rest[1..], input.pos + 1);
|
|
Ok((rest, next))
|
|
} else {
|
|
Err(nom::Err::Error(Error::from_tokenkind(input, kind)))
|
|
}
|
|
}
|
|
}
|
|
|
|
pub fn lex(input: Span) -> IResult<Span, Vec<Token>> {
|
|
many0(ws0(alt((
|
|
lex_keywords,
|
|
lex_literals,
|
|
lex_braces,
|
|
lex_punctuation,
|
|
map(recognize(anychar), |span| {
|
|
Token::new(span, TokenKind::Error)
|
|
}),
|
|
))))(input)
|
|
}
|
|
|
|
fn lex_braces(input: Span) -> IResult<Span, Token> {
|
|
map(
|
|
consumed(alt((
|
|
map(tag("("), |_| TokenKind::LParen),
|
|
map(tag(")"), |_| TokenKind::RParen),
|
|
map(tag("<"), |_| TokenKind::LAngle),
|
|
map(tag(">"), |_| TokenKind::RAngle),
|
|
map(tag("{"), |_| TokenKind::LBrace),
|
|
map(tag("}"), |_| TokenKind::RBrace),
|
|
map(tag("["), |_| TokenKind::LSquare),
|
|
map(tag("]"), |_| TokenKind::RSquare),
|
|
))),
|
|
|(span, kind)| Token::new(span, kind),
|
|
)(input)
|
|
}
|
|
|
|
fn lex_literals(input: Span) -> IResult<Span, Token> {
|
|
map(
|
|
consumed(alt((
|
|
map(identifier, |_| TokenKind::Ident),
|
|
map(digit1, |_| TokenKind::Number),
|
|
))),
|
|
|(span, kind)| Token::new(span, kind),
|
|
)(input)
|
|
}
|
|
|
|
fn lex_punctuation(input: Span) -> IResult<Span, Token> {
|
|
map(
|
|
consumed(alt((
|
|
map(tag(":"), |_| TokenKind::Colon),
|
|
map(tag(";"), |_| TokenKind::Semicolon),
|
|
map(tag(","), |_| TokenKind::Comma),
|
|
map(tag("->"), |_| TokenKind::RArrow),
|
|
map(tag("=>"), |_| TokenKind::FatArrow),
|
|
map(tag("~"), |_| TokenKind::BitNot),
|
|
map(tag("&"), |_| TokenKind::BitAnd),
|
|
map(tag("^"), |_| TokenKind::BitXor),
|
|
map(tag("|"), |_| TokenKind::BitOr),
|
|
map(tag("!"), |_| TokenKind::Not),
|
|
map(tag("="), |_| TokenKind::EqAssign),
|
|
))),
|
|
|(span, kind)| Token::new(span, kind),
|
|
)(input)
|
|
}
|
|
|
|
fn lex_keywords(input: Span) -> IResult<Span, Token> {
|
|
map(
|
|
consumed(alt((
|
|
map(tag("module"), |_| TokenKind::Module),
|
|
map(tag("assign"), |_| TokenKind::Assign),
|
|
map(tag("match"), |_| TokenKind::Match),
|
|
map(tag("proc"), |_| TokenKind::Proc),
|
|
map(tag("comb"), |_| TokenKind::Comb),
|
|
map(tag("state"), |_| TokenKind::State),
|
|
))),
|
|
|(span, kind)| Token::new(span, kind),
|
|
)(input)
|
|
}
|