I am writing a compiler for an esoteric language as one of my first Rust projects. I am using rust-pest to generate the parser. Since I'm new to Rust, a code review would be great.
Full source code can be found here.
Here are some code snippets.
Thanks in advance.
ast.rs
use std::rc::Rc as Rc;
/// Top-level AST node: a module made up of function declarations.
#[derive(Debug)]
pub struct Module {
    /// The function declarations contained in the module.
    pub functions: Vec<FnDecl>,
}
/// A single function declaration.
#[derive(Debug)]
pub struct FnDecl {
    /// Name of the function.
    pub name: String,
    /// Declared parameters, in declaration order.
    pub inputs: Vec<Arg>,
    /// Text of the return-type annotation; empty when none was written.
    pub output: String,
    /// Statement that forms the function body.
    pub body: Rc<Stmt>,
}
/// One `name: type` parameter of a function declaration.
#[derive(Debug)]
pub struct Arg {
    /// Name of the parameter's type.
    pub typ: String,
    /// Name of the parameter itself.
    pub name: String,
}
/// A statement inside a function body.
#[derive(Debug)]
pub enum Stmt {
    /// Placeholder for a statement that has not been filled in.
    Invalid,
    /// A typed assignment; see [`AssignStmt`].
    Assign(AssignStmt),
    /// A sequence of nested statements; see [`BlockStmt`].
    Block(BlockStmt),
    /// An expression used in statement position.
    Expr(Rc<Expr>),
}
/// An assignment that binds a typed name to the value of an expression.
#[derive(Debug)]
pub struct AssignStmt {
    /// Name being assigned.
    pub name: String,
    /// Name of the declared type.
    pub typ: String,
    /// Expression that produces the assigned value.
    pub value: Rc<Expr>,
}
/// A block statement: an ordered list of statements.
#[derive(Debug)]
pub struct BlockStmt {
    /// The statements held by the block.
    pub list: Vec<Rc<Stmt>>,
}
/// An expression.
#[derive(Debug)]
pub enum Expr {
    /// A bare identifier; see [`IdExpr`].
    Id(IdExpr),
    /// A call with arguments; see [`CallExpr`].
    Call(CallExpr),
}
/// An expression consisting of a single identifier.
#[derive(Debug)]
pub struct IdExpr {
    /// The identifier's text.
    pub name: String,
}
/// A call expression: a callee name applied to argument expressions.
#[derive(Debug)]
pub struct CallExpr {
    /// Name of the callee.
    pub name: String,
    /// Argument expressions, in call order.
    pub params: Vec<Rc<Expr>>,
}
parser.rs
use pest::iterators::Pair;
use pest::Parser;
use std::io;
use crate::ast;
use super::utils;
/// Parser type whose implementation is derived from the grammar file
/// `parser/grammar.pest`; the derive also generates the `Rule` enum used
/// throughout this module.
#[derive(Parser)]
#[grammar = "parser/grammar.pest"]
struct JFECParser;
/// Converts a `param` pair into an [`ast::Arg`].
///
/// The first child of the pair is read as the parameter name and the second
/// as its type.
///
/// # Errors
///
/// Propagates the error from `utils::next_string` when a child is missing.
fn parse_arg(arg_p: Pair<Rule>) -> Result<ast::Arg, io::Error> {
    let mut children = arg_p.into_inner();
    // Struct fields are evaluated in the order written: name first, then type.
    Ok(ast::Arg {
        name: utils::next_string(&mut children)?,
        typ: utils::next_string(&mut children)?,
    })
}
/// Converts every `param` child of a parameter-list pair into an [`ast::Arg`].
///
/// # Errors
///
/// Stops at, and returns, the first error reported by `parse_arg`.
///
/// # Panics
///
/// Panics if a child is anything other than `Rule::param`.
fn parse_args(args_p: Pair<Rule>) -> Result<Vec<ast::Arg>, io::Error> {
    args_p
        .into_inner()
        .map(|param| match param.as_rule() {
            Rule::param => parse_arg(param),
            _ => unreachable!(),
        })
        .collect()
}
/// Parses each `expr` child of a `call_params` pair into a boxed expression.
///
/// # Errors
///
/// Stops at, and returns, the first error reported by `parse_expr`.
///
/// # Panics
///
/// Panics if a child is anything other than `Rule::expr`.
fn parse_call_params(call_params_p: Pair<Rule>) -> Result<Vec<Box<ast::Expr>>, io::Error> {
    call_params_p
        .into_inner()
        .map(|argument| match argument.as_rule() {
            Rule::expr => parse_expr(argument),
            _ => unreachable!(),
        })
        .collect()
}
/// Builds an [`ast::Expr::Call`] from a `call_expr` pair.
///
/// The pair's children are the callee `id` and, when the call has arguments,
/// a `call_params` pair.
///
/// # Errors
///
/// Propagates any error produced while parsing an argument expression.
///
/// # Panics
///
/// Panics if a child is neither `Rule::id` nor `Rule::call_params`.
fn parse_call_expr(call_expr_p: Pair<Rule>) -> Result<Box<ast::Expr>, io::Error> {
    let mut name = String::default();
    let mut params = Vec::new();
    for child in call_expr_p.into_inner() {
        match child.as_rule() {
            Rule::id => name = utils::to_string(child),
            Rule::call_params => {
                // `parse_call_params` yields boxed expressions, whereas the AST
                // field stores shared pointers; `Into` converts each element to
                // whatever pointer type `CallExpr::params` declares.
                params = parse_call_params(child)?
                    .into_iter()
                    .map(Into::into)
                    .collect();
            }
            _ => unreachable!(),
        }
    }
    Ok(Box::new(ast::Expr::Call(ast::CallExpr { name, params })))
}
/// Parses an `expr` pair into a boxed [`ast::Expr`].
///
/// The pair's first child decides the shape: an `id` becomes
/// [`ast::Expr::Id`], a `call_expr` is delegated to `parse_call_expr`.
///
/// # Errors
///
/// Returns an error when the pair has no child, or when parsing the call fails.
///
/// # Panics
///
/// Panics if the child is neither `Rule::id` nor `Rule::call_expr`.
fn parse_expr(expr_p: Pair<Rule>) -> Result<Box<ast::Expr>, io::Error> {
    let inner = utils::inner_next(expr_p)?;
    match inner.as_rule() {
        Rule::id => {
            let name = utils::to_string(inner);
            Ok(Box::new(ast::Expr::Id(ast::IdExpr { name })))
        }
        Rule::call_expr => parse_call_expr(inner),
        _ => unreachable!(),
    }
}
/// Parses a `stmt` pair into a boxed [`ast::Stmt`].
///
/// The pair's first child selects the statement kind:
/// * `assign_stmt` - children are read as name, type and value expression;
/// * `expr_stmt`   - the single child expression is parsed;
/// * `block_stmt`  - delegated to `parse_block`.
///
/// # Errors
///
/// Returns an error when an expected child pair is missing or when a nested
/// expression or block fails to parse.
///
/// # Panics
///
/// Panics if the child is none of the three statement rules above.
fn parse_stmt(stmt_p: Pair<Rule>) -> Result<Box<ast::Stmt>, io::Error> {
    let stmt = utils::inner_next(stmt_p)?;
    match stmt.as_rule() {
        Rule::assign_stmt => {
            let mut parts = stmt.into_inner();
            let name = utils::next_string(&mut parts)?;
            let typ = utils::next_string(&mut parts)?;
            let value = parse_expr(utils::get_next(&mut parts)?)?;
            // `parse_expr` returns a `Box`; `.into()` converts it to the pointer
            // type the AST field declares.
            Ok(Box::new(ast::Stmt::Assign(ast::AssignStmt {
                name,
                typ,
                value: value.into(),
            })))
        }
        Rule::expr_stmt => {
            let expr = parse_expr(utils::inner_next(stmt)?)?;
            // Same `Box` -> AST pointer conversion as for assignments.
            Ok(Box::new(ast::Stmt::Expr(expr.into())))
        }
        Rule::block_stmt => parse_block(stmt),
        _ => unreachable!(),
    }
}
/// Parses a `block_stmt` pair into a boxed [`ast::Stmt::Block`].
///
/// The block's first child is taken as the statement list, and each `stmt`
/// inside it is parsed in order.
///
/// # Errors
///
/// Returns an error when the block has no child or when a nested statement
/// fails to parse.
///
/// # Panics
///
/// Panics if the statement list contains anything other than `Rule::stmt`.
fn parse_block(block_p: Pair<Rule>) -> Result<Box<ast::Stmt>, io::Error> {
    let stmt_list = utils::inner_next(block_p)?;
    let mut list = Vec::new();
    for p in stmt_list.into_inner() {
        match p.as_rule() {
            // `parse_stmt` returns a `Box`; `.into()` converts it to the
            // pointer type stored in `BlockStmt::list`.
            Rule::stmt => list.push(parse_stmt(p)?.into()),
            _ => unreachable!(),
        }
    }
    Ok(Box::new(ast::Stmt::Block(ast::BlockStmt { list })))
}
/// Parses a `fn_decl` pair into an [`ast::FnDecl`].
///
/// Children are dispatched by rule: `id` gives the function name,
/// `param_list` the parameters, `ret_typ` the return type and `block_stmt`
/// the body. Fields whose child is absent keep their defaults (empty
/// string, empty list, `Stmt::Invalid`).
///
/// # Errors
///
/// Propagates errors from parsing the parameter list or the body.
///
/// # Panics
///
/// Panics if a child is none of the four rules above.
fn parse_fn_decl(fndecl_p: Pair<Rule>) -> Result<ast::FnDecl, io::Error> {
    let mut name = String::default();
    let mut inputs = Vec::new();
    let mut output = String::default();
    let mut body = Box::new(ast::Stmt::Invalid);
    for p in fndecl_p.into_inner() {
        match p.as_rule() {
            Rule::id => name = utils::to_string(p),
            Rule::param_list => inputs = parse_args(p)?,
            Rule::ret_typ => {
                // The text span of `ret_typ` also covers the `->` token, so the
                // type name must come from the inner `id`. When no return type
                // was written there is no inner pair and the output stays empty.
                output = p
                    .into_inner()
                    .next()
                    .map(utils::to_string)
                    .unwrap_or_default();
            }
            Rule::block_stmt => body = parse_block(p)?,
            _ => unreachable!(),
        }
    }
    // `parse_block` returns a `Box`; `.into()` converts it to the pointer type
    // declared by `FnDecl::body`.
    Ok(ast::FnDecl { name, inputs, output, body: body.into() })
}
/// Parses `text` as a whole program and builds its [`ast::Module`].
///
/// # Errors
///
/// Returns an `io::Error` of kind `InvalidInput` when `text` does not match
/// the grammar (the message is the parser's own error report), and propagates
/// any error raised while converting the parse tree into AST nodes.
///
/// # Panics
///
/// Panics if the program pair contains a child other than `Rule::decl` or
/// `Rule::EOI`.
pub fn create_ast(text: &String) -> Result<ast::Module, io::Error> {
    // A syntax error is caused by user input, so report it through the
    // `Result` instead of panicking.
    let mut parsed = JFECParser::parse(Rule::program, text)
        .map_err(|err| io::Error::new(io::ErrorKind::InvalidInput, err.to_string()))?;
    let module = utils::get_next(&mut parsed)?;
    let mut functions = Vec::new();
    for decl in module.into_inner() {
        match decl.as_rule() {
            Rule::decl => functions.push(parse_fn_decl(utils::inner_next(decl)?)?),
            // End-of-input marker carries no AST information.
            Rule::EOI => (),
            _ => unreachable!(),
        }
    }
    Ok(ast::Module { functions })
}
utils.rs
use pest::iterators::{Pair, Pairs};
use pest::RuleType;
use std::io;
/// Returns an owned copy of the source text covered by the pair `p`.
pub fn to_string<R: RuleType>(p: Pair<R>) -> String {
    String::from(p.as_str())
}
pub fn next_string<R: RuleType>(p: &mut Pairs<R>) -> Result<String, io::Error> {
let next = get_next(p)?;
Ok(to_string(next))
}
/// Takes the next pair from `p`.
///
/// # Errors
///
/// Returns an `io::Error` of kind `InvalidInput` when `p` is exhausted.
pub fn get_next<'s, 't, R: RuleType>(p: &'s mut Pairs<'t, R>) -> Result<Pair<'t, R>, io::Error> {
    p.next()
        .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidInput, "Fail to get next element"))
}
/// Returns the first inner pair of `p`.
///
/// # Errors
///
/// Returns the error from `get_next` when `p` has no inner pairs.
pub fn inner_next<R: RuleType>(p: Pair<R>) -> Result<Pair<R>, io::Error> {
    let mut children = p.into_inner();
    get_next(&mut children)
}
grammar.pest
// Grammar for a small Rust-like language: a program is a sequence of
// function declarations whose bodies contain assignments, expression
// statements and nested blocks.

// Silent rule (`_`): pest skips it implicitly between the tokens of every
// non-atomic rule and it produces no pair of its own.
WHITESPACE = _{ " " | "\t" | "\r" | "\n" }
// Entry point: the whole input must consist of declarations.
program = { SOI ~ decl* ~ EOI }
// Only function declarations exist so far.
decl = { fn_decl }
// `fn name(params) -> type { ... }`; the parameter list is optional.
fn_decl = { "fn" ~ id ~ "(" ~ param_list? ~ ")" ~ ret_typ ~ block_stmt }
// Optional return type. The rule itself always matches (possibly empty),
// and its text span includes the `->`; the type name is the inner `id`.
ret_typ = { ("->" ~ id)? }
// Braced list of statements.
block_stmt = { "{" ~ stmt_list ~ "}" }
// `name: type`
param = { id ~ ":" ~ id }
// Comma-separated parameters; at least one when the rule matches.
param_list = { param ~ ("," ~ param)* }
// Zero or more statements.
stmt_list = { stmt* }
// Alternatives are tried in order, so `let ...` is attempted before a
// plain expression statement.
stmt = { assign_stmt | expr_stmt | block_stmt }
// `let name: type = expr;`
assign_stmt = { "let" ~ id ~ ":" ~ id ~ "=" ~ expr ~ ";"}
// `expr;`
expr_stmt = { expr ~ ";" }
// A call is tried before a bare identifier because both start with `id`.
expr = { call_expr | id }
// Comma-separated argument expressions; at least one when the rule matches.
call_params = { expr ~ ("," ~ expr)* }
// `name(args)`; the argument list is optional.
call_expr = { id ~ "(" ~ call_params? ~")" }
// Atomic rule (`@`): one or more ASCII letters with no implicit whitespace.
// NOTE(review): digits and `_` are not accepted, and `fn` / `let` are not
// reserved, so input such as `fnfoo()` appears to parse as `fn foo()` -
// confirm whether that is intended.
id = @{ ASCII_ALPHA+ }
The language looks like Rust. Currently only function declarations and assignment statements are supported. See the code snippet below.
fn foo( a: int, b: int, c: int ) -> int {
let a: int = bar(baz(x, y), z);
}
I have added grammar.pest to make it clearer.