#include <cstring>
#include <string>
#include "parser.hpp"
#include "perfvisitor.hpp"
#include "token.hpp"
#include "util.hpp"
#include "io/pprintf.hpp"
// specialize on const char* for lazy evaluation of compile-time strings
bool Parser::expect(tok tok, const char* str) {
if (tok == token_.type) {
return true;
}
error(
strlen(str) > 0 ? str
: std::string("unexpected token ") + yellow(token_.spelling));
return false;
}
bool Parser::expect(tok tok, std::string const& str) {
if (tok == token_.type) {
return true;
}
error(
str.size() > 0 ? str
: std::string("unexpected token ") + yellow(token_.spelling));
return false;
}
void Parser::error(std::string msg) {
std::string location_info = pprintf(
"%:% ", module_ ? module_->source_name() : "", token_.location);
if (status_ == lexerStatus::error) {
// append to current string
error_string_ += "\n" + white(location_info) + "\n " + msg;
}
else {
error_string_ = white(location_info) + "\n " + msg;
status_ = lexerStatus::error;
}
}
void Parser::error(std::string msg, Location loc) {
std::string location_info = pprintf(
"%:% ", module_ ? module_->source_name() : "", loc);
if (status_ == lexerStatus::error) {
// append to current string
error_string_ += "\n" + green(location_info) + msg;
}
else {
error_string_ = green(location_info) + msg;
status_ = lexerStatus::error;
}
}
Parser::Parser(Module& m, bool advance):
Lexer(m.buffer()),
module_(&m) {
// prime the first token
get_token();
if (advance) {
parse();
}
}
Parser::Parser(std::string const& buf):
Lexer(buf),
module_(nullptr) {
// prime the first token
get_token();
}
bool Parser::parse() {
// perform first pass to read the descriptive blocks and
// record the location of the verb blocks
while (token_.type != tok::eof) {
switch (token_.type) {
case tok::title:
parse_title();
break;
case tok::neuron:
parse_neuron_block();
break;
case tok::state:
parse_state_block();
break;
case tok::units:
parse_units_block();
break;
case tok::constant:
parse_constant_block();
break;
case tok::parameter:
parse_parameter_block();
break;
case tok::assigned:
parse_assigned_block();
break;
// INITIAL, KINETIC, LINEAR, DERIVATIVE, PROCEDURE, NET_RECEIVE, POST_EVENT
// and BREAKPOINT blocks are all lowered to ProcedureExpression
case tok::net_receive:
case tok::breakpoint:
case tok::initial:
case tok::post_event:
case tok::kinetic:
case tok::linear:
case tok::derivative:
case tok::procedure: {
auto p = parse_procedure();
if (!p) break;
module_->add_callable(std::move(p));
} break;
case tok::function: {
auto f = parse_function();
if (!f) break;
module_->add_callable(std::move(f));
} break;
default:
error(pprintf("expected block type, found '%'", token_.spelling));
break;
}
if (status() == lexerStatus::error) {
std::cerr << red("error: ") << error_string_ << std::endl;
return false;
}
}
return true;
}
// consume a comma separated list of identifiers
// NOTE: leaves the current location at the beginning of the last identifier in the list
// OK:  empty list ""
// OK:  list with one identifier "a"
// OK:  list with multiple identifiers "a, b, c, d"
// BAD: list with a keyword "a, b, else, d"
// BAD: list with a trailing comma "a, b,\n"
// BAD: list with a keyword "a, if, b"
std::vector<Token> Parser::comma_separated_identifiers() {
std::vector<Token> tokens;
int startline = location_.line;
// handle an empty list at the end of a line
if (peek().location.line > startline) {
// this happens when scanning WRITE below:
// USEION k READ a, b WRITE
// leave to the caller to decide whether an empty list is an error
return tokens;
}
while (1) {
get_token();
// first check if a new line was encountered
if (location_.line > startline) {
return tokens;
}
else if (token_.type == tok::identifier) {
tokens.push_back(token_);
}
else if (is_keyword(token_)) {
error(pprintf("found keyword '%', expected a variable name", token_.spelling));
return tokens;
}
else if (token_.type == tok::real || token_.type == tok::integer) {
error(pprintf("found number '%', expected a variable name", token_.spelling));
return tokens;
}
else {
error(pprintf("found '%', expected a variable name", token_.spelling));
return tokens;
}
// look ahead to check for a comma. This approach ensures that the
// first token after the end of the list is not consumed
if (peek().type == tok::comma) {
// load the comma
get_token();
// assert that the list can't run off the end of a line
if (peek().location.line > startline) {
error("line can't end with a '" + yellow(",") + "'");
return tokens;
}
}
else {
break;
}
}
get_token(); // prime the first token after the list
return tokens;
}
/*
NEURON {
THREADSAFE
SUFFIX KdShu2007
USEION k WRITE ik READ xy
RANGE gkbar, ik, ek
GLOBAL minf, mtau, hinf, htau
}
*/
void Parser::parse_neuron_block() {
NeuronBlock neuron_block;
get_token();
// assert that the block starts with a curly brace
if (token_.type != tok::lbrace) {
error(pprintf("NEURON block must start with a curly brace {, found '%'",
token_.spelling));
return;
}
// initialize neuron block
neuron_block.threadsafe = false;
// there are no use cases for a curly brace in a NEURON block, so we don't
// have to count them. We have to get the next token before entering the loop
// to handle the case of an empty block {}
get_token();
while (token_.type != tok::rbrace) {
switch (token_.type) {
case tok::threadsafe:
neuron_block.threadsafe = true;
get_token(); // consume THREADSAFE
break;
case tok::suffix:
case tok::point_process:
case tok::junction_process:
neuron_block.kind = (token_.type == tok::suffix) ? moduleKind::density :
(token_.type == tok::point_process) ? moduleKind::point : moduleKind::junction;
// set the module kind
module_->kind(neuron_block.kind);
get_token(); // consume SUFFIX / POINT_PROCESS / JUNCTION_PROCESS
// assert that a valid name for the Neuron has been specified
if (token_.type != tok::identifier) {
error(pprintf("invalid name for SUFFIX, found '%'", token_.spelling));
return;
}
neuron_block.name = token_.spelling;
get_token(); // consume the name
break;
// this will be a comma-separated list of identifiers
case tok::global:
// the globals are a comma-separated list of identifiers
{
auto identifiers = comma_separated_identifiers();
// bail if there was an error reading the list
if (status_ == lexerStatus::error) {
return;
}
for (auto const& id: identifiers) {
neuron_block.globals.push_back(id);
}
}
break;
// this will be a comma-separated list of identifiers
case tok::range:
// the ranges are a comma-separated list of identifiers
{
auto identifiers = comma_separated_identifiers();
if (status_ == lexerStatus::error) { // bail if there was an error reading the list
return;
}
for (auto const& id: identifiers) {
neuron_block.ranges.push_back(id);
}
} break;
case tok::useion: {
IonDep ion;
// we have to parse the name of the ion first
get_token();
// check this is an identifier token
if (token_.type != tok::identifier) {
error(pprintf("invalid name for an ion chanel '%'", token_.spelling));
return;
}
ion.name = token_.spelling;
get_token(); // consume the ion name
// this loop ensures that we don't gobble any tokens past
// the end of the USEION clause
while (token_.type == tok::read || token_.type == tok::write) {
auto& target = (token_.type == tok::read) ? ion.read
: ion.write;
std::vector<Token> identifiers = comma_separated_identifiers();
// bail if there was an error reading the list
if (status_ == lexerStatus::error) {
return;
}
for (auto const& id: identifiers) {
target.push_back(id);
}
}
if (token_.type == tok::valence) {
ion.has_valence_expr = true;
// consume "Valence"
get_token();
// take and consume variable name or signed integer
if (token_.type == tok::identifier) {
ion.valence_var = token_;
get_token();
}
else {
ion.expected_valence = value_signed_integer();
}
}
// add the ion dependency to the NEURON block
neuron_block.ions.push_back(std::move(ion));
} break;
case tok::nonspecific_current:
// Assume that there is one non-specific current per mechanism.
// It would be easy to extend this to multiple currents; however,
// there are no mechanisms in the CoreNeuron repository
// that do this
{
get_token(); // consume NONSPECIFIC_CURRENT
auto tok = token_;
// parse the current name and check for errors
auto id = parse_identifier();
if (status_ == lexerStatus::error) {
return;
}
// store the token with nonspecific current's name and location
neuron_block.nonspecific_current = tok;
}
break;
// the parser encountered an invalid symbol
default:
error(pprintf("there was an invalid statement '%' in NEURON block",
token_.spelling));
return;
}
}
// copy neuron block into module
module_->neuron_block(neuron_block);
// now we have a curly brace, so prime the next token
get_token();
}
void Parser::parse_state_block() {
StateBlock state_block;
get_token();
// assert that the block starts with a curly brace
if (token_.type != tok::lbrace) {
error(pprintf("STATE block must start with a curly brace {, found '%'", token_.spelling));
return;
}
// there are no use cases for a curly brace in a STATE block, so we don't have
// to count them. We have to get the next token before entering the loop to
// handle the case of an empty block {}
get_token();
while (token_.type != tok::rbrace && token_.type != tok::eof) {
int line = location_.line;
Id parm;
if (token_.type != tok::identifier) {
error(pprintf("'%' is not a valid name for a state variable",
token_.spelling));
return;
}
parm.token = token_;
get_token();
if (token_.type == tok::from) {
// silently skips from/to
from_to_description();
if (status_ == lexerStatus::error) {
return;
}
}
// get unit parameters
if (line == location_.line && token_.type == tok::lparen) {
parm.units = unit_description();
if (status_ == lexerStatus::error) {
error(pprintf("STATUS block unexpected symbol '%s'",
token_.spelling));
return;
}
}
state_block.state_variables.push_back(parm);
}
// add this state block information to the module
module_->state_block(state_block);
// now we have a curly brace, so prime the next token
get_token();
}
// scan a unit block
void Parser::parse_units_block() {
UnitsBlock units_block;
get_token();
// assert that the block starts with a curly brace
if (token_.type != tok::lbrace) {
error(pprintf("UNITS block must start with a curly brace {, found '%'", token_.spelling));
return;
}
// there are no use cases for curly brace in a UNITS block, so we don't have to count them
get_token();
while (token_.type != tok::rbrace) {
// get the alias
std::vector<Token> lhs = unit_description();
if (status_ != lexerStatus::happy) return;
// consume the '=' sign
if (token_.type != tok::eq) {
error(pprintf("expected '=', found '%'", token_.spelling));
return;
}
get_token(); // next token
// get the units
std::vector<Token> rhs = unit_description();
if (status_ != lexerStatus::happy) return;
// store the unit definition
units_block.unit_aliases.push_back({lhs, rhs});
}
// add this units block information to the module
module_->units_block(units_block);
// now we have a curly brace, so prime the next token
get_token();
}
//////////////////////////////////////////////////////
// the parameter block describes variables that are
// to be used as parameters. Some are given values,
// others are simply listed, and some have units
// assigned to them. Here we want to get a list of the
// parameter names, along with values if given.
// We also store the token that describes the units
//////////////////////////////////////////////////////
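// For example (an illustrative block, not taken from any particular mechanism):
//     PARAMETER {
//         gkbar = 0.004 (S/cm2)
//         ek = -77 (mV) <-100, 100>
//         tau
//     }
// would yield three Ids: gkbar with a value and units, ek with a value, units
// and a range, and tau with neither value nor units.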
void Parser::parse_parameter_block() {
ParameterBlock block;
get_token();
// assert that the block starts with a curly brace
if (token_.type != tok::lbrace) {
error(pprintf("PARAMETER block must start with a curly brace {, found '%'", token_.spelling));
return;
}
int success = 1;
// there are no use cases for curly brace in a PARAMETER block, so we don't have to count them
get_token();
while (token_.type != tok::rbrace && token_.type != tok::eof) {
int line = location_.line;
Id parm;
// read the parameter name
if (token_.type != tok::identifier) {
success = 0;
goto parm_exit;
}
parm.token = token_; // save full token
get_token();
// look for equality
if (token_.type == tok::eq) {
get_token(); // consume '='
parm.value = value_literal();
if (status_ == lexerStatus::error) {
success = 0;
goto parm_exit;
}
}
// get the units
if (line == location_.line && token_.type == tok::lparen) {
parm.units = unit_description();
if (status_ == lexerStatus::error) {
success = 0;
goto parm_exit;
}
}
// get the range
if (line == location_.line && token_.type == tok::lt) {
parm.range = range_description();
if (status_ == lexerStatus::error) {
success = 0;
goto parm_exit;
}
}
block.parameters.push_back(parm);
}
// error if EOF before closing curly brace
if (token_.type == tok::eof) {
error("PARAMETER block must have closing '}'");
goto parm_exit;
}
get_token(); // consume closing brace
module_->parameter_block(block);
parm_exit:
// only write error message if one hasn't already been logged by the lexer
if (!success && status_ == lexerStatus::happy) {
error(pprintf("PARAMETER block unexpected symbol '%s'", token_.spelling));
}
return;
}
void Parser::parse_constant_block() {
get_token();
// assert that the block starts with a curly brace
if (token_.type != tok::lbrace) {
error(pprintf("CONSTANT block must start with a curly brace {, found '%'", token_.spelling));
return;
}
get_token();
while (token_.type != tok::rbrace && token_.type != tok::eof) {
int line = location_.line;
std::string name, value;
// read the constant name
if (token_.type != tok::identifier) {
error(pprintf("CONSTANT block unexpected symbol '%s'", token_.spelling));
return;
}
name = token_.spelling; // save the constant's name
get_token();
// look for equality
if (token_.type == tok::eq) {
get_token(); // consume '='
value = value_literal();
if (status_ == lexerStatus::error) {
return;
}
}
// get the units
if (line == location_.line && token_.type == tok::lparen) {
unit_description();
if (status_ == lexerStatus::error) {
return;
}
}
constants_map_.insert({name, value});
}
// error if EOF before closing curly brace
if (token_.type == tok::eof) {
error("CONSTANT block must have closing '}'");
return;
}
get_token(); // consume closing brace
return;
}
void Parser::parse_assigned_block() {
AssignedBlock block;
get_token();
// assert that the block starts with a curly brace
if (token_.type != tok::lbrace) {
error(pprintf("ASSIGNED block must start with a curly brace {, found '%'", token_.spelling));
return;
}
int success = 1;
// there are no use cases for curly brace in an ASSIGNED block, so we don't have to count them
get_token();
while (token_.type != tok::rbrace && token_.type != tok::eof) {
int line = location_.line;
std::vector<Token> variables; // we can have more than one variable on a line
// the first token must be an identifier
if (token_.type != tok::identifier) {
success = 0;
goto ass_exit;
}
// read all of the identifiers until we run out of identifiers or reach a new line
while (token_.type == tok::identifier && line == location_.line) {
variables.push_back(token_);
get_token();
}
// there may be a unit description at the end of the line
if (line == location_.line && token_.type == tok::lparen) {
auto u = unit_description();
if (status_ == lexerStatus::error) {
success = 0;
goto ass_exit;
}
for (auto const& t: variables) {
block.parameters.push_back(Id(t, "", u));
}
}
else {
for (auto const& t: variables) {
block.parameters.push_back(Id(t, "", {}));
}
}
}
// error if EOF before closing curly brace
if (token_.type == tok::eof) {
error("ASSIGNED block must have closing '}'");
goto ass_exit;
}
get_token(); // consume closing brace
module_->assigned_block(block);
ass_exit:
// only write error message if one hasn't already been logged by the lexer
if (!success && status_ == lexerStatus::happy) {
error(pprintf("ASSIGNED block unexpected symbol '%'", token_.spelling));
}
return;
}
// Parse a value (integral or real) with possible preceding unary minus,
// and return as a string.
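// e.g. (illustrative) "2", "0.5" and "-3" are returned as strings, while a name
// defined in a CONSTANT block is replaced by its recorded value.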
std::string Parser::value_literal() {
bool negate = false;
if (token_.type == tok::minus) {
negate = true;
get_token();
}
if (constants_map_.find(token_.spelling) != constants_map_.end()) {
// Remove double negation
auto v = constants_map_.at(token_.spelling);
if (v.at(0) == '-' && negate) {
v.erase(0, 1);
negate = false;
}
auto value = negate ? "-" + v : v;
get_token();
return value;
}
if (token_.type != tok::integer && token_.type != tok::real) {
error(pprintf("numeric constant not an integer or real number '%'", token_));
return "";
}
else {
auto value = negate ? "-" + token_.spelling : token_.spelling;
get_token();
return value;
}
}
// Parse an integral value with possible preceding unary plus or minus,
// and return as an int.
int Parser::value_signed_integer() {
std::string value;
if (token_.type == tok::minus) {
value = "-";
get_token();
}
else if (token_.type == tok::plus) {
get_token();
}
if (token_.type != tok::integer) {
error(pprintf("numeric constant not an integer '%'", token_));
return 0;
}
else {
value += token_.spelling;
get_token();
return std::stoi(value);
}
}
std::vector<Token> Parser::unit_description() {
static const tok legal_tokens[] = {tok::identifier, tok::divide, tok::real, tok::integer};
int startline = location_.line;
std::vector<Token> tokens;
// check that we start with a left parenthesis
if (token_.type != tok::lparen) {
error(pprintf("unit description must start with a parenthesis '%'", token_));
goto unit_exit;
}
get_token();
while (token_.type != tok::rparen) {
// check for illegal tokens or a new line
if (!is_in(token_.type, legal_tokens) || startline < location_.line) {
error(pprintf("incorrect unit description '%'", token_));
goto unit_exit;
}
// add this token to the set
tokens.push_back(token_);
get_token();
}
// remove trailing right parenthesis ')'
get_token();
unit_exit:
return tokens;
}
std::pair<std::string, std::string> Parser::range_description() {
std::string lb, ub;
if (token_.type != tok::lt) {
error(pprintf("range description must start with a left angle bracket '%'", token_));
return {};
}
get_token();
lb = value_literal();
if (token_.type != tok::comma) {
error(pprintf("range description must separate lower and upper bound with a comma '%'", token_));
return {};
}
get_token();
ub = value_literal();
if (token_.type != tok::gt) {
error(pprintf("range description must end with a right angle bracket '%'", token_));
return {};
}
get_token();
return {lb, ub};
}
std::pair<std::string, std::string> Parser::from_to_description() {
std::string lb, ub;
if (token_.type != tok::from) {
error(pprintf("range description must be of form FROM <number> TO <number>, found '%'", token_));
return {};
}
get_token();
lb = value_literal();
if (token_.type != tok::to) {
error(pprintf("range description must be of form FROM <number> TO <number>, found '%'", token_));
return {};
}
get_token();
ub = value_literal();
return {lb, ub};
}
// Returns a prototype expression for a function or procedure call
// Takes an optional argument that allows the user to specify the
// name of the prototype, which is used for prototypes where the name
// is implicitly defined (e.g. INITIAL and BREAKPOINT blocks)
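// e.g. (illustrative) "PROCEDURE rates(v (mV), celsius)" yields a prototype named
// 'rates' with arguments v and celsius; for "INITIAL { ... }" the caller passes
// name="initial" and a prototype with an empty argument list is produced.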
expression_ptr Parser::parse_prototype(std::string name = std::string()) {
Token identifier = token_;
if (name.size()) {
// we assume that the current token_ is still pointing at
// the keyword, i.e. INITIAL or BREAKPOINT
identifier.type = tok::identifier;
identifier.spelling = name;
}
// load the parenthesis
get_token();
// check for an argument list enclosed in parenthesis (...)
// return a prototype with an empty argument list if not found
if (token_.type != tok::lparen) {
return expression_ptr{new PrototypeExpression(identifier.location, identifier.spelling, {})};
}
get_token(); // consume '('
std::vector<Token> arg_tokens;
while (token_.type != tok::rparen) {
// check identifier
if (token_.type != tok::identifier) {
error("expected a valid identifier, found '" + yellow(token_.spelling) + "'");
return nullptr;
}
arg_tokens.push_back(token_);
get_token(); // consume the identifier
// args may have a unit attached
if (token_.type == tok::lparen) {
unit_description();
if (status_ == lexerStatus::error) {
return {};
}
}
// look for a comma
if (!(token_.type == tok::comma || token_.type == tok::rparen)) {
error("expected a comma or closing parenthesis, found '" + yellow(token_.spelling) + "'");
return nullptr;
}
if (token_.type == tok::comma) {
get_token(); // consume ','
}
}
if (token_.type != tok::rparen) {
error("procedure argument list must have closing parenthesis ')'");
return nullptr;
}
get_token(); // consume closing parenthesis
// pack the arguments into ArgumentExpressions
std::vector<expression_ptr> arg_expressions;
for (auto const& t: arg_tokens) {
arg_expressions.emplace_back(make_expression<ArgumentExpression>(t.location, t));
}
return make_expression<PrototypeExpression>(identifier.location, identifier.spelling, std::move(arg_expressions));
}
void Parser::parse_title() {
std::string title;
int this_line = location().line;
Token tkn = peek();
while (tkn.location.line == this_line && tkn.type != tok::eof && status_ == lexerStatus::happy) {
get_token();
title += token_.spelling;
tkn = peek();
}
// set the module title
module_->title(title);
// load next token
get_token();
}
/// parse a procedure
/// can handle PROCEDURE, DERIVATIVE, KINETIC, LINEAR, INITIAL, BREAKPOINT, NET_RECEIVE and POST_EVENT blocks
/// an initial block is stored as a procedure with name 'initial' and empty argument list
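/// e.g. (illustrative) "DERIVATIVE states { m' = (minf - m)/mtau }" is parsed into
/// a ProcedureExpression named 'states' with kind procedureKind::derivative.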
symbol_ptr Parser::parse_procedure() {
expression_ptr p;
procedureKind kind = procedureKind::normal;
switch (token_.type) {
case tok::derivative:
kind = procedureKind::derivative;
get_token(); // consume keyword token
if (!expect(tok::identifier)) return nullptr;
p = parse_prototype();
break;
case tok::kinetic:
kind = procedureKind::kinetic;
get_token(); // consume keyword token
if (!expect(tok::identifier)) return nullptr;
p = parse_prototype();
break;
case tok::linear:
kind = procedureKind::linear;
get_token(); // consume keyword token
if (!expect(tok::identifier)) return nullptr;
p = parse_prototype();
break;
case tok::procedure:
kind = procedureKind::normal;
get_token(); // consume keyword token
if (!expect(tok::identifier)) return nullptr;
p = parse_prototype();
break;
case tok::initial:
kind = procedureKind::initial;
p = parse_prototype("initial");
break;
case tok::breakpoint:
kind = procedureKind::breakpoint;
p = parse_prototype("breakpoint");
break;
case tok::net_receive:
kind = procedureKind::net_receive;
p = parse_prototype("net_receive");
break;
case tok::post_event:
kind = procedureKind::post_event;
p = parse_prototype("post_event");
break;
default:
// it is a compiler error if trying to parse_procedure() without
// having DERIVATIVE, KINETIC, PROCEDURE, INITIAL or BREAKPOINT keyword
throw compiler_exception(
"attempt to parse_procedure() without {DERIVATIVE,KINETIC,PROCEDURE,INITIAL,BREAKPOINT}",
location_);
}
if (p == nullptr) return nullptr;
// check for opening left brace {
if (!expect(tok::lbrace)) return nullptr;
// parse the body of the function
expression_ptr body = parse_block(false);
if (body == nullptr) return nullptr;
auto proto = p->is_prototype();
if(kind == procedureKind::net_receive) {
return make_symbol<NetReceiveExpression> (proto->location(), proto->name(), std::move(proto->args()), std::move(body));
}
if(kind == procedureKind::post_event) {
return make_symbol<PostEventExpression> (proto->location(), proto->name(), std::move(proto->args()), std::move(body));
}
return make_symbol<ProcedureExpression> (proto->location(), proto->name(), std::move(proto->args()), std::move(body), kind);
}
symbol_ptr Parser::parse_function() {
get_token(); // consume FUNCTION token
// check that a valid identifier name was specified by the user
if (!expect(tok::identifier)) return nullptr;
// parse the prototype
auto p = parse_prototype();
if (p == nullptr) return nullptr;
// Functions may have a unit attached
if (token_.type == tok::lparen) {
unit_description();
if (status_ == lexerStatus::error) {
return {};
}
}
// check for opening left brace {
if (!expect(tok::lbrace)) return nullptr;
// parse the body of the function
auto body = parse_block(false);
if (body == nullptr) return nullptr;
PrototypeExpression* proto = p->is_prototype();
return make_symbol<FunctionExpression>(proto->location(), proto->name(), std::move(proto->args()), std::move(body));
}
// this is the first port of call when parsing a new line inside a verb block
// it tests to see whether the expression is:
// :: LOCAL identifier
// :: expression
expression_ptr Parser::parse_statement() {
switch (token_.type) {
case tok::if_stmt:
return parse_if();
break;
case tok::conductance:
return parse_conductance();
case tok::solve:
return parse_solve();
case tok::local:
return parse_local();
case tok::identifier:
return parse_line_expression();
case tok::conserve:
return parse_conserve_expression();
case tok::compartment:
return parse_compartment_statement();
case tok::tilde:
return parse_tilde_expression();
case tok::initial:
// only used for INITIAL block in NET_RECEIVE
return parse_initial();
default:
error(pprintf("unexpected token type % '%'", token_string(token_.type), token_.spelling));
return nullptr;
}
return nullptr;
}
expression_ptr Parser::parse_identifier() {
if (constants_map_.find(token_.spelling) != constants_map_.end()) {
// save location and value of the identifier
auto id = make_expression<NumberExpression>(token_.location, constants_map_.at(token_.spelling));
// consume the number
get_token();
// return the value of the constant
return id;
}
// save name and location of the identifier
auto id = make_expression<IdentifierExpression>(token_.location, token_.spelling);
// consume identifier
get_token();
// return variable identifier
return id;
}
expression_ptr Parser::parse_call() {
// save name and location of the identifier
Token idtoken = token_;
// consume identifier
get_token();
// check for a function call
// assert this is so
if (token_.type != tok::lparen) {
throw compiler_exception(
"should not be parsing parse_call without trailing '('",
location_);
}
std::vector<expression_ptr> args;
// parse a function call
get_token(); // consume '('
while (token_.type != tok::rparen) {
auto e = parse_expression();
if (!e) return e;
args.emplace_back(std::move(e));
// reached the end of the argument list
if (token_.type == tok::rparen) break;
// insist on a comma between arguments
if (!expect(tok::comma, "call arguments must be separated by ','"))
return expression_ptr();
get_token(); // consume ','
}
// check that we have a closing parenthesis
if (!expect(tok::rparen, "function call missing closing ')'")) {
return expression_ptr();
}
get_token(); // consume ')'
return make_expression<CallExpression>(idtoken.location, idtoken.spelling, std::move(args));
}
// parse a full line expression, i.e. one of
// :: procedure call e.g. rates(v+0.01)
// :: assignment expression e.g. x = y + 3
// to parse a subexpression, see parse_expression()
// proceeds by first parsing the LHS (which may be a variable or function call)
// then attempts to parse the RHS if
// 1. the lhs is not a procedure call
// 2. the operator that follows is =
expression_ptr Parser::parse_line_expression() {
int line = location_.line;
expression_ptr lhs;
Token next = peek();
if (next.type == tok::lparen) {
lhs = parse_call();
// we have to ensure that a procedure call is alone on the line
// to avoid :
// :: assigning to it e.g. foo() = x + 6
// :: stray symbols coming after e.g. foo() + x
// We assume that foo is a procedure call; if it is an erroneous
// function call this has to be caught in the second pass,
// or optimized away with a warning
if (!lhs) return lhs;
if (location_.line == line && token_.type != tok::eof) {
error(pprintf(
"expected a new line after call expression, found '%'",
yellow(token_.spelling)));
return expression_ptr();
}
return lhs;
}
else if (next.type == tok::prime) {
lhs = make_expression<DerivativeExpression>(location_, token_.spelling);
// consume both name and derivative operator
get_token();
get_token();
// a derivative statement must be followed by '='
if (token_.type != tok::eq) {
error("a derivative declaration must have an assignment of the "
"form\n x' = expression\n where x is a state variable");
return expression_ptr();
}
}
else {
lhs = parse_unaryop();
}
if (!lhs) { // error
return lhs;
}
// we parse a binary expression if followed by an operator
if (token_.type == tok::eq) {
Token op = token_; // save the '=' operator with location
get_token(); // consume the '=' operator
return parse_binop(std::move(lhs), op);
}
else if (line == location_.line && token_.type != tok::eof) {
error(pprintf("expected an assignment '%' or new line, found '%'",
yellow("="),
yellow(token_.spelling)));
return nullptr;
}
return lhs;
}
expression_ptr Parser::parse_stoich_term() {
expression_ptr coeff = make_expression<IntegerExpression>(location_, 1);
auto here = location_;
bool negative = false;
while (token_.type == tok::minus) {
negative = !negative;
get_token(); // consume '-'
}
if (token_.type == tok::integer) {
coeff = parse_integer();
}
if (token_.type != tok::identifier) {
error(pprintf("expected an identifier, found '%'", yellow(token_.spelling)));
return nullptr;
}
if (negative) {
coeff = make_expression<IntegerExpression>(here, -coeff->is_integer()->integer_value());
}
return make_expression<StoichTermExpression>(here, std::move(coeff), parse_identifier());
}
expression_ptr Parser::parse_stoich_expression() {
std::vector<expression_ptr> terms;
auto here = location_;
if (token_.type == tok::integer || token_.type == tok::identifier || token_.type == tok::minus) {
auto term = parse_stoich_term();
if (!term) return nullptr;
terms.push_back(std::move(term));
while (token_.type == tok::plus || token_.type == tok::minus) {
if (token_.type == tok::plus) {
get_token(); // consume plus
}
auto term = parse_stoich_term();
if (!term) return nullptr;
terms.push_back(std::move(term));
}
}
return make_expression<StoichExpression>(here, std::move(terms));
}
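// parse a '~' statement, which is either a KINETIC reaction or a LINEAR equation,
// e.g. (illustrative):
//     ~ A + B <-> C (kf, kb)
//     ~ 2*a + b = v
// the presence of a '<->' arrow before the end of the line selects the reaction form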
expression_ptr Parser::parse_tilde_expression() {
auto here = location_;
if (token_.type != tok::tilde) {
error(pprintf("expected '%', found '%'", yellow("~"), yellow(token_.spelling)));
return nullptr;
}
get_token(); // consume tilde
if (search_to_eol(tok::arrow)) {
expression_ptr lhs = parse_stoich_expression();
if (!lhs) return nullptr;
// reaction halves must comprise non-negative terms
for (const auto& term: lhs->is_stoich()->terms()) {
// should always be true
if (auto sterm = term->is_stoich_term()) {
if (sterm->negative()) {
error(pprintf("expected only non-negative terms in reaction lhs, found '%'",
yellow(term->to_string())));
return nullptr;
}
}
}
if (token_.type != tok::arrow) {
error(pprintf("expected '%', found '%'", yellow("<->"), yellow(token_.spelling)));
return nullptr;
}
get_token(); // consume arrow
expression_ptr rhs = parse_stoich_expression();
if (!rhs) return nullptr;
for (const auto& term: rhs->is_stoich()->terms()) {
// should always be true
if (auto sterm = term->is_stoich_term()) {
if (sterm->negative()) {
error(pprintf("expected only non-negative terms in reaction rhs, found '%'",
yellow(term->to_string())));
return nullptr;
}
}
}
if (token_.type != tok::lparen) {
error(pprintf("expected '%', found '%'", yellow("("), yellow(token_.spelling)));
return nullptr;
}
get_token(); // consume lparen
expression_ptr fwd = parse_expression();
if (!fwd) return nullptr;
if (token_.type != tok::comma) {
error(pprintf("expected '%', found '%'", yellow(","), yellow(token_.spelling)));
return nullptr;
}
get_token(); // consume comma
expression_ptr rev = parse_expression();
if (!rev) return nullptr;
if (token_.type != tok::rparen) {
error(pprintf("expected '%', found '%'", yellow(")"), yellow(token_.spelling)));
return nullptr;
}
get_token(); // consume rparen
return make_expression<ReactionExpression>(here, std::move(lhs), std::move(rhs), std::move(fwd), std::move(rev));
}
else if (search_to_eol(tok::eq)) {
auto lhs_bin = parse_expression(tok::eq);
if (token_.type != tok::eq) {
error(pprintf("expected '%', found '%'", yellow("="), yellow(token_.spelling)));
return nullptr;
}
get_token(); // consume =
auto rhs = parse_expression();
return make_expression<LinearExpression>(here, std::move(lhs_bin), std::move(rhs));
}
else {
error(pprintf("expected stoichiometric or linear expression, found neither"));
return nullptr;
}
}
expression_ptr Parser::parse_conserve_expression() {
auto here = location_;
if (token_.type != tok::conserve) {
error(pprintf("expected '%', found '%'", yellow("CONSERVE"), yellow(token_.spelling)));
return nullptr;
}
get_token(); // consume 'CONSERVE'
auto lhs = parse_stoich_expression();
if (!lhs) return nullptr;
if (token_.type != tok::eq) {
error(pprintf("expected '%', found '%'", yellow("="), yellow(token_.spelling)));
return nullptr;
}
get_token(); // consume '='
auto rhs = parse_expression();
if (!rhs) return nullptr;
return make_expression<ConserveExpression>(here, std::move(lhs), std::move(rhs));
}
expression_ptr Parser::parse_expression(int prec, tok stop_token) {
auto lhs = parse_unaryop();
if (lhs == nullptr) return nullptr;
// Combine all sub-expressions with precedence greater than prec.
for (;;) {
if (token_.type == stop_token) {
return lhs;
}
auto op = token_;
auto p_op = binop_precedence(op.type);
// Note: all tokens that are not infix binary operators have
// precedence of -1, so expressions like function calls will short
// circuit this loop here.
if (p_op <= prec) return lhs;
get_token(); // consume the infix binary operator
lhs = parse_binop(std::move(lhs), op);
if (!lhs) return nullptr;
}
return lhs;
}
expression_ptr Parser::parse_expression() {
return parse_expression(0);
}
expression_ptr Parser::parse_expression(tok t) {
return parse_expression(0, t);
}
/// Parse a unary expression.
/// If called when the current node in the AST is not a unary expression the call
/// will be forwarded to parse_primary. This mechanism makes it possible to parse
/// all nodes in the expression using parse_unary, which simplifies the call sites
/// where either a primary or unary node is to be parsed.
/// It also simplifies parsing nested unary functions, e.g. x + - - y
expression_ptr Parser::parse_unaryop() {
expression_ptr e;
Token op = token_;
switch (token_.type) {
case tok::plus:
// plus sign is simply ignored
get_token(); // consume '+'
return parse_unaryop();
case tok::minus:
get_token(); // consume '-'
e = parse_unaryop(); // handle recursive unary
if (!e) return nullptr;
return unary_expression(token_.location, op.type, std::move(e));
case tok::exp:
case tok::sin:
case tok::cos:
case tok::log:
case tok::abs:
case tok::safeinv:
case tok::exprelr:
get_token(); // consume the unary function token (exp, sin, cos, log, abs, safeinv or exprelr)
if (token_.type != tok::lparen) {
error("missing parenthesis after call to " + yellow(op.spelling));
return nullptr;
}
e = parse_unaryop(); // handle recursive unary
if (!e) return nullptr;
return unary_expression(token_.location, op.type, std::move(e));
default:
return parse_primary();
}
return nullptr;
}
/// parse a primary expression node
/// expects one of :
/// :: number
/// :: identifier
/// :: call
/// :: parenthesis expression (parsed recursively)
/// :: prefix binary operators
expression_ptr Parser::parse_primary() {
switch (token_.type) {
case tok::real:
return parse_real();
case tok::integer:
return parse_integer();
case tok::identifier:
if (peek().type == tok::lparen) {
return parse_call();
}
return parse_identifier();
case tok::lparen:
return parse_parenthesis_expression();
case tok::min:
case tok::max: {
auto op = token_;
// handle infix binary operators, e.g. min(l,r) and max(l,r)
get_token(); // consume operator keyword token
if (token_.type != tok::lparen) {
error("expected opening parenthesis '('");
return nullptr;
}
get_token(); // consume (
auto lhs = parse_expression();
if (!lhs) return nullptr;
if (token_.type != tok::comma) {
error("expected comma ','");
return nullptr;
}
get_token(); // consume ,
auto rhs = parse_expression();
if (!rhs) return nullptr;
if (token_.type != tok::rparen) {
error("expected closing parenthesis ')'");
return nullptr;
}
get_token(); // consume )
return binary_expression(op.location, op.type, std::move(lhs), std::move(rhs));
}
default: // fall through to return nullptr at end of function
error(pprintf("unexpected token '%' in expression",
yellow(token_.spelling)));
}
return nullptr;
}
expression_ptr Parser::parse_parenthesis_expression() {
// never call unless at start of parenthesis
if (token_.type != tok::lparen) {
throw compiler_exception(
"attempt to parse a parenthesis_expression() without opening parenthesis",
location_);
}
get_token(); // consume '('
auto e = parse_expression();
// check for closing parenthesis ')'
if (!e || !expect(tok::rparen)) return nullptr;
get_token(); // consume ')'
return e;
}
expression_ptr Parser::parse_real() {
auto e = make_expression<NumberExpression>(token_.location, token_.spelling);
get_token(); // consume the number
return e;
}
expression_ptr Parser::parse_integer() {
auto e = make_expression<IntegerExpression>(token_.location, token_.spelling);
get_token(); // consume the number
return e;
}
expression_ptr Parser::parse_binop(expression_ptr&& lhs, Token op_left) {
auto p_op_left = binop_precedence(op_left.type);
auto rhs = parse_expression(p_op_left);
if (!rhs) return nullptr;
auto op_right = token_;
auto p_op_right = binop_precedence(op_right.type);
bool right_assoc = operator_associativity(op_right.type) == associativityKind::right;
if (p_op_right > p_op_left) {
throw compiler_exception(
"parse_binop() : encountered operator of higher precedence",
location_);
}
if (p_op_right < p_op_left) {
return binary_expression(op_left.location, op_left.type, std::move(lhs), std::move(rhs));
}
get_token(); // consume op_right
if (right_assoc) {
rhs = parse_binop(std::move(rhs), op_right);
if (!rhs) return nullptr;
return binary_expression(op_left.location, op_left.type, std::move(lhs), std::move(rhs));
}
else {
lhs = binary_expression(op_left.location, op_left.type, std::move(lhs), std::move(rhs));
return parse_binop(std::move(lhs), op_right);
}
}
/// parse a local variable definition
/// a local variable definition is a line with the form
/// LOCAL x
/// or a comma separated list of identifiers
/// LOCAL x, y, z
/// where each name is a valid identifier
expression_ptr Parser::parse_local() {
Location loc = location_;
get_token(); // consume LOCAL
// create local expression stub
auto e = make_expression<LocalDeclaration>(loc);
if (!e) return e;
// add symbols
while (1) {
if (!expect(tok::identifier)) return nullptr;
// try adding variable name to list
if (!e->is_local_declaration()->add_variable(token_)) {
error(e->error_message());
return nullptr;
}
get_token(); // consume identifier
// look for comma that indicates continuation of the variable list
if (token_.type == tok::comma) {
get_token();
}
else {
break;
}
}
return e;
}
/// parse a SOLVE statement
/// a SOLVE statement specifies a procedure and a method
/// SOLVE procedure METHOD method
/// we also support SOLVE statements without a METHOD clause
/// for backward compatibility with performance hacks that
/// are implemented in some key mod files (i.e. Prob* synapses)
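/// e.g. (illustrative): "SOLVE states METHOD cnexp", "SOLVE kin STEADYSTATE sparse",
/// or a bare "SOLVE states"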
expression_ptr Parser::parse_solve() {
int line = location_.line;
Location loc = location_; // solve location for expression
std::string name;
solverMethod method;
solverVariant variant;
get_token(); // consume the SOLVE keyword
if (token_.type != tok::identifier) goto solve_statement_error;
name = token_.spelling; // save name of procedure
get_token(); // consume the procedure identifier
variant = solverVariant::regular;
if (token_.type != tok::method && token_.type != tok::steadystate) {
method = solverMethod::none;
}
else {
if (token_.type == tok::steadystate) {
variant = solverVariant::steadystate;
}
get_token(); // consume the METHOD keyword
switch (token_.type) {
case tok::cnexp:
method = solverMethod::cnexp;
break;
case tok::sparse:
method = solverMethod::sparse;
break;
default:
goto solve_statement_error;
}
get_token(); // consume the method description
}
// check that the rest of the line was empty
if (line == location_.line) {
if (token_.type != tok::eof) goto solve_statement_error;
}
return make_expression<SolveExpression>(loc, name, method, variant);
solve_statement_error:
error("SOLVE statements must have the form\n"
" SOLVE x METHOD method\n"
" or\n"
" SOLVE x STEADYSTATE sparse\n"
" or\n"
" SOLVE x\n" "where 'x' is the name of a DERIVATIVE block and "
"'method' is 'cnexp' or 'sparse'",
loc);
return nullptr;
}
/// parse a CONDUCTANCE statement
/// a CONDUCTANCE statement specifies a variable and a channel
/// where the channel is optional
/// CONDUCTANCE name USEION channel
/// CONDUCTANCE name
expression_ptr Parser::parse_conductance() {
int line = location_.line;
Location loc = location_; // solve location for expression
std::string name;
std::string channel;
get_token(); // consume the CONDUCTANCE keyword
if (token_.type != tok::identifier) goto conductance_statement_error;
name = token_.spelling; // save name of variable
get_token(); // consume the variable identifier
if (token_.type == tok::useion) {
get_token(); // consume the USEION keyword
if (token_.type != tok::identifier) goto conductance_statement_error;
channel = token_.spelling;
get_token(); // consume the ion channel type
}
// check that the rest of the line was empty
if (line == location_.line) {
if (token_.type != tok::eof) goto conductance_statement_error;
}
return make_expression<ConductanceExpression>(loc, name, channel);
conductance_statement_error:
error("CONDUCTANCE statements must have the form\n"
" CONDUCTANCE g USEION channel\n"
" or\n"
" CONDUCTANCE g\n"
"where 'g' is the name of a variable, and 'channel' is the type of ion channel",
loc);
return nullptr;
}
expression_ptr Parser::parse_if() {
Token if_token = token_;
get_token(); // consume 'if'
if (!expect(tok::lparen)) return nullptr;
// parse the conditional
auto cond = parse_parenthesis_expression();
if (!cond) return nullptr;
// parse the block of the true branch
auto true_branch = parse_block(true);
if (!true_branch) return nullptr;
// parse the false branch if there is an else
expression_ptr false_branch;
if (token_.type == tok::else_stmt) {
get_token(); // consume else
// handle 'else if {}' case recursively
if (token_.type == tok::if_stmt) {
expr_list_type if_block;
auto exp = parse_if();
if_block.push_back(std::move(exp));
false_branch = make_expression<BlockExpression>(Location(), std::move(if_block), true);
}
// we have a closing 'else {}'
else if (token_.type == tok::lbrace) {
false_branch = parse_block(true);
}
else {
error("expect either '" + yellow("if") + "' or '" + yellow("{") + " after else");
return nullptr;
}
}
return make_expression<IfExpression>(if_token.location, std::move(cond), std::move(true_branch), std::move(false_branch));
}
// takes a flag indicating whether the block is nested inside a procedure/function
// body (as opposed to being the body itself). The flag is used to reject statements
// that are illegal inside a nested block, e.g. LOCAL declarations.
expression_ptr Parser::parse_block(bool is_nested) {
// blocks have to be enclosed in curly braces {}
expect(tok::lbrace);
get_token(); // consume '{'
// save the location of the first statement as the starting point for the block
Location block_location = token_.location;
expr_list_type body;
while (token_.type != tok::rbrace) {
auto e = parse_statement();
if (!e) return e;
if (is_nested) {
if (e->is_local_declaration()) {
error("LOCAL variable declarations are not allowed inside a nested scope");
return nullptr;
}
if (e->is_reaction()) {
error("reaction expressions are not allowed inside a nested scope");
return nullptr;
}
}
body.emplace_back(std::move(e));
}
if (token_.type != tok::rbrace) {
error(pprintf("could not find closing '%' for else statement that started at ",
yellow("}"),
block_location));
return nullptr;
}
get_token(); // consume closing '}'
return make_expression<BlockExpression>(block_location, std::move(body), is_nested);
}
expression_ptr Parser::parse_initial() {
// has to start with INITIAL: otherwise it is an error in the compiler implementation
expect(tok::initial);
// save the location of the first statement as the starting point for the block
Location block_location = token_.location;
get_token(); // consume 'INITIAL'
if (!expect(tok::lbrace)) return nullptr;
get_token(); // consume '{'
expr_list_type body;
while (token_.type != tok::rbrace) {
auto e = parse_statement();
if (!e) return e;
// disallow variable declarations in an INITIAL block
if (e->is_local_declaration()) {
error("LOCAL variable declarations are not allowed inside a nested scope");
return nullptr;
}
body.emplace_back(std::move(e));
}
if (token_.type != tok::rbrace) {
error(pprintf("could not find closing '%' for else statement that started at ",
yellow("}"),
block_location));
return nullptr;
}
get_token(); // consume closing '}'
return make_expression<InitialBlock>(block_location, std::move(body));
}
expression_ptr Parser::parse_compartment_statement() {
auto here = location_;
if (token_.type != tok::compartment) {
error(pprintf("expected '%', found '%'", yellow("COMPARTMENT"), yellow(token_.spelling)));
return nullptr;
}
get_token(); // consume 'COMPARTMENT'
auto scale_factor = parse_expression(tok::rbrace);
if (!scale_factor) return nullptr;
if (token_.type != tok::lbrace) {
error(pprintf("expected '%', found '%'", yellow("{"), yellow(token_.spelling)));
return nullptr;
}
get_token(); // consume '{'
std::vector<expression_ptr> states;
while (token_.type != tok::rbrace) {
// check identifier
if (token_.type != tok::identifier) {
error("expected a valid identifier, found '" + yellow(token_.spelling) + "'");
return nullptr;
}
auto e = make_expression<IdentifierExpression>(token_.location, token_.spelling);
states.emplace_back(std::move(e));
get_token(); // consume the identifier
}
get_token(); // consume the rbrace
return make_expression<CompartmentExpression>(here, std::move(scale_factor), std::move(states));
}