Skip to content
Snippets Groups Projects
Commit 584c70fe authored by Sam Yates's avatar Sam Yates
Browse files

Tighten number lexing

* Tokenize strings of the form "7E+X", "2.3E ", etc. as a number
  (integer, real) followed by a new token starting with 'E'.
  Note that this makes the string "1.2E3E4" no longer cause a
  lexer error status.
* Add tests for corresponding numeric disambiguation in
  `text_lexer.cpp`.
* Add tests for stoichiometric expressions that ensure that only
  otherwise ambiguous phrases such as "7E+2F" fail to parse
  ("7E+F" should parse correctly as a stoich expression.)
* Add missing comma in list of 'good' expressions in
  the `Parser.parse_line_expression` test.
parent 3850c41a
No related branches found
No related tags found
No related merge requests found
......@@ -28,6 +28,9 @@ inline bool is_eof(char c) {
// Returns true when `c` is one of the operator characters
// '+', '-', '*', '/', '^' or the single quote '\''.
inline bool is_operator(char c) {
    switch (c) {
    case '+':
    case '-':
    case '*':
    case '/':
    case '^':
    case '\'':
        return true;
    default:
        return false;
    }
}
// Returns true when `c` is a sign character, i.e. '+' or '-'.
inline bool is_plusminus(char c) {
    if (c == '+') {
        return true;
    }
    return c == '-';
}
//*********************
// Lexer
......@@ -258,13 +261,21 @@ Token Lexer::number() {
incorrectly_formed_mantisa = true;
}
}
else if(c=='e' || c=='E') {
uses_scientific_notation++;
str += c;
current_++;
// Consume the next char if +/-
if (*current_ == '+' || *current_ == '-') {
str += *current_++;
else if(!uses_scientific_notation && (c=='e' || c=='E')) {
if(is_numeric(current_[1]) ||
is_plusminus(current_[1]) && is_numeric(current_[2]))
{
uses_scientific_notation++;
str += c;
current_++;
// Consume the next char if +/-
if (is_plusminus(*current_)) {
str += *current_++;
}
}
else {
// the 'e' or 'E' is the beginning of a new token
break;
}
}
else {
......@@ -283,11 +294,6 @@ Token Lexer::number() {
error_string_ = pprintf("too many .'s when reading the number '%'", yellow(str));
status_ = lexerStatus::error;
}
// check that e or E is not used more than once in the number
if(uses_scientific_notation>1) {
error_string_ = pprintf("can't parse the number '%'", yellow(str));
status_ = lexerStatus::error;
}
tok type;
if(status_==lexerStatus::error) {
......
#include <cctype>
#include <cmath>
#include <cstdio>
#include <iterator>
#include <utility>
......@@ -31,6 +33,22 @@ public:
}
return tok;
}
// Fetch the next character via Lexer::character() and return it unchanged.
// When g_verbose_flag is set, the character is also echoed to std::cout:
// printable characters are written as-is, anything else as a two-digit
// hex code such as 0x0a.
char character() {
    const char c = Lexer::character();
    if (g_verbose_flag) {
        // Work on the unsigned char value: passing a negative char to
        // std::isprint is undefined behaviour, and converting a negative
        // char directly to unsigned sign-extends it, so the hex text would
        // be truncated by the 5-byte buffer (e.g. 0x80 shown as "0xff").
        const unsigned char uc = static_cast<unsigned char>(c);
        std::cout << "character: ";
        if (!std::isprint(uc)) {
            // "0x" + two hex digits + terminating NUL fits exactly.
            char buf[5] = "XXXX";
            std::snprintf(buf, sizeof buf, "0x%02x", static_cast<unsigned>(uc));
            std::cout << buf << '\n';
        }
        else {
            std::cout << c << '\n';
        }
    }
    return c;
}
};
/**************************************************************
......@@ -307,4 +325,29 @@ TEST(Lexer, numbers) {
EXPECT_EQ(floats.cend(), iter);
EXPECT_EQ(tok::eof, t.type);
EXPECT_EQ(check_ints, ints);
// check case where 'E' is not followed by +, -, or a digit explicitly
lexer = VerboseLexer("7.2E");
t = lexer.parse();
EXPECT_EQ(lexerStatus::happy, lexer.status());
EXPECT_EQ(tok::real, t.type);
EXPECT_EQ(t.spelling, "7.2");
EXPECT_EQ(lexer.character(), 'E');
lexer = VerboseLexer("3E+E2");
t = lexer.parse();
EXPECT_EQ(lexerStatus::happy, lexer.status());
EXPECT_EQ(tok::integer, t.type);
EXPECT_EQ(t.spelling, "3");
EXPECT_EQ(lexer.character(), 'E');
EXPECT_EQ(lexer.character(), '+');
// 'bad' numbers should give errors
lexer = VerboseLexer("1.2.3");
lexer.parse();
EXPECT_EQ(lexerStatus::error, lexer.status());
lexer = VerboseLexer("1.2E4.3");
lexer.parse();
EXPECT_EQ(lexerStatus::error, lexer.status());
}
......@@ -282,7 +282,7 @@ TEST(Parser, parse_parenthesis_expression) {
// test parsing of line expressions
TEST(Parser, parse_line_expression) {
const char* good_expr[] = {
"qt=q10^((celsius-22)/10)"
"qt=q10^((celsius-22)/10)",
"x=2 ",
"x=2 ",
"x = -y\n "
......@@ -319,7 +319,7 @@ TEST(Parser, parse_line_expression) {
TEST(Parser, parse_stoich_term) {
const char* good_pos_expr[] = {
"B", "B3", "3B3", "0A", "12A"
"B", "B3", "3B3", "0A", "12A", "4E"
};
for (auto& text: good_pos_expr) {
......@@ -338,7 +338,7 @@ TEST(Parser, parse_stoich_term) {
EXPECT_TRUE((s && s->negative()));
}
const char* bad_expr[] = {
"0.2A", "5"
"0.2A", "5", "3e2" // "3e2" should lex as real number 300.0
};
for (auto& text: bad_expr) {
......@@ -403,6 +403,7 @@ TEST(Parser, parse_reaction_expression) {
"~ A + B <-> C + D (k1, k2)",
"~ 2B <-> C + D + E (k1(3,v), k2)",
"~ <-> C + D + 7 E (k1, f(a,b)-2)",
"~ <-> C + D + 7E+F (k1, f(a,b)-2)",
"~ <-> (f,g)",
"~ A + 3B + C<-> (f,g)"
};
......@@ -417,6 +418,7 @@ TEST(Parser, parse_reaction_expression) {
"~ A + B <-> C + (k1, k2)",
"~ 2.3B <-> C + D + E (k1(3,v), k2)",
"~ <-> C + D + 7E",
"~ <-> C + D + 7E+2F (k1, f(a,b)-2)", // "7E+2" will lex as real number
"~ <-> (,g)",
"~ A - 3B + C<-> (f,g)",
" A <-> B (k1, k2)",
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment