Skip to content
Snippets Groups Projects
Commit 584c70fe authored by Sam Yates's avatar Sam Yates
Browse files

Tighten number lexing

* Tokenize strings of the form "7E+X", "2.3E ", etc. as a number
  (integer or real) followed by a new token starting with 'E'.
  Note that, as a consequence, the string "1.2E3E4" no longer
  sets the lexer error status.
* Add tests for corresponding numeric disambiguation in
  `text_lexer.cpp`.
* Add tests for stoichiometric expressions that ensure that only
  otherwise ambiguous phrases such as "7E+2F" fail to parse
  ("7E+F" should parse correctly as a stoich expression.)
* Add missing comma in list of 'good' expressions in
  the `Parser.parse_line_expressions` test.
parent 3850c41a
No related branches found
No related tags found
No related merge requests found
...@@ -28,6 +28,9 @@ inline bool is_eof(char c) { ...@@ -28,6 +28,9 @@ inline bool is_eof(char c) {
inline bool is_operator(char c) { inline bool is_operator(char c) {
return (c=='+' || c=='-' || c=='*' || c=='/' || c=='^' || c=='\''); return (c=='+' || c=='-' || c=='*' || c=='/' || c=='^' || c=='\'');
} }
// Returns true when c is one of the two sign characters, '+' or '-'.
inline bool is_plusminus(char c) {
    switch (c) {
    case '+':
    case '-':
        return true;
    default:
        return false;
    }
}
//********************* //*********************
// Lexer // Lexer
...@@ -258,13 +261,21 @@ Token Lexer::number() { ...@@ -258,13 +261,21 @@ Token Lexer::number() {
incorrectly_formed_mantisa = true; incorrectly_formed_mantisa = true;
} }
} }
else if(c=='e' || c=='E') { else if(!uses_scientific_notation && (c=='e' || c=='E')) {
uses_scientific_notation++; if(is_numeric(current_[1]) ||
str += c; is_plusminus(current_[1]) && is_numeric(current_[2]))
current_++; {
// Consume the next char if +/- uses_scientific_notation++;
if (*current_ == '+' || *current_ == '-') { str += c;
str += *current_++; current_++;
// Consume the next char if +/-
if (is_plusminus(*current_)) {
str += *current_++;
}
}
else {
// the 'e' or 'E' is the beginning of a new token
break;
} }
} }
else { else {
...@@ -283,11 +294,6 @@ Token Lexer::number() { ...@@ -283,11 +294,6 @@ Token Lexer::number() {
error_string_ = pprintf("too many .'s when reading the number '%'", yellow(str)); error_string_ = pprintf("too many .'s when reading the number '%'", yellow(str));
status_ = lexerStatus::error; status_ = lexerStatus::error;
} }
// check that e or E is not used more than once in the number
if(uses_scientific_notation>1) {
error_string_ = pprintf("can't parse the number '%'", yellow(str));
status_ = lexerStatus::error;
}
tok type; tok type;
if(status_==lexerStatus::error) { if(status_==lexerStatus::error) {
......
#include <cctype>
#include <cmath> #include <cmath>
#include <cstdio>
#include <iterator> #include <iterator>
#include <utility> #include <utility>
...@@ -31,6 +33,22 @@ public: ...@@ -31,6 +33,22 @@ public:
} }
return tok; return tok;
} }
// Forwards to Lexer::character() and returns its result unchanged.
// When g_verbose_flag is set, the character is also echoed to stdout:
// printable characters verbatim, anything else as a two-digit hex
// byte such as "0x0a".
char character() {
    char c = Lexer::character();
    if (g_verbose_flag) {
        // Go through unsigned char first: where char is signed, passing a
        // negative value to std::isprint is undefined behaviour, and a
        // direct conversion to unsigned sign-extends (0x80 -> 0xffffff80),
        // which overflows the "0xNN" format and gets truncated to "0xff".
        const unsigned char byte = static_cast<unsigned char>(c);
        std::cout << "character: ";
        if (std::isprint(byte)) {
            std::cout << c << '\n';
        }
        else {
            // "0x" + two hex digits + terminating NUL == 5 bytes.
            char buf[5] = "XXXX";
            std::snprintf(buf, sizeof buf, "0x%02x", static_cast<unsigned>(byte));
            std::cout << buf << '\n';
        }
    }
    return c;
}
}; };
/************************************************************** /**************************************************************
...@@ -307,4 +325,29 @@ TEST(Lexer, numbers) { ...@@ -307,4 +325,29 @@ TEST(Lexer, numbers) {
EXPECT_EQ(floats.cend(), iter); EXPECT_EQ(floats.cend(), iter);
EXPECT_EQ(tok::eof, t.type); EXPECT_EQ(tok::eof, t.type);
EXPECT_EQ(check_ints, ints); EXPECT_EQ(check_ints, ints);
// check case where 'E' is not followed by +, -, or a digit explicitly
lexer = VerboseLexer("7.2E");
t = lexer.parse();
EXPECT_EQ(lexerStatus::happy, lexer.status());
EXPECT_EQ(tok::real, t.type);
EXPECT_EQ(t.spelling, "7.2");
EXPECT_EQ(lexer.character(), 'E');
lexer = VerboseLexer("3E+E2");
t = lexer.parse();
EXPECT_EQ(lexerStatus::happy, lexer.status());
EXPECT_EQ(tok::integer, t.type);
EXPECT_EQ(t.spelling, "3");
EXPECT_EQ(lexer.character(), 'E');
EXPECT_EQ(lexer.character(), '+');
// 'bad' numbers should give errors
lexer = VerboseLexer("1.2.3");
lexer.parse();
EXPECT_EQ(lexerStatus::error, lexer.status());
lexer = VerboseLexer("1.2E4.3");
lexer.parse();
EXPECT_EQ(lexerStatus::error, lexer.status());
} }
...@@ -282,7 +282,7 @@ TEST(Parser, parse_parenthesis_expression) { ...@@ -282,7 +282,7 @@ TEST(Parser, parse_parenthesis_expression) {
// test parsing of line expressions // test parsing of line expressions
TEST(Parser, parse_line_expression) { TEST(Parser, parse_line_expression) {
const char* good_expr[] = { const char* good_expr[] = {
"qt=q10^((celsius-22)/10)" "qt=q10^((celsius-22)/10)",
"x=2 ", "x=2 ",
"x=2 ", "x=2 ",
"x = -y\n " "x = -y\n "
...@@ -319,7 +319,7 @@ TEST(Parser, parse_line_expression) { ...@@ -319,7 +319,7 @@ TEST(Parser, parse_line_expression) {
TEST(Parser, parse_stoich_term) { TEST(Parser, parse_stoich_term) {
const char* good_pos_expr[] = { const char* good_pos_expr[] = {
"B", "B3", "3B3", "0A", "12A" "B", "B3", "3B3", "0A", "12A", "4E"
}; };
for (auto& text: good_pos_expr) { for (auto& text: good_pos_expr) {
...@@ -338,7 +338,7 @@ TEST(Parser, parse_stoich_term) { ...@@ -338,7 +338,7 @@ TEST(Parser, parse_stoich_term) {
EXPECT_TRUE((s && s->negative())); EXPECT_TRUE((s && s->negative()));
} }
const char* bad_expr[] = { const char* bad_expr[] = {
"0.2A", "5" "0.2A", "5", "3e2" // "3e2" should lex as real number 300.0
}; };
for (auto& text: bad_expr) { for (auto& text: bad_expr) {
...@@ -403,6 +403,7 @@ TEST(Parser, parse_reaction_expression) { ...@@ -403,6 +403,7 @@ TEST(Parser, parse_reaction_expression) {
"~ A + B <-> C + D (k1, k2)", "~ A + B <-> C + D (k1, k2)",
"~ 2B <-> C + D + E (k1(3,v), k2)", "~ 2B <-> C + D + E (k1(3,v), k2)",
"~ <-> C + D + 7 E (k1, f(a,b)-2)", "~ <-> C + D + 7 E (k1, f(a,b)-2)",
"~ <-> C + D + 7E+F (k1, f(a,b)-2)",
"~ <-> (f,g)", "~ <-> (f,g)",
"~ A + 3B + C<-> (f,g)" "~ A + 3B + C<-> (f,g)"
}; };
...@@ -417,6 +418,7 @@ TEST(Parser, parse_reaction_expression) { ...@@ -417,6 +418,7 @@ TEST(Parser, parse_reaction_expression) {
"~ A + B <-> C + (k1, k2)", "~ A + B <-> C + (k1, k2)",
"~ 2.3B <-> C + D + E (k1(3,v), k2)", "~ 2.3B <-> C + D + E (k1(3,v), k2)",
"~ <-> C + D + 7E", "~ <-> C + D + 7E",
"~ <-> C + D + 7E+2F (k1, f(a,b)-2)", // "7E+2" will lex as real number
"~ <-> (,g)", "~ <-> (,g)",
"~ A - 3B + C<-> (f,g)", "~ A - 3B + C<-> (f,g)",
" A <-> B (k1, k2)", " A <-> B (k1, k2)",
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment