Lexer: slightly improve strings & comments lexing
[scilab.git] / scilab / modules / ast / src / cpp / parse / flex / scanscilab.ll
index 204c91b..292c1bd 100644 (file)
@@ -23,6 +23,7 @@ extern "C"
 }
 
 static int comment_level = 0;
+static int paren_level = 0; // '(' minus ')' count kept by the <MATRIX> paren rules; while non-zero, {spaces}{plus}/{spaces}{minus} emit no COMMA. NOTE(review): one counter shared by nested matrices, no reset visible here - confirm.
 static int last_token = 0;
 static int exit_status = PARSE_ERROR;
 static std::string current_file;
@@ -90,6 +91,8 @@ newline                       ("\r"|"\n"|"\r\n")
 blankline              {spaces}+{newline}
 emptyline       {newline}({spaces}|[,;])+{newline}
 next                   \.\.+
+char_in_line_comment    [^\r\n]+
+char_in_comment         [^\r\n\/*]+
 
 boolnot                        ("@"|"~")
 booltrue               ("%t"|"%T")
@@ -120,6 +123,7 @@ endblockcomment             "*/"
 
 dquote                 "\""
 quote                  "'"
+in_string               [^\"\'\r\n\.]+
 
 dot             "."
 dotquote               ".'"
@@ -397,10 +401,10 @@ assign                    "="
 }
 
 
-<INITIAL,MATRIX>{lparen}               {
+<INITIAL>{lparen}                      {
   return scan_throw(LPAREN);
 }
-<INITIAL,MATRIX>{rparen}               {
+<INITIAL>{rparen}                      {
   return scan_throw(RPAREN);
 }
 
@@ -693,6 +697,16 @@ assign                     "="
 
 <MATRIX>
 {
+  {lparen} {
+    paren_level++; // one more open '(' inside the matrix; read by the {spaces}{plus}/{spaces}{minus} rules
+    return scan_throw(LPAREN);
+  }
+
+  {rparen} {
+    paren_level--; // closes one '('; NOTE(review): not clamped at 0 for an unmatched ')' - confirm this is handled elsewhere
+    return scan_throw(RPAREN);
+  }
+
   {spaces}*{lparen} {
       unput(yytext[yyleng -1]);
       if (last_token == ID
@@ -749,21 +763,27 @@ assign                    "="
     if (last_token != LBRACK
        && last_token != EOL
        && last_token != SEMI
-       && last_token != COMMA)
+       && last_token != COMMA
+       && paren_level == 0)
    {
        return scan_throw(COMMA);
-   }  
+   }
+   else
+   {
+       unput('+');
+   }
   }
+
   {spaces}{minus}                       {
     unput('-');
     if (last_token != LBRACK
        && last_token != EOL
        && last_token != SEMI
-       && last_token != COMMA)
+       && last_token != COMMA
+       && paren_level == 0) // no implicit COMMA while inside parentheses
    {
        return scan_throw(COMMA);
-   }  
+   }
   }
 
   .                                    {
@@ -868,6 +888,10 @@ assign                     "="
         FREE (pwstBuffer);
         return scan_throw(COMMENT);
     }
+    else
+    {
+        delete pstBuffer;
+    }
   }
 
   <<EOF>>      {
@@ -887,12 +911,11 @@ assign                    "="
     return scan_throw(COMMENT);
   }
 
-  .         {
-     // Put the char in a temporary CHAR buffer to go through UTF-8 trouble
-     // only translate to WCHAR_T when popping state.
-     *pstBuffer += yytext;
+  {char_in_line_comment}         {
+      // Put the char in a temporary CHAR buffer to go through UTF-8 trouble
+      // only translate to WCHAR_T when popping state.
+      *pstBuffer += yytext;
   }
-
 }
 
 
@@ -919,7 +942,8 @@ assign                      "="
     *yylval.comment += L"\n//";
   }
 
-  .                                            {
+  {char_in_comment}                            |
+  .                                             {
       wchar_t *pwText = to_wide_string(yytext);
       *yylval.comment += std::wstring(pwText);
       FREE(pwText);
@@ -994,7 +1018,8 @@ assign                     "="
     scan_error(str);
   }
 
-  .                                            {
+  {in_string}                                          |
+  .                                                     {
     scan_step();
     *pstBuffer += yytext;
   }
@@ -1061,7 +1086,8 @@ assign                    "="
     scan_error(str);
   }
 
-  .         {
+  {in_string}         |
+  .                   {
    scan_step();
    *pstBuffer += yytext;
   }