Lexer: slightly improve strings & comments lexing
[scilab.git] / scilab / modules / ast / src / cpp / parse / flex / scanscilab.ll
index 20a8344..292c1bd 100644 (file)
@@ -22,8 +22,8 @@ extern "C"
 #include "sci_malloc.h"
 }
 
-static int matrix_level = 0;
 static int comment_level = 0;
+static int paren_level = 0;
 static int last_token = 0;
 static int exit_status = PARSE_ERROR;
 static std::string current_file;
@@ -31,8 +31,6 @@ static std::string program_name;
 
 static std::string *pstBuffer;
 
-static bool rejected = false;
-
 #define YY_USER_ACTION                          \
  yylloc.last_column += yyleng;
 
@@ -58,7 +56,6 @@ static bool rejected = false;
 %x LINEBREAK
 
 %x MATRIX
-%x MATRIXMINUSID
 
 %x SHELLMODE
 %x BEGINID
@@ -87,13 +84,15 @@ utf3            ({utf31}|{utf32}|{utf33}|{utf34})
 utf4            ({utf41}|{utf42}|{utf43})
 
 utf             ({utf2}|{utf3}|{utf4})
-id              (([a-zA-Z_%#?$]|{utf})([a-zA-Z_0-9#?$]|{utf})*)
+id              ((([a-zA-Z_%#?]|{utf})([a-zA-Z_0-9#?$]|{utf})*)|([$]([a-zA-Z_0-9#?$]|{utf})+))
 
 
 newline                        ("\r"|"\n"|"\r\n")
 blankline              {spaces}+{newline}
 emptyline       {newline}({spaces}|[,;])+{newline}
 next                   \.\.+
+char_in_line_comment    [^\r\n]*
+char_in_comment         [^\r\n\/*]*
 
 boolnot                        ("@"|"~")
 booltrue               ("%t"|"%T")
@@ -124,6 +123,7 @@ endblockcomment             "*/"
 
 dquote                 "\""
 quote                  "'"
+in_string               [^\"\'\r\n\.]*
 
 dot             "."
 dotquote               ".'"
@@ -139,20 +139,20 @@ ldivide                   "\\"
 times                  "*"
 power                  ("^"|"**")
 
-equal                  "=="
-notequal               ("~="|"@="|"<>")
+equal                  "="{spaces}*"="
+notequal               ("~"{spaces}*"="|"@"{spaces}*"="|"<"{spaces}*">")
 lowerthan              "<"
 greaterthan            ">"
-lowerequal             "<="
-greaterequal           ">="
+lowerequal             "<"{spaces}*"="
+greaterequal           ">"{spaces}*"="
 
 krontimes              ".*."
 kronrdivide            "./."
 kronldivide            ".\\."
 
-controltimes    ("*."[^0-9.])
-controlrdivide ("/."[^0-9.])
-controlldivide  ("\\."[^0-9.])
+controltimes    ("*."[^0-9])
+controlrdivide ("/."[^0-9])
+controlldivide  ("\\."[^0-9])
 
 assign                 "="
 
@@ -342,7 +342,7 @@ assign                      "="
     return scan_throw(RETURN);
 }
 
-^{spaces}*/({id}){spaces}[^(=<>~@] {
+^{spaces}*/({id}){spaces}[^ \t\v\f(=<>~@] {
+        // Only the leading {spaces}* at the start of the line is consumed here;
+        // everything after '/' is trailing context and stays in the input.
+        // The context requires an identifier, then {spaces}, then a character
+        // that is neither whitespace (' ', \t, \v, \f), '(' nor one of
+        // '=', '<', '>', '~', '@'. The identifier itself is left for the
+        // BEGINID start condition to scan.
         BEGIN(BEGINID);
 }
 
@@ -360,8 +360,8 @@ assign                      "="
         }
         yylval.str = new std::wstring(pwText);
        FREE(pwText);
-        if (symbol::Context::getInstance()->get(symbol::Symbol(*yylval.str)) != NULL
-            && symbol::Context::getInstance()->get(symbol::Symbol(*yylval.str))->isCallable())
+       types::InternalType * pIT = symbol::Context::getInstance()->get(symbol::Symbol(*yylval.str));
+        if (pIT && pIT->isCallable())
         {
             scan_throw(ID);
             BEGIN(SHELLMODE);
@@ -401,10 +401,10 @@ assign                    "="
 }
 
 
-<INITIAL,MATRIX>{lparen}               {
+<INITIAL>{lparen}                      {
   return scan_throw(LPAREN);
 }
-<INITIAL,MATRIX>{rparen}               {
+<INITIAL>{rparen}                      {
   return scan_throw(RPAREN);
 }
 
@@ -697,6 +697,16 @@ assign                     "="
 
 <MATRIX>
 {
+  {lparen} {
+    // Track '(' nesting inside a matrix: the {spaces}{plus} and
+    // {spaces}{minus} rules only insert an implicit COMMA when
+    // paren_level == 0. This rule comes first, so it wins the tie against
+    // {spaces}*{lparen} when there is no leading blank.
+    paren_level++;
+    return scan_throw(LPAREN);
+  }
+
+  {rparen} {
+    // Guard the decrement: paren_level is a file-static counter, so an
+    // unbalanced ')' must not push it below zero. A negative value would make
+    // every later "paren_level == 0" test fail and silently disable implicit
+    // comma insertion.
+    // NOTE(review): an unbalanced '(' can still leave the counter positive;
+    // confirm it is reset wherever the MATRIX state is entered.
+    if (paren_level > 0)
+    {
+        paren_level--;
+    }
+    return scan_throw(RPAREN);
+  }
+
   {spaces}*{lparen} {
       unput(yytext[yyleng -1]);
       if (last_token == ID
@@ -738,93 +748,44 @@ assign                    "="
     return scan_throw(RBRACE);
   }
 
-  {plus}                               {
+  {plus}                               |
+  {spaces}{plus}{spaces}                {
+    // A bare '+', or a '+' with {spaces} on both sides, is an ordinary PLUS
+    // token. The '|' action makes both patterns share this action. The
+    // spaced form is longer than {spaces}{plus}, so flex prefers it whenever
+    // {spaces} also follows the sign.
     return scan_throw(PLUS);
   }
 
-  {minus}                              {
+  {minus}                              |
+  {spaces}{minus}{spaces}               {
+    // Same as above for '-': bare, or with {spaces} on both sides, it is an
+    // ordinary MINUS token.
     return scan_throw(MINUS);
   }
 
-  {spaces}({plus}|{minus}){lparen}*{integer}                   {
-   int i;
-   for (i = yyleng - 1 ; i >= 0 ; --i)
-   {
-       unput(yytext[i]);
-   }
-   yy_push_state(MATRIXMINUSID);
-   if (last_token != LBRACK
+  {spaces}{plus}                        {
+    // {spaces} followed by '+' (when {spaces} also follows the sign, the
+    // longer {spaces}{plus}{spaces} rule matches instead).
+    // If we are not inside parentheses and the previous token is not
+    // LBRACK, EOL, SEMI or COMMA, emit an implicit COMMA; the '+' is then
+    // dropped rather than pushed back. In every other case the '+' is pushed
+    // back and, since this branch does not return, scanning continues and
+    // reads it again.
+    if (last_token != LBRACK
        && last_token != EOL
        && last_token != SEMI
-       && last_token != COMMA)
+       && last_token != COMMA
+       && paren_level == 0)
    {
        return scan_throw(COMMA);
    }
+   else
+   {
+       unput('+');
+   }
   }
 
-  {spaces}({plus}|{minus}){lparen}*{number}    {
-      int i;
-      for (i = yyleng - 1 ; i >= 0 ; --i)
-      {
-          unput(yytext[i]);
-      }
-      yy_push_state(MATRIXMINUSID);
-      if (last_token != LBRACK
-          && last_token != EOL
-          && last_token != SEMI
-          && last_token != COMMA)
-      {
-          return scan_throw(COMMA);
-      }
-  }
-
-  {spaces}({plus}|{minus}){lparen}*{floating}  {
-      int i;
-      for (i = yyleng - 1 ; i >= 0 ; --i)
-      {
-          unput(yytext[i]);
-      }
-      yy_push_state(MATRIXMINUSID);
-      if (last_token != LBRACK
-          && last_token != EOL
-          && last_token != SEMI
-          && last_token != COMMA)
-      {
-          return scan_throw(COMMA);
-      }
-  }
-
-  {spaces}({plus}|{minus}){lparen}*{little}    {
-      int i;
-      for (i = yyleng - 1 ; i >= 0 ; --i)
-      {
-          unput(yytext[i]);
-      }
-      yy_push_state(MATRIXMINUSID);
-      if (last_token != LBRACK
-          && last_token != EOL
-          && last_token != SEMI
-          && last_token != COMMA)
-      {
-          return scan_throw(COMMA);
-      }
+  {spaces}{minus}                       {
+    // {spaces} followed by '-' (when {spaces} also follows the sign, the
+    // longer {spaces}{minus}{spaces} rule matches instead).
+    // Unlike the '+' case, the '-' is always pushed back so that it is
+    // scanned again after this action.
+    unput('-');
+    // Emit an implicit COMMA only when we are not inside parentheses and the
+    // previous token is not LBRACK, EOL, SEMI or COMMA. Otherwise fall
+    // through without returning and let the pushed-back '-' be scanned next.
+    if (last_token != LBRACK
+       && last_token != EOL
+       && last_token != SEMI
+       && last_token != COMMA
+       && paren_level == 0)
+   {
+       return scan_throw(COMMA);
+   }
   }
 
-  {spaces}({minus}|{plus}){lparen}*{id}                {
-      int i;
-      for (i = yyleng - 1; i >= 0 ; --i)
-      {
-          unput(yytext[i]);
-      }
-      yy_push_state(MATRIXMINUSID);
-      if (last_token != LBRACK
-          && last_token != EOL
-          && last_token != SEMI
-          && last_token != COMMA)
-      {
-          return scan_throw(COMMA);
-      }
-  }
   .                                    {
     std::string str = "unexpected token '";
     str += yytext;
@@ -853,97 +814,6 @@ assign                     "="
   }
 }
 
-<MATRIXMINUSID>
-{
-  {minus}                              {
-    return scan_throw(MINUS);
-  }
-
-  {plus}                               {
-     /* Do Nothing. */
-  }
-
-  {integer}                            {
-    yy_pop_state();
-    yylval.number = atof(yytext);
-#ifdef TOKENDEV
-    std::cout << "--> [DEBUG] INTEGER : " << yytext << std::endl;
-#endif
-    scan_step();
-    return scan_throw(VARINT);
-  }
-
-  {number}                             {
-    yy_pop_state();
-    yylval.number = atof(yytext);
-#ifdef TOKENDEV
-    std::cout << "--> [DEBUG] NUMBER : " << yytext << std::endl;
-#endif
-    scan_step();
-    return scan_throw(NUM);
-  }
-
-  {little}                             {
-    yy_pop_state();
-    yylval.number = atof(yytext);
-#ifdef TOKENDEV
-    std::cout << "--> [DEBUG] LITTLE : " << yytext << std::endl;
-#endif
-    scan_step();
-    return scan_throw(NUM);
-  }
-
-  {floating}                           {
-    yy_pop_state();
-    scan_exponent_convert(yytext);
-    yylval.number = atof(yytext);
-#ifdef TOKENDEV
-    std::cout << "--> [DEBUG] FLOATING : " << yytext << std::endl;
-#endif
-    scan_step();
-    return scan_throw(VARFLOAT);
-  }
-
-  {id}                                 {
-    yy_pop_state();
-    wchar_t* pwText = to_wide_string(yytext);
-    if (yytext != NULL && pwText == NULL)
-    {
-        std::string str = "can not convert'";
-        str += yytext;
-        str += "' to UTF-8";
-        exit_status = SCAN_ERROR;
-        scan_error("can not convert string to UTF-8");
-    }
-    yylval.str = new std::wstring(pwText);
-    FREE(pwText);
-#ifdef TOKENDEV
-    std::cout << "--> [DEBUG] ID : " << yytext << std::endl;
-#endif
-    scan_step();
-    return scan_throw(ID);
-  }
-
-  {spaces}                             {
-    /* Do Nothing. */
-  }
-
-  {lparen} {
-      return scan_throw(LPAREN);
-  }
-
-  {rparen} {
-      return scan_throw(RPAREN);
-  }
-  .                                    {
-    std::string str = "unexpected token '";
-    str += yytext;
-    str += "' within a matrix.";
-    exit_status = SCAN_ERROR;
-    scan_error(str);
-  }
-}
-
 <LINEBREAK>
 {
   {newline}                            {
@@ -1018,6 +888,10 @@ assign                    "="
         FREE (pwstBuffer);
         return scan_throw(COMMENT);
     }
+    else
+    {
+        delete pstBuffer;
+    }
   }
 
   <<EOF>>      {
@@ -1037,12 +911,11 @@ assign                   "="
     return scan_throw(COMMENT);
   }
 
-  .         {
-     // Put the char in a temporary CHAR buffer to go through UTF-8 trouble
-     // only translate to WCHAR_T when popping state.
-     *pstBuffer += yytext;
+  {char_in_line_comment}         {
+      // {char_in_line_comment} is [^\r\n]*, so yytext is a whole run of
+      // characters up to (not including) the next newline, not a single one.
+      // Append it to the temporary CHAR buffer to go through UTF-8 trouble;
+      // only translate to WCHAR_T when popping state.
+      *pstBuffer += yytext;
   }
-
 }
 
 
@@ -1069,7 +942,8 @@ assign                     "="
     *yylval.comment += L"\n//";
   }
 
-  .                                            {
+  {char_in_comment}                            |
+  .                                             {
       wchar_t *pwText = to_wide_string(yytext);
       *yylval.comment += std::wstring(pwText);
       FREE(pwText);
@@ -1144,7 +1018,8 @@ assign                    "="
     scan_error(str);
   }
 
-  .                                            {
+  {in_string}                                          |
+  .                                                     {
+    // {in_string} is [^\"\'\r\n\.]*: a run of characters containing no quote,
+    // newline or dot, appended to the buffer in one step. The '.' pattern
+    // still covers a single character, and the '|' action makes both
+    // patterns share this action.
     scan_step();
     *pstBuffer += yytext;
   }
@@ -1211,7 +1086,8 @@ assign                    "="
     scan_error(str);
   }
 
-  .         {
+  {in_string}         |
+  .                   {
+   // {in_string} is [^\"\'\r\n\.]*: a run of characters containing no quote,
+   // newline or dot, appended to the buffer in one step. The '.' pattern
+   // still covers a single character, and the '|' action makes both
+   // patterns share this action.
    scan_step();
    *pstBuffer += yytext;
   }