improve strcat, strsubst, part performance 31/21431/5
Antoine ELIAS [Thu, 5 Mar 2020 08:45:13 +0000 (09:45 +0100)]
//strcat
a = ones(1,300000);
b = string(a);
tic();strcat(b);toc()

//strsubst
A = string(rand(100, 100));
tic;B = strcat(A);toc
tic;strsubst(B, "3", "X");toc
tic;strsubst(B, "30", "X");toc
tic;strsubst(B, "3", "XYZ");toc

//part
c = 1e5;
str = strcat(string(int(rand(1, c) * 10)));
idx = int(rand(1, c) * c + 1);
tic;part(str, idx);toc

Change-Id: I8f2635cf3ce2e3ab257690935ddf0961b403ea40

scilab/CHANGES.md
scilab/modules/string/sci_gateway/cpp/sci_part.cpp
scilab/modules/string/sci_gateway/cpp/sci_strcat.cpp
scilab/modules/string/sci_gateway/cpp/sci_strsubst.cpp
scilab/modules/string/tests/benchmarks/bench_part2.tst [new file with mode: 0644]

index 4a5bdb5..64c2623 100644 (file)
@@ -265,6 +265,10 @@ Bug Fixes
 ---------
 
+### Bugs fixed in 6.1.1:
+* [#16342](https://bugzilla.scilab.org/16342): `strcat()` was much slower in Scilab 6.0.2.
+
+
 ### Bugs fixed in 6.1.0:
 * [#2694](https://bugzilla.scilab.org/2694): `bitget` did not accept positive integers of types int8, int16 or int32.
 * [#5824](https://bugzilla.scilab.org/5824): The `datafit` algorithm was not documented.
 * [#6070](https://bugzilla.scilab.org/6070): How to make multiscaled plots was not documented.
index 81c24ab..5c6c028 100644 (file)
@@ -79,23 +79,44 @@ types::Function::ReturnValue sci_part(types::typed_list &in, int _iRetCount, typ
         return types::Function::Error;
     }
 
-    int* piIndex = new int[pD->getSize()];
-    for (int i = 0 ; i < pD->getSize() ; i++)
+    size_t i_len = pD->getSize();
+    std::vector<int> index(i_len);
+    for (int i = 0 ; i < i_len; i++)
     {
-        piIndex[i] = static_cast<int>(pD->getReal()[i]);
-        if (piIndex[i] < 1)
+        int idx = static_cast<int>(pD->get()[i]);
+        if (idx < 1)
         {
             Scierror(36, _("%s: Wrong values for input argument #%d: Must be >= 1.\n"), "part", 2);
-            delete[] piIndex;
             return types::Function::Error;
         }
+
+        index[i] = idx;
     }
 
-    wchar_t** pwstOut = partfunctionW(pS->get(), pS->getRows(), pS->getCols(), piIndex, pD->getSize());
-    delete[] piIndex;
+    // Build each output string directly here (replaces the former partfunctionW() call).
     types::String* pOut = new types::String(pS->getRows(), pS->getCols());
-    pOut->set(pwstOut);
-    freeArrayOfWideString(pwstOut, pOut->getSize());
+    std::wstring string_in;
+    std::wstring string_out;
+
+    for (int i = 0; i < pS->getSize(); ++i)
+    {
+        string_in.assign(pS->get()[i]);
+        size_t s_len = string_in.size();
+        string_out.assign(i_len, L' ');
+
+        for (int j = 0; j < i_len; ++j)
+        {
+            if (index[j] > s_len)
+            {
+                continue;
+            }
+
+            string_out[j] = string_in[index[j] - 1];
+        }
+
+        pOut->set(i, string_out.data());
+    }
+
     out.push_back(pOut);
     return types::Function::OK;
 }
index e44a112..23cdefa 100644 (file)
@@ -40,13 +40,6 @@ extern "C"
 #define ONE_CHAR 1
 #define EMPTY_CHAR ""
 /*-------------------------------------------------------------------------------------*/
-static int sci_strcat_three_rhs(char *fname);
-static int sci_strcat_two_rhs(char *fname);
-static int sci_strcat_one_rhs(char *fname);
-static int sci_strcat_rhs_one_is_a_matrix(char *fname);
-static int sumlengthstring(int rhspos);
-static int *lengthEachString(int rhspos, int *sizeArrayReturned);
-/*-------------------------------------------------------------------------------------*/
 types::Function::ReturnValue sci_strcat(types::typed_list &in, int _iRetCount, types::typed_list &out)
 {
     int iMode               = 0;
@@ -118,47 +111,25 @@ types::Function::ReturnValue sci_strcat(types::typed_list &in, int _iRetCount, t
 
             int size = pS->getSize();
             wchar_t** s = pS->get();
+            std::wstring string_out(s[0]);
 
-            int insertLen = 0;
-            if (pwstToInsert)
-            {
-                insertLen = (int)wcslen(pwstToInsert);
-            }
-
-            /*compute final size*/
-            int iLen = 1; //L'\0'
-            for (int i = 0 ; i < size ; i++)
-            {
-                iLen += (int)wcslen(s[i]);
-            }
-
-            if (pwstToInsert != NULL)
-            {
-                iLen += insertLen * (size - 1);
-            }
-
-            wchar_t* pwstOut = (wchar_t*)MALLOC(sizeof(wchar_t) * iLen);
-            pwstOut[0] = L'\0';
-
-            wcscpy(pwstOut, s[0]);
             if (pwstToInsert)
             {
                 for (int i = 1; i < size; ++i)
                 {
-                    wcscat(pwstOut, pwstToInsert);
-                    wcscat(pwstOut, s[i]);
+                    string_out += pwstToInsert;
+                    string_out += s[i];
                 }
             }
             else
             {
                 for (int i = 1; i < size; ++i)
                 {
-                    wcscat(pwstOut, s[i]);
+                    string_out += s[i];
                 }
             }
 
-            pOut->set(0, pwstOut);
-            FREE(pwstOut);
+            pOut->set(0, string_out.data());
         }
         break;
         case 1 : //"r"
@@ -169,48 +140,29 @@ types::Function::ReturnValue sci_strcat(types::typed_list &in, int _iRetCount, t
 
             pOut = new types::String(1, cols);
 
-            int insertLen = 0;
-            if (pwstToInsert)
-            {
-                insertLen = (int)wcslen(pwstToInsert);
-            }
-            
             /*compute final size*/
+            std::wstring string_out;
             for (int i = 0 ; i < cols ; ++i)
             {
-                int iLen = 1; //L'\0'
-                for (int j = 0 ; j < rows; ++j)
-                {
-                    iLen += (int)wcslen(s[i * rows + j]);
-                }
-
-                if (pwstToInsert != NULL)
-                {
-                    iLen += insertLen * (rows - 1);
-                }
+                string_out.assign(s[i * rows]);
 
-                wchar_t* pwstOut = (wchar_t*)MALLOC(sizeof(wchar_t) * iLen);
-                pwstOut[0] = L'\0';
-
-                wcscpy(pwstOut, s[i * rows]);
                 if (pwstToInsert)
                 {
                     for (int j = 1; j < rows; ++j)
                     {
-                        wcscat(pwstOut, pwstToInsert);
-                        wcscat(pwstOut, s[i * rows + j]);
+                        string_out += pwstToInsert;
+                        string_out += s[i * rows + j];
                     }
                 }
                 else
                 {
                     for (int j = 1; j < rows; ++j)
                     {
-                        wcscat(pwstOut, s[i * rows + j]);
+                        string_out += s[i * rows + j];
                     }
                 }
 
-                pOut->set(0, i, pwstOut);
-                FREE(pwstOut);
+                pOut->set(0, i, string_out.data());
             }
             break;
         }
@@ -222,48 +174,28 @@ types::Function::ReturnValue sci_strcat(types::typed_list &in, int _iRetCount, t
 
             pOut = new types::String(rows, 1);
 
-            int insertLen = 0;
-            if (pwstToInsert)
-            {
-                insertLen = (int)wcslen(pwstToInsert);
-            }
-
             /*compute final size*/
+            std::wstring string_out;
             for (int i = 0 ; i < rows ; ++i)
             {
-                int iLen = 1; //L'\0'
-                for (int j = 0 ; j < cols ; ++j)
-                {
-                    iLen += (int)wcslen(s[j * rows + i]);
-                }
-
-                if (pwstToInsert != NULL)
-                {
-                    iLen += insertLen * (cols - 1);
-                }
-
-                wchar_t* pwstOut = (wchar_t*)MALLOC(sizeof(wchar_t) * iLen);
-                pwstOut[0] = L'\0';
-
-                wcscpy(pwstOut, s[i]);
+                string_out.assign(s[i]);
                 if (pwstToInsert)
                 {
                     for (int j = 1; j < cols; ++j)
                     {
-                        wcscat(pwstOut, pwstToInsert);
-                        wcscat(pwstOut, s[j * rows + i]);
+                        string_out += pwstToInsert;
+                        string_out += s[j * rows + i];
                     }
                 }
                 else
                 {
                     for (int j = 1; j < cols; ++j)
                     {
-                        wcscat(pwstOut, s[j * rows + i]);
+                        string_out += s[j * rows + i];
                     }
                 }
 
-                pOut->set(i, 0, pwstOut);
-                FREE(pwstOut);
+                pOut->set(i, 0, string_out.data());
             }
             break;
         }
index 1dc6b59..c888fc2 100644 (file)
@@ -93,14 +93,12 @@ types::Function::ReturnValue sci_strsubst(types::typed_list &in, int _iRetCount,
     }
 
     types::String* pS = in[0]->getAs<types::String>();
-
     types::String* pOut = new types::String(pS->getRows(), pS->getCols());
-    wchar_t** pwstOutput = NULL;
 
     if (bRegExp)
     {
         int iErr = 0;
-        pwstOutput = wcssubst_reg(const_cast<const wchar_t**>(pS->get()), pS->getSize(), pwstSearch, pwstReplace, &iErr);
+        wchar_t** pwstOutput = wcssubst_reg(const_cast<const wchar_t**>(pS->get()), pS->getSize(), pwstSearch, pwstReplace, &iErr);
         if (iErr != NO_MATCH && iErr != PCRE_FINISHED_OK && iErr != PCRE_EXIT)
         {
             freeArrayOfWideString(pwstOutput, pOut->getSize());
@@ -108,14 +106,53 @@ types::Function::ReturnValue sci_strsubst(types::typed_list &in, int _iRetCount,
             delete pOut;
             return types::Function::Error;
         }
+
+        pOut->set(pwstOutput);
+        freeArrayOfWideString(pwstOutput, pOut->getSize());
     }
     else
     {
-        pwstOutput = wcssubst(const_cast<const wchar_t**>(pS->get()), pS->getSize(), pwstSearch, pwstReplace);
+        std::wstring search = pwstSearch;
+        std::wstring replace = pwstReplace;
+        size_t s_len = search.size();
+        size_t r_len = replace.size();
+
+        // Empty search string: only empty input strings are replaced by the replacement string; non-empty strings are copied unchanged.
+        if (s_len == 0)
+        {
+            for (int i = 0; i < pS->getSize(); ++i)
+            {
+                wchar_t* wcs = pS->get()[i];
+                if (wcslen(wcs) > 0)
+                {
+                    pOut->set(i, wcs);
+                }
+                else
+                {
+                    pOut->set(i, pwstReplace);
+                }
+            }
+            out.push_back(pOut);
+            return types::Function::OK;
+        }
+
+        // regular string search
+        std::wstring string_out;
+        for (int i = 0; i < pS->getSize(); ++i)
+        {
+            string_out.assign(pS->get()[i]);
+
+            size_t pos = string_out.find(search);
+            while (pos != std::string::npos)
+            {
+                string_out.replace(pos, s_len, replace);
+                pos = string_out.find(search, pos + r_len);
+            }
+
+            pOut->set(i, string_out.data());
+        }
     }
 
-    pOut->set(pwstOutput);
-    freeArrayOfWideString(pwstOutput, pOut->getSize());
     out.push_back(pOut);
     return types::Function::OK;
 }
diff --git a/scilab/modules/string/tests/benchmarks/bench_part2.tst b/scilab/modules/string/tests/benchmarks/bench_part2.tst
new file mode 100644 (file)
index 0000000..6048ba6
--- /dev/null
@@ -0,0 +1,20 @@
+// =============================================================================
+// Scilab ( http://www.scilab.org/ ) - This file is part of Scilab
+// Copyright (C) 2020 - ESI Group - Antoine ELIAS
+//
+//  This file is distributed under the same license as the Scilab package.
+// =============================================================================
+
+//==============================================================================
+// Benchmark for part function
+//==============================================================================
+
+// <-- BENCH NB RUN : 10000 -->
+
+c = 1e5;
+str = strcat(string(int(rand(1, c) * 10)));
+idx = int(rand(1, c) * c + 1);
+
+// <-- BENCH START -->
+part(str, idx);
+// <-- BENCH END -->