utf: module string 2
[scilab.git] / scilab / modules / string / sci_gateway / cpp / sci_grep.cpp
1 /*
2 * Scilab ( http://www.scilab.org/ ) - This file is part of Scilab
3 *  Copyright (C) 2010 - DIGITEO - Antoine ELIAS
4 *
5 * This file must be used under the terms of the CeCILL.
6 * This source file is licensed as described in the file COPYING, which
7 * you should have received as part of this distribution.  The terms
8 * are also available at
9 * http://www.cecill.info/licences/Licence_CeCILL_V2-en.txt
10 *
11 */
12
/* desc : search for the position of a character string in another string,
          optionally using regular expressions.                           */
15 /*------------------------------------------------------------------------*/
16
17 #include "string_gw.hxx"
18 #include "function.hxx"
19 #include "double.hxx"
20 #include "string.hxx"
21
22 extern "C"
23 {
24 #include "os_string.h"
25 #include "Scierror.h"
26 #include "localization.h"
27 #include "pcre.h"
28 #include "pcreposix.h"
29 #include "sci_malloc.h" /* MALLOC */
30 #include "charEncoding.h"
31 #include "pcre_private.h"
32 #include "pcre_error.h"
33 }
34
35 /*------------------------------------------------------------------------*/
/* Status codes returned by GREP_NEW / GREP_OLD and dispatched on in sci_grep. */
/* Search completed; the result arrays are valid. */
#define GREP_OK             0
/* The regular-expression engine reported an error (see pcre_error in GREP_NEW). */
#define GREP_ERROR          1
/* A result array could not be allocated. */
#define MEMORY_ALLOC_ERROR -1
39 /*------------------------------------------------------------------------*/
/* Accumulator filled by GREP_NEW / GREP_OLD: two parallel arrays holding one
   entry per (searched string, searched pattern) pair that matched. */
struct grep_results
{
    /* upper bound on the number of entries (computed by GREP_NEW) */
    int sizeArraysMax;
    /* number of entries currently stored in values[] and positions[] */
    int currentLength;
    /* 1-based index, in the first grep argument, of the string that matched */
    int *values;
    /* 1-based index, in the second grep argument, of the pattern that matched */
    int *positions;
};

typedef struct grep_results GREPRESULTS;
47 /*------------------------------------------------------------------------*/
/* Regular-expression search (grep called with the "r" flag): tests each string of
   Inputs_param_one against each pattern of Inputs_param_two through pcre_private(). */
static int GREP_NEW(GREPRESULTS *results, char **Inputs_param_one, int mn_one, char **Inputs_param_two, int mn_two);
/* Plain substring search (default): looks for each string of Inputs_param_two
   inside each string of Inputs_param_one with wcsstr(). */
static int GREP_OLD(GREPRESULTS *results, char **Inputs_param_one, int mn_one, char **Inputs_param_two, int mn_two);
50 /*------------------------------------------------------------------------*/
51 types::Function::ReturnValue sci_grep(types::typed_list &in, int _iRetCount, types::typed_list &out)
52 {
53     bool bRegularExpression = false;
54
55     //check input paramters
56     if (in.size() < 2 || in.size() > 3)
57     {
58         Scierror(999, _("%s: Wrong number of input arguments: %d or %d expected.\n"), "grep", 2, 3);
59         return types::Function::Error;
60     }
61
62     if (in[0]->isDouble() && in[0]->getAs<types::Double>()->getSize() == 0)
63     {
64         types::Double *pD = types::Double::Empty();
65         out.push_back(pD);
66         return types::Function::OK;
67     }
68
69     if (in.size() == 3)
70     {
71         //"r" for regular expression
72         if (in[2]->isString() == false)
73         {
74             Scierror(999, _("%s: Wrong type for input argument #%d: String expected.\n"), "grep", 3);
75             return types::Function::Error;
76         }
77
78         types::String* pS = in[2]->getAs<types::String>();
79         if (pS->getSize() != 1)
80         {
81             Scierror(999, _("%s: Wrong type for input argument #%d: string expected.\n"), "grep", 3);
82             return types::Function::Error;
83         }
84
85         if (pS->get(0)[0] == 'r')
86         {
87             bRegularExpression = true;
88         }
89     }
90
91     if (in[0]->isString() == false)
92     {
93         Scierror(999, _("%s: Wrong type for input argument #%d: String expected.\n"), "grep", 1);
94         return types::Function::Error;
95     }
96
97     if (in[1]->isString() == false)
98     {
99         Scierror(999, _("%s: Wrong type for input argument #%d: String expected.\n"), "grep", 2);
100         return types::Function::Error;
101     }
102
103     types::String* pS1 = in[0]->getAs<types::String>();
104     types::String* pS2 = in[1]->getAs<types::String>();
105
106
107     for (int i = 0 ; i < pS2->getSize() ; i++)
108     {
109         if (strlen(pS2->get(i)) == 0)
110         {
111             Scierror(249, _("%s: Wrong values for input argument #%d: Non-empty strings expected.\n"), "grep", 2);
112             return types::Function::Error;
113         }
114     }
115
116     GREPRESULTS grepresults;
117     int code_error_grep = GREP_OK;
118
119     grepresults.currentLength = 0;
120     grepresults.sizeArraysMax = 0;
121     grepresults.positions = NULL;
122     grepresults.values = NULL;
123
124     if (bRegularExpression)
125     {
126         code_error_grep = GREP_NEW(&grepresults, pS1->get(), pS1->getSize(), pS2->get(), pS2->getSize());
127     }
128     else
129     {
130         code_error_grep = GREP_OLD(&grepresults, pS1->get(), pS1->getSize(), pS2->get(), pS2->getSize());
131     }
132
133     switch (code_error_grep)
134     {
135         case GREP_OK :
136         {
137             types::Double* pD1 = NULL;
138             if (grepresults.currentLength == 0)
139             {
140                 pD1 = types::Double::Empty();
141             }
142             else
143             {
144                 pD1 = new types::Double(1, grepresults.currentLength);
145                 double* pDbl1 = pD1->getReal();
146                 for (int i = 0 ; i < grepresults.currentLength ; i++ )
147                 {
148                     pDbl1[i] = static_cast<double>(grepresults.values[i]);
149                 }
150             }
151
152             out.push_back(pD1);
153
154             if (_iRetCount == 2)
155             {
156                 types::Double* pD2 = NULL;
157                 if (grepresults.currentLength == 0)
158                 {
159                     pD2 = types::Double::Empty();
160                 }
161                 else
162                 {
163                     pD2 = new types::Double(1, grepresults.currentLength);
164                     double* pDbl2 = pD2->getReal();
165                     for (int i = 0 ; i < grepresults.currentLength ; i++ )
166                     {
167                         pDbl2[i] = static_cast<double>(grepresults.positions[i]);
168                     }
169                 }
170
171                 out.push_back(pD2);
172             }
173
174             if (grepresults.values)
175             {
176                 FREE(grepresults.values);
177                 grepresults.values = NULL;
178             }
179             if (grepresults.positions)
180             {
181                 FREE(grepresults.positions);
182                 grepresults.positions = NULL;
183             }
184         }
185         break;
186
187         case MEMORY_ALLOC_ERROR :
188             Scierror(999, _("%s: No more memory.\n"), "grep");
189         //no break, to free reserved memory.
190         case GREP_ERROR :
191         {
192             if (grepresults.values)
193             {
194                 FREE(grepresults.values);
195                 grepresults.values = NULL;
196             }
197             if (grepresults.positions)
198             {
199                 FREE(grepresults.positions);
200                 grepresults.positions = NULL;
201             }
202             return types::Function::Error;
203         }
204         break;
205     }
206
207     return types::Function::OK;
208 }
209 //Function::ReturnValue sci_grep(typed_list &in, int _iRetCount, typed_list &out)
210 //{
211 //      CheckRhs(2,3);
212 //      CheckLhs(1,2);
213 //
214 //      if (VarType(1) == sci_matrix)
215 //      {
216 //              int m1 = 0, n1 = 0;
217 //              char **Str=NULL;
218 //
219 //              GetRhsVar(1,MATRIX_OF_DOUBLE_DATATYPE,&m1,&n1,&Str);
220 //
221 //              if ((m1 == 0) && (n1 == 0))
222 //              {
223 //                      int l = 0;
224 //                      CreateVar(Rhs+1,MATRIX_OF_DOUBLE_DATATYPE,&m1,&n1,&l);
225 //                      LhsVar(1) = Rhs+1 ;
226 //                      C2F(putlhsvar)();
227 //
228 //                      return 0;
229 //              }
230 //      }
231 //
232 //      if (Rhs == 3)
233 //      {
234 //              if (VarType(3) == sci_strings)
235 //              {
236 //                      char typ = 'd'; /*default */
237 //                      int m3 = 0,n3 = 0,l3 = 0;
238 //
239 //                      GetRhsVar(3,STRING_DATATYPE,&m3,&n3,&l3);
240 //                      if ( m3*n3 != 0) typ = cstk(l3)[0];
241 //
242 //                      if (typ == 'r' )
243 //                      {
244 //                              sci_grep_common(fname,TRUE);
245 //                      }
246 //                      else
247 //                      {
248 //                              Scierror(999,_("%s: Wrong value for input argument #%d: ''%s'' expected.\n"),fname,3,"s");
249 //                              return 0;
250 //                      }
251 //              }
252 //              else
253 //              {
254 //                      Scierror(999,_("%s: Wrong type for input argument #%d: String expected.\n"),fname,3);
255 //                      return 0;
256 //              }
257 //      }
258 //      else /* Rhs == 2 */
259 //      {
260 //              sci_grep_common(fname,FALSE);
261 //      }
262 //      return 0;
263 //}
264 /*-----------------------------------------------------------------------------------*/
265 static int GREP_NEW(GREPRESULTS *results, char **Inputs_param_one, int mn_one, char **Inputs_param_two, int mn_two)
266 {
267     int x = 0, y = 0;
268     int iRet = GREP_OK;
269     pcre_error_code answer = PCRE_FINISHED_OK;
270     for (x = 0; x <  mn_one ; x++)
271     {
272         results->sizeArraysMax = results->sizeArraysMax + (int)strlen(Inputs_param_one[x]);
273     }
274
275     results->values = (int *)MALLOC(sizeof(int) * (3 * results->sizeArraysMax + 1));
276     results->positions = (int *)MALLOC(sizeof(int) * (3 * results->sizeArraysMax + 1));
277
278     if ( (results->values == NULL) || (results->positions == NULL) )
279     {
280         if (results->values)
281         {
282             FREE(results->values);
283             results->values = NULL;
284         }
285         if (results->positions)
286         {
287             FREE(results->positions);
288             results->positions = NULL;
289         }
290         return MEMORY_ALLOC_ERROR;
291     }
292
293     results->currentLength = 0;
294     for ( y = 0; y < mn_one; ++y)
295     {
296         for ( x = 0; x < mn_two; ++x)
297         {
298             int Output_Start = 0;
299             int Output_End = 0;
300             answer = pcre_private(Inputs_param_one[y], Inputs_param_two[x], &Output_Start, &Output_End, NULL, NULL);
301
302             if ( answer == PCRE_FINISHED_OK )
303             {
304                 if (results->currentLength < results->sizeArraysMax)
305                 {
306                     results->values[results->currentLength] = y + 1;
307                     results->positions[results->currentLength] = x + 1;
308                     results->currentLength++;
309                 }
310             }
311             else if (answer != NO_MATCH)
312             {
313                 pcre_error("grep", answer);
314                 iRet = GREP_ERROR;
315             }
316         }
317     }
318
319     if (results->currentLength > results->sizeArraysMax)
320     {
321         results->currentLength = results->sizeArraysMax;
322     }
323
324     return iRet;
325 }
326 /*-----------------------------------------------------------------------------------*/
327 static int GREP_OLD(GREPRESULTS *results, char **Inputs_param_one, int mn_one, char **Inputs_param_two, int mn_two)
328 {
329     int x = 0, y = 0;
330
331     results->values = (int *)MALLOC(sizeof(int) * (mn_one * mn_two + 1));
332     results->positions = (int *)MALLOC(sizeof(int) * (mn_one * mn_two + 1));
333
334     for (y = 0; y < mn_one; ++y)
335     {
336         for (x = 0; x < mn_two; ++x)
337         {
338             wchar_t* wcInputOne = to_wide_string(Inputs_param_one[y]);
339             wchar_t* wcInputTwo = to_wide_string(Inputs_param_two[x]);
340
341             if (wcInputOne && wcInputTwo)
342             {
343                 if (wcsstr(wcInputOne, wcInputTwo) != NULL)
344                 {
345                     results->values[results->currentLength] = y + 1;
346                     results->positions[results->currentLength] = x + 1;
347                     results->currentLength++;
348                 }
349             }
350
351             if (wcInputOne)
352             {
353                 FREE(wcInputOne);
354                 wcInputOne = NULL;
355             }
356             if (wcInputTwo)
357             {
358                 FREE(wcInputTwo);
359                 wcInputTwo = NULL;
360             }
361         }
362     }
363     return GREP_OK;
364 }
365 /*-----------------------------------------------------------------------------------*/
366 //static int sci_grep_common(char *fname,BOOL new_grep)
367 //{
368 //      int i = 0;
369 //
370 //      int m1 = 0, n1 = 0;
371 //      char **Strings_Input_One = NULL;
372 //      int m1n1 = 0; /* m1 * n1 */
373 //
374 //      int m2 = 0, n2 = 0;
375 //      char **Strings_Input_Two = NULL;
376 //      int m2n2 = 0; /* m2 * n2 */
377 //
378 //      GREPRESULTS grepresults;
379 //      int code_error_grep = GREP_OK;
380 //
381 //      GetRhsVar(1,MATRIX_OF_STRING_DATATYPE,&m1,&n1,&Strings_Input_One);
382 //      m1n1 = m1*n1;
383 //      GetRhsVar(2,MATRIX_OF_STRING_DATATYPE,&m2,&n2,&Strings_Input_Two);
384 //      m2n2 = m2*n2;
385 //
386 //      for (i = 0;i < m2n2;i++)
387 //      {
388 //              if ( strlen(Strings_Input_Two[i]) == 0)
389 //              {
390 //                      freeArrayOfString(Strings_Input_One,m1n1);
391 //                      freeArrayOfString(Strings_Input_Two,m2n2);
392 //                      Scierror(249,_("%s: Wrong values for input argument #%d: Non-empty strings expected.\n"),fname,2);
393 //                      return 0;
394 //              }
395 //      }
396 //
397 //      grepresults.currentLength = 0;
398 //      grepresults.sizeArraysMax = 0;
399 //      grepresults.positions = NULL;
400 //      grepresults.values = NULL;
401 //
402 //      if (new_grep)
403 //      {
404 //              code_error_grep = GREP_NEW(&grepresults,Strings_Input_One,m1n1,Strings_Input_Two,m2n2);
405 //      }
406 //      else
407 //      {
408 //              code_error_grep = GREP_OLD(&grepresults,Strings_Input_One,m1n1,Strings_Input_Two,m2n2);
409 //      }
410 //
411 //      freeArrayOfString(Strings_Input_One,m1n1);
412 //      freeArrayOfString(Strings_Input_Two,m2n2);
413 //
414 //      switch (code_error_grep)
415 //      {
416 //      case GREP_OK :
417 //              {
418 //                      int x = 0;
419 //                      int numRow   = 0;
420 //                      int outIndex = 0;
421 //
422 //                      numRow   = 1;  /* Output values[]*/
423 //                      outIndex = 0;
424 //                      CreateVar(Rhs+1,MATRIX_OF_DOUBLE_DATATYPE,&numRow,&grepresults.currentLength,&outIndex);
425 //                      for ( x = 0 ; x < grepresults.currentLength ; x++ )
426 //                      {
427 //                              stk(outIndex)[x] = (double)grepresults.values[x] ;
428 //                      }
429 //                      LhsVar(1) = Rhs+1 ;
430 //                      if (Lhs == 2)
431 //                      {
432 //                              /* Output positions[]*/
433 //                              numRow   = 1;
434 //                              outIndex = 0;
435 //                              CreateVar(Rhs+2,MATRIX_OF_DOUBLE_DATATYPE,&numRow,&grepresults.currentLength,&outIndex);
436 //                              for ( x = 0 ; x < grepresults.currentLength ; x++ )
437 //                              {
438 //                                      stk(outIndex)[x] = (double)grepresults.positions[x] ;
439 //                              }
440 //                              LhsVar(2) = Rhs+2;
441 //                      }
442 //                      C2F(putlhsvar)();
443 //                      if (grepresults.values) {FREE(grepresults.values); grepresults.values = NULL;}
444 //                      if (grepresults.positions) {FREE(grepresults.positions); grepresults.positions = NULL;}
445 //              }
446 //              break;
447 //
448 //      case MEMORY_ALLOC_ERROR :
449 //              {
450 //                      if (grepresults.values) {FREE(grepresults.values); grepresults.values = NULL;}
451 //                      if (grepresults.positions) {FREE(grepresults.positions); grepresults.positions = NULL;}
452 //                      Scierror(999,_("%s: No more memory.\n"),fname);
453 //              }
454 //              break;
455 //      }
456 //      return 0;
457 //}
458
459 /*-----------------------------------------------------------------------------------*/