e5dc3c94ef7c287e00abc83faaf9f3615a715ff9
[scilab.git] / scilab / modules / string / src / c / pcre_private.c
1 /*
2 * Scilab ( http://www.scilab.org/ ) - This file is part of Scilab
3 * Copyright (C) INRIA
4 * Copyright (C) DIGITEO - 2009
5 *
6  * Copyright (C) 2012 - 2016 - Scilab Enterprises
7  *
8  * This file is hereby licensed under the terms of the GNU GPL v2.0,
9  * pursuant to article 5.3.4 of the CeCILL v.2.1.
10  * This file was originally licensed under the terms of the CeCILL v2.1,
11  * and continues to be available under such terms.
12  * For more information, see the COPYING file which you should have received
13  * along with this program.
14 *
15 */
16
17 /*-------------------------------------------------------------------------------*/
18 #include <ctype.h>
19 #include <stdio.h>
20 #include <string.h>
21 #include <stdlib.h>
22 #include <time.h>
23 #include <locale.h>
24 #include <errno.h>
25 #include <pcre.h>
26 #include "sci_malloc.h"
27 #include "BOOL.h"
28 #include "pcre_private.h"
29 #include "os_string.h"
30 #include "strsubst.h"
31 #include "configvariable_interface.h"
32 #include "sciprint.h"
33 #include "charEncoding.h"
34 #include "freeArrayOfString.h"
35 /*-------------------------------------------------------------------------------*/
36 /* A number of things vary for Windows builds. Originally, pcretest opened its
37 input and output without "b"; then I was told that "b" was needed in some
38 environments, so it was added for release 5.0 to both the input and output. (It
39 makes no difference on Unix-like systems.) Later I was told that it is wrong
40 for the input on Windows. I've now abstracted the modes into two macros that
41 are set here, to make it easier to fiddle with them, and removed "b" from the
42 input mode under Windows. */
43 /*-------------------------------------------------------------------------------*/
44 #if _MSC_VER
45 #include <io.h>                /* For _setmode() */
46 #include <fcntl.h>             /* For _O_BINARY */
47 #define INPUT_MODE   "r"
48 #define OUTPUT_MODE  "wb"
49 #else
50 #include <sys/time.h>          /* These two includes are needed */
51 #include <sys/resource.h>      /* for setrlimit(). */
52 #define INPUT_MODE   "rb"
53 #define OUTPUT_MODE  "wb"
54 #endif
55
56 #define LINK_SIZE               2
57
58 /* We have to include pcre_internal.h because we need the internal info for
59 displaying the results of pcre_study() and we also need to know about the
60 internal macros, structures, and other internal data values; pcretest has
61 "inside information" compared to a program that strictly follows the PCRE API.
62
63 Although pcre_internal.h does itself include pcre.h, we explicitly include it
64 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
65 appropriately for an application, not for building PCRE. */
66
67 #include <pcre.h>
68 #include "pcre_internal.h"
69
70 /* We need access to the data tables that PCRE uses. So as not to have to keep
71 two copies, we include the source file here, changing the names of the external
72 symbols to prevent clashes. */
73
74 #define _pcre_utf8_table1      utf8_table1
75 #define _pcre_utf8_table1_size utf8_table1_size
76 #define _pcre_utf8_table2      utf8_table2
77 #define _pcre_utf8_table3      utf8_table3
78 #define _pcre_utf8_table4      utf8_table4
79 #define _pcre_utt              utt
80 #define _pcre_utt_size         utt_size
81 #define _pcre_utt_names        utt_names
82 #define _pcre_OP_lengths       OP_lengths
83
84 #include "pcre_tables.c"
85
86 /* It is possible to compile this test program without including support for
87 testing the POSIX interface, though this is not available via the standard
88 Makefile. */
89
90 #if !defined NOPOSIX
91 #include "pcreposix.h"
92 #endif
93
94 /* It is also possible, for the benefit of the version currently imported into
95 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
96 interface to the DFA matcher (NODFA), and without the doublecheck of the old
97 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
98 UTF8 support if PCRE is built without it. */
99
100 #ifndef SUPPORT_UTF8
101 #ifndef NOUTF8
102 #define NOUTF8
103 #endif
104 #endif
105 /*-------------------------------------------------------------------------------*/
106 /* Static variables */
107
108 static int callout_count = 0;
109 static int callout_fail_count = 0;
110 static int callout_fail_id = 0;
111
112 /* The buffers grow automatically if very long input lines are encountered. */
113
114 char *buffer = NULL;
115
116
117 static int check_match_limit(pcre *re, pcre_extra *extra, char *bptr, int len,
118                              int start_offset, int options, int *use_offsets, int use_size_offsets,
119                              int flag, unsigned long int *limit, int errnumber);
120
121
122 /*************************************************
123 *        Check match or recursion limit          *
124 *************************************************/
125
126 static int check_match_limit(pcre *re, pcre_extra *extra, char *bptr, int len,
127                              int start_offset, int options, int *use_offsets, int use_size_offsets,
128                              int flag, unsigned long int *limit, int errnumber)
129 {
130     int count;
131     int min = 0;
132     int mid = 64;
133     int max = -1;
134
135     extra->flags |= flag;
136
137     for (;;)
138     {
139         *limit = mid;
140
141         count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
142                           use_offsets, use_size_offsets);
143
144         if (count == errnumber)
145         {
146             min = mid;
147             mid = (mid == max - 1) ? max : (max > 0) ? (min + max) / 2 : mid * 2;
148         }
149
150         else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
151                  count == PCRE_ERROR_PARTIAL)
152         {
153             if (mid == min + 1)
154             {
155                 break;
156             }
157             max = mid;
158             mid = (min + mid) / 2;
159         }
160         else
161         {
162             break;    /* Some other error */
163         }
164     }
165
166     extra->flags &= ~flag;
167     return count;
168 }
169
170
171 /*************************************************
172 *               Algorithm                      *
173 *************************************************/
174
175 /* Read lines from named file or stdin and write to named file or stdout; lines
176 consist of a regular expression, in delimiters and optionally followed by
177 options, followed by a set of test data, terminated by an empty line. */
178
179 pcre_error_code pcre_private(char *INPUT_LINE, char *INPUT_PAT, int *Output_Start, int *Output_End, char*** _pstCapturedString, int* _piCapturedStringCount)
180 {
181     /* ALL strings are managed as UTF-8 by default */
182     int options = PCRE_UTF8;
183     int size_offsets = 45;
184     int size_offsets_max;
185     int *offsets = NULL;
186     int all_use_dfa = 0;
187     BOOL LOOP_PCRE_TST = FALSE;
188
189     /* These vectors store, end-to-end, a list of captured substring names. Assume
190     that 1024 is plenty long enough for the few names we'll be testing. */
191
192     char copynames[1024];
193     char getnames[1024];
194
195     char *copynamesptr = NULL;
196     char *getnamesptr = NULL;
197
198     int rc = 0;
199     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
200     if (rc != 1)
201     {
202         return UTF8_NOT_SUPPORTED;
203     }
204
205     /* bug 3891 */
206     /* backslash characters are not interpreted for input */
207     buffer = strsub(INPUT_LINE, "\\", "\\\\");
208
209     size_offsets_max = size_offsets;
210     offsets = (int *)MALLOC(size_offsets_max * sizeof(int));
211     if (offsets == NULL)
212     {
213         if (buffer)
214         {
215             FREE(buffer);
216             buffer = NULL;
217         }
218         return NOT_ENOUGH_MEMORY_FOR_VECTOR;
219     }
220     /* Main loop */
221     LOOP_PCRE_TST = FALSE;
222     while (!LOOP_PCRE_TST)
223     {
224         pcre *re = NULL;
225         pcre_extra *extra = NULL;
226         const char *error = NULL;
227         char *back_p = NULL;
228         char *p = NULL;
229         char *pp = NULL;
230         char *ppp = NULL;
231         const unsigned char *tables = NULL;
232         int do_G = 0;
233         int do_g = 0;
234         int erroroffset = 0, len = 0, delimiter;
235
236         LOOP_PCRE_TST = TRUE;
237         p = os_strdup(INPUT_PAT);
238         back_p = p;
239         while (isspace(*p))
240         {
241             p++;
242         }
243         if (*p == 0)
244         {
245             continue;
246         }
247         /* In-line pattern (the usual case). Get the delimiter and seek the end of
248         the pattern; if it isn't complete, read more. */
249
250         delimiter = *p++;
251
252         if (isalnum(delimiter) || delimiter == '\\')
253         {
254             if (buffer)
255             {
256                 FREE(buffer);
257                 buffer = NULL;
258             }
259             if (offsets)
260             {
261                 FREE(offsets);
262                 offsets = NULL;
263             }
264             if (back_p)
265             {
266                 FREE(back_p);
267                 back_p = NULL;
268             }
269             return DELIMITER_NOT_ALPHANUMERIC;
270         }
271
272         pp = p;
273
274         while (*pp != 0)
275         {
276             if (*pp == '\\' && pp[1] != 0)
277             {
278                 pp++;
279             }
280             else if (*pp == delimiter)
281             {
282                 break;
283             }
284             pp++;
285         }
286
287         /* If the delimiter can't be found, it's a syntax error */
288         if (*pp == 0)
289         {
290             if (buffer)
291             {
292                 FREE(buffer);
293                 buffer = NULL;
294             }
295             if (offsets)
296             {
297                 FREE(offsets);
298                 offsets = NULL;
299             }
300             if (back_p)
301             {
302                 FREE(back_p);
303                 back_p = NULL;
304             }
305             if (offsets)
306             {
307                 FREE(offsets);
308             }
309             return CAN_NOT_COMPILE_PATTERN;
310         }
311
312         /* If the first character after the delimiter is backslash, make
313         the pattern end with backslash. This is purely to provide a way
314         of testing for the error message when a pattern ends with backslash. */
315
316         if (pp[1] == '\\')
317         {
318             *pp++ = '\\';
319         }
320
321         /* Terminate the pattern at the delimiter, and save a copy of the pattern
322         for callouts. */
323
324         *pp++ = 0;
325
326         /* Look for options after final delimiter */
327
328         //options = 8192;
329
330         while (*pp != 0)
331         {
332             switch (*pp++)
333             {
334                 case 'f':
335                     options |= PCRE_FIRSTLINE;
336                     break;
337                 case 'g':
338                     do_g = 1;
339                     break;
340                 case 'i':
341                     options |= PCRE_CASELESS;
342                     break;
343                 case 'm':
344                     options |= PCRE_MULTILINE;
345                     break;
346                 case 's':
347                     options |= PCRE_DOTALL;
348                     break;
349                 case 'x':
350                     options |= PCRE_EXTENDED;
351                     break;
352                 case '+':
353                     break;
354                 case 'A':
355                     options |= PCRE_ANCHORED;
356                     break;
357                 case 'B':
358                     break;
359                 case 'C':
360                     options |= PCRE_AUTO_CALLOUT;
361                     break;
362                 case 'D':
363                     break;
364                 case 'E':
365                     options |= PCRE_DOLLAR_ENDONLY;
366                     break;
367                 case 'F':
368                     break;
369                 case 'G':
370                     do_G = 1;
371                     break;
372                 case 'I':
373                     break;
374                 case 'J':
375                     options |= PCRE_DUPNAMES;
376                     break;
377                 case 'M':
378                     break;
379                 case 'N':
380                     options |= PCRE_NO_AUTO_CAPTURE;
381                     break;
382                 case 'S':
383                     break;
384                 case 'U':
385                     options |= PCRE_UNGREEDY;
386                     break;
387                 case 'X':
388                     options |= PCRE_EXTRA;
389                     break;
390                 case 'Z':
391                     break;
392                 case '8':
393                 {
394                     int rc = 0;
395                     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
396                     if (rc != 1)
397                     {
398                         if (buffer)
399                         {
400                             FREE(buffer);
401                             buffer = NULL;
402                         }
403                         if (offsets)
404                         {
405                             FREE(offsets);
406                         }
407                         return UTF8_NOT_SUPPORTED;
408                     }
409                     options |= PCRE_UTF8;
410                 }
411                 break;
412                 case '?':
413                     options |= PCRE_NO_UTF8_CHECK;
414                     break;
415                 case 'L':
416                     ppp = pp;
417                     /* The '\r' test here is so that it works on Windows. */
418                     /* The '0' test is just in case this is an unterminated line. */
419                     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ')
420                     {
421                         ppp++;
422                     }
423                     *ppp = 0;
424                     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
425                     {
426                         goto SKIP_DATA;
427                     }
428
429                     tables = pcre_maketables();
430                     pp = ppp;
431                     break;
432                 case '>':
433                     while (*pp != 0)
434                     {
435                         pp++;
436                     }
437                     while (isspace(pp[-1]))
438                     {
439                         pp--;
440                     }
441                     *pp = 0;
442                     break;
443                 case '<':
444                 {
445                     while (*pp++ != '>')
446                     {
447                         ;
448                     }
449                 }
450                 break;
451                 case '\r':                      /* So that it works in Windows */
452                 case '\n':
453                 case ' ':
454                     break;
455
456                 default:
457                     goto SKIP_DATA;
458             }
459         }
460
461         /* Handle compiling via the POSIX interface, which doesn't support the
462         timing, showing, or debugging options, nor the ability to pass over
463         local character tables. */
464
465
466         {
467             re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
468             /* Compilation failed; go back for another re, skipping to blank line
469             if non-interactive. */
470             if (re == NULL)
471             {
472 SKIP_DATA:
473                 if (buffer)
474                 {
475                     FREE(buffer);
476                     buffer = NULL;
477                 }
478                 if (offsets)
479                 {
480                     FREE(offsets);
481                     offsets = NULL;
482                 }
483                 if (tables)
484                 {
485                     (*pcre_free)((void*)tables);
486                     tables = NULL;
487                 }
488                 if (extra)
489                 {
490                     FREE(extra);
491                     extra = NULL;
492                 }
493                 if (back_p)
494                 {
495                     FREE(back_p);
496                     back_p = NULL;
497                 }
498                 return CAN_NOT_COMPILE_PATTERN;
499             }
500
501         }        /* End of non-POSIX compile */
502
503         /* Read data lines and test them */
504         {
505             char *q = NULL;
506             char *bptr = NULL;
507             int *use_offsets = offsets;
508             int use_size_offsets = size_offsets;
509             int callout_data = 0;
510             int callout_data_set = 0;
511             int count = 0;
512             int c = 0;
513             int copystrings = 0;
514             int find_match_limit = 0;
515             int getstrings = 0;
516             int gmatched = 0;
517             int start_offset = 0;
518             int g_notempty = 0;
519             int use_dfa = 0;
520
521             options = 0;
522             *copynames = 0;
523             *getnames = 0;
524
525             copynamesptr = copynames;
526             getnamesptr = getnames;
527
528             callout_count = 0;
529             callout_fail_count = 999999;
530             callout_fail_id = -1;
531
532             if (extra != NULL)
533             {
534                 extra->flags &= ~(PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION);
535             }
536             p = buffer;
537             bptr = q = buffer;
538             while ((c = *p++) != 0)
539             {
540                 int i = 0;
541                 int n = 0;
542
543                 if (c == '\\') switch ((c = *p++))
544                     {
545                         case 'a':
546                             c =    7;
547                             break;
548                         case 'b':
549                             c = '\b';
550                             break;
551                         case 'e':
552                             c =   27;
553                             break;
554                         case 'f':
555                             c = '\f';
556                             break;
557                         case 'n':
558                             c = '\n';
559                             break;
560                         case 'r':
561                             c = '\r';
562                             break;
563                         case 't':
564                             c = '\t';
565                             break;
566                         case 'v':
567                             c = '\v';
568                             break;
569                         case '0':
570                         case '1':
571                         case '2':
572                         case '3':
573                         case '4':
574                         case '5':
575                         case '6':
576                         case '7':
577                             c -= '0';
578                             while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
579                             {
580                                 c = c * 8 + *p++ - '0';
581                             }
582                             break;
583                         case 'x':
584                             /* Ordinary \x */
585                             c = 0;
586                             while (i++ < 2 && isxdigit(*p))
587                             {
588                                 c = c * 16 + tolower(*p) - ((isdigit(*p)) ? '0' : 'W');
589                                 p++;
590                             }
591                             break;
592                         case 0:   /* \ followed by EOF allows for an empty line */
593                             p--;
594                             continue;
595                         case '>':
596                             while (isdigit(*p))
597                             {
598                                 start_offset = start_offset * 10 + *p++ - '0';
599                             }
600                             continue;
601                         case 'A':  /* Option setting */
602                             options |= PCRE_ANCHORED;
603                             continue;
604                         case 'B':
605                             options |= PCRE_NOTBOL;
606                             continue;
607                         case 'C':
608                             if (isdigit(*p))    /* Set copy string */
609                             {
610                                 while (isdigit(*p))
611                                 {
612                                     n = n * 10 + *p++ - '0';
613                                 }
614                                 copystrings |= 1 << n;
615                             }
616                             else if (isalnum(*p))
617                             {
618                                 char *npp = copynamesptr;
619                                 while (isalnum(*p))
620                                 {
621                                     *npp++ = *p++;
622                                 }
623                                 *npp++ = 0;
624                                 *npp = 0;
625                                 pcre_get_stringnumber(re, (char *)copynamesptr);
626                                 copynamesptr = npp;
627                             }
628                             else if (*p == '+')
629                             {
630                                 p++;
631                             }
632                             else if (*p == '-')
633                             {
634                                 p++;
635                             }
636                             else if (*p == '!')
637                             {
638                                 callout_fail_id = 0;
639                                 p++;
640                                 while (isdigit(*p))
641                                 {
642                                     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
643                                 }
644                                 callout_fail_count = 0;
645                                 if (*p == '!')
646                                 {
647                                     p++;
648                                     while (isdigit(*p))
649                                     {
650                                         callout_fail_count = callout_fail_count * 10 + *p++ - '0';
651                                     }
652                                 }
653                             }
654                             else if (*p == '*')
655                             {
656                                 int sign = 1;
657                                 callout_data = 0;
658                                 if (*(++p) == '-')
659                                 {
660                                     sign = -1;
661                                     p++;
662                                 }
663                                 while (isdigit(*p))
664                                 {
665                                     callout_data = callout_data * 10 + *p++ - '0';
666                                 }
667                                 callout_data *= sign;
668                                 callout_data_set = 1;
669                             }
670                             continue;
671                         case 'G':
672                             if (isdigit(*p))
673                             {
674                                 while (isdigit(*p))
675                                 {
676                                     n = n * 10 + *p++ - '0';
677                                 }
678                                 getstrings |= 1 << n;
679                             }
680                             else if (isalnum(*p))
681                             {
682                                 char *npp = getnamesptr;
683                                 while (isalnum(*p))
684                                 {
685                                     *npp++ = *p++;
686                                 }
687                                 *npp++ = 0;
688                                 *npp = 0;
689                                 pcre_get_stringnumber(re, (char *)getnamesptr);
690                                 getnamesptr = npp;
691                             }
692                             continue;
693                         case 'L':
694                             continue;
695                         case 'M':
696                             find_match_limit = 1;
697                             continue;
698                         case 'N':
699                             options |= PCRE_NOTEMPTY;
700                             continue;
701                         case 'O':
702                             while (isdigit(*p))
703                             {
704                                 n = n * 10 + *p++ - '0';
705                             }
706                             if (n > size_offsets_max)
707                             {
708                                 size_offsets_max = n;
709                                 if (offsets)
710                                 {
711                                     FREE(offsets);
712                                 }
713                                 use_offsets = offsets = (int *)MALLOC(size_offsets_max * sizeof(int));
714                             }
715                             use_size_offsets = n;
716                             if (n == 0)
717                             {
718                                 use_offsets = NULL;    /* Ensures it can't write to it */
719                             }
720                             continue;
721                         case 'P':
722                             options |= PCRE_PARTIAL;
723                             continue;
724                         case 'Q':
725                             while (isdigit(*p))
726                             {
727                                 n = n * 10 + *p++ - '0';
728                             }
729                             if (extra == NULL)
730                             {
731                                 extra = (pcre_extra *)MALLOC(sizeof(pcre_extra));
732                                 extra->flags = 0;
733                             }
734                             extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
735                             extra->match_limit_recursion = n;
736                             continue;
737                         case 'q':
738                             while (isdigit(*p))
739                             {
740                                 n = n * 10 + *p++ - '0';
741                             }
742                             if (extra == NULL)
743                             {
744                                 extra = (pcre_extra *)MALLOC(sizeof(pcre_extra));
745                                 extra->flags = 0;
746                             }
747                             extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
748                             extra->match_limit = n;
749                             continue;
750 #if !defined NODFA
751                         case 'R':
752                             options |= PCRE_DFA_RESTART;
753                             continue;
754 #endif
755                         case 'S':
756
757                             continue;
758                         case 'Z':
759                             options |= PCRE_NOTEOL;
760                             continue;
761                         case '?':
762                             options |= PCRE_NO_UTF8_CHECK;
763                             continue;
764                         case '<':
765                         {
766                             while (*p++ != '>')
767                             {
768                                 ;
769                             }
770                         }
771                         continue;
772                     }
773                 *q++ = (char)c;
774             }
775             *q = 0;
776             len = (int)(q - buffer);
777             if ((all_use_dfa || use_dfa) && find_match_limit)
778             {
779                 if (buffer)
780                 {
781                     FREE(buffer);
782                     buffer = NULL;
783                 }
784                 if (offsets)
785                 {
786                     FREE(offsets);
787                     offsets = NULL;
788                 }
789                 if (p)
790                 {
791                     FREE(p);
792                     p = NULL;
793                 }
794                 if (re)
795                 {
796                     (*pcre_free)(re);
797                     re = NULL;
798                 }
799                 if (tables)
800                 {
801                     (*pcre_free)((void*)tables);
802                     tables = NULL;
803                 }
804                 if (extra)
805                 {
806                     FREE(extra);
807                     extra = NULL;
808                 }
809                 return LIMIT_NOT_RELEVANT_FOR_DFA_MATCHING;
810             }
811             /* Handle matching via the POSIX interface, which does not
812             support timing or playing with the match limit or callout data. */
813             for (;; gmatched++)    /* Loop for /g or /G */
814             {
815
816                 /* If find_match_limit is set, we want to do repeated matches with
817                 varying limits in order to find the minimum value for the match limit and
818                 for the recursion limit. */
819
820                 if (find_match_limit)
821                 {
822                     if (extra == NULL)
823                     {
824                         extra = (pcre_extra *)MALLOC(sizeof(pcre_extra));
825                         extra->flags = 0;
826                     }
827
828                     (void)check_match_limit(re, extra, bptr, len, start_offset,
829                                             options | g_notempty, use_offsets, use_size_offsets,
830                                             PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
831                                             PCRE_ERROR_MATCHLIMIT);
832
833                     count = check_match_limit(re, extra, bptr, len, start_offset,
834                                               options | g_notempty, use_offsets, use_size_offsets,
835                                               PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
836                                               PCRE_ERROR_RECURSIONLIMIT);
837                 }
838                 /* If callout_data is set, use the interface with additional data */
839                 else if (callout_data_set)
840                 {
841                     if (extra == NULL)
842                     {
843                         extra = (pcre_extra *)MALLOC(sizeof(pcre_extra));
844                         extra->flags = 0;
845                     }
846                     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
847                     extra->callout_data = &callout_data;
848                     count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
849                                       options | g_notempty, use_offsets, use_size_offsets);
850
851                     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
852                 }
853                 /* The normal case is just to do the match once, with the default
854                 value of match_limit. */
855                 else
856                 {
857                     count = pcre_exec(re, extra, (char *)bptr, len,
858                                       start_offset, options | g_notempty, use_offsets, use_size_offsets);
859                     if (count == 0)
860                     {
861                         count = use_size_offsets / 3;
862                     }
863
864                     //to retrieve backref count and values
865                     if (count > 0 && _pstCapturedString != NULL && _piCapturedStringCount != NULL)
866                     {
867                         int i = 0;
868                         int iErr = 0;
869
870                         iErr = pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, _piCapturedStringCount);
871                         //sciprint("PCRE_INFO_CAPTURECOUNT %d\n", *_piCapturedStringCount);
872
873                         if (*_piCapturedStringCount > 0)
874                         {
875                             *_pstCapturedString = (char**)MALLOC(sizeof(char*) * *_piCapturedStringCount);
876                             for (i = 0 ; i < *_piCapturedStringCount ; i++)
877                             {
878                                 const char* pstSubstring = NULL;
879                                 pcre_get_substring(bptr, use_offsets, count, i + 1, &pstSubstring);
880                                 if (pstSubstring != NULL)
881                                 {
882                                     (*_pstCapturedString)[i] = os_strdup(pstSubstring);
883                                 }
884                                 else
885                                 {
886                                     //empty string is matching, so create it
887                                     (*_pstCapturedString)[i] = os_strdup("");
888                                 }
889
890                                 pcre_free_substring(pstSubstring);
891                             }
892                         }
893                     }
894                 }
895                 /* Matched */
896                 if (count >= 0)
897                 {
898                     int i, maxcount;
899                     maxcount = use_size_offsets / 3;
900                     /* This is a check against a lunatic return value. */
901                     if (count > maxcount)
902                     {
903                         if (buffer)
904                         {
905                             FREE(buffer);
906                             buffer = NULL;
907                         }
908                         if (offsets)
909                         {
910                             FREE(offsets);
911                             offsets = NULL;
912                         }
913                         if (re)
914                         {
915                             (*pcre_free)(re);
916                             re = NULL;
917                         }
918                         if (tables)
919                         {
920                             (*pcre_free)((void*)tables);
921                             tables = NULL;
922                         }
923                         if (extra)
924                         {
925                             FREE(extra);
926                             extra = NULL;
927                         }
928                         if (back_p)
929                         {
930                             FREE(back_p);
931                             back_p = NULL;
932                         }
933                         return TOO_BIG_FOR_OFFSET_SIZE;
934                     }
935
936                     for (i = 0; i < count * 2; i += 2)
937                     {
938                         if (use_offsets[i] >= 0)
939                         {
940                             *Output_Start = use_offsets[i];
941                             *Output_End = use_offsets[i + 1];
942                             if (buffer)
943                             {
944                                 FREE(buffer);
945                             }
946
947                             /* use_offsets = offsets no need to free use_offsets if we free offsets */
948                             if (offsets)
949                             {
950                                 FREE(offsets);
951                             }
952
953                             /* "re" allocated by pcre_compile (better to use free function associated)*/
954                             if (re)
955                             {
956                                 (*pcre_free)(re);
957                             }
958
959                             if (extra)
960                             {
961                                 FREE(extra);
962                             }
963                             if (tables)
964                             {
965                                 /* "tables" allocated by pcre_maketables (better to use free function associated to pcre)*/
966                                 (*pcre_free)((void *)tables);
967                                 tables = NULL;
968                                 setlocale(LC_CTYPE, "C");
969                             }
970
971                             if (back_p)
972                             {
973                                 FREE(back_p);
974                                 back_p = NULL;
975                             }
976                             return PCRE_FINISHED_OK;
977                         }
978                     }
979
980                     for (copynamesptr = copynames; *copynamesptr != 0; copynamesptr += (int)strlen((char*)copynamesptr) + 1)
981                     {
982                         char copybuffer[256];
983                         pcre_copy_named_substring(re, (char *)bptr, use_offsets, count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
984                     }
985
986                     for (i = 0; i < 32; i++)
987                     {
988                         if ((getstrings & (1 << i)) != 0)
989                         {
990                             const char *substring;
991                             pcre_get_substring((char *)bptr, use_offsets, count, i, &substring);
992                         }
993                     }
994
995                     for (getnamesptr = getnames; *getnamesptr != 0; getnamesptr += (int)strlen((char*)getnamesptr) + 1)
996                     {
997                         const char *substring;
998                         pcre_get_named_substring(re, (char *)bptr, use_offsets, count, (char *)getnamesptr, &substring);
999                     }
1000
1001                 }
1002                 /* Failed to match. If this is a /g or /G loop and we previously set
1003                 g_notempty after a null match, this is not necessarily the end. We want
1004                 to advance the start offset, and continue. We won't be at the end of the
1005                 string - that was checked before setting g_notempty.
1006                 Complication arises in the case when the newline option is "any" or
1007                 "anycrlf". If the previous match was at the end of a line terminated by
1008                 CRLF, an advance of one character just passes the \r, whereas we should
1009                 prefer the longer newline sequence, as does the code in pcre_exec().
1010                 Fudge the offset value to achieve this.
1011
1012                 Otherwise, in the case of UTF-8 matching, the advance must be one
1013                 character, not one byte. */
1014                 else
1015                 {
1016                     if (count == PCRE_ERROR_NOMATCH)
1017                     {
1018                         if (gmatched == 0)
1019                         {
1020                             if (tables)
1021                             {
1022                                 (*pcre_free)((void *)tables);
1023                                 tables = NULL;
1024                             }
1025                             if (re)
1026                             {
1027                                 (*pcre_free)((void *)re);
1028                                 re = NULL;
1029                             }
1030                             if (buffer)
1031                             {
1032                                 FREE(buffer);
1033                                 buffer = NULL;
1034                             }
1035                             if (offsets)
1036                             {
1037                                 FREE(offsets);
1038                             }
1039                             if (p)
1040                             {
1041                                 FREE(back_p);
1042                                 back_p = NULL;
1043                             }
1044                             return NO_MATCH;
1045                         }
1046                     }
1047
1048                     if (count == PCRE_ERROR_MATCHLIMIT )
1049                     {
1050                         if (tables)
1051                         {
1052                             (*pcre_free)((void *)tables);
1053                             tables = NULL;
1054                         }
1055                         if (re)
1056                         {
1057                             (*pcre_free)((void *)re);
1058                             re = NULL;
1059                         }
1060                         if (buffer)
1061                         {
1062                             FREE(buffer);
1063                             buffer = NULL;
1064                         }
1065                         if (offsets)
1066                         {
1067                             FREE(offsets);
1068                             offsets = NULL;
1069                         }
1070                         if (back_p)
1071                         {
1072                             FREE(back_p);
1073                             back_p = NULL;
1074                         }
1075                         return MATCH_LIMIT;
1076                     }
1077                     break;  /* Out of loop */
1078                 }
1079
1080                 /* If not /g or /G we are done */
1081                 if (!do_g && !do_G)
1082                 {
1083                     break;
1084                 }
1085
1086                 /* If we have matched an empty string, first check to see if we are at
1087                 the end of the subject. If so, the /g loop is over. Otherwise, mimic
1088                 what Perl's /g options does. This turns out to be rather cunning. First
1089                 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1090                 same point. If this fails (picked up above) we advance to the next
1091                 character. */
1092
1093                 g_notempty = 0;
1094
1095                 if (use_offsets[0] == use_offsets[1])
1096                 {
1097                     if (use_offsets[0] == len)
1098                     {
1099                         break;
1100                     }
1101                     g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1102                 }
1103
1104                 /* For /g, update the start offset, leaving the rest alone */
1105
1106                 if (do_g)
1107                 {
1108                     start_offset = use_offsets[1];
1109                 }
1110                 /* For /G, update the pointer and length */
1111                 else
1112                 {
1113                     bptr += use_offsets[1];
1114                     len -= use_offsets[1];
1115                 }
1116             }  /* End of loop for /g and /G */
1117
1118             if (re)
1119             {
1120                 (*pcre_free)(re);
1121                 re = NULL;
1122             }
1123             if (extra)
1124             {
1125                 FREE(extra);
1126                 extra = NULL;
1127             }
1128             if (tables)
1129             {
1130                 (*pcre_free)((void *)tables);
1131                 tables = NULL;
1132             }
1133
1134             FREE(back_p);
1135             back_p = NULL;
1136             continue;
1137         }    /* End of loop for data lines */
1138     }
1139
1140     if (buffer)
1141     {
1142         FREE(buffer);
1143         buffer = NULL;
1144     }
1145     if (offsets)
1146     {
1147         FREE(offsets);
1148         offsets = NULL;
1149     }
1150
1151     return PCRE_EXIT;
1152 }
1153 /*-------------------------------------------------------------------------------*/
1154 pcre_error_code wide_pcre_private(wchar_t* _pwstInput, wchar_t* _pwstPattern, int* _piStart, int* _piEnd, wchar_t*** _pstCapturedString, int* _piCapturedStringCount)
1155 {
1156     pcre_error_code iPcreStatus = PCRE_FINISHED_OK;
1157     int i               = 0;
1158     int iStart          = 0;
1159     int iEnd            = 0;
1160
1161     char* pstInput      = wide_string_to_UTF8(_pwstInput);
1162     char* pstPattern    = wide_string_to_UTF8(_pwstPattern);
1163     char** pstCaptured  = NULL;//(char**)MALLOC(sizeof(char*) * (strlen(pstInput) + 1));
1164
1165     iPcreStatus = pcre_private(pstInput, pstPattern, &iStart, &iEnd, &pstCaptured, _piCapturedStringCount);
1166     if (iPcreStatus == PCRE_FINISHED_OK && iStart != iEnd)
1167     {
1168         char* pstTempStart      = NULL;
1169         char* pstTempEnd        = NULL;
1170         wchar_t* pwstTempStart  = NULL;
1171         wchar_t* pwstTempEnd    = NULL;
1172
1173         pstTempStart            = os_strdup(pstInput);
1174         pstTempEnd              = os_strdup(pstInput);
1175         pstTempEnd[iEnd]        = 0;
1176         pstTempStart[iStart]    = 0;
1177
1178
1179         pwstTempStart           = to_wide_string(pstTempStart);
1180         pwstTempEnd             = to_wide_string(pstTempEnd);
1181
1182         *_piStart               = (int)wcslen(pwstTempStart);
1183         *_piEnd                 = (int)wcslen(pwstTempEnd);
1184
1185         if (_piCapturedStringCount && *_piCapturedStringCount > 0)
1186         {
1187             /*convert captured field in wide char*/
1188             *_pstCapturedString = (wchar_t**)MALLOC(sizeof(wchar_t*) * *_piCapturedStringCount);
1189             for (i = 0 ; i < *_piCapturedStringCount ; i++)
1190             {
1191                 (*_pstCapturedString)[i] = to_wide_string(pstCaptured[i]);
1192             }
1193             freeArrayOfString(pstCaptured, *_piCapturedStringCount);
1194         }
1195
1196         FREE(pstTempStart);
1197         FREE(pstTempEnd);
1198         FREE(pwstTempStart);
1199         FREE(pwstTempEnd);
1200     }
1201     else
1202     {
1203         *_piStart   = iStart;
1204         *_piEnd     = iEnd;
1205         if (_piCapturedStringCount && *_piCapturedStringCount > 0)
1206         {
1207             /*free unused captured field*/
1208             freeArrayOfString(pstCaptured, *_piCapturedStringCount);
1209         }
1210     }
1211
1212     FREE(pstInput);
1213     FREE(pstPattern);
1214     return iPcreStatus;
1215 }
1216 /*-------------------------------------------------------------------------------*/