08ba8276a5726c2d4309f78fe524e6db36a8df51
[scilab.git] / scilab / modules / string / src / c / pcre_private.c
1 /*
2 * Scilab ( http://www.scilab.org/ ) - This file is part of Scilab
3 * Copyright (C) INRIA
4 * Copyright (C) DIGITEO - 2009
5 *
6 * This file must be used under the terms of the CeCILL.
7 * This source file is licensed as described in the file COPYING, which
8 * you should have received as part of this distribution.  The terms
9 * are also available at
10 * http://www.cecill.info/licences/Licence_CeCILL_V2.1-en.txt
11 *
12 */
13
14 /*-------------------------------------------------------------------------------*/
15 #include <ctype.h>
16 #include <stdio.h>
17 #include <string.h>
18 #include <stdlib.h>
19 #include <time.h>
20 #include <locale.h>
21 #include <errno.h>
22 #include <pcre.h>
23 #include "sci_malloc.h"
24 #include "BOOL.h"
25 #include "pcre_private.h"
26 #include "os_string.h"
27 #include "strsubst.h"
28 #include "configvariable_interface.h"
29 #include "sciprint.h"
30 #include "charEncoding.h"
31 #include "freeArrayOfString.h"
32 /*-------------------------------------------------------------------------------*/
33 /* A number of things vary for Windows builds. Originally, pcretest opened its
34 input and output without "b"; then I was told that "b" was needed in some
35 environments, so it was added for release 5.0 to both the input and output. (It
36 makes no difference on Unix-like systems.) Later I was told that it is wrong
37 for the input on Windows. I've now abstracted the modes into two macros that
38 are set here, to make it easier to fiddle with them, and removed "b" from the
39 input mode under Windows. */
40 /*-------------------------------------------------------------------------------*/
41 #if _MSC_VER
42 #include <io.h>                /* For _setmode() */
43 #include <fcntl.h>             /* For _O_BINARY */
44 #define INPUT_MODE   "r"
45 #define OUTPUT_MODE  "wb"
46 #else
47 #include <sys/time.h>          /* These two includes are needed */
48 #include <sys/resource.h>      /* for setrlimit(). */
49 #define INPUT_MODE   "rb"
50 #define OUTPUT_MODE  "wb"
51 #endif
52
53 #define LINK_SIZE               2
54
55 /* We have to include pcre_internal.h because we need the internal info for
56 displaying the results of pcre_study() and we also need to know about the
57 internal macros, structures, and other internal data values; pcretest has
58 "inside information" compared to a program that strictly follows the PCRE API.
59
60 Although pcre_internal.h does itself include pcre.h, we explicitly include it
61 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
62 appropriately for an application, not for building PCRE. */
63
64 #include <pcre.h>
65 #include "pcre_internal.h"
66
67 /* We need access to the data tables that PCRE uses. So as not to have to keep
68 two copies, we include the source file here, changing the names of the external
69 symbols to prevent clashes. */
70
71 #define _pcre_utf8_table1      utf8_table1
72 #define _pcre_utf8_table1_size utf8_table1_size
73 #define _pcre_utf8_table2      utf8_table2
74 #define _pcre_utf8_table3      utf8_table3
75 #define _pcre_utf8_table4      utf8_table4
76 #define _pcre_utt              utt
77 #define _pcre_utt_size         utt_size
78 #define _pcre_utt_names        utt_names
79 #define _pcre_OP_lengths       OP_lengths
80
81 #include "pcre_tables.c"
82
83 /* It is possible to compile this test program without including support for
84 testing the POSIX interface, though this is not available via the standard
85 Makefile. */
86
87 #if !defined NOPOSIX
88 #include "pcreposix.h"
89 #endif
90
91 /* It is also possible, for the benefit of the version currently imported into
92 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
93 interface to the DFA matcher (NODFA), and without the doublecheck of the old
94 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
95 UTF8 support if PCRE is built without it. */
96
97 #ifndef SUPPORT_UTF8
98 #ifndef NOUTF8
99 #define NOUTF8
100 #endif
101 #endif
102 /*-------------------------------------------------------------------------------*/
103 /* Static variables */
104
105 static int callout_count = 0;       /* reset to 0 by pcre_private() before the subject string is decoded */
106 static int callout_fail_count = 0;  /* reset to 999999 by pcre_private(); a "\C!n" escape sets it to 0 and "\C!n!m" sets it to m */
107 static int callout_fail_id = 0;     /* reset to -1 by pcre_private(); a "\C!n" escape in the subject sets it to n */
108
109 /* Working copy of the subject: pcre_private() stores the strsub() result here and rewrites it in place while decoding escapes. */
110
111 char *buffer = NULL;                /* NOTE(review): non-static and generically named - confirm no other file links to it */
112
113
114 static int check_match_limit(pcre *re, pcre_extra *extra, char *bptr, int len,
115                              int start_offset, int options, int *use_offsets, int use_size_offsets,
116                              int flag, unsigned long int *limit, int errnumber);  /* forward declaration; the definition follows below */
117
118
119 /*************************************************
120 *        Check match or recursion limit          *
121 *************************************************/
122
123 static int check_match_limit(pcre *re, pcre_extra *extra, char *bptr, int len,
124                              int start_offset, int options, int *use_offsets, int use_size_offsets,
125                              int flag, unsigned long int *limit, int errnumber)
126 {
127     int count;
128     int min = 0;
129     int mid = 64;
130     int max = -1;
131
132     extra->flags |= flag;
133
134     for (;;)
135     {
136         *limit = mid;
137
138         count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
139                           use_offsets, use_size_offsets);
140
141         if (count == errnumber)
142         {
143             min = mid;
144             mid = (mid == max - 1) ? max : (max > 0) ? (min + max) / 2 : mid * 2;
145         }
146
147         else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
148                  count == PCRE_ERROR_PARTIAL)
149         {
150             if (mid == min + 1)
151             {
152                 break;
153             }
154             max = mid;
155             mid = (min + mid) / 2;
156         }
157         else
158         {
159             break;    /* Some other error */
160         }
161     }
162
163     extra->flags &= ~flag;
164     return count;
165 }
166
167
168 /*************************************************
169 *               Algorithm                      *
170 *************************************************/
171
172 /* Derived from the main loop of pcretest, which read patterns and data lines
173 from a file or stdin. Here the pattern comes from INPUT_PAT (a regular expression
174 in delimiters, optionally followed by options) and the subject from INPUT_LINE. */
175
176 pcre_error_code pcre_private(char *INPUT_LINE, char *INPUT_PAT, int *Output_Start, int *Output_End, char*** _pstCapturedString, int* _piCapturedStringCount)
177 {
178     /* ALL strings are managed as UTF-8 by default */
179     int options = PCRE_UTF8;
180     int size_offsets = 45;
181     int size_offsets_max;
182     int *offsets = NULL;
183     int all_use_dfa = 0;
184     BOOL LOOP_PCRE_TST = FALSE;
185
186     /* These vectors store, end-to-end, a list of captured substring names. Assume
187     that 1024 is plenty long enough for the few names we'll be testing. */
188
189     char copynames[1024];
190     char getnames[1024];
191
192     char *copynamesptr = NULL;
193     char *getnamesptr = NULL;
194
195     int rc = 0;
196     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
197     if (rc != 1)
198     {
199         return UTF8_NOT_SUPPORTED;
200     }
201
202     /* bug 3891 */
203     /* backslash characters are not interpreted for input */
204     buffer = strsub(INPUT_LINE, "\\", "\\\\");
205
206     size_offsets_max = size_offsets;
207     offsets = (int *)MALLOC(size_offsets_max * sizeof(int));
208     if (offsets == NULL)
209     {
210         if (buffer)
211         {
212             FREE(buffer);
213             buffer = NULL;
214         }
215         return NOT_ENOUGH_MEMORY_FOR_VECTOR;
216     }
217     /* Main loop */
218     LOOP_PCRE_TST = FALSE;
219     while (!LOOP_PCRE_TST)
220     {
221         pcre *re = NULL;
222         pcre_extra *extra = NULL;
223         const char *error = NULL;
224         char *back_p = NULL;
225         char *p = NULL;
226         char *pp = NULL;
227         char *ppp = NULL;
228         const unsigned char *tables = NULL;
229         int do_G = 0;
230         int do_g = 0;
231         int erroroffset = 0, len = 0, delimiter;
232
233         LOOP_PCRE_TST = TRUE;
234         p = os_strdup(INPUT_PAT);
235         back_p = p;
236         while (isspace(*p))
237         {
238             p++;
239         }
240         if (*p == 0)
241         {
242             continue;
243         }
244         /* In-line pattern (the usual case). Get the delimiter and seek the end of
245         the pattern; if it isn't complete, the pattern is rejected below. */
246
247         delimiter = *p++;
248
249         if (isalnum(delimiter) || delimiter == '\\')
250         {
251             if (buffer)
252             {
253                 FREE(buffer);
254                 buffer = NULL;
255             }
256             if (offsets)
257             {
258                 FREE(offsets);
259                 offsets = NULL;
260             }
261             if (back_p)
262             {
263                 FREE(back_p);
264                 back_p = NULL;
265             }
266             return DELIMITER_NOT_ALPHANUMERIC;
267         }
268
269         pp = p;
270
271         while (*pp != 0)
272         {
273             if (*pp == '\\' && pp[1] != 0)
274             {
275                 pp++;
276             }
277             else if (*pp == delimiter)
278             {
279                 break;
280             }
281             pp++;
282         }
283
284         /* If the delimiter can't be found, it's a syntax error */
285         if (*pp == 0)
286         {
287             if (buffer)
288             {
289                 FREE(buffer);
290                 buffer = NULL;
291             }
292             if (offsets)
293             {
294                 FREE(offsets);
295                 offsets = NULL;
296             }
297             if (back_p)
298             {
299                 FREE(back_p);
300                 back_p = NULL;
301             }
302             if (offsets)
303             {
304                 FREE(offsets);
305             }
306             return CAN_NOT_COMPILE_PATTERN;
307         }
308
309         /* If the first character after the delimiter is backslash, make
310         the pattern end with backslash. This is purely to provide a way
311         of testing for the error message when a pattern ends with backslash. */
312
313         if (pp[1] == '\\')
314         {
315             *pp++ = '\\';
316         }
317
318         /* Terminate the pattern at the delimiter, and save a copy of the pattern
319         for callouts. */
320
321         *pp++ = 0;
322
323         /* Look for options after final delimiter */
324
325         //options = 8192;
326
327         while (*pp != 0)
328         {
329             switch (*pp++)
330             {
331                 case 'f':
332                     options |= PCRE_FIRSTLINE;
333                     break;
334                 case 'g':
335                     do_g = 1;
336                     break;
337                 case 'i':
338                     options |= PCRE_CASELESS;
339                     break;
340                 case 'm':
341                     options |= PCRE_MULTILINE;
342                     break;
343                 case 's':
344                     options |= PCRE_DOTALL;
345                     break;
346                 case 'x':
347                     options |= PCRE_EXTENDED;
348                     break;
349                 case '+':
350                     break;
351                 case 'A':
352                     options |= PCRE_ANCHORED;
353                     break;
354                 case 'B':
355                     break;
356                 case 'C':
357                     options |= PCRE_AUTO_CALLOUT;
358                     break;
359                 case 'D':
360                     break;
361                 case 'E':
362                     options |= PCRE_DOLLAR_ENDONLY;
363                     break;
364                 case 'F':
365                     break;
366                 case 'G':
367                     do_G = 1;
368                     break;
369                 case 'I':
370                     break;
371                 case 'J':
372                     options |= PCRE_DUPNAMES;
373                     break;
374                 case 'M':
375                     break;
376                 case 'N':
377                     options |= PCRE_NO_AUTO_CAPTURE;
378                     break;
379                 case 'S':
380                     break;
381                 case 'U':
382                     options |= PCRE_UNGREEDY;
383                     break;
384                 case 'X':
385                     options |= PCRE_EXTRA;
386                     break;
387                 case 'Z':
388                     break;
389                 case '8':
390                 {
391                     int rc = 0;
392                     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
393                     if (rc != 1)
394                     {
395                         if (buffer)
396                         {
397                             FREE(buffer);
398                             buffer = NULL;
399                         }
400                         if (offsets)
401                         {
402                             FREE(offsets);
403                         }
404                         return UTF8_NOT_SUPPORTED;
405                     }
406                     options |= PCRE_UTF8;
407                 }
408                 break;
409                 case '?':
410                     options |= PCRE_NO_UTF8_CHECK;
411                     break;
412                 case 'L':
413                     ppp = pp;
414                     /* The '\r' test here is so that it works on Windows. */
415                     /* The '0' test is just in case this is an unterminated line. */
416                     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ')
417                     {
418                         ppp++;
419                     }
420                     *ppp = 0;
421                     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
422                     {
423                         goto SKIP_DATA;
424                     }
425
426                     tables = pcre_maketables();
427                     pp = ppp;
428                     break;
429                 case '>':
430                     while (*pp != 0)
431                     {
432                         pp++;
433                     }
434                     while (isspace(pp[-1]))
435                     {
436                         pp--;
437                     }
438                     *pp = 0;
439                     break;
440                 case '<':
441                 {
442                     while (*pp++ != '>')
443                     {
444                         ;
445                     }
446                 }
447                 break;
448                 case '\r':                      /* So that it works in Windows */
449                 case '\n':
450                 case ' ':
451                     break;
452
453                 default:
454                     goto SKIP_DATA;
455             }
456         }
457
458         /* Compile the pattern with the native PCRE interface (not the POSIX
459         wrapper), passing the options and any locale-specific character tables
460         collected above. */
461
462
463         {
464             re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
465             /* Compilation failed; go back for another re, skipping to blank line
466             if non-interactive. */
467             if (re == NULL)
468             {
469 SKIP_DATA:
470                 if (buffer)
471                 {
472                     FREE(buffer);
473                     buffer = NULL;
474                 }
475                 if (offsets)
476                 {
477                     FREE(offsets);
478                     offsets = NULL;
479                 }
480                 if (tables)
481                 {
482                     (*pcre_free)((void*)tables);
483                     tables = NULL;
484                 }
485                 if (extra)
486                 {
487                     FREE(extra);
488                     extra = NULL;
489                 }
490                 if (back_p)
491                 {
492                     FREE(back_p);
493                     back_p = NULL;
494                 }
495                 return CAN_NOT_COMPILE_PATTERN;
496             }
497
498         }        /* End of non-POSIX compile */
499
500         /* Read data lines and test them */
501         {
502             char *q = NULL;
503             char *bptr = NULL;
504             int *use_offsets = offsets;
505             int use_size_offsets = size_offsets;
506             int callout_data = 0;
507             int callout_data_set = 0;
508             int count = 0;
509             int c = 0;
510             int copystrings = 0;
511             int find_match_limit = 0;
512             int getstrings = 0;
513             int gmatched = 0;
514             int start_offset = 0;
515             int g_notempty = 0;
516             int use_dfa = 0;
517
518             options = 0;
519             *copynames = 0;
520             *getnames = 0;
521
522             copynamesptr = copynames;
523             getnamesptr = getnames;
524
525             callout_count = 0;
526             callout_fail_count = 999999;
527             callout_fail_id = -1;
528
529             if (extra != NULL)
530             {
531                 extra->flags &= ~(PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION);
532             }
533             p = buffer;
534             bptr = q = buffer;
535             while ((c = *p++) != 0)
536             {
537                 int i = 0;
538                 int n = 0;
539
540                 if (c == '\\') switch ((c = *p++))
541                     {
542                         case 'a':
543                             c =    7;
544                             break;
545                         case 'b':
546                             c = '\b';
547                             break;
548                         case 'e':
549                             c =   27;
550                             break;
551                         case 'f':
552                             c = '\f';
553                             break;
554                         case 'n':
555                             c = '\n';
556                             break;
557                         case 'r':
558                             c = '\r';
559                             break;
560                         case 't':
561                             c = '\t';
562                             break;
563                         case 'v':
564                             c = '\v';
565                             break;
566                         case '0':
567                         case '1':
568                         case '2':
569                         case '3':
570                         case '4':
571                         case '5':
572                         case '6':
573                         case '7':
574                             c -= '0';
575                             while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
576                             {
577                                 c = c * 8 + *p++ - '0';
578                             }
579                             break;
580                         case 'x':
581                             /* Ordinary \x */
582                             c = 0;
583                             while (i++ < 2 && isxdigit(*p))
584                             {
585                                 c = c * 16 + tolower(*p) - ((isdigit(*p)) ? '0' : 'W');
586                                 p++;
587                             }
588                             break;
589                         case 0:   /* \ followed by EOF allows for an empty line */
590                             p--;
591                             continue;
592                         case '>':
593                             while (isdigit(*p))
594                             {
595                                 start_offset = start_offset * 10 + *p++ - '0';
596                             }
597                             continue;
598                         case 'A':  /* Option setting */
599                             options |= PCRE_ANCHORED;
600                             continue;
601                         case 'B':
602                             options |= PCRE_NOTBOL;
603                             continue;
604                         case 'C':
605                             if (isdigit(*p))    /* Set copy string */
606                             {
607                                 while (isdigit(*p))
608                                 {
609                                     n = n * 10 + *p++ - '0';
610                                 }
611                                 copystrings |= 1 << n;
612                             }
613                             else if (isalnum(*p))
614                             {
615                                 char *npp = copynamesptr;
616                                 while (isalnum(*p))
617                                 {
618                                     *npp++ = *p++;
619                                 }
620                                 *npp++ = 0;
621                                 *npp = 0;
622                                 pcre_get_stringnumber(re, (char *)copynamesptr);
623                                 copynamesptr = npp;
624                             }
625                             else if (*p == '+')
626                             {
627                                 p++;
628                             }
629                             else if (*p == '-')
630                             {
631                                 p++;
632                             }
633                             else if (*p == '!')
634                             {
635                                 callout_fail_id = 0;
636                                 p++;
637                                 while (isdigit(*p))
638                                 {
639                                     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
640                                 }
641                                 callout_fail_count = 0;
642                                 if (*p == '!')
643                                 {
644                                     p++;
645                                     while (isdigit(*p))
646                                     {
647                                         callout_fail_count = callout_fail_count * 10 + *p++ - '0';
648                                     }
649                                 }
650                             }
651                             else if (*p == '*')
652                             {
653                                 int sign = 1;
654                                 callout_data = 0;
655                                 if (*(++p) == '-')
656                                 {
657                                     sign = -1;
658                                     p++;
659                                 }
660                                 while (isdigit(*p))
661                                 {
662                                     callout_data = callout_data * 10 + *p++ - '0';
663                                 }
664                                 callout_data *= sign;
665                                 callout_data_set = 1;
666                             }
667                             continue;
668                         case 'G':
669                             if (isdigit(*p))
670                             {
671                                 while (isdigit(*p))
672                                 {
673                                     n = n * 10 + *p++ - '0';
674                                 }
675                                 getstrings |= 1 << n;
676                             }
677                             else if (isalnum(*p))
678                             {
679                                 char *npp = getnamesptr;
680                                 while (isalnum(*p))
681                                 {
682                                     *npp++ = *p++;
683                                 }
684                                 *npp++ = 0;
685                                 *npp = 0;
686                                 pcre_get_stringnumber(re, (char *)getnamesptr);
687                                 getnamesptr = npp;
688                             }
689                             continue;
690                         case 'L':
691                             continue;
692                         case 'M':
693                             find_match_limit = 1;
694                             continue;
695                         case 'N':
696                             options |= PCRE_NOTEMPTY;
697                             continue;
698                         case 'O':
699                             while (isdigit(*p))
700                             {
701                                 n = n * 10 + *p++ - '0';
702                             }
703                             if (n > size_offsets_max)
704                             {
705                                 size_offsets_max = n;
706                                 if (offsets)
707                                 {
708                                     FREE(offsets);
709                                 }
710                                 use_offsets = offsets = (int *)MALLOC(size_offsets_max * sizeof(int));
711                             }
712                             use_size_offsets = n;
713                             if (n == 0)
714                             {
715                                 use_offsets = NULL;    /* Ensures it can't write to it */
716                             }
717                             continue;
718                         case 'P':
719                             options |= PCRE_PARTIAL;
720                             continue;
721                         case 'Q':
722                             while (isdigit(*p))
723                             {
724                                 n = n * 10 + *p++ - '0';
725                             }
726                             if (extra == NULL)
727                             {
728                                 extra = (pcre_extra *)MALLOC(sizeof(pcre_extra));
729                                 extra->flags = 0;
730                             }
731                             extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
732                             extra->match_limit_recursion = n;
733                             continue;
734                         case 'q':
735                             while (isdigit(*p))
736                             {
737                                 n = n * 10 + *p++ - '0';
738                             }
739                             if (extra == NULL)
740                             {
741                                 extra = (pcre_extra *)MALLOC(sizeof(pcre_extra));
742                                 extra->flags = 0;
743                             }
744                             extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
745                             extra->match_limit = n;
746                             continue;
747 #if !defined NODFA
748                         case 'R':
749                             options |= PCRE_DFA_RESTART;
750                             continue;
751 #endif
752                         case 'S':
753
754                             continue;
755                         case 'Z':
756                             options |= PCRE_NOTEOL;
757                             continue;
758                         case '?':
759                             options |= PCRE_NO_UTF8_CHECK;
760                             continue;
761                         case '<':
762                         {
763                             while (*p++ != '>')
764                             {
765                                 ;
766                             }
767                         }
768                         continue;
769                     }
770                 *q++ = (char)c;
771             }
772             *q = 0;
773             len = (int)(q - buffer);
774             if ((all_use_dfa || use_dfa) && find_match_limit)
775             {
776                 if (buffer)
777                 {
778                     FREE(buffer);
779                     buffer = NULL;
780                 }
781                 if (offsets)
782                 {
783                     FREE(offsets);
784                     offsets = NULL;
785                 }
786                 if (p)
787                 {
788                     FREE(p);
789                     p = NULL;
790                 }
791                 if (re)
792                 {
793                     (*pcre_free)(re);
794                     re = NULL;
795                 }
796                 if (tables)
797                 {
798                     (*pcre_free)((void*)tables);
799                     tables = NULL;
800                 }
801                 if (extra)
802                 {
803                     FREE(extra);
804                     extra = NULL;
805                 }
806                 return LIMIT_NOT_RELEVANT_FOR_DFA_MATCHING;
807             }
808             /* Match with the native PCRE interface, which (unlike the POSIX one)
809             supports the match-limit search and the callout data used below. */
810             for (;; gmatched++)    /* Loop for /g or /G */
811             {
812
813                 /* If find_match_limit is set, we want to do repeated matches with
814                 varying limits in order to find the minimum value for the match limit and
815                 for the recursion limit. */
816
817                 if (find_match_limit)
818                 {
819                     if (extra == NULL)
820                     {
821                         extra = (pcre_extra *)MALLOC(sizeof(pcre_extra));
822                         extra->flags = 0;
823                     }
824
825                     (void)check_match_limit(re, extra, bptr, len, start_offset,
826                                             options | g_notempty, use_offsets, use_size_offsets,
827                                             PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
828                                             PCRE_ERROR_MATCHLIMIT);
829
830                     count = check_match_limit(re, extra, bptr, len, start_offset,
831                                               options | g_notempty, use_offsets, use_size_offsets,
832                                               PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
833                                               PCRE_ERROR_RECURSIONLIMIT);
834                 }
835                 /* If callout_data is set, use the interface with additional data */
836                 else if (callout_data_set)
837                 {
838                     if (extra == NULL)
839                     {
840                         extra = (pcre_extra *)MALLOC(sizeof(pcre_extra));
841                         extra->flags = 0;
842                     }
843                     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
844                     extra->callout_data = &callout_data;
845                     count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
846                                       options | g_notempty, use_offsets, use_size_offsets);
847
848                     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
849                 }
850                 /* The normal case is just to do the match once, with the default
851                 value of match_limit. */
852                 else
853                 {
854                     count = pcre_exec(re, extra, (char *)bptr, len,
855                                       start_offset, options | g_notempty, use_offsets, use_size_offsets);
856                     if (count == 0)
857                     {
858                         count = use_size_offsets / 3;
859                     }
860
861                     //to retrieve backref count and values
862                     if (count > 0 && _pstCapturedString != NULL && _piCapturedStringCount != NULL)
863                     {
864                         int i = 0;
865                         int iErr = 0;
866
867                         iErr = pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, _piCapturedStringCount);
868                         //sciprint("PCRE_INFO_CAPTURECOUNT %d\n", *_piCapturedStringCount);
869
870                         if (*_piCapturedStringCount > 0)
871                         {
872                             *_pstCapturedString = (char**)MALLOC(sizeof(char*) * *_piCapturedStringCount);
873                             for (i = 0 ; i < *_piCapturedStringCount ; i++)
874                             {
875                                 const char* pstSubstring = NULL;
876                                 pcre_get_substring(bptr, use_offsets, count, i + 1, &pstSubstring);
877                                 if (pstSubstring != NULL)
878                                 {
879                                     (*_pstCapturedString)[i] = os_strdup(pstSubstring);
880                                 }
881                                 else
882                                 {
883                                     //empty string is matching, so create it
884                                     (*_pstCapturedString)[i] = os_strdup("");
885                                 }
886
887                                 pcre_free_substring(pstSubstring);
888                             }
889                         }
890                     }
891                 }
892                 /* Matched */
893                 if (count >= 0)
894                 {
895                     int i, maxcount;
896                     maxcount = use_size_offsets / 3;
897                     /* This is a check against a lunatic return value. */
898                     if (count > maxcount)
899                     {
900                         if (buffer)
901                         {
902                             FREE(buffer);
903                             buffer = NULL;
904                         }
905                         if (offsets)
906                         {
907                             FREE(offsets);
908                             offsets = NULL;
909                         }
910                         if (re)
911                         {
912                             (*pcre_free)(re);
913                             re = NULL;
914                         }
915                         if (tables)
916                         {
917                             (*pcre_free)((void*)tables);
918                             tables = NULL;
919                         }
920                         if (extra)
921                         {
922                             FREE(extra);
923                             extra = NULL;
924                         }
925                         if (back_p)
926                         {
927                             FREE(back_p);
928                             back_p = NULL;
929                         }
930                         return TOO_BIG_FOR_OFFSET_SIZE;
931                     }
932
933                     for (i = 0; i < count * 2; i += 2)
934                     {
935                         if (use_offsets[i] >= 0)
936                         {
937                             *Output_Start = use_offsets[i];
938                             *Output_End = use_offsets[i + 1];
939                             if (buffer)
940                             {
941                                 FREE(buffer);
942                             }
943
944                             /* use_offsets = offsets no need to free use_offsets if we free offsets */
945                             if (offsets)
946                             {
947                                 FREE(offsets);
948                             }
949
950                             /* "re" allocated by pcre_compile (better to use free function associated)*/
951                             if (re)
952                             {
953                                 (*pcre_free)(re);
954                             }
955
956                             if (extra)
957                             {
958                                 FREE(extra);
959                             }
960                             if (tables)
961                             {
962                                 /* "tables" allocated by pcre_maketables (better to use free function associated to pcre)*/
963                                 (*pcre_free)((void *)tables);
964                                 tables = NULL;
965                                 setlocale(LC_CTYPE, "C");
966                             }
967
968                             if (back_p)
969                             {
970                                 FREE(back_p);
971                                 back_p = NULL;
972                             }
973                             return PCRE_FINISHED_OK;
974                         }
975                     }
976
977                     for (copynamesptr = copynames; *copynamesptr != 0; copynamesptr += (int)strlen((char*)copynamesptr) + 1)
978                     {
979                         char copybuffer[256];
980                         pcre_copy_named_substring(re, (char *)bptr, use_offsets, count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
981                     }
982
983                     for (i = 0; i < 32; i++)
984                     {
985                         if ((getstrings & (1 << i)) != 0)
986                         {
987                             const char *substring;
988                             pcre_get_substring((char *)bptr, use_offsets, count, i, &substring);
989                         }
990                     }
991
992                     for (getnamesptr = getnames; *getnamesptr != 0; getnamesptr += (int)strlen((char*)getnamesptr) + 1)
993                     {
994                         const char *substring;
995                         pcre_get_named_substring(re, (char *)bptr, use_offsets, count, (char *)getnamesptr, &substring);
996                     }
997
998                 }
999                 /* Failed to match. If this is a /g or /G loop and we previously set
1000                 g_notempty after a null match, this is not necessarily the end. We want
1001                 to advance the start offset, and continue. We won't be at the end of the
1002                 string - that was checked before setting g_notempty.
1003                 Complication arises in the case when the newline option is "any" or
1004                 "anycrlf". If the previous match was at the end of a line terminated by
1005                 CRLF, an advance of one character just passes the \r, whereas we should
1006                 prefer the longer newline sequence, as does the code in pcre_exec().
1007                 Fudge the offset value to achieve this.
1008
1009                 Otherwise, in the case of UTF-8 matching, the advance must be one
1010                 character, not one byte. */
1011                 else
1012                 {
1013                     if (count == PCRE_ERROR_NOMATCH)
1014                     {
1015                         if (gmatched == 0)
1016                         {
1017                             if (tables)
1018                             {
1019                                 (*pcre_free)((void *)tables);
1020                                 tables = NULL;
1021                             }
1022                             if (re)
1023                             {
1024                                 (*pcre_free)((void *)re);
1025                                 re = NULL;
1026                             }
1027                             if (buffer)
1028                             {
1029                                 FREE(buffer);
1030                                 buffer = NULL;
1031                             }
1032                             if (offsets)
1033                             {
1034                                 FREE(offsets);
1035                             }
1036                             if (p)
1037                             {
1038                                 FREE(back_p);
1039                                 back_p = NULL;
1040                             }
1041                             return NO_MATCH;
1042                         }
1043                     }
1044
1045                     if (count == PCRE_ERROR_MATCHLIMIT )
1046                     {
1047                         if (tables)
1048                         {
1049                             (*pcre_free)((void *)tables);
1050                             tables = NULL;
1051                         }
1052                         if (re)
1053                         {
1054                             (*pcre_free)((void *)re);
1055                             re = NULL;
1056                         }
1057                         if (buffer)
1058                         {
1059                             FREE(buffer);
1060                             buffer = NULL;
1061                         }
1062                         if (offsets)
1063                         {
1064                             FREE(offsets);
1065                             offsets = NULL;
1066                         }
1067                         if (back_p)
1068                         {
1069                             FREE(back_p);
1070                             back_p = NULL;
1071                         }
1072                         return MATCH_LIMIT;
1073                     }
1074                     break;  /* Out of loop */
1075                 }
1076
1077                 /* If not /g or /G we are done */
1078                 if (!do_g && !do_G)
1079                 {
1080                     break;
1081                 }
1082
1083                 /* If we have matched an empty string, first check to see if we are at
1084                 the end of the subject. If so, the /g loop is over. Otherwise, mimic
1085                 what Perl's /g options does. This turns out to be rather cunning. First
1086                 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1087                 same point. If this fails (picked up above) we advance to the next
1088                 character. */
1089
1090                 g_notempty = 0;
1091
1092                 if (use_offsets[0] == use_offsets[1])
1093                 {
1094                     if (use_offsets[0] == len)
1095                     {
1096                         break;
1097                     }
1098                     g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1099                 }
1100
1101                 /* For /g, update the start offset, leaving the rest alone */
1102
1103                 if (do_g)
1104                 {
1105                     start_offset = use_offsets[1];
1106                 }
1107                 /* For /G, update the pointer and length */
1108                 else
1109                 {
1110                     bptr += use_offsets[1];
1111                     len -= use_offsets[1];
1112                 }
1113             }  /* End of loop for /g and /G */
1114
1115             if (re)
1116             {
1117                 (*pcre_free)(re);
1118                 re = NULL;
1119             }
1120             if (extra)
1121             {
1122                 FREE(extra);
1123                 extra = NULL;
1124             }
1125             if (tables)
1126             {
1127                 (*pcre_free)((void *)tables);
1128                 tables = NULL;
1129             }
1130
1131             FREE(back_p);
1132             back_p = NULL;
1133             continue;
1134         }    /* End of loop for data lines */
1135     }
1136
1137     if (buffer)
1138     {
1139         FREE(buffer);
1140         buffer = NULL;
1141     }
1142     if (offsets)
1143     {
1144         FREE(offsets);
1145         offsets = NULL;
1146     }
1147
1148     return PCRE_EXIT;
1149 }
1150 /*-------------------------------------------------------------------------------*/
1151 pcre_error_code wide_pcre_private(wchar_t* _pwstInput, wchar_t* _pwstPattern, int* _piStart, int* _piEnd, wchar_t*** _pstCapturedString, int* _piCapturedStringCount)
1152 {
1153     pcre_error_code iPcreStatus = PCRE_FINISHED_OK;
1154     int i               = 0;
1155     int iStart          = 0;
1156     int iEnd            = 0;
1157
1158     char* pstInput      = wide_string_to_UTF8(_pwstInput);
1159     char* pstPattern    = wide_string_to_UTF8(_pwstPattern);
1160     char** pstCaptured  = NULL;//(char**)MALLOC(sizeof(char*) * (strlen(pstInput) + 1));
1161
1162     iPcreStatus = pcre_private(pstInput, pstPattern, &iStart, &iEnd, &pstCaptured, _piCapturedStringCount);
1163     if (iPcreStatus == PCRE_FINISHED_OK && iStart != iEnd)
1164     {
1165         char* pstTempStart      = NULL;
1166         char* pstTempEnd        = NULL;
1167         wchar_t* pwstTempStart  = NULL;
1168         wchar_t* pwstTempEnd    = NULL;
1169
1170         pstTempStart            = os_strdup(pstInput);
1171         pstTempEnd              = os_strdup(pstInput);
1172         pstTempEnd[iEnd]        = 0;
1173         pstTempStart[iStart]    = 0;
1174
1175
1176         pwstTempStart           = to_wide_string(pstTempStart);
1177         pwstTempEnd             = to_wide_string(pstTempEnd);
1178
1179         *_piStart               = (int)wcslen(pwstTempStart);
1180         *_piEnd                 = (int)wcslen(pwstTempEnd);
1181
1182         if (_piCapturedStringCount && *_piCapturedStringCount > 0)
1183         {
1184             /*convert captured field in wide char*/
1185             *_pstCapturedString = (wchar_t**)MALLOC(sizeof(wchar_t*) * *_piCapturedStringCount);
1186             for (i = 0 ; i < *_piCapturedStringCount ; i++)
1187             {
1188                 (*_pstCapturedString)[i] = to_wide_string(pstCaptured[i]);
1189             }
1190             freeArrayOfString(pstCaptured, *_piCapturedStringCount);
1191         }
1192
1193         FREE(pstTempStart);
1194         FREE(pstTempEnd);
1195         FREE(pwstTempStart);
1196         FREE(pwstTempEnd);
1197     }
1198     else
1199     {
1200         *_piStart   = iStart;
1201         *_piEnd     = iEnd;
1202         if (_piCapturedStringCount && *_piCapturedStringCount > 0)
1203         {
1204             /*free unused captured field*/
1205             freeArrayOfString(pstCaptured, *_piCapturedStringCount);
1206         }
1207     }
1208
1209     FREE(pstInput);
1210     FREE(pstPattern);
1211     return iPcreStatus;
1212 }
1213 /*-------------------------------------------------------------------------------*/