* Bug #8680 fixed - Output argument "end" of the regexp function has been changed.
[scilab.git] / scilab / modules / string / help / en_US / regexp.xml
1 <?xml version="1.0" encoding="UTF-8"?>
2 <!--
3 * Scilab ( http://www.scilab.org/ ) - This file is part of Scilab
4 * Copyright (C) INRIA -
5 *
6 * This file must be used under the terms of the CeCILL.
7 * This source file is licensed as described in the file COPYING, which
8 * you should have received as part of this distribution.  The terms
9 * are also available at
10 * http://www.cecill.info/licences/Licence_CeCILL_V2-en.txt
11 *
12 -->
13 <refentry xmlns="http://docbook.org/ns/docbook"
14 xmlns:xlink="http://www.w3.org/1999/xlink"
15 xmlns:svg="http://www.w3.org/2000/svg" xmlns:ns4="http://www.w3.org/1999/xhtml"
16 xmlns:mml="http://www.w3.org/1998/Math/MathML"
17 xmlns:db="http://docbook.org/ns/docbook"
18 xml:id="regexp" xml:lang="en">
19     <refnamediv>
20         <refname>regexp</refname>
21         <refpurpose>
22             find a substring that matches the regular expression string
23         </refpurpose>
24     </refnamediv>
25     <refsynopsisdiv>
26         <title>Calling Sequence</title>
27         <synopsis>
28             [start] = regexp(input, pattern, [flag])
29             [start, final] = regexp(input, pattern, [flag])
30             [start, final, match] = regexp(input, pattern, [flag])
31             [start, final, match, foundString] = regexp(input, pattern, [flag])
32         </synopsis>
33     </refsynopsisdiv>
34     <refsection>
35         <title>Arguments</title>
36         <variablelist>
37             <varlistentry>
38                 <term>input</term>
39                 <listitem>
40                     <para>a string.</para>
41                 </listitem>
42             </varlistentry>
43             <varlistentry>
44                 <term>pattern</term>
45                 <listitem>
46                 <para>
47                     a character string (following the rules of regular expressions).
48                 </para>
49                 </listitem>
50             </varlistentry>
51             <varlistentry>
52                 <term>start</term>
53                 <listitem>
54                     <para>
55                         the starting index of each substring of
56                         <varname>input</varname> that matches the regular
57                         expression string <varname>pattern</varname>.
58                     </para>
59                 </listitem>
60             </varlistentry>
61             <varlistentry>
62                 <term>final</term>
63                 <listitem>
64                     <para>
65                         the ending index of each substring of
66                         <varname>input</varname> that matches the regular
67                         expression string <varname>pattern</varname>.
68                     </para>
69                 </listitem>
70             </varlistentry>
71             <varlistentry>
72                 <term>match</term>
73                 <listitem>
74                     <para>
75                         the text of each substring of <varname>input</varname>
76                         that matches <varname>pattern</varname>.
77                     </para>
78                 </listitem>
79             </varlistentry>
80             <varlistentry>
81                 <term>foundString</term>
82                 <listitem>
83                     <para>
84                         the captured parenthesized <literal>subpatterns</literal>.
85                     </para>
86                 </listitem>
87             </varlistentry>
88             <varlistentry>
89                 <term>[flag]</term>
90                 <listitem>
91                     <para>
92                         <literal>'o'</literal> for matching the pattern once.
93                     </para>
94                 </listitem>
95             </varlistentry>
96         </variablelist>
97     </refsection>
98     <refsection>
99         <title>Description</title>
100         <para>
101             The rules of regular expressions are similar to those of the Perl language. For a
102             quick start, see
103             <ulink url="http://perldoc.perl.org/perlrequick.html">http://perldoc.perl.org/perlrequick.html</ulink>.
104             For a more in-depth tutorial, see
105             <ulink url="http://perldoc.perl.org/perlretut.html">http://perldoc.perl.org/perlretut.html</ulink>,
106             and for the reference page, see
107             <ulink url="http://perldoc.perl.org/perlre.html">http://perldoc.perl.org/perlre.html</ulink>.
108         </para>
109         <para>
110             A difference with Perl is that matching a position but no character
111             (for example, with <literal>/^/</literal> or
112             <literal>/(?=o)/</literal>) is a successful match in Perl but not
113             in Scilab.
114         </para>
115     </refsection>
116     <refsection>
117         <title>Examples</title>
118         <programlisting role="example"><![CDATA[
119 regexp('xabyabbbz','/ab*/','o')
120 regexp('a!','/((((((((((a))))))))))\041/')
121 regexp('ABCC','/^abc$/i')
122 regexp('ABC','/ab|cd/i')
123 [a b c]=regexp('XABYABBBZ','/ab*/i')
124
125 piString="3.14"
126 [a,b,c,piStringSplit]=regexp(piString,"/(\d+)\.(\d+)/")
127 disp(piStringSplit(1))
128 disp(piStringSplit(2))
129
130 [a,b,c,d]=regexp('xabyabbbz','/ab(.*)b(.*)/')
131 size(d)
132
133 // get host name from URL
134 myURL="http://www.scilab.org/download/";
135 [a,b,c,d]=regexp(myURL,'@^(?:http://)?([^/]+)@i')
136
137 str='foobar: 2012';
138 // Using named subpatterns
139 [a,b,c,d]=regexp(str,'/(?P<name>\w+): (?P<digit>\d+)/')
140 d(1)=="foobar"
141 d(2)=="2012"
142
143     ]]></programlisting>
144     </refsection>
145     <refsection role="see also">
146         <title>See Also</title>
147         <simplelist type="inline">
148             <member>
149                 <link linkend="strindex">strindex</link>
150             </member>
151         </simplelist>
152     </refsection>
153     <refsection>
154         <title>History</title>
155         <revhistory>
156             <revision>
157                 <revnumber>5.4.0</revnumber>
158                 <revremark>
159                     A new output argument, foundString, has been added to retrieve subpattern matches.
160                 </revremark>
161             </revision>
162         </revhistory>
163     </refsection>
164 </refentry>