vectorfind() upgrade: short needle, wildcard, hypermats, %nan
[scilab.git] / scilab / modules / elementary_functions / help / en_US / searchandsort / vectorfind.xml
1 <?xml version="1.0" encoding="UTF-8"?>
2 <!--
3  * Scilab ( http://www.scilab.org/ ) - This file is part of Scilab
4  * Copyright (C) 2008 - INRIA - Serge STEER
5  * Copyright (C) 2008 - INRIA - Ramine NIKOUKHAH
6  * Copyright (C) 2010 - DIGITEO - Vincent COUVERT
7  * Copyright (C) 2017 - Samuel GOUGEON
8  *
9  * Copyright (C) 2012 - 2016 - Scilab Enterprises
10  *
11  * This file is hereby licensed under the terms of the GNU GPL v2.0,
12  * pursuant to article 5.3.4 of the CeCILL v.2.1.
13  * This file was originally licensed under the terms of the CeCILL v2.1,
14  * and continues to be available under such terms.
15  * For more information, see the COPYING file which you should have received
16  * along with this program.
17  *
18  -->
19 <refentry xmlns="http://docbook.org/ns/docbook" xmlns:xlink="http://www.w3.org/1999/xlink"
20         xmlns:svg="http://www.w3.org/2000/svg" xmlns:ns5="http://www.w3.org/1999/xhtml"
21         xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:db="http://docbook.org/ns/docbook"
22         xmlns:scilab="http://www.scilab.org" xml:id="vectorfind" xml:lang="en">
23     <refnamediv>
24         <refname>vectorfind</refname>
25         <refpurpose>locates occurrences of a (wildcarded) vector in a matrix or hypermatrix</refpurpose>
26     </refnamediv>
27     <refsynopsisdiv>
28         <title>Syntax</title>
29         <synopsis>
30             ind             = vectorfind(haystack, needle)
31             ind             = vectorfind(haystack, needle, dimAlong)
32             ind             = vectorfind(haystack, needle, dimAlong, ,indType)
33             [ind, matching] = vectorfind(haystack, needle, dimAlong, joker)
34             [ind, matching] = vectorfind(haystack, needle, dimAlong, joker, indType)
35         </synopsis>
36     </refsynopsisdiv>
37     <refsection>
38         <title>Arguments</title>
39         <variablelist>
40             <varlistentry>
41                 <term>haystack</term>
42                 <listitem>
43                     <para>
44                     A matrix or hypermatrix of any type, possibly sparse encoded: The array in
45                     which the vector will be searched.
46                     </para>
47                 </listitem>
48             </varlistentry>
49             <varlistentry>
50                 <term>needle</term>
51                 <listitem>
52                     <para>
53                     The vector to be searched in the <varname>haystack</varname>, of the same type.
54                     If the <varname>haystack</varname> is sparse-encoded, the <varname>needle</varname>
55                     may be dense. In addition, if the <varname>haystack</varname> is boolean and a
56                     <varname>joker</varname> is used, the <varname>needle</varname> must be numerical
57                     instead of boolean. In this case, any of its non-zero components stands for
58                     <literal>%T</literal>.
59                     </para>
60                     <note>
61                         <itemizedlist>
62                             <listitem>
63                                 Decimal numbers, complex numbers, and encoded integers are considered
64                                 of the same type: numerical.
65                             </listitem>
66                             <listitem>
67                                 <literal>%nan</literal> values are accepted in the <varname>needle</varname>.
68                                 They are processed in a regular way, as other values. They are matched only
69                                 by <literal>%nan</literal> in the <varname>haystack</varname>.
70                             </listitem>
71                         </itemizedlist>
72                     </note>
73                 </listitem>
74             </varlistentry>
75             <varlistentry>
76                 <term>dimAlong</term>
77                 <listitem>
78                     <para>Direction inside the <varname>haystack</varname> array along which the
79                     <varname>needle</varname> vector is searched. Possible values are
80                     <literal>"r"</literal> or <literal>1</literal> (along rows),
81                     <literal>"c"</literal> or <literal>2</literal> (along columns),
82                     or, for a hypermatrix, any integer such that
83                     <literal>2 &lt; dimAlong &lt;= ndims(haystack)</literal>
84                     representing the index of the scanned dimension.
85                     By default, <literal>"r"</literal> is used.
86                     </para>
87                     <important>
88                         <varname>dimAlong</varname> is mandatory when a <varname>joker</varname>
89                         or <varname>indType</varname> is specified.
90                     </important>
91                 </listitem>
92             </varlistentry>
93             <varlistentry>
94                 <term>joker</term>
95                 <listitem>
96                     <para>
97                     Single element of <varname>needle</varname>'s data type.
98                     The <varname>needle</varname> components equal to the <varname>joker</varname>
99                     are ignored (they match/accept any values from the <varname>haystack</varname>).
100                     </para>
101                     <para>
102                         When the haystack is boolean, the <varname>joker</varname> must be a
103                         non-zero number.
104                     </para>
105                     <para>
106                         To skip the <varname>joker</varname>, specify
107                         <literal>..dimAlong, ,indType</literal> with no joker value.
108                     </para>
109                 </listitem>
110             </varlistentry>
111             <varlistentry>
112                 <term>indType</term>
113                 <listitem>
114                     <para>Single case-insensitive word among <literal>""</literal>
115                     (empty text = default), <literal>"headIJK"</literal>, and <literal>"headN"</literal>:
116                     Specifies the format of the returned indices. See below the description of
117                     <varname>ind</varname>.
118                     </para>
119                 </listitem>
120             </varlistentry>
121             <varlistentry>
122                 <term>ind</term>
123                 <listitem>
124                     <para>
125                     <itemizedlist>
126                         <listitem>
127                             <para>
128                             When the <varname>needle</varname> is longer than the
129                             <varname>haystack</varname> size along the chosen dimension
130                             <varname>dimAlong</varname>, <varname>ind=[]</varname> is returned.
131                             </para>
132                         </listitem>
133                         <listitem>
134                             <para>
135                             When the <varname>needle</varname>'s length matches the
136                             <varname>haystack</varname> size along the chosen dimension,
137                             <itemizedlist>
138                                 <listitem>
139                                     <para>
140                                     <emphasis role="bold">By default</emphasis>
141                                     (<varname>indType==""</varname>):
142                                     <varname>ind</varname> is a row vector containing the indices
143                                     of matching rows or columns of the haystack. In case of a hypermatrix,
144                                     returned indices of matching ranges are linearized across all
145                                     dimensions but the <varname>dimAlong</varname> one (see examples).
146                                     </para>
147                                 </listitem>
148                                 <listitem>
149                                     <para>
150                                     <emphasis role="bold">indType="headN"</emphasis>:
151                                     <varname>ind</varname> is the row vector of
152                                     <emphasis role="italic">linear</emphasis> indices in
153                                     the <varname>haystack</varname> of the heading component of its
154                                     matching rows, columns, or higher ranges.
155                                     </para>
156                                 </listitem>
157                                 <listitem>
158                                     <para>
159                                     <emphasis role="bold">indType="headIJK"</emphasis>:
160                                     <varname>ind</varname> is a matrix: Each row returns the
161                                     <literal>[i j ..]</literal>
162                                     indices in the <varname>haystack</varname> of the heading
163                                     component of its matching ranges (rows, columns, or higher ranges).
164                                     <varname>ind</varname> has as many rows as there are matching
165                                     ranges in the <varname>haystack</varname>.
166                                     </para>
167                                 </listitem>
168                             </itemizedlist>
169                             </para>
170                         </listitem>
171                         <listitem>
172                             <para>
173                             Otherwise (short needle): By default,
174                             <varname>ind</varname> is the row vector of linear indices of the
175                             components of the <varname>haystack</varname> where matching ranges
176                             start. Using the <literal>indType="headN"</literal> option does
177                             nothing more. Using <literal>indType="headIJK"</literal> returns
178                             <varname>ind</varname> as a matrix of <literal>[i j k ..]</literal>
179                             indices, as described here-above.
180                             </para>
181                         </listitem>
182                     </itemizedlist>
183                     </para>
184                     <note>Returned indices are sorted in increasing order.</note>
185                 </listitem>
186             </varlistentry>
187             <varlistentry>
188                 <term>matching</term>
189                 <listitem>
190                     <para>When a joker is used, this <varname>matching</varname> optional output
191                     is a matrix of haystack's data type returning the actual matching ranges:
192                     The matching range number #i is returned in the row <literal>matching(i,:)</literal>.
193                     </para>
194                     <note>
195                         When the <varname>haystack</varname> is sparse-encoded,
196                         the <varname>matching</varname> matrix is sparse as well.
197                     </note>
198                 </listitem>
199             </varlistentry>
200         </variablelist>
201     </refsection>
202     <refsection>
203         <title>Description</title>
204         <para>
205             <literal>vectorfind()</literal> looks for a given series of values (needle) in a
206             haystack array, along a given right direction/dimension: width (rows), height (columns),
207             thickness (like RGB pixels), etc. The needle may be as long as or shorter than the size
208             of the probed side of the haystack.
209         </para>
210         <para>
211             A special value, the so-called <emphasis role="italic">joker</emphasis>, may be specified.
212             This value then works as a wildcard wherever it occurs in the needle vector.
213             Since this value is no longer selective -- ANY value from the haystack matches at its
214             position --, it can't simultaneously be used in the needle as a selective one.
215             In practice, any value not present in the haystack necessarily makes a good joker.
216             However, this condition is not mandatory.
217         </para>
218         <para>
219             Consequence: When the haystack is boolean, the joker -- and so the needle vector as
220             well -- must be numerical. Indeed, it would be otherwise impossible to choose
221             a joker value out of the {%T, %F} limited set of values.
222         </para>
223         <para>
224             When such a wildcard is used, actual values in matching ranges are not fixed. It is
225             then possible to retrieve them thanks to the
226             <literal>matching</literal> optional output. Otherwise, <literal>matching</literal>
227             is empty (it would only be a trivial repetition of the needle vector).
228         </para>
229         <refsect3>
230             <title>Search in hypermatrices</title>
231             <para>
232             Using <literal>vectorfind()</literal> with a hypermatrix haystack deserves some
233             special attention:
234             <itemizedlist>
235                 <listitem>
236                     <para>About the direction value <literal>dimAlong</literal>:
237                     </para>
238                     <para>
239                         For instance, we
240                        can then probe the haystack array in "thickness", i.e.
241                        <emphasis role="italic">across</emphasis> its successive layers
242                        <literal>haystack(:,:,#,..)</literal>. To do so, we will here specify
243                        <literal>dimAlong = 3</literal>.
244                     </para>
245                     <para>
246                     Like for matrices, this kind of high-dimension array can be
247                     scanned <emphasis role="italic">along</emphasis> its rows or columns.
248                     The corresponding <literal>dimAlong</literal> values have there some exceptions:
249                     <itemizedlist>
250                         <listitem>
251                             Searching the needle <emphasis role="italic">as rows</emphasis>
252                             is scanning the array
253                             <emphasis role="italic">across its columns</emphasis>. Therefore,
254                             the <literal>dimAlong = "r"</literal> value should be equivalent to
255                             <literal>dimAlong = 2</literal> instead of 1!
256                         </listitem>
257                         <listitem>
258                             In the same way, searching the needle
259                             <emphasis role="italic">as columns</emphasis> is scanning the array
260                             <emphasis role="italic">across its rows</emphasis>: The usual
261                             value
262                             <literal>dimAlong = "c"</literal> should be equivalent to
263                             <literal>dimAlong = 1</literal> instead of 2!
264                         </listitem>
265                     </itemizedlist>
266                     </para>
267                     <para>
268                     In order to comply with the common convention <literal>"r"&lt;=>1</literal>
269                     and <literal>"c"&lt;=>2</literal> used everywhere in Scilab,
270                     <literal>vectorfind()</literal> keeps and copes with it. But one should keep in
271                     mind the underlying switch, to have a clear understanding of the returned
272                     default indices when <literal>"r",1</literal> or <literal>"c",2</literal>
273                     are used.
274                     </para>
275                 </listitem>
276                 <listitem>
277                     <para>
278                     About returned indices of matching rows, columns, "pixels"... when the needle
279                     is as long as the haystack side size and no <literal>indType</literal> option
280                     is used:
281                     </para>
282                     <para>
283                     Indices of matching ranges are then <emphasis role="italic">linear</emphasis>
284                     indices of components of the following subspaces:
285                     <itemizedlist>
286                         <listitem>
287                             With <literal>dimAlong = "r" = 1</literal>:
288                             in <literal>haystack(:,1,:,:..)</literal>
289                         </listitem>
290                         <listitem>
291                             With <literal>dimAlong = "c" = 2</literal>:
292                             in <literal>haystack(1,:,:,:..)</literal>
293                         </listitem>
294                         <listitem>
295                             With <literal>dimAlong = 3</literal>:
296                             in <literal>haystack(:,:,1,:..)</literal>
297                         </listitem>
298                         <listitem>
299                             With <literal>dimAlong = 4</literal>:
300                             in <literal>haystack(:,:,:,1,:..)</literal>.
301                         </listitem>
302                         <listitem>
303                             etc...
304                         </listitem>
305                     </itemizedlist>
306                     The case of a 3D and of a 4D array is dealt with in the Examples section.
307                     </para>
308                     <para>Although they are easy to understand and use for a simple matrix,
309                     it is somewhat hard to work with these linear indices in the haystack subspace
310                     to actually address the matching ranges in an N-dimensional array with N>2.
311                     The option <literal>indType = "headN" | "headIJK"</literal> will then return
312                     more workable indices referring to the whole <literal>haystack</literal> array.</para>
313                 </listitem>
314             </itemizedlist>
315             </para>
316         </refsect3>
317     </refsection>
318     <refsection>
319         <title>Examples</title>
320         <para>
321             <emphasis role="bold">In a matrix of numbers:</emphasis>
322         </para>
323         <programlisting role="example"><![CDATA[
324 m = [ 1  0   1   2  2  1
325       2  2   0   1  0  2
326       0  2  %nan 2  1  2
327       2 %nan 1   0  1  2
328     ];
329 vectorfind(m,[2 0 1 1], "c")            // => 5
330 vectorfind(m,[2 0 1 1], "c",,"headN")   // => 17
329 vectorfind(m,[2 0 1 1], "c",,"headIJK") // => [1 5]
332
333 // With a short needle:
334 vectorfind(m,[2 2])                     // => [2 13]
335 vectorfind(m,[2 2], "r",,"headN")       // same output
336 vectorfind(m,[2 2], "r",,"headIJK")     // => [2 1 ; 1 4]
337 vectorfind(m,[2 %nan])                  // => [4 7]
338
339 // With a wildcard in the needle:
340
341 // ex #1: All columns starting with 1 and ending with 2:
342 [n, ma] = vectorfind(m,[1 .3 .3 2], "c", .3) // => n = [1 6], ma = [1 2 0 2; 1 2 2 2]
343
344 // ex #2: All rows having a [2 * 2] range (wildcarded short needle):
345 [n, ma] = vectorfind(m,[2 .3  2], "r", .3)   // => n = [7 15], ma = [2 %nan 2; 2 1 2]
346 vectorfind(m,[2 .3  2], "r", .3, "headIJK")  // => [3 2 ; 3 4]
347                                              // Note: The %nan is matched by *
348  ]]></programlisting>
349         <para>
350             <emphasis role="bold">In a boolean matrix:</emphasis>
351         </para>
352         <programlisting role="example"><![CDATA[
353 m = [0  0  0  1  1  0
354      0  1  1  1  0  1
355      1  1  0  1  1  1
356      1  0  1  0  0  1]==1
357 // m  =
358 //  F F F T T F
359 //  F T T T F T
360 //  T T F T T T
361 //  T F T F F T
362 vectorfind(m, [%F %T %T %F], "c")   // => 2
363 vectorfind(m, [%T %T], "c")         // => [3 6 13 14 22 23]
364 vectorfind(m, [1 1], "c")           // => error: same type expected
365 // Joker => the needle is numerical:
366 [n, ma] = vectorfind(m, [0 %nan 0 %nan 1], "r", %nan) // => n=[1 8], ma=[F F F T T ; F T F F T]
367  ]]></programlisting>
368
369         <para>
370             <emphasis role="bold">In a tiny 8-color RGB image (3D hypermatrix of uint8 integers):</emphasis>
371         </para>
372         <programlisting role="example"><![CDATA[
373 // Generating the array of color brightnesses:
374 m = [1  1  1  1  1  0  1  0  0  0  1  0  1  0  0
375      1  1  0  0  0  0  1  0  1  0  1  1  1  1  1
376      1  1  0  1  0  1  1  0  0  1  1  0  0  1  0];
377 m = uint8(matrix(m,3,5,3)*255)
378 // m  =
379 //(:,:,1)                   // RED layer
380 //  255  255  255  255  255
381 //  255  255    0    0    0
382 //  255  255    0  255    0
383 //(:,:,2)                   // GREEN layer
384 //    0  255    0    0    0
385 //    0  255    0  255    0
386 //  255  255    0    0  255
387 //(:,:,3)                   // BLUE layer
388 //  255    0  255    0    0
389 //  255  255  255  255  255
390 //  255    0    0  255    0
391
392 // Locates red pixels:
393 vectorfind(m, [255 0 0], 3)             // => [10 13]
394 vectorfind(m, [255 0 0], 3,,"headIJK")  // => [1 4 1 ; 1 5 1]
395
396 // Pixels with Green & Blue ON, whatever is their Red channel:
397 //   We may use a decimal-encoded needle (not a uint8).
398 //   Then, %nan is a possible joker, that can't be in the uint8 image:
399 vectorfind(m, [%nan 255 255], 3, %nan,"headIJK") // => [3 1 1; 2 2 1; 2 4 1]
400
401 // Columns of 255:
402 vectorfind(m, [255 255 255], "c")      // => [1 2 7 11]
403  ]]></programlisting>
404         <para>
405             <emphasis role="bold">In a 4D hypermatrix of text:</emphasis>
406         </para>
407         <programlisting role="example"><![CDATA[
408 m  = [
409   "U"  "C"  "G"  "A"  "A"  "A"  "U"  "U"  "A"  "G"  "A"  "G"
410   "A"  "A"  "A"  "A"  "C"  "C"  "U"  "U"  "C"  "G"  "G"  "G"
411   "A"  "G"  "A"  "C"  "G"  "C"  "C"  "C"  "G"  "C"  "A"  "G"
412   "C"  "U"  "G"  "G"  "G"  "A"  "A"  "G"  "C"  "C"  "C"  "C"
413   "C"  "G"  "G"  "A"  "A"  "G"  "U"  "C"  "A"  "U"  "G"  "C"
414   ];
415 m = matrix(m, 3, 5, 2, 2);
416 // (:,:,1,1)
417 // !U  C  A  G  A  !
418 // !A  C  G  G  G  !
419 // !A  C  U  A  G  !
420 //(:,:,2,1)
421 // !A  G  C  A  C  !
422 // !A  A  G  A  A  !
423 // !C  A  G  C  G  !
424 //(:,:,1,2)
425 // !U  A  U  C  G  !
426 // !U  U  C  A  C  !
427 // !C  U  G  C  A  !
428 //(:,:,2,2)
429 // !G  C  G  G  G  !
430 // !G  U  A  G  C  !
431 // !C  A  C  G  C  !
432
433 vectorfind(m, ["A" "A" "C"], "c")       // => [6 9]
434 vectorfind(m, [""  "G" "G"], "c", "")   // => [5 8 19]
435
436 // Joker
437 [n, ma] = vectorfind(m, ["" "G" "G"], "c", "", "headN") // => n=[13 22 55], ma=[A G G; C G G; G G G]
438 vectorfind(m, ["" "C" "C"], "c", "", "headIJK") // => [1 2 1 1 ; 1 5 2 2]
439
440 // Short needle
441 vectorfind(m, ["C" "C"], "c",,"headIJK")        // => [1 2 1 1; 2 2 1 1; 2 5 2 2]
442
443 // Short needle with joker
444 vectorfind(m, ["A" "" "A"],"r","","headIJK")    // => [1 3 1 1 ; 2 2 2 1]
445 ]]></programlisting>
446     </refsection>
447     <refsection role="see also">
448         <title>See also</title>
449         <simplelist type="inline">
450             <member>
451                 <link linkend="find">find</link>
452             </member>
453             <member>
454                 <link linkend="members">members</link>
455             </member>
456             <member>
457                 <link linkend="grep">grep</link>
458             </member>
459         </simplelist>
460     </refsection>
461     <refsection role="history">
462         <title>History</title>
463         <revhistory>
464             <revision>
465                 <revnumber>6.1</revnumber>
466                 <revdescription>
467                     <itemizedlist>
468                         <listitem>
469                             <literal>vectorfind(H,[])</literal> now returns <literal>[]</literal>
470                             instead of an error.
471                         </listitem>
472                         <listitem>
473                             When the needle is too long, <literal>[]</literal> is now returned
474                             instead of an error.
475                         </listitem>
476                         <listitem>
477                             A needle shorter than the haystack size can now be used.
478                         </listitem>
479                         <listitem>
480                             A wildcard value matched by any value of the haystack can now be specified
481                             and used in the needle. Then, actual matching ranges can be returned:
482                             Options <literal>joker</literal> and <literal>matching</literal> added.
483                         </listitem>
484                         <listitem>
485                             Any <literal>%nan</literal> value occurring in the needle is now processed
486                             as any other regular value: It is matched by <literal>%nan</literal> in
487                             the haystack. It could formerly never be matched.
488                         </listitem>
489                         <listitem>
490                             Hypermatrices can now be processed as haystack.
491                         </listitem>
492                         <listitem>
493                             The probing direction <literal>dimAlong</literal> can now be numerical:
494                             1, 2, ..
495                         </listitem>
496                         <listitem>
497                             Option <literal>indType</literal> added.
498                         </listitem>
499                     </itemizedlist>
500                 </revdescription>
501             </revision>
502         </revhistory>
503     </refsection>
504
505 </refentry>