histc() reforged
[scilab.git] / scilab / modules / statistics / help / en_US / 4_distribution_empirical_shape / histc.xml
1 <?xml version='1.0' encoding='UTF-8'?>
2 <!--
3  * Scilab ( http://www.scilab.org/ ) - This file is part of Scilab
4  * Copyright (C) 2012 - 2016 - Scilab Enterprises
5  * Copyright (C) 2013 - Scilab Enterprises - Paul Bignier
6  * Copyright (C) 2017 - 2018 - Samuel GOUGEON
7  *
8  * This file is hereby licensed under the terms of the GNU GPL v2.0,
9  * pursuant to article 5.3.4 of the CeCILL v.2.1.
10  * This file was originally licensed under the terms of the CeCILL v2.1,
11  * and continues to be available under such terms.
12  * For more information, see the COPYING file which you should have received
13  * along with this program.
14  *
15  -->
16 <refentry xmlns="http://docbook.org/ns/docbook" xmlns:xlink="http://www.w3.org/1999/xlink"
17           xmlns:svg="http://www.w3.org/2000/svg" xmlns:mml="http://www.w3.org/1998/Math/MathML"
18           xmlns:db="http://docbook.org/ns/docbook" xmlns:scilab="http://www.scilab.org"
19           xml:lang="en" xml:id="histc">
20     <refnamediv>
21         <refname>histc</refname>
22         <refpurpose>computes the histogram of a simple series of data</refpurpose>
23     </refnamediv>
24     <refsynopsisdiv>
25         <title>Syntax</title>
26         <synopsis>
27             Heights = histc(Data)
28             Heights = histc(Data, nbins)
29             Heights = histc(Data, -binsWidth)
30             Heights = histc(Data, binsAlgo)
31             Heights = histc(Data, binsEdges)
32             Heights = histc(Data, binsValues [, "discrete"])
33             Heights = histc(Data,    , Options)
34             Heights = histc(Data, .. , Options)
35             [Heights, jokers]              = histc(Data, ..)
36             [Heights, jokers, bins]        = histc(Data, ..)
37             [Heights, jokers, bins, inBin] = histc(Data, ..)
38         </synopsis>
39     </refsynopsisdiv>
40     <refsection role="arguments">
41         <title>Arguments</title>
42         <refsect2>
43             <title>Input arguments</title>
44             <variablelist>
45                 <varlistentry>
46                     <term>Data</term>
47                     <listitem>
48                         <para>
49                             vector, matrix or hypermatrix of encoded integers, decimal numbers,
50                             complex numbers, polynomials, or texts. Sparse-encoded matrices are accepted.
51                             <itemizedlist>
52                                 <listitem>
53                                     <literal>Data</literal> must have at least 2 components.
54                                     <literal>histc([],..)</literal> returns <literal>[]</literal>
55                                     for every output argument.
56                                 </listitem>
57                                 <listitem>
58                                     Numerical <literal>Data</literal> may include
59                                     <literal>Inf</literal>inite or <literal>NaN</literal> values.
60                                     However,
61                                     <literal>NaN</literal> values are never binned in the histogram;
62                                     <literal>Inf</literal>inite values can be binned only in categorial
63                                     histograms.
64                                 </listitem>
65                                 <listitem>
66                                     Textual <literal>Data</literal> may include empty texts
67                                     <literal>""</literal> or extended-ascii or UTF-8 characters.
68                                 </listitem>
69                             </itemizedlist>
70                         </para>
71                     </listitem>
72                 </varlistentry>
73                 <varlistentry>
74                     <term>Binning:</term>
75                     <listitem>
76                         <para>
77                             <literal>histc</literal> allows defining the set of histogram bins in
78                             several ways, depending on the <literal>Data</literal> type and on the need.
79                             Two major binning types / histogram modes can be used:
80                             <itemizedlist>
81                                 <listitem>
82                                     <para>
83                                         <emphasis role="bold">
84                                             continuous contiguous ranging bins
85                                         </emphasis>
86                                         : this is meaningful
87                                         when <varname>Data</varname> values are sortable. This is
88                                         the case for encoded integers, decimal numbers, and texts.
89                                         <important>
90                                             <itemizedlist>
91                                                 <listitem>
92                                                     <literal>histc()</literal> continuously bins complex
93                                                     numbers considering only their real parts.
94                                                 </listitem>
95                                                 <listitem>
96                                                     Any number with either a real or imaginary part set to
97                                                     <literal>%nan</literal>, <literal>-%inf</literal>, or to
98                                                     <literal>+%inf</literal> is excluded from bins and from
99                                                     the histogram.
100                                                 </listitem>
101                                                 <listitem>
102                                                     For sparse-encoded <varname>Data</varname>, the zero
103                                                     value is not taken into account to define the whole
104                                                     binning range.
105                                                 </listitem>
106                                             </itemizedlist>
107                                         </important>
108                                     </para>
109                                     <para>
110                                         In this case, bins are defined by their edges. For a given bin,
111                                         any data value being between the bin's edges belongs to it.
112                                     </para>
113                                 </listitem>
114                                 <listitem>
115                                     <para>
116                                         <emphasis role="bold">
117                                             discrete / categorial binning mode
118                                         </emphasis>
119                                         :this can be used for any <literal>Data</literal>
120                                         type. It is the only binning mode available for polynomial data.
121                                     </para>
122                                     <para>
123                                         A categorial bin -- aka category -- is defined by its value:
124                                         any data belongs to the bin if its value
125                                         <emphasis role="italic">is equal to</emphasis> the bin's value.
126                                         <important>
127                                             Any <literal>Data</literal> or bin's value being
128                                             <literal>NaN</literal> is discarded before computing the
129                                             categorial histogram.
130                                         </important>
131                                     </para>
132                                 </listitem>
133                             </itemizedlist>
134                             <variablelist>
135                                 <varlistentry>
136                                     <term>(default)</term>
137                                     <listitem>
138                                         <para>
139                                             When no binning specification is provided,
140                                             <itemizedlist>
141                                                 <listitem>
142                                                     For integers, decimal, or complex numbers, the
143                                                     <literal>"sqrt"</literal> binning algorithm is used.
144                                                     See below for more information.
145                                                 </listitem>
146                                                 <listitem>
147                                                     For texts and polynomials: the histogram is
148                                                     computed in <literal>"discrete"</literal> mode, with
149                                                     as many bins as there are distinct data entries.
150                                                 </listitem>
151                                             </itemizedlist>
152                                         </para>
153                                     </listitem>
154                                 </varlistentry>
155                                 <varlistentry>
156                                     <term>nbins</term>
157                                     <listitem>
158                                         <para>
159                                             single positive integer: required number of contiguous
160                                             bins of equal widths covering the whole range of
161                                             non-infinite <literal>Data</literal> values.
162                                             <warning>
163                                                 This binning specification cannot be used for text
164                                                 <literal>Data</literal>.
165                                             </warning>
166                                         </para>
167                                     </listitem>
168                                 </varlistentry>
169                                 <varlistentry>
170                                     <term>binsWidth</term>
171                                     <listitem>
172                                         <para>
173                                             Single decimal number > 0 specifying the bins width for all
174                                             bins. Its opposite <literal>-binsWidth &lt; 0</literal>
175                                             must be provided in input (to not get confused with
176                                             <literal>nbins</literal> that is already a single positive
177                                             number).
178                                         </para>
179                                     </listitem>
180                                 </varlistentry>
181                                 <varlistentry>
182                                     <term>binsAlgo</term>
183                                     <listitem>
184                                         <para>
185                                             Single text word among the ones described here-below. These
186                                             automatic binning modes can be used for encoded integers,
187                                             decimal, or complex numbers. None of them can be used for
188                                             texts or polynomial data.
189                                         </para>
190                                         <para>
191                                             For these 3 modes, the whole range of data values is divided
192                                             into nB bins of equal widths. nB is set according to the
193                                             chosen algorithm as follows.
194                                         </para>
195                                         <para>
196                                             <table>
197                                                 <tr>
198                                                     <td valign="top">
199                                                         <emphasis role="bold">"sqrt"</emphasis>:
200                                                     </td>
201                                                     <td>
202                                                         <para>
203                                                             nB is set to the square-root of the number
204                                                             <literal>Nvalid</literal> of
205                                                             valid data in <varname>Data</varname>, in such a
206                                                             way that there are as many bins as the
207                                                             average number of counts in bins. The vertical
208                                                             average relative resolution
209                                                             <literal>1 count / nB counts = 1/nB</literal>
210                                                             of the histogram is then similar to the
211                                                             horizontal one
212                                                             <literal>binWidth/range = (range/nB)/range = 1/nB</literal>
213                                                         </para>
214                                                         <para>
215                                                             However, for encoded integers data, if the
216                                                             data range
217                                                             <literal>dR=max(Data)-min(Data)+1</literal>
218                                                             is narrower than nB, nB is then set to dR,
219                                                             so setting the bins width to 1. Bins are then
220                                                             automatically centered on integer values in the
221                                                             range.
222                                                         </para>
223                                                     </td>
224                                                 </tr>
225                                                 <tr>
226                                                     <td valign="top">
227                                                         <emphasis role="bold">"freediac"</emphasis>:
228                                                     </td>
229                                                     <td>
230                                                         Freedman-Diaconis binning criterion:
231                                                         <code>nB = round(strange(Data)/binWidth)</code>
232                                                         with
233                                                         <code>binWidth = 2*iqr(Data)* Nvalid^(-1/3)</code>.
234                                                     </td>
235                                                 </tr>
236                                                 <tr>
237                                                     <td valign="top">
238                                                         <emphasis role="bold">"sturges"</emphasis>:
239                                                     </td>
240                                                     <td>
241                                                         Sturges binning criterion:
242                                                         <literal>nB = ceil(1 + log2(Nvalid))</literal>
243                                                     </td>
244                                                 </tr>
245                                             </table>
246                                         </para>
247                                     </listitem>
248                                 </varlistentry>
249                                 <varlistentry>
250                                     <term>binsEdges</term>
251                                     <listitem>
252                                         <para>
253                                             Vector of values sorted in strict increasing order (without
254                                             duplicates). N bins edges define N-1 bins.
255                                             For encoded integers <varname>Data</varname>,
256                                             <varname>binsEdges</varname> can be decimal numbers. For
257                                             complex numbers <varname>Data</varname>, decimal numbers are
258                                             expected in <varname>binsEdges</varname>: only the distribution
259                                             of real parts is considered.
260                                         </para>
261                                         <para>
262                                             <itemizedlist>
263                                                 <listitem>
264                                                     First bin: Any non-infinite <varname>Data</varname>
265                                                     component belonging to the closed interval
266                                                     <literal>[binsEdges(1), binsEdges(2)]</literal>
267                                                     belongs to the first bin and is accounted in
268                                                     the <literal>Heights(1)</literal> count.
269                                                 </listitem>
270                                                 <listitem>
271                                                     Next bins # i>1: Any non-infinite
272                                                     <varname>Data</varname> component belonging to
273                                                     the semi-open interval
274                                                     <literal>]binsEdges(i), binsEdges(i+1)]</literal>
275                                                     belongs to the bin #i and is accounted in
276                                                     the <literal>Heights(i)</literal> count.
277                                                 </listitem>
278                                             </itemizedlist>
279                                         </para>
280                                         <para>
281                                             <emphasis role="bold">Marginal bins:</emphasis>
282                                         </para>
283                                         <para>
284                                             For numerical and text <varname>Data</varname>, the first
285                                             or/and the last <varname>binsEdges</varname> components
286                                             may be set to collect and count
287                                             in marginal bins all non-infinite <varname>Data</varname>
288                                             components
289                                             remaining in the left and right wings of the complete
290                                             histogram:
291                                             <itemizedlist>
292                                                 <listitem>
293                                                     <emphasis role="italic">Left wing:</emphasis> set
294                                                     <itemizedlist>
295                                                         <listitem>
296                                                             <literal>binsEdges(1) =
297                                                                 <emphasis role="bold">-%inf</emphasis>
298                                                             </literal>
299                                                             ,
300                                                             or
301                                                         </listitem>
302                                                         <listitem>
303                                                             <literal>binsEdges(1) = ""</literal>
304                                                         </listitem>
305                                                     </itemizedlist>
306                                                     Then,
307                                                     <itemizedlist>
308                                                         <listitem>
309                                                             <varname>Data</varname> entries such that
310                                                             <literal>Data &lt; binsEdges(2)</literal> are
311                                                             counted in <literal>Heights(1)</literal>.
312                                                         </listitem>
313                                                         <listitem>
314                                                             The actual <literal>bins(1)</literal> edge
315                                                             is set to <literal>min(Data)</literal>.
316                                                         </listitem>
317                                                     </itemizedlist>
318                                                 </listitem>
319                                                 <listitem>
320                                                     <para></para>
321                                                     <emphasis role="italic">Right wing:</emphasis> set
322                                                     <itemizedlist>
323                                                         <listitem>
324                                                             <literal>binsEdges($) =
325                                                                 <emphasis role="bold">%inf</emphasis>
326                                                             </literal>
327                                                             ,
328                                                             or
329                                                         </listitem>
330                                                         <listitem>
331                                                             <literal>binsEdges($) = "~~"</literal> (for
332                                                             texts in standard ascii,
333                                                             <literal>ascii(126)=="~"</literal> is the
334                                                             last printable character)
335                                                         </listitem>
336                                                     </itemizedlist>
337                                                     Then,
338                                                     <itemizedlist>
339                                                         <listitem>
340                                                             <varname>Data</varname> entries such that
341                                                             <literal>Data > binsEdges($-1)</literal> are
342                                                             counted in <literal>Heights($)</literal>.
343                                                         </listitem>
344                                                         <listitem>
345                                                             The actual <literal>bins($)</literal> edge
346                                                             is set to <literal>max(Data)</literal>.
347                                                         </listitem>
348                                                     </itemizedlist>
349                                                 </listitem>
350                                             </itemizedlist>
351                                         </para>
352                                     </listitem>
353                                 </varlistentry>
354                                 <varlistentry>
355                                     <term>binsValues</term>
356                                     <listitem>
357                                         <para>
358                                             For polynomial <varname>Data</varname> or when the
359                                             <varname>"discrete"</varname> option is used,
360                                             <varname>binsValues</varname> provides values whose
361                                             occurrences in <varname>Data</varname> must be counted.
362                                         </para>
363                                         <para>
364                                             <itemizedlist>
365                                                 <listitem>
366                                                     Duplicates and <literal>%nan</literal> values are
367                                                     removed beforehand from <varname>binsValues</varname>.
368                                                 </listitem>
369                                                 <listitem>
370                                                     <varname>binsValues</varname> may include some
371                                                     <literal>%inf</literal> values. However,
372                                                     for encoded integers <varname>Data</varname>,
373                                                     any <literal>%inf</literal> value is removed
374                                                     before processing.
375                                                 </listitem>
376                                                 <listitem>
377                                                     Components of <varname>binsValues</varname> may be
378                                                     unsorted: the order of <varname>binsValues</varname>
379                                                     components is kept as is in the
380                                                     <varname>Heights</varname> output vector.
381                                                 </listitem>
382                                             </itemizedlist>
383
384                                         </para>
385                                     </listitem>
386                                 </varlistentry>
387                             </variablelist>
388                         </para>
389                     </listitem>
390                 </varlistentry>
391                 <varlistentry>
392                     <term>Options</term>
393                     <listitem>
394                         <para>
395                             <literal>Options</literal> is either a vector of textual flags, or equivalently
396                             a single word of <emphasis role="italic">comma-separated</emphasis>
397                             concatenated flags, or both. All flags are
398                             <emphasis role="italic">case-insensitive</emphasis> and can be specified
399                             <emphasis role="italic">in any order</emphasis>.
400                         </para>
401                         <para>
402                             Examples: The following options specifications are equivalent:
403                             <literal>["discrete" "countsNorm" "normWith: Out Inf"]</literal>, or
404                             <literal>["countsNORM" "NORMwith: inf out" "Discrete" ]</literal>, or
405                             <literal>["normWith: INF OUT", "discrete, countsNorm" ]</literal>, or simply
406                             <literal>"discrete,countsNorm,normWith: inf out"</literal>.
407                         </para>
408                         <para>
409                             <variablelist>
410                                 <varlistentry>
411                                     <term>"discrete"</term>
412                                     <listitem>
413                                         <para>
414                                             This flag must be used when a discrete / categorial histogram is
415                                             required. Then, the vector provided in argument #2 with at
416                                             least 2 components sets
417                                             <emphasis role="italic">bins values</emphasis> instead of
418                                             <emphasis role="italic">bins edges</emphasis> (by default).
419                                             <note>
420                                                 Presently, polynomial <varname>Data</varname> are always
421                                                 processed in a categorial way. The <varname>"discrete"</varname>
422                                                 flag looks then useless. However, in a future release, polynomials
423                                                 could become sortable. Using the <varname>"discrete"</varname>
424                                                 flag does not hurt and would avoid future back-compatibility
425                                                 issues.
426                                             </note>
427                                         </para>
428                                     </listitem>
429                                 </varlistentry>
430                                 <varlistentry>
431                                     <term>Histogram scale:</term>
432                                     <listitem>
433                                         <table>
434                                             <tr>
435                                                 <td valign="top">
436                                                     <emphasis role="bold">"counts"</emphasis>
437                                                 </td>
438                                                 <td>
439                                                     <para>
440                                                         This mode is the default one: Whatever each bin's width is, the
441                                                         <emphasis role="italic">height</emphasis> of the bin is equal
442                                                         to the number of <varname>Data</varname> components falling in it.
443                                                     </para>
444                                                 </td>
445                                             </tr>
446                                             <tr>
447                                                 <td valign="top">
448                                                     <emphasis role="bold">"countsNorm"</emphasis>
449                                                 </td>
450                                                 <td>
451                                                     <para>
452                                                         Whatever the width and position of each bin, the
453                                                         <emphasis role="italic">height</emphasis> of the bin is equal
454                                                         to the <emphasis role="italic">relative</emphasis> number of
455                                                         <varname>Data</varname> components falling in it,
456                                                         over all counted components.
457                                                         Then, unless the <literal>"normWith:.."</literal>
458                                                         option is used, the sum of all bin heights is equal to 1:
459                                                         <literal>sum(Heights)==1</literal>.
460                                                     </para>
461                                                 </td>
462                                             </tr>
463                                             <tr>
464                                                 <td valign="top">
465                                                     <emphasis role="bold">"density"</emphasis>
466                                                 </td>
467                                                 <td>
468                                                     <para>
469                                                         The <emphasis role="italic">area</emphasis> of each bin is equal
470                                                         to the number of <varname>Data</varname> components falling in it.
471                                                         This scaling mode is meaningless and ignored in case of
472                                                         <emphasis role="italic">categorial</emphasis> histogram.
473                                                     </para>
474                                                 </td>
475                                             </tr>
476                                             <tr>
477                                                 <td valign="top">
478                                                     <emphasis role="bold">"densityNorm"</emphasis>
479                                                 </td>
480                                                 <td>
481                                                     <para>
482                                                         The <emphasis role="italic">area</emphasis> of each bin is equal
483                                                         to the <emphasis role="italic">relative</emphasis> number of
484                                                         <varname>Data</varname> components falling in it. Then, unless
485                                                         the <literal>"normWith:.."</literal> option is used, the whole
486                                                         area of the histogram is equal to 1:
487                                                     </para>
488                                                     <para>
489                                                         <latex style="display"><![CDATA[ \int_{binsEdges(1)}^{binsEdges(\$)}h(x)\,\mathrm{d}x=1 ]]></latex>
490                                                     </para>
491                                                     <para>
492                                                         This scaling mode is meaningless and ignored in case of
493                                                         <emphasis role="italic">categorial</emphasis> histogram.
494                                                     </para>
495                                                 </td>
496                                             </tr>
497                                         </table>
498                                     </listitem>
499                                 </varlistentry>
500                                 <varlistentry>
501                                     <term>"normWith:.."</term>
502                                     <listitem>
503                                         <para>
504                                             When the <literal>"countsNorm"</literal> or <literal>"densityNorm"</literal>
505                                             option is used, it is possible to provide additional information about which
506                                             components of <varname>Data</varname> lying out of the bins should be included
507                                             in the total number N of counts over which the normalization is computed.
508                                         </para>
509                                         <para>
510                                             After the <literal>"normWith:"</literal> option's header, a
511                                             <emphasis role="italic">space-separated</emphasis> list of
512                                             <emphasis role="italic">case-insensitive</emphasis> flags can be provided
513                                             <emphasis role="italic">in any order</emphasis>. If several competing
514                                             flags are provided, only the last specified one is taken into account.
515                                             Flags that are irrelevant for the given <literal>Data</literal> type are ignored.
516                                             Available flags and their relative priorities are described below.
517                                             Examples:
518                                             <literal>"normWith: all"</literal>,
519                                             <literal>"normWith: out inf"</literal>,
520                                             <literal>"normWith: Nan inf"</literal>,
521                                             <literal>"normWith: rightout inf"</literal>, etc.
522                                         </para>
523                                         <table>
524                                             <tr>
525                                                 <td valign="top">
526                                                     <emphasis role="bold">"all"</emphasis>
527                                                 </td>
528                                                 <td>
529                                                     All components of <varname>Data</varname> are considered:
530                                                     <literal>N = size(Data,"*")</literal>. If <literal>"all"</literal>
531                                                     is used, all other <literal>"normWith:.."</literal> options are ignored.
532                                                 </td>
533                                             </tr>
534                                             <tr>
535                                                 <td valign="top">
536                                                     <emphasis role="bold">"out"</emphasis>
537                                                 </td>
538                                                 <td>
539                                                     All <varname>Data</varname> components out of <varname>bins</varname> that are
540                                                     not <literal>NaN</literal> or <literal>Inf</literal> or
541                                                     <literal>""</literal> are taken into account.
542                                                     If <varname>Data</varname> is sparse-encoded, zeros remain excluded
543                                                     unless the option <literal>"normWith: zeros"</literal> is used.
544                                                     If <literal>"out"</literal> is used, the <literal>"leftout"</literal>
545                                                     and <literal>"rightout"</literal> options are ignored.
546                                                 </td>
547                                             </tr>
548                                             <tr>
549                                                 <td valign="top">
550                                                     <emphasis role="bold">"leftout"</emphasis>
551                                                 </td>
552                                                 <td>
553                                                     As with <literal>"out"</literal>, but only for
554                                                     <literal>Data &lt; binsEdges(1)</literal>.
555                                                     This flag is ignored in discrete/categorial mode.
556                                                 </td>
557                                             </tr>
558                                             <tr>
559                                                 <td valign="top">
560                                                     <emphasis role="bold">"rightout"</emphasis>
561                                                 </td>
562                                                 <td>
563                                                     As with <literal>"out"</literal>, but only for
564                                                     <literal>Data > binsEdges($)</literal>.
565                                                     This flag is ignored in discrete/categorial mode.
566                                                 </td>
567                                             </tr>
568                                             <tr>
569                                                 <td valign="top">
570                                                     <emphasis role="bold">"NaN"</emphasis>
571                                                 </td>
572                                                 <td>
573                                                     <literal>NaN</literal> data are taken into account, in addition to
574                                                     the other ones.
575                                                 </td>
576                                             </tr>
577                                             <tr>
578                                                 <td valign="top">
579                                                     <emphasis role="bold">"Inf"</emphasis>
580                                                 </td>
581                                                 <td>
582                                                     <literal>Inf</literal> data are taken into account, in addition to
583                                                     the other ones.
584                                                     <para>
585                                                         In discrete/categorial mode, <literal>Inf</literal>
586                                                         values are not specific and are processed as other ones.
587                                                         This flag is then ignored.
588                                                     </para>
589                                                 </td>
590                                             </tr>
591                                             <tr>
592                                                 <td valign="top">
593                                                     <emphasis role="bold">"zeros"</emphasis>
594                                                 </td>
595                                                 <td>
596                                                     If <varname>Data</varname> is sparse-encoded, by default only
597                                                     non-zero elements are considered (otherwise, zeros are not specific
598                                                     and are processed as other values). Nevertheless, it's possible
599                                                     to take them into account in the normalization by using this
600                                                     <literal>"normWith: zeros"</literal> flag.
601                                                     <important>
602                                                         Using this flag does not credit the <literal>Heights</literal>
603                                                         of the bin covering the zero value (if any).
604                                                     </important>
605                                                 </td>
606                                             </tr>
607                                             <tr>
608                                                 <td valign="top">
609                                                     <emphasis role="bold">"empty"</emphasis>
610                                                 </td>
611                                                 <td>
612                                                     <literal>""</literal> empty texts in <varname>Data</varname>
613                                                     are taken into account, in addition to the other ones.
614                                                 </td>
615                                             </tr>
616                                         </table>
617                                     </listitem>
618                                 </varlistentry>
619                             </variablelist>
620                         </para>
621                     </listitem>
622                 </varlistentry>
623             </variablelist>
624         </refsect2>
625         <refsect2>
626             <title>Results</title>
627             <variablelist>
628                 <varlistentry>
629                     <term>Heights</term>
630                     <listitem>
631                         <para>
632                             Vector of decimal numbers whose values depend on the histogram scaling mode
633                             set with the dedicated option.
634                             See the description of the <literal>Histogram scale</literal> options
635                             above. In brief:
636                             <itemizedlist>
637                                 <listitem>
638                                     <literal>"counts"</literal> mode: <varname>Heights(i)</varname>
639                                     is the number of <varname>Data</varname> components equal to the
640                                     <literal>bins(i)</literal> value (categorial), or belonging to the
641                                     <literal>]bins(i), bins(i+1)]</literal> interval (continuous histogram).
642                                 </listitem>
643                                 <listitem>
644                                     <literal>"countsNorm"</literal> mode: <varname>Heights(i)</varname>
645                                     is as for <literal>"counts"</literal>, divided by the total number
646                                     <literal>N</literal> of considered <varname>Data</varname> components.
647                                     <literal>N</literal> is the sum of counts in all bins, plus
648                                     possibly the number of counts of some special jokers values
649                                     (<literal>%inf, %nan, 0, ""</literal>), according to the
650                                     <literal>normWith:</literal> option used.
651                                 </listitem>
652                             </itemizedlist>
653                         </para>
654                         <para>
655                             In continuous mode, statistical densities may be returned in the vector
656                             <varname>Heights</varname> instead of integer numbers of counts: Let's
657                             call <literal>counts(i)</literal> the number of counts in the
658                             bin #i defined by its edges. Then
659                             <itemizedlist>
660                                 <listitem>
661                                     In <literal>"density"</literal> mode: <varname>Heights(i)</varname>
662                                     is set such that the <emphasis role="italic">area</emphasis> of
663                                     the bin is equal to its population:
664                                     <literal>Heights(i) * (binsEdges(i+1) - binsEdges(i)) == counts(i)</literal>.
665                                 </listitem>
666                                 <listitem>
667                                     In <literal>"densityNorm"</literal> mode: the
668                                     <literal>"density"</literal> results are divided by the total number
669                                     <literal>N</literal> of considered counts
670                                     (see <literal>"countsNorm"</literal>).
671                                 </listitem>
672                             </itemizedlist>
673                         </para>
674                     </listitem>
675                 </varlistentry>
676                 <varlistentry>
677                     <term>jokers</term>
678                     <listitem>
679                         <para>
680                             Row vector of 1 to 5 decimal numbers indicating the frequency of special
681                             values in <literal>Data</literal>. Let's define the following numbers:
682                             <itemizedlist>
683                                 <listitem>
684                                     <literal>Nnan</literal>: number of <literal>NaN</literal> objects
685                                     in <literal>Data</literal>.
686                                 </listitem>
687                                 <listitem>
688                                     <literal>Ninf</literal>: number of <literal>Inf</literal> objects
689                                     in <literal>Data</literal>.
690                                 </listitem>
691                                 <listitem>
692                                     <literal>Nzeros</literal>: number of null values in
693                                     <literal>Data</literal>.
694                                 </listitem>
695                                 <listitem>
696                                     <literal>Nempty</literal>: number of empty texts "" in
697                                     <literal>Data</literal>.
698                                 </listitem>
699                                 <listitem>
700                                     <literal>Nleftout</literal>: number of <literal>Data</literal>
701                                     components not equal to <literal>-%inf</literal> nor to
702                                     <literal>""</literal>, such that
703                                     <literal>Data &lt; binsEdges(1)</literal>.
704                                 </listitem>
705                                 <listitem>
706                                     <literal>Nrightout</literal>: number of <literal>Data</literal>
707                                     components not equal to <literal>%inf</literal> such that
708                                     <literal>Data > binsEdges($)</literal>.
709                                 </listitem>
710                                 <listitem>
711                                     <literal>Nout</literal>: number of <literal>Data</literal>
712                                     components out of bins, non-infinite, not being
713                                     <literal>NaN</literal>, not being empty text <literal>""</literal>,
714                                     and for sparse <literal>Data</literal>: not equal to zero.
715                                 </listitem>
716                             </itemizedlist>
717                         </para>
718                         <para>
719                             In unnormalized <literal>"counts"</literal> and <literal>"density"</literal>
720                             histogram scales, <varname>jokers</varname> returns the integer
721                             <emphasis role="italic">counts</emphasis> numbers of special values.
722                         </para>
723                         <para>
724                             In normalized <literal>"countsNorm"</literal> and <literal>"densityNorm"</literal>
725                             histogram scales, <varname>jokers</varname> returns
726                             <emphasis role="italic">countsNorm</emphasis> frequencies of special values.
727                         </para>
728                         <para>
729                             Then, according to the <literal>Data</literal> type and the
730                             <emphasis role="italic">continuous</emphasis> or
731                             <emphasis role="italic">categorial</emphasis> histogram mode,
732                             <varname>jokers</varname> is made of the following:
733                             <orderedlist>
734                                 <listitem>
735                                     <emphasis role="italic">Encoded integers:</emphasis>
736                                     <itemizedlist>
737                                         <listitem>
738                                             continuous: <literal>[Nleftout, Nrightout]</literal>
739                                         </listitem>
740                                         <listitem>
741                                             categorial: <literal>[Nout]</literal>
742                                         </listitem>
743                                     </itemizedlist>
744                                 </listitem>
745                                 <listitem>
746                                     <emphasis role="italic">
747                                         Decimal or complex numbers, full or sparse:
748                                     </emphasis>
749                                     <itemizedlist>
750                                         <listitem>
751                                             continuous:
752                                             <literal>[Nleftout, Nrightout, Nzeros, Nnan, Ninf]</literal>
753                                         </listitem>
754                                         <listitem>
755                                             categorial: <literal>[Nout, 0, Nzeros, Nnan, Ninf]</literal>
756                                         </listitem>
757                                     </itemizedlist>
758                                 </listitem>
759                                 <listitem>
760                                     <emphasis role="italic">Polynomials:</emphasis>
761                                     <literal>[Nout, 0, 0, Nnan, Ninf]</literal>
762                                 </listitem>
763                                 <listitem>
764                                     <emphasis role="italic">Texts:</emphasis>
765                                     <itemizedlist>
766                                         <listitem>
767                                             continuous: <literal>[Nleftout, Nrightout, Nempty]</literal>
768                                         </listitem>
769                                         <listitem>
770                                             categorial: <literal>[Nout, 0, Nempty]</literal>
771                                         </listitem>
772                                     </itemizedlist>
773                                 </listitem>
774                             </orderedlist>
775                         </para>
776                     </listitem>
777                 </varlistentry>
778                 <varlistentry>
779                     <term>bins</term>
780                     <listitem>
781                         <para>
782                             Row vector of bins edges or of bins values actually used to build the histogram.
783                             <literal>histc()</literal> allows using many semi-automatic or automatic
784                             binning modes, for which no <varname>binsEdges</varname> or
785                             <varname>binsValues</varname> vector, or only an incomplete one, is provided as input.
786                             <itemizedlist>
787                                 <listitem>
788                                     Continuous binning mode:
789                                     <itemizedlist>
790                                         <listitem>
791                                             The actual <varname>binsEdges</varname> is returned in
792                                             <varname>bins</varname>. It has one more component than
793                                             <varname>Heights</varname>: the additional one is the
794                                             position of the closing edge.
795                                         </listitem>
796                                         <listitem>
797                                             <para>
798                                                 For encoded integers, decimal numbers, and complex numbers
799                                                 <varname>Data</varname>, <varname>bins</varname> is of
800                                                 decimal type. For text <varname>Data</varname>,
801                                                 <varname>bins</varname> is of type text as well.
802                                             </para>
803                                         </listitem>
804                                         <listitem>
805                                             <para>
806                                                 When marginal bins are required (see the
807                                                 <varname>binsEdges</varname> description)
808                                                 <literal>bins(1)</literal> and <literal>bins($)</literal>
809                                                 return the actual boundaries of the whole binning range
810                                                 used.
811                                             </para>
812                                         </listitem>
813                                     </itemizedlist>
814                                 </listitem>
815                                 <listitem>
816                                     Discrete categorial mode:
817                                     <para>
818                                         For polynomial <varname>Data</varname>, or
819                                         for other <varname>Data</varname> types used with the
820                                         <varname>"discrete"</varname> option: if no explicit
821                                         <varname>binsValues</varname> vector is provided,
822                                         <literal>histc()</literal> sets it to
823                                         <literal>unique(Data)(:)'</literal> and returns it as
824                                         <varname>bins</varname>.
825                                     </para>
826                                 </listitem>
827                             </itemizedlist>
828                         </para>
829                     </listitem>
830                 </varlistentry>
831                 <varlistentry>
832                     <term>inBin</term>
833                     <listitem>
834                         <para>
835                             Array of decimal integers with the same size as <varname>Data</varname>.
836                             If <varname>Data</varname> is sparse-encoded, <varname>inBin</varname>
837                             is sparse-encoded as well.
838                         </para>
839                         <para>
840                             <literal>inBin(i,j)</literal> returns the index of the bin (in <varname>bins</varname>)
841                             which <literal>Data(i,j)</literal> belongs to. If the value of
842                             <literal>Data(i,j)</literal> is out of bins, <literal>inBin(i,j)=0</literal>.
843                             Otherwise, <literal>Data(i,j)</literal> increments the
844                             <literal>Heights(inBin(i,j))</literal> counts by one unit.
845                         </para>
846                     </listitem>
847                 </varlistentry>
848             </variablelist>
849         </refsect2>
850     </refsection>
851      <refsection role="examples">
852         <title>Examples</title>
853         <refsect2>
854             <title>with decimal numbers:</title>
855             <programlisting role="example"><![CDATA[
856 data = [1 1 1 2 2 3 4 4 5 5 5 6 6 7 8 8 9 9 9];
857 N = size(data,"*")   // ==19
858
859 // Default binning; "sqrt": sqrt(19) => 4. .. => 4 bins
860 [h, j, b, i] = histc(data)
861 // expected: h = [6 5 3 5] = href
862 // expected: b = [1 3 5 7 9] bins edges
863 // expected: i = [1 1 1 1 1 1 2 2 2 2 2 3 3 3 4 4 4 4 4]  data memberships to bins
864 histc(data, , "countsNorm")   // Expected: href/N
865 histc(data, , "density")      // Expected: href/2, 2 being the bins width
866 histc(data, , "densityNorm")  // Expected: href/N/2
867
868 // Automatic Sturges binning
869 [h, j, b, i] = histc(data,"sturges")    // h = [5 1 5 2 1 5]
870                                         // b = [3 7 11 15 19 23 27] / 3
871                                         // i = [1 1 1 1 1 2 3 3 3 3 3 4 4 5 6 6 6 6 6]
872
873 // Explicit bins edges, with marginal bins
874 // ---------------------------------------
875 data = [1 1 1 2 2 3 4 4 5 5 5 6 6 7 8 8 9 9 9];
876 be = [-%inf 3 5 7 %inf];
877 [href, j, b, i] = histc(data, be)   // href = [6 5 3 5] => sum N = 19
878                                     // b = [1 3 5 7 9]  // bins completed with actual data bounds
879                                     // i = [1 1 1 1 1 1 2 2 2 2 2 3 3 3 4 4 4 4 4]
880 histc(data, be, "countsNorm")       // href/N
881 histc(data, be, "density")          // href/2   bins width = 2: see b
882 histc(data, be, "densityNorm")      // href/N/2
883
884 // Explicit bins edges, with outsiders
885 // -----------------------------------
886 data = [1 1 1 2 2 3 4 4 5 5 5 6 6 7 8 8 9 9 9]; // still the same
887 be = [2, 5.5, 7];   // Bins edges (2 bins)
888 [href, jref, b, i] = histc(data, be)   // href = [8 3]   jref = [3 5 0 0 0] = [leftout, rightout, ..]
889                                        // i = [0 0 0 1 1 1 1 1 1 1 1 2 2 2 0 0 0 0 0]
890 histc(data, be, "countsNorm")                       // href / 11
891 histc(data, be, "countsNorm, normWith: leftout")    // href / 14
892 histc(data, be, "countsNorm, normWith: rightout")   // href / 16
893 histc(data, be, "countsNorm, normWith: out")        // href / 19
894 histc(data, be, "density")                          // href ./ diff(be)
895 histc(data, be, "densityNorm")                      // href ./ diff(be) / 11
896 histc(data, be, "densityNorm, normWith: leftout")   // href ./ diff(be) / 14
897 histc(data, be, "densityNorm, normWith: rightout")  // href ./ diff(be) / 16
898 histc(data, be, "densityNorm, normWith: all");      // href ./ diff(be) / 19
899
900 // With NaN and Inf values
901 // -----------------------
902 data = [1 1 1 2 2 3 4 4 5 5 5 6 6 7 8 8 9 9 9];
903 data = [%nan %inf, data, %nan %nan -%inf];
904 N = size(data,"*");         // 24
905 be = [2, 4.5, 7];           // Set bins edges (2 bins)
906 [href, jref, b, iref] = histc(data, be) //  href = [5 6]  jref = [3 5 0 3 2];
907                             // continuous mode: jokers = [leftout, rightout, zeros, nan, inf]
908                             // iref = [0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 0 0 0 0 0 0 0 0]  memberships
909 [h, j] = histc(data, be, "countsNorm")                          // Expected: href/11, jref/11
910 [h, j] = histc(data, be, "countsNorm, normWith: nan")           // Expected: href/14, jref/14
911 [h, j] = histc(data, be, "countsNorm, normWith: inf")           // Expected: href/13, jref/13
912 [h, j] = histc(data, be, "countsNorm, normWith: inf nan")       // Expected: href/16, jref/16
913 [h, j] = histc(data, be, "countsNorm, normWith: leftout nan")   // Expected: href/17, jref/17
914 [h, j] = histc(data, be, "countsNorm, normWith: rightout inf")  // Expected: href/18, jref/18
915 [h, j] = histc(data, be, "countsNorm, normWith: out inf")       // Expected: href/21, jref/21
916 [h, j] = histc(data, be, "countsNorm, normWith: all")           // Expected: href/24, jref/24
917
918 // Normalized densities over a Bins width = 2.5 (see be)
919 [h, j] = histc(data, be, "densityNorm")                         // Expected: href/11/2.5, jref/11
920 [h, j] = histc(data, be, "densityNorm, normWith: nan")          // Expected: href/14/2.5, jref/14
921 [h, j] = histc(data, be, "densityNorm, normWith: inf")          // Expected: href/13/2.5, jref/13
922 [h, j] = histc(data, be, "densityNorm, normWith: inf nan")      // Expected: href/16/2.5, jref/16
923 [h, j] = histc(data, be, "densityNorm, normWith: leftout nan")  // Expected: href/17/2.5, jref/17
924 [h, j] = histc(data, be, "densityNorm, normWith: rightout inf") // Expected: href/18/2.5, jref/18
925 [h, j] = histc(data, be, "densityNorm, normWith: all")          // Expected: href/24/2.5, jref/24
926      ]]></programlisting>
927         </refsect2>
928         <refsect2>
929             <title>with texts:</title>
930             <programlisting role="example"><![CDATA[
931 histc(["a" "c" "a" "a" "b" "c"])    //  [3 1 2]
932
933 t = [
934 "c" "n" "h" "i" "b" "i" "f" "i" "p" "l" "p" "d" "f" "i" "l"
935 "b" "m" "e" "o" "o" "f" "p" "o" "h" "f" "h" "h" "c" "k" "o"
936 "p" "f" "k" "a" "j" "o" "j" "d" "h" "h" "n" "m" "o" "l" "n"
937 "h" "b" "o" "l" "j" "n" "o" "i" "g" "i" "a" "a" "j" "d" "p"
938  ];
939 // With default discrete bins
940 // --------------------------
941 [h,j,b,i] = histc(t)  // h = [3  3 2 3 1 5 1 7 6 4 2 4 2 4 8 5]
942                       // b = "a" b c d e f g h i j k l m n o p
943 iref = [
944 3   14  8   9   2   9   6   9   16  12  16  4   6   9   12
945 2   13  5   15  15  6   16  15  8   6   8   8   3   11  15
946 16  6   11  1   10  15  10  4   8   8   14  13  15  12  14
947 8   2   15  12  10  14  15  9   7   9   1   1   10  4   16
948 ];
949
950 // With given discrete bins WITHOUT "" bins
951 // ----------------------------------------
952 t2 = t;
953 t2([7 13 19 26 32 39 43]) = "";
954 // --> t2  =
955 // c  n  h     b  i  f  i  p  l  p  d  f  i  l
956 // b  m  e  o  o  f     o  h  f  h  h  c  k  o
957 // p     k  a     o  j  d  h        m  o  l  n
958 // h  b  o  l  j  n  o     g  i  a  a  j  d  p
959 //
960 // b =  '' a  b  c  d  e  f  g  h  i  j  k  l  m  n  o  p
961 // h =  7  3  3  2  3  1  4  1  6  4  3  2  4  2  3  8  4
962
963 [h, j, b, i] = histc(t2, ["a" "e" "i" "o"], "discrete")
964                                             // h = [3 1 4 8];  N = 16
965                                             // j = [37 0 7] = [out, 0, #""]
966 // i = [    // memberships
967 // 0  0  0  0  0  3  0  3  0  0  0  0  0  3  0
968 // 0  0  2  4  4  0  0  4  0  0  0  0  0  0  4
969 // 0  0  0  1  0  4  0  0  0  0  0  0  4  0  0
970 // 0  0  4  0  0  0  4  0  0  3  1  1  0  0  0
971 // ];
972
973 // With continuous and marginal bins: "" <=> -inf , "~~" <=> Inf (regular ascii)
974 // -----------------------------------------------------------------------------
975 [h,j,b,i] = histc(t, ["" "c" "e" "g" "i" "k" "m" "~~"])
976                     // h = [8 4 6 13 6 6 17]    j = [0 0 0]
977 // i = [    // memberships
978 // 1  7  4  4  1  4  3  4  7  6  7  2  3  4  6
979 // 1  6  2  7  7  3  7  7  4  3  4  4  1  5  7
980 // 7  3  5  1  5  7  5  2  4  4  7  6  7  6  7
981 // 4  1  7  6  5  7  7  4  3  4  1  1  5  2  7
982 // ];
983
984 // Continuous bins. Data WITH ""
985 // -----------------------------
986 // t2  =
987 // c  n  h     b  i  f  i  p  l  p  d  f  i  l
988 // b  m  e  o  o  f     o  h  f  h  h  c  k  o
989 // p     k  a     o  j  d  h        m  o  l  n
990 // h  b  o  l  j  n  o     g  i  a  a  j  d  p
991 //
992 // b =  '' a  b  c  d  e  f  g  h  i  j  k  l  m  n  o  p
993 // h =  7  3  3  2  3  1  4  1  6  4  3  2  4  2  3  8  4
994 binsEdges = ["e" "f" "g" "h" "i" "j"];
995 [href, jref, b, i] = histc(t2, binsEdges)   // href=[5 1 6 4 3]; N = sum(href) = 19
996                                             // jref=[11  23  7]; [leftout rightout ""]
997
998 [h,j,b,i] = histc(t2, binsEdges, "countsNorm,normWith: leftout")
999                                             // h = href / (N+jref(1)),   j = jref / (N+jref(1))
1000 [h,j,b,i] = histc(t2, binsEdges, "countsNorm,normWith: rightout")
1001                                             // h = href / (N+jref(2)),  j = jref / (N+jref(2))
1002 [h,j,b,i] = histc(t2, binsEdges, "countsNorm,normWith: out");
1003                                             // h = href / sum([N jref(1:2)]), j = jref / sum([N jref(1:2)])
1004 [h,j,b,i] = histc(t2, binsEdges, "countsNorm,normWith: empty")
1005                                             // h = href / (N+jref(3)), j = jref/(N+jref(3))
1006 [h,j,b,i] = histc(t2, binsEdges,"countsNorm,normWith: out empty")
1007                                             // h = href / sum([N jref]), j = jref / sum([N jref])
1008 [h,j,b,i] = histc(t2, binsEdges, "countsNorm,normWith: all")
1009                                             // h = href / sum([N jref]), j = jref/sum([N jref])
1010      ]]></programlisting>
1011         </refsect2>
1012         <refsect2>
1013             <title>with polynomials:</title>
1014             <programlisting role="example"><![CDATA[
1015 histc([%z 2+%z %z])                                     // [2 1]
1016 histc([%z 2+%z %z],, "countsnorm")                      // [2 1] / 3
1017 histc([%z 2+%z %z %nan],, "countsnorm")                 // [2 1] / 3
1018 histc([%z 2+%z %z %nan],, "countsnorm, normWith: Nan")  // [2 1] / 4
1019 // Data order is kept:
1020 histc([2+%z %z %z ]) == [1 2]
1021 ]]></programlisting>
1022         </refsect2>
1023
1024     </refsection>
1025     <refsection role="see also">
1026         <title>See also</title>
1027         <simplelist type="inline">
1028             <member>
1029                 <link linkend="histplot">histplot</link>
1030             </member>
1031             <member>
1032                 <link linkend="hist3d">hist3d</link>
1033             </member>
1034             <member>
1035                 <link linkend="bar">bar</link>
1036             </member>
1037             <member>
1038                 <link linkend="barh">barh</link>
1039             </member>
1040             <member>
1041                 <link linkend="plot2d2">plot2d2</link>
1042             </member>
1043             <member>
1044                 <link linkend="dsearch">dsearch</link>
1045             </member>
1046             <member>
1047                 <link linkend="members">members</link>
1048             </member>
1049             <member>
1050                 <link linkend="grep">grep</link>
1051             </member>
1052             <member>
1053                 <link linkend="strcmp">strcmp</link>
1054             </member>
1055             <member>
1056                 <link linkend="isnan">isnan</link>
1057             </member>
1058             <member>
1059                 <link linkend="isinf">isinf</link>
1060             </member>
1061         </simplelist>
1062     </refsection>
1063     <refsection role="history">
1064         <title>History</title>
1065         <revhistory>
1066             <revision>
1067                 <revnumber>5.5.0</revnumber>
1068                 <revdescription>
1069                     histc() introduced
1070                 </revdescription>
1071             </revision>
1072             <revision>
1073                 <revnumber>6.1.0</revnumber>
1074                 <revdescription>
1075                     histc() reforged:
1076                     <itemizedlist>
1077                         <listitem>
1078                             Data and nbins input arguments are commuted.
1079                         </listitem>
1080                         <listitem>
1081                             New accepted Data types: complex numbers, sparse decimal or complex
1082                             matrices, polynomials, texts.
1083                         </listitem>
1084                         <listitem>
1085                             Histogram binning:
1086                             <itemizedlist>
1087                                 <listitem>
1088                                     binsWidth and binsAlgo = ["sqrt" "sturges" "freediac"] input
1089                                     arguments added.
1090                                 </listitem>
1091                                 <listitem>
1092                                     histc() can now build categorical histograms: "discrete" option
1093                                     added; binsValues input argument added.
1094                                 </listitem>
1095                                 <listitem>
1096                                     Marginal bins are now handled with binsEdges(1)=-%inf and
1097                                     binsEdges($)=%inf, or with binsEdges(1)="" and binsEdges($)="~~".
1098                                 </listitem>
1099                             </itemizedlist>
1100                         </listitem>
1101                         <listitem>
1102                             Histogram scaling:
1103                             <itemizedlist>
1104                                 <listitem>
1105                                     normalization option removed
1106                                 </listitem>
1107                                 <listitem>
1108                                     "counts", "countsNorm", "density" and "densityNorm" options added.
1109                                 </listitem>
1110                                 <listitem>
1111                                     "normWith:" option added, with flags among "leftout",
1112                                     "rightout", "out", "inf", "nan", "zeros", "empty", "all"
1113                                     possible values.
1114                                 </listitem>
1115                                 <listitem>
1116                                     "counts" is now the default scaling mode, instead of
1117                                     "densityNorm" in Scilab 5.5.0 and 5.5.1, and
1118                                     "countsNorm" in Scilab 5.5.2. Backward compatibility with the
1119                                     former "densityNorm" default mode of Scilab 5.5 is ensured.
1120                                 </listitem>
1121                             </itemizedlist>
1122                         </listitem>
1123                         <listitem>
1124                             jokers and bins output arguments are added and inserted after the
1125                             histogram heights. Backward compatibility with Scilab 5.5 is ensured
1126                             for the ind result.
1127                         </listitem>
1128                         <listitem>Extensive unit tests added.</listitem>
1129                         <listitem>Help page rewritten.</listitem>
1130                     </itemizedlist>
1131                 </revdescription>
1132             </revision>
1133         </revhistory>
1134     </refsection>
1135 </refentry>