1 // Scilab ( http://www.scilab.org/ ) - This file is part of Scilab
2 // Copyright (C) 2017 - 2020 - Samuel GOUGEON
4 // This file is hereby licensed under the terms of the GNU GPL v2.0,
5 // pursuant to article 5.3.4 of the CeCILL v.2.1.
6 // This file was originally licensed under the terms of the CeCILL v2.1,
7 // and continues to be available under such terms.
8 // For more information, see the COPYING file which you should have received
9 // along with this program.
12 function [heights, jokers, binsOut, ind] = histc(data, bins, options)
14 // 1) histc(bins, data, normalization)
15 // 2) By défaut, histc() worked in "densityNorm,normWith:all" mode
17 // 1) histc(data, bins, options)
18 // 2) By default, histc() works in "counts" mode
22 // histc(data, binsWidth<0)
23 // histc(data, binsEdges)
24 // histc(data, binsValues [, "discrete"])
25 // histc(data, binsAlgo)
26 // histc(data, ..., options)
27 // histc(nbins, data) // deprecated
28 // histc(binsEdges, data) // deprecated
29 // histc(binsEdges, data, normalization=%t) // deprecated
30 // heights = histc(data ..)
31 // [heights, jokers] = histc(data ..)
32 // [heights, jokers, binsOut] = histc(data ..)
33 // [heights, jokers, binsOut, ind] = histc(data ..)
36 // "sqrt" | "sturges" | "freediac"
38 // "discrete" // , "smartbins"
40 // "counts" (default) | "countsNorm" | "density" | "densityNorm"
41 // Normalization "normWith: " options: Priorities: "all" > "out" > ..
42 // decimal, complex numbers: "leftout rightout out inf nan zeros all"
43 // integers: "leftout rightout (out=) all"
44 // Text: "empty out all"
45 // polynomials: "(out=) all"
46 // vector or single string with space-separated flags (case-unsensitive)
48 // Complex numbers are sorted by real parts, then by imaginary parts
50 // * integers: [leftout, rightout] (continuous) or [out] (discrete)
51 // * decimal or complex numbers, full or sparse:
52 // continuous: [leftout, rightout, zeros, nan, inf]
53 // discrete: [out, 0, zeros, nan, inf]
54 // * polynomials: [out, 0, 0, nan, inf]
55 // * texts: continuous: [leftout, rightout, ""]; discrete: [out, 0, ""]
60 [heights, jokers, binsOut, ind] = ([],[],[],[])
61 [Nout, Nleftout, Nzeros, Nrightout, Ninf, Nnan] = (0,0,0,0,0,0)
63 // PARSING INPUT ARGUMENTS
64 // =======================
66 msg = _("%s: Wrong number of input arguments: %d to %d expected.\n")
67 error(msprintf(msg, fname, 1, 3))
69 // Old 5.5.x mode management
70 // -------------------------
72 // Check for Scilab 5.5 back-compatibility
73 if (isdef("data","l") & isdef("bins", "l") & type(bins)>0) & ..
74 size(data,"*")< size(bins,"*") & or(type(data)==[1 8]) & type(bins)~=10
75 msg = _("%s: data are now expected in argument #1 => arguments #1 and #2 switched\n")
76 warning(msprintf(msg, fname))
78 [data, bins] = (bins, data)
79 tmp = isdef("options","l") && type(options)==4
82 if isdef("normalization","l")
84 if type(normalization)==4 then
85 norma = normalization(1)
91 if isdef("options","l") & type(options)==4
98 options = ["densityNorm" "normWith:all"]
103 msg = _("%s: ""normalization"" boolean argument #3 is obsolete. ""%s"" option used.\n")
104 warning(msprintf(msg, fname, options(1)))
107 // -----------------------
108 if ~isdef("data","l") | type(data)==0 then
109 msg = _("%s: Argument #%d: %s\n")
110 error(msprintf(msg, fname, 1, "data array missing"))
112 if ~isdef("bins","l") | type(bins)==0 then
115 if ~isdef("options","l") then
118 // Now, bins, data and options are defined. Let's check their types and sizes
120 // CHECKING INPUT ARGUMENTS AND SETTING DEFAULTS
121 // =============================================
125 if type(options)~=10 then
126 msg = _("%s: Argument #%d: Text expected.\n")
127 error(msprintf(msg, fname, 3))
129 options = tokens(strcat(convstr(options),","),",")
131 discrete = grep(options, "discrete")~=[] | type(data)==2 // polynomials
135 if isempty(data) then
138 if ~or(type(data)==[1 2 5 8 10]) then
139 msg = _("%s: Data in argument #%d: Numbers or polynomials or texts expected.\n")
140 error(msprintf(msg, fname, 1))
142 data0 = data // Saving
143 // If encoded integers (including u-int64): => decimal (for dsearch(), etc)
144 if type(data) == 8 then
147 // If sparse numbers: we remove zeros
149 [IJ, data] = spget(data)
151 // If there are only %inf or %nan => return ==!!!!!!!!!!!!!!!!!!!!!!!!!!!
152 if ~discrete & or(type(data0)==[1 5]) then
161 msg = _("%s: data have only %inf or/and %nan values.\n")
162 warning(msprintf(msg, fname))
164 Nzeros = sum(data==0)
166 Nzeros = size(data0,"*") - Nnan - Ninf
171 b = bins(~isinf(bins))
179 jokers = [Ninf Nnan Nzeros Nleftout Nrightout]
183 // Polynomials: setting Nan and Ninf
188 L(tmp) = [] // to ensure excluding any intersection
195 // Setting a default binning mode
197 // histc(data): Default bins:
198 // * strings: discrete, from unique()
199 // * polynomials: discrete, from defBins set hereabove
200 // * encoded integers: "sqrt". if strange() < "sqrt" => binsWidth = 1
201 // * decimal, complex: binMethod option or default "sqrt"
202 if type(data)==10 // texts
204 bins = unique(data)(:)'
207 elseif type(data)==2 // Polynomials
218 else // decimals, complexes, encoded integers
222 bins = unique(data)(:).'
228 if type(bins)==8 | (type(bins)==1 & ~isreal(bins))
229 bins = real(double(bins))
231 if size(bins,"*")==1 & type(bins)==10 then
234 if ~or(convstr(bins)==["sqrt" "sturges" "freediac"])
235 msg = _("%s: Argument #%d: wrong value for binning algo")
236 error(msprintf(msg, fname, 2))
238 // We set the number of bins. Their edges are set later.
239 Nvalid = size(data,"*") - Nnan - Ninf
242 bins = ceil(1 + log2(Nvalid))
245 binWidth = 2*iqr(tmp)* Nvalid^(-1/3)
247 bins = round((max(tmp) - min(tmp)) / binWidth)
252 bins = max(1, round(sqrt(Nvalid)))
256 if length(bins)==1 & type(bins)==1 then
257 // The number of bins or their width is provided
258 if type(data)==10 // texts
259 msg = _("%s: Argument #2: Please provide bins edges or values or leave choosing default bins.\n")
260 error(msprintf(msg, fname))
262 // Case polynomials: not possible here <= discrete previously set to %t
263 // Cases: Encoded integers, decimal or complex numbers
264 if bins>=0 & (bins~=floor(bins) | bins==0) | isnan(bins) | bins==%inf
265 msg = _("%s: Argument #%d: non-zero decimal integer expected.\n")
266 error(msprintf(msg, fname, 2))
268 msg = _("%s: Argument #%d: decimal number > -Inf expected.\n")
269 error(msprintf(msg, fname, 2))
272 // If data are complex-encoded, they will be sorted by increasing
273 // real parts, then by increasing imaginary parts.
274 b = data(~isnan(data) & ~isinf(data))
280 // Required number of bins
281 if (mind == maxd) then
282 mind = mind*(1 - 0.5/bins);
283 maxd = maxd*(1 + 0.5/bins);
285 bins = linspace(mind, maxd, bins+1); // Bins edges
287 // Required bins width
288 if (mind == maxd) then
289 bins = mind + [1 -1]*bins/2
291 // Edges set à 0.0, width = |bins|, from mind- to maxd+
293 bins = (floor(mind/tmp):ceil(maxd/tmp))*tmp
299 // Required number of bins
300 tmp = maxd - mind + 1
301 if tmp <= bins & tmp <= 2^16 // integer-centered bins of width==1
302 bins = (mind:(maxd+1)) - 0.5 // Bins edges
304 ds = (maxd - mind)/bins/2
305 bins = linspace(mind-ds, maxd+ds, bins+1) // Bins edges
308 // Required bins width
309 tmp = max(round(-bins),1) // Bins edges @ -0.5 modulo |int(bins)|
310 bins = (floor(mind/tmp):ceil(maxd/tmp))*tmp - 0.5 // Bins edges
315 // Bins edges are provided: we check that edges are sorted
320 msg = _("%s: Arguments #%d and #%d: Same types expected.\n")
321 error(msprintf(msg, fname, 1, 2))
323 if ~and(bins==unique(bins))
324 msg = _("%s: Argument #%d: Elements must be sorted in increasing order.\n")
325 error(msprintf(msg, fname, 1))
331 if ~or(type(bins)==[1 8])
332 msg = _("%s: Arguments #%d and #%d: Same types expected.\n")
333 error(msprintf(msg, fname, 1, 2))
335 bins = bins(~isnan(bins))
336 if min(diff(bins)) <= 0 then
337 msg = _("%s: Argument #%d: Elements must be in increasing order.\n")
338 error(msprintf(msg, fname, 1))
340 // We manage -%inf open left bin
342 tmp = min(data(~isinf(data)))
349 // We manage %inf open right bin
351 tmp = max(data(~isinf(data)))
365 msg = _("%s: Arguments #%d and #%d: Same types expected.\n")
366 error(msprintf(msg, fname, 1, 2))
368 [bins, dataOrder] = unique(bins)
370 elseif or(type(data)==[1 8])
371 if ~or(type(bins)==[1 8])
372 msg = _("%s: Arguments #%d and #%d: Same types expected.\n")
373 error(msprintf(msg, fname, 1, 2))
375 bins = real(double(bins))
376 bins(isnan(bins)) = []
377 [bins, dataOrder] = unique(bins)
385 // * integers: [leftout, rightout] (continuous) or [out] (discrete)
386 // * decimal or complex numbers, full or sparse:
387 // continuous: [inf, nan, zeros, leftout, rightout]
388 // discrete: [inf, nan, zeros, out]
389 // * polynomials: [inf, nan, out]
390 // * texts: continuous: [empty, leftout, rightout]; discrete: [empty, out]
391 if type(data0)~=2 then
392 hmode = "c" // continuous
394 hmode = "d" // discrete
396 if or(type(data0)==[1 5])
402 tmp1 = isinf(real(tmp))
403 tmp2 = isinf(imag(tmp))
404 tmp3 = isnan(tmp) | (tmp1 & tmp2)
405 if ~discrete & ~isreal(tmp,0)
406 data(tmp1 | tmp2) = %inf
409 clear tmp tmp1 tmp2 tmp3
412 [ind, heights] = dsearch(data, bins, hmode)
413 // ind: bin's number of each data component
416 // We restore the original order of binsValues:
417 [tmp, k] = gsort(dataOrder, "g","i")
418 [tmp, k2] = gsort(k,"g","i")
423 ind(tmp2) = k2(ind(tmp2))
427 // Counts (and location)
430 for i = 1:size(bins,"*")
432 heights(1,i) = sum(tmp)
435 tmp = Ninf - sum(heights(isinf(bins))) // inf out of the selection
436 Nout = size(data,"*") - sum(heights) - Nnan - tmp// excludes all nan and inf not in the selection
437 jokers = [Nout, 0, 0, Nnan, Ninf]
444 if or(type(data0)==[1 5]) // Real and complex numbers, sparse or not
447 Nnan = sum(isnan(data))
449 Nzeros = sum(data==0)
451 Nzeros = size(data0,"*") - nnz(data0)
453 if ~discrete // Sparses: 0 is not taken into account
454 Nleftout = sum(data(~inf) < bins(1))
455 Nrightout = sum(data(~inf) > bins($))
456 jokers = [Nleftout Nrightout Nzeros Nnan Ninf]
458 Nout = size(data0,"*") - sum(heights) - Nnan - Ninf
459 binsinf = isinf(bins)
461 Nout = Nout + sum(heights(binsinf))
466 jokers = [Nout 0 Nzeros Nnan Ninf]
469 elseif type(data0)==8 // Encoded integers
471 Nleftout = sum(data < bins(1))
472 Nrightout = sum(data > bins($))
473 jokers = [Nleftout Nrightout]
474 Nout = Nleftout + Nrightout
476 Nout = size(data,"*") - sum(heights)
480 elseif type(data0)==10 // Texts
481 Nempty = sum(data0=="")
482 Nout = size(data0,"*") - sum(heights)
484 tmp = gsort(data,"g","i")
485 Nleftout = sum(find(strcmp(tmp,bins(1))<0)($)) - Nempty
486 tmp = sum(find(strcmp(tmp,bins($))>0)(1))
490 Nrightout = size(data,"*") - tmp + 1
493 jokers = [Nleftout, Nrightout, Nempty]
498 jokers = [Nout, 0, Nempty]
501 // Polynomials: jokers already set when setting bins
504 // Tuning the histogram scale
505 // --------------------------
508 tmp = grep(options, ["counts" "countsnorm"])
510 tmp = grep(options, ["counts" "countsnorm" "density" "densitynorm"])
513 histmode = options(tmp($))
516 // * integers: [leftout, rightout] (continuous) or [out] (discrete)
517 // * decimal or complex numbers, full or sparse:
518 // continuous: [leftout, rightout, zeros, nan, inf]
519 // discrete: [out, 0, zeros, nan, inf]
520 // * polynomials: [out, 0, 0, nan, inf].
521 // * texts: continuous: [leftout, rightout, empty]; discrete: [out, empty]
523 // Number of components to take into account:
524 // Parsing the option "normWith:
525 // Continuous: " leftout rightout inf nan zeros"
526 // Discrete: " out inf nan zeros"
527 if grep(histmode, "norm") then
529 tmp = grep(options, "normwith")
530 normMode = options(tmp)
532 gh = "/[\:|\s+]" // grep() header
534 if or(type(data0)==[1 2 5])
535 if grep(normMode, gh+["inf" "all"]+"/", "r")~=[]
538 if grep(normMode, gh+["nan" "all"]+"/", "r")~=[]
541 if type(data0)==5 & ..
542 grep(normMode,gh+["zeros" "all"]+"/", "r")~=[]
546 if grep(normMode, gh+["leftout" "out" "all"]+"/", "r")~=[]
549 if grep(normMode, gh+["rightout" "out" "all"]+"/", "r")~=[]
552 if type(data0)==10 & grep(normMode, gh+["empty" "all"]+"/", "r")~=[]
555 // Normalized discrete mode:
557 if grep(normMode, gh+"all/", "r")
560 if grep(normMode, gh+"out/", "r")
563 if or(type(data0)==[1 2 5])
564 if grep(normMode, gh+"nan/", "r")
567 if grep(normMode, gh+"inf/", "r")
569 // Inf already in bins must not be counted twice:
570 binsinf = isinf(bins)
572 N = N - sum(heights(binsinf))
575 if type(data0)==5 & and(bins~=0) & ..
576 grep(normMode, gh+"zeros/", "r")~=[]
580 if type(data0)==10 & ~or(bins=="") & ..
581 grep(normMode, gh+"empty/", "r")~=[]
586 //else // all (but zeros for sparses)
587 // N = size(data,"*")
589 heights = heights / N
592 if grep(histmode, "density") then
593 heights = heights ./ (bins(2:$) - bins(1:($-1)))
596 // Formatting outputs for sparse input data
597 // ----------------------------------------
598 if type(data0)==5 then
599 ind = sparse(IJ, ind, size(data0))
602 // Shifting argouts in Scilab 5.5 compatibility mode
603 // -------------------------------------------------