1 // Scilab ( http://www.scilab.org/ ) - This file is part of Scilab
2 // Copyright (C) 2017 - 2020 - Samuel GOUGEON
4 // This file is hereby licensed under the terms of the GNU GPL v2.0,
5 // pursuant to article 5.3.4 of the CeCILL v.2.1.
6 // This file was originally licensed under the terms of the CeCILL v2.1,
7 // and continues to be available under such terms.
8 // For more information, see the COPYING file which you should have received
9 // along with this program.
12 function [heights, jokers, binsOut, ind] = histc(data, bins, options)
14 // 1) histc(bins, data, normalization)
15 // 2) By défaut, histc() worked in "densityNorm,normWith:all" mode
17 // 1) histc(data, bins, options)
18 // 2) By default, histc() works in "counts" mode
22 // histc(data, binsWidth<0)
23 // histc(data, binsEdges)
24 // histc(data, binsValues [, "discrete"])
25 // histc(data, binsAlgo)
26 // histc(data, ..., options)
27 // histc(nbins, data) // deprecated
28 // histc(binsEdges, data) // deprecated
29 // histc(binsEdges, data, normalization=%t) // deprecated
30 // heights = histc(data ..)
31 // [heights, jokers] = histc(data ..)
32 // [heights, jokers, binsOut] = histc(data ..)
33 // [heights, jokers, binsOut, ind] = histc(data ..)
36 // "sqrt" | "sturges" | "freediac"
38 // "discrete" // , "smartbins"
40 // "counts" (default) | "countsNorm" | "density" | "densityNorm"
41 // Normalization "normWith: " options: Priorities: "all" > "out" > ..
42 // decimal, complex numbers: "leftout rightout out inf nan zeros all"
43 // integers: "leftout rightout (out=) all"
44 // Text: "empty out all"
45 // polynomials: "(out=) all"
46 // vector or single string with space-separated flags (case-unsensitive)
48 // Complex numbers are sorted by real parts, then by imaginary parts
50 // * integers: [leftout, rightout] (continuous) or [out] (discrete)
51 // * decimal or complex numbers, full or sparse:
52 // continuous: [leftout, rightout, zeros, nan, inf]
53 // discrete: [out, 0, zeros, nan, inf]
54 // * polynomials: [out, 0, 0, nan, inf]
55 // * texts: continuous: [leftout, rightout, ""]; discrete: [out, 0, ""]
60 [heights, jokers, binsOut, ind] = ([],[],[],[])
61 [Nout, Nleftout, Nzeros, Nrightout, Ninf, Nnan] = (0,0,0,0,0,0)
63 // PARSING INPUT ARGUMENTS
64 // =======================
66 msg = _("%s: Wrong number of input arguments: %d to %d expected.\n")
67 error(msprintf(msg, fname, 1, 3))
69 // Old 5.5.x mode management
70 // -------------------------
72 // Check for Scilab 5.5 back-compatibility
73 if ~isdef("from_histplot") & isdef("data","l") & isdef("bins", "l") & ..
74 type(bins)>0 & size(data,"*")< size(bins,"*") & ..
75 or(type(data)==[1 8]) & type(bins)~=10
76 msg = _("%s: data are now expected in argument #1 => arguments #1 and #2 switched\n")
77 warning(msprintf(msg, fname))
79 [data, bins] = (bins, data)
80 tmp = isdef("options","l") && type(options)==4
83 if isdef("normalization","l")
85 if type(normalization)==4 then
86 norma = normalization(1)
92 if isdef("options","l") & type(options)==4
99 options = ["densityNorm" "normWith:all"]
104 msg = _("%s: ""normalization"" boolean argument #3 is obsolete. ""%s"" option used.\n")
105 warning(msprintf(msg, fname, options(1)))
108 // -----------------------
109 if ~isdef("data","l") | type(data)==0 then
110 msg = _("%s: Argument #%d: %s\n")
111 error(msprintf(msg, fname, 1, "data array missing"))
113 if ~isdef("bins","l") | type(bins)==0 then
116 if ~isdef("options","l") then
119 // Now, bins, data and options are defined. Let's check their types and sizes
121 // CHECKING INPUT ARGUMENTS AND SETTING DEFAULTS
122 // =============================================
126 if type(options)~=10 then
127 msg = _("%s: Argument #%d: Text expected.\n")
128 error(msprintf(msg, fname, 3))
130 options = tokens(strcat(convstr(options),","),",")
132 discrete = grep(options, "discrete")~=[] | type(data)==2 // polynomials
136 if isempty(data) then
139 if ~or(type(data)==[1 2 5 8 10]) then
140 msg = _("%s: Data in argument #%d: Numbers or polynomials or texts expected.\n")
141 error(msprintf(msg, fname, 1))
143 data0 = data // Saving
144 // If encoded integers (including u-int64): => decimal (for dsearch(), etc)
145 if type(data) == 8 then
148 // If sparse numbers: we remove zeros
150 [IJ, data] = spget(data)
152 // If there are only %inf or %nan => return ==!!!!!!!!!!!!!!!!!!!!!!!!!!!
153 if ~discrete & or(type(data0)==[1 5]) then
162 msg = _("%s: data have only %inf or/and %nan values.\n")
163 warning(msprintf(msg, fname))
165 Nzeros = sum(data==0)
167 Nzeros = size(data0,"*") - Nnan - Ninf
172 b = bins(~isinf(bins))
180 jokers = [Ninf Nnan Nzeros Nleftout Nrightout]
184 // Polynomials: setting Nan and Ninf
189 L(tmp) = [] // to ensure excluding any intersection
196 // Setting a default binning mode
198 // histc(data): Default bins:
199 // * strings: discrete, from unique()
200 // * polynomials: discrete, from defBins set hereabove
201 // * encoded integers: "sqrt". if strange() < "sqrt" => binsWidth = 1
202 // * decimal, complex: binMethod option or default "sqrt"
203 if type(data)==10 // texts
205 bins = unique(data)(:)'
208 elseif type(data)==2 // Polynomials
219 else // decimals, complexes, encoded integers
223 bins = unique(data)(:).'
229 if type(bins)==8 | (type(bins)==1 & ~isreal(bins))
230 bins = real(double(bins))
232 if size(bins,"*")==1 & type(bins)==10 then
235 if ~or(convstr(bins)==["sqrt" "sturges" "freediac"])
236 msg = _("%s: Argument #%d: wrong value for binning algo")
237 error(msprintf(msg, fname, 2))
239 // We set the number of bins. Their edges are set later.
240 Nvalid = size(data,"*") - Nnan - Ninf
243 bins = ceil(1 + log2(Nvalid))
246 binWidth = 2*iqr(tmp)* Nvalid^(-1/3)
248 bins = round((max(tmp) - min(tmp)) / binWidth)
253 bins = max(1, round(sqrt(Nvalid)))
257 if length(bins)==1 & type(bins)==1 then
258 // The number of bins or their width is provided
259 if type(data)==10 // texts
260 msg = _("%s: Argument #2: Please provide bins edges or values or leave choosing default bins.\n")
261 error(msprintf(msg, fname))
263 // Case polynomials: not possible here <= discrete previously set to %t
264 // Cases: Encoded integers, decimal or complex numbers
265 if bins>=0 & (bins~=floor(bins) | bins==0) | isnan(bins) | bins==%inf
266 msg = _("%s: Argument #%d: non-zero decimal integer expected.\n")
267 error(msprintf(msg, fname, 2))
269 msg = _("%s: Argument #%d: decimal number > -Inf expected.\n")
270 error(msprintf(msg, fname, 2))
273 // If data are complex-encoded, they will be sorted by increasing
274 // real parts, then by increasing imaginary parts.
275 b = data(~isnan(data) & ~isinf(data))
281 // Required number of bins
282 if (mind == maxd) then
283 mind = mind*(1 - 0.5/bins);
284 maxd = maxd*(1 + 0.5/bins);
286 bins = linspace(mind, maxd, bins+1); // Bins edges
288 // Required bins width
289 if (mind == maxd) then
290 bins = mind + [1 -1]*bins/2
292 // Edges set à 0.0, width = |bins|, from mind- to maxd+
294 bins = (floor(mind/tmp):ceil(maxd/tmp))*tmp
300 // Required number of bins
301 tmp = maxd - mind + 1
302 if tmp <= bins & tmp <= 2^16 // integer-centered bins of width==1
303 bins = (mind:(maxd+1)) - 0.5 // Bins edges
305 ds = (maxd - mind)/bins/2
306 bins = linspace(mind-ds, maxd+ds, bins+1) // Bins edges
309 // Required bins width
310 tmp = max(round(-bins),1) // Bins edges @ -0.5 modulo |int(bins)|
311 bins = (floor(mind/tmp):ceil(maxd/tmp))*tmp - 0.5 // Bins edges
316 // Bins edges are provided: we check that edges are sorted
321 msg = _("%s: Arguments #%d and #%d: Same types expected.\n")
322 error(msprintf(msg, fname, 1, 2))
324 if ~and(bins==unique(bins))
325 msg = _("%s: Argument #%d: Elements must be sorted in increasing order.\n")
326 error(msprintf(msg, fname, 1))
332 if ~or(type(bins)==[1 8])
333 msg = _("%s: Arguments #%d and #%d: Same types expected.\n")
334 error(msprintf(msg, fname, 1, 2))
336 bins = bins(~isnan(bins))
337 if min(diff(bins)) <= 0 then
338 msg = _("%s: Argument #%d: Elements must be in increasing order.\n")
339 error(msprintf(msg, fname, 1))
341 // We manage -%inf open left bin
343 tmp = min(data(~isinf(data)))
350 // We manage %inf open right bin
352 tmp = max(data(~isinf(data)))
366 msg = _("%s: Arguments #%d and #%d: Same types expected.\n")
367 error(msprintf(msg, fname, 1, 2))
369 [bins, dataOrder] = unique(bins)
371 elseif or(type(data)==[1 8])
372 if ~or(type(bins)==[1 8])
373 msg = _("%s: Arguments #%d and #%d: Same types expected.\n")
374 error(msprintf(msg, fname, 1, 2))
376 bins = real(double(bins))
377 bins(isnan(bins)) = []
378 [bins, dataOrder] = unique(bins)
386 // * integers: [leftout, rightout] (continuous) or [out] (discrete)
387 // * decimal or complex numbers, full or sparse:
388 // continuous: [inf, nan, zeros, leftout, rightout]
389 // discrete: [inf, nan, zeros, out]
390 // * polynomials: [inf, nan, out]
391 // * texts: continuous: [empty, leftout, rightout]; discrete: [empty, out]
392 if type(data0)~=2 then
393 hmode = "c" // continuous
395 hmode = "d" // discrete
397 if or(type(data0)==[1 5])
403 tmp1 = isinf(real(tmp))
404 tmp2 = isinf(imag(tmp))
405 tmp3 = isnan(tmp) | (tmp1 & tmp2)
406 if ~discrete & ~isreal(tmp,0)
407 data(tmp1 | tmp2) = %inf
410 clear tmp tmp1 tmp2 tmp3
413 [ind, heights] = dsearch(data, bins, hmode)
414 // ind: bin's number of each data component
417 // We restore the original order of binsValues:
418 [tmp, k] = gsort(dataOrder, "g","i")
419 [tmp, k2] = gsort(k,"g","i")
424 ind(tmp2) = k2(ind(tmp2))
428 // Counts (and location)
431 for i = 1:size(bins,"*")
433 heights(1,i) = sum(tmp)
436 tmp = Ninf - sum(heights(isinf(bins))) // inf out of the selection
437 Nout = size(data,"*") - sum(heights) - Nnan - tmp// excludes all nan and inf not in the selection
438 jokers = [Nout, 0, 0, Nnan, Ninf]
445 if or(type(data0)==[1 5]) // Real and complex numbers, sparse or not
448 Nnan = sum(isnan(data))
450 Nzeros = sum(data==0)
452 Nzeros = size(data0,"*") - nnz(data0)
454 if ~discrete // Sparses: 0 is not taken into account
455 Nleftout = sum(data(~inf) < bins(1))
456 Nrightout = sum(data(~inf) > bins($))
457 jokers = [Nleftout Nrightout Nzeros Nnan Ninf]
459 Nout = size(data0,"*") - sum(heights) - Nnan - Ninf
460 binsinf = isinf(bins)
462 Nout = Nout + sum(heights(binsinf))
467 jokers = [Nout 0 Nzeros Nnan Ninf]
470 elseif type(data0)==8 // Encoded integers
472 Nleftout = sum(data < bins(1))
473 Nrightout = sum(data > bins($))
474 jokers = [Nleftout Nrightout]
475 Nout = Nleftout + Nrightout
477 Nout = size(data,"*") - sum(heights)
481 elseif type(data0)==10 // Texts
482 Nempty = sum(data0=="")
483 Nout = size(data0,"*") - sum(heights)
485 tmp = gsort(data,"g","i")
486 Nleftout = sum(find(strcmp(tmp,bins(1))<0)($)) - Nempty
487 tmp = sum(find(strcmp(tmp,bins($))>0)(1))
491 Nrightout = size(data,"*") - tmp + 1
494 jokers = [Nleftout, Nrightout, Nempty]
499 jokers = [Nout, 0, Nempty]
502 // Polynomials: jokers already set when setting bins
505 // Tuning the histogram scale
506 // --------------------------
509 tmp = grep(options, ["counts" "countsnorm"])
511 tmp = grep(options, ["counts" "countsnorm" "density" "densitynorm"])
514 histmode = options(tmp($))
517 // * integers: [leftout, rightout] (continuous) or [out] (discrete)
518 // * decimal or complex numbers, full or sparse:
519 // continuous: [leftout, rightout, zeros, nan, inf]
520 // discrete: [out, 0, zeros, nan, inf]
521 // * polynomials: [out, 0, 0, nan, inf].
522 // * texts: continuous: [leftout, rightout, empty]; discrete: [out, empty]
524 // Number of components to take into account:
525 // Parsing the option "normWith:
526 // Continuous: " leftout rightout inf nan zeros"
527 // Discrete: " out inf nan zeros"
528 if grep(histmode, "norm") then
530 tmp = grep(options, "normwith")
531 normMode = options(tmp)
533 gh = "/[\:|\s+]" // grep() header
535 if or(type(data0)==[1 2 5])
536 if grep(normMode, gh+["inf" "all"]+"/", "r")~=[]
539 if grep(normMode, gh+["nan" "all"]+"/", "r")~=[]
542 if type(data0)==5 & ..
543 grep(normMode,gh+["zeros" "all"]+"/", "r")~=[]
547 if grep(normMode, gh+["leftout" "out" "all"]+"/", "r")~=[]
550 if grep(normMode, gh+["rightout" "out" "all"]+"/", "r")~=[]
553 if type(data0)==10 & grep(normMode, gh+["empty" "all"]+"/", "r")~=[]
556 // Normalized discrete mode:
558 if grep(normMode, gh+"all/", "r")
561 if grep(normMode, gh+"out/", "r")
564 if or(type(data0)==[1 2 5])
565 if grep(normMode, gh+"nan/", "r")
568 if grep(normMode, gh+"inf/", "r")
570 // Inf already in bins must not be counted twice:
571 binsinf = isinf(bins)
573 N = N - sum(heights(binsinf))
576 if type(data0)==5 & and(bins~=0) & ..
577 grep(normMode, gh+"zeros/", "r")~=[]
581 if type(data0)==10 & ~or(bins=="") & ..
582 grep(normMode, gh+"empty/", "r")~=[]
587 //else // all (but zeros for sparses)
588 // N = size(data,"*")
590 heights = heights / N
593 if grep(histmode, "density") then
594 heights = heights ./ (bins(2:$) - bins(1:($-1)))
597 // Formatting outputs for sparse input data
598 // ----------------------------------------
599 if type(data0)==5 then
600 ind = sparse(IJ, ind, size(data0))
603 // Shifting argouts in Scilab 5.5 compatibility mode
604 // -------------------------------------------------