* Bug #13490 fixed - histc help page fixed to match the macro (by default, normalize... 71/17971/3
Paul Bignier [Tue, 8 Jul 2014 09:01:33 +0000 (11:01 +0200)]
Also, normalization is now done properly: the class counts are divided by the total number of data values, so the normalized result sums to 1.

The 'cw' (bin width) line is intentionally kept, commented out, for possible future normalization modes.

Change-Id: Ie66b66ddd4d6289e28df8cad01f072783716d5d6

scilab/CHANGES
scilab/modules/statistics/help/en_US/descriptive_statistics/histc.xml
scilab/modules/statistics/macros/histc.sci
scilab/modules/statistics/tests/nonreg_tests/bug_13490.dia.ref [new file with mode: 0644]
scilab/modules/statistics/tests/nonreg_tests/bug_13490.tst [new file with mode: 0644]
scilab/modules/statistics/tests/unit_tests/histc.dia.ref
scilab/modules/statistics/tests/unit_tests/histc.tst

index b5be5ad..047ec1e 100644 (file)
@@ -340,6 +340,8 @@ In 6.0.0:
 
 * Bug #13748 fixed - printf, sprintf (en,ja): short descriptions and obsolete flags were missing.
 
+* Bug #13490 fixed - histc help page fixed to match the macro (by default, normalize the result).
+
 * Bug #13769 fixed - t = "abc..//ghi" was parsed as a continued + comment
 
 * Bug #13810 fixed - householder(v, k*v) returned column of %nan. Input parameters were not checked. The Householder matrix could not be returned. Help pages were inaccurate and without examples. There was no householder() demo.
index bbbf25a..9bab6fc 100644 (file)
@@ -74,8 +74,8 @@
                 <listitem>
                     <para>
                         scalar boolean.
-                        <varname>normalization=%f (default)</varname>: <varname>cf</varname> represents the total number of points in each class,
-                        <varname>normalization=%t</varname>: <varname>cf</varname> represents the number of points in each class, relatively to the total number of points
+                        <varname>normalization=%t (default)</varname>: <varname>cf</varname> represents the number of points in each class, relatively to the total number of points,
+                        <varname>normalization=%f</varname>: <varname>cf</varname> represents the total number of points in each class
                     </para>
                 </listitem>
             </varlistentry>
index c0d2533..55fcbd9 100644 (file)
@@ -46,7 +46,6 @@ function [cf, ind] = histc(n, data, normalization)
         data = iconvert(data, 0); // To pass to dsearch
     end
 
-    nd = length(data); // Number of data values
     if length(n) == 1 then  // The number of classes is provided
         if n < 1
             error(msprintf(_("%s: Wrong value for input argument #%d: Must be in the interval %s.\n"),"histc",1,"[1, oo)"));
@@ -71,8 +70,10 @@ function [cf, ind] = histc(n, data, normalization)
 
     // Normalization
     if normalization == %t then
-        cw = cb(2:$)-cb(1:$-1); // Bin width
-        cf = cf./(nd*cw);
+        nd = size(data, "*"); // Number of data values
+        //cw = cb(2:$)-cb(1:$-1); // Bin width
+        //cf = cf./(nd*cw); // Normalization in bin heights
+        cf = cf./nd; // Heights normalization
     end
 
 endfunction
diff --git a/scilab/modules/statistics/tests/nonreg_tests/bug_13490.dia.ref b/scilab/modules/statistics/tests/nonreg_tests/bug_13490.dia.ref
new file mode 100644 (file)
index 0000000..ed841f6
--- /dev/null
@@ -0,0 +1,23 @@
+// =============================================================================
+// Scilab ( http://www.scilab.org/ ) - This file is part of Scilab
+// Copyright (C) 2014 - Scilab Enterprises - Paul Bignier
+//
+//  This file is distributed under the same license as the Scilab package.
+// =============================================================================
+//
+// <-- CLI SHELL MODE -->
+//
+// <-- Non-regression test for bug 13490 -->
+//
+// <-- Bugzilla URL -->
+// http://bugzilla.scilab.org/show_bug.cgi?id=13490
+//
+// <-- Short Description -->
+// histc now matches its help description (by default, normalize the result),
+// normalization is now done properly (in total area).
+// =============================================================================
+myprob = [1 2 2 3 3 3];
+withNorm    = histc(3, myprob);
+withoutNorm = histc(3, myprob, normalization=%f);
+assert_checkequal(withNorm, (1:3)/6);
+assert_checkequal(withoutNorm, 1:3);
diff --git a/scilab/modules/statistics/tests/nonreg_tests/bug_13490.tst b/scilab/modules/statistics/tests/nonreg_tests/bug_13490.tst
new file mode 100644 (file)
index 0000000..b3d3fb6
--- /dev/null
@@ -0,0 +1,26 @@
+// =============================================================================
+// Scilab ( http://www.scilab.org/ ) - This file is part of Scilab
+// Copyright (C) 2014 - Scilab Enterprises - Paul Bignier
+//
+//  This file is distributed under the same license as the Scilab package.
+// =============================================================================
+//
+// <-- CLI SHELL MODE -->
+//
+// <-- Non-regression test for bug 13490 -->
+//
+// <-- Bugzilla URL -->
+// http://bugzilla.scilab.org/show_bug.cgi?id=13490
+//
+// <-- Short Description -->
+// histc now matches its help description (by default, normalize the result),
+// normalization is now done properly (in total area).
+// =============================================================================
+
+myprob = [1 2 2 3 3 3];
+
+withNorm    = histc(3, myprob);
+withoutNorm = histc(3, myprob, normalization=%f);
+
+assert_checkequal(withNorm, (1:3)/6);
+assert_checkequal(withoutNorm, 1:3);
index c151db4..220fb2d 100644 (file)
@@ -10,26 +10,26 @@ rand("seed", 0);
 d = rand(1, 10000, "normal");
 [cfC, indC] = histc(20, d);
 refCF = [
-0.000257209601
-0.001028838404
-0.005144192018
-0.009259545632
-0.027007008092
-0.073561945850
-0.128604800438
-0.205767680700
-0.303764538634
-0.364723214041
-0.397131623751
-0.363179956436
-0.289875220186
-0.190335104648
-0.110085709175
-0.061215885008
-0.026749798491
-0.010288384035
-0.002829305610
-0.001286048004 ]';
+0.0001
+0.0004
+0.002
+0.0036
+0.0105
+0.0286
+0.05
+0.08
+0.1181
+0.1418
+0.1544
+0.1412
+0.1127
+0.074
+0.0428
+0.0238
+0.0104
+0.004
+0.0011
+0.0005 ]';
 assert_checkequal(size(indC), [1 10000]);
 assert_checkalmostequal(cfC, refCF);
 [cfC, indC] = histc(int32(20), d);
@@ -65,7 +65,7 @@ assert_checkequal(size(indC), [1 10000]);
 assert_checkequal(cfC, refCF);
 // With x instead of n as first argument
 cfC = histc([-5 0 5], d);
-refCF = [0.09982 0.10018];
+refCF = [0.4991 0.5009];
 assert_checkequal(cfC, refCF);
 cfC = histc(int8([-5 0 5]), d);
 assert_checkequal(cfC, refCF);
@@ -73,7 +73,7 @@ cfC = histc(int16([-5 0 5]), d);
 assert_checkequal(cfC, refCF);
 cfC = histc(int32([-5 0 5]), d);
 assert_checkequal(cfC, refCF);
-assert_checkequal(sum(cfC), 0.2);
+assert_checkequal(sum(cfC), 1);
 cfC = histc([-5 0 5], d, %f);
 assert_checkequal(sum(cfC), 10000); // -5 > d > 5, and d has 10000 elements.
 lambda = 2;
@@ -81,46 +81,46 @@ grand("setsd", 0);
 D = grand(100000, 1, "exp", 1/lambda);
 [cfC, indC] = histc(40, D);
 refCF = [
-1.698711801698
-1.24408608389
-0.908431380900
-0.656611502242
-0.489635746028
-0.352875852007
-0.257181774872
-0.194479129735
-0.141490978915
-0.097271105433
-0.073615680960
-0.050401824411
-0.038416409344
-0.028512671632
-0.021447584856
-0.014634822607
-0.010660711296
-0.008200547151
-0.005866545269
-0.004289516971
-0.004163354707
-0.001387784902
-0.001450866034
-0.000946216979
-0.000756973583
-0.000756973583
-0.000441567923
-0.000315405660
-0.000252324528
-0.000189243396
-0.000063081132
-0.000189243396
-0.000126162264
-0.000063081132
-0.000063081132
+0.26929
+0.19722
+0.14401
+0.10409
+0.07762
+0.05594
+0.04077
+0.03083
+0.02243
+0.01542
+0.01167
+0.00799
+0.00609
+0.00452
+0.0034
+0.00232
+0.00169
+0.0013
+0.00093
+0.00068
+0.00066
+0.00022
+0.00023
+0.00015
+0.00012
+0.00012
+0.00007
+0.00005
+0.00004
+0.00003
+0.00001
+0.00003
+0.00002
+0.00001
+0.00001
 0
 0
-0.000063081132
+0.00001
 0
-0.000063081132 ]';
+0.00001 ]';
 assert_checkequal(size(indC), [100000 1]);
 assert_checkalmostequal(cfC, refCF);
 [cfC, indC] = histc(int8(40), D);
@@ -176,7 +176,7 @@ assert_checkequal(size(indC), [100000 1]);
 assert_checkalmostequal(cfC, refCF);
 // With x instead of n as first argument
 cfC = histc([0 7], D);
-refCF = 0.142857142857;
+refCF = 1;
 assert_checkalmostequal(cfC, refCF);
 cfC = histc(int8([0 7]), D);
 assert_checkalmostequal(cfC, refCF);
index 1dcb9ef..9f71851 100644 (file)
@@ -12,26 +12,26 @@ d = rand(1, 10000, "normal");
 [cfC, indC] = histc(20, d);
 
 refCF = [
-0.000257209601
-0.001028838404
-0.005144192018
-0.009259545632
-0.027007008092
-0.073561945850
-0.128604800438
-0.205767680700
-0.303764538634
-0.364723214041
-0.397131623751
-0.363179956436
-0.289875220186
-0.190335104648
-0.110085709175
-0.061215885008
-0.026749798491
-0.010288384035
-0.002829305610
-0.001286048004 ]';
+0.0001
+0.0004
+0.002
+0.0036
+0.0105
+0.0286
+0.05
+0.08
+0.1181
+0.1418
+0.1544
+0.1412
+0.1127
+0.074
+0.0428
+0.0238
+0.0104
+0.004
+0.0011
+0.0005 ]';
 assert_checkequal(size(indC), [1 10000]);
 assert_checkalmostequal(cfC, refCF);
 [cfC, indC] = histc(int32(20), d);
@@ -71,7 +71,7 @@ assert_checkequal(cfC, refCF);
 
 // With x instead of n as first argument
 cfC = histc([-5 0 5], d);
-refCF = [0.09982 0.10018];
+refCF = [0.4991 0.5009];
 assert_checkequal(cfC, refCF);
 cfC = histc(int8([-5 0 5]), d);
 assert_checkequal(cfC, refCF);
@@ -79,7 +79,7 @@ cfC = histc(int16([-5 0 5]), d);
 assert_checkequal(cfC, refCF);
 cfC = histc(int32([-5 0 5]), d);
 assert_checkequal(cfC, refCF);
-assert_checkequal(sum(cfC), 0.2);
+assert_checkequal(sum(cfC), 1);
 cfC = histc([-5 0 5], d, %f);
 assert_checkequal(sum(cfC), 10000); // -5 > d > 5, and d has 10000 elements.
 
@@ -91,46 +91,46 @@ D = grand(100000, 1, "exp", 1/lambda);
 [cfC, indC] = histc(40, D);
 
 refCF = [
-1.698711801698
-1.24408608389
-0.908431380900
-0.656611502242
-0.489635746028
-0.352875852007
-0.257181774872
-0.194479129735
-0.141490978915
-0.097271105433
-0.073615680960
-0.050401824411
-0.038416409344
-0.028512671632
-0.021447584856
-0.014634822607
-0.010660711296
-0.008200547151
-0.005866545269
-0.004289516971
-0.004163354707
-0.001387784902
-0.001450866034
-0.000946216979
-0.000756973583
-0.000756973583
-0.000441567923
-0.000315405660
-0.000252324528
-0.000189243396
-0.000063081132
-0.000189243396
-0.000126162264
-0.000063081132
-0.000063081132
+0.26929
+0.19722
+0.14401
+0.10409
+0.07762
+0.05594
+0.04077
+0.03083
+0.02243
+0.01542
+0.01167
+0.00799
+0.00609
+0.00452
+0.0034
+0.00232
+0.00169
+0.0013
+0.00093
+0.00068
+0.00066
+0.00022
+0.00023
+0.00015
+0.00012
+0.00012
+0.00007
+0.00005
+0.00004
+0.00003
+0.00001
+0.00003
+0.00002
+0.00001
+0.00001
 0
 0
-0.000063081132
+0.00001
 0
-0.000063081132 ]';
+0.00001 ]';
 assert_checkequal(size(indC), [100000 1]);
 assert_checkalmostequal(cfC, refCF);
 [cfC, indC] = histc(int8(40), D);
@@ -189,7 +189,7 @@ assert_checkalmostequal(cfC, refCF);
 
 // With x instead of n as first argument
 cfC = histc([0 7], D);
-refCF = 0.142857142857;
+refCF = 1;
 assert_checkalmostequal(cfC, refCF);
 cfC = histc(int8([0 7]), D);
 assert_checkalmostequal(cfC, refCF);