From f4411be7321b93a23cd71b8744d919a6be85913a Mon Sep 17 00:00:00 2001 From: Samuel GOUGEON Date: Tue, 19 Nov 2013 10:23:38 +0100 Subject: [PATCH] * Bug #7858 fixed - Statistics: variance and variancef After commit https://codereview.scilab.org/#/c/13009/ , variance and variancef help pages reworked and fix for variancef. CHANGES file already modified, in mentioned commit. Change-Id: I640e1b256514459831d84012c5a8f256b7b459aa --- .../help/en_US/descriptive_statistics/variance.xml | 155 +++++++++--- .../en_US/descriptive_statistics/variancef.xml | 163 +++++++++---- .../help/fr_FR/descriptive_statistics/variance.xml | 165 ++++++++++--- .../fr_FR/descriptive_statistics/variancef.xml | 207 ++++++++++++++++ scilab/modules/statistics/macros/variance.sci | 4 +- scilab/modules/statistics/macros/variancef.sci | 254 +++++++++++--------- .../statistics/tests/nonreg_tests/bug_7858.dia.ref | 4 +- .../statistics/tests/nonreg_tests/bug_7858.tst | 4 +- .../statistics/tests/unit_tests/variancef.dia.ref | 8 +- .../statistics/tests/unit_tests/variancef.tst | 8 +- 10 files changed, 715 insertions(+), 257 deletions(-) create mode 100644 scilab/modules/statistics/help/fr_FR/descriptive_statistics/variancef.xml diff --git a/scilab/modules/statistics/help/en_US/descriptive_statistics/variance.xml b/scilab/modules/statistics/help/en_US/descriptive_statistics/variance.xml index 2260c2b..1b7d66d 100644 --- a/scilab/modules/statistics/help/en_US/descriptive_statistics/variance.xml +++ b/scilab/modules/statistics/help/en_US/descriptive_statistics/variance.xml @@ -1,8 +1,8 @@ + + + variancef + variance (et moyenne) d'un vecteur ou d'une matrice réelle ou complexe de nombres pondérés en fréquence + + + Séquence d'appel + + [s [,mc]] = variancef(x, fre [,orien [,m]]) + + [s, mc] = variancef(x) + [s, mc] = variancef(x, fre, "r"|1 ) + [s, mc] = variancef(x, fre, "c"|2 ) + [s, mc] = variancef(x, fre, "*" , %nan) + [s, mc] = variancef(x, fre, "r"|1, %nan) + [s, mc] = variancef(x, fre, "c"|2, %nan) + s 
= variancef(x, fre, "*", m) + s = variancef(x, fre, "r", m) + s = variancef(x, fre, "c", m) + + + + Paramètres + + + x + + + vecteur ou matrice réel(le) ou complexe + + + + + fre + + + vecteur ou matrice d'entiers positifs = fréquences: fre(i,j) est le nombre de fois que x(i,j) doit être compté. + fre et x doivent être de même taille. + + + + + orien + + l'orientation du calcul. Les valeurs acceptées sont: + + 1 ou "r" : le résultat est une rangée, après un calcul en colonne. + 2 ou "c" : le résultat est une colonne, après un calcul en ligne. + + "*" : calcul sur tous les éléments de x confondus (mode utilisé par défaut); utile si le 3ème paramètre m doit par ailleurs être indiqué. + + + + + + + m + + + Espérance mathématique de la loi de distribution de probabilité sous-jacente (supposée connue). + + + mode "*" (par défaut): m doit être scalaire + + + mode "r" ou 1 : m est un vecteur ligne à size(x,2) éléments. La variance des éléments de la colonne #j de x est calculée en utilisant m(j) comme moyenne pour la colonne. Si m est la même pour toutes les colonnes, sa valeur scalaire peut être fournie au lieu d'une ligne. + + + mode "c" ou 2 : m est un vecteur colonne à size(x,1) éléments. La variance des éléments de la ligne #i de x est calculée en utilisant m(i) comme moyenne pour la ligne. Si m est la même pour toutes les lignes, sa valeur scalaire peut être fournie au lieu d'une colonne. + + + + + Lorsque m n'est pas indiquée, la variance est estimée en divisant par (n-1) (non n) la distance quadratique totale des n valeurs à la moyenne calculée mean(x) (ou mean(x,"c") ou mean(x,"r")) (n vaut length(x) ou size(x,1) ou size(x,2)). Si les éléments de x sont indépendants entre eux, l'estimation de la variance retournée est non biaisée. + + + Sinon, la variance est estimée en divisant par n (au lieu de n-1) la distance quadratique totale des valeurs x(k) à m (n valant toujours length(x) ou size(x,1) ou size(x,2)).
Alors : + + + Si une véritable valeur m indépendante des éléments de x est fournie, elle est utilisée comme moyenne de référence dans le calcul de la variance. La valeur obtenue et retournée pour celle-ci est alors réputée non biaisée. + + + Si la valeur spéciale m=%nan est fournie, la variance est toujours "normalisée" par n (non n-1) mais est estimée en utilisant l'estimation "empirique" + m=mean(x) de la moyenne de référence (ou m = mean(x,"c") ou m = mean(x,"r")). Comme m=%nan n'apporte aucune information nouvelle à "l'équation", celle-ci retourne une estimation biaisée de la variance. + + + + + + + s + + Estimation de la variance des valeurs de x (pondérées). s est un scalaire ou un vecteur ligne ou colonne selon l'option orien utilisée. + + + + mc + + Moyenne pondérée calculée à partir de x (= mean(x,..)) et utilisée comme référence dans le calcul de la variance. Valeur scalaire, ou vecteur colonne ou ligne, selon l'option orien utilisée. + + + + + + Description + + Cette fonction calcule la variance des valeurs d'un vecteur ou d'une matrice + x, chacun des x(i,j) étant compté fre(i,j) fois. + Si x est complexe, alors variancef(x, fre,..) = variancef(real(x), fre,..) + variancef(imag(x), fre,..) est retourné. + + + s = variancef(x,fre) (ou s=variancef(x,fre,"*")) retourne la variance scalaire calculée à partir de toutes les valeurs de x. + + + s = variancef(x,fre,"r") (ou s = variancef(x,fre,1)) retourne un vecteur ligne s tel que pour chaque j, + s(j) = variancef(x(:,j),fre(:,j),..). + + + s = variancef(x,fre,"c") (ou s = variancef(x,fre,2)) retourne un vecteur colonne s tel que pour chaque i, + s(i) = variancef(x(i,:),fre(i,:),..). + + + Quand la moyenne m est fournie, elle est utilisée comme référence dans le calcul de la variance au lieu d'être évaluée intérieurement à partir de x (à moins qu'elle ne soit égale à %nan : voir la description de m).
Ceci permet de calculer la variance d'un échantillon x en regard d'un modèle statistique donné (plutôt que d'extraire une dispersion empirique pour construire le modèle). + + + + Exemples + + + + Voir aussi + + + variance + + + mtlb_var + + + stdevf + + + + + Bibliographie + + Wonacott, T.H. & Wonacott, R.J.; Introductory Statistics, fifth edition, J.Wiley & Sons, 1990. + + + + Historique + + + 5.5.0 + + + + variancef() peut être appelée avec des nombres complexes. + + + variancef(x, fre, orien, m) introduit: la vraie moyenne m de la loi de distribution de probabilité sous-jacente peut être utilisée. + + + variancef(x, fre, orien, %nan) introduit: mean(x, fre,..) est utilisé mais divisé par n valeurs (à la place de n-1) + + + [s, mc] = variancef(x,fre,..) introduit : la moyenne mc évaluée à partir de x et fre est maintenant retournée + + + + + + + diff --git a/scilab/modules/statistics/macros/variance.sci b/scilab/modules/statistics/macros/variance.sci index c100349..d9d39b6 100644 --- a/scilab/modules/statistics/macros/variance.sci +++ b/scilab/modules/statistics/macros/variance.sci @@ -76,8 +76,8 @@ function [s, m] = variance(x, orien, m) orien = "r" end else - tmp = gettext("%s: Wrong value for input argument: ''%s'', ''%s'', %d or %d expected.\n") - error(msprintf(tmp, "variance", "c", "r", 1, 2)) + tmp = gettext("%s: Wrong value for input argument #%d: ''%s'', ''%s'', %d or %d expected.\n") + error(msprintf(tmp, "variance", 2, "c", "r", 1, 2)) end // Calculations diff --git a/scilab/modules/statistics/macros/variancef.sci b/scilab/modules/statistics/macros/variancef.sci index 7d30847..b36d14c 100644 --- a/scilab/modules/statistics/macros/variancef.sci +++ b/scilab/modules/statistics/macros/variancef.sci @@ -1,118 +1,136 @@ - -// Scilab ( http://www.scilab.org/ ) - This file is part of Scilab -// Copyright (C) 2000 - INRIA - Carlos Klimann -// -// This file must be used under the terms of the CeCILL. 
-// This source file is licensed as described in the file COPYING, which -// you should have received as part of this distribution. The terms -// are also available at -// http://www.cecill.info/licences/Licence_CeCILL_V2.1-en.txt -// - -function [s,m]=variancef(x,fre,orien,m) - // - //This function computes the variance of the values of a vector or - //matrix x, each of them counted with a frequency signaled by the - //corresponding values of the integer vector or matrix fre with the same - //type of x. - // - //For a vector or matrix x, s=variancef(x,fre) (or s=variancef(x,fre,'*') returns - //in scalar s the variance of all the entries of x, each value counted - //with the multiplicity indicated by the corresponding value of fre. - // - //s=variancef(x,fre,'r')(or, equivalently, s=variancef(x,fre,1)) returns in each - //entry of the row vector s of type 1xsize(x,'c') the variance of each - //column of x, each value counted with the multiplicity indicated by the - //corresponding value of fre. - // - //s=variancef(x,fre,'c')(or, equivalently, s=variancef(x,fre,2)) returns in each - //entry of the column vector s of type size(x,'c')x1 the variance of - //each row of x, each value counted with the multiplicity indicated by - //the corresponding value of fre. - // - //The input argument m represents the a priori mean. If it is present, then the sum is - //divided by n. Otherwise ("sample variance"), it is divided by n-1. 
- // - // - if x==[] then s=%nan, return, end - [lhs,rhs]=argn(0) - if rhs<2|rhs>4 then - error(msprintf(gettext("%s: Wrong number of input arguments: %d to %d expected.\n"),"variancef",2,4)), - end - if x==[]|fre==[]|fre==0, s=%nan;return,end - if rhs==2 then - sumfre=sum(fre) - if sumfre <= 1 then error(msprintf(gettext("%s: Wrong value for input argument #%d: Must be > %d.\n"),"variancef", 2, 1)), end - m = meanf(x,fre) - s=(sum(((x-m).^2).*fre))/(sumfre-1), - return, - end - biased = %f - if rhs==4 then - if typeof(m)~="constant" then - tmp = gettext("%s: Wrong value of m : a priori mean expected.\n") - error(msprintf(tmp, "variance", )) - elseif orien=="*" then - if ~isscalar(m) then - tmp = gettext("%s: Wrong value of m : a priori mean expected.\n") - error(msprintf(tmp, "variance", )) - end - elseif orien=="r" | orien==1 then - if size(m)~=[1 size(x,"c")] & ~isscalar(m) then - tmp = gettext("%s: Wrong value of m : a priori mean expected.\n") - error(msprintf(tmp, "variance", )) - end - elseif orien=="c" | orien==2 then - if size(m)~=[size(x,"r") 1] & ~isscalar(m) then - tmp = gettext("%s: Wrong value of m : a priori mean expected.\n") - error(msprintf(tmp, "variance", )) - end - end - if isnan(m) then - biased = %t; // Compute the biased variance - end - end - if orien=="*", - sumfre=sum(fre) - if sumfre <= 1 then error(msprintf(gettext("%s: Wrong value for input argument #%d: Must be > %d.\n"),"variancef", 2, 1)),end - if rhs<4 then - m = meanf(x,fre) - s=(sum(((x-m).^2).*fre))/(sumfre-1), - elseif biased == %t - m = meanf(x,fre) - s=(sum(((x-m).^2).*fre))/sumfre, - else - s=(sum(((x-m).^2).*fre))/sumfre, - end - elseif orien=="r"|orien==1, - sumfre=sum(fre,"r") - if or(sumfre==0) then error(msprintf(gettext("%s: Wrong value for input argument #%d: Must be > %d.\n"),"variancef",2,1)),end - if rhs<4 | biased == %t then - m = meanf(x,fre,"r") - elseif isscalar(m) then - m = m*ones(1, size(x,"c")); - end - m2 = ones(size(x,"r"),1)*m - if rhs<4 then - 
s=(sum(((x-m2).^2).*fre))./(sumfre-1) - else - s=(sum(((x-m2).^2).*fre))./sumfre - end - elseif orien=="c"|orien==2, - sumfre=sum(fre,"c") - if or(sumfre==0) then error(msprintf(gettext("%s: Wrong value for input argument #%d: Must be > %d.\n"),"variancef",2,1)),end - if rhs<4 | biased == %t then - m = meanf(x,fre,"c") - elseif isscalar(m) then - m = m*ones(size(x,"r"), 1); - end - m2 = m*ones(1,size(x,"c")) - if rhs<4 then - s=(sum((x-m2).^2,"c"))./(sumfre-1) - else - s=(sum((x-m2).^2,"c"))./sumfre - end - else error(msprintf(gettext("%s: Wrong value for input argument #%d: ''%s'', ''%s'', ''%s'', %d or %d expected.\n"),"variancef",3,"*","c","r",1,2)) - end - -endfunction + +// Scilab ( http://www.scilab.org/ ) - This file is part of Scilab +// Copyright (C) 2013 - Scilab Enterprises - Paul BIGNIER : m parameter added +// Copyright (C) 2013 - Samuel GOUGEON : http://bugzilla.scilab.org/11209 fixed +// Copyright (C) 2000 - INRIA - Carlos Klimann +// +// This file must be used under the terms of the CeCILL. +// This source file is licensed as described in the file COPYING, which +// you should have received as part of this distribution. The terms +// are also available at +// http://www.cecill.info/licences/Licence_CeCILL_V2.1-en.txt +// + +function [s, m] = variancef(x, fre, orien, m) + // + //This function computes the variance of the values of a vector or + //matrix x, each of them counted with a frequency signaled by the + //corresponding values of the integer vector or matrix fre with the same + //type of x. + // + //For a vector or matrix x, s=variancef(x,fre) (or s=variancef(x,fre,'*') returns + //in scalar s the variance of all the entries of x, each value counted + //with the multiplicity indicated by the corresponding value of fre. 
+ // + //s=variancef(x,fre,'r')(or, equivalently, s=variancef(x,fre,1)) returns in each + //entry of the row vector s of type 1xsize(x,'c') the variance of each + //column of x, each value counted with the multiplicity indicated by the + //corresponding value of fre. + // + //s=variancef(x,fre,'c')(or, equivalently, s=variancef(x,fre,2)) returns in each + //entry of the column vector s of type size(x,'c')x1 the variance of + //each row of x, each value counted with the multiplicity indicated by + //the corresponding value of fre. + // + //The input argument m represents the a priori mean. If it is present, then the sum is + //divided by n. Otherwise ("sample variance"), it is divided by n-1. + // + // + + [lhs,rhs] = argn(0) + if rhs<2 | rhs>4 then + msg = gettext("%s: Wrong number of input arguments: %d to %d expected.\n") + error(msprintf(msg, "variancef", 2, 4)) + end + if x==[] | fre==[] | fre==0 + s = %nan + return + end + if rhs==2 then + sumfre = sum(fre) + if sumfre <= 1 then + msg = gettext("%s: Wrong value for input argument #%d: Must be > %d.\n") + error(msprintf(msg, "variancef", 2, 1)), end + m = meanf(x,fre) + s = sum((abs(x-m).^2).*fre) / (sumfre-1) + return + end + biased = %f + if rhs==4 then + if typeof(m)~="constant" then + tmp = gettext("%s: Wrong value of m : a priori mean expected.\n") + error(msprintf(tmp, "variancef", )) + elseif orien=="*" then + if ~isscalar(m) then + tmp = gettext("%s: Wrong value of m : a priori mean expected.\n") + error(msprintf(tmp, "variancef", )) + end + elseif orien=="r" | orien==1 then + if size(m)~=[1 size(x,"c")] & ~isscalar(m) then + tmp = gettext("%s: Wrong value of m : a priori mean expected.\n") + error(msprintf(tmp, "variancef", )) + end + elseif orien=="c" | orien==2 then + if size(m)~=[size(x,"r") 1] & ~isscalar(m) then + tmp = gettext("%s: Wrong value of m : a priori mean expected.\n") + error(msprintf(tmp, "variancef", )) + end + end + if isnan(m) then + biased = %t; // Compute the biased variance + end 
+ end + if orien=="*", + sumfre = sum(fre) + if sumfre <= 1 then + msg = _("%s: Wrong value for input argument #%d: Must be > %d.\n") + error(msprintf(msg, "variancef", 2, 1)),end + if rhs<4 then + m = meanf(x,fre) + s = sum((abs(x-m).^2).*fre) / (sumfre-1) + elseif biased == %t + m = meanf(x,fre) + s = sum((abs(x-m).^2).*fre) / sumfre + else + s = sum((abs(x-m).^2).*fre) / sumfre + end + elseif orien=="r" | orien==1, + sumfre = sum(fre, "r") + if or(sumfre==0) then + msg = _("%s: Wrong value for input argument #%d: Must be > %d.\n") + error(msprintf(msg, "variancef", 2, 1)) + end + if rhs<4 | biased == %t then + m = meanf(x,fre,"r") + elseif isscalar(m) then + m = m*ones(1, size(x,"c")); + end + m2 = ones(size(x,"r"),1)*m + if rhs<4 then + s = sum((abs(x-m2).^2).*fre, "r") ./ (sumfre-1) + else + s = sum((abs(x-m2).^2).*fre, "r") ./ sumfre + end + elseif orien=="c" | orien==2, + sumfre = sum(fre, "c") + if or(sumfre==0) then + msg = _("%s: Wrong value for input argument #%d: Must be > %d.\n") + error(msprintf(msg, "variancef", 2, 1)) + end + if rhs<4 | biased == %t then + m = meanf(x,fre,"c") + elseif isscalar(m) then + m = m*ones(size(x,"r"), 1); + end + m2 = m*ones(1,size(x,"c")) + if rhs<4 then + s = sum((abs(x-m2).^2).*fre, "c") ./ (sumfre-1) + else + s = sum((abs(x-m2).^2).*fre, "c") ./ sumfre + end + else + msg = _("%s: Wrong value for input argument #%d: ''%s'', ''%s'', ''%s'', %d or %d expected.\n") + error(msprintf(msg, "variancef", 3, "*", "c", "r", 1, 2)) + end + +endfunction diff --git a/scilab/modules/statistics/tests/nonreg_tests/bug_7858.dia.ref b/scilab/modules/statistics/tests/nonreg_tests/bug_7858.dia.ref index 5e3e732..499e4bc 100644 --- a/scilab/modules/statistics/tests/nonreg_tests/bug_7858.dia.ref +++ b/scilab/modules/statistics/tests/nonreg_tests/bug_7858.dia.ref @@ -49,14 +49,14 @@ x = [0.2113249 0.0002211 0.6653811; 0.7560439 0.9546254 0.6283918]; fre = [1 2 3; 3 4 3]; orien = "r"; refM = [1 1 1]; -refV = [0.889522663062 0.593015108708 
0.593015108708]; +refV = [0.200138037385 0.334558519179 0.125031231272]; [v, m] = variancef( x, fre, orien, ones(meanf(x,fre,orien)) ); assert_checkalmostequal([v m], [refV refM]); [v, m] = variancef( x, fre, orien, 1 ); assert_checkalmostequal([v m], [refV refM]); orien = "c"; refM = [1; 1]; -refV = [0.288922678414; 0.019966608736]; +refV = [0.492838922640; 0.060105711640]; [v, m] = variancef( x, fre, orien, ones(meanf(x,fre,orien)) ); assert_checkalmostequal([v m], [refV refM]); [v, m] = variancef( x, fre, orien, 1 ); diff --git a/scilab/modules/statistics/tests/nonreg_tests/bug_7858.tst b/scilab/modules/statistics/tests/nonreg_tests/bug_7858.tst index c44b5a9..7fab7e8 100644 --- a/scilab/modules/statistics/tests/nonreg_tests/bug_7858.tst +++ b/scilab/modules/statistics/tests/nonreg_tests/bug_7858.tst @@ -55,7 +55,7 @@ x = [0.2113249 0.0002211 0.6653811; 0.7560439 0.9546254 0.6283918]; fre = [1 2 3; 3 4 3]; orien = "r"; refM = [1 1 1]; -refV = [0.889522663062 0.593015108708 0.593015108708]; +refV = [0.200138037385 0.334558519179 0.125031231272]; [v, m] = variancef( x, fre, orien, ones(meanf(x,fre,orien)) ); assert_checkalmostequal([v m], [refV refM]); @@ -65,7 +65,7 @@ assert_checkalmostequal([v m], [refV refM]); orien = "c"; refM = [1; 1]; -refV = [0.288922678414; 0.019966608736]; +refV = [0.492838922640; 0.060105711640]; [v, m] = variancef( x, fre, orien, ones(meanf(x,fre,orien)) ); assert_checkalmostequal([v m], [refV refM]); diff --git a/scilab/modules/statistics/tests/unit_tests/variancef.dia.ref b/scilab/modules/statistics/tests/unit_tests/variancef.dia.ref index 3cb5eaf..255d64a 100644 --- a/scilab/modules/statistics/tests/unit_tests/variancef.dia.ref +++ b/scilab/modules/statistics/tests/unit_tests/variancef.dia.ref @@ -13,11 +13,11 @@ refV = 0.096057419504; [v, m] = variancef(x, fre); assert_checkalmostequal([v m], [refV refM]); refM = [0.61986415 0.636490633333 0.64688645]; -refV = [0.479702720446 0.287821632267 0.287821632267]; +refV = [0.074179697240 
0.242903351429 0.000410462494]; [v, m] = variancef(x, fre, "r"); assert_checkalmostequal([v m], [refV refM]); refM = [0.367985066667; 0.79718087]; -refV = [0.049647428728; 0.006107864498]; +refV = [0.112075256021; 0.021077902385]; [v, m] = variancef(x, fre, "c"); assert_checkalmostequal([v m], [refV refM]); // With the a priori mean @@ -26,11 +26,11 @@ refV = 0.090053830785; [v, m] = variancef(x, fre, "*", meanf(x,fre)); assert_checkalmostequal([v m], [refV refM]); refM = [0.61986415 0.636490633333 0.64688645]; -refV = [0.35977704 0.23985136 0.23985136]; +refV = [0.055634772930 0.202419459524 0.000342052079]; [v, m] = variancef(x, fre, "r", meanf(x,fre,"r")); assert_checkalmostequal([v m], [refV refM]); refM = [0.367985066667; 0.79718087]; -refV = [0.041372857273; 0.005497078047]; +refV = [0.093396046684; 0.018970112146]; [v, m] = variancef(x, fre, "c", meanf(x,fre,"c")); assert_checkalmostequal([v m], [refV refM]); // Biased variance diff --git a/scilab/modules/statistics/tests/unit_tests/variancef.tst b/scilab/modules/statistics/tests/unit_tests/variancef.tst index 15d3c91..66630d2 100644 --- a/scilab/modules/statistics/tests/unit_tests/variancef.tst +++ b/scilab/modules/statistics/tests/unit_tests/variancef.tst @@ -16,12 +16,12 @@ refV = 0.096057419504; assert_checkalmostequal([v m], [refV refM]); refM = [0.61986415 0.636490633333 0.64688645]; -refV = [0.479702720446 0.287821632267 0.287821632267]; +refV = [0.074179697240 0.242903351429 0.000410462494]; [v, m] = variancef(x, fre, "r"); assert_checkalmostequal([v m], [refV refM]); refM = [0.367985066667; 0.79718087]; -refV = [0.049647428728; 0.006107864498]; +refV = [0.112075256021; 0.021077902385]; [v, m] = variancef(x, fre, "c"); assert_checkalmostequal([v m], [refV refM]); @@ -32,12 +32,12 @@ refV = 0.090053830785; assert_checkalmostequal([v m], [refV refM]); refM = [0.61986415 0.636490633333 0.64688645]; -refV = [0.35977704 0.23985136 0.23985136]; +refV = [0.055634772930 0.202419459524 0.000342052079]; [v, m] 
= variancef(x, fre, "r", meanf(x,fre,"r")); assert_checkalmostequal([v m], [refV refM]); refM = [0.367985066667; 0.79718087]; -refV = [0.041372857273; 0.005497078047]; +refV = [0.093396046684; 0.018970112146]; [v, m] = variancef(x, fre, "c", meanf(x,fre,"c")); assert_checkalmostequal([v m], [refV refM]); -- 1.7.9.5