[doc] datafit page fixed after 534951c
[scilab.git] / scilab / modules / optimization / help / en_US / nonlinearleastsquares / datafit.xml
1 <?xml version="1.0" encoding="UTF-8"?>
2 <!--
3  * Scilab ( http://www.scilab.org/ ) - This file is part of Scilab
4  * Copyright (C) 2008 - INRIA
5  * Copyright (C) 2011 - DIGITEO - Michael Baudin
6  * Copyright (C) 2018 - Samuel GOUGEON
7  *
8  * Copyright (C) 2012 - 2016 - Scilab Enterprises
9  *
10  * This file is hereby licensed under the terms of the GNU GPL v2.0,
11  * pursuant to article 5.3.4 of the CeCILL v.2.1.
12  * This file was originally licensed under the terms of the CeCILL v2.1,
13  * and continues to be available under such terms.
14  * For more information, see the COPYING file which you should have received
15  * along with this program.
16  *
17  -->
18 <refentry xmlns="http://docbook.org/ns/docbook" xmlns:xlink="http://www.w3.org/1999/xlink"
19         xmlns:svg="http://www.w3.org/2000/svg" xmlns:ns4="http://www.w3.org/1999/xhtml"
20         xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:db="http://docbook.org/ns/docbook"
21         xmlns:scilab="http://www.scilab.org" xml:id="datafit" xml:lang="en">
22     <refnamediv>
23         <refname>datafit</refname>
24         <refpurpose>Non linear (constrained) parametric fit of measured (weighted) data</refpurpose>
25     </refnamediv>
26     <refsynopsisdiv>
27         <title>Syntax</title>
28         <synopsis>
29         [p, fmin, status] = datafit(G, Data, p0)
30         [p, fmin, status] = datafit(G, Data, Wd, p0)
31         [p, fmin, status] = datafit(G, Data, Wd, 'b', p_inf, p_sup, p0)
32         [p, fmin, status] = datafit(G, Data, 'b', p_inf, p_sup, p0)
33         [p, fmin, status] = datafit(G, Data [,Wd] [,Wg][,'b',p_inf,p_sup], p0 [,algo][,stop])
34         [p, fmin, status] = datafit(G, DG, Data, ..)
35         [p, fmin, status] = datafit(iprint, ..)
36         </synopsis>
37     </refsynopsisdiv>
38     <refsection>
39         <title>Arguments</title>
40         <variablelist>
41             <varlistentry>
42                 <term>iprint</term>
43                 <listitem>
44                     <para>scalar used to set the trace mode:
45                         <table>
46                             <tr>
47                                 <th>Value</th><th>Action</th>
48                             </tr>
49                             <tr>
50                                 <td>0</td><td>nothing (except errors) is reported</td>
51                             </tr>
52                             <tr>
53                                 <td>1</td><td>initial and final reports</td>
54                             </tr>
55                             <tr>
56                                 <td>2</td><td>adds a report per iteration</td>
57                             </tr>
58                             <tr>
59                                 <td>>2</td><td>adds reports on linear search</td>
60                             </tr>
61                         </table>
62                         <warning>
63                             Most of the reports are displayed in the Scilab console.
64                         </warning>
65                     </para>
66                 </listitem>
67             </varlistentry>
68             <varlistentry>
69                 <term>p0</term>
70                 <listitem>
71                     Matrix of initial guess of the model parameters.
72                   <para/>
73                 </listitem>
74             </varlistentry>
75             <varlistentry>
76                 <term>"b"</term>
77                 <listitem>
78                     keyword heading the <literal>p_inf, p_sup</literal> sequence, where
79                     <literal>p_inf</literal> and <literal>p_sup</literal> are real matrices
80                     with the <literal>p0</literal> shape and sizes.
81                     <literal>p_inf</literal> is the vector of lower bounds of respective
82                     <literal>p</literal> components. <literal>p_sup</literal> are upper bounds.
83                     <para/>
84                 </listitem>
85             </varlistentry>
86             <varlistentry>
87                 <term>p</term>
88                 <listitem>
89                     Matrix of the best fitting model parameters, with the <varname>p0</varname> sizes.
90                     <para/>
91                 </listitem>
92             </varlistentry>
93             <varlistentry>
94                 <term>Data</term>
95                 <listitem>
96                     (ndc x nd) matrix of experimental data points, where <literal>Data(:,i)</literal>
97                     are the #nzc coordinates of the ith measurement.
98                     <para/>
99                 </listitem>
100             </varlistentry>
101             <varlistentry>
102                 <term>Wd</term>
103                 <listitem>
104                     Data weights (optional). Row vector with size(Data,2) elements.
105                     <para/>
106                 </listitem>
107             </varlistentry>
108             <varlistentry>
109                 <term>G</term>
110                 <listitem>
111                     <para>
112                         Gap function: handle of a function in Scilab language or of a compiled
113                         external function. Computed gap(s) between the fitting model and data
114                         may be of the first order, not quadratic.
115                     </para>
116                     <para>
117                         <emphasis role="bold">
118                             If <literal>G()</literal>
119                         </emphasis>
120                         does not need
121                         any input parameters (like constants), only arguments, its syntax must be
122                         <literal>g = G(p, Data)</literal>.
123                     </para>
124                     <para>
125                         <emphasis role="bold">
126                             If <literal>G()</literal>
127                         </emphasis>
128                         needs
129                         some input parameters in addition to input arguments, its syntax must be
130                         <literal>g = G(p, Data, param1, param2, .., paramN)</literal>.
131                         Then, <literal>list(G, param1, param2, .., paramN)</literal> must be provided
132                         to <literal>datafit()</literal> instead of only <literal>G</literal>.
133                     </para>
134                     <para>
135                         The following definitions will be used:
136                         <itemizedlist>
137                             <listitem>
138                                 <literal>ng = size(g,1)</literal>: number of fitting criteria (= of
139                                 gap definitions).
140                             </listitem>
141                             <listitem>
142                                 <literal>np = size(p,"*")</literal>: total number of fitting parameters.
143                             </listitem>
144                             <listitem>
145                                 <literal>nd = size(Data,2)</literal>: number of data points.
146                             </listitem>
147                             <listitem>
148                                 <literal>ndc = size(Data,1)</literal>: number of coordinates of each
149                                 data point.
150                             </listitem>
151                         </itemizedlist>
152                     </para>
153                     <para>
154                         Then, the sizes of G arguments are the following ones:
155                         <table>
156                             <tr valign="top">
157                                 <th>g</th>
158                                 <td>: (ng x nd) matrix
159                                     <itemizedlist>
160                                         <listitem>
161                                             Each column is related to a given data point and retuns its
162                                             gaps to the fitting model.
163                                         </listitem>
164                                         <listitem>
165                                             Each row is about a gap definition. <literal>G</literal> may
166                                             compute several types of gaps. For instance, the vertical
167                                             gaps may be returned in <literal>g(1,:)</literal> ; the
168                                             horizontal gaps in <literal>g(2,:)</literal> ; the nearest
169                                             distances in <literal>g(3,:)</literal> ; etc.
170                                         </listitem>
171                                     </itemizedlist>
172                                 </td>
173                             </tr>
174                             <tr valign="top">
175                                 <th>p</th>
176                                 <td>: fitting parameters to be optimized.
177                                     <para/>
178                                 </td>
179                             </tr>
180                             <tr valign="top">
181                                 <th>Data</th>
182                                 <td>: (ndc x nd) matrix: Each column provides the
183                                     <literal>#ndc</literal> coordinates of a given data point.
184                                     If <literal>G</literal> computes gaps only for a single
185                                     data point, <literal>datafit()</literal> will automatically
186                                     loop over the set of <literal>Data</literal> columns.
187                                     This makes <literal>datafit()</literal> slower.
188                                 </td>
189                             </tr>
190                         </table>
191                     </para>
192                 </listitem>
193             </varlistentry>
194             <varlistentry>
195                 <term>DG</term>
196                 <listitem>
197                     (optional) Handle of a function in Scilab language or of a compiled external
198                     function, computing the partial of <varname>G</varname> wrt <varname>p</varname>.
199                     Its syntax must be like <literal>S = DG(p, Data)</literal>. The expected sizes
200                     of the result are (ng x np x 1) (Data-unvectorized) or (ng x np x nd)
201                     (Data-vectorized).
202                     <para/>
203                 </listitem>
204             </varlistentry>
205             <varlistentry>
206                 <term>Wg</term>
207                 <listitem>
208                     Optional (ng x ng) matrix of weights of gaps definitions. Default = eye().
209                     <varname>G()</varname> defines as many types of gaps as required.
210                     Most often, a unique gap will be computed -- most often the vertical one --,
211                     but other ones can be computed (horizontal, nearest distance, etc).
212                     This matrix allows to weight the different types.
213                     <para/>
214                 </listitem>
215             </varlistentry>
216             <varlistentry>
217                 <term>algo</term>
218                 <listitem>
219                     Word <literal>'qn'</literal> (quasi-Newton, default), or
220                     <literal>'gc'</literal> (conjugate gradient), or
221                     <literal>'nd'</literal> (non-differentiable. Then <varname>p_inf</varname>
222                     and <varname>p_sup</varname> parameters bounds are not accepted).
223                     Selects the algorithm used to perform the fitting.
224                     See <function>optim()</function> for more details.
225                     <para/>
226                 </listitem>
227             </varlistentry>
228             <varlistentry>
229                 <term>stop</term>
230                 <listitem>
231                     <para>sequence of optional arguments controlling the convergence of
232                         the fitting algorithm:
233                     </para>
234                     <para>
235                         <literal>'ar',nap, [iter [,epsg [,epsf [,epsx]]]]</literal>, with
236                     </para>
237                     <table>
238                         <tr>
239                             <th>"ar"</th>
240                             <td>
241                                 reserved keyword heading the list of stopping criteria as defined below.
242                             </td>
243                         </tr>
244                         <tr>
245                             <th>nap</th>
246                             <td>
247                                 maximum number of calls to the cost f function allowed (default: 100).
248                             </td>
249                         </tr>
250                         <tr>
251                             <th>iter</th>
252                             <td>maximum number of iterations allowed (default: 100).</td>
253                         </tr>
254                         <tr>
255                             <th>epsg</th>
256                             <td>threshold on gradient norm (default: %eps).</td>
257                         </tr>
258                         <tr>
259                             <th>epsf</th>
260                             <td>
261                                 threshold controlling the decreasing of <literal>f</literal>
262                                 (defined in the description section). Default: 0.
263                             </td>
264                         </tr>
265                         <tr>
266                             <th>epsx</th>
267                             <td>
268                                 threshold controlling the variation of <varname>p</varname>.
269                                 This vector (possibly matrix) of same size as <varname>p0</varname>
270                                 can be used to scale <varname>p</varname>. Default: 0.
271                             </td>
272                         </tr>
273                     </table>
274                 </listitem>
275             </varlistentry>
276             <!--            <varlistentry>
277                 <term>fmin</term>
278                 <listitem>
279                     <para>
280                         <literal>f</literal> quadratic residu (minimal <literal>f</literal> value reached).
281                         <literal>sqrt(fmin/ng/nz)</literal> or
282                         <literal>sqrt(fmin/ng/sum(Wd))</literal> estimates the Model-to-Data average
283                         distance.
284                     </para>
285                 </listitem>
286             </varlistentry>
287 -->
288             <varlistentry>
289                 <term>dmin</term>
290                 <listitem>
291                     Average Model-to-data residual distance. It is equal to
292                     <literal>sqrt(fmin/ng/nd)</literal> or
293                     <literal>sqrt(fmin/ng/sum(Wd))</literal>, where fmin is the quadratic residue
294                     of the cost function <literal>f</literal> (see the description).
295                     <para/>
296                 </listitem>
297             </varlistentry>
298             <varlistentry>
299                 <term>status</term>
300                 <listitem>
301                   return flag = termination status (only for the 'qn' default algorithm.
302                   Set to %nan otherwise): <literal>9</literal> is successful. Possible values are:
303                   <table>
304                     <tr valign="top">
305                       <th>1</th><td>: Norm of projected gradient lower than...</td>
306                       <td>....</td>
307                       <th>6</th><td>: Optim stops: too small variations in gradient direction.</td>
308                     </tr>
309                     <tr valign="top">
310                       <th>2</th><td>: At last iteration, f decreases by less than...</td>
311                       <td></td>
312                       <th>7</th><td>: Stop during calculation of descent direction.</td>
313                     </tr>
314                     <tr valign="top">
315                       <th>3</th><td>: Optim stops: Too small variations for p.</td>
316                       <td></td>
317                       <th>8</th><td>: Stop during calculation of estimated hessian.</td>
318                     </tr>
319                     <tr valign="top">
320                       <th>4</th><td>: Optim stops: maximum number of calls to f is reached.</td>
321                       <td></td>
322                       <th>9</th><td>: Successful completion.</td>
323                     </tr>
324                     <tr valign="top">
325                       <th>5</th><td>: Optim stops: maximum number of iterations is reached.</td>
326                       <td></td>
327                       <th>10</th><td>: Linear search fails.</td>
328                     </tr>
329                   </table>
330                   <para/>
331                 </listitem>
332             </varlistentry>
333         </variablelist>
334         <para>
335             Other <function>optim()</function> input arguments such that <varname>df0</varname>,
336             <varname>mem</varname>, or <varname>'in'</varname> may be used with
337             <literal>datafit()</literal>. They will be passed as is to <function>optim()</function>.
338         </para>
339     </refsection>
340     <refsection>
341         <title>Description</title>
342         <para>
343             <literal>datafit</literal> is used to fit a parametrized model to given data.
344         </para>
345         <para>
346             A function <literal>G(p, Data)</literal> must be defined to compute the gaps between the
347             <varname>Data</varname> points and a model whose parameters to be tuned are provided
348             through the matrix <varname>p</varname>.
349
350         </para>
351         <para>
352             <literal>datafit()</literal> checks whether <literal>G()</literal> is vectorized or not
353             over <varname>Data</varname>. If <literal>G()</literal> works only with a single
354             <literal>data</literal> point as a single column of coordinates, then <literal>datafit()</literal>
355             loops over <literal>data=Data(:,i)</literal> points. Otherwise, <literal>G(p, Data)</literal>
356             is called for the whole <varname>Data</varname> array of data coordinates, which is
357             way faster if <varname>G()</varname>'s code is truly vectorized.
358         </para>
359         <para>
360             Starting from initial values <varname>p0</varname> of the model parameters,
361             <literal>datafit()</literal> varies them in order to minimize the whole Model-to-Data
362             distance defined as
363             <screen>
364 f = (G(p, Data(:,1))' * Wg * G(p, Data(:,1)))* Wd(1) + ..
365     (G(p, Data(:,2))' * Wg * G(p, Data(:,2)))* Wd(2) + ..
366     ...
367     (G(p, Data(:,nz))' * Wg * G(p,Data(:,nz)))* Wd(nz)
368 </screen>
369         </para>
370         <para>
371             When <literal>Wg = eye()</literal> (default), this definition is equivalent to
372 <screen>
373 f = sum(G(p, Data(:,1)).^2) * Wd(1) + ..
374     sum(G(p, Data(:,2)).^2) * Wd(2) + ..
375     ...
376     sum(G(p, Data(:,nz)).^2) * Wd(nz)
377 </screen>
378         </para>
379         <para>
380             If in addition <varname>G</varname> returns only one gap type, this even simplifies to
381             <screen>
382 f = G(p, Data(:,1))^2 * Wd(1) + ..
383     G(p, Data(:,2))^2 * Wd(2) + ..
384     ...
385     G(p, Data(:,nz))^2 * Wd(nz)
386 </screen>
387         </para>
388         <para>
389             <literal>datafit()</literal> calls <function>optim()</function> to minimize f.
390         </para>
391         <para>
392             Choosing appropriately <varname>p0</varname> may improve and faster
393             <literal>datafit()</literal>'s convergence to the best fit.
394         </para>
395     </refsection>
396     <refsection>
397         <title>Examples</title>
398         <para>
399             <emphasis role="bold">Simple example: Polynomial fitting (parabola = 3 parameters)</emphasis>.
400         </para>
401         <programlisting role="example"><![CDATA[
402 // Model (parabola)
403 function y = FF(x,p)
404     y = (p(1)*x + p(2)).*x + p(3)
405 endfunction
406
407 // Data creation
408 pa = [2;-4;14] // parameters of the actual law, to generate data
409 X = 0:0.1:3.5;
410 Y0 = FF(X, pa);
411 noise = 5*(rand(Y0)-.5);   // We add some noise
412 Y = Y0 + noise
413 Data = [X ; Y];
414
415 // Gap function definition
416 function e = G(p, Data)
417     e = Data(2,:) - FF(Data(1,:), p);  // vertical distance
418 endfunction
419
420 // Performing the fitting
421 // ----------------------
422 // Without weighting data:
423 p0 = [1;-1;10]
424 [p, dmin] = datafit(G, Data, p0);
425 // The uncertainty is identified to the noise value.
426 // The weight is set inversely proportional to the uncertainty
427 [pw, dmin] = datafit(G, Data, 1./abs(noise), p0);
428
429 // Display
430 // -------
431 scf(0);
432 clf()
433 xsetech([0 0 1 0.83])
434 plot2d(X, Y0, 24)       // True underlaying law
435 plot2d(X, Y, -1)        // Noisy data
436 plot2d(X, FF(X, p), 15)  // best unweighted fit
437 plot2d(X, FF(X, pw), 18) // best weighted fit
438 legend(["True law" "Noisy data" "Unweighted fit" "Weighted fit"],2)
439
440 xsetech([0 .75 1 0.25])
441 plot2d(X, FF(X,p)-Y0, 15)  // remaining gap <= best unweighted fit
442 plot2d(X, FF(X,pw)-Y0, 18) // remaining gap <= best weighted fit
443 ylabel("Data fit - True law")
444 ax = gca(); ax.x_location = "top";
445 tmp = ax.x_ticks.labels;
446 gca().x_ticks.labels = emptystr(size(tmp,"*"),1);
447 xgrid(color("grey70"))
448      ]]></programlisting>
449         <!-- Due to random input data, it is preferable to have a fixed image instead of automated.
450         <scilab:image>
451             // ===============
452             gcf().axes_size = [520 550];
453         </scilab:image>
454         -->
455         <inlinemediaobject>
456             <imageobject>
457                 <imagedata fileref="../../images/datafit_A.png"/>
458             </imageobject>
459         </inlinemediaobject>
460         <para/>
461         <para>
462             <emphasis role="bold">Fitting a gaussian curve + sloping base (5 parameters)</emphasis>:
463         </para>
464         <programlisting role="example"><![CDATA[
465 function Y = model(P, X)
466     Y = P(3) * exp(-(X-P(1)).^2 / (2*P(2)*P(2))) + P(4)*X + P(5);
467 endfunction
468 // -------------------------------
469 // Gap function
470 function r = gap(P, XY)
471     r = XY(2,:) - model(P,XY(1,:))
472 endfunction
473 // -------------------------------
474 // True parameters
475 Pg = [3.5 1.5 7 0.4 -0.5]; // mean stDev ymax a(.x) b
476
477 // Generating data
478 // ---------------
479 X = 0:0.2:10;
480 Np = length(X);
481 Y0 = model(Pg, X);              // True law
482 noise = (rand(Y0)-.5)*Pg(3)/4;
483 Y = Y0 + noise                  // Noised data
484 Data = [X ; Y];
485
486 // Trying to recover original parameters from the (noisy) data:
487 // -----------------------------------------------------------
488 // Performing the fitting = parameters optimization
489 [v, k] = max(Y0);
490 P0 = [X(k) 1 v 1 1];
491 [P, dmin, s]  = datafit(gap, Data, P0);
492 Wd =  1./abs(noise);
493 [Pw, dminw, s] = datafit(gap, Data, Wd, P0);
494
495 // Computing fitting curves from found parameters
496 fY = model(P, X);
497 fYw = model(Pw, X);
498
499 // Display
500 // -------
501 scf(0);
502 clf()
503 xsetech([0 0 1 0.8])
504 plot2d(X, Y0, 24)  // True underlaying law
505 plot2d(X, Y,  -1)  // Noisy data
506 plot2d(X, fY, 15)  // best unweighted fit
507 plot2d(X, fYw,18)  // best weighted fit
508
509 // Legends: Average vertical Model-Data distance:
510 // True one v/s according to the residual cost
511 L_1 = "Unweighted fit (%5.2f /%5.2f)";
512 L_1 = msprintf(L_1, mean(abs(fY-Y0)), sqrt(dmin/Np));
513 L_2 = "Weighted fit (%5.2f /%5.2f)";
514 L_2 = msprintf(L_2, mean(abs(fYw-Y0)), sqrt(dminw/sum(Wd)));
515 legend(["True law", "Noisy Data", L_1, L_2],1)
516
517 // Params: row#1: true params row#2:
518 //  from unweighted fit row#3: from weighted fit
519 [xs, ys] = meshgrid(linspace(2,6,5), linspace(0.5,-0.5,3));
520 xnumb(xs, ys, [Pg ; P ; Pw]);
521 LawTxt = "$\Large p_3\,.\,"+..
522          "exp\left({-(x-p_1)^2}\over{2\,p_2} \right)+p_4.x+p_5$";
523 xstring(2, 1.3, LawTxt)
524
525 // Plotting residus:
526 xsetech([0 .75 1 0.25])
527 plot2d(X, model(P ,X)-Y0, 15) // remaining gap with best unweighted fit
528 plot2d(X, model(Pw,X)-Y0, 18) // remaining gap best weighted fit
529 ylabel("Data fit - True law")
530 ax = gca();
531 ax.x_location = "top";
532 gca().x_ticks.labels = emptystr(size(ax.x_ticks.labels, "*"),1);
533 xgrid(color("grey70"))
534      ]]></programlisting>
535         <!-- Due to random input data, it is preferable to have a fixed image instead of automated.
536         <scilab:image>
537             // ===============
538             gcf().axes_size = [520 550];
539         </scilab:image>
540         -->
541         <inlinemediaobject>
542             <imageobject>
543                 <imagedata fileref="../../images/datafit_B.png"/>
544             </imageobject>
545         </inlinemediaobject>
546         <para/>
547         <para>
548             <emphasis role="bold">Extraction of 3 overlapping gaussian curves (9 parameters)</emphasis>:
549             example with constrained bounds and a matrix of parameters.
550         </para>
551         <programlisting role="example"><![CDATA[
552 // Basic gaussian component:
553 function Y = gauss(X, average, stDev, ymax)
554     Y = ymax * exp(-(X-average).^2 / (2*stDev*stDev))
555 endfunction
556
557 // Model: cumulated gaussian laws
558 function r = model(P, X)
559     // P(1,:): averages  P(2,:): standard deviation  P(3,:): ymax
560     r = 0;
561     for i = 1:size(P,2)
562         r = r + gauss(X, P(1,i), P(2,i), P(3,i));
563     end
564 endfunction
565
566 // Gap function:
567 function r = gap(P,XY)
568     r = XY(2,:) - model(P, XY(1,:))
569 endfunction
570
571 // True parameters :
572 Pa = [ 1.1  4    7    // averages
573         1  0.8  1.5   // standard deviations
574         3   2   5.5   // ymax
575      ];
576 Nc = size(Pa,2);      // Number of summed curves
577 Np = 200;             // Number of data points
578
579 // Data generation (without noise)
580 xmin = 0;
581 xmax = 10;
582 X = linspace(xmin-2, xmax+2, Np);
583 Y = model(Pa, X);
584 Data = [X ; Y];      // Curve to analyze / fit
585
586 // FITTING
587 // -------
588 // Fitting parameters: initial values and bounds (amplitudes must be > 0)
589 Po = [ linspace(xmin,xmax,Nc) ; ones(1,Nc)*0.5 ; ones(1,Nc)*max(Y)/2];
590 Pinf = [ones(1,Nc)*-2 ; ones(1,Nc)*0.1 ; ones(1,Nc)*1];
591 Psup = [ones(1,Nc)*12 ; ones(1,Nc)*3   ; ones(1,Nc)*max(Y)*1.2];
592 // Fitting
593 [P, dmin] = datafit(gap, Data, 'b',Pinf,Psup, Po, 'qn','ar',200,200);
594
595 // Display
596 // -------
597 clf()
598 xsetech([0 0 1 0.8]);
599 plot(X, Y, "-b", X, model(P,X), "-r");
600 gca().children.children(1:2).thickness = 2;
601 for i = 1:Nc
602 plot(X, gauss(X, Pa(1,i), Pa(2,i), Pa(3,i)), "-c"); // True gaussian components
603 plot(X, gauss(X, P(1,i), P(2,i), P(3,i)), "-g");    // Extracted gaussian components
604 end
605 legend("Data", "Parametric Model","True components", "Extracted components",  2);
606
607 xsetech([0 0.75 1 0.25])
608 plot(X, model(P,X)-Y);
609 gca().x_location = "top";
610 gca().x_ticks.labels = emptystr(size(gca().x_ticks.labels,"*"),1);
611 ylabel("Model - Data")
612 xgrid(color("grey70"))
613      ]]></programlisting>
614         <scilab:image>
615             // Basic gaussian component:
616             function Y = gauss(X, average, stDev, ymax)
617             Y = ymax * exp(-(X-average).^2 / (2*stDev*stDev))
618             endfunction
619
620             // Model: cumulated gaussian laws
621             function r = model(P, X)
622             // P(1,:): averages  P(2,:): standard deviation  P(3,:): ymax
623             r = 0;
624             for i = 1:size(P,2)
625             r = r + gauss(X, P(1,i), P(2,i), P(3,i));
626             end
627             endfunction
628
629             // Gap function:
630             function r = gap(P,XY)
631             r = XY(2,:) - model(P,XY(1,:))
632             endfunction
633
634             // True parameters :
635             Pa = [ 1.1  4    7    // averages
636             1  0.8  1.5   // standard deviations
637             3   2   5.5   // ymax
638             ];
639             Nc = size(Pa,2);       // Number of summed curves
640             Np = 200;             // Number of data points
641
642             // Data generation (without noise)
643             xmin = 0;
644             xmax = 10;
645             X = linspace(xmin-2, xmax+2, Np);
646             Y = model(Pa, X);
647             Data = [X ; Y];          // Curve to analyze / fit
648
649             // FITTING
650             // -------
651             // Fitting parameters: initial values and bounds (amplitudes must be > 0)
652             Po = [ linspace(xmin,xmax,Nc) ; ones(1,Nc)*0.5 ; ones(1,Nc)*max(Y)/2];
653             Pinf = [ones(1,Nc)*-2 ; ones(1,Nc)*0.1 ; ones(1,Nc)*1];
654             Psup = [ones(1,Nc)*12 ; ones(1,Nc)*3   ; ones(1,Nc)*max(Y)*1.2];
655             // Fitting
656             [P, dmin] = datafit(gap, Data, 'b',Pinf,Psup, Po, 'qn','ar',200,200);
657
658             // Display
659             // -------
660             clf()
661             xsetech([0 0 1 0.8]);
662             plot(X, Y, "-b", X, model(P,X), "-r");
663             gca().children.children(1:2).thickness = 2;
664             for i = 1:Nc
665             plot(X, gauss(X, Pa(1,i), Pa(2,i), Pa(3,i)), "-c"); // True gaussian components
666             plot(X, gauss(X, P(1,i), P(2,i), P(3,i)), "-g");    // Extracted gaussian components
667             end
668             legend("Data", "Parametric Model","True components", "Extracted components",  2);
669
670             xsetech([0 0.75 1 0.25])
671             plot(X, model(P,X)-Y);
672             gca().x_location = "top";
673             gca().x_ticks.labels = emptystr(size(gca().x_ticks.labels,"*"),1);
674             ylabel("Model - Data")
675             xgrid(color("grey70"))
676             // ===============
677             gcf().axes_size = [520 400];
678         </scilab:image>
679         <para/>
680         <para>
681             <emphasis role="bold">With a parametric curve: Fitting a off-centered tilted 2D elliptical arc (5 parameters)</emphasis>.
682         </para>
683         <programlisting role="example"><![CDATA[
684 function g = Gap(p, Data)
685     // p: a, b, center xc, yc, alpha tilt °
686     C = cosd(p(5)); S = -sind(p(5));
687     x = Data(1,:) - p(3);
688     y = Data(2,:) - p(4);
689     g = ((x*C - y*S )/p(1)).^2 + ((x*S + y*C)/p(2)).^2 - 1;
690 endfunction
691
692 // Generating the data
693 // -------------------
694 // Actual parameters :
695 Pa = [3, 1.3, -2, 1.5, 30];
696 Np = 30;            // Number of data points
697 Theta = grand(1,Np, "unf",-100, 170);
698 // untilted centered noisy arc
699 x = Pa(1)*(cosd(Theta) + grand(Theta, "unf", -0.07, 0.07));
700 y = Pa(2)*(sind(Theta) + grand(Theta, "unf", -0.07, 0.07));
701 // Tilting and off-centering the arc
702 A = Pa(5);
703 C = cosd(A); S = sind(A);
704 xe =  C*x + y*S + Pa(3);
705 ye = -S*x + y*C + Pa(4);
706 Data = [xe ; ye];
707
708 // Retrieving parameters from noisy data
709 // -------------------------------------
710 // Initial estimation
711 ab0 = (strange(xe)+strange(ye))/2;
712 xc0 = mean(xe);
713 yc0 = mean(ye);
714 A0 = -atand(reglin(xe,ye));
715 P0 = [ab0 ab0/2 xc0 yc0 A0];
716 // Parameters bounds
717 Pinf = [ 0     0     xc0-2*ab0, yc0-2*ab0 -90];
718 Psup = [3*ab0 3*ab0  xc0+2*ab0, yc0+2*ab0  90];// Fitting
719
720 // FITTING
721 [P, dmin] = datafit(Gap, Data, 'b',Pinf,Psup, P0);
722
723 // P(1) is the longest axis:
724 if P(1)<P(2) then
725     P([1 2]) = P([2 1]);
726     P(5) = 90 - P(5);
727 end
728
729 // DISPLAY
730 // -------
731 disp([Pa;P], dmin);
732 // A successful fit should lead to dmin ~ noise amplitude
733
734 clf
735 plot(xe, ye, "+")   // Data
736 // Model
737 Theta = 0:2:360;
738 x = P(1) * cosd(Theta);
739 y = P(2) * sind(Theta);
740 A = P(5);
741 [x, y] = (x*cosd(A)+y*sind(A)+P(3), -x*sind(A)+y*cosd(A)+P(4));
742 plot(x, y, "r")
743 isoview
744 // Parameters values:
745 Patxt = msprintf("%5.2f   %5.2f   %5.2f   %5.2f  %5.1f", Pa);
746 Ptxt  = msprintf("%5.2f   %5.2f   %5.2f   %5.2f  %5.1f", P);
747 xstring(-3.7, 1.2, ["" "    a           b        xc       yc     A°"
748                     "Actual:" Patxt
749                     "Fit:" Ptxt ])
750 xstring(-2.5, 0.9, "dmin = " + msprintf("%5.3f", dmin))
751      ]]></programlisting>
752         <!-- Due to random input data, it is preferable to have a fixed image instead of automated.
753         <scilab:image>
754             // ===============
755             gcf().axes_size = [520 550];
756         </scilab:image>
757         -->
758         <inlinemediaobject>
759             <imageobject>
760                 <imagedata fileref="../../images/datafit_C.png"/>
761             </imageobject>
762         </inlinemediaobject>
763     </refsection>
764     <refsection role="see also">
765         <title>See also</title>
766         <simplelist type="inline">
767             <member>
768                 <link linkend="lsqrsolve">lsqrsolve</link>
769             </member>
770             <member>
771                 <link linkend="optim">optim</link>
772             </member>
773             <member>
774                 <link linkend="leastsq">leastsq</link>
775             </member>
776         </simplelist>
777     </refsection>
778     <refsection role="history">
779         <title>History</title>
780         <revhistory>
781             <revision>
782                 <revnumber>6.1</revnumber>
783                 <revdescription>
784                     <itemizedlist>
785                         <listitem>
786                             New option <varname>Wd</varname> to weight measured data
787                             <varname>Data</varname>.
788                         </listitem>
789                         <listitem>
790                             The gap function G() may now be vectorized over <literal>Data</literal>
791                             points.
792                         </listitem>
793                         <listitem>
794                             Initial parameters <varname>p0</varname> and their bounds may now be
795                             provided as a matrix instead of a mandatory column.
796                         </listitem>
797                         <listitem>
798                             The <literal>err</literal> output argument is replaced with
799                             <literal>dmin</literal> = average Model-to-data distance.
800                         </listitem>
801                         <listitem>
802                             New output argument <literal>status</literal> added
803                             (for the "qn" algorithm).
804                         </listitem>
805                     </itemizedlist>
806                 </revdescription>
807             </revision>
808         </revhistory>
809     </refsection>
810 </refentry>