Typo fixes
[scilab.git] / scilab / modules / optimization / help / en_US / nonlinearleastsquares / leastsq.xml
1 <?xml version="1.0" encoding="UTF-8"?>
2 <!--
3  * Scilab ( http://www.scilab.org/ ) - This file is part of Scilab
4  * Copyright (C) 2008 - INRIA
5  * Copyright (C) 2011 - DIGITEO - Michael Baudin
6  *
7  * This file must be used under the terms of the CeCILL.
8  * This source file is licensed as described in the file COPYING, which
9  * you should have received as part of this distribution.  The terms
10  * are also available at
11  * http://www.cecill.info/licences/Licence_CeCILL_V2.1-en.txt
12  *
13  -->
14 <refentry xmlns="http://docbook.org/ns/docbook" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:svg="http://www.w3.org/2000/svg" xmlns:ns4="http://www.w3.org/1999/xhtml" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:db="http://docbook.org/ns/docbook" xmlns:scilab="http://www.scilab.org" xml:id="leastsq" xml:lang="en">
15     <refnamediv>
16         <refname>leastsq</refname>
17         <refpurpose>Solves non-linear least squares problems</refpurpose>
18     </refnamediv>
19     <refsynopsisdiv>
20         <title>Calling Sequence</title>
21         <synopsis>
22             fopt=leastsq(fun, x0)
23             fopt=leastsq(fun, x0)
24             fopt=leastsq(fun, dfun, x0)
25             fopt=leastsq(fun, cstr, x0)
26             fopt=leastsq(fun, dfun, cstr, x0)
27             fopt=leastsq(fun, dfun, cstr, x0, algo)
28             fopt=leastsq([imp], fun [,dfun] [,cstr],x0 [,algo],[df0,[mem]],[stop])
29             [fopt,xopt] = leastsq(...)
30             [fopt,xopt,gopt] =  = leastsq(...)
31         </synopsis>
32     </refsynopsisdiv>
33     <refsection>
34         <title>Arguments</title>
35         <variablelist>
36             <varlistentry>
37                 <term>fopt</term>
38                 <listitem>
39                     <para>
40                         value of the function <literal>f(x)=||fun(x)||^2</literal>
41                         at <literal>xopt</literal>
42                     </para>
43                 </listitem>
44             </varlistentry>
45             <varlistentry>
46                 <term>xopt</term>
47                 <listitem>
48                     <para>
49                         best value of <literal>x</literal> found to minimize
50                         <literal>||fun(x)||^2</literal>
51                     </para>
52                 </listitem>
53             </varlistentry>
54             <varlistentry>
55                 <term>gopt</term>
56                 <listitem>
57                     <para>
58                         gradient of <literal>f</literal> at
59                         <literal>xopt</literal>
60                     </para>
61                 </listitem>
62             </varlistentry>
63             <varlistentry>
64                 <term>fun</term>
65                 <listitem>
66                     <para>
67                         a scilab function or a list defining a function from
68                         <literal>R^n</literal> to <literal>R^m</literal> (see more
69                         details in DESCRIPTION).
70                     </para>
71                 </listitem>
72             </varlistentry>
73             <varlistentry>
74                 <term>x0</term>
75                 <listitem>
76                     <para>
77                         real vector (initial guess of the variable to be
78                         minimized).
79                     </para>
80                 </listitem>
81             </varlistentry>
82             <varlistentry>
83                 <term>dfun</term>
84                 <listitem>
85                     <para>
86                         a scilab function or a string defining the Jacobian matrix of
87                         <literal>fun</literal> (see more details in DESCRIPTION).
88                     </para>
89                 </listitem>
90             </varlistentry>
91             <varlistentry>
92                 <term>cstr</term>
93                 <listitem>
94                     <para>
95                         bound constraints on <literal>x</literal>. They must be
96                         introduced by the string keyword <literal>'b'</literal> followed by
97                         the lower bound <literal>binf</literal> then by the upper bound
98                         <literal>bsup</literal> (so <literal>cstr</literal> appears as
99                         <literal>'b',binf,bsup</literal> in the calling sequence). Those
100                         bounds are real vectors with same dimension than
101                         <literal>x0</literal> (-%inf and +%inf may be used for dimension
102                         which are unrestricted).
103                     </para>
104                 </listitem>
105             </varlistentry>
106             <varlistentry>
107                 <term>algo</term>
108                 <listitem>
109                     <para>
110                         a string with possible values: <literal>'qn'</literal> or
111                         <literal>'gc'</literal> or <literal>'nd'</literal>. These strings
112                         stand for quasi-Newton (default), conjugate gradient or
113                         non-differentiable respectively. Note that <literal>'nd'</literal>
114                         does not accept bounds on <literal>x</literal>.
115                     </para>
116                 </listitem>
117             </varlistentry>
118             <varlistentry>
119                 <term>imp</term>
120                 <listitem>
121                     <para>
122                         scalar argument used to set the trace mode.
123                         <literal>imp=0</literal> nothing (except errors) is reported,
124                         <literal>imp=1</literal> initial and final reports,
125                         <literal>imp=2</literal> adds a report per iteration,
126                         <literal>imp&gt;2</literal> add reports on linear search. Warning,
127                         most of these reports are written on the Scilab standard
128                         output.
129                     </para>
130                 </listitem>
131             </varlistentry>
132             <varlistentry>
133                 <term>df0</term>
134                 <listitem>
135                     <para>
136                         real scalar. Guessed decreasing of
137                         <literal>||fun||^2</literal> at first iteration.
138                         (<literal>df0=1</literal> is the default value).
139                     </para>
140                 </listitem>
141             </varlistentry>
142             <varlistentry>
143                 <term>mem</term>
144                 <listitem>
145                     <para>
146                         integer, number of variables used to approximate the Hessian
147                         (second derivatives) of <literal>f</literal> when
148                         <literal>algo</literal><literal>='qn'</literal>. Default value is
149                         10.
150                     </para>
151                 </listitem>
152             </varlistentry>
153             <varlistentry>
154                 <term>stop</term>
155                 <listitem>
156                     <para>
157                         sequence of optional parameters controlling the convergence of
158                         the algorithm. They are introduced by the keyword
159                         <literal>'ar'</literal>, the sequence being of the form
160                         <literal>'ar',nap, [iter [,epsg [,epsf [,epsx]]]]</literal>
161                     </para>
162                     <variablelist>
163                         <varlistentry>
164                             <term>nap</term>
165                             <listitem>
166                                 <para>
167                                     maximum number of calls to <literal>fun</literal>
168                                     allowed.
169                                 </para>
170                             </listitem>
171                         </varlistentry>
172                         <varlistentry>
173                             <term>iter</term>
174                             <listitem>
175                                 <para>maximum number of iterations allowed.</para>
176                             </listitem>
177                         </varlistentry>
178                         <varlistentry>
179                             <term>epsg</term>
180                             <listitem>
181                                 <para>threshold on gradient norm.</para>
182                             </listitem>
183                         </varlistentry>
184                         <varlistentry>
185                             <term>epsf</term>
186                             <listitem>
187                                 <para>
188                                     threshold controlling decreasing of
189                                     <literal>f</literal>
190                                 </para>
191                             </listitem>
192                         </varlistentry>
193                         <varlistentry>
194                             <term>epsx</term>
195                             <listitem>
196                                 <para>
197                                     threshold controlling variation of <literal>x</literal>.
198                                     This vector (possibly matrix) of same size as
199                                     <literal>x0</literal> can be used to scale
200                                     <literal>x</literal>.
201                                 </para>
202                             </listitem>
203                         </varlistentry>
204                     </variablelist>
205                 </listitem>
206             </varlistentry>
207         </variablelist>
208     </refsection>
209     <refsection>
210         <title>Description</title>
211         <para>
212             The <literal>leastsq</literal> function
213             solves the problem
214         </para>
215         <para>
216             <latex>
217                 \begin{eqnarray}
218                 \begin{array}{l}
219                 \textrm{minimize } \|f(x)\|^2=f_1(x)^2 + f_2(x)^2+\ldots+f_m(x)^2
220                 \end{array}
221                 \end{eqnarray}
222             </latex>
223         </para>
224         <para>
225             where <literal>f</literal> is a function from
226             <literal>R^n</literal> to <literal>R^m</literal>.
227             Bound constraints cab be imposed on <literal>x</literal>.
228         </para>
229     </refsection>
230     <refsection>
231         <title>How to provide fun and dfun</title>
232         <para>
233             <literal>fun</literal> can be a scilab function (case
234             1) or a fortran or a C routine linked to scilab (case 2).
235         </para>
236         <variablelist>
237             <varlistentry>
238                 <term>case 1:</term>
239                 <listitem>
240                     <para>
241                         When <literal>fun</literal> is a Scilab function, its calling
242                         sequence must be:
243                         <screen><![CDATA[
244 y=fun(x)
245  ]]></screen>
246                         In the case where the cost function needs extra parameters,
247                         its header must be:
248                         <screen><![CDATA[
249 y=f(x,a1,a2,...)
250  ]]></screen>
251                         In this case, we provide <literal>fun</literal> as a list,
252                         which contains <literal>list(f,a1,a2,...)</literal>.
253                     </para>
254                 </listitem>
255             </varlistentry>
256             <varlistentry>
257                 <term>case 2:</term>
258                 <listitem>
259                     <para>
260                         When <literal>fun</literal> is a Fortran or C
261                         routine, it must be <literal>list(fun_name,m[,a1,a2,...])</literal> in the calling sequence of
262                         <literal>leastsq</literal>, where <literal>fun_name</literal> is
263                         a 1-by-1 matrix of strings, the name of the routine which must be linked to Scilab (see
264                         <link linkend="link">link</link>). The header must be, in Fortran:
265                         
266                         <screen><![CDATA[
267 subroutine fun(m, n, x, params, y)
268 integer m,n
269 double precision x(n), params(*), y(m)
270  ]]></screen>
271                         and in C:
272                         <screen><![CDATA[
273 void fun(int *m, int *n, double *x, double *params, double *y)
274  ]]></screen>
275                         
276                         where <literal>n</literal> is the dimension of vector
277                         <literal>x</literal>, <literal>m</literal> the dimension of vector
278                         <literal>y</literal>, with <literal>y=fun(x)</literal>, and
279                         <literal>params</literal> is a vector which contains the optional
280                         parameters <literal>a1, a2, ...</literal>. Each
281                         parameter may be a vector, for instance if
282                         <literal>a1</literal> has 3 components, the description of
283                         <literal>a2</literal> begin from
284                         <literal>params(4)</literal> (in fortran), and from
285                         <literal>params[3]</literal> (in C).
286                         Note that even if <literal>fun</literal> does not need supplementary parameters you
287                         must anyway write the fortran code with a <literal>params</literal>
288                         argument (which is then unused in the subroutine core).
289                     </para>
290                 </listitem>
291             </varlistentry>
292         </variablelist>
293         <para>
294             By default, the algorithm uses a finite difference approximation
295             of the Jacobian matrix.
296             The Jacobian matrix can be provided by defining the function
297             <literal>dfun</literal>, where  to the
298             optimizer it may be given as a usual scilab function or
299             as a fortran or a C routine linked to scilab.
300         </para>
301         <variablelist>
302             <varlistentry>
303                 <term>case 1:</term>
304                 <listitem>
305                     <para>
306                         when <literal>dfun</literal> is a scilab function, its calling
307                         sequence must be:
308                         <screen><![CDATA[
309               y=dfun(x)
310  ]]></screen>
311                         where <literal>y(i,j)=dfi/dxj</literal>.
312                         If extra parameters are required by <literal>fun</literal>, i.e. if arguments
313                         <literal>a1,a2,...</literal> are required, they are passed also to
314                         <literal>dfun</literal>, which must have header
315                         <screen><![CDATA[
316               y=dfun(x,a1,a2,...)
317  ]]></screen>
318                         Note that, even if <literal>dfun</literal>
319                         needs extra parameters, it must appear simply as
320                         <literal>dfun</literal> in the calling sequence of
321                         <literal>leastsq</literal>.
322                     </para>
323                 </listitem>
324             </varlistentry>
325             <varlistentry>
326                 <term>case 2:</term>
327                 <listitem>
328                     <para>
329                         When <literal>dfun</literal> is defined by a Fortran or C
330                         routine it must be a string, the name of the function linked to
331                         Scilab.
332                         The calling sequences must be, in Fortran:
333                         <screen><![CDATA[
334 subroutine dfun(m, n, x, params, y)
335 integer m,n
336 double precision x(n), params(*), y(m,n)
337  ]]></screen>
338                         in C:
339                         <screen><![CDATA[
340           void fun(int *m, int *n, double *x, double *params, double *y)
341  ]]></screen>
342                         
343                         In the C case <literal>y(i,j)=dfi/dxj</literal> must be
344                         stored in <literal>y[m*(j-1)+i-1]</literal>.
345                     </para>
346                 </listitem>
347             </varlistentry>
348         </variablelist>
349     </refsection>
350     <refsection>
351         <title>Remarks</title>
352         <para>
353             Like <link linkend="datafit">datafit</link>,
354             <literal>leastsq</literal> is a front end onto the <link linkend="optim">optim</link> function. If you want to try the
355             Levenberg-Marquard method instead, use <link linkend="lsqrsolve">lsqrsolve</link>.
356         </para>
357         <para>
358             A least squares problem may be solved directly with the <link linkend="optim">optim</link> function ; in this case the function <link linkend="NDcost">NDcost</link> may be useful to compute the derivatives
359             (see the <link linkend="NDcost">NDcost</link> help page which provides a
360             simple example for parameters identification of a differential
361             equation).
362         </para>
363     </refsection>
364     <refsection>
365         <title>Examples</title>
366         <para>
367             We will show different calling possibilities of leastsq on one (trivial) example
368             which is non linear but does not really need to be solved with leastsq (applying
369             log linearizes the model and the problem may be solved with linear algebra).
370             In this example we look for the 2 parameters x(1) and x(2) of a simple
371             exponential decay model (x(1) being the unknow initial value and x(2) the
372             decay constant):
373         </para>
374         <programlisting role="example"><![CDATA[
375
376 function y = yth(t, x)
377    y  = x(1)*exp(-x(2)*t)
378 endfunction
379
380 // we have the m measures (ti, yi):
381 m = 10;
382 tm = [0.25, 0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5]';
383 ym = [0.79, 0.59, 0.47, 0.36, 0.29, 0.23, 0.17, 0.15, 0.12, 0.08]';
384 // measure weights (here all equal to 1...)
385 wm = ones(m,1);
386
387 // and we want to find the parameters x such that the model fits the given
388 // data in the least square sense:
389 //
390 //  minimize  f(x) = sum_i  wm(i)^2 ( yth(tm(i),x) - ym(i) )^2
391
392 // initial parameters guess
393 x0 = [1.5 ; 0.8];
394
395 // in the first examples, we define the function fun and dfun
396 // in scilab language
397 function e = myfun(x, tm, ym, wm)
398    e = wm.*( yth(tm, x) - ym )
399 endfunction
400
401 function g = mydfun(x, tm, ym, wm)
402    v = wm.*exp(-x(2)*tm)
403    g = [v , -x(1)*tm.*v]
404 endfunction
405
406 // now we could call leastsq:
407
408 // 1- the simplest call
409 [f,xopt, gopt] = leastsq(list(myfun,tm,ym,wm),x0)
410
411 // 2- we provide the Jacobian
412 [f,xopt, gopt] = leastsq(list(myfun,tm,ym,wm),mydfun,x0)
413
414 // a small graphic (before showing other calling features)
415 tt = linspace(0,1.1*max(tm),100)';
416 yy = yth(tt, xopt);
417 scf();
418 plot(tm, ym, "kx")
419 plot(tt, yy, "b-")
420 legend(["measure points", "fitted curve"]);
421 xtitle("a simple fit with leastsq")
422
423 // 3- how to get some information (we use imp=1)
424 [f,xopt, gopt] = leastsq(1,list(myfun,tm,ym,wm),mydfun,x0)
425
426 // 4- using the conjugate gradient (instead of quasi Newton)
427 [f,xopt, gopt] = leastsq(1,list(myfun,tm,ym,wm),mydfun,x0,"gc")
428
429 // 5- how to provide bound constraints (not useful here !)
430 xinf = [-%inf,-%inf];
431 xsup = [%inf, %inf];
432 // without Jacobian:
433 [f,xopt, gopt] = leastsq(list(myfun,tm,ym,wm),"b",xinf,xsup,x0)
434 // with Jacobian :
435 [f,xopt, gopt] = leastsq(list(myfun,tm,ym,wm),mydfun,"b",xinf,xsup,x0)
436
437 // 6- playing with some stopping parameters of the algorithm
438 //    (allows only 40 function calls, 8 iterations and set epsg=0.01, epsf=0.1)
439 [f,xopt, gopt] = leastsq(1,list(myfun,tm,ym,wm),mydfun,x0,"ar",40,8,0.01,0.1)
440  ]]></programlisting>
441         <scilab:image>
442             
443             function y = yth(t, x)
444             y  = x(1)*exp(-x(2)*t)
445             endfunction
446             
447             m = 10;
448             tm = [0.25, 0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5]';
449             ym = [0.79, 0.59, 0.47, 0.36, 0.29, 0.23, 0.17, 0.15, 0.12, 0.08]';
450             wm = ones(m,1);
451             
452             x0 = [1.5 ; 0.8];
453             
454             function e = myfun(x, tm, ym, wm)
455             e = wm.*( yth(tm, x) - ym )
456             endfunction
457             
458             function g = mydfun(x, tm, ym, wm)
459             v = wm.*exp(-x(2)*tm)
460             g = [v , -x(1)*tm.*v]
461             endfunction
462             
463             [f,xopt, gopt] = leastsq(list(myfun,tm,ym,wm),x0)
464             
465             [f,xopt, gopt] = leastsq(list(myfun,tm,ym,wm),mydfun,x0)
466             
467             tt = linspace(0,1.1*max(tm),100)';
468             yy = yth(tt, xopt);
469             scf();
470             plot(tm, ym, "kx")
471             plot(tt, yy, "b-")
472             legend(["measure points", "fitted curve"]);
473             xtitle("a simple fit with leastsq")
474             
475         </scilab:image>
476     </refsection>
477     <refsection>
478         <title>Examples with compiled functions</title>
479         <para>
480             Now we want to define fun and dfun in Fortran, then in C.
481             Note that the "compile and link to scilab" method used here
482             is believed to be OS independent (but there are some requirements,
483             in particular you need a C and a fortran compiler, and they must
484             be compatible with the ones used to build your scilab binary).
485         </para>
486         <para>
487             Let us begin by an example with fun and dfun in fortran
488         </para>
489         <programlisting role="example"><![CDATA[
490 // 7-1/ Let 's Scilab write the fortran code (in the TMPDIR directory):
491 f_code = ["      subroutine myfun(m,n,x,param,f)"
492           "*     param(i) = tm(i), param(m+i) = ym(i), param(2m+i) = wm(i)"
493           "      implicit none"
494           "      integer n,m"
495           "      double precision x(n), param(*), f(m)"
496           "      integer i"
497           "      do i = 1,m"
498           "         f(i) = param(2*m+i)*( x(1)*exp(-x(2)*param(i)) - param(m+i) )"
499           "      enddo"
500           "      end ! subroutine fun"
501           ""
502           "      subroutine mydfun(m,n,x,param,df)"
503           "*     param(i) = tm(i), param(m+i) = ym(i), param(2m+i) = wm(i)"
504           "      implicit none"
505           "      integer n,m"
506           "      double precision x(n), param(*), df(m,n)"
507           "      integer i"
508           "      do i = 1,m"
509           "         df(i,1) =  param(2*m+i)*exp(-x(2)*param(i))"
510           "         df(i,2) = -x(1)*param(i)*df(i,1)"
511           "      enddo"
512           "      end ! subroutine dfun"];
513 cd TMPDIR;
514 mputl(f_code,TMPDIR+'/myfun.f')
515
516 // 7-2/ compiles it. You need a fortran compiler !
517 names = ["myfun" "mydfun"]
518 flibname = ilib_for_link(names,"myfun.f",[],"f");
519
520 // 7-3/ link it to scilab (see link help page)
521 link(flibname,names,"f")
522
523 // 7-4/ ready for the leastsq call: be carreful do not forget to
524 //      give the dimension m after the routine name !
525 [f,xopt, gopt] = leastsq(list("myfun",m,tm,ym,wm),x0)  // without Jacobian
526 [f,xopt, gopt] = leastsq(list("myfun",m,tm,ym,wm),"mydfun",x0) // with Jacobian
527  ]]></programlisting>
528         <para>
529             Last example: fun and dfun in C.
530         </para>
531         <programlisting role="example"><![CDATA[
532 // 8-1/ Let 's Scilab write the C code (in the TMPDIR directory):
533 c_code = ["#include <math.h>"
534           "void myfunc(int *m,int *n, double *x, double *param, double *f)"
535           "{"
536           "  /*  param[i] = tm[i], param[m+i] = ym[i], param[2m+i] = wm[i] */"
537           "  int i;"
538           "  for ( i = 0 ; i < *m ; i++ )"
539           "    f[i] = param[2*(*m)+i]*( x[0]*exp(-x[1]*param[i]) - param[(*m)+i] );"
540           "  return;"
541           "}"
542           ""
543           "void mydfunc(int *m,int *n, double *x, double *param, double *df)"
544           "{"
545           "  /*  param[i] = tm[i], param[m+i] = ym[i], param[2m+i] = wm[i] */"
546           "  int i;"
547           "  for ( i = 0 ; i < *m ; i++ )"
548           "    {"
549           "      df[i] = param[2*(*m)+i]*exp(-x[1]*param[i]);"
550           "      df[i+(*m)] = -x[0]*param[i]*df[i];"
551           "    }"
552           "  return;"
553           "}"];
554
555 mputl(c_code,TMPDIR+'/myfunc.c')
556
557 // 8-2/ compiles it. You need a C compiler !
558 names = ["myfunc" "mydfunc"]
559 clibname = ilib_for_link(names,"myfunc.c",[],"c");
560
561 // 8-3/ link it to scilab (see link help page)
562 link(clibname,names,"c")
563
564 // 8-4/ ready for the leastsq call
565 [f,xopt, gopt] = leastsq(list("myfunc",m,tm,ym,wm),"mydfunc",x0)
566  ]]></programlisting>
567     </refsection>
568     <refsection role="see also">
569         <title>See Also</title>
570         <simplelist type="inline">
571             <member>
572                 <link linkend="lsqrsolve">lsqrsolve</link>
573             </member>
574             <member>
575                 <link linkend="optim">optim</link>
576             </member>
577             <member>
578                 <link linkend="NDcost">NDcost</link>
579             </member>
580             <member>
581                 <link linkend="datafit">datafit</link>
582             </member>
583             <member>
584                 <link linkend="external">external</link>
585             </member>
586             <member>
587                 <link linkend="qpsolve">qpsolve</link>
588             </member>
589         </simplelist>
590     </refsection>
591 </refentry>