Bug #9693: The default value of epsg in optim help page is %eps.
[scilab.git] / scilab / modules / optimization / help / en_US / optim.xml
1 <?xml version="1.0" encoding="UTF-8"?>
2 <!--
3  * Scilab ( http://www.scilab.org/ ) - This file is part of Scilab
4  * Copyright (C) 2008 - INRIA
5  * Copyright (C) 2008 - 2009 - INRIA - Michael Baudin
6  * Copyright (C) 2010 - 2011 - DIGITEO - Michael Baudin
7  *
8  * This file must be used under the terms of the CeCILL.
9  * This source file is licensed as described in the file COPYING, which
10  * you should have received as part of this distribution.  The terms
11  * are also available at
12  * http://www.cecill.info/licences/Licence_CeCILL_V2-en.txt
13  *
14  -->
15 <refentry xmlns="http://docbook.org/ns/docbook" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:svg="http://www.w3.org/2000/svg" xmlns:ns3="http://www.w3.org/1999/xhtml" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:db="http://docbook.org/ns/docbook" xmlns:scilab="http://www.scilab.org" version="5.0-subset Scilab" xml:id="optim" xml:lang="en">
16     <refnamediv>
17         <refname>optim</refname>
18         <refpurpose>non-linear optimization routine</refpurpose>
19     </refnamediv>
20     <refsynopsisdiv>
21         <title>Calling Sequence</title>
22         <synopsis>
23             fopt = optim(costf, x0)
24             fopt = optim(costf [,&lt;contr&gt;],x0 [,algo] [,df0 [,mem]] [,work] [,&lt;stop&gt;] [,&lt;params&gt;] [,imp=iflag])
25             [fopt, xopt] = optim(...)
26             [fopt, xopt, gopt] = optim(...)
27             [fopt, xopt, gopt, work] = optim(...)
28         </synopsis>
29     </refsynopsisdiv>
30     <refsection>
31         <title>Arguments</title>
32         <variablelist>
33             <varlistentry>
34                 <term>costf</term>
35                 <listitem>
36                     <para>a function, a list or a string, the objective function.</para>
37                 </listitem>
38             </varlistentry>
39             <varlistentry>
40                 <term>x0</term>
41                 <listitem>
42                     <para>real vector, the initial guess for
43                         <literal>x</literal>.
44                     </para>
45                 </listitem>
46             </varlistentry>
47             <varlistentry>
48                 <term>&lt;contr&gt;</term>
49                 <listitem>
50                     <para>an optional sequence of arguments containing the lower and
51                         upper bounds on <literal>x</literal>. If bounds are required, this
52                         sequence of arguments must be <literal>"b",binf,bsup</literal> where
53                         <literal>binf</literal> and <literal>bsup</literal> are real vectors
54                         with the same dimension as <literal>x0</literal>.
55                     </para>
56                 </listitem>
57             </varlistentry>
58             <varlistentry>
59                 <term>algo</term>
60                 <listitem>
61                     <para>a string, the algorithm to use (default
62                         <literal>algo="qn"</literal>).
63                     </para>
64                     <para>The available algorithms are:</para>
65                     <itemizedlist>
66                         <listitem>
67                             <para>
68                                 <literal>"qn"</literal>: Quasi-Newton with BFGS
69                             </para>
70                         </listitem>
71                         <listitem>
72                             <para>
73                                 <literal>"gc"</literal>: limited memory BFGS
74                             </para>
75                         </listitem>
76                         <listitem>
77                             <para>
78                                 <literal>"nd"</literal>: non-differentiable.
79                             </para>
80                             <para>
81                                 The <literal>"nd"</literal> algorithm does not accept
82                                 bounds on <literal>x</literal>.
83                             </para>
84                         </listitem>
85                     </itemizedlist>
86                 </listitem>
87             </varlistentry>
88             <varlistentry>
89                 <term>df0</term>
90                 <listitem>
91                     <para>
92                         real scalar, a guess of the decrease of <literal>f</literal>
93                         at the first iteration (default <literal>df0=1</literal>).
94                     </para>
95                 </listitem>
96             </varlistentry>
97             <varlistentry>
98                 <term>mem</term>
99                 <listitem>
100                     <para>integer, the number of variables used to approximate the
101                         Hessian (default <literal>mem=10</literal>). This feature is
102                         available for the <literal>"gc"</literal> algorithm without
103                         constraints and the non-smooth algorithm <literal>"nd"</literal>
104                         without constraints.
105                     </para>
106                 </listitem>
107             </varlistentry>
108             <varlistentry>
109                 <term>&lt;stop&gt;</term>
110                 <listitem>
111                     <para>a sequence of arguments containing the parameters controlling
112                         the convergence of the algorithm. The following sequences are
113                         available: <screen>              "ar",nap
114                             "ar",nap,iter
115                             "ar",nap,iter,epsg
116                             "ar",nap,iter,epsg,epsf
117                             "ar",nap,iter,epsg,epsf,epsx            
118                         </screen>
119                     </para>
120                     <para>where:</para>
121                     <variablelist>
122                         <varlistentry>
123                             <term>nap</term>
124                             <listitem>
125                                 <para>
126                                     maximum number of calls to <literal>costf</literal>
127                                     allowed (default <literal>nap=100</literal>).
128                                 </para>
129                             </listitem>
130                         </varlistentry>
131                         <varlistentry>
132                             <term>iter</term>
133                             <listitem>
134                                 <para>maximum number of iterations allowed (default
135                                     <literal>iter=100</literal>).
136                                 </para>
137                             </listitem>
138                         </varlistentry>
139                         <varlistentry>
140                             <term>epsg</term>
141                             <listitem>
142                                 <para>threshold on gradient norm (default
143                                     <literal>epsg = %eps</literal>).
144                                 </para>
145                             </listitem>
146                         </varlistentry>
147                         <varlistentry>
148                             <term>epsf</term>
149                             <listitem>
150                                 <para>
151                                     threshold controlling the decrease of <literal>f</literal>
152                                     (default <literal>epsf=0</literal>).
153                                 </para>
154                             </listitem>
155                         </varlistentry>
156                         <varlistentry>
157                             <term>epsx</term>
158                             <listitem>
159                                 <para>
160                                     threshold controlling the variation of <literal>x</literal>
161                                     (default <literal>epsx=0</literal>). This vector (possibly
162                                     a matrix) with the same size as <literal>x0</literal> can be used to
163                                     scale <literal>x</literal>.
164                                 </para>
165                             </listitem>
166                         </varlistentry>
167                     </variablelist>
168                 </listitem>
169             </varlistentry>
170             <varlistentry>
171                 <term>&lt;params&gt;</term>
172                 <listitem>
173                     <para>in the case where the objective function is a C or Fortran
174                         routine, a sequence of arguments specifying how to
175                         communicate with the objective function. This option has no meaning
176                         when the cost function is a Scilab script.
177                     </para>
178                     <para>The available values for &lt;params&gt; are the
179                         following.
180                     </para>
181                     <itemizedlist>
182                         <listitem>
183                             <para>
184                                 <literal>"in"</literal>
185                             </para>
186             <para>This mode allows Scilab to allocate memory in its internal
187                 workspace so that the objective function can access arrays with the
188                 required size without directly allocating the memory. The
189                                 <literal>"in"</literal> value stands for "initialization". In
190                                 that mode, before the value and derivative of the objective
191                                 function are computed, there is a dialog between the
192                                 <literal>optim</literal> Scilab primitive and the objective
193                                 function <literal>costf</literal>. In this dialog, the objective
194                                 function is called twice, with particular values of the
195                                 <literal>ind</literal> parameter. The first time,
196                                 <literal>ind</literal> is set to 10 and the objective function
197                                 is expected to set the <literal>nizs</literal>,
198                                 <literal>nrzs</literal> and <literal>ndzs</literal> integer
199                                 parameters of the <literal>nird</literal> common, which is
200                                 defined as:
201                             </para>
202                             <screen>common /nird/ nizs,nrzs,ndzs    </screen>
203                             <para>This allows Scilab to allocate memory inside its internal
204                                 workspace. The second time the objective function is called,
205                                 <literal>ind</literal> is set to 11 and the objective function
206                                 is expected to set the <literal>ti</literal>,
207                                 <literal>tr</literal> and <literal>td</literal> arrays. After
208                                 this initialization phase, each time it is called, the objective
209                                 function is guaranteed that the <literal>ti</literal>,
210                                 <literal>tr</literal> and <literal>td</literal> arrays which are
211                                 passed to it have the values that were previously
212                                 initialized.
213                             </para>
214                         </listitem>
215                         <listitem>
216                             <para>
217                                 <literal>"ti",valti</literal>
218                             </para>
219                             <para>
220                                 In this mode, <literal>valti</literal> is expected to be a
221                                 Scilab vector variable containing integers. Whenever the
222                                 objective function is called, the <literal>ti</literal> array it
223                                 receives contains the values of the Scilab variable.
224                             </para>
225                         </listitem>
226                         <listitem>
227                             <para>
228                                 <literal>"td", valtd</literal>
229                             </para>
230                             <para>
231                                 In this mode, <literal>valtd</literal> is expected to be a
232                                 Scilab vector variable containing double values. Whenever the
233                                 objective function is called, the <literal>td</literal> array it
234                                 receives contains the values of the Scilab variable.
235                             </para>
236                         </listitem>
237                         <listitem>
238                             <para>
239                                 <literal>"ti",valti,"td",valtd</literal>
240                             </para>
241                             <para>This mode combines the two previous modes.</para>
242                         </listitem>
243                     </itemizedlist>
244                     <para>
245                         The <literal>ti, td</literal> arrays may be used so that the
246                         objective function can be computed. For example, if the objective
247                         function is a polynomial, the ti array may be used to store the
248                         coefficients of that polynomial.
249                     </para>
250                     <para>Users should choose carefully between the
251                         <literal>"in"</literal> mode and the <literal>"ti"</literal> and
252                         <literal>"td"</literal> modes, depending on whether the arrays
253                         are Scilab variables or not. If the data is available as Scilab
254                         variables, then the <literal>"ti", valti, "td", valtd</literal> mode
255                         should be chosen. If the data is available directly from the
256                         objective function, the <literal>"in"</literal> mode should be
257                         chosen. Notice that there is no <literal>"tr"</literal> mode, since,
258                         in Scilab, all real values are doubles.
259                     </para>
260                     <para>If neither the "in" mode, nor the "ti", "td" mode is chosen,
261                         that is, if &lt;params&gt; is not present as an option of the optim
262                         primitive, the user should not assume that the ti, tr and td
263                         arrays can be used: reading or writing the arrays may generate
264                         unpredictable results.
265                     </para>
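                    <para>For instance, here is a minimal usage sketch, assuming the
                        Fortran objective "rosenf" has already been compiled and
                        linked, as in the complete Fortran example below: the double
                        parameter p is transmitted to the external routine through the
                        td array.
                    </para>
                    <programlisting role="example">// Sketch only: requires the external 'rosenf' from the
                        // Fortran example below to be compiled and linked first.
                        x0 = [40; 10; 50];
                        p = 100;    // copied into the td array of 'rosenf'
                        [f, xo, go] = optim('rosenf', x0, 'td', p)
                    </programlisting>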
266                 </listitem>
267             </varlistentry>
268             <varlistentry>
269                 <term>"imp=iflag"</term>
270                 <listitem>
271                     <para>named argument used to set the trace mode (default
272                         <literal>imp=0</literal>). The available values for
273                         <literal>iflag</literal> are 0, 1, 2 and values
274                         greater than 2.
275                     </para>
278                     <itemizedlist>
279                         <listitem>
280                             <para>iflag=0: nothing (except errors) is reported (this is the
281                                 default),
282                             </para>
283                         </listitem>
284                         <listitem>
285                             <para>iflag=1: initial and final reports,</para>
286                         </listitem>
287                         <listitem>
288                             <para>iflag=2: adds a report per iteration,</para>
289                         </listitem>
290                         <listitem>
291                             <para>iflag&gt;2: adds reports on the line search.</para>
292                         </listitem>
293                     </itemizedlist>
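                    <para>For instance, a minimal sketch of the trace mode, reusing
                        the simple quadratic cost function from the examples below:
                        with <literal>imp=2</literal>, one report is printed per
                        iteration.
                    </para>
                    <programlisting role="example">function [f, g, ind] = cost(x, ind)
                        xref = [1; 2; 3];
                        f = 0.5 * norm(x - xref)^2;
                        g = x - xref;
                        endfunction
                        
                        x0 = [1; -1; 1];
                        // imp = 2: initial and final reports, plus one report per iteration
                        [fopt, xopt] = optim(cost, x0, imp = 2)
                    </programlisting>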
294                 </listitem>
295             </varlistentry>
296             <varlistentry>
297                 <term>fopt</term>
298                 <listitem>
299                     <para>the value of the objective function at the point
300                         <literal>xopt</literal>.
301                     </para>
302                 </listitem>
303             </varlistentry>
304             <varlistentry>
305                 <term>xopt</term>
306                 <listitem>
307                     <para>
308                         best value of <literal>x</literal> found.
309                     </para>
310                 </listitem>
311             </varlistentry>
312             <varlistentry>
313                 <term>gopt</term>
314                 <listitem>
315                     <para>the gradient of the objective function at the point
316                         <literal>xopt</literal>.
317                     </para>
318                 </listitem>
319             </varlistentry>
320             <varlistentry>
321                 <term>work</term>
322                 <listitem>
323                     <para>the working array for hot restart of the quasi-Newton method. This
324                         array is automatically initialized by <literal>optim</literal> when
325                         <literal>optim</literal> is invoked. It can be used as an input
326                         parameter to speed up the calculations.
327                     </para>
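                    <para>A minimal hot-restart sketch (an illustrative usage
                        following the calling sequence above, not the only possible
                        one): the working array returned by a first run is passed
                        back as an input to continue the optimization.
                    </para>
                    <programlisting role="example">function [f, g, ind] = cost(x, ind)
                        xref = [1; 2; 3];
                        f = 0.5 * norm(x - xref)^2;
                        g = x - xref;
                        endfunction
                        
                        x0 = [1; -1; 1];
                        // First run, limited to a few calls: retrieve the working array
                        [fopt, xopt, gopt, work] = optim(cost, x0, "ar", 5);
                        // Hot restart from xopt, reusing the working array
                        [fopt, xopt] = optim(cost, xopt, work)
                    </programlisting>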
328                 </listitem>
329             </varlistentry>
330         </variablelist>
331     </refsection>
332     <refsection>
333         <title>Description</title>
334         <para>This function solves unconstrained nonlinear optimization
335             problems:
336         </para>
337         <screen>min f(x)      </screen>
338         <para>
339             where <literal>x</literal> is a vector and <literal>f(x)</literal>
340             is a function that returns a scalar. This function can also solve bound
341             constrained nonlinear optimization problems:
342         </para>
343         <screen>min f(x)
344             binf &lt;= x &lt;= bsup      
345         </screen>
346         <para>
347             where <literal>binf</literal> is the lower bound and
348             <literal>bsup</literal> is the upper bound on <literal>x</literal>.
349         </para>
350         <para>
351             The <literal>costf</literal> argument can be a Scilab function, a
352             list or a string giving the name of a C or Fortran routine (see
353             "external"). This external must return the value <literal>f</literal> of
354             the cost function at the point <literal>x</literal> and the gradient
355             <literal>g</literal> of the cost function at the point
356             <literal>x</literal>.
357         </para>
358         <variablelist>
359             <varlistentry>
360                 <term>Scilab function case</term>
361                 <listitem>
362                     <para>
363                         If <literal>costf</literal> is a Scilab function, its calling
364                         sequence must be:
365                     </para>
366                     <screen>[f, g, ind] = costf(x, ind)      </screen>
367                     <para>
368                         where <literal>x</literal> is the current point,
369                         <literal>ind</literal> is an integer flag described below,
370                         <literal>f</literal> is the real value of the objective function at
371                         the point <literal>x</literal> and <literal>g</literal> is a vector
372                         containing the gradient of the objective function at
373                         <literal>x</literal>.
375                     </para>
376                 </listitem>
377             </varlistentry>
378             <varlistentry>
379                 <term>List case</term>
380                 <listitem>
381                     <para>It may happen that the objective function requires extra
382                         arguments. In this case, we can use the following feature. The
383                         <literal>costf</literal> argument can be the list
384                         <literal>(real_costf, arg1,...,argn)</literal>. In this case,
385                         <literal>real_costf</literal>, the first element in the list, must
386                         be a Scilab function with calling sequence: <screen>        [f,g,ind]=real_costf(x,ind,arg1,...,argn)      </screen>
387                         The <literal>x</literal>, <literal>f</literal>,
388                         <literal>g</literal>, <literal>ind</literal> arguments have the same
389                         meaning as before. In this case, each time the objective function is
390                         called back, the arguments <literal>arg1,...,argn</literal> are
391                         automatically appended at the end of the calling sequence of
392                         <literal>real_costf</literal>.
393                     </para>
394                 </listitem>
395             </varlistentry>
396             <varlistentry>
397                 <term>String case</term>
398                 <listitem>
399                     <para>
400                         If <literal>costf</literal> is a string, it refers to the name
401                         of a C or Fortran routine which must be linked to Scilab
402                     </para>
403                     <variablelist>
404                         <varlistentry>
405                             <term>Fortran case</term>
406                             <listitem>
407                                 <para>The calling sequence of the Fortran subroutine computing
408                                     the objective must be:
409                                 </para>
410                                 <screen>subroutine costf(ind,n,x,f,g,ti,tr,td)      </screen>
411                                 <para>with the following declarations:</para>
412                                 <screen>integer ind,n,ti(*)
413                                     double precision x(n),f,g(n),td(*)
414                                     real tr(*)      
415                                 </screen>
416                                 <para>
417                                     The argument <literal>ind</literal> is described
418                                     below.
419                                 </para>
420                                 <para>If ind = 2, 3 or 4, the inputs of the routine are:
421                                     <literal>x, ind, n, ti, tr, td</literal>.
422                                 </para>
423                                 <para>If ind = 2, 3 or 4, the outputs of the routine are:
424                                     <literal>f</literal> and <literal>g</literal>.
425                                 </para>
426                             </listitem>
427                         </varlistentry>
428                         <varlistentry>
429                             <term>C case</term>
430                             <listitem>
431                                 <para>The calling sequence of the C function computing the
432                                     objective must be:
433                                 </para>
434                                 <screen>void costf(int *ind, int *n, double *x, double *f, double *g, int *ti, float *tr, double *td)      </screen>
435                                 <para>
436                                     The argument <literal>ind</literal> is described
437                                     below.
438                                 </para>
439                                 <para>The inputs and outputs of the function are the same as
440                                     in the Fortran case.
441                                 </para>
442                             </listitem>
443                         </varlistentry>
444                     </variablelist>
445                 </listitem>
446             </varlistentry>
447         </variablelist>
448         <para>
449             On output, <literal>ind&lt;0</literal> means that
450             <literal>f</literal> cannot be evaluated at <literal>x</literal> and
451             <literal>ind=0</literal> interrupts the optimization.
452         </para>
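        <para>For example, here is a minimal sketch of a cost function which
            signals a point where f cannot be evaluated; the one-dimensional
            problem is introduced here for illustration only.
        </para>
        <programlisting role="example">function [f, g, ind] = cost(x, ind)
            if x(1) &lt;= 0 then
            // f is not defined here: tell optim that the point cannot be evaluated
            ind = -1;
            f = 0;
            g = 0;
            else
            f = -log(x(1)) + x(1)^2;
            g = -1 / x(1) + 2 * x(1);
            end
            endfunction
            
            x0 = 2;
            [fopt, xopt] = optim(cost, x0)
        </programlisting>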
453     </refsection>
454     <refsection>
455         <title>Termination criteria</title>
456         <para>Each algorithm has its own termination criteria, which may use the
457             parameters given by the user, that is <literal>nap</literal>,
458             <literal>iter</literal>, <literal>epsg</literal>, <literal>epsf</literal>
459             and <literal>epsx</literal>. Not all the parameters are taken into
460             account. In the table below, we present the specific termination
461             parameters which are taken into account by each algorithm. The
462             unconstrained solver is identified by "UNC" while the bound constrained
463             solver is identified by "BND". An empty entry means that the parameter is
464             ignored by the algorithm.
465         </para>
466         <para>
467             <informaltable border="1">
468                 <tr>
469                     <td>Solver</td>
470                     <td>nap</td>
471                     <td>iter</td>
472                     <td>epsg</td>
473                     <td>epsf</td>
474                     <td>epsx</td>
475                 </tr>
476                 <tr>
477                     <td>optim/"qn" UNC</td>
478                     <td>X</td>
479                     <td>X</td>
480                     <td>X</td>
481                     <td/>
482                     <td/>
483                 </tr>
484                 <tr>
485                     <td>optim/"qn" BND</td>
486                     <td>X</td>
487                     <td>X</td>
488                     <td>X</td>
489                     <td>X</td>
490                     <td>X</td>
491                 </tr>
492                 <tr>
493                     <td>optim/"gc" UNC</td>
494                     <td>X</td>
495                     <td>X</td>
496                     <td>X</td>
497                     <td/>
498                     <td/>
499                 </tr>
500                 <tr>
501                     <td>optim/"gc" BND</td>
502                     <td>X</td>
503                     <td>X</td>
504                     <td>X</td>
505                     <td>X</td>
506                     <td>X</td>
507                 </tr>
508                 <tr>
509                     <td>optim/"nd" UNC</td>
510                     <td>X</td>
511                     <td>X</td>
512                     <td/>
513                     <td>X</td>
514                     <td>X</td>
515                 </tr>
516             </informaltable>
517         </para>
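        <para>For example, a minimal sketch using the
            <literal>"ar",nap,iter,epsg</literal> sequence documented above to
            tighten the gradient threshold of the <literal>"qn"</literal>
            solver:
        </para>
        <programlisting role="example">function [f, g, ind] = cost(x, ind)
            xref = [1; 2; 3];
            f = 0.5 * norm(x - xref)^2;
            g = x - xref;
            endfunction
            
            x0 = [1; -1; 1];
            // nap = 100 calls, iter = 50 iterations, epsg = 1e-8
            [fopt, xopt, gopt] = optim(cost, x0, "qn", "ar", 100, 50, 1e-8)
        </programlisting>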
518     </refsection>
519     <refsection>
520         <title>Example: Scilab function</title>
521         <para>The following is an example with a Scilab function. Notice that, for
522             simplicity, the Scilab function "cost" in the following
523             example computes the objective function f and its derivative whatever
524             the value of ind. This keeps the example simple. In practical
525             situations though, the computation of "f" and "g" may raise performance
526             issues, so that a natural optimization is to use the value of "ind" to
527             compute "f" and "g" only when needed.
528         </para>
529         <programlisting role="example">function [f, g, ind] = cost(x, ind)
530             xref = [1; 2; 3];
531             f = 0.5 * norm(x - xref)^2;
532             g = x - xref;
533             endfunction
534             
535             // Simplest call
536             x0 = [1; -1; 1];
537             [fopt, xopt] = optim(cost, x0)
538             
539             // Use "gc" algorithm
540             [fopt, xopt, gopt] = optim(cost, x0, "gc")
541             
542             // Use "nd" algorithm
543             [fopt, xopt, gopt] = optim(cost, x0, "nd")
544             
545             // Upper and lower bounds on x
546             [fopt, xopt, gopt] = optim(cost, "b", [-1;0;2], [0.5;1;4], x0)
547             
548             // Upper and lower bounds on x and setting up the algorithm to "gc"
549             [fopt, xopt, gopt] = optim(cost, "b", [-1; 0; 2], [0.5; 1; 4], x0, "gc")
550             
551             // Bound on the number of call to the objective function
552             [fopt, xopt, gopt] = optim(cost, "b", [-1; 0; 2], [0.5; 1; 4], x0, "gc", "ar", 3)
553             
554             // Set max number of calls to the objective function (3)
555             // Set max number of iterations (100)
556             // Set stopping thresholds on the norm of the gradient of the objective function (1e-6),
557             // on the decrease of the value of f (1e-6),
558             // and on the variation of the parameters x (1e-3; 1e-3; 1e-3)
559             [fopt, xopt, gopt] = optim(cost, "b", [-1; 0; 2], [0.5; 1; 4], x0, "gc", "ar", 3, 100, 1e-6, 1e-6, [1e-3; 1e-3; 1e-3])
560             
561             // Additional messages are printed in the console.
562             [fopt, xopt] = optim(cost, x0, imp = 3)    
563         </programlisting>
564     </refsection>
565     <refsection>
566         <title>Example: Print messages</title>
567         <para>
568             The <literal>imp</literal> flag may take negative integer values,
569             say k. In that case, the cost function is called once every -k iterations.
570             This makes it possible to plot the function value or write a log file.
571         </para>
572         <para>
573             This feature is available only with the <literal>"qn"</literal>
574             algorithm without constraints.
575         </para>
576         <para>In the following example, we solve a simple quadratic test case. For
577             each iteration of the algorithm, we print the value of f and the norm of g.
578         </para>
579         <programlisting role="example">function [f, g, ind] = cost(x, ind)
580             xref = [1; 2; 3];
581             f = 0.5 * norm(x - xref)^2;
582             g = x - xref;
583             if (ind == 1) then
584             mprintf("f(x) = %s, |g(x)|=%s\n", string(f), string(norm(g)))
585             end
586             endfunction
587             
588             x0 = [1; -1; 1];
589             [fopt, xopt] = optim(cost, x0, imp = -1)   
590         </programlisting>
591         <para>The previous script produces the following output.</para>
592         <screen>--&gt;[fopt, xopt] = optim(cost, x0, imp = -1)
593             f(x) = 6.5, |g(x)|=3.6055513
594             f(x) = 2.8888889, |g(x)|=2.4037009
595             f(x) = 9.861D-31, |g(x)|=1.404D-15
596             f(x) = 0, |g(x)|=0
597             Norm of projected gradient lower than   0.0000000D+00.
598             xopt  =
599             1.
600             2.
601             3.
602             fopt  =
603             0.    
604         </screen>
605         <para>In the following example, we solve the Rosenbrock test case. For
606             each iteration of the algorithm, we plot the current value of x into a 2D
607             graph containing the contours of Rosenbrock's function. This lets us watch
608             the progress of the algorithm while it is running. We could
609             just as well write the values of x, f and g into a log file if needed.
610         </para>
611         <programlisting role="example">// 1. Define Rosenbrock for optimization
612             function [f , g , ind] = rosenbrock (x , ind)
613             f = 100.0 *(x(2) - x(1)^2)^2 + (1 - x(1))^2;
614             g(1) = - 400. * ( x(2) - x(1)**2 ) * x(1) -2. * ( 1. - x(1) )
615             g(2) = 200. * ( x(2) - x(1)**2 )
616             endfunction
617             
618             // 2. Define rosenbrock for contouring
619             function f = rosenbrockC ( x1 , x2 )
620             x = [x1 x2]
621             ind = 4
622             [ f , g , ind ] = rosenbrock ( x , ind )
623             endfunction
624             
625             // 3. Define Rosenbrock for plotting
626             function [ f , g , ind ] = rosenbrockPlot ( x , ind )
627             [ f , g , ind ] = rosenbrock ( x , ind )
628             if (ind == 1) then
629             plot ( x(1) , x(2) , "g." )
630             end
631             endfunction
632             
633             // 4. Draw the contour of Rosenbrock's function
634             x0 = [-1.2 1.0];
635             xopt = [1.0 1.0];
636             xdata = linspace(-2,2,100);
637             ydata = linspace(-2,2,100);
638             contour ( xdata , ydata , rosenbrockC , [1 10 100 500 1000])
639             plot(x0(1) , x0(2) , "b.")
640             plot(xopt(1) , xopt(2) , "r*")
641             
642             // 5. Plot the optimization process, during optimization
643             [fopt, xopt] = optim ( rosenbrockPlot , x0 , imp = -1)    
644         </programlisting>
645         <scilab:image>
646             function [f, g, ind]=rosenbrock(x, ind)
647             f = 100.0 *(x(2) - x(1)^2)^2 + (1 - x(1))^2;
648             g(1) = - 400. * ( x(2) - x(1)**2 ) * x(1) -2. * ( 1. - x(1) )
649             g(2) = 200. * ( x(2) - x(1)**2 )
650             endfunction
651             
652             function f=rosenbrockC(x1, x2)
653             x = [x1 x2]
654             ind = 4
655             [ f , g , ind ] = rosenbrock ( x , ind )
656             endfunction
657             
658             function [f, g, ind]=rosenbrockPlot(x, ind)
659             [ f , g , ind ] = rosenbrock ( x , ind )
660             if (ind == 1) then
661             plot ( x(1) , x(2) , "g." )
662             end
663             endfunction
664             
665             x0 = [-1.2 1.0];
666             xopt = [1.0 1.0];
667             xdata = linspace(-2,2,100);
668             ydata = linspace(-2,2,100);
669             contour ( xdata , ydata , rosenbrockC , [1 10 100 500 1000])
670             plot(x0(1) , x0(2) , "b.")
671             plot(xopt(1) , xopt(2) , "r*")
672             [fopt, xopt] = optim ( rosenbrockPlot , x0 , imp = -1)
673         </scilab:image>
674         
675     </refsection>
676     <refsection>
677         <title>Example: Optimizing with numerical derivatives</title>
678         <para>It is possible to optimize a problem without explicit knowledge
679             of the derivative of the cost function. For this purpose, we can use the
680             numdiff or derivative function to compute a numerical derivative of the
681             cost function.
682         </para>
683         <para>In the following example, we use the numdiff function to solve
684             Rosenbrock's problem.
685         </para>
686         <programlisting role="example">function f = rosenbrock ( x )
687             f = 100.0 *(x(2)-x(1)^2)^2 + (1-x(1))^2;
688             endfunction
689             
690             function [ f , g , ind ] = rosenbrockCost ( x , ind )
691             f = rosenbrock ( x );
692             g= numdiff ( rosenbrock , x );
693             endfunction
694             
695             x0 = [-1.2 1.0];
696             
697             [ fopt , xopt ] = optim ( rosenbrockCost , x0 )    
698         </programlisting>
699         <para>In the following example, we use the derivative function to solve
700             Rosenbrock's problem. Given that the step computation strategy is not the
701             same in numdiff and derivative, this might lead to improved
702             results.
703         </para>
704         <programlisting role="example">function f = rosenbrock ( x )
705             f = 100.0 *(x(2)-x(1)^2)^2 + (1-x(1))^2;
706             endfunction
707             
708             function [ f , g , ind ] = rosenbrockCost2 ( x , ind )
709             f = rosenbrock ( x );
710             g = derivative ( rosenbrock , x.' , order = 4 );
711             endfunction
712             
713             x0 = [-1.2 1.0];
714             [fopt , xopt] = optim ( rosenbrockCost2 , x0 )    
715         </programlisting>
716     </refsection>
717     <refsection>
718         <title>Example: Counting function evaluations and number of
719             iterations
720         </title>
721         <para>
722             The <literal>imp</literal> option can take negative values. If
723             <literal>imp</literal> is equal to <literal>m</literal> where
724             <literal>m</literal> is a negative integer, then the cost function is
725             evaluated every -<literal>m</literal> iterations, with the
726             <literal>ind</literal> input argument equal to 1. The following example
727             uses this feature to compute the number of iterations. The global variable
728             <literal>_MYDATA_</literal> is used to store the number of function
729             evaluations as well as the number of iterations.
730         </para>
731         <programlisting role="example">function [f, g, ind] = cost(x, ind,xref)
732             global _MYDATA_
733             if ( ind == 1 )
734             _MYDATA_.niter = _MYDATA_.niter + 1
735             end
736             _MYDATA_.nfevals = _MYDATA_.nfevals + 1
737             f = 0.5 * norm(x - xref)^2;
738             g = x - xref;
739             endfunction
740             xref = [1; 2; 3];
741             x0 = [1; -1; 1];
742             global _MYDATA_
743             _MYDATA_ = tlist ( ["T_MYDATA", "niter", "nfevals"])
744             _MYDATA_.niter = 0
745             _MYDATA_.nfevals = 0
746             [f, xopt] = optim(list(cost, xref), x0, imp = -1)
747             mprintf("Number of function evaluations:%d\n", _MYDATA_.nfevals)
748             mprintf("Number of iterations:%d\n", _MYDATA_.niter) 
749         </programlisting>
750         <para>While the previous example works perfectly, there is a risk that the
751             same variable <literal>_MYDATA_</literal> is used by some internal
752             function used by <literal>optim</literal>. In this case, the value may be
753             wrong. This is why a sufficiently unusual variable name has been
754             used.
755         </para>
756     </refsection>
757     <refsection>
758         <title>Example: Passing extra parameters</title>
759         <para>In most practical situations, the cost function depends on extra
760             parameters which are required to evaluate the cost function. There are
761             several methods to achieve this goal.
762         </para>
763         <para>In the following example, the cost function uses 4 parameters
764             <literal>a, b, c</literal> and <literal>d</literal>. We define the cost
765             function with additional input arguments, which are declared after the
766             index argument. Then we pass a list as the first input argument of the
767             <literal>optim</literal> solver. The first element of the list is the cost
768             function. The additional variables are directly passed to the cost
769             function.
770         </para>
771         <programlisting role="example">function [ f , g , ind ] = costfunction ( x , ind , a , b , c , d )
772             f = a * ( x(1) - c ) ^2 + b * ( x(2) - d )^2
773             g(1) = 2 * a * ( x(1) - c )
774             g(2) = 2 * b * ( x(2) - d )
775             endfunction
776             
777             x0 = [1 1];
778             a = 1.0;
779             b = 2.0;
780             c = 3.0;
781             d = 4.0;
782             costf = list ( costfunction , a , b , c, d );
783             [fopt , xopt] = optim ( costf , x0 , imp = 2)    
784         </programlisting>
785         <para>In complex cases, the cost function may have so many parameters
786             that having a function which takes all arguments as inputs is not
787             convenient. For example, consider the situation where the cost function
788             needs 12 parameters. Then, designing a function with 14 input arguments
789             (x, index and the 12 parameters) is difficult to manage. Instead, we can
790             use a more complex data structure to store our data. In the following
791             example, we use a tlist to store the 4 input arguments. This method can
792             easily be expanded to an arbitrary number of parameters.
793         </para>
794         <programlisting role="example">function [f , g , ind] = costfunction ( x , ind , parameters)
795             // Get the parameters
796             a = parameters.a
797             b = parameters.b
798             c = parameters.c
799             d = parameters.d
800             f = a * ( x(1) - c ) ^2 + b * ( x(2) - d )^2
801             g(1) = 2 * a * ( x(1) - c )
802             g(2) = 2 * b * ( x(2) - d )
803             endfunction
804             
805             x0 = [1 1];
806             a = 1.0;
807             b = 2.0;
808             c = 3.0;
809             d = 4.0;
810             // Store the parameters
811             parameters = tlist ( [
812             "T_MYPARAMS"
813             "a"
814             "b"
815             "c"
816             "d"
817             ]);
818             
819             parameters.a = a;
820             parameters.b = b;
821             parameters.c = c;
822             parameters.d = d;
823             costf = list ( costfunction , parameters );
824             [fopt , xopt] = optim ( costf , x0 , imp = 2)    
825         </programlisting>
826         <para>In the following example, the parameters are defined before the
827             optimizer is called. They are directly used in the cost function.
828         </para>
829         <programlisting role="example">// The example NOT to follow
830             function [ f , g , ind ] = costfunction ( x , ind )
831             f = a * ( x(1) - c ) ^2 + b * ( x(2) - d )^2
832             g(1) = 2 * a * ( x(1) - c )
833             g(2) = 2 * b * ( x(2) - d )
834             endfunction
835             x0 = [1 1];
836             a = 1.0;
837             b = 2.0;
838             c = 3.0;
839             d = 4.0;
840             [ fopt , xopt ] = optim ( costfunction , x0 , imp = 2 )   
841         </programlisting>
842         <para>While the previous example works perfectly, there is a risk that the
843             same variables are used by some internal function used by
844             <literal>optim</literal>. In this case, the values of the parameters are
845             not what is expected and the optimization can fail or, worse, give a wrong
846             result. It is also difficult to manage such a function, which requires
847             that all the parameters are defined in the calling context.
848         </para>
849         <para>In the following example, we define the cost function with the
850             classical header. Inside the function definition, we declare that the
851             parameters <literal>a, b, c</literal> and <literal>d</literal> are global
852             variables. Then we declare and set the global variables.
853         </para>
854         <programlisting role="example">// Another example NOT to follow
855             function [ f , g , ind ] = costfunction ( x , ind )
856             global a b c d
857             f = a * ( x(1) - c ) ^2 + b * ( x(2) - d )^2
858             g(1) = 2 * a * ( x(1) - c )
859             g(2) = 2 * b * ( x(2) - d )
860             endfunction
861             x0 = [1 1];
862             global a b c d
863             a = 1.0;
864             b = 2.0;
865             c = 3.0;
866             d = 4.0;
867             [ fopt , xopt ] = optim ( costfunction , x0 , imp = 2 )    
868         </programlisting>
869         <para>While the previous example works perfectly, there is a risk that the
870             same variables are used by some internal function used by
871             <literal>optim</literal>. In this case, the values of the parameters are
872             not what is expected and the optimization can fail or, worse, give a wrong
873             result.
874         </para>
875     </refsection>
876     <refsection>
877         <title>Example: Checking that derivatives are correct</title>
878         <para>Many optimization problems can be avoided if the derivatives are
879             computed correctly. One common reason for failure in the step-length
880             procedure is an error in the calculation of the cost function and its
881             gradient. Incorrect calculation of derivatives is by far the most common
882             user error.
883         </para>
884         <para>In the following example, we give a false implementation of
885             Rosenbrock's gradient. In order to check the computation of the
886             derivatives, we use the <literal>derivative</literal> function. We define
887             the <literal>simplified</literal> function, which delegates the
888             computation of <literal>f</literal> to the rosenbrock function. The
889             <literal>simplified</literal> function is passed as an input argument of
890             the <literal>derivative</literal> function.
891         </para>
892         <programlisting role="example">function [ f , g , index ] = rosenbrock ( x , index )
893             f = 100.0 *(x(2)-x(1)^2)^2 + (1-x(1))^2;
894             // Exact :
895             g(1) = - 400. * ( x(2) - x(1)**2 ) * x(1) -2. * ( 1. - x(1) )
896             // Wrong :
897             g(1) = - 1200. * ( x(2) - x(1)**2 ) * x(1) -2. * ( 1. - x(1) )
898             g(2) = 200. * ( x(2) - x(1)**2 )
899             endfunction
900             
901             function f = simplified ( x )
902             index = 1;
903             [ f , g , index ] = rosenbrock ( x , index )
904             endfunction
905             
906             x0 = [-1.2 1];
907             index = 1;
908             [ f , g , index ] = rosenbrock ( x0 , index );
909             gnd = derivative ( simplified , x0.' );
910             mprintf("Exact derivative:[%s]\n" , strcat ( string(g) , " " ));
911             mprintf("Numerical derivative:[%s]\n" , strcat ( string(gnd) , " " ));    
912         </programlisting>
913         <para>The previous script produces the following output. Obviously, the
914             difference between the two gradients is enormous, which shows that the
915             wrong formula has been used in the gradient.
916         </para>
917         <programlisting role="example">      Exact derivative:[-638 -88]
918             Numerical derivative:[-215.6 -88]   
919         </programlisting>
920     </refsection>
921     <refsection>
922         <title>Example: C function</title>
923         <para>The following is an example with a C function, where a C source code
924             is written into a file, dynamically compiled and loaded into Scilab, and
925             then used by the "optim" solver. The interface of the "rosenc" function is
926             fixed, even if the arguments are not really used in the cost function.
927             This is because the underlying optimization solvers must assume that the
928             objective function has a known, constant interface. In the following
929             example, the arrays ti and tr are not used; only the array "td" is used,
930             as a parameter of the Rosenbrock function. Notice that the content of the
931             array td is the same as the content of the Scilab variable, as
932             expected.
933         </para>
934         <programlisting role="example">// External function written in C (C compiler required)
935             // write down the C code (Rosenbrock problem)
936             C=['#include &lt;math.h&gt;'
937             'double sq(double x)'
938             '{ return x*x;}'
939             'void rosenc(int *ind, int *n, double *x, double *f, double *g, '
940             '                                int *ti, float *tr, double *td)'
941             '{'
942             '  double p;'
943             '  int i;'
944             '  p=td[0];'
945             '  if (*ind==2||*ind==4) {'
946             '    *f=1.0;'
947             '    for (i=1;i&lt;*n;i++)'
948             '      *f+=p*sq(x[i]-sq(x[i-1]))+sq(1.0-x[i]);'
949             '  }'
950             '  if (*ind==3||*ind==4) {'
951             '    g[0]=-4.0*p*(x[1]-sq(x[0]))*x[0];'
952             '    for (i=1;i&lt;*n-1;i++)'
953             '      g[i]=2.0*p*(x[i]-sq(x[i-1]))-4.0*p*(x[i+1]-sq(x[i]))*x[i]-2.0*(1.0-x[i]);'
954             '    g[*n-1]=2.0*p*(x[*n-1]-sq(x[*n-2]))-2.0*(1.0-x[*n-1]);'
955             '  }'
956             '}'];
957             cd TMPDIR;
958             mputl(C, TMPDIR+'/rosenc.c')
959             
960             // compile the C code
961             l = ilib_for_link('rosenc', 'rosenc.c', [], 'c');
962             
963             // incremental linking
964             link(l, 'rosenc', 'c')
965             
966             //solve the problem
967             x0 = [40; 10; 50];
968             p = 100;
969             [f, xo, go] = optim('rosenc', x0, 'td', p)
970         </programlisting>
971     </refsection>
972     <refsection>
973         <title>Example: Fortran function</title>
974         <para>The following is an example with a Fortran function.</para>
975         <programlisting role="example">// External function written in Fortran (Fortran compiler required)
976             // write down the Fortran code (Rosenbrock problem)
977             F = [ '      subroutine rosenf(ind, n, x, f, g, ti, tr, td)'
978             '      integer ind,n,ti(*)'
979             '      double precision x(n),f,g(n),td(*)'
980             '      real tr(*)'
981             'c'
982             '      double precision y,p'
983             '      p=td(1)'
984             '      if (ind.eq.2.or.ind.eq.4) then'
985             '        f=1.0d0'
986             '        do i=2,n'
987             '          f=f+p*(x(i)-x(i-1)**2)**2+(1.0d0-x(i))**2'
988             '        enddo'
989             '      endif'
990             '      if (ind.eq.3.or.ind.eq.4) then'
991             '        g(1)=-4.0d0*p*(x(2)-x(1)**2)*x(1)'
992             '        if(n.gt.2) then'
993             '          do i=2,n-1'
994             '            g(i)=2.0d0*p*(x(i)-x(i-1)**2)-4.0d0*p*(x(i+1)-x(i)**2)*x(i)'
995             '     &amp;           -2.0d0*(1.0d0-x(i))'
996             '          enddo'
997             '        endif'
998             '        g(n)=2.0d0*p*(x(n)-x(n-1)**2)-2.0d0*(1.0d0-x(n))'
999             '      endif'
1000             '      return'
1001             '      end'];
1002             cd TMPDIR;
1003             mputl(F, TMPDIR+'/rosenf.f')
1004             
1005             // compile the Fortran code
1006             l = ilib_for_link('rosenf', 'rosenf.f', [], 'f');
1007             
1008             // incremental linking
1009             link(l, 'rosenf', 'f')
1010             
1011             //solve the problem
1012             x0 = [40; 10; 50];
1013             p = 100;
1014             [f, xo, go] = optim('rosenf', x0, 'td', p)    
1015         </programlisting>
1016     </refsection>
1017     <refsection>
1018         <title>Example: Fortran function with initialization</title>
1019         <para>The following is an example with a Fortran function in which the
1020             "in" option is used to allocate memory inside the Scilab environment. In
1021             this mode, there is a dialog between Scilab and the objective function.
1022             The goal of this dialog is to initialize the parameters of the objective
1023             function. Each part of this dialog is based on a specific value of the
1024             "ind" parameter.
1025         </para>
1026         <para>At the beginning, Scilab calls the objective function, with the ind
1027             parameter equal to 10. This tells the objective function to initialize
1028             the sizes of the arrays it needs by setting the nizs, nrzs and ndzs
1029             integer parameters of the "nird" common. Then the objective function
1030             returns. At this point, Scilab creates internal variables and allocates
1031             memory for the variables izs, rzs and dzs. Scilab calls the objective
1032             function back again, this time with ind equal to 11. This tells the
1033             objective function to initialize the arrays izs, rzs and dzs. When the
1034             objective function has done so, it returns. Then Scilab enters the actual
1035             optimization mode and calls the optimization solver the user requested.
1036             Whenever the objective function is called, the izs, rzs and dzs arrays
1037             have the values that have been previously initialized.
1038         </para>
1039         <programlisting role="example">//
1040             // Define a fortran source code and compile it (fortran compiler required)
1041             //
1042             fortransource = ['      subroutine rosenf(ind,n,x,f,g,izs,rzs,dzs)'
1043             'C     -------------------------------------------'
1044             'c     Example of cost function given by a subroutine'
1045             'c     if n&lt;=2 returns ind=0'
1046             'c     f.bonnans, oct 86'
1047             '      implicit double precision (a-h,o-z)'
1048             '      real rzs(1)'
1049             '      double precision dzs(*)'
1050             '      dimension x(n),g(n),izs(*)'
1051             '      common/nird/nizs,nrzs,ndzs'
1052             '      if (n.lt.3) then'
1053             '        ind=0'
1054             '        return'
1055             '      endif'
1056             '      if(ind.eq.10) then'
1057             '         nizs=2'
1058             '         nrzs=1'
1059             '         ndzs=2'
1060             '         return'
1061             '      endif'
1062             '      if(ind.eq.11) then'
1063             '         izs(1)=5'
1064             '         izs(2)=10'
1065             '         dzs(2)=100.0d+0'
1066             '         return'
1067             '      endif'
1068             '      if(ind.eq.2)go to 5'
1069             '      if(ind.eq.3)go to 20'
1070             '      if(ind.eq.4)go to 5'
1071             '      ind=-1'
1072             '      return'
1073             '5     f=1.0d+0'
1074             '      do 10 i=2,n'
1075             '        im1=i-1'
1076             '10      f=f + dzs(2)*(x(i)-x(im1)**2)**2 + (1.0d+0-x(i))**2'
1077             '      if(ind.eq.2)return'
1078             '20    g(1)=-4.0d+0*dzs(2)*(x(2)-x(1)**2)*x(1)'
1079             '      nm1=n-1'
1080             '      do 30 i=2,nm1'
1081             '        im1=i-1'
1082             '        ip1=i+1'
1083             '        g(i)=2.0d+0*dzs(2)*(x(i)-x(im1)**2)'
1084             '30      g(i)=g(i) -4.0d+0*dzs(2)*(x(ip1)-x(i)**2)*x(i) - '
1085             '     &amp;        2.0d+0*(1.0d+0-x(i))'
1086             '      g(n)=2.0d+0*dzs(2)*(x(n)-x(nm1)**2) - 2.0d+0*(1.0d+0-x(n))'
1087             '      return'
1088             '      end'];
1089             cd TMPDIR;
1090             mputl(fortransource, TMPDIR + '/rosenf.f')
1091             
1092             // compile the Fortran code
1093             libpath = ilib_for_link('rosenf', 'rosenf.f', [], 'f');
1094             
1095             // incremental linking
1096             linkid = link(libpath, 'rosenf', 'f');
1097             
1098             x0 = 1.2 * ones(1, 5);
1099             //
1100             // Solve the problem
1101             //
1102             [f, x, g] = optim('rosenf', x0, 'in');    
1103         </programlisting>
1104     </refsection>
1105     <refsection>
1106         <title>Example: Fortran function with initialization on Windows with Intel
1107             Fortran Compiler
1108         </title>
1109         <para>Under the Windows operating system with Intel Fortran Compiler, one
1110             must carefully design the Fortran source code so that the dynamic link
1111             works properly. On Scilab's side, the optimization component is
1112             dynamically linked and the symbol "nird" is exported out of the
1113             optimization dll. On the cost function's side, which is also dynamically
1114             linked, the "nird" common must be imported in the cost function
1115             dll.
1116         </para>
1117         <para>The following example is a rewriting of the previous example, with
1118             special attention to the Windows operating system with the Intel Fortran
1119             compiler. In that case, we introduce additional compiler
1120             directives, which allow the compiler to import the "nird"
1121             symbol.
1122         </para>
1123         <programlisting role="example">fortransource = ['subroutine rosenf(ind,n,x,f,g,izs,rzs,dzs)'
1124             'cDEC$ IF DEFINED (FORDLL)'
1125             'cDEC$ ATTRIBUTES DLLIMPORT:: /nird/'
1126             'cDEC$ ENDIF'
1127             'C     -------------------------------------------'
1128             'c     Example of cost function given by a subroutine'
1129             'c     if n&lt;=2 returns ind=0'
1130             'c     f.bonnans, oct 86'
1131             '      implicit double precision (a-h,o-z)'
1132             [etc...]    
1133         </programlisting>
1134     </refsection>
1135     <refsection role="see also">
1136         <title>See Also</title>
1137         <simplelist type="inline">
1138             <member>
1139                 <link linkend="external">external</link>
1140             </member>
1141             <member>
1142                 <link linkend="qpsolve">qpsolve</link>
1143             </member>
1144             <member>
1145                 <link linkend="datafit">datafit</link>
1146             </member>
1147             <member>
1148                 <link linkend="leastsq">leastsq</link>
1149             </member>
1150             <member>
1151                 <link linkend="numdiff">numdiff</link>
1152             </member>
1153             <member>
1154                 <link linkend="derivative">derivative</link>
1155             </member>
1156             <member>
1157                 <link linkend="NDcost">NDcost</link>
1158             </member>
1159         </simplelist>
1160     </refsection>
1161     <refsection>
1162         <title>References</title>
1163         <para>The following is a map from the various options to the underlying
1164             solvers.
1165         </para>
1166         <variablelist>
1167             <varlistentry>
1168                 <term>"qn" without constraints</term>
1169                 <listitem>
1170                     <para>n1qn1: a quasi-Newton method with a Wolfe-type line
1171                         search
1172                     </para>
1173                 </listitem>
1174             </varlistentry>
1175             <varlistentry>
1176                 <term>"qn" with bounds constraints</term>
1177                 <listitem>
1178                     <para>qnbd: a quasi-Newton method with projection</para>
1179                     <para>RR-0242 - A variant of a projected variable metric method for
1180                         bound constrained optimization problems, Bonnans Frederic, Rapport
1181                         de recherche de l'INRIA - Rocquencourt, October 1983
1182                     </para>
1183                 </listitem>
1184             </varlistentry>
1185             <varlistentry>
1186                 <term>"gc" without constraints</term>
1187                 <listitem>
1188                     <para>n1qn3: a quasi-Newton limited-memory method with BFGS.</para>
1189                 </listitem>
1190             </varlistentry>
1191             <varlistentry>
1192                 <term>"gc" with bounds constraints</term>
1193                 <listitem>
1194                     <para>gcbd: a BFGS-type method with limited memory and
1195                         projection
1196                     </para>
1197                 </listitem>
1198             </varlistentry>
1199             <varlistentry>
1200                 <term>"nd" without constraints</term>
1201                 <listitem>
1202                     <para>n1fc1: a bundle method</para>
1203                 </listitem>
1204             </varlistentry>
1205             <varlistentry>
1206                 <term>"nd" with bounds constraints</term>
1207                 <listitem>
1208                     <para>not available</para>
1209                 </listitem>
1210             </varlistentry>
1211         </variablelist>
1212     </refsection>
1213 </refentry>