[doc] broken linkend fixed
[scilab.git] / scilab / modules / spreadsheet / help / en_US / csvRead.xml
1 <?xml version="1.0" encoding="UTF-8"?>
2 <!--
3  * Copyright (C) 2010-2011 - INRIA - Allan CORNET
4  * Scilab ( http://www.scilab.org/ ) - This file is part of Scilab
5  * Copyright (C) 2012 - 2016 - Scilab Enterprises
6  *
7  * This file is hereby licensed under the terms of the GNU GPL v2.0,
8  * pursuant to article 5.3.4 of the CeCILL v.2.1.
9  * This file was originally licensed under the terms of the CeCILL v2.1,
10  * and continues to be available under such terms.
11  * For more information, see the COPYING file which you should have received
12  * along with this program.
13  -->
14 <refentry version="5.0-subset Scilab" xml:id="csvRead" xml:lang="en"
15           xmlns="http://docbook.org/ns/docbook"
16           xmlns:xlink="http://www.w3.org/1999/xlink"
17           xmlns:svg="http://www.w3.org/2000/svg"
18           xmlns:ns3="http://www.w3.org/1999/xhtml"
19           xmlns:mml="http://www.w3.org/1998/Math/MathML"
20           xmlns:db="http://docbook.org/ns/docbook">
21     <refnamediv>
22         <refname>csvRead</refname>
23         <refpurpose>Read comma-separated value file</refpurpose>
24     </refnamediv>
25     <refsynopsisdiv>
26         <title>Syntax</title>
27         <synopsis>
28             M = csvRead(filename)
29             M = csvRead(filename, separator)
30             M = csvRead(filename, separator, decimal)
31             M = csvRead(filename, separator, decimal, conversion)
32             M = csvRead(filename, separator, decimal, conversion, substitute)
33             M = csvRead(filename, separator, decimal, conversion, substitute, regexpcomments, range)
34             M = csvRead(filename, separator, decimal, conversion, substitute, regexpcomments, range, header)
35             [M, comments] = csvRead(filename, separator, decimal, conversion, substitute, regexpcomments, range, header)
36         </synopsis>
37     </refsynopsisdiv>
38     <refsection>
39         <title>Arguments</title>
40         <variablelist>
41             <varlistentry>
42                 <term>filename</term>
43                 <listitem>
44                     <para>a 1-by-1 matrix of strings, the file path.</para>
45                 </listitem>
46             </varlistentry>
47             <varlistentry>
48                 <term>separator</term>
49                 <listitem>
50                     <para>a 1-by-1 matrix of strings, the field separator used.</para>
51                 </listitem>
52             </varlistentry>
53             <varlistentry>
54                 <term>decimal</term>
55                 <listitem>
56                     <para>a 1-by-1 matrix of strings, the decimal used.</para>
57                     <para>
58                         If <literal>decimal</literal> is different from <literal>[]</literal> and <literal>conversion</literal> is set to <literal>string</literal>, the decimal conversion will be done.
59                     </para>
60                 </listitem>
61             </varlistentry>
62             <varlistentry>
63                 <term>conversion</term>
64                 <listitem>
65                     <para>a 1-by-1 matrix of strings, the type of the output
66                         <literal>M</literal>. Available values are "string" or "double" (by default).
67                     </para>
68                     <para>
69                         Note that <link linkend="read_csv">read_csv</link> has "string" as default.
70                     </para>
71                 </listitem>
72             </varlistentry>
73             <varlistentry>
74                 <term>substitute</term>
75                 <listitem>
76                     <para>a m-by-2 matrix of strings, a replacing map (default = [],
77                         meaning no replacements). The first column
78                         <literal>substitute(:,1)</literal> contains the searched strings and
79                         the second column <literal>substitute(:,2)</literal> contains the
80                         replacement strings. Every occurrence of a searched string in the file is
81                         replaced.
82                     </para>
83                 </listitem>
84             </varlistentry>
85             <varlistentry>
86                 <term>regexpcomments</term>
87                 <listitem>
88                     <para>a string: a regexp to remove lines which match. (default:
89                         [])
90                     </para>
91                 </listitem>
92             </varlistentry>
93             <varlistentry>
94                 <term>range</term>
95                 <listitem>
96                     <para>a 1-by-4 matrix of floating point integers, the range of rows
97                         and columns which must be read (default range=[], meaning that all
98                         the rows and columns are read). Specify range using the format <literal>[R1 C1 R2 C2]
99                         </literal>
100                         where (R1,C1) is the upper left corner of the
101                         data to be read and (R2,C2) is the lower right corner.
102                     </para>
103                     <para>
104                         <note>Note that the file has to be correctly formatted. The range will be done in the memory on the parsed elements.</note>
105                     </para>
106                 </listitem>
107             </varlistentry>
108             <varlistentry>
109                 <term>header</term>
110                 <listitem>
111                     <para>a 1-by-1 matrix of floating point integers, the number of lines to be ignored
112                         at the beginning of the file.
113                     </para>
114                 </listitem>
115             </varlistentry>
116             <varlistentry>
117                 <term>M</term>
118                 <listitem>
119                     <para>a m-by-n matrix of strings or doubles.</para>
120                 </listitem>
121             </varlistentry>
122             <varlistentry>
123                 <term>comments</term>
124                 <listitem>
125                     <para>a m-by-n matrix of strings matched by regexp.</para>
126                 </listitem>
127             </varlistentry>
128         </variablelist>
129     </refsection>
130     <refsection>
131         <title>Description</title>
132         <para>Given an ASCII file with comma-separated fields,
133             this function returns the corresponding Scilab matrix of strings or
134             doubles.
135         </para>
136         <para>For example, the .csv data file may have been created by a
137             spreadsheet software using "Text and comma" format.
138         </para>
139         <para>It might happen that the columns are separated by a non-comma
140             separator. In this case, use csvRead(filename, separator) for another
141             choice of separator.
142         </para>
143         <para>The default values of the optional input arguments are defined by the
144             <literal>csvDefault</literal> function.
145         </para>
146         <para>Any optional input argument equal to the empty matrix
147             <literal>[]</literal> is set to its default value.
148         </para>
149         <para>When the input argument "conversion" is equal to "double", the
150             non-numeric fields within the .csv (e.g. strings) are converted into
151             NaN.
152         </para>
153         <para>csvRead is able to handle both UTF-8 and ASCII text files.
154         </para>
155     </refsection>
156     <refsection>
157         <title>Examples</title>
158         <para>The following script presents some basic uses of the
159             <literal>csvRead</literal> function.
160         </para>
161         <programlisting role="example"><![CDATA[// Create a file with some data separated with tabs.
162 M = 1:50;
163 filename = fullfile(TMPDIR, "data.csv");
164 csvWrite(M, filename, ascii(9), '.');
165
166 // read csv file
167 M1 = csvRead(filename,ascii(9), [], 'string')
168
169 // Returns a double
170 M2 = csvRead(filename,ascii(9), '.', 'double')
171
172 // Compares original data and result.
173 and(M == M2)
174
175 // Use the substitute argument to manage
176 // special data files.
177 content = [
178 "1"
179 "Not-A-Number"
180 "2"
181 "Not-A-Number"
182 ];
183
184 substitute = [
185 "Not-A-Number" "Nan"
186 ];
187
188 mputl(content,filename);
189 M = csvRead(filename,",",".","double",substitute)
190 isnan(M(2,1)) // Expected=%t
191 isnan(M(4,1)) // Expected=%t
192
193         ]]></programlisting>
194         <para>The following script presents more practical uses of the
195             <literal>csvRead</literal> function.
196         </para>
197         <programlisting role="example"><![CDATA[// Define a matrix of strings
198 Astr = [
199 "1" "8" "15" "22" "29" "36" "43" "50"
200 "2" "9" "16" "23" "30" "37" "44" "51"
201 "3" "10" "17" "6+3*I" "31" "38" "45" "52"
202 "4" "11" "18" "25" "32" "39" "46" "53"
203 "5" "12" "19" "26" "33" "40" "47" "54"
204 "6" "13" "20" "27" "34" "41" "48" "55"
205 "+0" "-0" "Inf" "-Inf" "Nan" "1.D+308" "1.e-308" "1.e-323"
206 ];
207
208 // Create a file with some data separated with commas
209 filename = fullfile(TMPDIR , 'foo.csv');
210 sep = ",";
211 fd = mopen(filename,'wt');
212 for i = 1 : size(Astr,"r")
213         mfprintf(fd,"%s\n",strcat(Astr(i,:),sep));
214 end
215 mclose(fd);
216 // To see the file: edit(filename)
217
218 // Read this file
219 Bstr = csvRead ( filename )
220
221 // Create a file with a particular separator: here ";"
222 filename = fullfile(TMPDIR , 'foo.csv');
223 sep = ";";
224 fd = mopen(filename,'wt');
225 for i = 1 : size(Astr,"r")
226         mfprintf(fd,"%s\n",strcat(Astr(i,:),sep));
227 end
228 mclose(fd);
229
230 //
231 // Read the file and customize the separator
232 csvRead ( filename , sep )
233         ]]></programlisting>
234         <para>The following script shows how to remove lines with regexp argument
235             of the <literal>csvRead</literal> function.
236         </para>
237         <programlisting role="example"><![CDATA[
238 CSV = ["// tata"; ..
239 "1,0,0,0,0"; ..
240 "// titi"; ..
241 "0,1,0,0,0"; ..
242 "// toto"; ..
243 "0,0,1,0,0"; ..
244 "// tutu"];
245 filename = fullfile(TMPDIR , 'foo.csv');
246 mputl(CSV, filename);
247
248 // remove lines beginning with //
249 [M, comments] = csvRead(filename, [], [], [], [], '/\/\//')
250         ]]></programlisting>
251     </refsection>
252     <refsection>
253         <para>Empty fields are managed by csvRead</para>
254         <programlisting role="example"><![CDATA[
255 csvWrite(['1','','3';'','','6'], TMPDIR + "/example.csv")
256 csvRead(TMPDIR + "/example.csv", [], [], "string")
257 csvRead(TMPDIR + "/example.csv", [], [], "double")
258         ]]></programlisting>
259     </refsection>
260     <programlisting role="example"><![CDATA[
261 // Define a matrix of strings
262 Astr = [
263 "1" "8" "15" "22" "29" "36" "43" "50"
264 "2" "9" "16" "23" "30" "37" "44" "51"
265 "3" "10" "17" "6+3*I" "31" "38" "45" "52"
266 "4" "11" "18" "25" "32" "39" "46" "53"
267 "5" "12" "19" "26" "33" "40" "47" "54"
268 "6" "13" "20" "27" "34" "41" "48" "55"
269 "+0" "-0" "Inf" "-Inf" "Nan" "1.D+308" "1.e-308" "1.e-323"
270 ];
271
272 // Create a file with some data separated with commas
273 filename = fullfile(TMPDIR , 'foo.csv');
274 sep = ",";
275 fd = mopen(filename,'wt');
276 for i = 1 : size(Astr,"r")
277         mfprintf(fd,"%s\n",strcat(Astr(i,:),sep));
278 end
279 mclose(fd);
280 // To see the file: edit(filename)
281
282 // Read this file
283 Bstr = csvRead ( filename )
284
285 // Create a file with a particular separator: here ";"
286 filename = fullfile(TMPDIR , 'foo.csv');
287 sep = ";";
288 fd = mopen(filename,'wt');
289 for i = 1 : size(Astr,"r")
290         mfprintf(fd,"%s\n",strcat(Astr(i,:),sep));
291 end
292 mclose(fd);
293 //
294 // Read the file and customize the separator
295 csvRead ( filename , sep )
296     ]]></programlisting>
297     <refsection>
298         <para>In the following script, the file "filename" is read by blocks of
299             5000 rows. The algorithm stops when the number of rows actually read from
300             the file differs from 5000, i.e. when the end of the file has been
301             reached.
302         </para>
303         <programlisting role="example"><![CDATA[blocksize = 5000;
304 C1 = 1;
305 C2 = 3;
306 iblock = 1
307 while (%t)
308 R1 = (iblock-1) * blocksize + 1;
309 R2 = blocksize + R1-1;
310 irange = [R1 C1 R2 C2];
311 mprintf("Block #%d, rows #%d to #%d\n",iblock,R1,R2);
312 tic();
313 M=csvRead(filename , [] , [] , [] , [] , [] , irange );
314 t = toc();
315 nrows = size(M,"r");
316 ncols = size(M,"c");
317 if ( nrows > 0 ) then
318 p = t/(nrows*ncols)*1.e6;
319 mprintf("  Actual #rows=%d\n",nrows);
320 mprintf("  T=%.3f (s)\n",t);
321 mprintf("  T=%.1f (ms/cell)\n",p);
322 end
323 if ( nrows < blocksize ) then
324 mprintf("... End of the file.\n");
325 break
326 end
327 iblock = iblock + 1;
328 end
329         ]]></programlisting>
330         <para>This produces:</para>
331         <programlisting role="no-scilab-exec"><![CDATA[Block #1, rows #1 to #5000
332 Actual #rows=5000
333 T=3.135 (s)
334 T=209.0 (ms/cell)
335 Block #2, rows #5001 to #10000
336 Actual #rows=5000
337 T=3.139 (s)
338 T=209.3 (ms/cell)
339 Block #3, rows #10001 to #15000
340 Actual #rows=5000
341 T=3.151 (s)
342 T=210.1 (ms/cell)
343 etc....
344         ]]></programlisting>
345         <para>Example with range</para>
346         <programlisting role="example"><![CDATA[
347 CSV = ["1,0,0,0,0"; ..
348 "0,1,0,0,0"; ..
349 "0,0,1,0,0"; ..
350 "4,4,1,2,0"; ..
351 "4,63,1,2,0"; ..
352 "4,63,1,4,233"; ..
353 "42,3,23,2,23"; ..
354 ];
355 filename = fullfile(TMPDIR , 'foo.csv');
356 mputl(CSV, filename);
357 // Extract a subset of the csv file
358 csvRead(filename, [], [], "double", [], [], [5 3 7 6])
359     ]]></programlisting>
360         <para>Example with header</para>
361         <programlisting role="example"><![CDATA[
362 comments = [
363 "// Copyright (C) INRIA"];
364 filename = fullfile(TMPDIR , 'foo.csv');
365 csvWrite(rand(2,3), filename, ascii(9), ",", [], comments);
366
367 header = 2;
368 [M, c] = csvRead(filename, ascii(9), ",", "double", [], [], [], header) // Ignore the two first lines (the header)
369     ]]></programlisting>
370     </refsection>
371     <refsection role="see also">
372         <title>See also</title>
373         <simplelist type="inline">
374             <member>
375                 <link linkend="csvTextScan">csvTextScan</link>
376             </member>
377             <member>
378                 <link linkend="csvWrite">csvWrite</link>
379             </member>
380             <member>
381                 <link linkend="csvDefault">csvDefault</link>
382             </member>
383         </simplelist>
384     </refsection>
385     <refsection>
386         <title>History</title>
387         <revhistory>
388             <revision>
389                 <revnumber>5.4.0</revnumber>
390                 <revremark>
391                     Function introduced. Based on the 'csv_readwrite' module. The only difference in behavior compared to <link linkend="read_csv">read_csv</link> is that csvRead tries to convert values to double by default, whereas read_csv returns values as strings.
392                 </revremark>
393             </revision>
394             <revision>
395                 <revnumber>5.4.1</revnumber>
396                 <revremark>
397                     If <literal>decimal</literal> is different from <literal>[]</literal> and <literal>conversion</literal> is set to <literal>string</literal>, the decimal conversion will be done.
398                 </revremark>
399             </revision>
400             <revision>
401                 <revnumber>5.5</revnumber>
402                 <revremark>
403                     Addition of the "header" input argument, to ignore headers.
404                 </revremark>
405             </revision>
406         </revhistory>
407     </refsection>
408 </refentry>