* read_csv has the same behavior as in the version 5.3.3
[scilab.git] / scilab / modules / spreadsheet / help / en_US / csvRead.xml
1 <?xml version="1.0" encoding="UTF-8"?>
2 <!--
3  * Copyright (C) 2010-2011 - INRIA - Allan CORNET
4  * Scilab ( http://www.scilab.org/ ) - This file is part of Scilab
5  -->
6 <refentry version="5.0-subset Scilab" xml:id="csvRead" xml:lang="en"
7           xmlns="http://docbook.org/ns/docbook"
8           xmlns:xlink="http://www.w3.org/1999/xlink"
9           xmlns:svg="http://www.w3.org/2000/svg"
10           xmlns:ns3="http://www.w3.org/1999/xhtml"
11           xmlns:mml="http://www.w3.org/1998/Math/MathML"
12           xmlns:db="http://docbook.org/ns/docbook">
13     <info>
14         <pubdate>$LastChangedDate$</pubdate>
15     </info>
16     
17     <refnamediv>
18         <refname>csvRead</refname>
19         
20         <refpurpose>Read comma-separated value file</refpurpose>
21     </refnamediv>
22     
23     <refsynopsisdiv>
24         <title>Calling Sequence</title>
25         
26         <synopsis>
27             M = csvRead(filename)
28             M = csvRead(filename, separator)
29             M = csvRead(filename, separator, decimal)
30             M = csvRead(filename, separator, decimal, conversion)
31             M = csvRead(filename, separator, decimal, conversion, substitute)
32             M = csvRead(filename, separator, decimal, conversion, substitute, regexpcomments, range)
33             [M, comments] = csvRead(filename, separator, decimal, conversion, substitute, regexpcomments, range)
34         </synopsis>
35     </refsynopsisdiv>
36     
37     <refsection>
38         <title>Parameters</title>
39         
40         <variablelist>
41             <varlistentry>
42                 <term>filename</term>
43                 
44                 <listitem>
45                     <para>a 1-by-1 matrix of strings, the file path.</para>
46                 </listitem>
47             </varlistentry>
48             
49             <varlistentry>
50                 <term>separator</term>
51                 
52                 <listitem>
53                     <para>a 1-by-1 matrix of strings, the field separator used.</para>
54                 </listitem>
55             </varlistentry>
56             
57             <varlistentry>
58                 <term>decimal</term>
59                 
60                 <listitem>
61                     <para>a 1-by-1 matrix of strings, the decimal used.</para>
62                 </listitem>
63             </varlistentry>
64             
65             <varlistentry>
66                 <term>conversion</term>
67                 
68                 <listitem>
69                     <para>a 1-by-1 matrix of strings, the type of the output
70                         <literal>M</literal>. Available values are "string" or "double" (by default).
71                     </para>
72                     <para>
73                         Note that <link linkend="read_csv">read_csv</link> has "string" as default.
74                     </para>
75                 </listitem>
76             </varlistentry>
77             
78             <varlistentry>
79                 <term>substitute</term>
80                 
81                 <listitem>
82                     <para>an m-by-2 matrix of strings, a replacing map (default = [],
83                         meaning no replacements). The first column
84                         <literal>substitute(:,1)</literal> contains the searched strings and
85                         the second column <literal>substitute(:,2)</literal> contains the
86                         replacement strings. Every occurrence of a searched string in the file is
87                         replaced.
88                     </para>
89                 </listitem>
90             </varlistentry>
91             
92             <varlistentry>
93                 <term>regexpcomments</term>
94                 
95                 <listitem>
96                     <para>a string: a regexp to remove lines which match. (default:
97                         [])
98                     </para>
99                 </listitem>
100             </varlistentry>
101             
102             <varlistentry>
103                 <term>range</term>
104                 
105                 <listitem>
106                     <para>a 1-by-4 matrix of floating point integers, the range of rows
107                         and columns which must be read (default range=[], meaning that all
108                         the rows and columns are read). Specify range using the format <literal>[R1
109                             C1 R2 C2]
110                         </literal>
111                         where (R1,C1) is the upper left corner of the
112                         data to be read and (R2,C2) is the lower right corner.
113                     </para>
114                 </listitem>
115             </varlistentry>
116             
117             <varlistentry>
118                 <term>M</term>
119                 
120                 <listitem>
121                     <para>a m-by-n matrix of strings or double.</para>
122                 </listitem>
123             </varlistentry>
124             
125             <varlistentry>
126                 <term>comments</term>
127                 
128                 <listitem>
129                     <para>a m-by-n matrix of strings matched by regexp.</para>
130                 </listitem>
131             </varlistentry>
132         </variablelist>
133     </refsection>
134     
135     <refsection>
136         <title>Description</title>
137         
138         <para>Given an ASCII file with comma-separated fields,
139             this function returns the corresponding Scilab matrix of strings or
140             doubles.
141         </para>
142         
143         <para>For example, the .csv data file may have been created by a
144             spreadsheet software using "Text and comma" format.
145         </para>
146         
147         <para>It might happen that the columns are separated by a non-comma
148             separator. In this case, use csvRead(filename, separator) for another
149             choice of separator.
150         </para>
151         
152         <para>The default values of the optional input arguments are defined by the
153             <literal>csvDefault</literal> function.
154         </para>
155         
156         <para>Any optional input argument equal to the empty matrix
157             <literal>[]</literal> is set to its default value.
158         </para>
159         
160         <para>When the input argument "conversion" is equal to "double", the
161             non-numeric fields within the .csv (e.g. strings) are converted into
162             NaN.
163         </para>
164     </refsection>
165     
166     <refsection>
167         <title>Examples</title>
168         
169         <para>The following script presents some basic uses of the
170             <literal>csvRead</literal> function.
171         </para>
172         
173         <programlisting role="example">// Create a file with some data separated with tabs.
174             M = 1:50;
175             filename = fullfile(TMPDIR, "data.csv");
176             csvWrite(M, filename, ascii(9), '.');
177             
178             // read csv file
179             M1 = csvRead(filename,ascii(9), [], 'string')
180             
181             // Returns a double
182             M2 = csvRead(filename,ascii(9), '.', 'double')
183             
184             // Compares original data and result.
185             and(M == M2)
186             
187             // Use the substitute argument to manage
188             // special data files.
189             content = [
190             "1"
191             "Not-A-Number"
192             "2"
193             "Not-A-Number"
194             ];
195             
196             substitute = [
197             "Not-A-Number" "Nan"
198             ];
199             
200             mputl(content,filename);
201             M = csvRead(filename,",",".","double",substitute)
202             isnan(M(2,1)) // Expected=%t
203             isnan(M(4,1)) // Expected=%t
204             
205         </programlisting>
206         
207         <para>The following script presents more practical uses of the
208             <literal>csvRead</literal> function.
209         </para>
210         
211         <programlisting role="example">// Define a matrix of strings
212             Astr = [
213             "1" "8" "15" "22" "29" "36" "43" "50"
214             "2" "9" "16" "23" "30" "37" "44" "51"
215             "3" "10" "17" "6+3*I" "31" "38" "45" "52"
216             "4" "11" "18" "25" "32" "39" "46" "53"
217             "5" "12" "19" "26" "33" "40" "47" "54"
218             "6" "13" "20" "27" "34" "41" "48" "55"
219             "+0" "-0" "Inf" "-Inf" "Nan" "1.D+308" "1.e-308" "1.e-323"
220             ];
221             
222             // Create a file with some data separated with commas
223             filename = fullfile(TMPDIR , 'foo.csv');
224             sep = ",";
225             fd = mopen(filename,'wt');
226             for i = 1 : size(Astr,"r")
227             mfprintf(fd,"%s\n",strcat(Astr(i,:),sep));
228             end
229             mclose(fd);
230             // To see the file : edit(filename)
231             
232             // Read this file
233             Bstr = csvRead ( filename )
234             
235             // Create a file with a particular separator: here ";"
236             filename = fullfile(TMPDIR , 'foo.csv');
237             sep = ";";
238             fd = mopen(filename,'wt');
239             for i = 1 : size(Astr,"r")
240             mfprintf(fd,"%s\n",strcat(Astr(i,:),sep));
241             end
242             mclose(fd);
243             
244             //
245             // Read the file and customize the separator
246             csvRead ( filename , sep )   
247         </programlisting>
248         
249         <para>The following script shows how to remove lines with regexp argument
250             of the <literal>csvRead</literal> function.
251         </para>
252         
253         <programlisting role="example">CSV = ["// tata"; ..
254             "1,0,0,0,0"; ..
255             "// titi"; ..
256             "0,1,0,0,0"; ..
257             "// toto"; ..
258             "0,0,1,0,0"; ..
259             "// tutu"];
260             filename = fullfile(TMPDIR , 'foo.csv');
261             mputl(CSV, filename);
262             
263             // remove lines with // @ beginning
264             [M, comments] = csvRead(filename, [], [], [], [], '/\/\//')
265         </programlisting>
266     </refsection>
267     
268     <refsection>
269         <para>Empty fields are handled by csvRead.</para>
270         
271         <programlisting role="example">
272             csvWrite(['1','','3';'','','6'], TMPDIR + "/example.csv")
273             csvRead(TMPDIR + "/example.csv", [], [], "string")
274             csvRead(TMPDIR + "/example.csv", [], [], "double")
275         </programlisting>
276     </refsection>
277     
278     <programlisting role="example">
279         // Define a matrix of strings
280         Astr = [
281         "1" "8" "15" "22" "29" "36" "43" "50"
282         "2" "9" "16" "23" "30" "37" "44" "51"
283         "3" "10" "17" "6+3*I" "31" "38" "45" "52"
284         "4" "11" "18" "25" "32" "39" "46" "53"
285         "5" "12" "19" "26" "33" "40" "47" "54"
286         "6" "13" "20" "27" "34" "41" "48" "55"
287         "+0" "-0" "Inf" "-Inf" "Nan" "1.D+308" "1.e-308" "1.e-323"
288         ];
289         
290         // Create a file with some data separated with commas
291         filename = fullfile(TMPDIR , 'foo.csv');
292         sep = ",";
293         fd = mopen(filename,'wt');
294         for i = 1 : size(Astr,"r")
295         mfprintf(fd,"%s\n",strcat(Astr(i,:),sep));
296         end
297         mclose(fd);
298         // To see the file : edit(filename)
299         
300         // Read this file
301         Bstr = csvRead ( filename )
302         
303         // Create a file with a particular separator: here ";"
304         filename = fullfile(TMPDIR , 'foo.csv');
305         sep = ";";
306         fd = mopen(filename,'wt');
307         for i = 1 : size(Astr,"r")
308         mfprintf(fd,"%s\n",strcat(Astr(i,:),sep));
309         end
310         mclose(fd);
311         //
312         // Read the file and customize the separator
313         csvRead ( filename , sep )
314     </programlisting>
315     
316     <refsection>
317         <para>In the following script, the file "filename" is read by blocks of
318             5000 rows. The algorithm stops when the number of rows actually read from
319             the file is less than 5000, i.e. when the end of the file has been
320             reached.
321         </para>
322         
323         <programlisting role="example">blocksize = 5000;
324             C1 = 1;
325             C2 = 3;
326             iblock = 1
327             while (%t)
328             R1 = (iblock-1) * blocksize + 1;
329             R2 = blocksize + R1-1;
330             irange = [R1 C1 R2 C2];
331             mprintf("Block #%d, rows #%d to #%d\n",iblock,R1,R2);
332             tic();
333             M=csvRead(filename , [] , [] , [] , [] , [] , irange );
334             t = toc();
335             nrows = size(M,"r");
336             ncols = size(M,"c");
337             if ( nrows &gt; 0 ) then
338             p = t/(nrows*ncols)*1.e6;
339             mprintf("  Actual #rows=%d\n",nrows);
340             mprintf("  T=%.3f (s)\n",t);
341             mprintf("  T=%.1f (us/cell)\n",p);
342             end
343             if ( nrows &lt; blocksize ) then
344             mprintf("... End of the file.\n");
345             break
346             end
347             iblock = iblock + 1;
348             end
349         </programlisting>
350         
351         <para>This produces :</para>
352         
353         <programlisting role="no-scilab-exec">Block #1, rows #1 to #5000
354             Actual #rows=5000
355             T=3.135 (s)
356             T=209.0 (us/cell)
357             Block #2, rows #5001 to #10000
358             Actual #rows=5000
359             T=3.139 (s)
360             T=209.3 (us/cell)
361             Block #3, rows #10001 to #15000
362             Actual #rows=5000
363             T=3.151 (s)
364             T=210.1 (us/cell)
365             etc....
366         </programlisting>
367     </refsection>
368     
369     <refsection>
370         <title>See Also</title>
371         
372         <simplelist type="inline">
373             <member>
374                 <link linkend="csvWrite">csvWrite</link>
375             </member>
376         </simplelist>
377     </refsection>
378     <refsection>
379         <title>History</title>
380         <revhistory>
381             <revision>
382                 <revnumber>5.4.0</revnumber>
383                 <revremark>
384                     Function introduced. Based on the 'csv_readwrite' module. The only difference in behavior compared to <link linkend="read_csv">read_csv</link> is that csvRead tries to convert the values to doubles by default, whereas read_csv returns them as strings.
385                 </revremark>
386             </revision>
387         </revhistory>
388     </refsection>
389     
390 </refentry>
391