9293f423555aa10c87b848b5f3d4bd244b5ea35b
[scilab.git] / scilab / modules / spreadsheet / help / en_US / csvRead.xml
1 <?xml version="1.0" encoding="UTF-8"?>
2 <!--
3  * Copyright (C) 2010-2011 - INRIA - Allan CORNET
4  * Scilab ( http://www.scilab.org/ ) - This file is part of Scilab
5  -->
6 <refentry version="5.0-subset Scilab" xml:id="csvRead" xml:lang="en"
7           xmlns="http://docbook.org/ns/docbook"
8           xmlns:xlink="http://www.w3.org/1999/xlink"
9           xmlns:svg="http://www.w3.org/2000/svg"
10           xmlns:ns3="http://www.w3.org/1999/xhtml"
11           xmlns:mml="http://www.w3.org/1998/Math/MathML"
12           xmlns:db="http://docbook.org/ns/docbook">
13     <info>
14         <pubdate>$LastChangedDate$</pubdate>
15     </info>
16     
17     <refnamediv>
18         <refname>csvRead</refname>
19         
20         <refpurpose>Read comma-separated value file</refpurpose>
21     </refnamediv>
22     
23     <refsynopsisdiv>
24         <title>Calling Sequence</title>
25         
26         <synopsis>
27             M = csvRead(filename)
28             M = csvRead(filename, separator)
29             M = csvRead(filename, separator, decimal)
30             M = csvRead(filename, separator, decimal, conversion)
31             M = csvRead(filename, separator, decimal, conversion, substitute)
32             M = csvRead(filename, separator, decimal, conversion, substitute, regexpcomments, range)
33             [M, comments] = csvRead(filename, separator, decimal, conversion, substitute, regexpcomments, range)
34         </synopsis>
35     </refsynopsisdiv>
36     
37     <refsection>
38         <title>Parameters</title>
39         
40         <variablelist>
41             <varlistentry>
42                 <term>filename</term>
43                 
44                 <listitem>
45                     <para>a 1-by-1 matrix of strings, the file path.</para>
46                 </listitem>
47             </varlistentry>
48             
49             <varlistentry>
50                 <term>separator</term>
51                 
52                 <listitem>
53                     <para>a 1-by-1 matrix of strings, the field separator used.</para>
54                 </listitem>
55             </varlistentry>
56             
57             <varlistentry>
58                 <term>decimal</term>
59                 
60                 <listitem>
61                     <para>a 1-by-1 matrix of strings, the decimal mark used.</para>
62                 </listitem>
63             </varlistentry>
64             
65             <varlistentry>
66                 <term>conversion</term>
67                 
68                 <listitem>
69                     <para>a 1-by-1 matrix of strings, the type of the output
70                         <literal>M</literal>. Available values are "string" or "double"
71                         (default).
72                     </para>
73                 </listitem>
74             </varlistentry>
75             
76             <varlistentry>
77                 <term>substitute</term>
78                 
79                 <listitem>
80                     <para>an m-by-2 matrix of strings, a replacement map (default = [],
81                         meaning no replacements). The first column
82                         <literal>substitute(:,1)</literal> contains the searched strings and
83                         the second column <literal>substitute(:,2)</literal> contains the
84                         replacement strings. Every occurrence of a searched string in the file is
85                         replaced.
86                     </para>
87                 </listitem>
88             </varlistentry>
89             
90             <varlistentry>
91                 <term>regexpcomments</term>
92                 
93                 <listitem>
94                     <para>a string, a regular expression. The lines of the file which
95                         match it are removed from the data (default: []).
96                     </para>
97                 </listitem>
98             </varlistentry>
99             
100             <varlistentry>
101                 <term>range</term>
102                 
103                 <listitem>
104                     <para>a 1-by-4 matrix of floating point integers, the range of rows
105                         and columns which must be read (default range=[], meaning that all
106                         the rows and columns are read). Specify range using the format <literal>[R1
107                             C1 R2 C2]
108                         </literal>
109                         where (R1,C1) is the upper left corner of the
110                         data to be read and (R2,C2) is the lower right corner.
111                     </para>
112                 </listitem>
113             </varlistentry>
114             
115             <varlistentry>
116                 <term>M</term>
117                 
118                 <listitem>
119                     <para>an m-by-n matrix of strings or doubles.</para>
120                 </listitem>
121             </varlistentry>
122             
123             <varlistentry>
124                 <term>comments</term>
125                 
126                 <listitem>
127                     <para>an m-by-n matrix of strings, the lines matched by the regexp.</para>
128                 </listitem>
129             </varlistentry>
130         </variablelist>
131     </refsection>
132     
133     <refsection>
134         <title>Description</title>
135         
136         <para>Given an ASCII file with comma-separated fields,
137             this function returns the corresponding Scilab matrix of strings or
138             doubles.
139         </para>
140         
141         <para>For example, the .csv data file may have been created by
142             spreadsheet software using the "Text and comma" format.
143         </para>
144         
145         <para>It might happen that the columns are separated by a non-comma
146             separator. In this case, use csvRead(filename, separator) for another
147             choice of separator.
148         </para>
149         
150         <para>The default values of the optional input arguments are defined by the
151             <literal>csvDefault</literal> function.
152         </para>
153         
154         <para>Any optional input argument equal to the empty matrix
155             <literal>[]</literal> is set to its default value.
156         </para>
157         
158         <para>When the input argument "conversion" is equal to "double", the
159             non-numeric fields within the .csv (e.g. strings) are converted into
160             NaN.
161         </para>
162     </refsection>
163     
164     <refsection>
165         <title>Examples</title>
166         
167         <para>The following script presents some basic uses of the
168             <literal>csvRead</literal> function.
169         </para>
170         
171         <programlisting role="example">// Create a file with some data separated with tabs.
172             M = 1:50;
173             filename = fullfile(TMPDIR, "data.csv");
174             csvWrite(M, filename, ascii(9), '.');
175             
176             // read csv file
177             M1 = csvRead(filename,ascii(9), [], 'string')
178             
179             // Returns a double
180             M2 = csvRead(filename,ascii(9), '.', 'double')
181             
182             // Compares original data and result.
183             and(M == M2)
184             
185             // Use the substitute argument to manage
186             // special data files.
187             content = [
188             "1"
189             "Not-A-Number"
190             "2"
191             "Not-A-Number"
192             ];
193             
194             substitute = [
195             "Not-A-Number" "Nan"
196             ];
197             
198             mputl(content,filename);
199             M = csvRead(filename,",",".","double",substitute)
200             isnan(M(2,1)) // Expected=%t
201             isnan(M(4,1)) // Expected=%t
202             
203         </programlisting>
204         
205         <para>The following script presents more practical uses of the
206             <literal>csvRead</literal> function.
207         </para>
208         
209         <programlisting role="example">// Define a matrix of strings
210             Astr = [
211             "1" "8" "15" "22" "29" "36" "43" "50"
212             "2" "9" "16" "23" "30" "37" "44" "51"
213             "3" "10" "17" "6+3*I" "31" "38" "45" "52"
214             "4" "11" "18" "25" "32" "39" "46" "53"
215             "5" "12" "19" "26" "33" "40" "47" "54"
216             "6" "13" "20" "27" "34" "41" "48" "55"
217             "+0" "-0" "Inf" "-Inf" "Nan" "1.D+308" "1.e-308" "1.e-323"
218             ];
219             
220             // Create a file with some data separated with commas
221             filename = fullfile(TMPDIR , 'foo.csv');
222             sep = ",";
223             fd = mopen(filename,'wt');
224             for i = 1 : size(Astr,"r")
225             mfprintf(fd,"%s\n",strcat(Astr(i,:),sep));
226             end
227             mclose(fd);
228             // To see the file : edit(filename)
229             
230             // Read this file
231             Bstr = csvRead ( filename )
232             
233             // Create a file with a particular separator: here ";"
234             filename = fullfile(TMPDIR , 'foo.csv');
235             sep = ";";
236             fd = mopen(filename,'wt');
237             for i = 1 : size(Astr,"r")
238             mfprintf(fd,"%s\n",strcat(Astr(i,:),sep));
239             end
240             mclose(fd);
241             
242             //
243             // Read the file and customize the separator
244             csvRead ( filename , sep )   
245         </programlisting>
246         
247         <para>The following script shows how to remove lines with regexp argument
248             of the <literal>csvRead</literal> function.
249         </para>
250         
251         <programlisting role="example">CSV = ["// tata"; ..
252             "1,0,0,0,0"; ..
253             "// titi"; ..
254             "0,1,0,0,0"; ..
255             "// toto"; ..
256             "0,0,1,0,0"; ..
257             "// tutu"];
258             filename = fullfile(TMPDIR , 'foo.csv');
259             mputl(CSV, filename);
260             
261             // remove the lines matching the regexp (here, the lines starting with //)
262             [M, comments] = csvRead(filename, [], [], [], [], '/\/\//')
263         </programlisting>
264     </refsection>
265     
266     <refsection>
267         <para>Empty fields are managed by csvRead.</para>
268         
269         <programlisting role="example">
270             csvWrite(['1','','3';'','','6'], TMPDIR + "/example.csv")
271             csvRead(TMPDIR + "/example.csv", [], [], "string")
272             csvRead(TMPDIR + "/example.csv", [], [], "double")
273         </programlisting>
274     
275     
276     <programlisting role="example">
277         // Define a matrix of strings
278         Astr = [
279         "1" "8" "15" "22" "29" "36" "43" "50"
280         "2" "9" "16" "23" "30" "37" "44" "51"
281         "3" "10" "17" "6+3*I" "31" "38" "45" "52"
282         "4" "11" "18" "25" "32" "39" "46" "53"
283         "5" "12" "19" "26" "33" "40" "47" "54"
284         "6" "13" "20" "27" "34" "41" "48" "55"
285         "+0" "-0" "Inf" "-Inf" "Nan" "1.D+308" "1.e-308" "1.e-323"
286         ];
287         
288         // Create a file with some data separated with commas
289         filename = fullfile(TMPDIR , 'foo.csv');
290         sep = ",";
291         fd = mopen(filename,'wt');
292         for i = 1 : size(Astr,"r")
293         mfprintf(fd,"%s\n",strcat(Astr(i,:),sep));
294         end
295         mclose(fd);
296         // To see the file : edit(filename)
297         
298         // Read this file
299         Bstr = csvRead ( filename )
300         
301         // Create a file with a particular separator: here ";"
302         filename = fullfile(TMPDIR , 'foo.csv');
303         sep = ";";
304         fd = mopen(filename,'wt');
305         for i = 1 : size(Astr,"r")
306         mfprintf(fd,"%s\n",strcat(Astr(i,:),sep));
307         end
308         mclose(fd);
309         //
310         // Read the file and customize the separator
311         csvRead ( filename , sep )
312     </programlisting>
313     </refsection>
314     <refsection>
315         <para>In the following script, the file "filename" is read by blocks of
316             5000 rows. The algorithm stops when the number of rows actually read from
317             the file differs from 5000, i.e. when the end of the file has been
318             reached.
319         </para>
320         
321         <programlisting role="example">blocksize = 5000;
322             C1 = 1;
323             C2 = 3;
324             iblock = 1
325             while (%t)
326             R1 = (iblock-1) * blocksize + 1;
327             R2 = blocksize + R1-1;
328             irange = [R1 C1 R2 C2];
329             mprintf("Block #%d, rows #%d to #%d\n",iblock,R1,R2);
330             tic();
331             M=csvRead(filename , [] , [] , [] , [] , [] , irange );
332             t = toc();
333             nrows = size(M,"r");
334             ncols = size(M,"c");
335             if ( nrows &gt; 0 ) then
336             p = t/(nrows*ncols)*1.e6;
337             mprintf("  Actual #rows=%d\n",nrows);
338             mprintf("  T=%.3f (s)\n",t);
339             mprintf("  T=%.1f (us/cell)\n",p);
340             end
341             if ( nrows &lt; blocksize ) then
342             mprintf("... End of the file.\n");
343             break
344             end
345             iblock = iblock + 1;
346             end
347         </programlisting>
348         
349         <para>This produces:</para>
350         
351         <programlisting role="no-scilab-exec">Block #1, rows #1 to #5000
352             Actual #rows=5000
353             T=3.135 (s)
354             T=209.0 (us/cell)
355             Block #2, rows #5001 to #10000
356             Actual #rows=5000
357             T=3.139 (s)
358             T=209.3 (us/cell)
359             Block #3, rows #10001 to #15000
360             Actual #rows=5000
361             T=3.151 (s)
362             T=210.1 (us/cell)
363             etc....
364         </programlisting>
365     </refsection>
366     
367     <refsection>
368         <title>See Also</title>
369         
370         <simplelist type="inline">
371             <member>
372                 <link linkend="csvWrite">csvWrite</link>
373             </member>
374         </simplelist>
375     </refsection>
376     <refsection>
377         <title>History</title>
378         <revhistory>
379             <revision>
380                 <revnumber>5.4.0</revnumber>
381                 <revremark>Function introduced. Based on the 'csv_readwrite' module.</revremark>
382             </revision>
383         </revhistory>
384     </refsection>
385     
386 </refentry>
387