bec5a83886841c6573a8b46090e606a878b4ccf1
[scilab.git] / scilab / modules / fileio / src / c / mgetl.c
1 /*
2 * Scilab ( http://www.scilab.org/ ) - This file is part of Scilab
3 * Copyright (C) 2010 - DIGITEO - Allan CORNET
4 *
5 * This file must be used under the terms of the CeCILL.
6 * This source file is licensed as described in the file COPYING, which
7 * you should have received as part of this distribution.  The terms
8 * are also available at
9 * http://www.cecill.info/licences/Licence_CeCILL_V2-en.txt
10 *
11 */
12 /*--------------------------------------------------------------------------*/
13 #include <string.h>
14 #ifdef _MSC_VER
15 #include <windows.h>
16 #include "strdup_windows.h"
17 #endif
18 #include "mgetl.h"
19 #include "filesmanagement.h"
20 #include "mopen.h"
21 #include "MALLOC.h"
22 #include "BOOL.h"
23 #include "strsubst.h"
24 #include "charEncoding.h"
25 /*--------------------------------------------------------------------------*/
26 #define LINE_MAX 4096
27 #define CR '\r'
28 #define LF '\n'
29 #define EMPTYSTR ""
30 /*--------------------------------------------------------------------------*/
31 static char *removeEOL(char *_inString);
32 static char *convertAnsiToUtf(char *_inString);
33 static char *getNextLine(FILE *stream);
34 /*--------------------------------------------------------------------------*/
35 static const unsigned char UTF8BOM_BYTEORDER_MARK[] = {0xEF, 0xBB, 0xBF, 0x00};
36 /*--------------------------------------------------------------------------*/
37 char **mgetl(int fd, int nbLinesIn, int *nbLinesOut, int *ierr)
38 {
39     char **strLines = NULL;
40     FILE *fa = NULL;
41
42     *ierr = MGETL_ERROR;
43     *nbLinesOut = 0;
44
45     if (fd == STDIN_ID)
46     {
47         fa = stdin;
48     }
49     else
50     {
51         fa = GetFileOpenedInScilab(fd);
52     }
53
54     if (fa)
55     {
56         char *Line = NULL;
57         int nbLines = 0;
58
59         if (nbLinesIn < 0)
60         {
61             strLines = (char **)MALLOC(sizeof(char *));
62             if (strLines == NULL)
63             {
64                 *nbLinesOut = 0;
65                 *ierr = MGETL_MEMORY_ALLOCATION_ERROR;
66                 return NULL;
67             }
68
69             Line = getNextLine(fa);
70             if (Line)
71             {
72                 /* UTF-8 BOM */
73                 if (strncmp(Line, (const char*)UTF8BOM_BYTEORDER_MARK, strlen((const char*)UTF8BOM_BYTEORDER_MARK)) == 0)
74                 {
75                     /* we skip first characters */
76                     char *tmpLine = strsub(Line, (const char*)UTF8BOM_BYTEORDER_MARK, "");
77                     FREE(Line);
78                     Line = tmpLine;
79                 }
80             }
81             while ( Line != NULL )
82             {
83                 nbLines++;
84                 strLines = (char **)REALLOC(strLines, nbLines * sizeof(char *));
85                 if (strLines == NULL)
86                 {
87                     if (Line)
88                     {
89                         FREE(Line);
90                         Line = NULL;
91                     }
92                     *nbLinesOut = 0;
93                     *ierr = MGETL_MEMORY_ALLOCATION_ERROR;
94                     return NULL;
95                 }
96
97                 strLines[nbLines - 1] = convertAnsiToUtf(removeEOL(Line));
98
99                 if (Line)
100                 {
101                     FREE(Line);
102                     Line = NULL;
103                 }
104
105                 if (strLines[nbLines - 1] == NULL)
106                 {
107                     *nbLinesOut = 0;
108                     *ierr = MGETL_MEMORY_ALLOCATION_ERROR;
109                     return NULL;
110                 }
111
112                 Line = getNextLine(fa);
113             }
114
115             if (Line)
116             {
117                 FREE(Line);
118                 Line = NULL;
119             }
120
121             *nbLinesOut = nbLines;
122             *ierr = MGETL_NO_ERROR;
123         }
124         else
125         {
126             if (nbLinesIn == 0)
127             {
128                 *ierr = MGETL_EOF;
129                 *nbLinesOut = 0;
130                 if (strLines)
131                 {
132                     FREE(strLines);
133                 }
134                 strLines = NULL;
135             }
136             else
137             {
138                 BOOL bContinue = TRUE;
139                 BOOL bEOF = FALSE;
140                 strLines = (char **)MALLOC(sizeof(char *) * nbLinesIn);
141
142                 if (strLines == NULL)
143                 {
144                     *nbLinesOut = 0;
145                     *ierr = MGETL_MEMORY_ALLOCATION_ERROR;
146                     return NULL;
147                 }
148
149                 do
150                 {
151                     if (nbLines < nbLinesIn)
152                     {
153                         if ((double) ftell(fa) == 0)
154                         {
155                             Line = getNextLine(fa);
156                             /* UTF-8 BOM */
157                             if (Line && (strncmp(Line, (const char*)UTF8BOM_BYTEORDER_MARK, strlen((const char*)UTF8BOM_BYTEORDER_MARK)) == 0))
158                             {
159                                 /* we skip first characters */
160                                 char *tmpLine = strsub(Line, (const char*)UTF8BOM_BYTEORDER_MARK, "");
161                                 FREE(Line);
162                                 Line = tmpLine;
163                             }
164                         }
165                         else
166                         {
167                             Line = getNextLine(fa);
168                         }
169
170                         if (Line != NULL)
171                         {
172                             nbLines++;
173                             strLines[nbLines - 1] = convertAnsiToUtf(removeEOL(Line));
174
175                             if (Line)
176                             {
177                                 FREE(Line);
178                                 Line = NULL;
179                             }
180
181                             if (strLines[nbLines - 1] == NULL)
182                             {
183                                 *nbLinesOut = 0;
184                                 *ierr = MGETL_MEMORY_ALLOCATION_ERROR;
185                                 FREE(nbLines);
186                                 return NULL;
187                             }
188                         }
189                         else
190                         {
191                             /* EOF */
192                             if (feof(fa))
193                             {
194                                 bEOF = TRUE;
195                             }
196                             bContinue = FALSE;
197                         }
198                     }
199                     else
200                     {
201                         bContinue = FALSE;
202                     }
203                 }
204                 while (bContinue);
205
206                 if (Line)
207                 {
208                     FREE(Line);
209                     Line = NULL;
210                 }
211
212                 *nbLinesOut = nbLines;
213                 if (bEOF)
214                 {
215                     *ierr = MGETL_EOF;
216                 }
217                 else
218                 {
219                     *ierr = MGETL_NO_ERROR;
220                 }
221             }
222         }
223         if (Line)
224         {
225             FREE(Line);
226             Line = NULL;
227         }
228     }
229
230     return strLines;
231 }
232 /*--------------------------------------------------------------------------*/
233 char *removeEOL(char *_inString)
234 {
235     if (_inString)
236     {
237         char *pos = strchr(_inString, LF);
238         if (pos)
239         {
240             *pos = 0;
241         }
242
243         pos = strchr(_inString, CR);
244         if (pos)
245         {
246             *pos = 0;
247         }
248     }
249     return _inString;
250 }
251 /*--------------------------------------------------------------------------*/
/*
 * Return a newly allocated UTF-8 copy of _inString.
 *
 * When IsValidUTF8() accepts the input it is simply duplicated. Otherwise:
 *  - Windows: the input is converted from the ANSI code page (CP_ACP) to
 *    UTF-8 through a temporary wide string; if an allocation fails the
 *    input is duplicated unchanged.
 *  - other platforms: each byte >= 128 is written as the two-byte UTF-8
 *    sequence of the code point with the same value; bytes < 128 are copied.
 *
 * Returns NULL when _inString is NULL or when the result cannot be allocated.
 * The returned string is not freed here.
 */
char *convertAnsiToUtf(char *_inString)
{
    char *outString = NULL;
    if (_inString)
    {
#ifdef _MSC_VER
        if (IsValidUTF8(_inString))
        {
            outString = strdup(_inString);
        }
        else
        {
            /* conversion ANSI to UTF */
            int Len = 0;
            int newLen = 0;
            BSTR bstrCode = NULL;

            /* size query: the output length argument is an int, so pass 0 (not NULL) */
            Len = MultiByteToWideChar(CP_ACP, 0, _inString, lstrlen(_inString), NULL, 0);
            bstrCode = SysAllocStringLen(NULL, Len);
            if (bstrCode)
            {
                MultiByteToWideChar(CP_ACP, 0, _inString, lstrlen(_inString), bstrCode, Len);
                /* size query for the UTF-8 form of the wide string */
                newLen = WideCharToMultiByte(CP_UTF8, 0, bstrCode, -1, outString, 0, NULL, NULL);
                outString = (char*) MALLOC(newLen + 1);
                if (outString)
                {
                    WideCharToMultiByte(CP_UTF8, 0, bstrCode, -1, outString, newLen, NULL, NULL);
                }
                else
                {
                    outString = strdup(_inString);
                }
                SysFreeString(bstrCode);
                bstrCode = NULL;
            }
            else
            {
                outString = strdup(_inString);
            }
        }
#else
        if (IsValidUTF8(_inString))
        {
            outString = strdup(_inString);
        }
        else
        {
            size_t len = strlen(_inString);
            size_t in = 0;
            size_t out = 0;

            /* every input byte expands to at most two output bytes */
            outString = (char*)MALLOC(((len * 2) + 1) * sizeof(char));
            if (outString == NULL)
            {
                return NULL;
            }

            for (in = 0; in < len; in++)
            {
                /* read the byte as 0..255 whatever the signedness of char */
                unsigned char inAnsiChar = (unsigned char)_inString[in];

                if (inAnsiChar < 128)
                {
                    outString[out++] = (char)inAnsiChar;
                }
                else
                {
                    /* two-byte sequence: 110xxxxx 10xxxxxx */
                    outString[out++] = (char)((inAnsiChar >> 6) | 0xC0);
                    outString[out++] = (char)((inAnsiChar & 0x3F) | 0x80);
                }
            }
            outString[out] = '\0';
        }
#endif
    }
    return outString;
}
349 /*--------------------------------------------------------------------------*/
350 char *getNextLine(FILE *stream)
351 {
352     char *bufferLine = NULL;
353     if (stream != NULL)
354     {
355         char *pCurrentLine = NULL;
356         char tmp[LINE_MAX] = EMPTYSTR;
357
358         size_t size = 1;
359
360         while (fgets (tmp, LINE_MAX, stream) != NULL)
361         {
362             size += LINE_MAX;
363             pCurrentLine = (char*)REALLOC (bufferLine, sizeof (char) * size);
364             if (pCurrentLine != NULL)
365             {
366                 if (bufferLine == NULL)
367                 {
368                     pCurrentLine[0] = '\0';
369                 }
370                 bufferLine = pCurrentLine;
371                 pCurrentLine = NULL;
372                 strcat (bufferLine, tmp);
373
374                 if (bufferLine[strlen(bufferLine) - 1] == '\n')
375                 {
376                     bufferLine[strlen(bufferLine) - 1] = '\0';
377                     break;
378                 }
379             }
380             else
381             {
382                 FREE (bufferLine);
383                 bufferLine = NULL;
384             }
385         }
386     }
387     return bufferLine;
388 }
389 /*--------------------------------------------------------------------------*/