0ad188d4d9f5c4f8983cbcdd526897e8a5d25ffc
[scilab.git] / scilab / modules / fileio / src / cpp / mgetl.cpp
1 /*
2 * Scilab ( http://www.scilab.org/ ) - This file is part of Scilab
3 * Copyright (C) 2010 - DIGITEO - Allan CORNET
4 * Copyright (C) 2010 - DIGITEO - Antoine ELIAS
5 *
6  * Copyright (C) 2012 - 2016 - Scilab Enterprises
7  *
8  * This file is hereby licensed under the terms of the GNU GPL v2.0,
9  * pursuant to article 5.3.4 of the CeCILL v.2.1.
10  * This file was originally licensed under the terms of the CeCILL v2.1,
11  * and continues to be available under such terms.
12  * For more information, see the COPYING file which you should have received
13  * along with this program.
14 *
15 */
16 /*--------------------------------------------------------------------------*/
17 #include <string.h>
18 #include "filemanager.hxx"
19
20 extern "C"
21 {
22 #ifdef _MSC_VER
23 #include <Windows.h>
24 #endif
25 #include "mgetl.h"
26 #include "mopen.h"
27 #include "sci_malloc.h"
28 #include "os_string.h"
29 #include "mtell.h"
30 #include "mseek.h"
31 #include "sciprint.h"
32 #include "freeArrayOfString.h"
33 }
34
35 #include <iostream>
36 #include <fstream>
37 /*--------------------------------------------------------------------------*/
38 // we do not want to depend on the OS specific LINE_MAX setting
39 #ifdef LINE_MAX
40 #undef LINE_MAX
41 #endif
42 #define LINE_MAX 4096
43
44 #define CR '\r'
45 #define LF '\n'
46 #define EMPTYSTR ""
47 /*--------------------------------------------------------------------------*/
48 static char *removeEOL(char *_inString);
49 static char *convertAnsiToUtf(char *_inString);
50 static char* getLine(char* _pstLine, int _iLineSize, types::File* _pFile);
51 /*--------------------------------------------------------------------------*/
52 #define UTF_16BE_BOM 0xFEFF // 0xFEFF = to_wide_string(0xEFBBBF)
53 /*--------------------------------------------------------------------------*/
54 char** mgetl(int fd, int nbLinesIn, int *nbLinesOut, int *ierr)
55 {
56     char** strLines = NULL;
57     types::File* pFile = NULL;
58     int iLineSizeMult = 1;
59     *ierr = MGETL_ERROR;
60     *nbLinesOut = 0;
61
62     pFile = FileManager::getFile(fd);
63
64     if (nbLinesIn < 0 && fd == 5)
65     {
66         nbLinesIn = 1;
67     }
68
69     /*try std version*/
70     //{
71     //    int posix_handle = ::_fileno(pFile->getFiledesc());
72
73     //    std::ifstream ifs(::_wfdopen(posix_handle, pFile->getFileMode().c_str()));
74     //    std::list<string> lst;
75     //    std::string str;
76
77     //    while(ifs.eof() == false && lst.size() < nbLinesIn)
78     //    {
79     //        std::getline(ifs, str);
80     //        lst.push_back(str);
81     //    }
82
83     //    sciprint("size : %d\n", lst.size());
84
85     //    *nbLinesOut =  (int)lst.size();
86     //    if(*nbLinesOut == 0)
87     //    {
88     //        return NULL;
89     //    }
90
91     //    strLines = (char**)MALLOC(sizeof(char*) * *nbLinesOut);
92     //    for(int i = 0 ; i < *nbLinesOut ; i++)
93     //    {
94     //        strLines[i] = to_wide_string(lst.front().c_str());
95     //        lst.pop_front();
96     //    }
97
98     //    return strLines;
99     //}
100
101     if (pFile)
102     {
103         char* Line = (char*)MALLOC(LINE_MAX * iLineSizeMult * sizeof(char));
104         int nbLines = 0;
105         long long iPos = 0;
106         if (nbLinesIn < 0)
107         {
108             strLines = (char **)MALLOC(sizeof(char *));
109             if (strLines == NULL)
110             {
111                 *nbLinesOut = 0;
112                 *ierr = MGETL_MEMORY_ALLOCATION_ERROR;
113                 FREE(Line);
114                 return NULL;
115             }
116             while ( getLine ( Line, LINE_MAX * iLineSizeMult, pFile ) != NULL )
117             {
118                 if (((int)strlen(Line)) >= (LINE_MAX * iLineSizeMult) - 1 && iPos >= 0)
119                 {
120                     FREE(Line);
121                     iLineSizeMult++;
122                     Line = (char*)MALLOC(LINE_MAX * iLineSizeMult * sizeof(char));
123                     mseek(fd, iPos, SEEK_SET);
124
125                     continue;
126                 }
127
128                 iPos = mtell(fd);
129                 /* UTF-16 BOM */
130                 if ((nbLines == 0) && (Line[0] == UTF_16BE_BOM))
131                 {
132                     char* tmpLine = os_strdup(Line);
133                     memset(Line, 0x00, LINE_MAX * iLineSizeMult);
134                     strcpy(Line, &tmpLine[1]);
135                     FREE(tmpLine);
136                 }
137
138                 nbLines++;
139                 strLines = (char **)REALLOC(strLines, nbLines * sizeof(char *));
140                 if (strLines == NULL)
141                 {
142                     *nbLinesOut = 0;
143                     *ierr = MGETL_MEMORY_ALLOCATION_ERROR;
144                     FREE(Line);
145                     return NULL;
146                 }
147
148                 strLines[nbLines - 1] = os_strdup(removeEOL(Line));
149                 if (strLines[nbLines - 1] == NULL)
150                 {
151                     *nbLinesOut = 0;
152                     *ierr = MGETL_MEMORY_ALLOCATION_ERROR;
153                     freeArrayOfString(strLines, nbLines);
154                     FREE(Line);
155                     return NULL;
156                 }
157                 
158                 strcpy(Line, EMPTYSTR);
159             }
160             *nbLinesOut = nbLines;
161             *ierr = MGETL_NO_ERROR;
162         }
163         else
164         {
165             if (nbLinesIn == 0)
166             {
167                 *ierr = MGETL_EOF;
168                 *nbLinesOut = 0;
169             }
170             else
171             {
172                 BOOL bContinue = TRUE;
173                 BOOL bEOF = FALSE;
174                 strLines = (char **)MALLOC(sizeof(char *) * nbLinesIn);
175                 if (strLines == NULL)
176                 {
177                     *nbLinesOut = 0;
178                     *ierr = MGETL_MEMORY_ALLOCATION_ERROR;
179                     FREE(Line);
180                     return NULL;
181                 }
182
183                 do
184                 {
185                     if (nbLines < nbLinesIn)
186                     {
187                         bool header = false;
188                         /* UTF-16 BOM */
189                         if ((ftell(pFile->getFiledesc()) == 0) && (nbLines == 0))
190                         {
191                             header = true;
192                         }
193
194                         if ( getLine ( Line, LINE_MAX * iLineSizeMult, pFile) != NULL)
195                         {
196                             if (((int) strlen(Line)) >= (LINE_MAX * iLineSizeMult) - 1)
197                             {
198                                 FREE(Line);
199                                 iLineSizeMult++;
200                                 Line = (char*)MALLOC(LINE_MAX * iLineSizeMult * sizeof(char));
201                                 mseek(fd, iPos, SEEK_SET);
202
203                                 continue;
204                             }
205
206                             iPos = mtell(fd);
207
208                             if (header && (Line[0] == UTF_16BE_BOM))
209                             {
210                                 char* tmpLine = os_strdup(Line);
211                                 memset(Line, 0x00, LINE_MAX * iLineSizeMult);
212                                 strcpy(Line, &tmpLine[1]);
213                                 FREE(tmpLine);
214                             }
215                             nbLines++;
216                             strLines[nbLines - 1] = os_strdup(removeEOL(Line));
217                             if (strLines[nbLines - 1] == NULL)
218                             {
219                                 *nbLinesOut = 0;
220                                 *ierr = MGETL_MEMORY_ALLOCATION_ERROR;
221                                 FREE(Line);
222                                 freeArrayOfString(strLines, nbLines);
223                                 return NULL;
224                             }
225
226                             strcpy(Line, EMPTYSTR);
227                         }
228                         else
229                         {
230                             /* EOF */
231                             if (feof(pFile->getFiledesc()))
232                             {
233                                 bEOF = TRUE;
234                             }
235                             bContinue = FALSE;
236                         }
237                     }
238                     else
239                     {
240                         bContinue = FALSE;
241                     }
242                 }
243                 while (bContinue);
244
245                 *nbLinesOut = nbLines;
246                 if (bEOF)
247                 {
248                     *ierr = MGETL_EOF;
249                 }
250                 else
251                 {
252                     *ierr = MGETL_NO_ERROR;
253                 }
254             }
255         }
256         FREE(Line);
257     }
258     return strLines;
259 }
260 /*--------------------------------------------------------------------------*/
261 char* getLine(char* _pstLine, int _iLineSize, types::File* _pFile)
262 {
263     char* pstTemp = (char*)MALLOC(sizeof(char) * _iLineSize);
264     if (fgets(pstTemp, _iLineSize, _pFile->getFiledesc()) == NULL)
265     {
266         FREE(pstTemp);
267         return NULL;
268     }
269
270     strcpy(_pstLine, pstTemp);
271     FREE(pstTemp);
272     return _pstLine;
273 }
274 /*--------------------------------------------------------------------------*/
275 char *removeEOL(char *_inString)
276 {
277     if (_inString)
278     {
279         char *pos = strchr(_inString, LF);
280         if (pos)
281         {
282             *pos = 0;
283         }
284
285         pos = strchr(_inString, CR);
286         if (pos)
287         {
288             *pos = 0;
289         }
290     }
291     return _inString;
292 }
293 /*--------------------------------------------------------------------------*/
294 /*
295 * convert ansi to Utf
296 */
297 static char *convertAnsiToUtf(char *_inString)
298 {
299     char *outString = NULL;
300     if (_inString)
301     {
302 #ifdef _MSC_VER
303         if (IsValidUTF8(_inString))
304         {
305             outString = os_strdup(_inString);
306         }
307         else
308         {
309             /* conversion ANSI to UTF */
310             int Len = 0;
311             int newLen = 0;
312             BSTR bstrCode = NULL;
313
314             Len = MultiByteToWideChar(CP_ACP, 0, _inString, lstrlen(_inString), NULL, NULL);
315             bstrCode = SysAllocStringLen(NULL, Len);
316             if (bstrCode)
317             {
318                 MultiByteToWideChar(CP_ACP, 0, _inString, lstrlen(_inString), bstrCode, Len);
319                 newLen = WideCharToMultiByte(CP_UTF8, 0, bstrCode, -1, outString, 0, NULL, NULL);
320                 outString = (char*) MALLOC(sizeof(char) * (newLen + 1));
321                 if (outString)
322                 {
323                     WideCharToMultiByte(CP_UTF8, 0, bstrCode, -1, outString, newLen, NULL, NULL);
324                 }
325                 else
326                 {
327                     outString = os_strdup(_inString);
328                 }
329                 SysFreeString(bstrCode);
330                 bstrCode = NULL;
331             }
332             else
333             {
334                 outString = os_strdup(_inString);
335             }
336         }
337 #else
338         if (IsValidUTF8(_inString))
339         {
340             outString = os_strdup(_inString);
341         }
342         else
343         {
344             int len = (int)strlen(_inString);
345             int i = 0;
346
347             outString = (char*)MALLOC(((len * 3) + 1) * sizeof(char));
348             if (outString == NULL)
349             {
350                 return NULL;
351             }
352
353             strcpy(outString, EMPTYSTR);
354
355             for (i = 0; i < len; i++)
356             {
357                 char *outUtfChar = NULL;
358                 unsigned char inAnsiChar = 0;
359
360                 if (_inString[i] < 0)
361                 {
362                     inAnsiChar = 256 + _inString[i];
363                 }
364                 else
365                 {
366                     inAnsiChar = _inString[i];
367                 }
368
369                 if (inAnsiChar < 128)
370                 {
371                     outUtfChar = (char *)CALLOC(2, sizeof(char));
372                     if (outUtfChar)
373                     {
374                         outUtfChar[0] = inAnsiChar;
375                         outUtfChar[1] = 0;
376                     }
377                 }
378                 else
379                 {
380                     outUtfChar = (char *)CALLOC(3, sizeof(char));
381                     if (outUtfChar)
382                     {
383                         outUtfChar[0] = (inAnsiChar >> 6) | 0xC0;
384                         outUtfChar[1] = (inAnsiChar & 0x3F) | 0x80;
385                         outUtfChar[2] = 0;
386                     }
387                 }
388
389                 if (outUtfChar)
390                 {
391                     strcat(outString, outUtfChar);
392                     FREE(outUtfChar);
393                     outUtfChar = NULL;
394                 }
395             }
396         }
397 #endif
398     }
399     return outString;
400 }
401 /*--------------------------------------------------------------------------*/