32ca9f3f77eaef4b03ae76ec6540bb1022d3d740
[scilab.git] / scilab / modules / fileio / src / cpp / mgetl.cpp
1 /*
2 * Scilab ( http://www.scilab.org/ ) - This file is part of Scilab
3 * Copyright (C) 2010 - DIGITEO - Allan CORNET
4 * Copyright (C) 2010 - DIGITEO - Antoine ELIAS
5 *
6  * Copyright (C) 2012 - 2016 - Scilab Enterprises
7  *
8  * This file is hereby licensed under the terms of the GNU GPL v2.0,
9  * pursuant to article 5.3.4 of the CeCILL v.2.1.
10  * This file was originally licensed under the terms of the CeCILL v2.1,
11  * and continues to be available under such terms.
12  * For more information, see the COPYING file which you should have received
13  * along with this program.
14 *
15 */
16 /*--------------------------------------------------------------------------*/
17 #include <string.h>
18 #include "filemanager.hxx"
19
20 extern "C"
21 {
22 #ifdef _MSC_VER
23 #include <Windows.h>
24 #endif
25 #include "mgetl.h"
26 #include "mopen.h"
27 #include "sci_malloc.h"
28 #include "os_string.h"
29 #include "mtell.h"
30 #include "mseek.h"
31 #include "sciprint.h"
32 #include "freeArrayOfString.h"
33 }
34
35 #include <iostream>
36 #include <fstream>
37 /*--------------------------------------------------------------------------*/
38 // we do not want to depend on the OS specific LINE_MAX setting
39 #ifdef LINE_MAX
40 #undef LINE_MAX
41 #endif
42 #define LINE_MAX 4096
43
44 #define CR L'\r'
45 #define LF L'\n'
46 #define EMPTYSTRW L""
47 #define EMPTYSTR ""
48 /*--------------------------------------------------------------------------*/
49 static wchar_t *removeEOL(wchar_t *_inString);
50 static char *convertAnsiToUtf(char *_inString);
51 static wchar_t* getLine(wchar_t* _pstLine, int _iLineSize, types::File* _pFile);
52 /*--------------------------------------------------------------------------*/
53 #define UTF_16BE_BOM 0xFEFF // 0xFEFF = to_wide_string(0xEFBBBF)
54 /*--------------------------------------------------------------------------*/
55 wchar_t **mgetl(int fd, int nbLinesIn, int *nbLinesOut, int *ierr)
56 {
57     wchar_t **strLines = NULL;
58     types::File* pFile = NULL;
59     int iLineSizeMult = 1;
60     *ierr = MGETL_ERROR;
61     *nbLinesOut = 0;
62
63     pFile = FileManager::getFile(fd);
64
65     if (nbLinesIn < 0 && fd == 5)
66     {
67         nbLinesIn = 1;
68     }
69
70     /*try std version*/
71     //{
72     //    int posix_handle = ::_fileno(pFile->getFiledesc());
73
74     //    std::ifstream ifs(::_wfdopen(posix_handle, pFile->getFileMode().c_str()));
75     //    std::list<string> lst;
76     //    std::string str;
77
78     //    while(ifs.eof() == false && lst.size() < nbLinesIn)
79     //    {
80     //        std::getline(ifs, str);
81     //        lst.push_back(str);
82     //    }
83
84     //    sciprint("size : %d\n", lst.size());
85
86     //    *nbLinesOut =  (int)lst.size();
87     //    if(*nbLinesOut == 0)
88     //    {
89     //        return NULL;
90     //    }
91
92     //    strLines = (wchar_t**)MALLOC(sizeof(wchar_t*) * *nbLinesOut);
93     //    for(int i = 0 ; i < *nbLinesOut ; i++)
94     //    {
95     //        strLines[i] = to_wide_string(lst.front().c_str());
96     //        lst.pop_front();
97     //    }
98
99     //    return strLines;
100     //}
101
102     if (pFile)
103     {
104         wchar_t* Line = (wchar_t*)MALLOC(LINE_MAX * iLineSizeMult * sizeof(wchar_t));
105         int nbLines = 0;
106         long long iPos = 0;
107         if (nbLinesIn < 0)
108         {
109             strLines = (wchar_t **)MALLOC(sizeof(wchar_t *));
110             if (strLines == NULL)
111             {
112                 *nbLinesOut = 0;
113                 *ierr = MGETL_MEMORY_ALLOCATION_ERROR;
114                 FREE(Line);
115                 return NULL;
116             }
117             while ( getLine ( Line, LINE_MAX * iLineSizeMult, pFile ) != NULL )
118             {
119                 if (((int) wcslen(Line)) >= (LINE_MAX * iLineSizeMult) - 1 && iPos >= 0)
120                 {
121                     FREE(Line);
122                     iLineSizeMult++;
123                     Line = (wchar_t*)MALLOC(LINE_MAX * iLineSizeMult * sizeof(wchar_t));
124                     mseek(fd, iPos, SEEK_SET);
125
126                     continue;
127                 }
128
129                 iPos = mtell(fd);
130                 /* UTF-16 BOM */
131                 if ((nbLines == 0) && (Line[0] == UTF_16BE_BOM))
132                 {
133                     wchar_t* tmpLine = os_wcsdup(Line);
134                     memset(Line, 0x00, LINE_MAX * iLineSizeMult);
135                     wcscpy(Line, &tmpLine[1]);
136                     FREE(tmpLine);
137                 }
138
139                 nbLines++;
140                 strLines = (wchar_t **)REALLOC(strLines, nbLines * sizeof(wchar_t *));
141                 if (strLines == NULL)
142                 {
143                     *nbLinesOut = 0;
144                     *ierr = MGETL_MEMORY_ALLOCATION_ERROR;
145                     FREE(Line);
146                     return NULL;
147                 }
148
149                 strLines[nbLines - 1] = os_wcsdup(removeEOL(Line));
150                 if (strLines[nbLines - 1] == NULL)
151                 {
152                     *nbLinesOut = 0;
153                     *ierr = MGETL_MEMORY_ALLOCATION_ERROR;
154                     freeArrayOfWideString(strLines, nbLines);
155                     FREE(Line);
156                     return NULL;
157                 }
158                 wcscpy(Line, EMPTYSTRW);
159             }
160             *nbLinesOut = nbLines;
161             *ierr = MGETL_NO_ERROR;
162         }
163         else
164         {
165             if (nbLinesIn == 0)
166             {
167                 *ierr = MGETL_EOF;
168                 *nbLinesOut = 0;
169             }
170             else
171             {
172                 BOOL bContinue = TRUE;
173                 BOOL bEOF = FALSE;
174                 strLines = (wchar_t **)MALLOC(sizeof(wchar_t *) * nbLinesIn);
175                 if (strLines == NULL)
176                 {
177                     *nbLinesOut = 0;
178                     *ierr = MGETL_MEMORY_ALLOCATION_ERROR;
179                     FREE(Line);
180                     return NULL;
181                 }
182
183                 do
184                 {
185                     if (nbLines < nbLinesIn)
186                     {
187                         bool header = false;
188                         /* UTF-16 BOM */
189                         if ((ftell(pFile->getFiledesc()) == 0) && (nbLines == 0))
190                         {
191                             header = true;
192                         }
193
194                         if ( getLine ( Line, LINE_MAX * iLineSizeMult, pFile) != NULL)
195                         {
196                             if (((int) wcslen(Line)) >= (LINE_MAX * iLineSizeMult) - 1)
197                             {
198                                 FREE(Line);
199                                 iLineSizeMult++;
200                                 Line = (wchar_t*)MALLOC(LINE_MAX * iLineSizeMult * sizeof(wchar_t));
201                                 mseek(fd, iPos, SEEK_SET);
202
203                                 continue;
204                             }
205
206                             iPos = mtell(fd);
207
208                             if (header && (Line[0] == UTF_16BE_BOM))
209                             {
210                                 wchar_t* tmpLine = os_wcsdup(Line);
211                                 memset(Line, 0x00, LINE_MAX * iLineSizeMult);
212                                 wcscpy(Line, &tmpLine[1]);
213                                 FREE(tmpLine);
214                             }
215                             nbLines++;
216                             strLines[nbLines - 1] = os_wcsdup(removeEOL(Line));
217                             if (strLines[nbLines - 1] == NULL)
218                             {
219                                 *nbLinesOut = 0;
220                                 *ierr = MGETL_MEMORY_ALLOCATION_ERROR;
221                                 FREE(Line);
222                                 freeArrayOfWideString(strLines, nbLines);
223                                 return NULL;
224                             }
225                             wcscpy(Line, EMPTYSTRW);
226                         }
227                         else
228                         {
229                             /* EOF */
230                             if (feof(pFile->getFiledesc()))
231                             {
232                                 bEOF = TRUE;
233                             }
234                             bContinue = FALSE;
235                         }
236                     }
237                     else
238                     {
239                         bContinue = FALSE;
240                     }
241                 }
242                 while (bContinue);
243
244                 *nbLinesOut = nbLines;
245                 if (bEOF)
246                 {
247                     *ierr = MGETL_EOF;
248                 }
249                 else
250                 {
251                     *ierr = MGETL_NO_ERROR;
252                 }
253             }
254         }
255         FREE(Line);
256     }
257     return strLines;
258 }
259 /*--------------------------------------------------------------------------*/
260 wchar_t* getLine(wchar_t* _pstLine, int _iLineSize, types::File* _pFile)
261 {
262     char* pstTemp = (char*)MALLOC(sizeof(char) * _iLineSize);
263     if (fgets(pstTemp, _iLineSize, _pFile->getFiledesc()) == NULL)
264     {
265         FREE(pstTemp);
266         return NULL;
267     }
268
269     wchar_t* pstTempWide = to_wide_string(pstTemp);
270     wcscpy(_pstLine, pstTempWide);
271     FREE(pstTemp);
272     FREE(pstTempWide);
273     return _pstLine;
274 }
275 /*--------------------------------------------------------------------------*/
/*
* Terminate _inString at its first line feed or carriage return, whichever
* comes first. The string is modified in place and returned; a NULL pointer
* or a string without any of these characters is returned unchanged.
*/
wchar_t *removeEOL(wchar_t *_inString)
{
    if (_inString == NULL)
    {
        return _inString;
    }

    // length of the leading part that holds neither CR nor LF
    const size_t nbKept = wcscspn(_inString, L"\r\n");
    if (_inString[nbKept] != L'\0')
    {
        _inString[nbKept] = L'\0';
    }
    return _inString;
}
294 /*--------------------------------------------------------------------------*/
295 /*
296 * convert ansi to Utf
297 */
298 static char *convertAnsiToUtf(char *_inString)
299 {
300     char *outString = NULL;
301     if (_inString)
302     {
303 #ifdef _MSC_VER
304         if (IsValidUTF8(_inString))
305         {
306             outString = os_strdup(_inString);
307         }
308         else
309         {
310             /* conversion ANSI to UTF */
311             int Len = 0;
312             int newLen = 0;
313             BSTR bstrCode = NULL;
314
315             Len = MultiByteToWideChar(CP_ACP, 0, _inString, lstrlen(_inString), NULL, NULL);
316             bstrCode = SysAllocStringLen(NULL, Len);
317             if (bstrCode)
318             {
319                 MultiByteToWideChar(CP_ACP, 0, _inString, lstrlen(_inString), bstrCode, Len);
320                 newLen = WideCharToMultiByte(CP_UTF8, 0, bstrCode, -1, outString, 0, NULL, NULL);
321                 outString = (char*) MALLOC(sizeof(char) * (newLen + 1));
322                 if (outString)
323                 {
324                     WideCharToMultiByte(CP_UTF8, 0, bstrCode, -1, outString, newLen, NULL, NULL);
325                 }
326                 else
327                 {
328                     outString = os_strdup(_inString);
329                 }
330                 SysFreeString(bstrCode);
331                 bstrCode = NULL;
332             }
333             else
334             {
335                 outString = os_strdup(_inString);
336             }
337         }
338 #else
339         if (IsValidUTF8(_inString))
340         {
341             outString = os_strdup(_inString);
342         }
343         else
344         {
345             int len = (int)strlen(_inString);
346             int i = 0;
347
348             outString = (char*)MALLOC(((len * 3) + 1) * sizeof(char));
349             if (outString == NULL)
350             {
351                 return NULL;
352             }
353
354             strcpy(outString, EMPTYSTR);
355
356             for (i = 0; i < len; i++)
357             {
358                 char *outUtfChar = NULL;
359                 unsigned char inAnsiChar = 0;
360
361                 if (_inString[i] < 0)
362                 {
363                     inAnsiChar = 256 + _inString[i];
364                 }
365                 else
366                 {
367                     inAnsiChar = _inString[i];
368                 }
369
370                 if (inAnsiChar < 128)
371                 {
372                     outUtfChar = (char *)CALLOC(2, sizeof(char));
373                     if (outUtfChar)
374                     {
375                         outUtfChar[0] = inAnsiChar;
376                         outUtfChar[1] = 0;
377                     }
378                 }
379                 else
380                 {
381                     outUtfChar = (char *)CALLOC(3, sizeof(char));
382                     if (outUtfChar)
383                     {
384                         outUtfChar[0] = (inAnsiChar >> 6) | 0xC0;
385                         outUtfChar[1] = (inAnsiChar & 0x3F) | 0x80;
386                         outUtfChar[2] = 0;
387                     }
388                 }
389
390                 if (outUtfChar)
391                 {
392                     strcat(outString, outUtfChar);
393                     FREE(outUtfChar);
394                     outUtfChar = NULL;
395                 }
396             }
397         }
398 #endif
399     }
400     return outString;
401 }
402 /*--------------------------------------------------------------------------*/