SASification of linear_algebra svd
Bernard HUGUENEY [Tue, 31 Mar 2009 11:11:05 +0000 (13:11 +0200)]
scilab/modules/linear_algebra/Makefile.am
scilab/modules/linear_algebra/Makefile.in
scilab/modules/linear_algebra/includes/svd.h [new file with mode: 0644]
scilab/modules/linear_algebra/sci_gateway/c/sci_svd.c
scilab/modules/linear_algebra/src/c/svd.c [new file with mode: 0644]

index 808888e..aadc029 100644 (file)
@@ -10,6 +10,8 @@ src/c/qr.c \
 src/c/hess.c \
 src/c/eigen.c \
 src/c/balanc.c \
+src/c/chol.c \
+src/c/svd.c \
 src/c/issymmetric.c
 
 LINEAR_ALGEBRA_FORTRAN_SOURCES = src/fortran/intdggbal.f \
index 9af3f25..56a6b5a 100644 (file)
@@ -115,6 +115,7 @@ am__objects_2 = libscilinear_algebra_la-schurtable.lo \
        libscilinear_algebra_la-hess.lo \
        libscilinear_algebra_la-eigen.lo \
        libscilinear_algebra_la-balanc.lo \
+       libscilinear_algebra_la-chol.lo libscilinear_algebra_la-svd.lo \
        libscilinear_algebra_la-issymmetric.lo
 am__objects_3 = SELECT.lo Ex-schur.lo
 am__objects_4 = libscilinear_algebra_la-sci_backslash.lo \
@@ -382,6 +383,8 @@ src/c/qr.c \
 src/c/hess.c \
 src/c/eigen.c \
 src/c/balanc.c \
+src/c/chol.c \
+src/c/svd.c \
 src/c/issymmetric.c
 
 LINEAR_ALGEBRA_FORTRAN_SOURCES = src/fortran/intdggbal.f \
@@ -631,6 +634,7 @@ distclean-compile:
 
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libscilinear_algebra_la-assembleEigenvectors.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libscilinear_algebra_la-balanc.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libscilinear_algebra_la-chol.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libscilinear_algebra_la-eigen.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libscilinear_algebra_la-gw_linear_algebra.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libscilinear_algebra_la-gw_linear_algebra2.Plo@am__quote@
@@ -661,6 +665,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libscilinear_algebra_la-sci_zgeev.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libscilinear_algebra_la-sci_zggev.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libscilinear_algebra_la-sci_zheev.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libscilinear_algebra_la-svd.Plo@am__quote@
 
 .c.o:
 @am__fastdepCC_TRUE@   $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@@ -732,6 +737,20 @@ libscilinear_algebra_la-balanc.lo: src/c/balanc.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@      DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@  $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libscilinear_algebra_la_CFLAGS) $(CFLAGS) -c -o libscilinear_algebra_la-balanc.lo `test -f 'src/c/balanc.c' || echo '$(srcdir)/'`src/c/balanc.c
 
+libscilinear_algebra_la-chol.lo: src/c/chol.c
+@am__fastdepCC_TRUE@   $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libscilinear_algebra_la_CFLAGS) $(CFLAGS) -MT libscilinear_algebra_la-chol.lo -MD -MP -MF $(DEPDIR)/libscilinear_algebra_la-chol.Tpo -c -o libscilinear_algebra_la-chol.lo `test -f 'src/c/chol.c' || echo '$(srcdir)/'`src/c/chol.c
+@am__fastdepCC_TRUE@   mv -f $(DEPDIR)/libscilinear_algebra_la-chol.Tpo $(DEPDIR)/libscilinear_algebra_la-chol.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@      source='src/c/chol.c' object='libscilinear_algebra_la-chol.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@      DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@  $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libscilinear_algebra_la_CFLAGS) $(CFLAGS) -c -o libscilinear_algebra_la-chol.lo `test -f 'src/c/chol.c' || echo '$(srcdir)/'`src/c/chol.c
+
+libscilinear_algebra_la-svd.lo: src/c/svd.c
+@am__fastdepCC_TRUE@   $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libscilinear_algebra_la_CFLAGS) $(CFLAGS) -MT libscilinear_algebra_la-svd.lo -MD -MP -MF $(DEPDIR)/libscilinear_algebra_la-svd.Tpo -c -o libscilinear_algebra_la-svd.lo `test -f 'src/c/svd.c' || echo '$(srcdir)/'`src/c/svd.c
+@am__fastdepCC_TRUE@   mv -f $(DEPDIR)/libscilinear_algebra_la-svd.Tpo $(DEPDIR)/libscilinear_algebra_la-svd.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@      source='src/c/svd.c' object='libscilinear_algebra_la-svd.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@      DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@  $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libscilinear_algebra_la_CFLAGS) $(CFLAGS) -c -o libscilinear_algebra_la-svd.lo `test -f 'src/c/svd.c' || echo '$(srcdir)/'`src/c/svd.c
+
 libscilinear_algebra_la-issymmetric.lo: src/c/issymmetric.c
 @am__fastdepCC_TRUE@   $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libscilinear_algebra_la_CFLAGS) $(CFLAGS) -MT libscilinear_algebra_la-issymmetric.lo -MD -MP -MF $(DEPDIR)/libscilinear_algebra_la-issymmetric.Tpo -c -o libscilinear_algebra_la-issymmetric.lo `test -f 'src/c/issymmetric.c' || echo '$(srcdir)/'`src/c/issymmetric.c
 @am__fastdepCC_TRUE@   mv -f $(DEPDIR)/libscilinear_algebra_la-issymmetric.Tpo $(DEPDIR)/libscilinear_algebra_la-issymmetric.Plo
diff --git a/scilab/modules/linear_algebra/includes/svd.h b/scilab/modules/linear_algebra/includes/svd.h
new file mode 100644 (file)
index 0000000..d35b771
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Scilab ( http://www.scilab.org/ ) - This file is part of Scilab
+ * Copyright (C) ????-2009 - INRIA Bernard Hugueney
+ *
+ * This file must be used under the terms of the CeCILL.
+ * This source file is licensed as described in the file COPYING, which
+ * you should have received as part of this distribution.  The terms
+ * are also available at
+ * http://www.cecill.info/licences/Licence_CeCILL_V2-en.txt
+ *
+ */
+
+
+/*
+ * perform singular value decomposition (cf. http://www.scilab.org/product/man/svd.html ), MALLOCating temporary buffers as needed.
+ *
+ * in  :
+ *
+ * @param pData double* (doublecomplex*) in / scratch data of the argument matrix
+ * @param iRows int in  nb of rows of the argument matrix
+ * @param iCols int in  nb of cols of the argument matrix
+ * @param complexArg int (bool semantics) in  if argument matrix is complex
+ * @param economy int (bool semantics) in  if economy mode
+ * @param tol double in  treshold for computing rank
+ *
+ * out :
+ *
+ * @param pSV double* out  result matrix s for lhs==1, NULL for lhs >1
+ * @param pU double* out  result matrix U for lhs>1, NULL for lhs ==1
+ * @param pS double* out  result matrix S for lhs>1, NULL for lhs ==1
+ * @param pV double* out  result matrix V for lhs>1, NULL for lhs ==1
+ * @param pRk double* out  result rank for lhs==4, NULL for lhs !=4
+ *
+ * @return 0 success, -1 MALLOC failure >0 Lapack convergence problems
+ *
+ */
+int iSvdM(double* pData, int iRows, int iCols, int complexArg, int economy, double tol, double* pSV, double* pU, double* pS, double* pV, double* pRk);
index 3bfe023..ec911a5 100644 (file)
@@ -1,7 +1,6 @@
-
 /*
  * Scilab ( http://www.scilab.org/ ) - This file is part of Scilab
- * Copyright (C) ????-2008 - INRIA
+ * Copyright (C) ????-2009 - INRIA Bernard Hugueney
  *
  * This file must be used under the terms of the CeCILL.
  * This source file is licensed as described in the file COPYING, which
 #include "stack-c.h"
 #include "gw_linear_algebra.h"
 #include "Scierror.h"
+#include "MALLOC.h"
 #include "localization.h"
-/*--------------------------------------------------------------------------*/
-extern int C2F(intdgesvd1)(char *fname, unsigned long fname_len);
-extern int C2F(intzgesvd1)(char *fname, unsigned long fname_len);
-extern int C2F(intdgesvd2)(char *fname, unsigned long fname_len);
-extern int C2F(intzgesvd2)(char *fname, unsigned long fname_len);
-extern int C2F(intdoldsvd)(double *tol, char *fname, unsigned long fname_len);
-extern int C2F(intzoldsvd)(double *tol, char *fname, unsigned long fname_len);
-/*--------------------------------------------------------------------------*/
+
+#include "svd.h"
+
+/*
+  s=svd(X): [R: min(rows, cols) x 1]
+  [U,S,V]=svd(X) [U,S,V]:  [ [C|R: rows x rows], [R: rows x cols ],  [R|C: cols x cols ] ]
+  [U,S,V]=svd(X,0) (obsolete) [U,S,V]=svd(X,"e"): [ [C|R:rows x min(rows,cols)], [R: min(rows,cols) x min(rows,cols)], [C|R:cols x min(rows,cols)] ]
+  [U,S,V,rk]=svd(X [,tol]) : cf. supra, rk[R 1 x 1]
+ */
+
+extern int C2F(vfinite)(int *n, double *v);
+
 int C2F(intsvd)(char *fname,unsigned long fname_len)
 {
-       int *header1;int *header2;
-       int Cmplx;int ret;double tol;
+  int ret=0, economy=0, complexArg, iRows, iCols;
+  double* pData= NULL;
+  double* pDataReal= NULL;
+  double* pDataImg= NULL;
+
+  double* pSV= NULL;
 
-       if (GetType(1)!=sci_matrix) 
+  double* pU= NULL;
+  double* pUReal= NULL;
+  double* pUImg= NULL;
+
+  double* pS= NULL;
+
+  double* pV= NULL;
+  double* pVReal= NULL;
+  double* pVImg= NULL;
+
+  double tol= 0.;
+  double* pRk= NULL;
+
+  if ( (Rhs >=1) && (GetType(1)!=sci_matrix))
+    {
+      OverLoad(1);
+      return 0;
+    }
+  CheckRhs(1,2);
+  CheckLhs(1, 4);
+
+  economy= (Rhs==2) && (Lhs==3);
+  if( (complexArg=iIsComplex(1)) )
+    {
+      GetRhsVarMatrixComplex(1, &iRows, &iCols, &pDataReal, &pDataImg);
+      /* c -> z */
+      pData=(double*)oGetDoubleComplexFromPointer( pDataReal, pDataImg, iRows * iCols);
+      if(!pData)
        {
-               OverLoad(1);
-               return 0;
+         Scierror(999,_("%s: Cannot allocate more memory.\n"),fname);
+         ret = 1;
        }
-       header1 = (int *) GetData(1);
-       Cmplx=header1[3];
-
-       switch (Rhs) 
+    }
+  else
+    {
+      GetRhsVarMatrixDouble(1, &iRows, &iCols, &pData);
+    }
+  if(iRows == 0) /* empty matrix */
+    {
+      switch (Lhs){
+       case 4:
+       {
+         iAllocMatrixOfDouble(Rhs+5, 1,1, &pRk);
+         *pRk= 0.;
+         LhsVar(4)= Rhs + 5; /* no break */
+       }
+       case 3:
+       {
+         double* dummy;
+         iAllocMatrixOfDouble(Rhs+3, iRows, iCols, &dummy); /* yes original Fortran code does this... (no check on iCols) */
+         LhsVar(3)= Rhs + 3; /* no break */
+       }
+       case 2: /* illegal according to doc, but there was Fortran code to handle this, so... */
+       {
+         double* dummy;
+         iAllocMatrixOfDouble(Rhs+1, iRows, iCols, &dummy);
+         LhsVar(2)= Rhs + 1; /* no break */
+       }
+       case 1:
+       {
+         LhsVar(1)= 1;
+       }
+      }
+    }
+  else
+    {
+      if((iRows == -1) || (iCols== -1))
        {
-               case 1:   /* s=svd(A)   or   [U,s,V]=svd(A)   */
-               switch (Lhs) 
+         Scierror(999,_("Size varying argument a*eye(), (arg %d) not allowed here.\n"), 1);
+          ret= 1;
+       }
+      else
+       {
+         int const totalsize= iRows * iCols * (complexArg ? 2 : 1); 
+         if(!C2F(vfinite)(&totalsize, pData))
+           {
+             Scierror(999,_("Wrong value for argument %d: Must not contain NaN or Inf.\n"), 1);
+             ret= 1;
+           }
+         else
+           {
+             switch(Lhs){
+             case 4:
                {
-                       case 1: case 2: case 3:
-                               if (Cmplx==0) 
-                               {
-                                       ret = C2F(intdgesvd1)("svd",3L);
-                                       return 0; 
-                               }
-                               if (Cmplx==1) 
-                               {
-                                       ret = C2F(intzgesvd1)("svd",3L);
-                                       return 0; 
-                               }
-                       break;
-
-                       case 4:
-                               if (Cmplx==0) 
-                               {
-                                       ret = C2F(intdoldsvd)((tol=0,&tol),"svd",3L);
-                                       return 0;
-                               }
-                               if (Cmplx==1) 
-                               {
-                                       ret = C2F(intzoldsvd)((tol=0,&tol),"svd",3L);
-                                       return 0;
-                               }
-                       break;
-        }
-        case 2 :   /* svd(A, something)   */
-               header2 = (int *) GetData(2);
-               switch (header2[0]) 
+                 if( (Rhs == 2) && ( GetType(2) == sci_matrix) ) /* other cases with Rhs==2 are handled as economy mode */
+                   {
+                     int dummy; /* original code does not check iRows == iCols == 1 */
+                     double* tmpData;
+                     GetRhsVarMatrixDouble(2, &dummy, &dummy, &tmpData);
+                     tol= *tmpData;
+                   }
+                 iAllocMatrixOfDouble(Rhs+4, 1, 1, &pRk);
+               }/* no break */
+             case 3:
                {
-                       case SCI_DOUBLE :
-                       if (Lhs == 4) 
-                       {
-                               /*  old svd, tolerance is passed: [U,S,V,rk]=svd(A,tol)  */
-                               /*   ret = C2F(intsvdold)("svd",2L);  */
-                               if (Cmplx==0) 
-                               {
-                                       tol = ((double *) header2)[2];
-                                       ret = C2F(intdoldsvd)(&tol,"svd",3L);
-                                       return 0;
-                               }
-                               if (Cmplx==1) 
-                               {
-                                       tol = ((double *) header2)[2];
-                                       ret = C2F(intzoldsvd)(&tol,"svd",3L);
-                                       return 0;
-                               }
-                       }
-                       else 
-                       {
-                               /* old Economy size:  [U,S,V]=svd(A,0)  */
-                               if (Cmplx==0) 
-                               {
-                                       ret = C2F(intdgesvd2)("svd",3L);
-                                       return 0;
-                               }
-                               if (Cmplx==1) 
-                               {
-                                       ret = C2F(intzgesvd2)("svd",3L);
-                                       return 0;
-                               }
-                       }
-                       break;
-
-                       case STRING  :
-                       /* Economy size:  [U,S,V]=svd(A,"e")  */
-                       if (Cmplx==0) 
-                       {
-                               ret = C2F(intdgesvd2)("svd",3L);
-                               return 0;
-                       }
-                       if (Cmplx==1) 
+                 int const economyRows= economy ? Min(iRows, iCols) : iRows;
+                 int const economyCols= economy ? Min(iRows, iCols) : iCols;
+                 if(complexArg)
+                   {
+                     iAllocComplexMatrixOfDouble(Rhs+1, iRows, economyRows , &pUReal, &pUImg);
+                     pU= (double*)MALLOC(iRows * economyRows*(complexArg ? sizeof(doublecomplex): sizeof(double)));
+                     iAllocMatrixOfDouble(Rhs+2, economyRows, economyCols, &pS);
+                     iAllocComplexMatrixOfDouble(Rhs+3, iCols, economyCols , &pVReal, &pVImg);
+                     pV= (double*)MALLOC(iCols * economyCols*(complexArg ? sizeof(doublecomplex): sizeof(double)));
+                   }
+                 else
+                   {
+                     iAllocMatrixOfDouble(Rhs+1, iRows, economyRows , &pU);
+                     iAllocMatrixOfDouble(Rhs+2, economyRows, economyCols, &pS);
+                     iAllocMatrixOfDouble(Rhs+3, iCols, economyCols , &pV);
+                   }
+                
+                 break;
+               }
+             case 2:
+               {
+                 Scierror(999,_("%s: Wrong number of output arguments.\n"),fname);
+                 break;
+               }
+             case 1:
+               {
+                 iAllocMatrixOfDouble(Rhs+1, Min(iRows, iCols), 1, &pSV);
+                 break;
+               }
+             }
+             ret=  iSvdM(pData, iRows, iCols, complexArg, economy, tol, pSV, pU, pS, pV, pRk);
+             if(ret){
+               if( ret == -1)
+                 {
+                   Scierror(999,_("%s: Cannot allocate more memory.\n"),fname);
+                 }
+               else
+                 {
+                   Scierror(999,_("Convergence problem...\n"));
+                 }
+             }
+             else
+               {
+                 if(complexArg)
+                   {
+                     vFreeDoubleComplexFromPointer((doublecomplex*)pData);
+                     if(Lhs != 1)
                        {
-                               ret = C2F(intzgesvd2)("svd",3L);
-                               return 0;
+                         {/* multicore: omp sections */
+                           vGetPointerFromDoubleComplex((doublecomplex*)pU, iRows* (economy ? Min(iRows, iCols) : iRows), pUReal, pUImg);
+                           FREE(pU);
+                         }
+                         {
+                           vGetPointerFromDoubleComplex((doublecomplex*)pV, iCols* (economy ? Min(iRows, iCols) : iCols), pVReal, pVImg);
+                           FREE(pV);
+                         }
                        }
-                       break;
-         }
-         break;
-
-         default :   /*  rhs > 2 */
-               Scierror(999,("%s: Wrong number of input argument(s).\n"),fname);
-         break;
+                   }
+                 switch(Lhs)
+                   {
+                   case 4: LhsVar(4)= Rhs + 4; /* no break */
+                   case 3:
+                     {
+                       LhsVar(3)= Rhs + 3;
+                       LhsVar(2)= Rhs + 2;
+                     } /* no break */
+                   case 1: LhsVar(1)= Rhs + 1 ;
+                   }
+               }
+           }
        }
-       return 0;
+    }
+  return ret;
 }
 /*--------------------------------------------------------------------------*/
diff --git a/scilab/modules/linear_algebra/src/c/svd.c b/scilab/modules/linear_algebra/src/c/svd.c
new file mode 100644 (file)
index 0000000..800dbaa
--- /dev/null
@@ -0,0 +1,248 @@
+/*
+ * Scilab ( http://www.scilab.org/ ) - This file is part of Scilab
+ * Copyright (C) ????-2009 - INRIA Bernard Hugueney
+ *
+ * This file must be used under the terms of the CeCILL.
+ * This source file is licensed as described in the file COPYING, which
+ * you should have received as part of this distribution.  The terms
+ * are also available at
+ * http://www.cecill.info/licences/Licence_CeCILL_V2-en.txt
+ *
+ */
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include "doublecomplex.h"
+#include "machine.h"
+#include "MALLOC.h"
+#include "core_math.h"
+#include "svd.h"
+
+/*
+ * Lapack functions performing the real work
+ */
+extern void C2F(zgesvd)(char const jobU[1]/* 'A'|'S'|'O'|'N'*/, char const jobVT[1]/* 'A'|'S'|'O'|'N'*/, int const* iRows, int const* iCols
+                  , doublecomplex* a, int const* ldA, double* s, doublecomplex* u, int const* ldU, doublecomplex* vt, int const* ldVT
+                  , doublecomplex* work, int const* lWork, double* rWork, int* info);
+extern void C2F(dgesvd)(char const jobU[1]/* 'A'|'S'|'O'|'N'*/, char const jobVT[1]/* 'A'|'S'|'O'|'N'*/, int const* iRows, int const* iCols
+                  , double* a, int const* ldA, double* s, double* u, int const* ldU, double* vt, int const* ldVT
+                  , double* work, int const* lWork, int* info);
+
+/*
+ * Lapack utility functions
+ */
+/* sorting (used to correct a Lapack bug */
+extern void C2F(dlasrt)(char const id[1]/* 'I'ncreasing|'D'ecreasing order */, int const* length, double* array, int* info);
+/* querying for epsilon, used for default treshold value */
+extern double C2F(dlamch)(char const query[1], long );
+
+/*
+ * functions used to allocate workspace for Lapack functions
+ * in: job char 
+ *
+ */
+static double* allocDgesvdWorkspace(char job, int iRows, int iCols, int iColsToCompute, int* iAllocated);
+static int iDgesvd(char job, double* pData, int iRows, int iCols, int iColsToCompute, double* pSV, double* pU, double* pVT, double* pWork, int iWorkSize);
+static doublecomplex* allocZgesvdWorkspace(char job, int iRows, int iCols, int computeA, int* iAllocated);
+static int iZgesvd(char job, doublecomplex* pData, int iRows, int iCols, int iColsToCompute, double* pSV, doublecomplex* pU, doublecomplex* pVT
+                  , doublecomplex* pWork, int iWorkSize, double* pRWork);
+
+/* original fortran code says this is needed to workaround a bug in lapack, should assess necessity for current Lapack*/
+static int iCorrectSV(double* pSV, int iMinRowsCols);
+/* if lhs >1, S and V are computed from SV and VT */
+static int iComputeSandVReal( double const* pSV, double const* pVT, int iRows, int iCols, int economy, double* pS, double* pV);
+static int iComputeSandVComplex( double const* pSV, doublecomplex const* pVT, int iRows, int iCols, int economy, double* pS, doublecomplex* pV);
+
+
+static doublecomplex* allocZgesvdWorkspace(char job, int iRows, int iCols, int iColsToCompute, int* iAllocated)
+{
+  doublecomplex* res= NULL;
+  doublecomplex optimal;
+  int query= -1; /* query zgesvd for optimal worksize */
+  int info;
+  C2F(zgesvd)(&job, &job, &iRows, &iCols, NULL, &iRows, NULL, NULL, &iRows, NULL, &iColsToCompute, &optimal, &query, NULL, &info);
+  *iAllocated= (int) optimal.r;
+  res= MALLOC((*iAllocated) * sizeof(doublecomplex)); /* try to allocate optimal worksize */
+  if(!res) /* allocation failed, try with minimal worksize */
+    {
+      *iAllocated= 2 * Min(iRows, iCols) + Max(iRows, iCols);
+      res= MALLOC((*iAllocated) * sizeof(doublecomplex));
+      if(!res) /* failed again, report that we allocated 0 bytes */
+       {
+         *iAllocated= 0;
+       }
+    }
+  return res;
+}
+static int iZgesvd(char job, doublecomplex* pData, int iRows, int iCols, int iColsToCompute, double* pSV, doublecomplex* pU, doublecomplex* pVT
+                  , doublecomplex* pWork, int iWorkSize, double* pRWork)
+{
+  int info;
+  C2F(zgesvd)(&job, &job, &iRows, &iCols, pData, &iRows, pSV, (pU ? pU : pData) , &iRows, (pVT ? pVT : pData), &iColsToCompute
+             , pWork, &iWorkSize, pRWork, &info);
+  return ( job == 'N' ) ? iCorrectSV(pSV, Min(iRows, iCols)) : info ;
+}
+
+static double* allocDgesvdWorkspace(char job, int iRows, int iCols, int iColsToCompute, int* iAllocated)
+{
+  double* res=NULL;
+  double optimal;
+  int query=-1;
+  int info;
+  C2F(dgesvd)(&job, &job, &iRows, &iCols, NULL, &iRows, NULL, NULL, &iRows, NULL, &iColsToCompute, &optimal, &query, &info);
+  *iAllocated= (int) optimal;
+  res= MALLOC((*iAllocated) * sizeof(double));
+  if(!res)
+    {
+      *iAllocated= Max(3*Min(iRows, iCols) + Max(iRows, iCols), 5* Min(iRows, iCols)); /* Max & Min might have been more eficient as functions */
+      res= MALLOC((*iAllocated) * sizeof(double));
+      if(!res)
+       {
+         *iAllocated= 0;
+       }
+    }
+  return res;
+}
+
+static int iDgesvd(char job, double* pData, int iRows, int iCols, int iColsToCompute, double* pSV, double* pU, double* pVT
+                  , double* pWork, int iWorkSize)
+{
+  int info;
+  C2F(dgesvd)(&job, &job, &iRows, &iCols, pData, &iRows, pSV, (pU ? pU : pData) , &iRows, (pVT ? pVT : pData), &iColsToCompute
+             , pWork, &iWorkSize, &info);
+  return ( job == 'N' ) ? iCorrectSV(pSV, Min(iRows, iCols)) : info ;
+}
+
+/*
+  lhs==1 ->   pSV && !pU && !pS && !pV
+  lhs==3|4 ->  !pSV && pU && pS && pV
+  lhs==4 <-> pRk != NULL
+ */
+
+int iSvdM(double* pData, int iRows, int iCols, int complexArg, int economy, double tol, double* pSV, double* pU, double* pS, double* pV, double* pRk)
+{
+  int ret=0, allocOK;
+  int worksize;
+
+  double* pWork= NULL;
+  double* pRWork= NULL;
+  double* pVT= NULL;
+
+  char const job= pU ? (economy ? 'S' :'A') : 'N';
+  int colsToCompute= (job=='S') ? Min(iRows, iCols) : iCols;
+  pSV= pSV ? pSV : (double*) MALLOC(Min(iRows, iCols) * sizeof(double));
+  if( (allocOK= ( (pWork= (complexArg 
+                         ? (double*)allocZgesvdWorkspace(job, iRows, iCols, colsToCompute, &worksize)
+                         : allocDgesvdWorkspace(job, iRows, iCols, colsToCompute, &worksize)))
+                && (!complexArg 
+                    || (pRWork= (double*) MALLOC(5 * Min(iRows, iCols) * sizeof(double))))
+                && (pVT= (pU 
+                          ? MALLOC(colsToCompute * iCols * (complexArg ? sizeof(doublecomplex) : sizeof(double)))
+                          : pData)))))
+    {
+      ret= complexArg 
+       ? iZgesvd(job,  (doublecomplex*)pData, iRows, iCols, colsToCompute, pSV,(doublecomplex*) pU, (doublecomplex*)pVT
+                 , (doublecomplex*)pWork, worksize, pRWork)
+       : iDgesvd(job, pData, iRows, iCols, colsToCompute, pSV, pU, pVT, pWork, worksize);
+      if( (ret==0) && pU)
+       {
+         /* openmp sections */
+         /* openmp section */
+         {
+           int unused= complexArg 
+             ? iComputeSandVComplex(pSV, (doublecomplex*)pVT, iRows, iCols, economy, pS, (doublecomplex*)pV)
+             : iComputeSandVReal(pSV, pVT, iRows, iCols, economy, pS, pV);
+         }
+         /* openmp section */
+         {
+           if(pRk) /* compute rank */
+             {
+               int i, rk;
+               tol = (tol == 0.) ? (Max(iRows, iCols) * C2F(dlamch)("e",1L) * pSV[0]) : tol; /* original Fortran code does the strict fp compare */
+               rk= -1;
+               for(i=0; i!= Min(iRows, iCols); ++i)
+               {
+                 if(pSV[i] > tol)
+                   {
+                     rk= i;
+                   }
+               }
+               *pRk= (double)(rk+1);
+             }
+         }
+       }
+    }
+  FREE(pWork);
+  FREE(pRWork);
+  if(pU)
+    {
+      FREE(pVT);
+    }
+  return allocOK ? ret : -1;
+}
+
+
+static int iCorrectSV(double* pSV, int iMinRowsCols)
+{
+  int i, info;
+  for(i=0; i != iMinRowsCols; ++i)
+    {
+      pSV[i]= Abs(pSV[i]); /* should be fabs in c99 */
+    }
+  C2F(dlasrt)("D", &iMinRowsCols, pSV, &info);
+  return info;
+}
+
+static int iComputeSandVComplex( double const* pSV, doublecomplex const* pVT, int iRows, int iCols, int economy, double* pS, doublecomplex* pV)
+{
+  int i, j;
+  int minRowsCols= Min(iRows, iCols);
+  int economyCols= economy ? minRowsCols : iCols;
+  int economyRows= economy ? minRowsCols : iRows;
+  memset(pS, 0, economyRows * economyCols * sizeof(double));
+  for(i=0; i != minRowsCols; ++i)
+    {
+      pS[i+i*economyRows]= pSV[i];
+    }
+  /* multicore: with at least 2 caches, each half ( in !economy mode) should be done in it's on thread to avoid cache trashing */
+  for(j=0; j != economyCols; ++j)
+    {
+      for(i= 0; i != iCols; ++i)
+       {
+         pV[i + j*iCols].r= pVT[j + i*economyCols].r;
+         pV[i + j*iCols].i= -pVT[j + i*economyCols].i;
+         if(!economy)
+           {
+             pV[j + i*economyCols].r= pVT[i + j*iCols].r;
+             pV[j + i*economyCols].i= -pVT[i + j*iCols].i;
+           }
+       }
+    }
+  return 0;
+}
+
+static int iComputeSandVReal( double const* pSV, double const* pVT, int iRows, int iCols, int economy, double* pS, double* pV)
+{
+  int i, j;
+  int minRowsCols= Min(iRows, iCols);
+  int economyCols= economy ? minRowsCols : iCols;
+  int economyRows= economy ? minRowsCols : iRows;
+
+  memset(pS, 0, economyRows * economyCols * sizeof(double));
+  for(i=0; i != minRowsCols; ++i)
+    {
+      pS[i+i*economyRows]= pSV[i];
+    }
+  for(j=0; j != economyCols; ++j)
+    {
+      for(i= 0; i != iCols; ++i)
+       {
+         pV[i + j*iCols]= pVT[j + i*economyCols];
+         if(!economy)
+           {
+             pV[j + i*iCols]= pVT[i + j*iCols];
+           }
+       }
+    }
+  return 0;
+}