Mumps.cxx
// Copyright (C) 2003-2009 Marc Duruflé
//
// This file is part of the linear-algebra library Seldon,
// http://seldon.sourceforge.net/.
//
// Seldon is free software; you can redistribute it and/or modify it under the
// terms of the GNU Lesser General Public License as published by the Free
// Software Foundation; either version 2.1 of the License, or (at your option)
// any later version.
//
// Seldon is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
// more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with Seldon. If not, see http://www.gnu.org/licenses/.


#ifndef SELDON_FILE_MUMPS_CXX

#include "Mumps.hxx"

extern "C"
{
  // we include MPI from the sequential version of MUMPS if the
  // compilation is not performed for a parallel machine
#ifndef SELDON_WITH_MPI
#include "mpi.h"
#endif
}

namespace Seldon
{

  //! Calls the MUMPS routine for double-precision entries.
  template<>
  void MatrixMumps<double>::CallMumps()
  {
    dmumps_c(&struct_mumps);
  }


  //! Calls the MUMPS routine for complex double-precision entries.
  template<>
  void MatrixMumps<complex<double> >::CallMumps()
  {
    zmumps_c(&struct_mumps);
  }


  //! Default constructor.
  template<class T>
  MatrixMumps<T>::MatrixMumps()
  {
    struct_mumps.comm_fortran = -987654;

    // parameters for MUMPS
    struct_mumps.job = -1;
    struct_mumps.par = 1;
    struct_mumps.sym = 0; // 0 -> unsymmetric matrix

    struct_mumps.info[8] = 0;
    struct_mumps.info[9] = 0;

    // other parameters
    struct_mumps.n = 0;
    type_ordering = 7; // default: we let MUMPS choose the ordering
    print_level = -1;
    info_facto = 0;
    out_of_core = false;
    parallel_ordering = false;
    coef_overestimate = 1.3;
    coef_increase_memory = 1.5;
    coef_max_overestimate = 50.0;
    threshold_pivot = 0.01;
  }


  template<class T>
  bool MatrixMumps<T>::UseInteger8() const
  {
    // BLAS is interfaced with 32-bit integers,
    // so we assume that MUMPS is compiled with usual integers
    return false;
  }


  //! Function used to force the factorization when the estimated workspace was too small.
  template<class T>
  void MatrixMumps<T>::IterateFacto()
  {
    /*DISP(struct_mumps.info[0]);
      DISP(struct_mumps.icntl[23]);
      DISP(struct_mumps.cntl[2]);
      if (struct_mumps.info[0] == -10)
        {
          int npiv = struct_mumps.infog[27];
          cout << "npiv =" << npiv << endl;
          for (int i = 0; i < npiv; i++)
            DISP(struct_mumps.pivnul_list[i]);
        } */

    // if error -9 (or a related memory error) occurs, we retry with a larger size
    int init_percentage = struct_mumps.icntl[13];
    int new_percentage = init_percentage;
    while (((struct_mumps.info[0] == -9) || (struct_mumps.info[0] == -8)
            || (struct_mumps.info[0] == -17) || (struct_mumps.info[0] == -20))
           && (new_percentage < coef_max_overestimate*100.0))
      {
        new_percentage = int(new_percentage*coef_increase_memory);
        struct_mumps.icntl[13] = new_percentage;
        struct_mumps.job = 2;
        CallMumps();
      }

    struct_mumps.icntl[13] = init_percentage;
    info_facto = struct_mumps.info[0];
  }
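

  /* Note (illustrative, not part of the original source): IterateFacto
     applies MUMPS's standard remedy for the memory errors -8, -9, -17
     and -20: the workspace relaxation ICNTL(14) is increased geometrically
     and the numerical factorization (job = 2) is repeated. With the default
     coefficients set in the constructor, the percentage grows as

       30% -> 45% -> 67% -> 100% -> ...

     (coef_increase_memory = 1.5), until the factorization succeeds or the
     percentage would exceed coef_max_overestimate*100 = 5000%. */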


  //! Calls the initialization routine provided by MUMPS.
  template<class T>
  void MatrixMumps<T>
  ::InitMatrix(bool sym, bool distributed)
  {
    // we clear the previous factorization
    Clear();

#ifdef SELDON_WITH_MPI
    // MPI initialization for the parallel version
    if (distributed)
      {
        // for distributed matrices
        // we consider that the communicator has been set previously
      }
    else
      {
        // centralized matrix => a linear system per processor
        struct_mumps.comm_fortran = MPI_Comm_c2f(MPI_COMM_SELF);
      }
#endif

    // symmetry is specified during the initialization stage
    struct_mumps.job = -1;
    if (sym)
      {
        struct_mumps.sym = 2; // general symmetric matrix
        threshold_pivot = 0.01;
      }
    else
      struct_mumps.sym = 0; // unsymmetric matrix

    // MUMPS is called
    CallMumps();

    // pivot threshold
    //struct_mumps.icntl[23] = 1;
    struct_mumps.cntl[0] = threshold_pivot;
    struct_mumps.icntl[13] = int(100.0*(coef_overestimate-1.0));
    //struct_mumps.cntl[2] = 1e-30;
    if (parallel_ordering)
      {
        struct_mumps.icntl[6] = 0;
        struct_mumps.icntl[27] = 2;
        struct_mumps.icntl[28] = type_ordering;
      }
    else
      {
        struct_mumps.icntl[6] = type_ordering;
        if (type_ordering == 1)
          struct_mumps.perm_in = perm.GetData();
      }

    // setting out-of-core parameters
    if (out_of_core)
      struct_mumps.icntl[21] = 1;
    else
      struct_mumps.icntl[21] = 0;

    struct_mumps.icntl[17] = 0;

    // the print level is set in MUMPS
    if (print_level >= 0)
      {
        struct_mumps.icntl[0] = 6;
        struct_mumps.icntl[1] = 0;
        struct_mumps.icntl[2] = 6;
        struct_mumps.icntl[3] = 2;
      }
    else
      {
        struct_mumps.icntl[0] = -1;
        struct_mumps.icntl[1] = -1;
        struct_mumps.icntl[2] = -1;
        struct_mumps.icntl[3] = 0;
      }
  }


  //! Selects another ordering scheme.
  template<class T>
  void MatrixMumps<T>::SelectOrdering(int num_ordering)
  {
    type_ordering = num_ordering;
  }


  //! Selects another ordering scheme (for distributed matrices).
  template<class T>
  void MatrixMumps<T>::SelectParallelOrdering(int num_ordering)
  {
    parallel_ordering = true;
    type_ordering = num_ordering;
  }


  //! Provides the permutation array.
  template<class T>
  void MatrixMumps<T>::SetPermutation(const IVect& permut)
  {
    type_ordering = 1;
    perm.Reallocate(permut.GetM());
    for (int i = 0; i < perm.GetM(); i++)
      perm(i) = permut(i) + 1;
  }


  //! Sets the threshold for pivoting.
  template<class T>
  void MatrixMumps<T>::SetPivotThreshold(double eps)
  {
    threshold_pivot = eps;
  }


  //! Destructor.
  template<class T>
  MatrixMumps<T>::~MatrixMumps()
  {
    Clear();
  }


  //! Clears the factorization.
  template<class T>
  void MatrixMumps<T>::Clear()
  {
    if (struct_mumps.n > 0)
      {
        struct_mumps.job = -2;
        // MUMPS variables are deleted.
        CallMumps();

        num_row_glob.Clear();
        num_col_glob.Clear();
        struct_mumps.n = 0;
      }
  }


  //! Informs MUMPS that no message should be displayed.
  template<class T>
  void MatrixMumps<T>::HideMessages()
  {
    print_level = -1;

    struct_mumps.icntl[0] = -1;
    struct_mumps.icntl[1] = -1;
    struct_mumps.icntl[2] = -1;
    struct_mumps.icntl[3] = 0;
  }


  //! Informs MUMPS to display standard output.
  template<class T>
  void MatrixMumps<T>::ShowMessages()
  {
    print_level = 0;

    struct_mumps.icntl[0] = 6;
    struct_mumps.icntl[1] = 0;
    struct_mumps.icntl[2] = 6;
    struct_mumps.icntl[3] = 2;
  }


  //! Enables writing on the disk (out of core).
  template<class T>
  void MatrixMumps<T>::EnableOutOfCore()
  {
    out_of_core = true;
  }


  //! Disables writing on the disk (in core).
  template<class T>
  void MatrixMumps<T>::DisableOutOfCore()
  {
    out_of_core = false;
  }


  //! Sets the coefficient used to overestimate the needed memory.
  template<class T>
  void MatrixMumps<T>::SetCoefficientEstimationNeededMemory(double coef)
  {
    coef_overestimate = coef;
  }


  //! Sets the maximal allowed coefficient for the memory space multiplication.
  template<class T>
  void MatrixMumps<T>::SetMaximumCoefficientEstimationNeededMemory(double coef)
  {
    coef_max_overestimate = coef;
  }


  //! Sets the multiplication factor applied at each new attempt to factorize the matrix.
  template<class T>
  void MatrixMumps<T>::SetIncreaseCoefficientEstimationNeededMemory(double coef)
  {
    coef_increase_memory = coef;
  }


  //! Computes an ordering for the matrix renumbering.
  template<class T> template<class T0, class Prop, class Storage, class Allocator>
  void MatrixMumps<T>::FindOrdering(Matrix<T0, Prop, Storage, Allocator>& mat,
                                    IVect& numbers, bool keep_matrix)
  {
    InitMatrix(IsSymmetricMatrix(mat));

    int n = mat.GetM();
    // conversion to coordinate format
    Vector<MUMPS_INT> num_row, num_col; Vector<T> values;
    ConvertMatrix_to_Coordinates(mat, num_row, num_col, values, 1);

    long nnz = num_col.GetSize();
    // no values are needed to renumber
    values.Clear();
    if (!keep_matrix)
      mat.Clear();

    struct_mumps.n = n; struct_mumps.nz = nnz;
    struct_mumps.nnz = nnz;
    struct_mumps.irn = num_row.GetData();
    struct_mumps.jcn = num_col.GetData();

    // Call the MUMPS package.
    struct_mumps.job = 1; // we analyse the system
    CallMumps();

    info_facto = struct_mumps.info[0];

    numbers.Reallocate(n);
    for (int i = 0; i < n; i++)
      numbers(i) = struct_mumps.sym_perm[i]-1;
  }


  //! Factorizes a given matrix.
  template<class T> template<class T0, class Prop, class Storage, class Allocator>
  void MatrixMumps<T>::FactorizeMatrix(Matrix<T0, Prop, Storage, Allocator>& mat,
                                       bool keep_matrix)
  {
    int n = mat.GetM();
    bool sym = IsSymmetricMatrix(mat);
    // conversion to coordinate format with Fortran convention (1-indexed)
    Vector<MUMPS_INT> num_row, num_col; Vector<T> values;
    ConvertMatrix_to_Coordinates(mat, num_row, num_col, values, 1);
    if (!keep_matrix)
      mat.Clear();

    FactorizeCoordinate1(n, num_row, num_col, values, sym);
  }
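

  /* Usage sketch (illustrative only, not part of the original source,
     assuming A and b have been filled with a consistent size n):

       Matrix<double, General, RowSparse> A;
       Vector<double> b;
       // ... fill A and b ...
       MatrixMumps<double> mat_lu;
       mat_lu.FactorizeMatrix(A);   // A is cleared unless keep_matrix = true
       if (mat_lu.GetInfoFactorization() == 0)
         mat_lu.Solve(b);           // b is overwritten with the solution
  */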


  //! Factorizes a coordinate matrix with 1-based index numbering.
  template<class T>
  void MatrixMumps<T>::FactorizeCoordinate1(int n, Vector<MUMPS_INT>& num_row,
                                            Vector<MUMPS_INT>& num_col,
                                            Vector<T>& values, bool sym)
  {
    InitMatrix(sym);

    long nnz = values.GetM();
    //if (nnz > INT_MAX)
    //cout << "Factorization of a large matrix" << endl;

    struct_mumps.n = n; struct_mumps.nz = nnz;
    struct_mumps.nnz = nnz;
    struct_mumps.irn = num_row.GetData();
    struct_mumps.jcn = num_col.GetData();
    struct_mumps.a = reinterpret_cast<pointer>(values.GetData());

    // Call the MUMPS package.
    struct_mumps.job = 4; // we analyse and factorize the system
    CallMumps();

    IterateFacto();
  }


  //! Symbolic factorization.
  template<class T> template<class Prop, class Storage, class Allocator>
  void MatrixMumps<T>
  ::PerformAnalysis(Matrix<T, Prop, Storage, Allocator>& mat)
  {
    InitMatrix(IsSymmetricMatrix(mat));

    int n = mat.GetM();
    long nnz = mat.GetNonZeros();
    // conversion to coordinate format with Fortran convention (1-indexed)
    Vector<T> values;
    ConvertMatrix_to_Coordinates(mat, num_row_glob, num_col_glob, values, 1);

    struct_mumps.n = n; struct_mumps.nz = nnz;
    struct_mumps.nnz = nnz;
    struct_mumps.irn = num_row_glob.GetData();
    struct_mumps.jcn = num_col_glob.GetData();
    struct_mumps.a = reinterpret_cast<pointer>(values.GetData());

    // Call the MUMPS package.
    struct_mumps.job = 1; // we analyse the system
    CallMumps();

    info_facto = struct_mumps.info[0];
  }


  //! Numerical factorization.
  /*!
    We assume that the values of mat correspond to
    the row/column numbers given during the analysis.
  */
  template<class T> template<class Prop, class Allocator>
  void MatrixMumps<T>
  ::PerformFactorization(Matrix<T, Prop, RowSparse, Allocator>& mat)
  {
    // we consider that the values correspond
    // to the row/column numbers given for the analysis
    struct_mumps.a = reinterpret_cast<pointer>(mat.GetData());

    // Call the MUMPS package.
    struct_mumps.job = 2; // we factorize the system
    CallMumps();

    IterateFacto();
  }


  //! Numerical factorization for a symmetric matrix.
  template<class T> template<class Prop, class Allocator>
  void MatrixMumps<T>
  ::PerformFactorization(Matrix<T, Prop, RowSymSparse, Allocator>& mat)
  {
    // we consider that the values correspond
    // to the row/column numbers given for the analysis
    struct_mumps.a = reinterpret_cast<pointer>(mat.GetData());

    // Call the MUMPS package.
    struct_mumps.job = 2; // we factorize the system
    CallMumps();

    IterateFacto();
  }


  //! Numerical factorization for any storage.
  template<class T> template<class Prop, class Storage, class Allocator>
  void MatrixMumps<T>
  ::PerformFactorization(Matrix<T, Prop, Storage, Allocator>& mat)
  {
    Vector<T> values;
    ConvertMatrix_to_Coordinates(mat, num_row_glob, num_col_glob, values, 1);

    // we consider that the values correspond
    // to the row/column numbers given for the analysis
    struct_mumps.a = reinterpret_cast<pointer>(values.GetData());

    // Call the MUMPS package.
    struct_mumps.job = 2; // we factorize the system
    CallMumps();

    IterateFacto();
  }


  //! Returns the memory used by the factorization in bytes.
  template<class T>
  size_t MatrixMumps<T>::GetMemorySize() const
  {
    size_t taille = sizeof(int)*(num_row_glob.GetM()
                                 + num_col_glob.GetM() + perm.GetM());

    if (struct_mumps.n <= 0)
      return taille;

    size_t nnz = struct_mumps.info[8];
    if (struct_mumps.info[8] < 0)
      nnz = abs(struct_mumps.info[8])*size_t(1024*1024);

    taille += sizeof(T)*nnz;
    nnz = struct_mumps.info[9];
    if (struct_mumps.info[9] < 0)
      nnz = abs(struct_mumps.info[9])*size_t(1024*1024);

    taille += sizeof(int)*nnz;
    return taille;
  }


  //! Returns information about the performed factorization.
  template<class T>
  int MatrixMumps<T>::GetInfoFactorization() const
  {
    return info_facto;
  }


  //! Computes the Schur complement of the matrix on a subset of unknowns.
  template<class T> template<class Prop1, class Storage1, class Allocator,
                             class Prop2, class Storage2, class Allocator2>
  void MatrixMumps<T>::
  GetSchurMatrix(Matrix<T, Prop1, Storage1, Allocator>& mat, const IVect& num,
                 Matrix<T, Prop2, Storage2, Allocator2>& mat_schur,
                 bool keep_matrix)
  {
    InitMatrix(IsSymmetricMatrix(mat));

    int n_schur = num.GetM();
    // subscripts are shifted to respect the Fortran convention
    Vector<MUMPS_INT> index_schur(n_schur);
    for (int i = 0; i < n_schur; i++)
      index_schur(i) = num(i)+1;

    // array that will contain the values of the Schur matrix
    Vector<T, VectFull, Allocator2> vec_schur(n_schur*n_schur);
    vec_schur.Fill(0);

    struct_mumps.icntl[18] = 1;
    struct_mumps.size_schur = n_schur;
    struct_mumps.listvar_schur = index_schur.GetData();
    struct_mumps.schur = reinterpret_cast<pointer>(vec_schur.GetData());

    int n = mat.GetM(); long nnz = mat.GetNonZeros();
    // conversion to coordinate format with Fortran convention (1-indexed)
    Vector<MUMPS_INT> num_row, num_col;
    Vector<T> values;
    ConvertMatrix_to_Coordinates(mat, num_row, num_col, values, 1);
    if (!keep_matrix)
      mat.Clear();

    struct_mumps.n = n;
    struct_mumps.nz = nnz; struct_mumps.nnz = nnz;
    struct_mumps.irn = num_row.GetData();
    struct_mumps.jcn = num_col.GetData();
    struct_mumps.a = reinterpret_cast<pointer>(values.GetData());

    // Call the MUMPS package.
    struct_mumps.job = 4; // we analyse and factorize the system
    CallMumps();

    IterateFacto();

    // resetting parameters related to the Schur complement
    struct_mumps.icntl[18] = 0;
    struct_mumps.size_schur = 0;
    struct_mumps.listvar_schur = NULL;
    struct_mumps.schur = NULL;

    // the Schur complement is stored by rows
    int nb = 0;
    mat_schur.Reallocate(n_schur, n_schur);
    if (IsSymmetricMatrix(mat))
      for (int i = 0; i < n_schur; i++)
        for (int j = 0; j <= i; j++)
          {
            mat_schur(i, j) = vec_schur(i*n_schur + j);
            mat_schur(j, i) = vec_schur(i*n_schur + j);
          }
    else
      for (int i = 0; i < n_schur; i++)
        for (int j = 0; j < n_schur; j++)
          mat_schur(i, j) = vec_schur(nb++);

    vec_schur.Clear(); index_schur.Clear();
  }
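

  /* Usage sketch (illustrative only, not part of the original source):
     computing the Schur complement of a symmetric matrix A on the unknowns
     listed in num (0-based indices):

       Matrix<double, Symmetric, RowSymSparse> A;
       IVect num;
       // ... fill A and num ...
       MatrixMumps<double> mat_lu;
       Matrix<double, General, RowMajor> schur;
       mat_lu.GetSchurMatrix(A, num, schur, false);
  */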


  //! Solves a linear system using the computed factorization.
  template<class T> template<class Allocator2>
  void MatrixMumps<T>::Solve(const SeldonTranspose& TransA,
                             Vector<T, VectFull, Allocator2>& x)
  {
#ifdef SELDON_CHECK_DIMENSIONS
    if (x.GetM() != struct_mumps.n)
      throw WrongDim("Mumps::Solve(TransA, c)",
                     string("The length of x is equal to ")
                     + to_str(x.GetM())
                     + " while the size of the matrix is equal to "
                     + to_str(struct_mumps.n) + ".");
#endif

    Solve(TransA, x.GetData(), 1);
  }


  //! Solves a linear system with several right-hand sides stored contiguously.
  template<class T>
  void MatrixMumps<T>::Solve(const SeldonTranspose& TransA, T* x_ptr, int nrhs)
  {
    if (TransA.Trans())
      struct_mumps.icntl[8] = 0;
    else
      struct_mumps.icntl[8] = 1;

    struct_mumps.nrhs = nrhs;
    struct_mumps.lrhs = struct_mumps.n;
    struct_mumps.rhs = reinterpret_cast<pointer>(x_ptr);
    struct_mumps.job = 3; // we solve the system
    CallMumps();
  }


  //! Solves a linear system (no transpose).
  template<class T> template<class Allocator2>
  void MatrixMumps<T>::Solve(Vector<T, VectFull, Allocator2>& x)
  {
    Solve(SeldonNoTrans, x);
  }


  //! Solves a linear system with several right-hand sides.
  template<class T>
  template<class Allocator2, class Prop>
  void MatrixMumps<T>::Solve(const SeldonTranspose& TransA,
                             Matrix<T, Prop, ColMajor, Allocator2>& x)
  {
#ifdef SELDON_CHECK_BOUNDS
    if (x.GetM() != struct_mumps.n)
      throw WrongDim("Mumps::Solve", string("Row size of x is equal to ")
                     + to_str(x.GetM()) + " while size of matrix is equal to "
                     + to_str(struct_mumps.n));
#endif

    Solve(TransA, x.GetData(), x.GetN());
  }
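

  /* Usage sketch (illustrative only, not part of the original source):
     solving A^T X = B for several right-hand sides stored column-wise,
     assuming mat_lu already holds a factorization of an n x n matrix:

       Matrix<double, General, ColMajor> B(n, 3);
       // ... fill B ...
       mat_lu.Solve(SeldonTrans, B);   // B is overwritten with the solutions
  */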


#ifdef SELDON_WITH_MPI
  //! Factorizes a distributed matrix (32-bit row/column indices).
  template<class T>
  void MatrixMumps<T>::
  FactorizeDistributedMatrix(MPI_Comm& comm_facto, Vector<long>& Ptr,
                             Vector<int>& IndRow, Vector<T>& Val,
                             const Vector<int>& glob_number,
                             bool sym, bool keep_matrix)
  {
    FactorizeParallel(comm_facto, Ptr, IndRow, Val, glob_number,
                      sym, keep_matrix);
  }


  template<class T>
  void MatrixMumps<T>::
  FactorizeDistributedMatrix(MPI_Comm& comm_facto, Vector<int64_t>& Ptr,
                             Vector<int64_t>& IndRow, Vector<T>& Val,
                             const Vector<int>& glob_number,
                             bool sym, bool keep_matrix)
  {
    cout << "Mumps with 64-bit integers does not work in parallel" << endl;
    abort();
  }


  //! Factorizes a matrix distributed over the processors of comm_facto.
  template<class T>
  void MatrixMumps<T>::
  FactorizeParallel(MPI_Comm& comm_facto, Vector<long>& Ptr,
                    Vector<MUMPS_INT>& IndRow, Vector<T>& Val,
                    const Vector<int>& glob_number,
                    bool sym, bool keep_matrix)
  {
    // for a distributed matrix, we convert the C communicator
    struct_mumps.comm_fortran = MPI_Comm_c2f(comm_facto);

    // initialization depending on the symmetry of the matrix
    InitMatrix(sym, true);

    // Fortran communicator
    struct_mumps.comm_fortran = MPI_Comm_c2f(comm_facto);

    // distributed matrix
    struct_mumps.icntl[17] = 3;

    // finding the size of the overall system
    int nmax = 0, N = 0;
    for (int i = 0; i < glob_number.GetM(); i++)
      nmax = max(glob_number(i)+1, nmax);

    MPI_Allreduce(&nmax, &N, 1, MPI_INTEGER, MPI_MAX, comm_facto);

    // number of non-zero entries on this processor
    long nnz = IndRow.GetSize();

    // adding 1 to obtain 1-based indices
    Vector<MUMPS_INT> IndCol(nnz);
    for (long i = 0; i < IndRow.GetM(); i++)
      IndRow(i)++;

    for (int i = 0; i < Ptr.GetM()-1; i++)
      for (long j = Ptr(i); j < Ptr(i+1); j++)
        IndCol(j) = glob_number(i) + 1;

    if (!keep_matrix)
      Ptr.Clear();

    // define the problem on the host
    struct_mumps.n = N;
    struct_mumps.nz_loc = nnz; struct_mumps.nnz_loc = nnz;
    struct_mumps.irn_loc = IndRow.GetData();
    struct_mumps.jcn_loc = IndCol.GetData();
    struct_mumps.a_loc = reinterpret_cast<pointer>(Val.GetData());

    // Call the MUMPS package.
    struct_mumps.job = 1; // we analyse the system
    CallMumps();

    // overestimating the size in order to avoid error -9
    double coef = coef_overestimate;
    struct_mumps.icntl[22] = int(coef*struct_mumps.infog[25]);
    struct_mumps.job = 2; // we factorize the system
    CallMumps();

    int info = 0;
    MPI_Allreduce(&struct_mumps.info[0], &info, 1, MPI_INTEGER, MPI_MIN, comm_facto);

    bool test_loop = true;
    int init_percentage = struct_mumps.icntl[13];
    int new_percentage = init_percentage;
    // loop in order to complete the factorization
    while (test_loop)
      {
        if (((info == -9) || (info == -8) || (info == -17) || (info == -19)
             || (info == -20)) && (coef < coef_max_overestimate))
          {
            coef *= coef_increase_memory;
            // increasing icntl(23) if error -9 occurred
            struct_mumps.icntl[22] = int(coef*struct_mumps.infog[25]);
            new_percentage = int(new_percentage*coef_increase_memory);
            struct_mumps.icntl[13] = new_percentage;
          }
        else
          test_loop = false;

        if (test_loop)
          {
            CallMumps();

            MPI_Allreduce(&struct_mumps.info[0],
                          &info, 1, MPI_INTEGER, MPI_MIN, comm_facto);
          }
      }

    struct_mumps.icntl[13] = init_percentage;
    info_facto = info;

    int rank_proc; MPI_Comm_rank(comm_facto, &rank_proc);
    if ((rank_proc == 0) && (print_level >= 0))
      cout << "Factorization completed" << endl;
  }


  //! Performs the analysis of a distributed matrix.
  template<class T>
  void MatrixMumps<T>
  ::PerformAnalysisDistributed(MPI_Comm& comm_facto, Vector<long>& Ptr,
                               Vector<int>& IndRow, Vector<T>& Val,
                               const Vector<int>& glob_number,
                               bool sym, bool keep_matrix)
  {
    // for a distributed matrix, we convert the C communicator
    struct_mumps.comm_fortran = MPI_Comm_c2f(comm_facto);

    // initialization depending on the symmetry of the matrix
    InitMatrix(sym, true);

    // Fortran communicator
    struct_mumps.comm_fortran = MPI_Comm_c2f(comm_facto);

    // distributed matrix
    struct_mumps.icntl[17] = 3;

    // finding the size of the overall system
    int nmax = 0, N = 0;
    for (int i = 0; i < glob_number.GetM(); i++)
      nmax = max(glob_number(i)+1, nmax);

    MPI_Allreduce(&nmax, &N, 1, MPI_INTEGER, MPI_MAX, comm_facto);

    // number of non-zero entries on this processor
    long nnz = IndRow.GetSize();

    // adding 1 to obtain 1-based indices
    Vector<int> IndCol(nnz);
    for (long i = 0; i < IndRow.GetM(); i++)
      IndRow(i)++;

    for (int i = 0; i < Ptr.GetM()-1; i++)
      for (long j = Ptr(i); j < Ptr(i+1); j++)
        IndCol(j) = glob_number(i) + 1;

    if (!keep_matrix)
      Ptr.Clear();

    // define the problem on the host
    struct_mumps.n = N;
    struct_mumps.nz_loc = nnz;
    struct_mumps.nnz_loc = nnz;
    struct_mumps.irn_loc = IndRow.GetData();
    struct_mumps.jcn_loc = IndCol.GetData();
    struct_mumps.a_loc = reinterpret_cast<pointer>(Val.GetData());

    // Call the MUMPS package.
    struct_mumps.job = 1; // we analyse the system
    CallMumps();
  }


  template<class T>
  void MatrixMumps<T>
  ::PerformAnalysisDistributed(MPI_Comm& comm_facto, Vector<int64_t>& Ptr,
                               Vector<int64_t>& IndRow, Vector<T>& Val,
                               const Vector<int>& glob_number,
                               bool sym, bool keep_matrix)
  {
    cout << "Mumps with 64-bit integers does not work in parallel" << endl;
    abort();
  }


  //! Performs the factorization of a distributed matrix (after the analysis).
  template<class T>
  void MatrixMumps<T>
  ::PerformFactorizationDistributed(MPI_Comm& comm_facto, Vector<long>& Ptr,
                                    Vector<int>& IndRow, Vector<T>& Val,
                                    const Vector<int>& glob_number,
                                    bool sym, bool keep_matrix)
  {
    // number of non-zero entries on this processor
    long nnz = IndRow.GetSize();

    // adding 1 to obtain 1-based indices
    Vector<int> IndCol(nnz);
    for (long i = 0; i < IndRow.GetM(); i++)
      IndRow(i)++;

    for (int i = 0; i < Ptr.GetM()-1; i++)
      for (long j = Ptr(i); j < Ptr(i+1); j++)
        IndCol(j) = glob_number(i) + 1;

    if (!keep_matrix)
      Ptr.Clear();

    struct_mumps.nz_loc = nnz;
    struct_mumps.nnz_loc = nnz;
    struct_mumps.irn_loc = IndRow.GetData();
    struct_mumps.jcn_loc = IndCol.GetData();
    struct_mumps.a_loc = reinterpret_cast<pointer>(Val.GetData());

    // overestimating the size in order to avoid error -9
    double coef = coef_overestimate;
    struct_mumps.icntl[22] = int(coef*struct_mumps.infog[25]);
    struct_mumps.job = 2; // we factorize the system

    CallMumps();

    int info = 0;
    MPI_Allreduce(&struct_mumps.info[0], &info, 1, MPI_INTEGER, MPI_MIN, comm_facto);

    bool test_loop = true;
    int init_percentage = struct_mumps.icntl[13];
    int new_percentage = init_percentage;
    // loop in order to complete the factorization
    while (test_loop)
      {
        if (((info == -9) || (info == -8) || (info == -17) || (info == -20))
            && (coef < coef_max_overestimate))
          {
            coef *= coef_increase_memory;
            // increasing icntl(23) if error -9 occurred
            struct_mumps.icntl[22] = int(coef*struct_mumps.infog[25]);
            new_percentage = int(new_percentage*coef_increase_memory);
            struct_mumps.icntl[13] = new_percentage;
          }
        else
          test_loop = false;

        if (test_loop)
          {
            CallMumps();

            MPI_Allreduce(&struct_mumps.info[0],
                          &info, 1, MPI_INTEGER, MPI_MIN, comm_facto);
          }
      }

    struct_mumps.icntl[13] = init_percentage;
    info_facto = info;

    int rank_proc; MPI_Comm_rank(comm_facto, &rank_proc);
    if ((rank_proc == 0) && (print_level >= 0))
      cout << "Factorization completed" << endl;
  }


  template<class T>
  void MatrixMumps<T>
  ::PerformFactorizationDistributed(MPI_Comm& comm_facto, Vector<int64_t>& Ptr,
                                    Vector<int64_t>& IndRow, Vector<T>& Val,
                                    const Vector<int>& glob_number,
                                    bool sym, bool keep_matrix)
  {
    cout << "Mumps with 64-bit integers does not work in parallel" << endl;
    abort();
  }


  //! Solves a distributed linear system for a single right-hand side.
  template<class T> template<class Allocator2>
  void MatrixMumps<T>::SolveDistributed(MPI_Comm& comm_facto,
                                        const SeldonTranspose& TransA,
                                        Vector<T, VectFull, Allocator2>& x,
                                        const IVect& glob_num)
  {
    SolveDistributed(comm_facto, TransA, x.GetData(), 1, glob_num);
  }


  //! Solves a distributed linear system for several right-hand sides.
  template<class T> template<class Allocator2, class Prop>
  void MatrixMumps<T>::SolveDistributed(MPI_Comm& comm_facto,
                                        const SeldonTranspose& TransA,
                                        Matrix<T, Prop, ColMajor, Allocator2>& x,
                                        const IVect& glob_num)
  {
    SolveDistributed(comm_facto, TransA, x.GetData(), x.GetN(), glob_num);
  }


  //! Solves a distributed linear system, the right-hand sides being given through a pointer.
  template<class T>
  void MatrixMumps<T>::SolveDistributed(MPI_Comm& comm_facto,
                                        const SeldonTranspose& TransA,
                                        T* x_ptr, int nrhs,
                                        const IVect& glob_num)
  {
    // storing the global right-hand side
    Matrix<T, General, ColMajor> rhs;
    int cplx = sizeof(T)/8;
    int nblock = 8; // 8 right-hand sides at the same time
    // allocating the global right-hand side
    int rank_proc; MPI_Comm_rank(comm_facto, &rank_proc);
    if (rank_proc == 0)
      {
        rhs.Reallocate(struct_mumps.n, min(nrhs, nblock));
        rhs.Zero();
      }

    Matrix<T, General, ColMajor> xp;
    int nb_procs; MPI_Comm_size(comm_facto, &nb_procs);
    MPI_Status status;

    // retrieving the dofs of all processors
    Vector<IVect> nump(nb_procs);
    if (nb_procs > 1)
      {
        if (rank_proc == 0)
          {
            for (int i = 0; i < nb_procs; i++)
              {
                if (i != 0)
                  {
                    int nb_dof;
                    MPI_Recv(&nb_dof, 1, MPI_INTEGER, i, 33, comm_facto, &status);

                    nump(i).Reallocate(nb_dof);
                    MPI_Recv(nump(i).GetData(), nb_dof, MPI_INTEGER,
                             i, 38, comm_facto, &status);
                  }
                else
                  nump(i) = glob_num;
              }
          }
        else
          {
            int nb = glob_num.GetM();
            MPI_Send(&nb, 1, MPI_INTEGER, 0, 33, comm_facto);
            MPI_Send(glob_num.GetData(), nb, MPI_INTEGER, 0, 38, comm_facto);
          }
      }

    // then loop over the blocks of right-hand sides
    int num_rhs = 0; long ldb = glob_num.GetM();
    int lvl = print_level; HideMessages();
    while (num_rhs < nrhs)
      {
        int nrhs_p = min(nrhs-num_rhs, nblock);
        if (rank_proc == 0)
          {
            // on the host, we retrieve the data of all the other processors
            if (nb_procs > 1)
              {
                // assembling the right-hand side
                for (int i = 0; i < nb_procs; i++)
                  {
                    if (i != 0)
                      {
                        // on the host processor, receiving the components
                        // of the right-hand side
                        int nb_dof = nump(i).GetM();
                        xp.Reallocate(nb_dof, nrhs_p);

                        if (nb_dof > 0)
                          MPI_Recv(xp.GetDataVoid(), cplx*nb_dof*nrhs_p,
                                   MPI_DOUBLE, i, 37, comm_facto, &status);
                      }
                    else
                      {
                        xp.Reallocate(glob_num.GetM(), nrhs_p);
                        for (int k = 0; k < nrhs_p; k++)
                          for (int j = 0; j < glob_num.GetM(); j++)
                            xp(j, k) = x_ptr[j + (num_rhs+k)*ldb];
                      }

                    for (int k = 0; k < nrhs_p; k++)
                      for (int j = 0; j < nump(i).GetM(); j++)
                        rhs(nump(i)(j), k) = xp(j, k);
                  }
              }
            else
              {
                for (int k = 0; k < nrhs_p; k++)
                  for (int j = 0; j < glob_num.GetM(); j++)
                    rhs(j, k) = x_ptr[j + (num_rhs+k)*ldb];
              }

            struct_mumps.rhs = reinterpret_cast<pointer>(rhs.GetData());
          }
        else
          {
            // on the other processors, we send the right-hand side
            int nb_dof = glob_num.GetM();
            if (nb_dof > 0)
              MPI_Send(&x_ptr[num_rhs*nb_dof], cplx*nb_dof*nrhs_p,
                       MPI_DOUBLE, 0, 37, comm_facto);
          }

        // we solve the system
        if (TransA.Trans())
          struct_mumps.icntl[8] = 0;
        else
          struct_mumps.icntl[8] = 1;

        struct_mumps.nrhs = nrhs_p;
        struct_mumps.lrhs = struct_mumps.n;
        struct_mumps.job = 3;
        CallMumps();

        // we distribute the solution to all the processors
        if (nb_procs > 1)
          {
            if (rank_proc == 0)
              {
                for (int i = 0; i < nb_procs; i++)
                  {
                    if (i != 0)
                      {
                        int nb_dof = nump(i).GetM();
                        xp.Reallocate(nb_dof, nrhs_p);
                        for (int j = 0; j < nb_dof; j++)
                          for (int k = 0; k < nrhs_p; k++)
                            xp(j, k) = rhs(nump(i)(j), k);

                        MPI_Send(xp.GetDataVoid(), cplx*nb_dof*nrhs_p,
                                 MPI_DOUBLE, i, 40, comm_facto);
                      }
                    else
                      {
                        for (int k = 0; k < nrhs_p; k++)
                          for (int j = 0; j < glob_num.GetM(); j++)
                            x_ptr[j + ldb*(num_rhs+k)] = rhs(glob_num(j), k);
                      }
                  }
              }
            else
              {
                int nb_dof = glob_num.GetM();
                xp.Reallocate(nb_dof, nrhs_p);
                MPI_Recv(xp.GetDataVoid(), cplx*nb_dof*nrhs_p,
                         MPI_DOUBLE, 0, 40, comm_facto, &status);

                for (int k = 0; k < nrhs_p; k++)
                  for (int j = 0; j < glob_num.GetM(); j++)
                    x_ptr[j + (num_rhs+k)*ldb] = xp(j, k);
              }
          }
        else
          {
            for (int k = 0; k < nrhs_p; k++)
              for (int i = 0; i < glob_num.GetM(); i++)
                x_ptr[i + ldb*(num_rhs+k)] = rhs(i, k);
          }

        num_rhs += nrhs_p;
      }

    if (lvl >= 0)
      ShowMessages();

    print_level = lvl;
  }
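

  /* Usage sketch (illustrative only, not part of the original source):
     each processor provides its local columns through the pointer array Ptr
     and the row indices IndRow (rows already carrying global 0-based
     numbers), the global column numbers being given in glob_number; after
     the factorization, the local part of the right-hand side is solved
     in place:

       MatrixMumps<double> mat_lu;
       mat_lu.FactorizeDistributedMatrix(comm, Ptr, IndRow, Val,
                                         glob_number, sym, false);
       mat_lu.SolveDistributed(comm, SeldonNoTrans, x_local, glob_number);
  */
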
#endif


  //! Factorizes a matrix with MUMPS (matching real or complex types).
  template<class MatrixSparse, class T>
  void GetLU(MatrixSparse& A, MatrixMumps<T>& mat_lu, bool keep_matrix, T& x)
  {
    mat_lu.FactorizeMatrix(A, keep_matrix);
  }


  //! Overload rejecting a complex matrix with a real MUMPS object.
  template<class MatrixSparse, class T>
  void GetLU(MatrixSparse& A, MatrixMumps<T>& mat_lu, bool keep_matrix, complex<T>& x)
  {
    throw WrongArgument("GetLU(Matrix<complex<T> >& A, MatrixMumps<T>& mat_lu, bool)",
                        "The LU matrix must be complex");
  }


  //! Overload rejecting a real matrix with a complex MUMPS object.
  template<class MatrixSparse, class T>
  void GetLU(MatrixSparse& A, MatrixMumps<complex<T> >& mat_lu, bool keep_matrix, T& x)
  {
    throw WrongArgument("GetLU(Matrix<T>& A, MatrixMumps<complex<T> >& mat_lu, bool)",
                        "The sparse matrix must be complex");
  }


  //! Returns the LU factorization of a matrix.
  template<class T0, class Prop, class Storage, class Allocator, class T>
  void GetLU(Matrix<T0, Prop, Storage, Allocator>& A, MatrixMumps<T>& mat_lu,
             bool keep_matrix)
  {
    // we check whether the type of the non-zero entries of the matrix A
    // and that of the Mumps object (T) are different,
    // and we call one of the GetLU functions written above;
    // such a protection avoids compiling the factorization of a complex
    // matrix with a real Mumps object
    typename Matrix<T0, Prop, Storage, Allocator>::entry_type x;
    GetLU(A, mat_lu, keep_matrix, x);
  }
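

  /* Usage sketch for the free-function interface (illustrative only, not
     part of the original source):

       Matrix<complex<double>, General, ArrayRowSparse> A;
       Vector<complex<double> > b;
       // ... fill A and b ...
       MatrixMumps<complex<double> > mat_lu;
       GetLU(A, mat_lu, false);   // factorization (A is cleared)
       SolveLU(mat_lu, b);        // b now contains the solution
  */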


  //! Computes the Schur complement of a symmetric matrix.
  template<class T, class Storage, class Allocator, class MatrixFull>
  void GetSchurMatrix(Matrix<T, Symmetric, Storage, Allocator>& A,
                      MatrixMumps<T>& mat_lu, const IVect& num,
                      MatrixFull& schur_matrix, bool keep_matrix)
  {
    mat_lu.GetSchurMatrix(A, num, schur_matrix, keep_matrix);
  }


  //! Computes the Schur complement of an unsymmetric matrix.
  template<class T, class Storage, class Allocator, class MatrixFull>
  void GetSchurMatrix(Matrix<T, General, Storage, Allocator>& A,
                      MatrixMumps<T>& mat_lu, const IVect& num,
                      MatrixFull& schur_matrix, bool keep_matrix)
  {
    mat_lu.GetSchurMatrix(A, num, schur_matrix, keep_matrix);
  }


  //! Solves A x = b, where A has been factorized previously.
  template<class T, class Allocator>
  void SolveLU(MatrixMumps<T>& mat_lu, Vector<T, VectFull, Allocator>& x)
  {
    mat_lu.Solve(x);
  }


  //! Solves A x = b or A^T x = b, where A has been factorized previously.
  template<class T, class Allocator>
  void SolveLU(const SeldonTranspose& TransA,
               MatrixMumps<T>& mat_lu, Vector<T, VectFull, Allocator>& x)
  {
    mat_lu.Solve(TransA, x);
  }


  //! Solves a complex system with a real factorization
  //! (real and imaginary parts are solved separately).
  template<class Allocator>
  void SolveLU(MatrixMumps<double>& mat_lu,
               Vector<complex<double>, VectFull, Allocator>& x)
  {
    Matrix<double, General, ColMajor> y(x.GetM(), 2);

    for (int i = 0; i < x.GetM(); i++)
      {
        y(i, 0) = real(x(i));
        y(i, 1) = imag(x(i));
      }

    SolveLU(mat_lu, y);

    for (int i = 0; i < x.GetM(); i++)
      x(i) = complex<double>(y(i, 0), y(i, 1));
  }


  //! Solves a transposed complex system with a real factorization.
  template<class Allocator>
  void SolveLU(const SeldonTranspose& TransA,
               MatrixMumps<double>& mat_lu,
               Vector<complex<double>, VectFull, Allocator>& x)
  {
    Matrix<double, General, ColMajor> y(x.GetM(), 2);

    for (int i = 0; i < x.GetM(); i++)
      {
        y(i, 0) = real(x(i));
        y(i, 1) = imag(x(i));
      }

    SolveLU(TransA, mat_lu, y);

    for (int i = 0; i < x.GetM(); i++)
      x(i) = complex<double>(y(i, 0), y(i, 1));
  }


  //! Overload rejecting a real right-hand side with a complex factorization.
  template<class Allocator>
  void SolveLU(MatrixMumps<complex<double> >& mat_lu,
               Vector<double, VectFull, Allocator>& x)
  {
    throw WrongArgument("SolveLU(MatrixMumps<complex<double> >, Vector<double>)",
                        "The result should be a complex vector");
  }


  template<class Allocator>
  void SolveLU(const SeldonTranspose& TransA,
               MatrixMumps<complex<double> >& mat_lu,
               Vector<double, VectFull, Allocator>& x)
  {
    throw WrongArgument("SolveLU(MatrixMumps<complex<double> >, Vector<double>)",
                        "The result should be a complex vector");
  }


  //! Solves A X = B with several right-hand sides.
  template<class T, class Prop, class Allocator>
  void SolveLU(MatrixMumps<T>& mat_lu,
               Matrix<T, Prop, ColMajor, Allocator>& x)
  {
    mat_lu.Solve(SeldonNoTrans, x);
  }


  //! Solves A X = B or A^T X = B with several right-hand sides.
  template<class T, class Allocator, class Prop>
  void SolveLU(const SeldonTranspose& TransA,
               MatrixMumps<T>& mat_lu, Matrix<T, Prop, ColMajor, Allocator>& x)
  {
    mat_lu.Solve(TransA, x);
  }


  template<class Prop, class Allocator>
  void SolveLU(MatrixMumps<double>& mat_lu,
               Matrix<complex<double>, Prop, ColMajor, Allocator>& x)
  {
    throw WrongArgument("SolveLU(MatrixMumps<double>, Matrix<complex<double> >)",
                        "The result should be a real matrix");
  }


  template<class Prop, class Allocator>
  void SolveLU(const SeldonTranspose& TransA,
               MatrixMumps<double>& mat_lu,
               Matrix<complex<double>, Prop, ColMajor, Allocator>& x)
  {
    throw WrongArgument("SolveLU(MatrixMumps<double>, Matrix<complex<double> >)",
                        "The result should be a real matrix");
  }


  template<class Prop, class Allocator>
  void SolveLU(MatrixMumps<complex<double> >& mat_lu,
               Matrix<double, Prop, ColMajor, Allocator>& x)
  {
    throw WrongArgument("SolveLU(MatrixMumps<complex<double> >, Matrix<double>)",
                        "The result should be a complex matrix");
  }


  template<class Prop, class Allocator>
  void SolveLU(const SeldonTranspose& TransA,
               MatrixMumps<complex<double> >& mat_lu,
               Matrix<double, Prop, ColMajor, Allocator>& x)
  {
    throw WrongArgument("SolveLU(MatrixMumps<complex<double> >, Matrix<double>)",
                        "The result should be a complex matrix");
  }

}

#define SELDON_FILE_MUMPS_CXX
#endif