// Copyright (C) 2014 INRIA
// Author(s): Marc Duruflé
//
// This file is part of the linear-algebra library Seldon,
// http://seldon.sourceforge.net/.
//
// Seldon is free software; you can redistribute it and/or modify it under the
// terms of the GNU Lesser General Public License as published by the Free
// Software Foundation; either version 2.1 of the License, or (at your option)
// any later version.
//
// Seldon is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
// more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with Seldon. If not, see http://www.gnu.org/licenses/.

#ifndef SELDON_FILE_DISTRIBUTED_MATRIX_CXX

#include "DistributedMatrix.hxx"

namespace Seldon
{

  ///////////////////////////////////
  // DistributedMatrixIntegerArray //


  void DistributedMatrixIntegerArray::Clear()
  {
    OverlapRowNumbers.Clear();
    OverlapProcNumbers.Clear();
    GlobalRowNumbers.Clear();
    ProcSharingRows.Clear();
    SharingRowNumbers.Clear();
  }


  void DistributedMatrixIntegerArray
  ::SetData(int nl, int ng, int nodl, int nb_u, const MPI_Comm& comm_,
            const IVect& overlap_row, const IVect& overlap_proc,
            const IVect& global_rows, const IVect& proc_rows,
            const Vector<IVect>& sharing_rows)
  {
    nloc = nl;
    nglob = ng;
    nodl_scalar = nodl;
    nb_unknowns_scal = nb_u;
    comm = comm_;

    OverlapRowNumbers.SetData(overlap_row.GetM(), overlap_row.GetData());
    OverlapProcNumbers.SetData(overlap_proc.GetM(), overlap_proc.GetData());
    GlobalRowNumbers.SetData(global_rows.GetM(), global_rows.GetData());
    ProcSharingRows.SetData(proc_rows.GetM(), proc_rows.GetData());
    SharingRowNumbers.SetData(sharing_rows.GetM(), sharing_rows.GetData());
  }


  void DistributedMatrixIntegerArray::Nullify()
  {
    OverlapRowNumbers.Nullify();
    OverlapProcNumbers.Nullify();
    GlobalRowNumbers.Nullify();
    ProcSharingRows.Nullify();
    SharingRowNumbers.Nullify();
  }


  int DistributedMatrixIntegerArray
  ::ConstructArrays(Vector<IVect>& all_rows,
                    IVect& row_num, IVect& overlap_num, IVect& proc_num,
                    IVect& MatchingProc, Vector<IVect>& MatchingDofNumber,
                    const MPI_Comm& comm, bool distribute_row)
  {
    int nb_proc; MPI_Comm_size(comm, &nb_proc);
    int rank_proc; MPI_Comm_rank(comm, &rank_proc);
    // throwing an error if the rows are not sorted
    if ((rank_proc == 0) && (distribute_row))
      for (int i = 0; i < nb_proc; i++)
        {
          bool sorted = true;
          for (int j = 1; j < all_rows(i).GetM(); j++)
            if (all_rows(i)(j) < all_rows(i)(j-1))
              sorted = false;

          if (!sorted)
            {
              cout << "Row numbers must be sorted in this function" << endl;
              abort();
              //Sort(all_rows(i));
            }
        }

    // finding the global number of rows
    int nglob = 0;
    if (rank_proc == 0)
      for (int i = 0; i < all_rows.GetM(); i++)
        for (int j = 0; j < all_rows(i).GetM(); j++)
          if (all_rows(i)(j)+1 > nglob)
            nglob = all_rows(i)(j)+1;

    if (rank_proc == 0)
      {
        // sending row numbers to all processors
        for (int i = 1; i < nb_proc; i++)
          {
            int n = all_rows(i).GetM();
            MPI_Send(&nglob, 1, MPI_INTEGER, i, 101, comm);
            if (distribute_row)
              {
                MPI_Send(&n, 1, MPI_INTEGER, i, 102, comm);
                MPI_Send(all_rows(i).GetData(), all_rows(i).GetM(),
                         MPI_INTEGER, i, 103, comm);
              }
          }

        if (distribute_row)
          row_num = all_rows(0);

        // finding the minimal processor rank for each row
        Vector<int> MinProc(nglob);
        MinProc.Fill(nb_proc);
        for (int i = 0; i < all_rows.GetM(); i++)
          for (int j = 0; j < all_rows(i).GetM(); j++)
            if (MinProc(all_rows(i)(j)) > i)
              MinProc(all_rows(i)(j)) = i;

        // counting all the rows that are overlapped
        // (i.e. shared with a processor of lower rank)
        for (int i = 0; i < all_rows.GetM(); i++)
          {
            int nb_overlap = 0;
            for (int j = 0; j < all_rows(i).GetM(); j++)
              if (MinProc(all_rows(i)(j)) < i)
                nb_overlap++;

            IVect num(nb_overlap), proc(nb_overlap);
            nb_overlap = 0;
            for (int j = 0; j < all_rows(i).GetM(); j++)
              if (MinProc(all_rows(i)(j)) < i)
                {
                  num(nb_overlap) = j;
                  proc(nb_overlap) = MinProc(all_rows(i)(j));
                  nb_overlap++;
                }

            // sending overlapped rows
            if (i == 0)
              {
                overlap_num = num;
                proc_num = proc;
              }
            else
              {
                MPI_Send(&nb_overlap, 1, MPI_INTEGER, i, 104, comm);
                if (nb_overlap > 0)
                  {
                    MPI_Send(num.GetData(), nb_overlap,
                             MPI_INTEGER, i, 105, comm);
                    MPI_Send(proc.GetData(), nb_overlap,
                             MPI_INTEGER, i, 106, comm);
                  }
              }
          }

        // counting the number of processors sharing each global row
        MinProc.Fill(0);
        for (int i = 0; i < all_rows.GetM(); i++)
          for (int j = 0; j < all_rows(i).GetM(); j++)
            MinProc(all_rows(i)(j))++;

        int nb_shared_row = 0;
        for (int i = 0; i < nglob; i++)
          {
            if (MinProc(i) > 1)
              {
                MinProc(i) = nb_shared_row;
                nb_shared_row++;
              }
            else
              MinProc(i) = -1;
          }

        // for each shared row, storing the list of processors
        Vector<IVect> ListProcRow(nb_shared_row);
        for (int i = 0; i < all_rows.GetM(); i++)
          for (int j = 0; j < all_rows(i).GetM(); j++)
            {
              int n = all_rows(i)(j);
              if (MinProc(n) >= 0)
                {
                  int nloc = MinProc(n);
                  ListProcRow(nloc).PushBack(i);
                }
            }

        // then the arrays MatchingDofNumber are constructed
        for (int i = 0; i < all_rows.GetM(); i++)
          {
            // searching all the processors interacting
            Vector<bool> ProcUsed(nb_proc);
            ProcUsed.Fill(false);
            for (int j = 0; j < all_rows(i).GetM(); j++)
              {
                int n = all_rows(i)(j);
                if (MinProc(n) >= 0)
                  {
                    int nloc = MinProc(n);
                    for (int k = 0; k < ListProcRow(nloc).GetM(); k++)
                      ProcUsed(ListProcRow(nloc)(k)) = true;
                  }
              }

            int nb_proc_interac = 0;
            for (int k = 0; k < ProcUsed.GetM(); k++)
              if ((k != i) && (ProcUsed(k)))
                nb_proc_interac++;

            Vector<int> matching_proc(nb_proc_interac);
            Vector<Vector<int> > matching_row(nb_proc_interac);
            nb_proc_interac = 0;
            for (int k = 0; k < ProcUsed.GetM(); k++)
              if ((k != i) && (ProcUsed(k)))
                {
                  // counting rows shared with processor k
                  int nb_row_interac = 0;
                  for (int j = 0; j < all_rows(i).GetM(); j++)
                    {
                      int n = all_rows(i)(j);
                      if (MinProc(n) >= 0)
                        {
                          int nloc = MinProc(n);
                          for (int k2 = 0;
                               k2 < ListProcRow(nloc).GetM(); k2++)
                            if (ListProcRow(nloc)(k2) == k)
                              nb_row_interac++;
                        }
                    }

                  // filling arrays
                  matching_proc(nb_proc_interac) = k;
                  matching_row(nb_proc_interac).Reallocate(nb_row_interac);
                  nb_row_interac = 0;
                  for (int j = 0; j < all_rows(i).GetM(); j++)
                    {
                      int n = all_rows(i)(j);
                      if (MinProc(n) >= 0)
                        {
                          int nloc = MinProc(n);
                          for (int k2 = 0;
                               k2 < ListProcRow(nloc).GetM(); k2++)
                            if (ListProcRow(nloc)(k2) == k)
                              {
                                matching_row(nb_proc_interac)
                                  (nb_row_interac) = j;

                                nb_row_interac++;
                              }
                        }
                    }

                  nb_proc_interac++;
                }

            // sending arrays to the processor i
            if (i == 0)
              {
                MatchingProc = matching_proc;
                MatchingDofNumber = matching_row;
              }
            else
              {
                MPI_Send(&nb_proc_interac, 1, MPI_INTEGER, i, 107, comm);
                if (nb_proc_interac > 0)
                  {
                    MPI_Send(matching_proc.GetData(), nb_proc_interac,
                             MPI_INTEGER, i, 108, comm);

                    for (int k = 0; k < nb_proc_interac; k++)
                      {
                        int nb_row = matching_row(k).GetM();
                        MPI_Send(&nb_row, 1, MPI_INTEGER, i, 109, comm);
                        MPI_Send(matching_row(k).GetData(), nb_row,
                                 MPI_INTEGER, i, 110, comm);
                      }
                  }
              }
          }
      }
    else
      {
        // receiving row numbers
        MPI_Status status; int n;
        MPI_Recv(&nglob, 1, MPI_INTEGER, 0, 101, comm, &status);
        if (distribute_row)
          {
            MPI_Recv(&n, 1, MPI_INTEGER, 0, 102, comm, &status);
            row_num.Reallocate(n);
            MPI_Recv(row_num.GetData(), n, MPI_INTEGER, 0, 103, comm, &status);
          }

        // receiving overlapped numbers
        MPI_Recv(&n, 1, MPI_INTEGER, 0, 104, comm, &status);
        if (n > 0)
          {
            overlap_num.Reallocate(n);
            proc_num.Reallocate(n);
            MPI_Recv(overlap_num.GetData(), n, MPI_INTEGER, 0, 105,
                     comm, &status);
            MPI_Recv(proc_num.GetData(), n, MPI_INTEGER, 0, 106,
                     comm, &status);
          }
        else
          {
            overlap_num.Clear();
            proc_num.Clear();
          }

        // receiving local numbers of rows shared with other processors
        MPI_Recv(&n, 1, MPI_INTEGER, 0, 107, comm, &status);
        if (n > 0)
          {
            MatchingProc.Reallocate(n);
            MatchingDofNumber.Reallocate(n);
            MPI_Recv(MatchingProc.GetData(), n,
                     MPI_INTEGER, 0, 108, comm, &status);

            for (int k = 0; k < MatchingProc.GetM(); k++)
              {
                MPI_Recv(&n, 1, MPI_INTEGER, 0, 109, comm, &status);
                MatchingDofNumber(k).Reallocate(n);
                MPI_Recv(MatchingDofNumber(k).GetData(), n, MPI_INTEGER,
                         0, 110, comm, &status);
              }
          }
        else
          {
            MatchingProc.Clear();
            MatchingDofNumber.Clear();
          }
      }

    return nglob;
  }


  int DistributedMatrixIntegerArray
  ::ConstructArrays(IVect& row_num, IVect& overlap_num, IVect& proc_num,
                    IVect& MatchingProc,
                    Vector<IVect>& MatchingDofNumber, const MPI_Comm& comm)
  {
    // throwing an error if the rows are not sorted
    bool sorted = true;
    for (int j = 1; j < row_num.GetM(); j++)
      if (row_num(j) < row_num(j-1))
        sorted = false;

    if (!sorted)
      {
        cout << "Row numbers must be sorted in this function" << endl;
        abort();
        //Sort(row_num);
      }

    // gathering row numbers on processor 0
    int nb_proc; MPI_Comm_size(comm, &nb_proc);
    int rank_proc; MPI_Comm_rank(comm, &rank_proc);
    Vector<IVect> all_rows(nb_proc);
    if (rank_proc == 0)
      {
        all_rows(0) = row_num;
        int nodl_par; MPI_Status status;
        for (int i = 1; i < nb_proc; i++)
          {
            MPI_Recv(&nodl_par, 1, MPI_INTEGER, i, 13, comm, &status);
            all_rows(i).Reallocate(nodl_par);
            MPI_Recv(all_rows(i).GetData(), nodl_par,
                     MPI_INTEGER, i, 14, comm, &status);
          }
      }
    else
      {
        int nodl = row_num.GetM();
        MPI_Send(&nodl, 1, MPI_INTEGER, 0, 13, comm);
        MPI_Send(row_num.GetData(), nodl, MPI_INTEGER, 0, 14, comm);
      }

    // then calling ConstructArrays with all_rows
    return ConstructArrays(all_rows, row_num, overlap_num, proc_num,
                           MatchingProc, MatchingDofNumber, comm, false);
  }
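

  /* Illustrative usage sketch (not part of the library; the variable names
     are hypothetical): each rank passes its sorted global row numbers and
     receives the overlap and sharing information.

     IVect rows;                      // sorted global rows of this rank
     IVect overlap_num, overlap_proc; // rows owned first by a lower rank
     IVect sharing_proc;              // ranks sharing at least one row
     Vector<IVect> sharing_rows;      // local rows shared with each of them
     int nglob = DistributedMatrixIntegerArray
       ::ConstructArrays(rows, overlap_num, overlap_proc,
                         sharing_proc, sharing_rows, MPI_COMM_WORLD);
  */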


  // DistributedMatrixIntegerArray //
  ///////////////////////////////////



  ///////////////////////////
  // DistributedMatrix_Base //
  ///////////////////////////


  /********************
   * Internal methods *
   ********************/


  //! erases the arrays used for the matrix-vector product
  template<class T>
  void DistributedMatrix_Base<T>::EraseArrayForMltAdd()
  {
    global_row_to_recv.Clear(); global_col_to_recv.Clear();
    ptr_global_row_to_recv.Clear(); ptr_global_col_to_recv.Clear();
    local_row_to_send.Clear(); local_col_to_send.Clear();

    proc_col_to_recv.Clear(); proc_col_to_send.Clear();
    proc_row_to_recv.Clear(); proc_row_to_send.Clear();

    local_number_distant_values = false;
    size_max_distant_row = 0;
    size_max_distant_col = 0;
  }


  //! converts the local numbers of distant entries back to global numbers
  template<class T>
  void DistributedMatrix_Base<T>::SwitchToGlobalNumbers()
  {
    if (!local_number_distant_values)
      return;

    // changing row numbers
    for (int i = 0; i < dist_row.GetM(); i++)
      for (int j = 0; j < dist_row(i).GetM(); j++)
        dist_row(i).Index(j) = global_row_to_recv(dist_row(i).Index(j));

    // then col numbers
    for (int i = 0; i < dist_col.GetM(); i++)
      for (int j = 0; j < dist_col(i).GetM(); j++)
        dist_col(i).Index(j) = global_col_to_recv(dist_col(i).Index(j));

    // erasing data needed for the matrix-vector product
    EraseArrayForMltAdd();
  }


  //! sorts the distant interactions by processor and builds the exchange arrays
  /*!
    On exit, glob_num contains the sorted list of distinct global numbers
    referenced by dist_val, ptr_glob_num(i) gives the beginning of the numbers
    associated with the processor proc_glob(i), and local_num/proc_local
    describe the local rows that must be sent to the other processors.
    The Index(j) of dist_val are replaced by positions in glob_num.
  */
  template<class T> template<class TypeDist>
  void DistributedMatrix_Base<T>
  ::SortAndAssembleDistantInteractions(TypeDist& dist_val,
                                       Vector<IVect>& dist_proc,
                                       IVect& glob_num,
                                       IVect& ptr_glob_num, IVect& proc_glob,
                                       Vector<IVect>& local_num,
                                       IVect& proc_local)
  {
    MPI_Comm& comm = comm_;
    // counting the number of distant interactions
    long N = 0;
    for (int i = 0; i < dist_proc.GetM(); i++)
      N += dist_proc(i).GetM();

    // sorting distant interactions by processor number
    IVect all_proc(N), all_num(N); Vector<long> permut(N);
    int nb_proc; MPI_Comm_size(comm, &nb_proc);
    Vector<long> nb_inter_per_proc(nb_proc);
    permut.Fill(); long nb = 0;
    nb_inter_per_proc.Fill(0);
    for (int i = 0; i < dist_proc.GetM(); i++)
      for (int j = 0; j < dist_proc(i).GetM(); j++)
        {
          all_proc(nb) = dist_proc(i)(j);
          all_num(nb) = dist_val(i).Index(j);
          nb_inter_per_proc(dist_proc(i)(j))++;
          nb++;
        }

    Sort(all_proc, all_num, permut);

    // counting the number of processors involved
    int nb_global_proc = 0;
    for (int i = 0; i < nb_inter_per_proc.GetM(); i++)
      if (nb_inter_per_proc(i) > 0)
        nb_global_proc++;

    proc_glob.Reallocate(nb_global_proc);

    // for each processor, sorting numbers,
    // and counting how many different numbers there are
    long offset = 0;
    int nb_glob = 0;
    Vector<int> nb_num_per_proc(nb_proc);
    nb_num_per_proc.Fill(0);
    nb_global_proc = 0;
    for (int p = 0; p < nb_inter_per_proc.GetM(); p++)
      if (nb_inter_per_proc(p) > 0)
        {
          long size = nb_inter_per_proc(p);
          Sort(offset, offset+size-1, all_num, permut);
          int prec = all_num(offset);
          int nb_glob_p = 1;
          for (long j = 1; j < size; j++)
            if (all_num(offset+j) != prec)
              {
                prec = all_num(offset+j);
                nb_glob_p++;
              }

          nb_num_per_proc(p) = nb_glob_p;
          nb_glob += nb_glob_p;
          proc_glob(nb_global_proc) = p;
          nb_global_proc++;
          offset += size;
        }

    // grouping global numbers
    all_proc.Clear(); IVect local(N);
    offset = 0; glob_num.Reallocate(nb_glob);
    ptr_glob_num.Reallocate(nb_global_proc+1);
    nb_glob = 0; nb_global_proc = 0; ptr_glob_num(0) = 0;
    for (int p = 0; p < nb_inter_per_proc.GetM(); p++)
      if (nb_inter_per_proc(p) > 0)
        {
          long size = nb_inter_per_proc(p);
          int prec = all_num(offset);
          ptr_glob_num(nb_global_proc+1) = ptr_glob_num(nb_global_proc)
            + nb_num_per_proc(p);

          glob_num(nb_glob) = prec; nb_glob++;
          int nb_glob_p = 1;
          for (long j = 0; j < size; j++)
            {
              if (all_num(offset+j) != prec)
                {
                  prec = all_num(offset+j);
                  glob_num(nb_glob) = prec;
                  nb_glob_p++; nb_glob++;
                }

              local(offset+j) = nb_glob-1;
            }

          nb_global_proc++;
          offset += size;
        }

    // changing numbers in dist_val
    Vector<long> inv_permut(N);
    for (int i = 0; i < N; i++)
      inv_permut(permut(i)) = i;

    long nb_glob_d = 0;
    for (int i = 0; i < dist_val.GetM(); i++)
      for (int j = 0; j < dist_val(i).GetM(); j++)
        {
          int n = inv_permut(nb_glob_d);
          dist_val(i).Index(j) = local(n);
          nb_glob_d++;
        }

    // exchanging nb_num_per_proc
    IVect nb_num_send(nb_proc);
    MPI_Alltoall(nb_num_per_proc.GetData(), 1, MPI_INTEGER,
                 nb_num_send.GetData(), 1, MPI_INTEGER, comm);

    // sending numbers
    Vector<MPI_Request> request_send(nb_proc), request_recv(nb_proc);

    nb_global_proc = 0;
    for (int p = 0; p < nb_proc; p++)
      if (nb_num_per_proc(p) > 0)
        {
          int size = nb_num_per_proc(p);
          MPI_Isend(&glob_num(ptr_glob_num(nb_global_proc)), size,
                    MPI_INTEGER, p, 17, comm, &request_send(p));

          nb_global_proc++;
        }

    int nb_local_proc = 0;
    for (int p = 0; p < nb_proc; p++)
      if (nb_num_send(p) > 0)
        nb_local_proc++;

    local_num.Reallocate(nb_local_proc);
    proc_local.Reallocate(nb_local_proc);

    // receiving numbers
    MPI_Status status; nb_local_proc = 0;
    for (int p = 0; p < nb_proc; p++)
      if (nb_num_send(p) > 0)
        {
          proc_local(nb_local_proc) = p;
          local_num(nb_local_proc).Reallocate(nb_num_send(p));
          MPI_Recv(local_num(nb_local_proc).GetData(), nb_num_send(p),
                   MPI_INTEGER, p, 17, comm, &status);

          nb_local_proc++;
        }

    for (int i = 0; i < request_send.GetM(); i++)
      if (nb_num_per_proc(i) > 0)
        MPI_Wait(&request_send(i), &status);

    // global to local conversion
    IVect Glob_to_local(this->GetGlobalM());
    const IVect& RowNumber = this->GetGlobalRowNumber();
    Glob_to_local.Fill(-1);
    for (int i = 0; i < RowNumber.GetM(); i++)
      Glob_to_local(RowNumber(i)) = i;

    // replacing global numbers with local numbers
    for (int i = 0; i < local_num.GetM(); i++)
      for (int j = 0; j < local_num(i).GetM(); j++)
        local_num(i)(j) = Glob_to_local(local_num(i)(j));
  }


  //! internal method: scatters the values of X needed by the distant processors
  template<class T> template<class T2>
  void DistributedMatrix_Base<T>
  ::ScatterValues(const Vector<T2>& X, const IVect& num_recv,
                  const IVect& ptr_num_recv, const IVect& proc_recv,
                  const Vector<IVect>& num_send, const IVect& proc_send,
                  Vector<T2>& Xcol) const
  {
    // sending data
    const MPI_Comm& comm = comm_;
    Vector<Vector<T2> > xsend(proc_send.GetM()), xrecv(proc_recv.GetM());
    Vector<Vector<int64_t> > xsend_tmp(proc_send.GetM()),
      xrecv_tmp(proc_recv.GetM());

    int tag = 30;
    Vector<MPI_Request> request_send(proc_send.GetM());
    for (int i = 0; i < proc_send.GetM(); i++)
      {
        int nb = num_send(i).GetM();
        xsend(i).Reallocate(nb);
        for (int j = 0; j < nb; j++)
          xsend(i)(j) = X(num_send(i)(j));

        request_send(i) =
          MpiIsend(comm, xsend(i), xsend_tmp(i), nb, proc_send(i), tag);
      }

    // receiving data
    Vector<MPI_Request> request_recv(proc_recv.GetM());
    int N = 0;
    for (int i = 0; i < proc_recv.GetM(); i++)
      {
        int nb = ptr_num_recv(i+1) - ptr_num_recv(i); N += nb;
        xrecv(i).Reallocate(nb);
        request_recv(i) =
          MpiIrecv(comm, xrecv(i), xrecv_tmp(i), nb, proc_recv(i), tag);
      }

    // waiting until the transfers are completed
    MPI_Status status;
    for (int i = 0; i < request_send.GetM(); i++)
      MPI_Wait(&request_send(i), &status);

    for (int i = 0; i < request_recv.GetM(); i++)
      MPI_Wait(&request_recv(i), &status);

    xsend.Clear();
    // completing receives
    for (int i = 0; i < request_recv.GetM(); i++)
      MpiCompleteIrecv(xrecv(i), xrecv_tmp(i), xrecv(i).GetM());

    // values are stored in Xcol
    Xcol.Reallocate(N); N = 0;
    for (int i = 0; i < proc_recv.GetM(); i++)
      for (int j = 0; j < ptr_num_recv(i+1) - ptr_num_recv(i); j++)
        Xcol(N++) = xrecv(i)(j);
  }


  //! internal method: adds the distant contributions Xcol into X
  template<class T> template<class T2>
  void DistributedMatrix_Base<T>::
  AssembleValues(const Vector<T2>& Xcol, const IVect& num_recv,
                 const IVect& ptr_num_recv, const IVect& proc_recv,
                 const Vector<IVect>& num_send, const IVect& proc_send,
                 Vector<T2>& X) const
  {
    // sending data
    const MPI_Comm& comm = comm_;
    Vector<Vector<T2> > xsend(proc_recv.GetM()), xrecv(proc_send.GetM());
    Vector<Vector<int64_t> > xsend_tmp(proc_recv.GetM()),
      xrecv_tmp(proc_send.GetM());

    int tag = 32, N = 0;
    Vector<MPI_Request> request_send(proc_recv.GetM());
    for (int i = 0; i < proc_recv.GetM(); i++)
      {
        int nb = ptr_num_recv(i+1) - ptr_num_recv(i);
        xsend(i).Reallocate(nb);
        for (int j = 0; j < nb; j++)
          xsend(i)(j) = Xcol(N++);

        request_send(i) =
          MpiIsend(comm, xsend(i), xsend_tmp(i), nb, proc_recv(i), tag);
      }

    // receiving data
    Vector<MPI_Request> request_recv(proc_send.GetM());
    for (int i = 0; i < proc_send.GetM(); i++)
      {
        int nb = num_send(i).GetM();
        xrecv(i).Reallocate(nb);
        request_recv(i) =
          MpiIrecv(comm, xrecv(i), xrecv_tmp(i), nb, proc_send(i), tag);
      }

    // waiting until the transfers are completed
    MPI_Status status;
    for (int i = 0; i < request_send.GetM(); i++)
      MPI_Wait(&request_send(i), &status);

    for (int i = 0; i < request_recv.GetM(); i++)
      MPI_Wait(&request_recv(i), &status);

    xsend.Clear();
    // completing receives
    for (int i = 0; i < request_recv.GetM(); i++)
      MpiCompleteIrecv(xrecv(i), xrecv_tmp(i), xrecv(i).GetM());

    // values are added to X
    for (int i = 0; i < num_send.GetM(); i++)
      for (int j = 0; j < num_send(i).GetM(); j++)
        X(num_send(i)(j)) += xrecv(i)(j);
  }
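

  /* Sketch of the scatter/assemble duality (illustrative only, assuming the
     exchange arrays were built by PrepareMltAdd): ScatterValues sends the
     owned values to the processors referencing them, while AssembleValues
     sends the distant contributions back and accumulates them.

     Vector<double> X, Xrow;
     ScatterRowValues(X, Xrow);   // Xrow = values of X at the distant rows
     // ... accumulate contributions into Xrow ...
     AssembleRowValues(Xrow, X);  // adds the contributions back into X
  */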


  //! internal method: assembles distant values by keeping the minimum
  /*!
    For each shared row, the pair (processor, column) with the smallest
    processor rank (and the smallest column in case of equality) is kept
    in (Yproc, Y).
  */
  template<class T>
  void DistributedMatrix_Base<T>::
  AssembleValuesMin(const IVect& Xcol, const IVect& Xcol_proc,
                    const IVect& num_recv, const IVect& ptr_num_recv,
                    const IVect& proc_recv,
                    const Vector<IVect>& num_send, const IVect& proc_send,
                    IVect& Y, IVect& Yproc) const
  {
    // sending data
    const MPI_Comm& comm = comm_;
    Vector<Vector<int> > xsend(proc_recv.GetM()), xrecv(proc_send.GetM());
    int tag = 35, N = 0;
    Vector<MPI_Request> request_send(proc_recv.GetM());
    for (int i = 0; i < proc_recv.GetM(); i++)
      {
        int nb = ptr_num_recv(i+1) - ptr_num_recv(i);
        xsend(i).Reallocate(2*nb);
        for (int j = 0; j < nb; j++)
          {
            xsend(i)(j) = Xcol(N);
            xsend(i)(nb+j) = Xcol_proc(N);
            N++;
          }

        MPI_Isend(xsend(i).GetDataVoid(), 2*nb,
                  GetMpiDataType(Xcol), proc_recv(i), tag,
                  comm, &request_send(i));
      }

    // receiving data
    Vector<MPI_Request> request_recv(proc_send.GetM());
    for (int i = 0; i < proc_send.GetM(); i++)
      {
        int nb = num_send(i).GetM();
        xrecv(i).Reallocate(2*nb);
        MPI_Irecv(xrecv(i).GetDataVoid(), 2*nb,
                  GetMpiDataType(Xcol), proc_send(i), tag,
                  comm, &request_recv(i));
      }

    // waiting until the transfers are completed
    MPI_Status status;
    for (int i = 0; i < request_send.GetM(); i++)
      MPI_Wait(&request_send(i), &status);

    for (int i = 0; i < request_recv.GetM(); i++)
      MPI_Wait(&request_recv(i), &status);

    xsend.Clear();
    // values are assembled in Y and Yproc
    for (int i = 0; i < num_send.GetM(); i++)
      for (int j = 0; j < num_send(i).GetM(); j++)
        {
          int nb = num_send(i).GetM();
          int proc = xrecv(i)(nb+j);
          int col = xrecv(i)(j);
          if (proc < Yproc(num_send(i)(j)))
            {
              Yproc(num_send(i)(j)) = proc;
              Y(num_send(i)(j)) = col;
            }
          else if (proc == Yproc(num_send(i)(j)))
            {
              if (col < Y(num_send(i)(j)))
                Y(num_send(i)(j)) = col;
            }
        }
  }


  //! assembles the vectors X and Xproc by taking the minimum on shared rows
  template<class T>
  void DistributedMatrix_Base<T>
  ::AssembleVecMin(Vector<int>& X, Vector<int>& Xproc) const
  {
    AssembleVectorMin(X, Xproc, *ProcSharingRows, *SharingRowNumbers,
                      comm_, nodl_scalar_, nb_unknowns_scal_, 13);
  }


  //! removes the distant entries whose magnitude is lower or equal to epsilon
  template<class T> template<class T0, class TypeDist>
  void DistributedMatrix_Base<T>
  ::RemoveSmallEntryDistant(const T0& epsilon,
                            TypeDist& dist_vec, Vector<IVect>& dist_proc)
  {
    for (int i = 0; i < dist_vec.GetM(); i++)
      {
        int nb = 0, size = dist_vec(i).GetM();
        for (int j = 0; j < size; j++)
          if (abs(dist_vec(i).Value(j)) > epsilon)
            nb++;

        if (nb < size)
          {
            IVect num(size), proc(size); Vector<T> val(size);
            for (int j = 0; j < size; j++)
              {
                num(j) = dist_vec(i).Index(j);
                val(j) = dist_vec(i).Value(j);
                proc(j) = dist_proc(i)(j);
              }

            dist_vec(i).Reallocate(nb);
            dist_proc(i).Reallocate(nb);
            nb = 0;
            for (int j = 0; j < size; j++)
              if (abs(val(j)) > epsilon)
                {
                  dist_vec(i).Index(nb) = num(j);
                  dist_vec(i).Value(nb) = val(j);
                  dist_proc(i)(nb) = proc(j);
                  nb++;
                }
          }
      }
  }


  //! adds the absolute values of the distant columns to the row sums
  template<class T> template<class T0>
  void DistributedMatrix_Base<T>
  ::GetRowSumDistantCol(Vector<T0>& vec_sum) const
  {
    for (int i = 0; i < dist_col.GetM(); i++)
      for (int j = 0; j < dist_col(i).GetM(); j++)
        vec_sum(i) += abs(dist_col(i).Value(j));
  }


  //! adds the absolute values of the distant rows to the row sums
  template<class T> template<class T0>
  void DistributedMatrix_Base<T>
  ::GetRowSumDistantRow(Vector<T0>& vec_sum) const
  {
    T0 zero; SetComplexZero(zero);
    Vector<T0> Y(global_row_to_recv.GetM());
    Y.Fill(zero);
    for (int i = 0; i < dist_row.GetM(); i++)
      for (int j = 0; j < dist_row(i).GetM(); j++)
        {
          int jrow = dist_row(i).Index(j);
          Y(jrow) += abs(dist_row(i).Value(j));
        }

    AssembleRowValues(Y, vec_sum);
  }


  //! adds the absolute values of the distant columns to the column sums
  template<class T> template<class T0>
  void DistributedMatrix_Base<T>
  ::GetColSumDistantCol(Vector<T0>& vec_sum) const
  {
    T0 zero; SetComplexZero(zero);
    Vector<T0> Y(global_col_to_recv.GetM());
    Y.Fill(zero);
    for (int i = 0; i < dist_col.GetM(); i++)
      for (int j = 0; j < dist_col(i).GetM(); j++)
        {
          int jrow = dist_col(i).Index(j);
          Y(jrow) += abs(dist_col(i).Value(j));
        }

    AssembleColValues(Y, vec_sum);
  }


  //! adds the absolute values of the distant rows to the column sums
  template<class T> template<class T0>
  void DistributedMatrix_Base<T>
  ::GetColSumDistantRow(Vector<T0>& vec_sum) const
  {
    for (int i = 0; i < dist_row.GetM(); i++)
      for (int j = 0; j < dist_row(i).GetM(); j++)
        vec_sum(i) += abs(dist_row(i).Value(j));
  }


  /**********************************************
   * Internal methods for matrix-vector product *
   **********************************************/


  //! prepares the exchange arrays needed for the matrix-vector product
  /*!
    This method is called once: it computes the maximal number of distant
    interactions over all processors and constructs the arrays used to
    scatter and assemble values during the product.
  */
  template<class T>
  void DistributedMatrix_Base<T>::PrepareMltAdd()
  {
    if (local_number_distant_values)
      return;

    local_number_distant_values = true;
    const MPI_Comm& comm = comm_;
    int nb_proc; MPI_Comm_size(comm, &nb_proc);
    if (nb_proc == 1)
      return;

    long N = 0;
    for (int i = 0; i < dist_col.GetM(); i++)
      N += dist_col(i).GetM();

    MPI_Allreduce(&N, &size_max_distant_col, 1, MPI_LONG, MPI_MAX, comm);
    N = 0;
    for (int i = 0; i < dist_row.GetM(); i++)
      N += dist_row(i).GetM();

    MPI_Allreduce(&N, &size_max_distant_row, 1, MPI_LONG, MPI_MAX, comm);

    if (size_max_distant_col > 0)
      SortAndAssembleDistantInteractions(dist_col, proc_col,
                                         global_col_to_recv,
                                         ptr_global_col_to_recv,
                                         proc_col_to_recv,
                                         local_col_to_send, proc_col_to_send);

    if (size_max_distant_row > 0)
      SortAndAssembleDistantInteractions(dist_row, proc_row,
                                         global_row_to_recv,
                                         ptr_global_row_to_recv,
                                         proc_row_to_recv,
                                         local_row_to_send, proc_row_to_send);
  }
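

  /* Illustrative outline (simplified sketch) of a distributed product
     Y = Y + A X using the arrays built above; the actual driver is
     implemented elsewhere in the library:

     PrepareMltAdd();                     // builds the exchange arrays once
     Vector<double> Xcol;
     ScatterColValues(X, Xcol);           // values of X at distant columns
     MltAddCol(SeldonNoTrans, Xcol, Y);   // contribution of distant columns
     // the contributions of distant rows are computed with MltAddRow and
     // accumulated into Y through AssembleRowValues.
  */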


  //! scatters the values of X associated with the distant rows
  template<class T> template<class T2>
  void DistributedMatrix_Base<T>
  ::ScatterRowValues(const Vector<T2>& X, Vector<T2>& Xcol) const
  {
    ScatterValues(X, global_row_to_recv, ptr_global_row_to_recv,
                  proc_row_to_recv,
                  local_row_to_send, proc_row_to_send, Xcol);
  }


  //! scatters the values of X associated with the distant columns
  template<class T> template<class T2>
  void DistributedMatrix_Base<T>
  ::ScatterColValues(const Vector<T2>& X, Vector<T2>& Xcol) const
  {
    ScatterValues(X, global_col_to_recv, ptr_global_col_to_recv,
                  proc_col_to_recv,
                  local_col_to_send, proc_col_to_send, Xcol);
  }


  //! assembles (adds) the contributions of the distant rows into X
  template<class T> template<class T2>
  void DistributedMatrix_Base<T>
  ::AssembleRowValues(const Vector<T2>& Xrow, Vector<T2>& X) const
  {
    AssembleValues(Xrow, global_row_to_recv, ptr_global_row_to_recv,
                   proc_row_to_recv,
                   local_row_to_send, proc_row_to_send, X);
  }


  //! assembles (adds) the contributions of the distant columns into X
  template<class T> template<class T2>
  void DistributedMatrix_Base<T>
  ::AssembleColValues(const Vector<T2>& Xcol, Vector<T2>& X) const
  {
    AssembleValues(Xcol, global_col_to_recv, ptr_global_col_to_recv,
                   proc_col_to_recv,
                   local_col_to_send, proc_col_to_send, X);
  }


  //! assembles the vector X by summing the values of shared rows
  template<class T> template<class T2>
  void DistributedMatrix_Base<T>::AssembleVec(Vector<T2>& X) const
  {
    AssembleVector(X, MPI_SUM, *ProcSharingRows, *SharingRowNumbers,
                   comm_, nodl_scalar_, nb_unknowns_scal_, 14);
  }


  //! contribution of the distant columns in the product Y = Y + A X
  template<class T>
  template<class T2, class Storage2, class Allocator2,
           class T4, class Storage4, class Allocator4>
  void DistributedMatrix_Base<T>
  ::MltAddCol(const SeldonTranspose& Trans,
              const Vector<T2, Storage2, Allocator2>& X,
              Vector<T4, Storage4, Allocator4>& Y) const
  {
    if (Trans.NoTrans())
      {
        for (int i = 0; i < dist_col.GetM(); i++)
          for (int j = 0; j < dist_col(i).GetM(); j++)
            {
              int jloc = dist_col(i).Index(j);
              Y(i) += dist_col(i).Value(j)*X(jloc);
            }
      }
    else
      {
        T4 zero; SetComplexZero(zero);
        Y.Reallocate(global_col_to_recv.GetM());
        Y.Fill(zero);
        if (Trans.Trans())
          {
            for (int i = 0; i < dist_col.GetM(); i++)
              for (int j = 0; j < dist_col(i).GetM(); j++)
                {
                  int jrow = dist_col(i).Index(j);
                  Y(jrow) += dist_col(i).Value(j)*X(i);
                }
          }
        else
          {
            for (int i = 0; i < dist_col.GetM(); i++)
              for (int j = 0; j < dist_col(i).GetM(); j++)
                {
                  int jrow = dist_col(i).Index(j);
                  Y(jrow) += conjugate(dist_col(i).Value(j))*X(i);
                }
          }
      }
  }


  //! contribution of the distant rows in the product Y = Y + A X
  template<class T>
  template<class T2, class Storage2, class Allocator2,
           class T4, class Storage4, class Allocator4>
  void DistributedMatrix_Base<T>
  ::MltAddRow(const SeldonTranspose& Trans,
              const Vector<T2, Storage2, Allocator2>& X,
              Vector<T4, Storage4, Allocator4>& Y) const
  {
    if (Trans.NoTrans())
      {
        T4 zero; SetComplexZero(zero);
        Y.Reallocate(global_row_to_recv.GetM());
        Y.Fill(zero);
        for (int i = 0; i < dist_row.GetM(); i++)
          for (int j = 0; j < dist_row(i).GetM(); j++)
            {
              int jrow = dist_row(i).Index(j);
              Y(jrow) += dist_row(i).Value(j)*X(i);
            }
      }
    else
      {
        if (Trans.Trans())
          {
            for (int i = 0; i < dist_row.GetM(); i++)
              for (int j = 0; j < dist_row(i).GetM(); j++)
                {
                  int jloc = dist_row(i).Index(j);
                  Y(i) += dist_row(i).Value(j)*X(jloc);
                }
          }
        else
          {
            for (int i = 0; i < dist_row.GetM(); i++)
              for (int j = 0; j < dist_row(i).GetM(); j++)
                {
                  int jloc = dist_row(i).Index(j);
                  Y(i) += conjugate(dist_row(i).Value(j))*X(jloc);
                }
          }
      }
  }


  /******************
   * Static methods *
   ******************/


  //! adds val at the global column jglob (held by processor proc2)
  /*!
    dist_col_ is kept sorted by global column number; if jglob is already
    present, val is added to the existing value, otherwise the entry is
    inserted together with its processor number.
  */
  template<class T>
  void DistributedMatrix_Base<T>
  ::AddDistantValue(Vector<T, VectSparse>& dist_col_,
                    IVect& proc_col_, int jglob, int proc2, const T& val)
  {
    int pos = 0;
    int size_row = dist_col_.GetM();
    while ((pos < size_row) && (dist_col_.Index(pos) < jglob))
      pos++;

    if ((pos < size_row) && (dist_col_.Index(pos) == jglob))
      {
        // already existing entry
        dist_col_.Value(pos) += val;
      }
    else
      {
        // new entry
        Vector<T> value(size_row);
        IVect index(size_row), proc(size_row);
        for (int k = 0; k < size_row; k++)
          {
            index(k) = dist_col_.Index(k);
            value(k) = dist_col_.Value(k);
            proc(k) = proc_col_(k);
          }

        dist_col_.Reallocate(size_row+1);
        proc_col_.Reallocate(size_row+1);
        for (int k = 0; k < pos; k++)
          {
            dist_col_.Index(k) = index(k);
            dist_col_.Value(k) = value(k);
            proc_col_(k) = proc(k);
          }

        dist_col_.Index(pos) = jglob;
        dist_col_.Value(pos) = val;
        proc_col_(pos) = proc2;
        for (int k = pos+1; k <= size_row; k++)
          {
            dist_col_.Index(k) = index(k-1);
            dist_col_.Value(k) = value(k-1);
            proc_col_(k) = proc(k-1);
          }
      }
  }
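

  /* Illustrative example (hypothetical values): inserting a coefficient val
     located at the global column jglob and owned by rank proc2 into the
     distant part of the local row i; the sorted insertion above keeps
     dist_col(i) ordered by global column number.

     AddDistantValue(dist_col(i), proc_col(i), jglob, proc2, val);
  */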


  //! sends and receives the distributed rows/values between all the processors
  /*!
    EntierToSend(i) stores the integers to send to the processor i (the first
    two values being the number of floats and the number of rows) and
    FloatToSend(i) the associated values. On exit, EntierToRecv/FloatToRecv
    contain the data received from each processor and the send arrays
    are cleared.
  */
  template<class T>
  void DistributedMatrix_Base<T>
  ::SendAndReceiveDistributed(const MPI_Comm& comm, IVect& nsend_int,
                              Vector<IVect>& EntierToSend,
                              Vector<Vector<T> >& FloatToSend, IVect& nrecv_int,
                              Vector<IVect>& EntierToRecv,
                              Vector<Vector<T> >& FloatToRecv)
  {
    int nb_proc; MPI_Comm_size(comm, &nb_proc);
    int rank; MPI_Comm_rank(comm, &rank);

    Vector<MPI_Request> request(3*nb_proc);
    Vector<Vector<int64_t> > FloatToSend_tmp(nb_proc);
    for (int i = 0; i < nb_proc; i++)
      if (i != rank)
        {
          MPI_Isend(&nsend_int(i), 1, MPI_INTEGER, i, 4, comm, &request(i));

          // sending all the values and indices stored to the processor i
          if (nsend_int(i) > 0)
            {
              MPI_Isend(EntierToSend(i).GetData(), nsend_int(i),
                        MPI_INTEGER, i, 5, comm, &request(i+nb_proc));

              if (EntierToSend(i)(0) > 0)
                request(i+2*nb_proc) =
                  MpiIsend(comm, FloatToSend(i), FloatToSend_tmp(i),
                           FloatToSend(i).GetM(), i, 6);
            }
        }

    // receiving the number of entries
    MPI_Status status;
    nrecv_int.Zero();
    Vector<int64_t> FloatToRecv_tmp;
    for (int i = 0; i < nb_proc; i++)
      if (i != rank)
        MPI_Recv(&nrecv_int(i), 1, MPI_INTEGER, i, 4, comm, &status);

    // waiting until the sends of nsend_int are completed
    for (int i = 0; i < nb_proc; i++)
      if (i != rank)
        MPI_Wait(&request(i), &status);

    for (int i = 0; i < nb_proc; i++)
      {
        if (nrecv_int(i) > 0)
          {
            EntierToRecv(i).Reallocate(nrecv_int(i));
            MPI_Recv(EntierToRecv(i).GetData(), nrecv_int(i),
                     MPI_INTEGER, i, 5, comm, &status);
          }
        else
          EntierToRecv(i).Clear();
      }

    // waiting until the sends of EntierToSend are completed
    for (int i = 0; i < nb_proc; i++)
      if (nsend_int(i) > 0)
        MPI_Wait(&request(i+nb_proc), &status);

    for (int i = 0; i < nb_proc; i++)
      {
        if (nrecv_int(i) > 0)
          {
            int nb_float = EntierToRecv(i)(0);
            if (nb_float > 0)
              {
                FloatToRecv(i).Reallocate(nb_float);
                MpiRecv(comm, FloatToRecv(i), FloatToRecv_tmp,
                        nb_float, i, 6, status);
              }
            else
              FloatToRecv(i).Clear();
          }
        else
          FloatToRecv(i).Clear();
      }

    // waiting until the sends of FloatToSend are completed
    for (int i = 0; i < nb_proc; i++)
      if (nsend_int(i) > 0)
        if (EntierToSend(i)(0) > 0)
          MPI_Wait(&request(i + 2*nb_proc), &status);

    // deleting sending arrays
    for (int i = 0; i < nb_proc; i++)
      {
        nsend_int(i) = 0;
        EntierToSend(i).Clear();
        FloatToSend(i).Clear();
      }
  }


  //! adds the received rows to the local matrix B (ArrayRowSparse storage)
  template<class T>
  void DistributedMatrix_Base<T>
  ::AddReceivedInteractions(const MPI_Comm& comm,
                            Matrix<T, General, ArrayRowSparse>& B,
                            Vector<IVect>& EntierToRecv,
                            Vector<Vector<T> >& FloatToRecv,
                            IVect& nrecv_int, Vector<IVect>& EntierToSend,
                            Vector<Vector<T> >& FloatToSend, IVect& nsend_int,
                            IVect& Glob_to_local, const IVect& OverlappedCol,
                            const IVect& OverlapProcNumber,
                            Vector<IVect>& procB, bool reorder)
  {
    int nb_proc; MPI_Comm_size(comm, &nb_proc);
    int nfac = 1;
    if (reorder)
      nfac = 2;

    for (int i = 0; i < nb_proc; i++)
      {
        if (FloatToRecv(i).GetM() > 0)
          {
            int nrow = EntierToRecv(i)(1);
            // loop over the rows
            nrecv_int(i) = 2; int nrecv_float = 0;
            IVect proc_num;
            for (int j = 0; j < nrow; j++)
              {
                int iglob = EntierToRecv(i)(nrecv_int(i)++);
                int irow = Glob_to_local(iglob);
                int size_row = EntierToRecv(i)(nrecv_int(i)++);
                IVect index(size_row); Vector<T> values(size_row);
                if (reorder)
                  {
                    proc_num.Reallocate(size_row);
                    for (int k = 0; k < size_row; k++)
                      {
                        index(k) = EntierToRecv(i)(nrecv_int(i)++);
                        proc_num(k) = EntierToRecv(i)(nrecv_int(i)++);
                        values(k) = FloatToRecv(i)(nrecv_float++);
                      }
                  }
                else
                  {
                    for (int k = 0; k < size_row; k++)
                      {
                        index(k) = EntierToRecv(i)(nrecv_int(i)++);
                        values(k) = FloatToRecv(i)(nrecv_float++);
                      }
                  }

                // adding to the matrix B if the row is not shared,
                // otherwise the row is sent to the original processor
                if (OverlappedCol(irow) == -1)
                  {
                    if (reorder)
                      {
                        // checking if index is sorted
                        bool index_sorted = true;
                        for (int k = 1; k < size_row; k++)
                          if (index(k) < index(k-1))
                            index_sorted = false;

                        if (!index_sorted)
                          Sort(size_row, index, values, proc_num);

                        int size_rowB = B.GetRowSize(irow);
                        IVect new_col(size_rowB + size_row),
                          new_proc(size_rowB + size_row);
                        Vector<T> new_val(size_rowB + size_row);
                        int p = 0, nb = 0;
                        for (int k = 0; k < size_row; k++)
                          {
                            while ((p < size_rowB)
                                   && (B.Index(irow, p) < index(k)))
                              {
                                new_col(nb) = B.Index(irow, p);
                                new_val(nb) = B.Value(irow, p);
                                new_proc(nb) = procB(irow)(p);
                                p++; nb++;
                              }

                            if ((p < size_rowB)
                                && (B.Index(irow, p) == index(k)))
                              {
                                // the value is added
                                new_col(nb) = index(k);
                                new_val(nb) = values(k) + B.Value(irow, p);
                                new_proc(nb) = procB(irow)(p);
                                nb++; p++;
                              }
                            else
                              {
                                // the value is created
                                new_col(nb) = index(k);
                                new_val(nb) = values(k);
                                new_proc(nb) = proc_num(k);
                                nb++;
                              }
                          }

                        while (p < size_rowB)
                          {
                            new_col(nb) = B.Index(irow, p);
                            new_val(nb) = B.Value(irow, p);
                            new_proc(nb) = procB(irow)(p);
                            p++; nb++;
                          }

                        B.ReallocateRow(irow, nb);
                        procB(irow).Reallocate(nb);
                        for (int k = 0; k < nb; k++)
                          {
                            B.Index(irow, k) = new_col(k);
                            B.Value(irow, k) = new_val(k);
                            procB(irow)(k) = new_proc(k);
                          }
                      }
                    else
                      {
                        if (size_row == 1)
                          B.AddInteraction(irow, index(0), values(0));
                        else
                          B.AddInteractionRow(irow, size_row, index, values);
                      }
                  }
                else
                  {
                    int irow_ = OverlappedCol(irow);
                    int proc = OverlapProcNumber(irow_);

                    int offset_int(2), offset_float(0);
                    if (nsend_int(proc) == 0)
                      {
                        nsend_int(proc) = 2;
                        EntierToSend(proc).Reallocate(nfac*size_row+4);
                        FloatToSend(proc).Reallocate(size_row);
                        EntierToSend(proc)(0) = 0;
                        EntierToSend(proc)(1) = 0;
                      }
                    else
                      {
                        offset_int = EntierToSend(proc).GetM();
                        offset_float = FloatToSend(proc).GetM();
                        EntierToSend(proc).Resize(nfac*size_row+2+offset_int);
                        FloatToSend(proc).Resize(size_row+offset_float);
                      }

                    nsend_int(proc) += nfac*size_row+2;
                    EntierToSend(proc)(0) += size_row;
                    EntierToSend(proc)(1)++;
                    EntierToSend(proc)(offset_int++) = iglob;
                    EntierToSend(proc)(offset_int++) = size_row;
                    for (int k = 0; k < size_row; k++)
                      {
                        EntierToSend(proc)(offset_int++) = index(k);
                        if (reorder)
                          EntierToSend(proc)(offset_int++) = proc_num(k);

                        FloatToSend(proc)(offset_float++) = values(k);
                      }
                  }
              }
          }
      }
  }


  //! adds the received columns to the local matrix B (ArrayColSparse storage)
  template<class T>
  void DistributedMatrix_Base<T>
  ::AddReceivedInteractions(const MPI_Comm& comm,
                            Matrix<T, General, ArrayColSparse>& B,
                            Vector<IVect>& EntierToRecv,
                            Vector<Vector<T> >& FloatToRecv,
                            IVect& nrecv_int, Vector<IVect>& EntierToSend,
                            Vector<Vector<T> >& FloatToSend, IVect& nsend_int,
                            IVect& Glob_to_local, const IVect& OverlappedCol,
                            const IVect& OverlapProcNumber,
                            Vector<IVect>& procB, bool reorder)
  {
    int nb_proc; MPI_Comm_size(comm, &nb_proc);
    int nfac = 1;
    if (reorder)
      nfac = 2;

    for (int i = 0; i < nb_proc; i++)
      {
        if (FloatToRecv(i).GetM() > 0)
          {
            int ncol = EntierToRecv(i)(1);
            // loop over the columns
            nrecv_int(i) = 2; int nrecv_float = 0;
            IVect proc_num;
            for (int j = 0; j < ncol; j++)
              {
                int iglob = EntierToRecv(i)(nrecv_int(i)++);
                int irow = Glob_to_local(iglob);
                int size_col = EntierToRecv(i)(nrecv_int(i)++);
                IVect index(size_col); Vector<T> values(size_col);
                if (reorder)
                  {
                    proc_num.Reallocate(size_col);
                    for (int k = 0; k < size_col; k++)
                      {
                        index(k) = EntierToRecv(i)(nrecv_int(i)++);
                        proc_num(k) = EntierToRecv(i)(nrecv_int(i)++);
                        values(k) = FloatToRecv(i)(nrecv_float++);
                      }
                  }
                else
                  {
                    for (int k = 0; k < size_col; k++)
                      {
                        index(k) = EntierToRecv(i)(nrecv_int(i)++);
                        values(k) = FloatToRecv(i)(nrecv_float++);
                      }
                  }

                // adding to the matrix B if the column is not shared,
                // otherwise the column is sent to the original processor
                if (OverlappedCol(irow) == -1)
                  {
                    if (reorder)
                      {
                        // checking if index is sorted
                        bool index_sorted = true;
                        for (int k = 1; k < size_col; k++)
                          if (index(k) < index(k-1))
                            index_sorted = false;

                        if (!index_sorted)
                          Sort(size_col, index, values, proc_num);

                        int size_colB = B.GetColumnSize(irow);
                        IVect new_col(size_colB + size_col),
                          new_proc(size_colB + size_col);
                        Vector<T> new_val(size_colB + size_col);
                        int p = 0, nb = 0;
                        for (int k = 0; k < size_col; k++)
                          {
                            while ((p < size_colB)
                                   && (B.Index(irow, p) < index(k)))
                              {
                                new_col(nb) = B.Index(irow, p);
                                new_val(nb) = B.Value(irow, p);
                                new_proc(nb) = procB(irow)(p);
                                p++; nb++;
                              }

                            if ((p < size_colB)
                                && (B.Index(irow, p) == index(k)))
                              {
                                // the value is added
                                new_col(nb) = index(k);
                                new_val(nb) = values(k) + B.Value(irow, p);
                                new_proc(nb) = procB(irow)(p);
                                nb++; p++;
                              }
                            else
                              {
                                // the value is created
                                new_col(nb) = index(k);
                                new_val(nb) = values(k);
                                new_proc(nb) = proc_num(k);
                                nb++;
                              }
                          }

                        while (p < size_colB)
                          {
                            new_col(nb) = B.Index(irow, p);
                            new_val(nb) = B.Value(irow, p);
                            new_proc(nb) = procB(irow)(p);
                            p++; nb++;
                          }

                        B.ReallocateColumn(irow, nb);
                        procB(irow).Reallocate(nb);
                        for (int k = 0; k < nb; k++)
                          {
                            B.Index(irow, k) = new_col(k);
                            B.Value(irow, k) = new_val(k);
                            procB(irow)(k) = new_proc(k);
                          }
                      }
                    else
                      {
                        if (size_col == 1)
                          B.AddInteraction(index(0), irow, values(0));
                        else
                          B.AddInteractionColumn(irow, size_col,
                                                 index, values);
                      }
                  }
                else
                  {
                    int irow_ = OverlappedCol(irow);
                    int proc = OverlapProcNumber(irow_);

                    int offset_int(2), offset_float(0);
                    if (nsend_int(proc) == 0)
                      {
                        nsend_int(proc) = 2;
                        EntierToSend(proc).Reallocate(nfac*size_col+4);
                        FloatToSend(proc).Reallocate(size_col);
                        EntierToSend(proc)(0) = 0;
                        EntierToSend(proc)(1) = 0;
                      }
                    else
                      {
                        offset_int = EntierToSend(proc).GetM();
                        offset_float = FloatToSend(proc).GetM();
                        EntierToSend(proc).Resize(nfac*size_col+2+offset_int);
                        FloatToSend(proc).Resize(size_col+offset_float);
                      }

                    nsend_int(proc) += nfac*size_col+2;
                    EntierToSend(proc)(0) += size_col;
                    EntierToSend(proc)(1)++;
                    EntierToSend(proc)(offset_int++) = iglob;
                    EntierToSend(proc)(offset_int++) = size_col;
                    for (int k = 0; k < size_col; k++)
                      {
                        EntierToSend(proc)(offset_int++) = index(k);
                        if (reorder)
                          EntierToSend(proc)(offset_int++) = proc_num(k);

                        FloatToSend(proc)(offset_float++) = values(k);
                      }
                  }
              }
          }
      }
  }


  //! erases the distant entries of the rows/columns that are dropped
  template<class T> template<class TypeDist>
  void DistributedMatrix_Base<T>
  ::EraseDistantEntries(MPI_Comm& comm, const Vector<bool>& IsRowDropped,
                        const Vector<bool>& IsRowDroppedDistant,
                        TypeDist& dist_row_, Vector<IVect>& proc_row_,
                        TypeDist& dist_col_, Vector<IVect>& proc_col_)
  {
    typedef typename TypeDist::value_type Vect1;
    typedef typename Vect1::value_type T1;
    for (int i = 0; i < dist_col_.GetM(); i++)
      if (IsRowDropped(i))
        {
          dist_col_(i).Clear();
          proc_col_(i).Clear();
        }

    for (int j = 0; j < dist_row_.GetM(); j++)
      {
        int nb = 0;
        for (int iloc = 0; iloc < dist_row_(j).GetM(); iloc++)
          if (IsRowDroppedDistant(dist_row_(j).Index(iloc)))
            nb++;

        if (nb > 0)
          {
            int size = dist_row_(j).GetM();
            IVect row_num(size), proc(size); Vector<T1> val(size);
            for (int iloc = 0; iloc < dist_row_(j).GetM(); iloc++)
              {
                row_num(iloc) = dist_row_(j).Index(iloc);
                val(iloc) = dist_row_(j).Value(iloc);
                proc(iloc) = proc_row_(j)(iloc);
              }

            dist_row_(j).Reallocate(size-nb);
            proc_row_(j).Reallocate(size-nb);
            nb = 0;
            for (int iloc = 0; iloc < size; iloc++)
              if (!IsRowDroppedDistant(row_num(iloc)))
                {
                  dist_row_(j).Index(nb) = row_num(iloc);
                  dist_row_(j).Value(nb) = val(iloc);
                  proc_row_(j)(nb) = proc(iloc);
                  nb++;
                }
          }
      }
  }


  /****************
   * Constructors *
   ****************/


  //! default constructor
  template<class T>
  DistributedMatrix_Base<T>::DistributedMatrix_Base()
  {
    GlobalRowNumbers = NULL;
    OverlapProcNumbers = NULL;
    OverlapRowNumbers = NULL;
    ProcSharingRows = NULL;
    SharingRowNumbers = NULL;
    nodl_scalar_ = 0;
    nb_unknowns_scal_ = 1;
    nglob_ = 0;
    comm_ = MPI_COMM_SELF;

    local_number_distant_values = false;
    size_max_distant_row = 0;
    size_max_distant_col = 0;
  }


  //! construction of an m by n matrix (only the distant parts are allocated)
  template<class T>
  DistributedMatrix_Base<T>::DistributedMatrix_Base(int m, int n)
  {
    GlobalRowNumbers = NULL;
    OverlapProcNumbers = NULL;
    OverlapRowNumbers = NULL;
    ProcSharingRows = NULL;
    SharingRowNumbers = NULL;
    nglob_ = m;
    nodl_scalar_ = 0;
    nb_unknowns_scal_ = 1;
    comm_ = MPI_COMM_SELF;

    dist_col.Reallocate(m);
    dist_row.Reallocate(n);
    proc_col.Reallocate(m);
    proc_row.Reallocate(n);

    local_number_distant_values = false;
    size_max_distant_row = 0;
    size_max_distant_col = 0;
  }


  //! initialisation of the pointers
  /*!
    This method tells the matrix which arrays store the global row numbers,
    the overlapped rows (rows already counted on another processor) and the
    rows shared with the other processors.
  */
  template<class T>
  void DistributedMatrix_Base<T>::
  Init(int n, IVect* row_num, IVect* overlap_num, IVect* proc_num,
       int Nvol, int nb_u, IVect* MatchingProc,
       Vector<IVect>* MatchingDofNumber, const MPI_Comm& comm)
  {
    nglob_ = n;
    GlobalRowNumbers = row_num;
    OverlapRowNumbers = overlap_num;
    OverlapProcNumbers = proc_num;

    nodl_scalar_ = Nvol;
    nb_unknowns_scal_ = nb_u;
    ProcSharingRows = MatchingProc;
    SharingRowNumbers = MatchingDofNumber;

    comm_ = comm;
  }


  //! initialisation of the pointers from a DistributedMatrixIntegerArray
  template<class T>
  void DistributedMatrix_Base<T>
  ::Init(DistributedMatrixIntegerArray& Ainfo)
  {
    nglob_ = Ainfo.nglob;
    GlobalRowNumbers = &Ainfo.GlobalRowNumbers;
    OverlapRowNumbers = &Ainfo.OverlapRowNumbers;
    OverlapProcNumbers = &Ainfo.OverlapProcNumbers;

    nodl_scalar_ = Ainfo.nodl_scalar;
    nb_unknowns_scal_ = Ainfo.nb_unknowns_scal;
    ProcSharingRows = &Ainfo.ProcSharingRows;
    SharingRowNumbers = &Ainfo.SharingRowNumbers;

    comm_ = Ainfo.comm;
  }


  //! initialisation, the integer arrays of info are constructed from row_num
  template<class T>
  void DistributedMatrix_Base<T>::
  Init(IVect& row_num, const MPI_Comm& comm,
       DistributedMatrixIntegerArray& info)
  {
    info.GlobalRowNumbers = row_num;
    info.nloc = row_num.GetM(); info.nb_unknowns_scal = 1;
    info.nodl_scalar = row_num.GetM();
    info.comm = comm;

    // the object info is constructed from the global row numbers
    // (contained in row_num)
    info.nglob = DistributedMatrixIntegerArray
      ::ConstructArrays(row_num, info.OverlapRowNumbers,
                        info.OverlapProcNumbers,
                        info.ProcSharingRows, info.SharingRowNumbers, comm);
  }
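

  /* Illustrative usage sketch (the variable names are hypothetical):
     building the integer arrays from the global row numbers of this rank,
     then attaching them to a distributed matrix; info must stay alive while
     the matrix uses it.

     DistributedMatrixIntegerArray info;
     IVect rows;                  // sorted global row numbers of this rank
     DistributedMatrix<double, General, ArrayRowSparse> A;
     A.Reallocate(rows.GetM(), rows.GetM());
     A.Init(rows, MPI_COMM_WORLD, info);  // fills the arrays of info
     A.Init(info);                        // stores the pointers in A
  */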


  //! initialisation of the pointers from another distributed matrix
  template<class T> template<class T0>
  void DistributedMatrix_Base<T>
  ::Init(const DistributedMatrix_Base<T0>& A)
  {
    OverlapRowNumbers = A.OverlapRowNumbers;
    OverlapProcNumbers = A.OverlapProcNumbers;
    GlobalRowNumbers = A.GlobalRowNumbers;
    ProcSharingRows = A.ProcSharingRows;
    SharingRowNumbers = A.SharingRowNumbers;
    nodl_scalar_ = A.nodl_scalar_;
    nb_unknowns_scal_ = A.nb_unknowns_scal_;
    nglob_ = A.nglob_;
    comm_ = A.comm_;
  }


  //! initialisation, the arrays are constructed from the rows of all the processors
  template<class T>
  void DistributedMatrix_Base<T>
  ::Init(Vector<IVect>& all_rows,
         IVect& row_num, IVect& overlap_num, IVect& proc_num,
         IVect& MatchingProc, Vector<IVect>& MatchingDofNumber,
         const MPI_Comm& comm, bool distribute_row)
  {
    int nglob = DistributedMatrixIntegerArray
      ::ConstructArrays(all_rows, row_num, overlap_num, proc_num,
                        MatchingProc, MatchingDofNumber, comm, distribute_row);

    // once the arrays are constructed, the pointers are stored
    nglob_ = nglob;
    GlobalRowNumbers = &row_num;
    OverlapRowNumbers = &overlap_num;
    OverlapProcNumbers = &proc_num;

    nodl_scalar_ = row_num.GetM();
    nb_unknowns_scal_ = 1;
    ProcSharingRows = &MatchingProc;
    SharingRowNumbers = &MatchingDofNumber;

    comm_ = comm;
  }


  //! initialisation, the arrays are constructed from the rows of the current processor
  template<class T>
  void DistributedMatrix_Base<T>
  ::Init(IVect& row_num, IVect& overlap_num, IVect& proc_num,
         IVect& MatchingProc,
         Vector<IVect>& MatchingDofNumber, const MPI_Comm& comm)
  {
    int nglob = DistributedMatrixIntegerArray
      ::ConstructArrays(row_num, overlap_num, proc_num,
                        MatchingProc, MatchingDofNumber, comm);

    // once the arrays are constructed, the pointers are stored
    nglob_ = nglob;
    GlobalRowNumbers = &row_num;
    OverlapRowNumbers = &overlap_num;
    OverlapProcNumbers = &proc_num;

    nodl_scalar_ = row_num.GetM();
    nb_unknowns_scal_ = 1;
    ProcSharingRows = &MatchingProc;
    SharingRowNumbers = &MatchingDofNumber;

    comm_ = comm;
  }


  /*********************
   * Memory Management *
   *********************/


  //! allocation of the distant parts for an m by n matrix
  template<class T>
  void DistributedMatrix_Base<T>
  ::ReallocateDist(int m, int n)
  {
    // previous values are erased for simplicity
    Clear();

    dist_col.Reallocate(m);
    dist_row.Reallocate(n);
    proc_col.Reallocate(m);
    proc_row.Reallocate(n);
  }


  //! changes the size of the matrix, previous entries are kept
  template<class T>
  void DistributedMatrix_Base<T>::Resize(int m, int n)
  {
    if (dist_col.GetM() != m)
      {
        dist_col.Resize(m);
        proc_col.Resize(m);
      }

    if (dist_row.GetM() != n)
      {
        dist_row.Resize(n);
        proc_row.Resize(n);
      }
  }


  //! clears the distant part of the matrix
  template<class T>
  void DistributedMatrix_Base<T>::Clear()
  {
    dist_col.Clear();
    proc_col.Clear();
    dist_row.Clear();
    proc_row.Clear();

    global_row_to_recv.Clear(); global_col_to_recv.Clear();
    ptr_global_row_to_recv.Clear(); ptr_global_col_to_recv.Clear();
    local_row_to_send.Clear(); local_col_to_send.Clear();
    proc_col_to_recv.Clear(); proc_col_to_send.Clear();
    proc_row_to_recv.Clear(); proc_row_to_send.Clear();
    size_max_distant_row = 0;
    size_max_distant_col = 0;
    local_number_distant_values = false;
  }


  /*******************
   * Basic functions *
   *******************/


  //! equality *this = X
  template<class T>
  DistributedMatrix_Base<T>&
  DistributedMatrix_Base<T>::operator=(const DistributedMatrix_Base<T>& X)
  {
    Copy(X);
    return *this;
  }


  //! copies the content of the matrix X
  template<class T> template<class T2>
  void DistributedMatrix_Base<T>
  ::Copy(const DistributedMatrix_Base<T2>& X)
  {
    dist_col.Reallocate(X.dist_col.GetM());
    for (int i = 0; i < dist_col.GetM(); i++)
      dist_col(i).Copy(X.dist_col(i));

    dist_row.Reallocate(X.dist_row.GetM());
    for (int i = 0; i < dist_row.GetM(); i++)
      dist_row(i).Copy(X.dist_row(i));

    proc_col = X.proc_col;
    proc_row = X.proc_row;

    GlobalRowNumbers = X.GlobalRowNumbers;
    OverlapProcNumbers = X.OverlapProcNumbers;
    OverlapRowNumbers = X.OverlapRowNumbers;

    ProcSharingRows = X.ProcSharingRows;
    SharingRowNumbers = X.SharingRowNumbers;

    nodl_scalar_ = X.nodl_scalar_;
    nb_unknowns_scal_ = X.nb_unknowns_scal_;
    nglob_ = X.nglob_;

    comm_ = X.comm_;

    global_row_to_recv = X.global_row_to_recv;
    global_col_to_recv = X.global_col_to_recv;
    ptr_global_row_to_recv = X.ptr_global_row_to_recv;
    ptr_global_col_to_recv = X.ptr_global_col_to_recv;

    local_row_to_send = X.local_row_to_send;
    local_col_to_send = X.local_col_to_send;
    proc_col_to_recv = X.proc_col_to_recv;
    proc_col_to_send = X.proc_col_to_send;
    proc_row_to_recv = X.proc_row_to_recv;
    proc_row_to_send = X.proc_row_to_send;
    local_number_distant_values = X.local_number_distant_values;

    size_max_distant_row = X.size_max_distant_row;
    size_max_distant_col = X.size_max_distant_col;
  }


  //! multiplication of the distant entries by a scalar
  template<class T> template<class T0>
  DistributedMatrix_Base<T>&
  DistributedMatrix_Base<T>::operator*=(const T0& x)
  {
    for (int i = 0; i < dist_col.GetM(); i++)
      dist_col(i) *= x;

    for (int i = 0; i < dist_row.GetM(); i++)
      dist_row(i) *= x;

    return *this;
  }


  //! returns the global row numbers
  template<class T>
  const IVect& DistributedMatrix_Base<T>
  ::GetGlobalRowNumber() const
  {
    if (this->GlobalRowNumbers == NULL)
      {
        cout << "You should call Init of DistributedMatrix" << endl;
        abort();
      }

    return *GlobalRowNumbers;
  }


  //! returns the global row numbers
  template<class T>
  IVect& DistributedMatrix_Base<T>::GetGlobalRowNumber()
  {
    if (this->GlobalRowNumbers == NULL)
      {
        cout << "You should call Init of DistributedMatrix" << endl;
        abort();
      }

    return *GlobalRowNumbers;
  }


  //! returns the rows already counted on another processor
  template<class T>
  const IVect& DistributedMatrix_Base<T>::
  GetOverlapRowNumber() const
  {
    if (this->OverlapRowNumbers == NULL)
      {
        cout << "You should call Init of DistributedMatrix" << endl;
        abort();
      }

    return *OverlapRowNumbers;
  }


  //! returns the rows already counted on another processor
  template<class T>
  IVect& DistributedMatrix_Base<T>::GetOverlapRowNumber()
  {
    if (this->OverlapRowNumbers == NULL)
      {
        cout << "You should call Init of DistributedMatrix" << endl;
        abort();
      }

    return *OverlapRowNumbers;
  }


  //! returns the ranks of the processors holding the overlapped rows
  template<class T>
  const IVect& DistributedMatrix_Base<T>::GetOverlapProcNumber() const
  {
    if (this->OverlapProcNumbers == NULL)
      {
        cout << "You should call Init of DistributedMatrix" << endl;
        abort();
      }

    return *OverlapProcNumbers;
  }


  //! returns the ranks of the processors holding the overlapped rows
  template<class T>
  IVect& DistributedMatrix_Base<T>::GetOverlapProcNumber()
  {
    if (this->OverlapProcNumbers == NULL)
      {
        cout << "You should call Init of DistributedMatrix" << endl;
        abort();
      }

    return *OverlapProcNumbers;
  }


  //! returns the list of processors sharing rows with the current one
  template<class T>
  const IVect& DistributedMatrix_Base<T>
  ::GetProcessorSharingRows() const
  {
    if (this->ProcSharingRows == NULL)
      {
        cout << "You should call Init of DistributedMatrix" << endl;
        abort();
      }

    return *ProcSharingRows;
  }


  //! returns the list of processors sharing rows with the current one
  template<class T>
  IVect& DistributedMatrix_Base<T>::GetProcessorSharingRows()
  {
    if (this->ProcSharingRows == NULL)
      {
        cout << "You should call Init of DistributedMatrix" << endl;
        abort();
      }

    return *ProcSharingRows;
  }


  //! returns the row numbers shared with each processor
  template<class T>
  Vector<IVect>& DistributedMatrix_Base<T>
  ::GetSharingRowNumbers()
  {
    if (this->SharingRowNumbers == NULL)
      {
        cout << "You should call Init of DistributedMatrix" << endl;
        abort();
      }

    return *SharingRowNumbers;
  }


  //! returns the row numbers shared with each processor
  template<class T>
  const Vector<IVect>& DistributedMatrix_Base<T>
  ::GetSharingRowNumbers() const
  {
    if (this->SharingRowNumbers == NULL)
      {
        cout << "You should call Init of DistributedMatrix" << endl;
        abort();
      }

    return *SharingRowNumbers;
  }


  //! returns the memory used to store the distant part of the matrix, in bytes
  template<class T>
  size_t DistributedMatrix_Base<T>::GetMemorySize() const
  {
    size_t taille = sizeof(*this)
      + sizeof(int*)*(proc_row.GetM()+proc_col.GetM());
    taille += sizeof(int*)*(local_row_to_send.GetM() + local_col_to_send.GetM()
                            + dist_row.GetM() + dist_col.GetM());

    taille += global_row_to_recv.GetMemorySize()
      + global_col_to_recv.GetMemorySize()
      + ptr_global_row_to_recv.GetMemorySize()
      + ptr_global_col_to_recv.GetMemorySize()
      + proc_col_to_recv.GetMemorySize() + proc_col_to_send.GetMemorySize()
      + proc_row_to_recv.GetMemorySize() + proc_row_to_send.GetMemorySize();

    for (int i = 0; i < proc_row.GetM(); i++)
      taille += proc_row(i).GetMemorySize();

    for (int i = 0; i < proc_col.GetM(); i++)
      taille += proc_col(i).GetMemorySize();

    for (int i = 0; i < local_row_to_send.GetM(); i++)
      taille += local_row_to_send(i).GetMemorySize();

    for (int i = 0; i < local_col_to_send.GetM(); i++)
      taille += local_col_to_send(i).GetMemorySize();

    for (int i = 0; i < dist_row.GetM(); i++)
      taille += dist_row(i).GetMemorySize();

    for (int i = 0; i < dist_col.GetM(); i++)
      taille += dist_col(i).GetMemorySize();

    return taille;
  }


  /**********************
   * Convenient methods *
   **********************/


  //! returns the number of non-zero entries stored in the distant parts
  /*!
    Only the distant entries (interactions with rows or columns of another
    processor) are counted here.
  */
  template<class T>
  long DistributedMatrix_Base<T>::GetNonZeros() const
  {
    long nnz = 0;
    for (int i = 0; i < dist_col.GetM(); i++)
      nnz += dist_col(i).GetM();

    for (int i = 0; i < dist_row.GetM(); i++)
      nnz += dist_row(i).GetM();

    return nnz;
  }


  //! returns the number of stored values (in equivalent elements of type T)
  /*!
    The integer arrays used to store the distant interactions are counted by
    dividing their size by the ratio sizeof(T)/sizeof(int).
  */
  template<class T>
  long DistributedMatrix_Base<T>::GetDataSize() const
  {
    long size_int = 0, size = 0;
    for (int i = 0; i < dist_col.GetM(); i++)
      {
        size_int += 2*dist_col(i).GetM();
        size += dist_col(i).GetM();
      }

    for (int i = 0; i < dist_row.GetM(); i++)
      {
        size_int += 2*dist_row(i).GetM();
        size += dist_row(i).GetM();
      }

    size_int += 2*(global_row_to_recv.GetM() + global_col_to_recv.GetM());

    for (int i = 0; i < local_row_to_send.GetM(); i++)
      size_int += local_row_to_send(i).GetM();

    for (int i = 0; i < local_col_to_send.GetM(); i++)
      size_int += local_col_to_send(i).GetM();

    size_int += 2*(dist_col.GetM() + dist_row.GetM()) +
      2*(local_row_to_send.GetM() + local_col_to_send.GetM()) + 25;

    int ratio = sizeof(T)/sizeof(int);

    return size + size_int/ratio;
  }
2259 
2261 
2266  template<class T> template<class T0>
2268  ::RemoveSmallEntry(const T0& epsilon)
2269  {
2270  RemoveSmallEntryDistant(epsilon, dist_col, proc_col);
2271  RemoveSmallEntryDistant(epsilon, dist_row, proc_row);
2272  }
2273 
2274 
2276  template<class T>
2278  {
2279  for (int i = 0; i < dist_col.GetM(); i++)
2280  {
2281  dist_col(i).Clear();
2282  proc_col(i).Clear();
2283  }
2284 
2285  for (int i = 0; i < dist_row.GetM(); i++)
2286  {
2287  dist_row(i).Clear();
2288  proc_row(i).Clear();
2289  }
2290 
2291  EraseArrayForMltAdd();
2292  }
2293 
2294 
2295  //! sets values of non-zero entries to 0
2296  template<class T>
2297  void DistributedMatrix_Base<T>::Zero()
2298  {
2299  for (int i = 0; i < dist_col.GetM(); i++)
2300  dist_col(i).Zero();
2301 
2302  for (int i = 0; i < dist_row.GetM(); i++)
2303  dist_row(i).Zero();
2304  }
2305 
2306 
2307  //! sets values of non-zero entries to 0, 1, 2, ...
2308  /*!
2309    the distant non-zero entries are numbered consecutively,
2310    following the convention of the sequential Fill()
2311   */
2312  template<class T>
2313  void DistributedMatrix_Base<T>::Fill()
2314  {
2315  long value(0);
2316  for (int i = 0; i < dist_col.GetM(); i++)
2317  for (int j = 0; j < dist_col(i).GetM(); j++)
2318  {
2319  SetComplexReal(value, dist_col(i).Value(j));
2320  value++;
2321  }
2322 
2323  for (int i = 0; i < dist_row.GetM(); i++)
2324  for (int j = 0; j < dist_row(i).GetM(); j++)
2325  {
2326  SetComplexReal(value, dist_row(i).Value(j));
2327  value++;
2328  }
2329  }
2330 
2331 
2332  //! sets values of non-zero entries to x
2333  /*!
2334    both dist_col and dist_row are filled with the value x
2335   */
2337  template<class T> template<class T0>
2338  void DistributedMatrix_Base<T>::Fill(const T0& x)
2339  {
2340  for (int i = 0; i < dist_col.GetM(); i++)
2341  dist_col(i).Fill(x);
2342 
2343  for (int i = 0; i < dist_row.GetM(); i++)
2344  dist_row(i).Fill(x);
2345  }
2346 
2347 
2348  //! sets values of non-zero entries to random values
2349  template<class T>
2350  void DistributedMatrix_Base<T>::FillRand()
2351  {
2352  for (int i = 0; i < dist_col.GetM(); i++)
2353  dist_col(i).FillRand();
2354 
2355  for (int i = 0; i < dist_row.GetM(); i++)
2356  dist_row(i).FillRand();
2357  }
2358 
2359 
2360  //! writes the distant entries of the matrix in a stream, in coordinate format
2361  template<class T>
2362  void DistributedMatrix_Base<T>
2363  ::WriteText(ostream& FileStream, Vector<int>& IndRow, Vector<int>& IndCol,
2364  Vector<T>& Value, bool cplx) const
2365  {
2366 #ifdef SELDON_CHECK_IO
2367  // Checks if the stream is ready.
2368  if (!FileStream.good())
2369  throw IOError("DistributedMatrix::WriteText(ofstream& FileStream)",
2370  "Stream is not ready.");
2371 #endif
2372 
2373  // extending arrays so that they can hold the non-local part
2374  long N = 0;
2375  for (int i = 0; i < dist_col.GetM(); i++)
2376  N += dist_col(i).GetM();
2377 
2378  for (int i = 0; i < dist_row.GetM(); i++)
2379  N += dist_row(i).GetM();
2380 
2381  long old_size = IndRow.GetM();
2382  IndRow.Resize(old_size+N);
2383  IndCol.Resize(old_size+N);
2384  Value.Resize(old_size+N);
2385 
2386  // filling non-local part
2387  const IVect& global = this->GetGlobalRowNumber();
2388  N = old_size;
2389  for (int i = 0; i < dist_col.GetM(); i++)
2390  for (int j = 0; j < dist_col(i).GetM(); j++)
2391  {
2392  IndRow(N) = global(i) + 1;
2393  IndCol(N) = dist_col(i).Index(j) + 1;
2394  if (local_number_distant_values)
2395  IndCol(N) = global_col_to_recv(dist_col(i).Index(j)) + 1;
2396 
2397  Value(N) = dist_col(i).Value(j);
2398  N++;
2399  }
2400 
2401  for (int i = 0; i < dist_row.GetM(); i++)
2402  for (int j = 0; j < dist_row(i).GetM(); j++)
2403  {
2404  IndCol(N) = global(i) + 1;
2405  IndRow(N) = dist_row(i).Index(j) + 1;
2406  if (local_number_distant_values)
2407  IndRow(N) = global_row_to_recv(dist_row(i).Index(j)) + 1;
2408 
2409  Value(N) = dist_row(i).Value(j);
2410  N++;
2411  }
2412 
2413  // changing numbers of local part
2414  for (int i = 0; i < old_size; i++)
2415  {
2416  IndRow(i) = global(IndRow(i)) + 1;
2417  IndCol(i) = global(IndCol(i)) + 1;
2418  }
2419 
2420  // writing values on the stream
2421  WriteCoordinateMatrix(FileStream, IndRow, IndCol, Value, cplx);
2422  }
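 
  /* Illustrative usage (assumed setup): the derived matrix fills IndRow,
     IndCol and Value with its local entries (local numbers), then this
     method appends the distant entries and writes everything with global,
     1-based numbers:
 
     \code
     Vector<int> IndRow, IndCol;
     Vector<double> Value;
     // ... coordinate arrays filled from the local part ...
     ofstream file_out("matrix.dat");
     A.WriteText(file_out, IndRow, IndCol, Value, false);
     \endcode
  */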
2423 
2424 
2425  /********************************************
2426  * Methods called for matrix-vector product *
2427  ********************************************/
2428 
2429 
2430  //! prepares a distributed matrix-vector product Y = beta*Y + alpha*A*X
2431  template<class T>
2432  template<class T2, class T3, class T4, class Storage4, class Allocator4>
2433  void DistributedMatrix_Base<T>::
2434  InitMltAdd(bool& proceed_distant_row, bool& proceed_distant_col,
2435  const Vector<T2>& X, Vector<T2>& Xcol,
2436  const T3& beta, Vector<T4, Storage4, Allocator4>& Y,
2437  Vector<T4, Storage4, Allocator4>& Yres) const
2438  {
2439  const MPI_Comm& comm = this->GetCommunicator();
2440  proceed_distant_row = true;
2441  proceed_distant_col = true;
2442  int nb_proc; MPI_Comm_size(comm, &nb_proc);
2443  if (nb_proc == 1)
2444  {
2445  proceed_distant_row = false;
2446  proceed_distant_col = false;
2447  }
2448  else
2449  {
2450  if (!this->IsReadyForMltAdd())
2451  {
2452  // preparing the matrix vector product
2453  // this method will be called once for
2454  // the first matrix-vector product
2455  const_cast<DistributedMatrix_Base<T>& >(*this)
2456  .PrepareMltAdd();
2457  }
2458 
2459  if (this->GetMaxDataSizeDistantCol() == 0)
2460  proceed_distant_col = false;
2461 
2462  if (this->GetMaxDataSizeDistantRow() == 0)
2463  proceed_distant_row = false;
2464  }
2465 
2466  T3 zero;
2467  SetComplexZero(zero);
2468 
2469  if (beta == zero)
2470  Y.SetData(Yres.GetM(), Yres.GetData());
2471  else
2472  Y.Reallocate(Yres.GetM());
2473 
2474  Y.Fill(zero);
2475 
2476  // scattering column values
2477  if (proceed_distant_col)
2478  this->ScatterColValues(X, Xcol);
2479  }
2480 
2481 
2482  //! finalizes a distributed matrix-vector product Y = beta*Y + alpha*A*X
2483  template<class T>
2484  template<class T2, class T3, class T4, class Storage4, class Allocator4>
2485  void DistributedMatrix_Base<T>::
2486  FinalizeMltAdd(bool proceed_distant_row, bool proceed_distant_col,
2487  const Vector<T2>& X, Vector<T2>& Xcol, const T3& alpha,
2488  const T3& beta, Vector<T4, Storage4, Allocator4>& Y,
2489  Vector<T4, Storage4, Allocator4>& Yres, bool assemble) const
2490  {
2491  // adding contributions of distant columns
2492  if (proceed_distant_col)
2493  this->MltAddCol(SeldonNoTrans, Xcol, Y);
2494 
2495  // contributions of distant rows
2496  Vector<T4> Yrow;
2497  if (proceed_distant_row)
2498  this->MltAddRow(SeldonNoTrans, X, Yrow);
2499 
2500  // assembling row values
2501  if (proceed_distant_row)
2502  this->AssembleRowValues(Yrow, Y);
2503 
2504  // assembling rows shared between processors
2505  if (assemble)
2506  this->AssembleVec(Y);
2507 
2508  T3 zero;
2509  SetComplexZero(zero);
2510 
2511  if (beta == zero)
2512  {
2513  Mlt(alpha, Y);
2514  Y.Nullify();
2515  }
2516  else
2517  {
2518  Mlt(beta, Yres);
2519  Add(alpha, Y, Yres);
2520  }
2521  }
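 
  /* Sketch of the sequence framing the distributed product
     Yres = beta*Yres + alpha*A*X (illustrative, with assumed names; the
     actual call sites are the MltAddVector functions for DistributedMatrix):
 
     \code
     bool proceed_row, proceed_col;
     Vector<double> Xcol, Y;
     A.InitMltAdd(proceed_row, proceed_col, X, Xcol, beta, Y, Yres);
     // ... product with the local part of A accumulated in Y ...
     A.FinalizeMltAdd(proceed_row, proceed_col, X, Xcol,
                      alpha, beta, Y, Yres, true);
     \endcode
  */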
2522 
2523 
2524  //! prepares a distributed matrix-vector product with A, transpose(A) or transconj(A)
2525  template<class T>
2526  template<class T2, class T3, class T4, class Storage4, class Allocator4>
2527  void DistributedMatrix_Base<T>::
2528  InitMltAdd(bool& proceed_distant_row, bool& proceed_distant_col,
2529  const SeldonTranspose& trans, const Vector<T2>& X, Vector<T2>& Xrow,
2530  const T3& beta, Vector<T4, Storage4, Allocator4>& Y,
2531  Vector<T4, Storage4, Allocator4>& Yres) const
2532  {
2533  const MPI_Comm& comm = this->GetCommunicator();
2534  proceed_distant_row = true;
2535  proceed_distant_col = true;
2536  int nb_proc; MPI_Comm_size(comm, &nb_proc);
2537  if (nb_proc == 1)
2538  {
2539  proceed_distant_row = false;
2540  proceed_distant_col = false;
2541  }
2542  else
2543  {
2544  if (!this->IsReadyForMltAdd())
2545  {
2546  // preparing the matrix vector product
2547  // this method will be called once for
2548  // the first matrix-vector product
2549  const_cast<DistributedMatrix_Base<T>& >(*this)
2550  .PrepareMltAdd();
2551  }
2552 
2553  if (this->GetMaxDataSizeDistantCol() == 0)
2554  proceed_distant_col = false;
2555 
2556  if (this->GetMaxDataSizeDistantRow() == 0)
2557  proceed_distant_row = false;
2558  }
2559 
2560  T3 zero;
2561  SetComplexZero(zero);
2562 
2563  if (beta == zero)
2564  Y.SetData(Yres.GetM(), Yres.GetData());
2565  else
2566  Y.Reallocate(Yres.GetM());
2567 
2568  Y.Fill(zero);
2569 
2570  // scattering row values
2571  if (proceed_distant_row)
2572  this->ScatterRowValues(X, Xrow);
2573  }
2574 
2575 
2576  //! finalizes a distributed matrix-vector product with A, transpose(A) or transconj(A)
2577  template<class T>
2578  template<class T2, class T3, class T4, class Storage4, class Allocator4>
2579  void DistributedMatrix_Base<T>::
2580  FinalizeMltAdd(bool proceed_distant_row, bool proceed_distant_col,
2581  const SeldonTranspose& trans, const Vector<T2>& X, Vector<T2>& Xrow,
2582  const T3& alpha, const T3& beta, Vector<T4, Storage4, Allocator4>& Y,
2583  Vector<T4, Storage4, Allocator4>& Yres, bool assemble) const
2584  {
2585  // adding contributions of distant rows
2586  if (proceed_distant_row)
2587  this->MltAddRow(trans, Xrow, Y);
2588 
2589  // contributions of distant columns
2590  Vector<T4> Ycol;
2591  if (proceed_distant_col)
2592  this->MltAddCol(trans, X, Ycol);
2593 
2594  // assembling column values
2595  if (proceed_distant_col)
2596  this->AssembleColValues(Ycol, Y);
2597 
2598  // assembling rows shared between processors
2599  if (assemble)
2600  this->AssembleVec(Y);
2601 
2602  T3 zero;
2603  SetComplexZero(zero);
2604 
2605  if (beta == zero)
2606  {
2607  Mlt(alpha, Y);
2608  Y.Nullify();
2609  }
2610  else
2611  {
2612  Mlt(beta, Yres);
2613  Add(alpha, Y, Yres);
2614  }
2615  }
2616 
2617 
2618  //! prepares the parallel evaluation of a minimum over shared rows (MltMin)
2619  template<class T>
2620  void DistributedMatrix_Base<T>
2621  ::InitMltMin(Vector<int>& Y, Vector<int>& Yproc,
2622  Vector<int>& Xcol, Vector<int>& Xcol_proc) const
2623  {
2624  const MPI_Comm& comm = this->GetCommunicator();
2625  if (!this->IsReadyForMltAdd())
2626  {
2627  // preparing the matrix vector product
2628  // this method will be called once for the first matrix-vector product
2629  const_cast<DistributedMatrix_Base<T>& >(*this)
2630  .PrepareMltAdd();
2631  }
2632 
2633  // scattering column values
2634  int nb_proc; MPI_Comm_size(comm, &nb_proc);
2635  if (nb_proc > 1)
2636  {
2637  this->ScatterColValues(Y, Xcol);
2638  this->ScatterColValues(Yproc, Xcol_proc);
2639  }
2640  }
2641 
2642 
2643  //! finalizes the parallel evaluation of a minimum over shared rows (MltMin)
2644  template<class T>
2645  void DistributedMatrix_Base<T>
2646  ::FinalizeMltMin(Vector<int>& Y, Vector<int>& Yproc,
2647  Vector<int>& Xcol, Vector<int>& Xcol_proc) const
2648  {
2649  const MPI_Comm& comm = this->GetCommunicator();
2650 
2651  int N = this->global_col_to_recv.GetM();
2652  Vector<int> Yrow(N), Yrow_proc(N);
2653 
2654  // contributions of distant columns
2655  int nb_proc; MPI_Comm_size(comm, &nb_proc);
2656  Yrow.Fill(0); Yrow_proc.Fill(nb_proc);
2657  for (int i = 0; i < this->dist_col.GetM(); i++)
2658  for (int j = 0; j < this->dist_col(i).GetM(); j++)
2659  {
2660  int k = this->dist_col(i).Index(j);
2661  if (Xcol_proc(k) < Yproc(i))
2662  {
2663  Yproc(i) = Xcol_proc(k);
2664  Y(i) = Xcol(k);
2665  }
2666  else if (Xcol_proc(k) == Yproc(i))
2667  {
2668  if (Xcol(k) < Y(i))
2669  Y(i) = Xcol(k);
2670  }
2671 
2672  if (Yproc(i) < Yrow_proc(k))
2673  {
2674  Yrow_proc(k) = Yproc(i);
2675  Yrow(k) = Y(i);
2676  }
2677  else if (Yproc(i) == Yrow_proc(k))
2678  {
2679  if (Y(i) < Yrow(k))
2680  Yrow(k) = Y(i);
2681  }
2682  }
2683 
2684  this->AssembleValuesMin(Yrow, Yrow_proc,
2685  global_col_to_recv, ptr_global_col_to_recv,
2686  proc_col_to_recv, local_col_to_send,
2687  proc_col_to_send, Y, Yproc);
2688 
2689  // contributions of distant rows
2690  if (nb_proc > 1)
2691  {
2692  ScatterRowValues(Y, Xcol);
2693  ScatterRowValues(Yproc, Xcol_proc);
2694  }
2695 
2696  N = global_row_to_recv.GetM();
2697  Yrow.Reallocate(N); Yrow_proc.Reallocate(N);
2698  Yrow.Fill(0); Yrow_proc.Fill(nb_proc);
2699  for (int i = 0; i < dist_row.GetM(); i++)
2700  for (int j = 0; j < dist_row(i).GetM(); j++)
2701  {
2702  int jrow = dist_row(i).Index(j);
2703  if (Xcol_proc(jrow) < Yproc(i))
2704  {
2705  Yproc(i) = Xcol_proc(jrow);
2706  Y(i) = Xcol(jrow);
2707  }
2708  else if (Xcol_proc(jrow) == Yproc(i))
2709  {
2710  if (Xcol(jrow) < Y(i))
2711  Y(i) = Xcol(jrow);
2712  }
2713 
2714  if (Yproc(i) < Yrow_proc(jrow))
2715  {
2716  Yrow_proc(jrow) = Yproc(i);
2717  Yrow(jrow) = Y(i);
2718  }
2719  else if (Yproc(i) == Yrow_proc(jrow))
2720  {
2721  if (Y(i) < Yrow(jrow))
2722  Yrow(jrow) = Y(i);
2723  }
2724  }
2725 
2726  // assembling row values
2727  if (nb_proc > 1)
2728  {
2729  AssembleValuesMin(Yrow, Yrow_proc,
2730  global_row_to_recv, ptr_global_row_to_recv,
2731  proc_row_to_recv, local_row_to_send,
2732  proc_row_to_send, Y, Yproc);
2733 
2734  // assembling rows shared between processors
2735  AssembleVecMin(Y, Yproc);
2736  }
2737  }
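 
  /* Illustration of the minimisation performed above: each row i carries a
     pair (Yproc(i), Y(i)); when a row is shared, the pair with the smallest
     processor number wins and, for equal processor numbers, the smallest
     value is kept. For instance, (1, 8) against (0, 12) yields (0, 12),
     whereas (1, 8) against (1, 5) yields (1, 5). */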
2738 
2739 
2740  /****************************************************
2741  * Methods called for various functions on matrices *
2742  ****************************************************/
2743 
2744 
2745  //! adds alpha*A to the distant part of the current matrix
2746  template<class T> template<class T0, class T1>
2747  void DistributedMatrix_Base<T>
2748  ::AddDistributedMatrix(const T0& alpha,
2749  const DistributedMatrix_Base<T1>& A)
2750  {
2751  const_cast<DistributedMatrix_Base<T1>& >(A)
2752  .SwitchToGlobalNumbers();
2753 
2754  this->SwitchToGlobalNumbers();
2755 
2756  // adding distant interactions
2757  for (int i = 0; i < A.dist_row.GetM(); i++)
2758  for (int j = 0; j < A.dist_row(i).GetM(); j++)
2759  this->AddRowDistantInteraction(A.dist_row(i).Index(j), i,
2760  A.proc_row(i)(j),
2761  alpha*A.dist_row(i).Value(j));
2762 
2763  for (int i = 0; i < A.dist_col.GetM(); i++)
2764  for (int j = 0; j < A.dist_col(i).GetM(); j++)
2765  this->AddDistantInteraction(i, A.dist_col(i).Index(j),
2766  A.proc_col(i)(j),
2767  alpha*A.dist_col(i).Value(j));
2768  }
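 
  /* Illustrative usage (assumed names): contribution of the distant part
     when computing B = B + alpha*A for two distributed matrices sharing
     the same row distribution:
 
     \code
     B.AddDistributedMatrix(alpha, A);
     // the local parts are added separately with the sequential AddMatrix
     \endcode
  */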
2769 
2770 
2771  //! computes the maximal absolute value of the distant non-zero entries
2772  template<class T>
2773  void DistributedMatrix_Base<T>
2774  ::GetMaxAbsDistant(typename ClassComplexType<T>::Treal& res) const
2775  {
2776  const MPI_Comm& comm = this->GetCommunicator();
2777  int nb_proc; MPI_Comm_size(comm, &nb_proc);
2778  if (nb_proc == 1)
2779  return;
2780 
2781  for (int i = 0; i < this->dist_row.GetM(); i++)
2782  for (int j = 0; j < this->dist_row(i).GetM(); j++)
2783  res = max(res, abs(this->dist_row(i).Value(j)));
2784 
2785  for (int i = 0; i < this->dist_col.GetM(); i++)
2786  for (int j = 0; j < this->dist_col(i).GetM(); j++)
2787  res = max(res, abs(this->dist_col(i).Value(j)));
2788 
2789  // selecting the maximum between processors
2790  Vector<int64_t> xtmp;
2791  typename ClassComplexType<T>::Treal amax(0);
2792  MpiAllreduce(comm, &res, xtmp, &amax, 1, MPI_MAX);
2793 
2794  res = amax;
2795  }
2796 
2797 
2798  //! adds the contribution of the distant entries to the sum of rows
2799  template<class T> template<class T0>
2800  void DistributedMatrix_Base<T>::AddRowSumDistant(Vector<T0>& vec_sum) const
2801  {
2802  const MPI_Comm& comm = this->GetCommunicator();
2803  int nb_proc; MPI_Comm_size(comm, &nb_proc);
2804  if (nb_proc == 1)
2805  return;
2806 
2807  if (!this->IsReadyForMltAdd())
2808  {
2809  // preparing the matrix vector product
2810  // this method will be called once for the first matrix-vector product
2811  const_cast<DistributedMatrix_Base<T>& >(*this)
2812  .PrepareMltAdd();
2813  }
2814 
2815  this->GetRowSumDistantCol(vec_sum);
2816  this->GetRowSumDistantRow(vec_sum);
2817 
2818  this->AssembleVec(vec_sum);
2819  }
2820 
2821 
2822  //! adds the contribution of the distant entries to the sum of columns
2823  template<class T> template<class T0>
2824  void DistributedMatrix_Base<T>::AddColSumDistant(Vector<T0>& vec_sum) const
2825  {
2826  const MPI_Comm& comm = this->GetCommunicator();
2827  int nb_proc; MPI_Comm_size(comm, &nb_proc);
2828  if (nb_proc == 1)
2829  return;
2830 
2831  if (!this->IsReadyForMltAdd())
2832  {
2833  // preparing the matrix vector product
2834  // this method will be called once for the first matrix-vector product
2835  const_cast<DistributedMatrix_Base<T>& >(*this)
2836  .PrepareMltAdd();
2837  }
2838 
2839  this->GetColSumDistantCol(vec_sum);
2840  this->GetColSumDistantRow(vec_sum);
2841 
2842  this->AssembleVec(vec_sum);
2843  }
2844 
2845 
2846  //! adds the contribution of the distant entries to the sums of rows and columns
2847  template<class T> template<class T0>
2848  void DistributedMatrix_Base<T>::AddRowColSumDistant(Vector<T0>& sum_row,
2849  Vector<T0>& sum_col) const
2850  {
2851  const MPI_Comm& comm = this->GetCommunicator();
2852  int nb_proc; MPI_Comm_size(comm, &nb_proc);
2853  if (nb_proc == 1)
2854  return;
2855 
2856  if (!this->IsReadyForMltAdd())
2857  {
2858  // preparing the matrix vector product
2859  // this method will be called once for the first matrix-vector product
2860  const_cast<DistributedMatrix_Base<T>& >(*this)
2861  .PrepareMltAdd();
2862  }
2863 
2864  this->GetRowSumDistantCol(sum_row);
2865  this->GetRowSumDistantRow(sum_row);
2866 
2867  this->GetColSumDistantCol(sum_col);
2868  this->GetColSumDistantRow(sum_col);
2869 
2870  this->AssembleVec(sum_row);
2871  this->AssembleVec(sum_col);
2872  }
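 
  /* Illustrative sketch: the AddRowSumDistant, AddColSumDistant and
     AddRowColSumDistant methods complete sums already computed on the
     local part (setup code assumed):
 
     \code
     Vector<double> sum_row(A.GetLocalM());
     sum_row.Zero();
     // ... sum_row(i) += |a_ij| over the local entries ...
     A.AddRowSumDistant(sum_row); // distant entries added, shared rows assembled
     \endcode
  */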
2873 
2874 
2875  //! swaps the parallel data of the current matrix with the given arrays
2876  template<class T>
2877  void DistributedMatrix_Base<T>
2878  ::ExchangeParallelData(long& smax_row, long& smax_col, bool& local_number,
2879  Vector<Vector<T, VectSparse>, VectFull,
2880  NewAlloc<Vector<T, VectSparse> > >& dist_row_,
2881  Vector<Vector<T, VectSparse>, VectFull,
2882  NewAlloc<Vector<T, VectSparse> > >& dist_col_,
2883  Vector<IVect>& proc_row_, Vector<IVect>& proc_col_,
2884  IVect& global_row_to_recv_, IVect& global_col_to_recv_,
2885  IVect& ptr_global_row_to_recv_, IVect& ptr_global_col_to_recv_,
2886  Vector<IVect>& local_row_to_send_, Vector<IVect>& local_col_to_send_,
2887  IVect& proc_row_to_recv_, IVect& proc_col_to_recv_,
2888  IVect& proc_row_to_send_, IVect& proc_col_to_send_)
2889  {
2890  const MPI_Comm& comm = this->GetCommunicator();
2891  int nb_proc; MPI_Comm_size(comm, &nb_proc);
2892  if (nb_proc == 1)
2893  return;
2894 
2895  long stmp = smax_row;
2896  smax_row = size_max_distant_row;
2897  size_max_distant_row = stmp;
2898 
2899  stmp = smax_col;
2900  smax_col = size_max_distant_col;
2901  size_max_distant_col = stmp;
2902 
2903  bool btmp = local_number;
2904  local_number = local_number_distant_values;
2905  local_number_distant_values = btmp;
2906 
2907  SwapPointer(dist_row, dist_row_);
2908  SwapPointer(dist_col, dist_col_);
2909  SwapPointer(proc_row, proc_row_);
2910  SwapPointer(proc_col, proc_col_);
2911  SwapPointer(global_row_to_recv, global_row_to_recv_);
2912  SwapPointer(global_col_to_recv, global_col_to_recv_);
2913  SwapPointer(ptr_global_row_to_recv, ptr_global_row_to_recv_);
2914  SwapPointer(ptr_global_col_to_recv, ptr_global_col_to_recv_);
2915  SwapPointer(local_row_to_send, local_row_to_send_);
2916  SwapPointer(local_col_to_send, local_col_to_send_);
2917  SwapPointer(proc_row_to_recv, proc_row_to_recv_);
2918  SwapPointer(proc_col_to_recv, proc_col_to_recv_);
2919  SwapPointer(proc_row_to_send, proc_row_to_send_);
2920  SwapPointer(proc_col_to_send, proc_col_to_send_);
2921  }
2922 
2923 
2924  //! conjugates the distant non-zero entries of the matrix
2925  template<class T>
2926  void DistributedMatrix_Base<T>::ConjugateDistant()
2927  {
2928  const MPI_Comm& comm = this->GetCommunicator();
2929  int nb_proc; MPI_Comm_size(comm, &nb_proc);
2930  if (nb_proc == 1)
2931  return;
2932 
2933  for (int i = 0; i < this->dist_row.GetM(); i++)
2934  for (int j = 0; j < this->dist_row(i).GetM(); j++)
2935  this->dist_row(i).Value(j) = conjugate(this->dist_row(i).Value(j));
2936 
2937  for (int i = 0; i < this->dist_col.GetM(); i++)
2938  for (int j = 0; j < this->dist_col(i).GetM(); j++)
2939  this->dist_col(i).Value(j) = conjugate(this->dist_col(i).Value(j));
2940  }
2941 
2942 
2943  //! fills the distant part of the current matrix with the transpose of A
2944  template<class T>
2945  void DistributedMatrix_Base<T>::TransposeDistant(const DistributedMatrix_Base<T>& A)
2946  {
2947  const MPI_Comm& comm = A.GetCommunicator();
2948  int nb_proc; MPI_Comm_size(comm, &nb_proc);
2949  if (nb_proc == 1)
2950  return;
2951 
2952  this->Init(A);
2953  this->dist_col = A.dist_row;
2954  this->proc_col = A.proc_row;
2955  this->global_col_to_recv = A.global_row_to_recv;
2956  this->ptr_global_col_to_recv = A.ptr_global_row_to_recv;
2957  this->local_col_to_send = A.local_row_to_send;
2958  this->proc_col_to_recv = A.proc_row_to_recv;
2959  this->proc_col_to_send = A.proc_row_to_send;
2960 
2961  this->dist_row = A.dist_col;
2962  this->proc_row = A.proc_col;
2963  this->global_row_to_recv = A.global_col_to_recv;
2964  this->ptr_global_row_to_recv = A.ptr_global_col_to_recv;
2965  this->local_row_to_send = A.local_col_to_send;
2966  this->proc_row_to_recv = A.proc_col_to_recv;
2967  this->proc_row_to_send = A.proc_col_to_send;
2968  this->local_number_distant_values = A.local_number_distant_values;
2969 
2970  this->size_max_distant_row = A.size_max_distant_col;
2971  this->size_max_distant_col = A.size_max_distant_row;
2972  }
2973 
2974 
2975  //! multiplies the distant part of the matrix by diag(Drow) on the left
2976  template<class T> template<class T0>
2977  void DistributedMatrix_Base<T>::ScaleLeftDistant(const Vector<T0>& Drow)
2978  {
2979  MPI_Comm& comm = this->GetCommunicator();
2980  int nb_proc; MPI_Comm_size(comm, &nb_proc);
2981  if (nb_proc == 1)
2982  return;
2983 
2984  if (!this->IsReadyForMltAdd())
2985  {
2986  // preparing the matrix vector product
2987  // this method will be called once for the first matrix-vector product
2988  const_cast<DistributedMatrix_Base<T>& >(*this)
2989  .PrepareMltAdd();
2990  }
2991 
2992  // scaling distant columns
2993  for (int i = 0; i < this->dist_col.GetM(); i++)
2994  this->dist_col(i) *= Drow(i);
2995 
2996  // scaling distant rows
2997  Vector<T0> Drow_glob;
2998  this->ScatterRowValues(Drow, Drow_glob);
2999 
3000  for (int i = 0; i < this->dist_row.GetM(); i++)
3001  for (int j = 0; j < this->dist_row(i).GetM(); j++)
3002  this->dist_row(i).Value(j) *= Drow_glob(this->dist_row(i).Index(j));
3003  }
3004 
3005 
3006  //! multiplies the distant part of the matrix by diag(Dcol) on the right
3007  template<class T> template<class T0>
3008  void DistributedMatrix_Base<T>::ScaleRightDistant(const Vector<T0>& Dcol)
3009  {
3010  MPI_Comm& comm = this->GetCommunicator();
3011  int nb_proc; MPI_Comm_size(comm, &nb_proc);
3012  if (nb_proc == 1)
3013  return;
3014 
3015  if (!this->IsReadyForMltAdd())
3016  {
3017  // preparing the matrix vector product
3018  // this method will be called once for the first matrix-vector product
3019  const_cast<DistributedMatrix_Base<T>& >(*this)
3020  .PrepareMltAdd();
3021  }
3022 
3023  // scaling distant rows
3024  for (int i = 0; i < this->dist_row.GetM(); i++)
3025  this->dist_row(i) *= Dcol(i);
3026 
3027  // scaling distant columns
3028  Vector<T0> Dcol_glob;
3029  this->ScatterColValues(Dcol, Dcol_glob);
3030 
3031  for (int i = 0; i < this->dist_col.GetM(); i++)
3032  for (int j = 0; j < this->dist_col(i).GetM(); j++)
3033  this->dist_col(i).Value(j) *= Dcol_glob(this->dist_col(i).Index(j));
3034  }
3035 
3036 
3037  //! multiplies the distant part by diag(Drow) on the left and diag(Dcol) on the right
3038  template<class T> template<class T0, class T1>
3039  void DistributedMatrix_Base<T>
3040  ::ScaleDistant(const Vector<T0>& Drow, const Vector<T1>& Dcol)
3041  {
3042  MPI_Comm& comm = this->GetCommunicator();
3043  int nb_proc; MPI_Comm_size(comm, &nb_proc);
3044  if (nb_proc == 1)
3045  return;
3046 
3047  if (!this->IsReadyForMltAdd())
3048  {
3049  // preparing the matrix vector product
3050  // this method will be called once for the first matrix-vector product
3051  const_cast<DistributedMatrix_Base<T>& >(*this)
3052  .PrepareMltAdd();
3053  }
3054 
3055  // scaling distant columns
3056  for (int i = 0; i < this->dist_col.GetM(); i++)
3057  this->dist_col(i) *= Drow(i);
3058 
3059  // scaling distant rows
3060  Vector<T0> Drow_glob;
3061  this->ScatterRowValues(Drow, Drow_glob);
3062 
3063  for (int i = 0; i < this->dist_row.GetM(); i++)
3064  for (int j = 0; j < this->dist_row(i).GetM(); j++)
3065  this->dist_row(i).Value(j) *= Drow_glob(this->dist_row(i).Index(j));
3066 
3067  // scaling distant rows
3068  for (int i = 0; i < this->dist_row.GetM(); i++)
3069  this->dist_row(i) *= Dcol(i);
3070 
3071  // scaling distant columns
3072  Vector<T1> Dcol_glob;
3073  this->ScatterColValues(Dcol, Dcol_glob);
3074 
3075  for (int i = 0; i < this->dist_col.GetM(); i++)
3076  for (int j = 0; j < this->dist_col(i).GetM(); j++)
3077  this->dist_col(i).Value(j) *= Dcol_glob(this->dist_col(i).Index(j));
3078  }
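 
  /* Illustrative usage: the three scaling methods handle the distant part
     of A = diag(Drow) * A * diag(Dcol), each distant entry a_ij being
     multiplied by Drow(i) and/or Dcol(j); the local part is scaled by the
     sequential functions (call assumed):
 
     \code
     A.ScaleDistant(Drow, Dcol);
     \endcode
  */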
3079 
3080 
3081  //! erases the distant entries of the columns listed in num
3082  template<class T>
3083  void DistributedMatrix_Base<T>::EraseColDistant(const IVect& num, bool sym)
3084  {
3085  MPI_Comm& comm = this->GetCommunicator();
3086  int nb_proc; MPI_Comm_size(comm, &nb_proc);
3087  if (nb_proc == 1)
3088  return;
3089 
3090  if (!this->IsReadyForMltAdd())
3091  this->PrepareMltAdd();
3092 
3093  Vector<bool> IsColDropped(this->GetLocalM());
3094  IsColDropped.Fill(false);
3095  for (int i = 0; i < num.GetM(); i++)
3096  IsColDropped(num(i)) = true;
3097 
3098  Vector<bool> IsColDroppedDistant;
3099  this->ScatterColValues(IsColDropped, IsColDroppedDistant);
3100 
3101  EraseDistantEntries(comm, IsColDropped, IsColDroppedDistant,
3102  this->dist_col, this->proc_col, this->dist_row, this->proc_row);
3103 
3104  if (sym)
3105  {
3106  this->ScatterRowValues(IsColDropped, IsColDroppedDistant);
3107 
3108  EraseDistantEntries(comm, IsColDropped, IsColDroppedDistant,
3109  this->dist_row, this->proc_row, this->dist_col, this->proc_col);
3110  }
3111  }
3112 
3113 
3114  //! erases the distant entries of the rows listed in num
3115  template<class T>
3116  void DistributedMatrix_Base<T>::EraseRowDistant(const IVect& num, bool sym)
3117  {
3118  MPI_Comm& comm = this->GetCommunicator();
3119  int nb_proc; MPI_Comm_size(comm, &nb_proc);
3120  if (nb_proc == 1)
3121  return;
3122 
3123  if (!this->IsReadyForMltAdd())
3124  this->PrepareMltAdd();
3125 
3126  Vector<bool> IsRowDropped(this->GetLocalM());
3127  IsRowDropped.Fill(false);
3128  for (int i = 0; i < num.GetM(); i++)
3129  IsRowDropped(num(i)) = true;
3130 
3131  Vector<bool> IsRowDroppedDistant;
3132  this->ScatterRowValues(IsRowDropped, IsRowDroppedDistant);
3133 
3134  EraseDistantEntries(comm, IsRowDropped, IsRowDroppedDistant,
3135  this->dist_row, this->proc_row, this->dist_col, this->proc_col);
3136 
3137  if (sym)
3138  {
3139  this->ScatterColValues(IsRowDropped, IsRowDroppedDistant);
3140 
3141  EraseDistantEntries(comm, IsRowDropped, IsRowDroppedDistant,
3142  this->dist_col, this->proc_col, this->dist_row, this->proc_row);
3143  }
3144  }
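 
  // Illustrative usage (assumed names): dropping rows and columns from the
  // distant part, e.g. when eliminating Dirichlet degrees of freedom:
  //   A.EraseRowDistant(num, false);
  //   A.EraseColDistant(num, false);
  // with sym = true, the symmetric counterparts are erased as well.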
3145 
3146 
3147  //! copies the distant part of the sub-matrix of A defined by row and col
3148  template<class T> template<class T1>
3149  void DistributedMatrix_Base<T>
3150  ::CopySubDistant(const DistributedMatrix_Base<T1>& A,
3151  const IVect& row, const IVect& col, bool sym)
3152  {
3153  MPI_Comm& comm = this->GetCommunicator();
3154  int nb_proc; MPI_Comm_size(comm, &nb_proc);
3155  if (nb_proc == 1)
3156  return;
3157 
3158  if (!A.IsReadyForMltAdd())
3159  const_cast<DistributedMatrix_Base<T1>& >(A)
3160  .PrepareMltAdd();
3161 
3162  this->Init(A);
3163  int m = A.GetLocalM(), n = A.GetLocalN();
3164  Vector<bool> RowKept(m), ColKept(n);
3165  RowKept.Fill(false); ColKept.Fill(false);
3166  for (int i = 0; i < row.GetM(); i++)
3167  RowKept(row(i)) = true;
3168 
3169  for (int i = 0; i < col.GetM(); i++)
3170  ColKept(col(i)) = true;
3171 
3172  // checking consistency of row/col with symmetry of matrix B
3173  if (sym)
3174  {
3175  if (m != n)
3176  {
3177  cout << "A is non-symmetric while B is symmetric" << endl;
3178  abort();
3179  }
3180 
3181  for (int i = 0; i < m; i++)
3182  if (RowKept(i) ^ ColKept(i))
3183  {
3184  cout << "row and col must be identical to obtain "
3185  << "a symmetric matrix" << endl;
3186  abort();
3187  }
3188  }
3189 
3190  Vector<bool> RowKeptDistant, ColKeptDistant;
3191  A.ScatterRowValues(RowKept, RowKeptDistant);
3192  A.ScatterColValues(ColKept, ColKeptDistant);
3193 
3194  // using global numbers for B
3195  this->SwitchToGlobalNumbers();
3196 
3197  // extracting values of dist_col
3198  this->dist_col.Reallocate(m);
3199  this->proc_col.Reallocate(m);
3200  for (int i = 0; i < A.dist_col.GetM(); i++)
3201  {
3202  if (RowKept(i))
3203  {
3204  int size_row = 0;
3205  for (int j = 0; j < A.dist_col(i).GetM(); j++)
3206  if (ColKeptDistant(A.dist_col(i).Index(j)))
3207  size_row++;
3208 
3209  this->dist_col(i).Reallocate(size_row);
3210  this->proc_col(i).Reallocate(size_row);
3211  size_row = 0;
3212  for (int j = 0; j < A.dist_col(i).GetM(); j++)
3213  if (ColKeptDistant(A.dist_col(i).Index(j)))
3214  {
3215  this->dist_col(i).Value(size_row) = A.dist_col(i).Value(j);
3216  this->dist_col(i).Index(size_row)
3217  = A.global_col_to_recv(A.dist_col(i).Index(j));
3218 
3219  this->proc_col(i)(size_row) = A.proc_col(i)(j);
3220  size_row++;
3221  }
3222  }
3223  else
3224  {
3225  this->dist_col(i).Clear();
3226  this->proc_col(i).Clear();
3227  }
3228  }
3229 
3230  // same stuff for distant rows
3231  this->dist_row.Reallocate(n);
3232  this->proc_row.Reallocate(n);
3233  for (int i = 0; i < A.dist_row.GetM(); i++)
3234  {
3235  if (ColKept(i))
3236  {
3237  int size_col = 0;
3238  for (int j = 0; j < A.dist_row(i).GetM(); j++)
3239  if (RowKeptDistant(A.dist_row(i).Index(j)))
3240  size_col++;
3241 
3242  this->dist_row(i).Reallocate(size_col);
3243  this->proc_row(i).Reallocate(size_col);
3244  size_col = 0;
3245  for (int j = 0; j < A.dist_row(i).GetM(); j++)
3246  if (RowKeptDistant(A.dist_row(i).Index(j)))
3247  {
3248  this->dist_row(i).Value(size_col) = A.dist_row(i).Value(j);
3249  this->dist_row(i).Index(size_col)
3250  = A.global_row_to_recv(A.dist_row(i).Index(j));
3251 
3252  this->proc_row(i)(size_col) = A.proc_row(i)(j);
3253  size_col++;
3254  }
3255  }
3256  else
3257  {
3258  this->dist_row(i).Clear();
3259  this->proc_row(i).Clear();
3260  }
3261 
3262  }
3263  }
3264 
3265 
3266  //! returns true if the current matrix and A share the same row distribution
3267  template<class T>
3268  bool DistributedMatrix_Base<T>::SameDistributedRows(const DistributedMatrix_Base<T>& A)
3269  {
3270  if (this->nglob_ != A.nglob_)
3271  return false;
3272 
3273  if (nodl_scalar_ != A.nodl_scalar_)
3274  return false;
3275 
3276  if (nb_unknowns_scal_ != A.nb_unknowns_scal_)
3277  return false;
3278 
3279  IVect& glob = *GlobalRowNumbers;
3280  IVect& globA = *A.GlobalRowNumbers;
3281  if (glob.GetM() != globA.GetM())
3282  return false;
3283 
3284  for (int i = 0; i < glob.GetM(); i++)
3285  if (glob(i) != globA(i))
3286  return false;
3287 
3288  IVect& over = *OverlapRowNumbers;
3289  IVect& overA = *A.OverlapRowNumbers;
3290  if (over.GetM() != overA.GetM())
3291  return false;
3292 
3293  for (int i = 0; i < over.GetM(); i++)
3294  if (over(i) != overA(i))
3295  return false;
3296 
3297  IVect& proc = *OverlapProcNumbers;
3298  IVect& procA = *A.OverlapProcNumbers;
3299  if (proc.GetM() != procA.GetM())
3300  return false;
3301 
3302  for (int i = 0; i < proc.GetM(); i++)
3303  if (proc(i) != procA(i))
3304  return false;
3305 
3306  return true;
3307  }
3308 
3309 
3310  /*************************************************
3311  * Methods for parallel assembling of the matrix *
3312  *************************************************/
3313 
3314 
3315  //! assembles the matrix by adding the values of rows shared between processors
3316  template<class T>
3317  void DistributedMatrix_Base<T>
3318  ::AssembleParallel(Matrix<T, Symmetric, ArrayRowSymSparse>& B, Vector<IVect>& procB,
3319  Symmetric& sym, IVect& row_numbers, IVect& local_row_numbers,
3320  IVect& OverlappedCol, bool sym_pattern, bool reorder)
3321  {
3322  int m = this->GetLocalM();
3323  int n = this->GetGlobalM();
3324 
3325  MPI_Comm& comm = this->GetCommunicator();
3326  int nb_proc; MPI_Comm_size(comm, &nb_proc);
3327  int rank; MPI_Comm_rank(comm, &rank);
3328 
3329  IVect RowNumber(this->GetGlobalRowNumber());
3330  const IVect& OverlapRowNumber = this->GetOverlapRowNumber();
3331  const IVect& OverlapProcNumber = this->GetOverlapProcNumber();
3332 
3333  // constructing array to know if a column is overlapped
3334  // (treated by another processor)
3335  OverlappedCol.Reallocate(m); OverlappedCol.Fill(-1);
3336  for (int i = 0; i < OverlapRowNumber.GetM(); i++)
3337  OverlappedCol(OverlapRowNumber(i)) = i;
3338 
3339  /*********************************
3340  * Parallel assembling of matrix *
3341  *********************************/
3342 
3343  // in this section, we will send/receive overlapped rows and distant rows
3344  Vector<int> nb_row_sent(nb_proc);
3345  Vector<int> nsend_int(nb_proc), nsend_float(nb_proc);
3346  Vector<IVect> EntierToSend(nb_proc);
3347  Vector<Vector<T> > FloatToSend(nb_proc);
3348 
3349  // counting the size of arrays to send
3350  // nsend_int : the number of integers
3351  // nsend_float : the number of floats
3352  for (int i = 0; i < nb_proc; i++)
3353  {
3354  if (i != rank)
3355  nsend_int(i) = 2;
3356  else
3357  nsend_int(i) = 0;
3358 
3359  nsend_float(i) = 0;
3360  nb_row_sent(i) = 0;
3361  }
3362 
3363  // overlapped rows
3364  for (int j = 0; j < OverlapRowNumber.GetM(); j++)
3365  {
3366  int i = OverlapProcNumber(j);
3367  if (i != rank)
3368  {
3369  nsend_int(i) += 2;
3370  nb_row_sent(i)++;
3371  int irow = OverlapRowNumber(j);
3372  nsend_int(i) += B.GetRowSize(irow);
3373  nsend_float(i) += B.GetRowSize(irow);
3374  if (reorder)
3375  nsend_int(i) += B.GetRowSize(irow);
3376  }
3377  }
3378 
3379  // distant rows
3380  for (int j = 0; j < this->dist_row.GetM(); j++)
3381  for (int k = 0; k < this->dist_row(j).GetM(); k++)
3382  {
3383  int i = this->proc_row(j)(k);
3384  if (i != rank)
3385  {
3386  int irow = this->dist_row(j).Index(k);
3387  if (this->local_number_distant_values)
3388  irow = this->global_row_to_recv(irow);
3389 
3390  if (irow <= RowNumber(j))
3391  {
3392  nb_row_sent(i)++;
3393  nsend_int(i) += 3;
3394  nsend_float(i)++;
3395  if (reorder)
3396  nsend_int(i)++;
3397  }
3398  }
3399  }
3400 
3401  // allocating arrays EntierToSend and FloatToSend
3402  for (int i = 0; i < nb_proc; i++)
3403  if (i != rank)
3404  {
3405  if (nb_row_sent(i) == 0)
3406  nsend_int(i) = 0;
3407 
3408  if (nb_row_sent(i) > 0)
3409  {
3410  EntierToSend(i).Reallocate(nsend_int(i));
3411  FloatToSend(i).Reallocate(nsend_float(i));
3412  EntierToSend(i)(0) = nsend_float(i);
3413  EntierToSend(i)(1) = nb_row_sent(i);
3414  nsend_int(i) = 2; nsend_float(i) = 0;
3415  }
3416  }
3417 
3418  // then arrays EntierToSend and FloatToSend are filled
3419 
3420  // storing values and indices of a row shared with processor i
3421  // processor i is the owner of this row
3422  for (int j = 0; j < OverlapRowNumber.GetM(); j++)
3423  {
3424  int i = OverlapProcNumber(j);
3425  if (i != rank)
3426  {
3427  int irow = OverlapRowNumber(j);
3428  EntierToSend(i)(nsend_int(i)++) = RowNumber(irow);
3429  EntierToSend(i)(nsend_int(i)++) = B.GetRowSize(irow);
3430  for (int k = 0; k < B.GetRowSize(irow); k++)
3431  {
3432  EntierToSend(i)(nsend_int(i)++) = B.Index(irow, k);
3433  if (reorder)
3434  EntierToSend(i)(nsend_int(i)++) = procB(irow)(k);
3435 
3436  FloatToSend(i)(nsend_float(i)++) = B.Value(irow, k);
3437  }
3438 
3439  // the corresponding values of B are cleared
3440  // they are no longer needed since they will be present on the distant
3441  // processor after the data exchange
3442  B.ClearRow(irow);
3443  if (reorder)
3444  procB(irow).Clear();
3445  }
3446  }
3447 
3448  // storing values of row associated with processor rank
3449  for (int j = 0; j < this->dist_row.GetM(); j++)
3450  for (int k = 0; k < this->dist_row(j).GetM(); k++)
3451  {
3452  int i = this->proc_row(j)(k);
3453  if (i != rank)
3454  {
3455  int irow = this->dist_row(j).Index(k);
3456  if (this->local_number_distant_values)
3457  irow = this->global_row_to_recv(irow);
3458 
3459  if (irow <= RowNumber(j))
3460  {
3461  EntierToSend(i)(nsend_int(i)++) = irow;
3462  EntierToSend(i)(nsend_int(i)++) = 1;
3463  EntierToSend(i)(nsend_int(i)++) = RowNumber(j);
3464  if (reorder)
3465  EntierToSend(i)(nsend_int(i)++) = rank;
3466 
3467  FloatToSend(i)(nsend_float(i)++)
3468  = this->dist_row(j).Value(k);
3469  }
3470  }
3471  }
3472 
3473  // now the initial matrix can be cleared
3474  this->Clear();
3475 
3476  // buffers for receiving the arrays EntierToSend and FloatToSend
3477  Vector<IVect> EntierToRecv(nb_proc);
3478  Vector<Vector<T> > FloatToRecv(nb_proc);
3479  IVect nrecv_int(nb_proc);
3480 
3481  // exchanging data
3482  SendAndReceiveDistributed(comm, nsend_int, EntierToSend, FloatToSend,
3483  nrecv_int, EntierToRecv, FloatToRecv);
3484 
3485  // constructing local row numbers
3486  int nloc = m - OverlapRowNumber.GetM();
3487  local_row_numbers.Reallocate(nloc);
3488  int nrow = 0;
3489  for (int i = 0; i < m; i++)
3490  if (OverlappedCol(i) == -1)
3491  local_row_numbers(nrow++) = i;
3492 
3493  // index array to obtain local numbers in array local_row_numbers
3494  // from local numbers of the matrix
3495  IVect inv_local_row_numbers(m);
3496  inv_local_row_numbers.Fill(-1);
3497  nrow = 0;
3498  for (int i = 0; i < m; i++)
3499  if (OverlappedCol(i) == -1)
3500  inv_local_row_numbers(i) = nrow++;
3501 
3502  // global to local conversion
3503  IVect Glob_to_local(n);
3504  Glob_to_local.Fill(-1);
3505  for (int i = 0; i < m; i++)
3506  Glob_to_local(RowNumber(i)) = i;
3507 
3508  // assembling matrix B with interactions coming from other processors
3509  AddReceivedInteractions(comm, B, EntierToRecv, FloatToRecv, nrecv_int,
3510  EntierToSend, FloatToSend, nsend_int, Glob_to_local,
3511  OverlappedCol, OverlapProcNumber, procB, reorder);
3512 
3513  // exchanging data
3514  SendAndReceiveDistributed(comm, nsend_int, EntierToSend, FloatToSend,
3515  nrecv_int, EntierToRecv, FloatToRecv);
3516 
3517  // assembling matrix B with last interactions coming from other processors
3518  AddReceivedInteractions(comm, B, EntierToRecv, FloatToRecv, nrecv_int,
3519  EntierToSend, FloatToSend, nsend_int, Glob_to_local,
3520  OverlappedCol, OverlapProcNumber, procB, reorder);
3521 
3522  /****************************
3523  * Reordering of the matrix *
3524  ****************************/
3525 
3526  if (reorder)
3527  {
3528  // in this section, global rows are renumbered such that
3529  // each processor has consecutive row numbers (mandatory for SuperLU)
3530  // processor 0 is assigned rows 0 .. nloc0-1,
3531  // processor 1 rows nloc0 .. nloc0+nloc1-1,
3532  // and so on
3533  IVect OverlapLocalNumber(OverlapRowNumber.GetM());
3534  IVect offset_global(nb_proc+1);
3535 
3536  IVect nb_col_sent(nb_proc);
3537  IVect nb_row_overlap(nb_proc);
3538 
3539  Vector<IVect> col_number_sorted(nb_proc);
3540  Vector<IVect> col_number_to_send(nb_proc);
3541  nb_col_sent.Zero();
3542 
3543  // counting the number of rows to send
3544  nb_row_overlap.Zero();
3545  for (int j = 0; j < OverlapRowNumber.GetM(); j++)
3546  {
3547  int i = OverlapProcNumber(j);
3548  if (i != rank)
3549  nb_row_overlap(i)++;
3550  }
3551 
3552  Vector<IVect> row_send_overlap(nb_proc);
3553  for (int i = 0; i < nb_proc; i++)
3554  if (nb_row_overlap(i) > 0)
3555  row_send_overlap(i).Reallocate(nb_row_overlap(i));
3556 
3557  nb_row_overlap.Zero();
3558  for (int j = 0; j < OverlapRowNumber.GetM(); j++)
3559  {
3560  int i = OverlapProcNumber(j);
3561  if (i != rank)
3562  row_send_overlap(i)(nb_row_overlap(i)++) = RowNumber(OverlapRowNumber(j));
3563  }
3564 
3565  // counting the number of columns to send
3566  for (int j = 0; j < B.GetM(); j++)
3567  for (int k = 0; k < B.GetRowSize(j); k++)
3568  {
3569  int i = procB(j)(k);
3570  if (i != rank)
3571  nb_col_sent(i)++;
3572  }
3573 
3574  for (int i = 0; i < nb_proc; i++)
3575  if (i != rank)
3576  col_number_sorted(i).Reallocate(nb_col_sent(i));
3577 
3578  // storing all the column numbers with distant processors
3579  nb_col_sent.Zero();
3580  for (int j = 0; j < B.GetM(); j++)
3581  for (int k = 0; k < B.GetRowSize(j); k++)
3582  {
3583  int i = procB(j)(k);
3584  if (i != rank)
3585  col_number_sorted(i)(nb_col_sent(i)++) = B.Index(j, k);
3586  }
3587 
3588  // duplicates are removed in order to send fewer numbers
3589  for (int i = 0; i < nb_proc; i++)
3590  if (i != rank)
3591  {
3592  IVect permut(nb_col_sent(i)), col_number_sort(col_number_sorted(i));
3593  permut.Fill();
3594  Sort(nb_col_sent(i), col_number_sort, permut);
3595 
3596  // counting the number of unique numbers
3597  int prec = -1; nb_col_sent(i) = 0;
3598  for (int j = 0; j < col_number_sort.GetM(); j++)
3599  {
3600  if (col_number_sort(j) != prec)
3601  nb_col_sent(i)++;
3602 
3603  prec = col_number_sort(j);
3604  }
3605 
3606  // filling col_number_to_send
3607  col_number_to_send(i).Reallocate(nb_col_sent(i));
3608  nb_col_sent(i) = 0;
3609  prec = -1;
3610  for (int j = 0; j < col_number_sort.GetM(); j++)
3611  {
3612  if (col_number_sort(j) != prec)
3613  {
3614  col_number_to_send(i)(nb_col_sent(i)) = col_number_sort(j);
3615  nb_col_sent(i)++;
3616  }
3617 
3618  col_number_sorted(i)(permut(j)) = nb_col_sent(i)-1;
3619  prec = col_number_sort(j);
3620  }
3621  }
3622 
3623  // allocating the array EntierToSend
3624  nsend_int.Zero();
3625  for (int i = 0; i < nb_proc; i++)
3626  if ((i != rank) && (nb_col_sent(i)+nb_row_overlap(i) > 0))
3627  {
3628  // column numbers will be sent
3629  nsend_int(i) = 2+nb_col_sent(i) + nb_row_overlap(i);
3630  EntierToSend(i).Reallocate(nsend_int(i));
3631  EntierToSend(i)(0) = 0;
3632  EntierToSend(i)(1) = nb_col_sent(i) + nb_row_overlap(i);
3633  nsend_int(i) = 2;
3634  }
3635 
3636  // storing column numbers associated with processor i
3637  for (int i = 0; i < nb_proc; i++)
3638  {
3639  for (int j = 0; j < nb_row_overlap(i); j++)
3640  EntierToSend(i)(nsend_int(i)++) = row_send_overlap(i)(j);
3641 
3642  for (int j = 0; j < nb_col_sent(i); j++)
3643  EntierToSend(i)(nsend_int(i)++) = col_number_to_send(i)(j);
3644  }
3645 
3646  // exchanging data
3647  SendAndReceiveDistributed(comm, nsend_int, EntierToSend, FloatToSend,
3648  nrecv_int, EntierToRecv, FloatToRecv);
3649 
3650  IVect nsend_intB(nb_proc), nrecv_intB(nb_proc);
3651  nsend_intB.Zero(); nrecv_intB.Zero();
3652  Vector<IVect> EntierToSendB(nb_proc), EntierToRecvB(nb_proc);
3653  // detecting if there are some numbers that do not belong to the current processor
3654  for (int i = 0; i < nb_proc; i++)
3655  if ((i != rank) && (nrecv_int(i) > 0))
3656  {
3657  int nb_col = EntierToRecv(i)(1);
3658  for (int j = 0; j < nb_col; j++)
3659  {
3660  int iglob = EntierToRecv(i)(2+j);
3661  int irow = Glob_to_local(iglob);
3662  if (inv_local_row_numbers(irow) == -1)
3663  {
3664  int p = OverlappedCol(irow);
3665  int nproc = OverlapProcNumber(p);
3666  if (nsend_intB(nproc) == 0)
3667  {
3668  nsend_intB(nproc) = 3;
3669  EntierToSendB(nproc).Reallocate(3);
3670  EntierToSendB(nproc)(0) = 0;
3671  EntierToSendB(nproc)(1) = 1;
3672  EntierToSendB(nproc)(2) = iglob;
3673  }
3674  else
3675  {
3676  nsend_intB(nproc)++;
3677  EntierToSendB(nproc)(1)++;
3678  EntierToSendB(nproc).PushBack(iglob);
3679  }
3680  }
3681  }
3682  }
3683 
3684  // exchanging non-original dofs
3685  SendAndReceiveDistributed(comm, nsend_intB, EntierToSendB, FloatToSend,
3686  nrecv_intB, EntierToRecvB, FloatToRecv);
3687 
3688  nsend_intB.Zero();
3689  for (int i = 0; i < nb_proc; i++)
3690  if ((i != rank) && (nrecv_intB(i) > 0))
3691  {
3692  int nb_col = EntierToRecvB(i)(1);
3693  if (nb_col > 0)
3694  {
3695  nsend_intB(i) = nb_col+2;
3696  EntierToSendB(i).Reallocate(nb_col+2);
3697  EntierToSendB(i)(0) = 0;
3698  EntierToSendB(i)(1) = nb_col;
3699 
3700  for (int j = 0; j < nb_col; j++)
3701  {
3702  int iglob = EntierToRecvB(i)(2+j);
3703  int irow = Glob_to_local(iglob);
3704  if (inv_local_row_numbers(irow) == -1)
3705  {
3706  cout << "impossible case" << endl;
3707  abort();
3708  }
3709  else
3710  EntierToSendB(i)(2+j) = inv_local_row_numbers(irow);
3711  }
3712  }
3713  }
3714 
3715  // returning the local numbers of non-original dofs
3716  SendAndReceiveDistributed(comm, nsend_intB, EntierToSendB, FloatToSend,
3717  nrecv_intB, EntierToRecvB, FloatToRecv);
3718 
3719  // filling local row numbers that need to be sent back
3720  nsend_intB.Zero();
3721  for (int i = 0; i < nb_proc; i++)
3722  if ((i != rank) && (nrecv_int(i) > 0))
3723  {
3724  int nb_col = EntierToRecv(i)(1);
3725  if (nb_col > 0)
3726  {
3727  nsend_int(i) = 2*nb_col+2;
3728  EntierToSend(i).Reallocate(2*nb_col+2);
3729  EntierToSend(i)(0) = 0;
3730  EntierToSend(i)(1) = 2*nb_col;
3731 
3732  for (int j = 0; j < nb_col; j++)
3733  {
3734  int iglob = EntierToRecv(i)(2+j);
3735  int irow = Glob_to_local(iglob);
3736  if (inv_local_row_numbers(irow) == -1)
3737  {
3738  int p = OverlappedCol(irow);
3739  int nproc = OverlapProcNumber(p);
3740  int num = EntierToRecvB(nproc)(2+nsend_intB(nproc));
3741  nsend_intB(nproc)++;
3742 
3743  EntierToSend(i)(2+2*j) = num;
3744  EntierToSend(i)(3+2*j) = nproc;
3745  }
3746  else
3747  {
3748  EntierToSend(i)(2+2*j) = inv_local_row_numbers(irow);
3749  EntierToSend(i)(3+2*j) = rank;
3750  }
3751  }
3752  }
3753  }
3754 
3755  // exchanging data
3756  SendAndReceiveDistributed(comm, nsend_int, EntierToSend, FloatToSend,
3757  nrecv_int, EntierToRecv, FloatToRecv);
3758 
3759  // receiving local numbers
3760  Vector<IVect> proc_number_to_send(nb_proc);
3761  for (int i = 0; i < nb_proc; i++)
3762  {
3763  if (EntierToRecv(i).GetM() > 0)
3764  {
3765  nrecv_int(i) = 2;
3766  proc_number_to_send(i).Reallocate(nb_col_sent(i));
3767  for (int j = 0; j < nb_row_overlap(i); j++)
3768  {
3769  row_send_overlap(i)(j) = EntierToRecv(i)(nrecv_int(i));
3770  if (EntierToRecv(i)(nrecv_int(i)+1) != i)
3771  {
3772  cout << "Impossible case" << endl;
3773  abort();
3774  }
3775 
3776  nrecv_int(i) += 2;
3777  }
3778 
3779  for (int j = 0; j < nb_col_sent(i); j++)
3780  {
3781  col_number_to_send(i)(j) = EntierToRecv(i)(nrecv_int(i));
3782  proc_number_to_send(i)(j) = EntierToRecv(i)(nrecv_int(i)+1);
3783  nrecv_int(i) += 2;
3784  }
3785  }
3786  }
3787 
3788  // filling OverlapLocalNumber
3789  nb_row_overlap.Zero();
3790 
3791  OverlapLocalNumber.Fill(-1);
3792  for (int j = 0; j < OverlapRowNumber.GetM(); j++)
3793  {
3794  int i = OverlapProcNumber(j);
3795  if (i != rank)
3796  {
3797  OverlapLocalNumber(j) = row_send_overlap(i)(nb_row_overlap(i));
3798  nb_row_overlap(i)++;
3799  }
3800  }
3801 
3802  // now in order to compute the global row numbers for any processor
3803  // we need to retrieve the offsets (i.e. the cumulated nloc)
3804  offset_global.Zero();
3805 
3806  MPI_Allgather(&nloc, 1, MPI_INTEGER, &offset_global(1), 1, MPI_INTEGER, comm);
3807 
3808  for (int i = 1; i < nb_proc; i++)
3809  offset_global(i+1) += offset_global(i);
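 
  // example: with 3 processors and nloc = {4, 3, 5} on ranks 0, 1, 2,
  // offset_global becomes {0, 4, 7, 12}: after renumbering, rank r owns
  // the global rows offset_global(r) .. offset_global(r+1)-1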
3810 
3811  // RowNumber is modified
3812  nrow = 0;
3813  for (int i = 0; i < m; i++)
3814  {
3815  if (OverlappedCol(i) == -1)
3816  {
3817  RowNumber(i) = offset_global(rank) + nrow;
3818  nrow++;
3819  }
3820  else
3821  RowNumber(i) = -1;
3822  }
3823 
3824  // then numbers in B are modified with the new numbering
3825  nb_col_sent.Zero();
3826  for (int j = 0; j < m; j++)
3827  if (OverlappedCol(j) == -1)
3828  {
3829  for (int k = 0; k < B.GetRowSize(j); k++)
3830  {
3831  int jglob = B.Index(j, k);
3832  int i = procB(j)(k);
3833  int ireal = i;
3834  int iloc = -1;
3835  if (i == rank)
3836  {
3837  int irow = Glob_to_local(jglob);
3838  if (OverlappedCol(irow) == -1)
3839  iloc = inv_local_row_numbers(irow);
3840  else
3841  {
3842  int p = OverlappedCol(irow);
3843  i = OverlapProcNumber(p);
3844  ireal = i;
3845  iloc = OverlapLocalNumber(p);
3846  }
3847  }
3848  else
3849  {
3850  int ilocC = col_number_sorted(i)(nb_col_sent(i));
3851  ireal = proc_number_to_send(i)(ilocC);
3852  iloc = col_number_to_send(i)(ilocC);
3853  nb_col_sent(i)++;
3854  }
3855 
3856  if (iloc >= 0)
3857  {
3858  B.Index(j, k) = offset_global(ireal) + iloc;
3859  }
3860  else
3861  {
3862  cout << "Impossible case" << endl;
3863  abort();
3864  }
3865  }
3866  }
3867  }
3868 
3869  Glob_to_local.Clear();
3870 
3871  nrow = 0;
3872  row_numbers.Reallocate(nloc);
3873  for (int i = 0; i < m; i++)
3874  if (OverlappedCol(i) == -1)
3875  {
3876  row_numbers(nrow) = RowNumber(i);
3877  nrow++;
3878  }
3879  }
3880 
3881 
3882  //! converts the assembled matrix B into CSR arrays PtrA, IndA and ValA
3883  template<class T> template<class Tint0, class Tint1>
3884  void DistributedMatrix_Base<T>
3885  ::ConvertToCsr(Matrix<T, Symmetric, ArrayRowSymSparse>& B, IVect& OverlappedCol,
3886  Vector<Tint0>& PtrA, Vector<Tint1>& IndA, Vector<T>& ValA)
3887  {
3888  int m = B.GetM();
3889  int nloc = 0;
3890  for (int i = 0; i < m; i++)
3891  if (OverlappedCol(i) == -1)
3892  nloc++;
3893 
3894  /***************************
3895  * Final conversion in CSR *
3896  ***************************/
3897 
3898  // now we convert B to CSR arrays while removing overlapped rows
3899  PtrA.Reallocate(nloc+1);
3900  int nrow = 0; long nnz = 0;
3901  PtrA(nrow) = 0;
3902  for (int i = 0; i < m; i++)
3903  if (OverlappedCol(i) == -1)
3904  {
3905  PtrA(nrow+1) = PtrA(nrow) + B.GetRowSize(i);
3906  nrow++;
3907  nnz += B.GetRowSize(i);
3908  }
3909 
3910  IndA.Reallocate(nnz);
3911  ValA.Reallocate(nnz); nrow = 0; nnz = 0;
3912  for (int i = 0; i < m; i++)
3913  if (OverlappedCol(i) == -1)
3914  {
3915  for (int j = 0; j < B.GetRowSize(i); j++)
3916  {
3917  IndA(nnz) = B.Index(i, j);
3918  ValA(nnz) = B.Value(i, j);
3919  nnz++;
3920  }
3921 
3922  nrow++;
3923  }
3924  }
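 
  /* Illustration of the CSR output (values assumed): if the two rows kept
     on this processor contain (a at column 2, b at column 5) and
     (c at column 3), then PtrA = {0, 2, 3}, IndA = {2, 5, 3} and
     ValA = {a, b, c}. */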
3925 
3926 
3927  //! assembles the matrix by adding the values of columns shared between processors
3928  template<class T>
3929  void DistributedMatrix_Base<T>
3930  ::AssembleParallel(Matrix<T, General, ArrayColSparse>& B, Vector<IVect>& procB,
3931  General& prop, IVect& col_numbers, IVect& local_col_numbers,
3932  IVect& OverlappedCol, bool sym_pattern, bool reorder)
3933  {
3934  int n = this->GetLocalN();
3935  int m = this->GetGlobalM();
3936 
3937  const MPI_Comm& comm = this->GetCommunicator();
3938  int nb_proc; MPI_Comm_size(comm, &nb_proc);
3939  int rank; MPI_Comm_rank(comm, &rank);
3940 
3941  IVect RowNumber = this->GetGlobalRowNumber();
3942  const IVect& OverlapRowNumber = this->GetOverlapRowNumber();
3943  const IVect& OverlapProcNumber = this->GetOverlapProcNumber();
3944 
3945  // constructing array to know if a column is overlapped
3946  // (treated by another processor)
3947  OverlappedCol.Reallocate(n); OverlappedCol.Fill(-1);
3948  for (int i = 0; i < OverlapRowNumber.GetM(); i++)
3949  OverlappedCol(OverlapRowNumber(i)) = i;
3950 
3951  /*********************************
3952  * Parallel assembling of matrix *
3953  *********************************/
3954 
3955  // we send to each processor additional entries due to overlapping
3956  // distant columns or distant rows (because of the symmetrisation of the pattern)
3957  IVect nsend_int(nb_proc), nsend_float(nb_proc), nb_col_sent(nb_proc);
3958  Vector<IVect> EntierToSend(nb_proc);
3959  Vector<Vector<T> > FloatToSend(nb_proc);
3960 
3961 
3962  // counting the size of arrays to send
3963  // nsend_int : the number of integers
3964  // nsend_float : the number of floats
3965  for (int i = 0; i < nb_proc; i++)
3966  {
3967  if (i != rank)
3968  nsend_int(i) = 2;
3969  else
3970  nsend_int(i) = 0;
3971 
3972  nsend_float(i) = 0;
3973  nb_col_sent(i) = 0;
3974  }
3975 
3976  // overlapped columns
3977  for (int j = 0; j < OverlapRowNumber.GetM(); j++)
3978  {
3979  int i = OverlapProcNumber(j);
3980  if (i != rank)
3981  {
3982  nsend_int(i) += 2;
3983  nb_col_sent(i)++;
3984  int irow = OverlapRowNumber(j);
3985  nsend_int(i) += B.GetColumnSize(irow);
3986  nsend_float(i) += B.GetColumnSize(irow);
3987  if (reorder)
3988  nsend_int(i) += B.GetColumnSize(irow);
3989  }
3990  }
3991 
3992  // distant rows
3993  if (sym_pattern)
3994  for (int j = 0; j < this->dist_row.GetM(); j++)
3995  for (int k = 0; k < this->dist_row(j).GetM(); k++)
3996  {
3997  int i = this->proc_row(j)(k);
3998  if (i != rank)
3999  {
4000  nb_col_sent(i)++;
4001  nsend_int(i) += 3;
4002  nsend_float(i)++;
4003  if (reorder)
4004  nsend_int(i)++;
4005  }
4006  }
4007 
4008  // distant columns
4009  for (int j = 0; j < this->dist_col.GetM(); j++)
4010  for (int k = 0; k < this->dist_col(j).GetM(); k++)
4011  {
4012  int i = this->proc_col(j)(k);
4013  if (i != rank)
4014  {
4015  nb_col_sent(i)++;
4016  nsend_int(i) += 3;
4017  nsend_float(i)++;
4018  if (reorder)
4019  nsend_int(i)++;
4020  }
4021  }
4022 
4023 
4024  // allocating arrays EntierToSend and FloatToSend
4025  for (int i = 0; i < nb_proc; i++)
4026  if (i != rank)
4027  {
4028  if (nb_col_sent(i) == 0)
4029  nsend_int(i) = 0;
4030 
4031  if (nb_col_sent(i) > 0)
4032  {
4033  EntierToSend(i).Reallocate(nsend_int(i));
4034  FloatToSend(i).Reallocate(nsend_float(i));
4035  EntierToSend(i)(0) = nsend_float(i);
4036  EntierToSend(i)(1) = nb_col_sent(i);
4037  nsend_int(i) = 2; nsend_float(i) = 0;
4038  }
4039  }
4040 
4041  // then arrays EntierToSend and FloatToSend are filled
4042 
4043  // storing values and indices of a column shared with processor i
4044  // processor i is the owner of this column
4045  for (int j = 0; j < OverlapRowNumber.GetM(); j++)
4046  {
4047  int i = OverlapProcNumber(j);
4048  if (i != rank)
4049  {
4050  int irow = OverlapRowNumber(j);
4051  EntierToSend(i)(nsend_int(i)++) = RowNumber(irow);
4052  EntierToSend(i)(nsend_int(i)++) = B.GetColumnSize(irow);
4053  for (int k = 0; k < B.GetColumnSize(irow); k++)
4054  {
4055  EntierToSend(i)(nsend_int(i)++) = B.Index(irow, k);
4056  if (reorder)
4057  EntierToSend(i)(nsend_int(i)++) = procB(irow)(k);
4058 
4059  FloatToSend(i)(nsend_float(i)++) = B.Value(irow, k);
4060  }
4061 
4062  // the corresponding values of B are cleared
4063  // they are no longer needed since they will be present on the distant
4064  // processor after the data exchange
4065  B.ClearColumn(irow);
4066  if (reorder)
4067  procB(irow).Clear();
4068  }
4069  }
4070 
4071  // storing values to enforce a symmetric pattern
4072  if (sym_pattern)
4073  for (int j = 0; j < this->dist_row.GetM(); j++)
4074  for (int k = 0; k < this->dist_row(j).GetM(); k++)
4075  {
4076  int i = this->proc_row(j)(k);
4077  if (i != rank)
4078  {
4079  int irow = this->dist_row(j).Index(k);
4080  if (this->local_number_distant_values)
4081  irow = this->global_row_to_recv(irow);
4082 
4083  EntierToSend(i)(nsend_int(i)++) = irow;
4084  EntierToSend(i)(nsend_int(i)++) = 1;
4085  EntierToSend(i)(nsend_int(i)++) = RowNumber(j);
4086  if (reorder)
4087  EntierToSend(i)(nsend_int(i)++) = rank;
4088 
4089  FloatToSend(i)(nsend_float(i)++) = 0;
4090  }
4091  }
4092 
4093  // storing values of row associated with processor rank
4094  for (int j = 0; j < this->dist_col.GetM(); j++)
4095  for (int k = 0; k < this->dist_col(j).GetM(); k++)
4096  {
4097  int i = this->proc_col(j)(k);
4098  if (i != rank)
4099  {
4100  int irow = this->dist_col(j).Index(k);
4101  if (this->local_number_distant_values)
4102  irow = this->global_col_to_recv(irow);
4103 
4104  EntierToSend(i)(nsend_int(i)++) = irow;
4105  EntierToSend(i)(nsend_int(i)++) = 1;
4106  EntierToSend(i)(nsend_int(i)++) = RowNumber(j);
4107  if (reorder)
4108  EntierToSend(i)(nsend_int(i)++) = rank;
4109 
4110  FloatToSend(i)(nsend_float(i)++)
4111  = this->dist_col(j).Value(k);
4112  }
4113  }
4114 
4115  // now the initial matrix can be cleared
4116  this->Clear();
4117 
4118  // buffers for receiving the arrays EntierToSend and FloatToSend
4119  IVect nrecv_int(nb_proc);
4120  Vector<IVect> EntierToRecv(nb_proc);
4121  Vector<Vector<T> > FloatToRecv(nb_proc);
4122 
4123  // exchanging data
4124  SendAndReceiveDistributed(comm, nsend_int, EntierToSend, FloatToSend,
4125  nrecv_int, EntierToRecv, FloatToRecv);
4126 
4127  // constructing local column numbers
4128  int nloc = n - OverlapRowNumber.GetM();
4129  local_col_numbers.Reallocate(nloc);
4130  int ncol = 0;
4131  for (int i = 0; i < n; i++)
4132  if (OverlappedCol(i) == -1)
4133  local_col_numbers(ncol++) = i;
4134 
4135  // index array to obtain local numbers in array local_col_numbers
4136  // from local numbers of the matrix
4137  IVect inv_local_col_numbers(n);
4138  inv_local_col_numbers.Fill(-1);
4139  ncol = 0;
4140  for (int i = 0; i < n; i++)
4141  if (OverlappedCol(i) == -1)
4142  inv_local_col_numbers(i) = ncol++;
4143 
4144  // global to local conversion
4145  IVect Glob_to_local(m);
4146  Glob_to_local.Fill(-1);
4147  for (int i = 0; i < n; i++)
4148  Glob_to_local(RowNumber(i)) = i;
4149 
4150  // assembling matrix B with interactions coming from other processors
4151  AddReceivedInteractions(comm, B, EntierToRecv, FloatToRecv, nrecv_int,
4152  EntierToSend, FloatToSend, nsend_int, Glob_to_local,
4153  OverlappedCol, OverlapProcNumber, procB, reorder);
4154 
4155  // exchanging data
4156  SendAndReceiveDistributed(comm, nsend_int, EntierToSend, FloatToSend,
4157  nrecv_int, EntierToRecv, FloatToRecv);
4158 
4159  // assembling matrix B with last interactions coming from other processors
4160  AddReceivedInteractions(comm, B, EntierToRecv, FloatToRecv, nrecv_int,
4161  EntierToSend, FloatToSend, nsend_int, Glob_to_local,
4162  OverlappedCol, OverlapProcNumber, procB, reorder);
4163 
4164  /****************************
4165  * Reordering of the matrix *
4166  ****************************/
4167 
4168  if (reorder)
4169  {
4170  // in this section, global rows are renumbered such that
4171  // each processor has consecutive row numbers (mandatory for SuperLU)
4172  // processor 0 is assigned rows 0 .. nloc0-1,
4173  // processor 1 rows nloc0 .. nloc0+nloc1-1,
4174  // and so on
4175  IVect OverlapLocalNumber(OverlapRowNumber.GetM());
4176  IVect offset_global(nb_proc+1);
4177 
4178  IVect nb_col_overlap(nb_proc);
4179 
4180  Vector<IVect> col_number_sorted(nb_proc);
4181  Vector<IVect> col_number_to_send(nb_proc);
4182  nb_col_sent.Zero();
4183 
4184  // counting the number of overlapped columns to send
4185  nb_col_overlap.Zero();
4186  for (int j = 0; j < OverlapRowNumber.GetM(); j++)
4187  {
4188  int i = OverlapProcNumber(j);
4189  if (i != rank)
4190  nb_col_overlap(i)++;
4191  }
4192 
4193  Vector<IVect> col_send_overlap(nb_proc);
4194  for (int i = 0; i < nb_proc; i++)
4195  if (nb_col_overlap(i) > 0)
4196  col_send_overlap(i).Reallocate(nb_col_overlap(i));
4197 
4198  nb_col_overlap.Zero();
4199  for (int j = 0; j < OverlapRowNumber.GetM(); j++)
4200  {
4201  int i = OverlapProcNumber(j);
4202  if (i != rank)
4203  col_send_overlap(i)(nb_col_overlap(i)++) = RowNumber(OverlapRowNumber(j));
4204  }
4205 
4206  // counting the number of columns to send
4207  for (int j = 0; j < B.GetN(); j++)
4208  for (int k = 0; k < B.GetColumnSize(j); k++)
4209  {
4210  int i = procB(j)(k);
4211  if (i != rank)
4212  nb_col_sent(i)++;
4213  }
4214 
4215  for (int i = 0; i < nb_proc; i++)
4216  if (i != rank)
4217  col_number_sorted(i).Reallocate(nb_col_sent(i));
4218 
4219  // storing all the column numbers with distant processors
4220  nb_col_sent.Zero();
4221  for (int j = 0; j < B.GetN(); j++)
4222  for (int k = 0; k < B.GetColumnSize(j); k++)
4223  {
4224  int i = procB(j)(k);
4225  if (i != rank)
4226  col_number_sorted(i)(nb_col_sent(i)++) = B.Index(j, k);
4227  }
4228 
4229  // duplicates are removed in order to send fewer numbers
4230  for (int i = 0; i < nb_proc; i++)
4231  if (i != rank)
4232  {
4233  IVect permut(nb_col_sent(i)), col_number_sort(col_number_sorted(i));
4234  permut.Fill();
4235  Sort(nb_col_sent(i), col_number_sort, permut);
4236 
4237  // counting the number of unique numbers
4238  int prec = -1; nb_col_sent(i) = 0;
4239  for (int j = 0; j < col_number_sort.GetM(); j++)
4240  {
4241  if (col_number_sort(j) != prec)
4242  nb_col_sent(i)++;
4243 
4244  prec = col_number_sort(j);
4245  }
4246 
4247  // filling col_number_to_send
4248  col_number_to_send(i).Reallocate(nb_col_sent(i));
4249  nb_col_sent(i) = 0;
4250  prec = -1;
4251  for (int j = 0; j < col_number_sort.GetM(); j++)
4252  {
4253  if (col_number_sort(j) != prec)
4254  {
4255  col_number_to_send(i)(nb_col_sent(i)) = col_number_sort(j);
4256  nb_col_sent(i)++;
4257  }
4258 
4259  col_number_sorted(i)(permut(j)) = nb_col_sent(i)-1;
4260  prec = col_number_sort(j);
4261  }
4262  }
4263 
4264  // allocating the array EntierToSend
4265  nsend_int.Zero();
4266  for (int i = 0; i < nb_proc; i++)
4267  if ((i != rank) && (nb_col_sent(i)+nb_col_overlap(i) > 0))
4268  {
4269  // column numbers will be sent
4270  nsend_int(i) = 2+nb_col_sent(i) + nb_col_overlap(i);
4271  EntierToSend(i).Reallocate(nsend_int(i));
4272  EntierToSend(i)(0) = 0;
4273  EntierToSend(i)(1) = nb_col_sent(i) + nb_col_overlap(i);
4274  nsend_int(i) = 2;
4275  }
4276 
4277  // storing the column numbers associated with processor i
4278  for (int i = 0; i < nb_proc; i++)
4279  {
4280  for (int j = 0; j < nb_col_overlap(i); j++)
4281  EntierToSend(i)(nsend_int(i)++) = col_send_overlap(i)(j);
4282 
4283  for (int j = 0; j < nb_col_sent(i); j++)
4284  EntierToSend(i)(nsend_int(i)++) = col_number_to_send(i)(j);
4285  }
4286 
4287  // exchanging data
4288  SendAndReceiveDistributed(comm, nsend_int, EntierToSend, FloatToSend,
4289  nrecv_int, EntierToRecv, FloatToRecv);
4290 
4291  IVect nsend_intB(nb_proc), nrecv_intB(nb_proc);
4292  nsend_intB.Zero(); nrecv_intB.Zero();
4293  Vector<IVect> EntierToSendB(nb_proc), EntierToRecvB(nb_proc);
4294  // detecting if there are some numbers that do not belong to the current processor
4295  for (int i = 0; i < nb_proc; i++)
4296  if ((i != rank) && (nrecv_int(i) > 0))
4297  {
4298  int nb_col = EntierToRecv(i)(1);
4299  for (int j = 0; j < nb_col; j++)
4300  {
4301  int iglob = EntierToRecv(i)(2+j);
4302  int irow = Glob_to_local(iglob);
4303  if (inv_local_col_numbers(irow) == -1)
4304  {
4305  int p = OverlappedCol(irow);
4306  int nproc = OverlapProcNumber(p);
4307  if (nsend_intB(nproc) == 0)
4308  {
4309  nsend_intB(nproc) = 3;
4310  EntierToSendB(nproc).Reallocate(3);
4311  EntierToSendB(nproc)(0) = 0;
4312  EntierToSendB(nproc)(1) = 1;
4313  EntierToSendB(nproc)(2) = iglob;
4314  }
4315  else
4316  {
4317  nsend_intB(nproc)++;
4318  EntierToSendB(nproc)(1)++;
4319  EntierToSendB(nproc).PushBack(iglob);
4320  }
4321  }
4322  }
4323  }
4324 
4325  // exchanging non-original dofs
4326  SendAndReceiveDistributed(comm, nsend_intB, EntierToSendB, FloatToSend,
4327  nrecv_intB, EntierToRecvB, FloatToRecv);
4328 
4329  nsend_intB.Zero();
4330  for (int i = 0; i < nb_proc; i++)
4331  if ((i != rank) && (nrecv_intB(i) > 0))
4332  {
4333  int nb_col = EntierToRecvB(i)(1);
4334  if (nb_col > 0)
4335  {
4336  nsend_intB(i) = nb_col+2;
4337  EntierToSendB(i).Reallocate(nb_col+2);
4338  EntierToSendB(i)(0) = 0;
4339  EntierToSendB(i)(1) = nb_col;
4340 
4341  for (int j = 0; j < nb_col; j++)
4342  {
4343  int iglob = EntierToRecvB(i)(2+j);
4344  int irow = Glob_to_local(iglob);
4345  if (inv_local_col_numbers(irow) == -1)
4346  {
4347  cout << "impossible case" << endl;
4348  abort();
4349  }
4350  else
4351  EntierToSendB(i)(2+j) = inv_local_col_numbers(irow);
4352  }
4353  }
4354  }
4355 
4356  // returning the local numbers of non-original dofs
4357  SendAndReceiveDistributed(comm, nsend_intB, EntierToSendB, FloatToSend,
4358  nrecv_intB, EntierToRecvB, FloatToRecv);
4359 
4360  // filling local row numbers that need to be sent back
4361  nsend_intB.Zero();
4362  for (int i = 0; i < nb_proc; i++)
4363  if ((i != rank) && (nrecv_int(i) > 0))
4364  {
4365  int nb_col = EntierToRecv(i)(1);
4366  if (nb_col > 0)
4367  {
4368  nsend_int(i) = 2*nb_col+2;
4369  EntierToSend(i).Reallocate(2*nb_col+2);
4370  EntierToSend(i)(0) = 0;
4371  EntierToSend(i)(1) = 2*nb_col;
4372 
4373  for (int j = 0; j < nb_col; j++)
4374  {
4375  int iglob = EntierToRecv(i)(2+j);
4376  int irow = Glob_to_local(iglob);
4377  if (inv_local_col_numbers(irow) == -1)
4378  {
4379  int p = OverlappedCol(irow);
4380  int nproc = OverlapProcNumber(p);
4381  int num = EntierToRecvB(nproc)(2+nsend_intB(nproc));
4382  nsend_intB(nproc)++;
4383 
4384  EntierToSend(i)(2+2*j) = num;
4385  EntierToSend(i)(3+2*j) = nproc;
4386  }
4387  else
4388  {
4389  EntierToSend(i)(2+2*j) = inv_local_col_numbers(irow);
4390  EntierToSend(i)(3+2*j) = rank;
4391  }
4392  }
4393  }
4394  }
4395 
4396  // exchanging data
4397  SendAndReceiveDistributed(comm, nsend_int, EntierToSend, FloatToSend,
4398  nrecv_int, EntierToRecv, FloatToRecv);
4399 
4400  // receiving local numbers
4401  Vector<IVect> proc_number_to_send(nb_proc);
4402  for (int i = 0; i < nb_proc; i++)
4403  {
4404  if (EntierToRecv(i).GetM() > 0)
4405  {
4406  nrecv_int(i) = 2;
4407  proc_number_to_send(i).Reallocate(nb_col_sent(i));
4408  for (int j = 0; j < nb_col_overlap(i); j++)
4409  {
4410  col_send_overlap(i)(j) = EntierToRecv(i)(nrecv_int(i));
4411  if (EntierToRecv(i)(nrecv_int(i)+1) != i)
4412  {
4413  cout << "Impossible case" << endl;
4414  abort();
4415  }
4416 
4417  nrecv_int(i) += 2;
4418  }
4419 
4420  for (int j = 0; j < nb_col_sent(i); j++)
4421  {
4422  col_number_to_send(i)(j) = EntierToRecv(i)(nrecv_int(i));
4423  proc_number_to_send(i)(j) = EntierToRecv(i)(nrecv_int(i)+1);
4424  nrecv_int(i) += 2;
4425  }
4426  }
4427  }
4428 
4429  // filling OverlapLocalNumber
4430  nb_col_overlap.Zero();
4431 
4432  OverlapLocalNumber.Fill(-1);
4433  for (int j = 0; j < OverlapRowNumber.GetM(); j++)
4434  {
4435  int i = OverlapProcNumber(j);
4436  if (i != rank)
4437  {
4438  OverlapLocalNumber(j) = col_send_overlap(i)(nb_col_overlap(i));
4439  nb_col_overlap(i)++;
4440  }
4441  }
4442 
4443  // now, in order to compute the global row numbers for any processor,
4444  // we need to retrieve the offsets (i.e. the cumulated nloc)
4445  offset_global.Zero();
4446 
4447  MPI_Allgather(&nloc, 1, MPI_INTEGER, &offset_global(1), 1, MPI_INTEGER, comm);
4448 
4449  for (int i = 1; i < nb_proc; i++)
4450  offset_global(i+1) += offset_global(i);
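  // Example (a minimal sketch): with three processors and local sizes
  // nloc = 4, 3 and 5, the MPI_Allgather and the partial sums above give
  // offset_global = (0, 4, 7, 12), so that processor 0 owns the global
  // rows 0..3, processor 1 the rows 4..6 and processor 2 the rows 7..11.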
4451 
4452  // RowNumber is modified
4453  ncol = 0;
4454  for (int i = 0; i < n; i++)
4455  {
4456  if (OverlappedCol(i) == -1)
4457  {
4458  RowNumber(i) = offset_global(rank) + ncol;
4459  ncol++;
4460  }
4461  else
4462  RowNumber(i) = -1;
4463  }
4464 
4465  // then numbers in B are modified with the new numbering
4466  nb_col_sent.Zero();
4467  for (int j = 0; j < n; j++)
4468  if (OverlappedCol(j) == -1)
4469  {
4470  for (int k = 0; k < B.GetColumnSize(j); k++)
4471  {
4472  int jglob = B.Index(j, k);
4473  int i = procB(j)(k);
4474  int ireal = i;
4475  int iloc = -1;
4476  if (i == rank)
4477  {
4478  int irow = Glob_to_local(jglob);
4479  if (OverlappedCol(irow) == -1)
4480  iloc = inv_local_col_numbers(irow);
4481  else
4482  {
4483  int p = OverlappedCol(irow);
4484  i = OverlapProcNumber(p);
4485  ireal = i;
4486  iloc = OverlapLocalNumber(p);
4487  }
4488  }
4489  else
4490  {
4491  int ilocC = col_number_sorted(i)(nb_col_sent(i));
4492  ireal = proc_number_to_send(i)(ilocC);
4493  iloc = col_number_to_send(i)(ilocC);
4494  nb_col_sent(i)++;
4495  }
4496 
4497  if (iloc >= 0)
4498  {
4499  B.Index(j, k) = offset_global(ireal) + iloc;
4500  }
4501  else
4502  {
4503  cout << "Impossible case" << endl;
4504  abort();
4505  }
4506  }
4507 
4508  B.AssembleColumn(j);
4509  }
4510  }
4511 
4512  Glob_to_local.Clear();
4513 
4514  ncol = 0;
4515  col_numbers.Reallocate(nloc);
4516  for (int i = 0; i < n; i++)
4517  if (OverlappedCol(i) == -1)
4518  {
4519  col_numbers(ncol) = RowNumber(i);
4520  ncol++;
4521  }
4522  }
4523 
4524 
4525  //! Fills PtrA, IndA and ValA from values contained in B
4526  template<class T> template<class Tint0, class Tint1>
4527  void DistributedMatrix_Base<T>
4528  ::ConvertToCSC(Matrix<T, General, ArrayColSparse>& B, IVect& OverlappedCol,
4529  Vector<Tint0>& PtrA, Vector<Tint1>& IndA, Vector<T>& ValA)
4530  {
4531  int n = B.GetN();
4532  int nloc = 0;
4533  for (int i = 0; i < n; i++)
4534  if (OverlappedCol(i) == -1)
4535  nloc++;
4536 
4537  /***************************
4538  * Final conversion in CSC *
4539  ***************************/
4540 
4541  PtrA.Reallocate(nloc+1);
4542  int ncol = 0; long nnz = 0;
4543  PtrA(ncol) = 0;
4544  for (int i = 0; i < n; i++)
4545  if (OverlappedCol(i) == -1)
4546  {
4547  PtrA(ncol+1) = PtrA(ncol) + B.GetColumnSize(i);
4548  ncol++;
4549  nnz += B.GetColumnSize(i);
4550  }
4551 
4552  IndA.Reallocate(nnz);
4553  ValA.Reallocate(nnz);
4554  ncol = 0; nnz = 0;
4555  for (int i = 0; i < n; i++)
4556  if (OverlappedCol(i) == -1)
4557  {
4558  for (int j = 0; j < B.GetColumnSize(i); j++)
4559  {
4560  IndA(nnz) = B.Index(i, j);
4561  ValA(nnz) = B.Value(i, j);
4562  nnz++;
4563  }
4564 
4565  ncol++;
4566  }
4567  }
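  // Example (a minimal sketch): if B holds two kept columns whose
  // (row, value) pairs are {(0, a), (2, b)} and {(1, c)}, the loops above
  // produce PtrA = (0, 2, 3), IndA = (0, 2, 1) and ValA = (a, b, c),
  // i.e. the usual compressed sparse column triplet restricted to the
  // columns that are not overlapped.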
4568 
4569 
4571 
4572  //! grouping all the local rows of the matrix into CSR form
4576  template<class T>
4577  template<class T0, class Allocator0>
4578  void DistributedMatrix_Base<T>
4579  ::GetDistributedRows(Matrix<T0, General, ArrayRowSparse,
4580  Allocator0>& B, Vector<IVect>& procB, bool sym) const
4581  {
4582  int m = this->GetLocalM();
4583  int n = this->GetGlobalM();
4584  int rank; MPI_Comm_rank(this->comm_, &rank);
4585 
4586  bool retrieve_proc = false;
4587  if (procB.GetM() == m)
4588  retrieve_proc = true;
4589 
4590  // now, we are using global numbers
4591  // and removing the lower part of the matrix if it is symmetric
4592  B.Resize(m, n);
4593  const IVect& RowNumber = this->GetGlobalRowNumber();
4594  for (int i = 0; i < m; i++)
4595  {
4596  int size_row = B.GetRowSize(i);
4597  size_row += dist_col(i).GetM();
4598  IVect index(size_row), proc_loc(size_row);
4599  Vector<T0> value(size_row);
4600  int nb = 0;
4601  int num_row = RowNumber(i);
4602  if (sym)
4603  {
4604  // local values
4605  for (int j = 0; j < B.GetRowSize(i); j++)
4606  {
4607  int num_col = RowNumber(B.Index(i, j));
4608  if (num_row <= num_col)
4609  {
4610  index(nb) = num_col;
4611  value(nb) = B.Value(i, j);
4612  proc_loc(nb) = rank;
4613  nb++;
4614  }
4615  }
4616 
4617  // distant values
4618  for (int j = 0; j < dist_col(i).GetM(); j++)
4619  {
4620  int num_col = dist_col(i).Index(j);
4621  if (this->local_number_distant_values)
4622  num_col = global_col_to_recv(num_col);
4623 
4624  if (num_row <= num_col)
4625  {
4626  index(nb) = num_col;
4627  value(nb) = dist_col(i).Value(j);
4628  proc_loc(nb) = proc_col(i)(j);
4629  nb++;
4630  }
4631  }
4632  }
4633  else
4634  {
4635  // local values
4636  for (int j = 0; j < B.GetRowSize(i); j++)
4637  {
4638  int num_col = RowNumber(B.Index(i, j));
4639  index(nb) = num_col;
4640  value(nb) = B.Value(i, j);
4641  proc_loc(nb) = rank;
4642  nb++;
4643  }
4644 
4645  // distant values
4646  for (int j = 0; j < dist_col(i).GetM(); j++)
4647  {
4648  int num_col = dist_col(i).Index(j);
4649  if (this->local_number_distant_values)
4650  num_col = global_col_to_recv(num_col);
4651 
4652  index(nb) = num_col;
4653  value(nb) = dist_col(i).Value(j);
4654  proc_loc(nb) = proc_col(i)(j);
4655  nb++;
4656  }
4657  }
4658 
4659  Sort(nb, index, value, proc_loc);
4660  size_row = 0;
4661  int prec = -1;
4662  for (int j = 0; j < nb; j++)
4663  {
4664  if (index(j) == prec)
4665  value(size_row-1) += value(j);
4666  else
4667  {
4668  index(size_row) = index(j);
4669  value(size_row) = value(j);
4670  proc_loc(size_row) = proc_loc(j);
4671  size_row++;
4672  }
4673 
4674  prec = index(j);
4675  }
4676 
4677  B.ReallocateRow(i, size_row);
4678  for (int j = 0; j < size_row; j++)
4679  {
4680  B.Index(i, j) = index(j);
4681  B.Value(i, j) = value(j);
4682  }
4683 
4684  if (retrieve_proc)
4685  {
4686  procB(i).Reallocate(size_row);
4687  for (int j = 0; j < size_row; j++)
4688  procB(i)(j) = proc_loc(j);
4689  }
4690  }
4691  }
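  // Example (a minimal sketch): if row i collects the pairs (5, a),
  // (2, b) and (5, c) from its local and distant parts, the sort/merge
  // above stores the row as (2, b), (5, a+c); for a repeated column,
  // the processor number kept is that of the first occurrence after
  // sorting.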
4692 
4693 
4695 
4696  //! grouping all the local columns of the matrix into CSC form
4700  template<class T>
4701  template<class T0, class Allocator0>
4702  void DistributedMatrix_Base<T>
4703  ::GetDistributedColumns(Matrix<T0, General, ArrayColSparse, Allocator0>& B,
4704  Vector<IVect>& procB, Vector<long>& Ptr, IVect& Ind,
4705  Vector<T0>& Val, bool sym_pattern) const
4706  {
4707  int m = this->GetGlobalM();
4708  int n = this->GetLocalN();
4709  int rank; MPI_Comm_rank(this->comm_, &rank);
4710 
4711  bool retrieve_proc = false;
4712  if (procB.GetM() == n)
4713  retrieve_proc = true;
4714 
4715  // for row numbers, we store global numbers and we add some distant entries
4716  // (i.e. entries with local columns, and null values
4717  // coming from the symmetrisation of local rows)
4718  B.Clear(); B.Reallocate(m, n);
4719  const IVect& RowNumber = this->GetGlobalRowNumber();
4720  for (int i = 0; i < n; i++)
4721  {
4722  int size_col = Ptr(i+1) - Ptr(i);
4723  size_col += dist_row(i).GetM();
4724  if (sym_pattern)
4725  size_col += dist_col(i).GetM();
4726 
4727  IVect index(size_col), proc_loc(size_col);
4728  Vector<T0> value(size_col);
4729  int nb = 0;
4730  // local values
4731  for (int j = Ptr(i); j < Ptr(i+1); j++)
4732  {
4733  index(nb) = RowNumber(Ind(j));
4734  value(nb) = Val(j);
4735  proc_loc(nb) = rank;
4736  nb++;
4737  }
4738 
4739  // distant values
4740  for (int j = 0; j < dist_row(i).GetM(); j++)
4741  {
4742  index(nb) = dist_row(i).Index(j);
4743  if (this->local_number_distant_values)
4744  index(nb) = global_row_to_recv(index(nb));
4745 
4746  proc_loc(nb) = proc_row(i)(j);
4747  value(nb) = dist_row(i).Value(j);
4748  nb++;
4749  }
4750 
4751  // values due to symmetrisation of pattern
4752  if (sym_pattern)
4753  for (int j = 0; j < dist_col(i).GetM(); j++)
4754  {
4755  index(nb) = dist_col(i).Index(j);
4756  if (this->local_number_distant_values)
4757  index(nb) = global_col_to_recv(index(nb));
4758 
4759  proc_loc(nb) = proc_col(i)(j);
4760  value(nb) = 0;
4761  nb++;
4762  }
4763 
4764  Sort(nb, index, value, proc_loc);
4765  size_col = 0;
4766  int prec = -1;
4767  for (int j = 0; j < nb; j++)
4768  {
4769  if (index(j) == prec)
4770  value(size_col-1) += value(j);
4771  else
4772  {
4773  index(size_col) = index(j);
4774  value(size_col) = value(j);
4775  proc_loc(size_col) = proc_loc(j);
4776  size_col++;
4777  }
4778 
4779  prec = index(j);
4780  }
4781 
4782  B.ReallocateColumn(i, size_col);
4783  for (int j = 0; j < size_col; j++)
4784  {
4785  B.Index(i, j) = index(j);
4786  B.Value(i, j) = value(j);
4787  }
4788 
4789  if (retrieve_proc)
4790  {
4791  procB(i).Reallocate(size_col);
4792  for (int j = 0; j < size_col; j++)
4793  procB(i)(j) = proc_loc(j);
4794  }
4795  }
4796  }
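  // Illustration (a minimal sketch): with sym_pattern = true, a distant
  // entry a(3, 7) stored on row 3 also inserts a null value at the
  // transposed position (7, 3), so that the assembled sparsity pattern
  // is structurally symmetric.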
4797 
4798 
4799  // DistributedMatrix_Base //
4800  ////////////////////////////
4801 
4802 
4803  ////////////////////////
4804  // DistributedMatrix //
4805  ////////////////////////
4806 
4807  //! default constructor
4808  template<class T, class Prop, class Storage, class Allocator>
4809  DistributedMatrix<T, Prop, Storage, Allocator>::DistributedMatrix()
4810  : Matrix<T, Prop, Storage, Allocator>(),
4811  DistributedMatrix_Base<T>()
4812  {
4813  }
4814 
4815 
4817 
4818  //! constructs an i by j matrix (i local rows, j local columns)
4821  template<class T, class Prop, class Storage, class Allocator>
4822  DistributedMatrix<T, Prop, Storage, Allocator>::DistributedMatrix(int i, int j)
4823  : Matrix<T, Prop, Storage, Allocator>(i, j),
4824  DistributedMatrix_Base<T>(i, j)
4825  {
4826  }
4827 
4828 
4829  //! changing the size of the local matrix, previous values are lost
4830  template<class T, class Prop, class Storage, class Allocator>
4831  void DistributedMatrix<T, Prop, Storage, Allocator>
4832  ::Reallocate(int m, int n)
4833  {
4834  Matrix<T, Prop, Storage, Allocator>::Reallocate(m, n);
4835  this->ReallocateDist(m, n);
4836  }
4837 
4838 
4839  //! changing the size of the local matrix, previous values are kept
4840  template<class T, class Prop, class Storage, class Allocator>
4841  void DistributedMatrix<T, Prop, Storage, Allocator>
4842  ::Resize(int m, int n)
4843  {
4844  Matrix<T, Prop, Storage, Allocator>::Resize(m, n);
4845  this->ResizeDist(m, n);
4846  }
4847 
4848 
4849  //! Clears the matrix
4850  template<class T, class Prop, class Storage, class Allocator>
4851  void DistributedMatrix<T, Prop, Storage, Allocator>::Clear()
4852  {
4853  Matrix<T, Prop, Storage, Allocator>::Clear();
4854  DistributedMatrix_Base<T>::Clear();
4855  }
4856 
4857 
4858  //! Clears the local matrix
4859  template<class T, class Prop, class Storage, class Allocator>
4860  void DistributedMatrix<T, Prop, Storage, Allocator>::ClearLocal()
4861  {
4862  Matrix<T, Prop, Storage, Allocator>::Clear();
4863  }
4864 
4865 
4867  template<class T, class Prop, class Storage, class Allocator>
4868  template<class T2, class Prop2, class Storage2, class Allocator2>
4869  DistributedMatrix<T, Prop, Storage, Allocator>&
4870  DistributedMatrix<T, Prop, Storage, Allocator>
4871  ::operator=(const DistributedMatrix<T2, Prop2, Storage2, Allocator2>& X)
4872  {
4873  Seldon::Copy(static_cast<const Matrix<T2, Prop2, Storage2, Allocator2>& >(X),
4874  static_cast<Matrix<T, Prop, Storage, Allocator>& >(*this));
4875  DistributedMatrix_Base<T>::Copy(X);
4876 
4877  return *this;
4878  }
4879 
4880 
4882  template<class T, class Prop, class Storage, class Allocator>
4883  template<class T0>
4884  DistributedMatrix<T, Prop, Storage, Allocator>&
4885  DistributedMatrix<T, Prop, Storage, Allocator>::operator*=(const T0& x)
4886  {
4887  Mlt(x, static_cast<Matrix<T, Prop, Storage, Allocator>& >(*this));
4888 
4889  static_cast<DistributedMatrix_Base<T>& >(*this) *= x;
4890  return *this;
4891  }
4892 
4893 
4894  //! returns the size of memory used to store the matrix
4895  template<class T, class Prop, class Storage, class Allocator>
4896  size_t DistributedMatrix<T, Prop, Storage, Allocator>
4897  ::GetMemorySize() const
4898  {
4899  size_t taille = Matrix<T, Prop, Storage, Allocator>::GetMemorySize();
4900  taille += DistributedMatrix_Base<T>::GetMemorySize();
4901  return taille;
4902  }
4903 
4904 
4906 
4907  //! returns the number of non-zero entries stored in the matrix
4911  template<class T, class Prop, class Storage, class Allocator>
4912  long DistributedMatrix<T, Prop, Storage, Allocator>::GetNonZeros() const
4913  {
4914  long nnz = Matrix<T, Prop, Storage, Allocator>::GetNonZeros();
4915  nnz += DistributedMatrix_Base<T>::GetNonZeros();
4916  return nnz;
4917  }
4918 
4919 
4921 
4922  //! returns the number of elements stored
4927  template<class T, class Prop, class Storage, class Allocator>
4928  long DistributedMatrix<T, Prop, Storage, Allocator>::GetDataSize() const
4929  {
4930  long size = Matrix<T, Prop, Storage, Allocator>::GetDataSize();
4931  size += DistributedMatrix_Base<T>::GetDataSize();
4932  return size;
4933  }
4934 
4935 
4937 
4942  template<class T, class Prop, class Storage, class Allocator>
4943  template<class T0>
4944  void DistributedMatrix<T, Prop, Storage, Allocator>
4945  ::RemoveSmallEntry(const T0& epsilon)
4946  {
4947  Seldon::RemoveSmallEntry(static_cast<Matrix<T, Prop, Storage,
4948  Allocator>& >(*this),
4949  epsilon);
4950 
4951  DistributedMatrix_Base<T>::RemoveSmallEntry(epsilon);
4952  }
4953 
4954 
4955  //! sets the matrix to the identity matrix
4956  template<class T, class Prop, class Storage, class Allocator>
4957  void DistributedMatrix<T, Prop, Storage, Allocator>::SetIdentity()
4958  {
4959  Matrix<T, Prop, Storage, Allocator>::SetIdentity();
4960 
4961  MPI_Comm& comm = this->comm_;
4962  int nb_proc; MPI_Comm_size(comm, &nb_proc);
4963  if (nb_proc == 1)
4964  return;
4965 
4966  // setting to 0 the diagonal of overlapped rows
4967  const IVect& overlap = this->GetOverlapRowNumber();
4968  T zero; SetComplexZero(zero);
4969  for (int i = 0; i < overlap.GetM(); i++)
4970  this->Set(overlap(i), overlap(i), zero);
4971 
4973  }
4974 
4975 
4976  //! sets values of non-zero entries to 0
4977  template<class T, class Prop, class Storage, class Allocator>
4978  void DistributedMatrix<T, Prop, Storage, Allocator>::Zero()
4979  {
4980  Matrix<T, Prop, Storage, Allocator>::Zero();
4981  DistributedMatrix_Base<T>::Zero();
4982  }
4983 
4984 
4986 
4987  //! sets values of non-zero entries to 0, 1, 2, etc
4990  template<class T, class Prop, class Storage, class Allocator>
4991  void DistributedMatrix<T, Prop, Storage, Allocator>::Fill()
4992  {
4993  Matrix<T, Prop, Storage, Allocator>::Fill();
4994  DistributedMatrix_Base<T>::Fill();
4995  }
4996 
4997 
4999 
5000  //! sets all non-zero entries to a given value x
5003  template<class T, class Prop, class Storage, class Allocator>
5004  template<class T0>
5005  void DistributedMatrix<T, Prop, Storage, Allocator>::Fill(const T0& x)
5006  {
5007  Matrix<T, Prop, Storage, Allocator>::Fill(x);
5008  DistributedMatrix_Base<T>::Fill(x);
5009  }
5010 
5011 
5012  //! sets values of non-zero entries to random values
5013  template<class T, class Prop, class Storage, class Allocator>
5014  void DistributedMatrix<T, Prop, Storage, Allocator>::FillRand()
5015  {
5016  Matrix<T, Prop, Storage, Allocator>::FillRand();
5017  DistributedMatrix_Base<T>::FillRand();
5018  }
5018 
5019 
5021  template<class T, class Prop, class Storage, class Allocator>
5023  ::Write(string FileName) const
5024  {
5025  const MPI_Comm& comm = this->comm_;
5026  int nb_proc; MPI_Comm_size(comm, &nb_proc);
5027  if (nb_proc == 1)
5028  return Matrix<T, Prop, Storage, Allocator>::Write(FileName);
5029 
5030  cout << "Write not implemented for distributed matrices" << endl;
5031  abort();
5032  }
5033 
5034 
5036  template<class T, class Prop, class Storage, class Allocator>
5037  void DistributedMatrix<T, Prop, Storage, Allocator>
5038  ::Write(ostream& FileStream) const
5039  {
5040  const MPI_Comm& comm = this->comm_;
5041  int nb_proc; MPI_Comm_size(comm, &nb_proc);
5042  if (nb_proc == 1)
5043  return Matrix<T, Prop, Storage, Allocator>::Write(FileStream);
5044 
5045  cout << "Write not implemented for distributed matrices" << endl;
5046  abort();
5047  }
5048 
5049 
5051  template<class T, class Prop, class Storage, class Allocator>
5052  void DistributedMatrix<T, Prop, Storage, Allocator>
5053  ::WriteText(string FileName, bool cplx) const
5054  {
5055  const MPI_Comm& comm = this->comm_;
5056  int nb_proc; MPI_Comm_size(comm, &nb_proc);
5057  int rank_proc; MPI_Comm_rank(comm, &rank_proc);
5058  if (nb_proc == 1)
5059  return Matrix<T, Prop, Storage, Allocator>::WriteText(FileName, cplx);
5060 
5061  // a different file name for each processor
5062  string name = GetBaseString(FileName) + "_P" + to_str(rank_proc)
5063  + "." + GetExtension(FileName);
5064 
5065  // opening the stream
5066  ofstream FileStream(name.c_str());
5067  FileStream.precision(15);
5068 
5069 #ifdef SELDON_CHECK_IO
5070  // Checks if the file was opened.
5071  if (!FileStream.is_open())
5072  throw IOError("DistributedMatrix::WriteText(string FileName)",
5073  string("Unable to open file \"") + name + "\".");
5074 #endif
5075 
5076  // then writing the data
5077  WriteText(FileStream, cplx);
5078 
5079  // closing the file
5080  FileStream.close();
5081  }
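  // Usage (a minimal sketch, assuming a matrix A already distributed
  // over two processors):
  //
  //   DistributedMatrix<double, General, ArrayRowSparse> A;
  //   // ... fill A ...
  //   A.WriteText("mat.dat", false);
  //
  // rank 0 then writes "mat_P0.dat" and rank 1 writes "mat_P1.dat",
  // each file containing the local part in coordinate (text) format.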
5082 
5083 
5085  template<class T, class Prop, class Storage, class Allocator>
5086  void DistributedMatrix<T, Prop, Storage, Allocator>
5087  ::WriteText(ostream& FileStream, bool cplx) const
5088  {
5089  const MPI_Comm& comm = this->comm_;
5090  int nb_proc; MPI_Comm_size(comm, &nb_proc);
5091  if (nb_proc == 1)
5092  return Matrix<T, Prop, Storage, Allocator>::WriteText(FileStream, cplx);
5093 
5094  // converting local part into coordinate form
5095  Vector<int> IndRow, IndCol;
5096  Vector<T> Value;
5097  ConvertMatrix_to_Coordinates(*this, IndRow, IndCol, Value, 0, true);
5098 
5099  DistributedMatrix_Base<T>::WriteText(FileStream, IndRow, IndCol,
5100  Value, cplx);
5101  }
5102 
5103 
5105  template<class T, class Prop, class Storage, class Allocator>
5106  void DistributedMatrix<T, Prop, Storage, Allocator>::Read(string FileName)
5107  {
5108  MPI_Comm& comm = this->comm_;
5109  int nb_proc; MPI_Comm_size(comm, &nb_proc);
5110  if (nb_proc == 1)
5111  return Matrix<T, Prop, Storage, Allocator>::Read(FileName);
5112 
5113  cout << "Read not implemented for distributed matrices" << endl;
5114  abort();
5115  }
5116 
5117 
5119  template<class T, class Prop, class Storage, class Allocator>
5120  void DistributedMatrix<T, Prop, Storage, Allocator>
5121  ::Read(istream& FileStream)
5122  {
5123  MPI_Comm& comm = this->comm_;
5124  int nb_proc; MPI_Comm_size(comm, &nb_proc);
5125  if (nb_proc == 1)
5126  return Matrix<T, Prop, Storage, Allocator>::Read(FileStream);
5127 
5128  cout << "Read not implemented for distributed matrices" << endl;
5129  abort();
5130  }
5131 
5132 
5134  template<class T, class Prop, class Storage, class Allocator>
5135  void DistributedMatrix<T, Prop, Storage, Allocator>
5136  ::ReadText(string FileName, bool cplx)
5137  {
5138  MPI_Comm& comm = this->comm_;
5139  int nb_proc; MPI_Comm_size(comm, &nb_proc);
5140  if (nb_proc == 1)
5141  return Matrix<T, Prop, Storage, Allocator>::ReadText(FileName, cplx);
5142 
5143  cout << "ReadText not implemented for distributed matrices" << endl;
5144  abort();
5145  }
5146 
5147 
5149  template<class T, class Prop, class Storage, class Allocator>
5150  void DistributedMatrix<T, Prop, Storage, Allocator>
5151  ::ReadText(istream& FileStream, bool cplx)
5152  {
5153  MPI_Comm& comm = this->comm_;
5154  int nb_proc; MPI_Comm_size(comm, &nb_proc);
5155  if (nb_proc == 1)
5156  return Matrix<T, Prop, Storage, Allocator>::ReadText(FileStream, cplx);
5157 
5158  cout << "ReadText not implemented for distributed matrices" << endl;
5159  abort();
5160  }
5161 
5162 
5164 
5165  //! grouping all the local rows of the matrix into CSR form
5169  template<class T, class Prop, class Storage, class Allocator>
5170  template<class T0, class Allocator0>
5171  void DistributedMatrix<T, Prop, Storage, Allocator>
5172  ::GetDistributedRows(Matrix<T0, General, ArrayRowSparse,
5173  Allocator0>& B, Vector<IVect>& procB) const
5174  {
5175  Copy(static_cast<const Matrix<T, Prop, Storage, Allocator>& >(*this), B);
5176  DistributedMatrix_Base<T>::GetDistributedRows(B, procB, IsSymmetricMatrix(*this));
5177  }
5178 
5179 
5181 
5182  //! grouping all the local columns of the matrix into CSC form
5186  template<class T, class Prop, class Storage, class Allocator>
5187  template<class T0, class Allocator0>
5188  void DistributedMatrix<T, Prop, Storage, Allocator>
5189  ::GetDistributedColumns(Matrix<T0, General, ArrayColSparse, Allocator0>& B,
5190  Vector<IVect>& procB, bool sym_pattern) const
5191  {
5192  // conversion to CSC format of local part and symmetrisation of pattern
5193  Vector<long> Ptr; Vector<int> Ind; Vector<T0> Val;
5194  General sym;
5195  ConvertToCSC(*this, sym, Ptr, Ind, Val, sym_pattern);
5196 
5197  DistributedMatrix_Base<T>::
5198  GetDistributedColumns(B, procB, Ptr, Ind, Val, sym_pattern);
5199 
5200  }
5201 
5202 
5203  // DistributedMatrix //
5204  ///////////////////////
5205 
5206 
5207  /*************************
5208  * Matrix vector product *
5209  *************************/
5210 
5211 
5213  template<class T0, class T1, class Prop1, class Storage1, class Allocator1,
5214  class T2, class Storage2, class Allocator2, class T3,
5215  class T4, class Storage4, class Allocator4>
5216  void MltAddVector(const T0& alpha,
5217  const DistributedMatrix<T1, Prop1, Storage1, Allocator1>& M,
5218  const Vector<T2, Storage2, Allocator2>& X,
5219  const T3& beta,
5220  Vector<T4, Storage4, Allocator4>& Yres, bool assemble)
5221  {
5222  bool proceed_distant_row, proceed_distant_col;
5223  Vector<T2> Xcol; Vector<T4, Storage4, Allocator4> Y;
5224  M.InitMltAdd(proceed_distant_row, proceed_distant_col,
5225  X, Xcol, beta, Y, Yres);
5226 
5227  // local matrix
5228  MltVector(static_cast<const Matrix<T1, Prop1, Storage1, Allocator1>& >(M),
5229  X, Y);
5230 
5231  // distributed contribution
5232  M.FinalizeMltAdd(proceed_distant_row, proceed_distant_col,
5233  X, Xcol, alpha, beta, Y, Yres, assemble);
5234  }
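  // Usage (a minimal sketch, assuming a square distributed matrix A and
  // vectors following its row distribution): the call below computes
  // Y <- 1.0 Y + 2.0 A X and assembles the shared rows.
  //
  //   Vector<double> X(A.GetLocalM()), Y(A.GetLocalM());
  //   // ... fill X and Y ...
  //   MltAddVector(2.0, A, X, 1.0, Y, true);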
5235 
5236 
5238  template<class T0, class T1, class Prop1, class Storage1,
5239  class Allocator1,
5240  class T2, class Storage2, class Allocator2, class T3,
5241  class T4, class Storage4, class Allocator4>
5242  void MltAddVector(const T0& alpha,
5243  const SeldonTranspose& Trans,
5244  const DistributedMatrix<T1, Prop1, Storage1, Allocator1>& M,
5245  const Vector<T2, Storage2, Allocator2>& X,
5246  const T3& beta,
5247  Vector<T4, Storage4, Allocator4>& Yres, bool assemble)
5248  {
5249  if (Trans.NoTrans())
5250  {
5251  MltAddVector(alpha, M, X, beta, Yres, assemble);
5252  return;
5253  }
5254 
5255  bool proceed_distant_row, proceed_distant_col;
5256  Vector<T2> Xrow; Vector<T4, Storage4, Allocator4> Y;
5257  M.InitMltAdd(proceed_distant_row, proceed_distant_col,
5258  Trans, X, Xrow, beta, Y, Yres);
5259 
5260  // local matrix
5261  MltVector(Trans, static_cast<const Matrix<T1, Prop1,
5262  Storage1, Allocator1>& >(M), X, Y);
5263 
5264  M.FinalizeMltAdd(proceed_distant_row, proceed_distant_col,
5265  Trans, X, Xrow, alpha, beta, Y, Yres, assemble);
5266  }
5267 
5268 
5271  template<class T1, class Prop1, class Allocator1>
5272  void MltMin(const Matrix<T1, Prop1, ArrayRowSparse, Allocator1>& A,
5273  const IVect& global, IVect& Y, IVect& Yproc)
5274  {
5275  for (int i = 0; i < A.GetM(); i++)
5276  for (int j = 0; j < A.GetRowSize(i); j++)
5277  {
5278  int col = A.Index(i, j);
5279  if (Yproc(col) < Yproc(i))
5280  {
5281  Yproc(i) = Yproc(col);
5282  Y(i) = Y(col);
5283  }
5284  else if (Yproc(col) == Yproc(i))
5285  {
5286  if (Y(col) < Y(i))
5287  Y(i) = Y(col);
5288  else
5289  Y(col) = Y(i);
5290  }
5291  else
5292  {
5293  Yproc(col) = Yproc(i);
5294  Y(col) = Y(i);
5295  }
5296  }
5297  }
5298 
5299 
5302  template<class T1, class Prop1, class Allocator1>
5303  void MltMin(const Matrix<T1, Prop1, ArrayRowSymSparse, Allocator1>& A,
5304  const IVect& global, IVect& Y, IVect& Yproc)
5305  {
5306  for (int i = 0; i < A.GetM(); i++)
5307  for (int j = 0; j < A.GetRowSize(i); j++)
5308  {
5309  int col = A.Index(i, j);
5310  if (Yproc(col) < Yproc(i))
5311  {
5312  Yproc(i) = Yproc(col);
5313  Y(i) = Y(col);
5314  }
5315  else if (Yproc(col) == Yproc(i))
5316  {
5317  if (Y(col) < Y(i))
5318  Y(i) = Y(col);
5319  else
5320  Y(col) = Y(i);
5321  }
5322  else
5323  {
5324  Yproc(col) = Yproc(i);
5325  Y(col) = Y(i);
5326  }
5327  }
5328  }
5329 
5330 
5331 #ifdef SELDON_FILE_MATRIX_ARRAY_COMPLEX_SPARSE_HXX
5332  template<class T1, class Prop1, class Allocator1>
5335  void MltMin(const Matrix<T1, Prop1, ArrayRowComplexSparse, Allocator1>& A,
5336  const IVect& global, IVect& Y, IVect& Yproc)
5337  {
5338  for (int i = 0; i < A.GetM(); i++)
5339  {
5340  for (int j = 0; j < A.GetRealRowSize(i); j++)
5341  {
5342  int col = A.IndexReal(i, j);
5343  if (Yproc(col) < Yproc(i))
5344  {
5345  Yproc(i) = Yproc(col);
5346  Y(i) = Y(col);
5347  }
5348  else if (Yproc(col) == Yproc(i))
5349  {
5350  if (Y(col) < Y(i))
5351  Y(i) = Y(col);
5352  else
5353  Y(col) = Y(i);
5354  }
5355  else
5356  {
5357  Yproc(col) = Yproc(i);
5358  Y(col) = Y(i);
5359  }
5360  }
5361 
5362  for (int j = 0; j < A.GetImagRowSize(i); j++)
5363  {
5364  int col = A.IndexImag(i, j);
5365  if (Yproc(col) < Yproc(i))
5366  {
5367  Yproc(i) = Yproc(col);
5368  Y(i) = Y(col);
5369  }
5370  else if (Yproc(col) == Yproc(i))
5371  {
5372  if (Y(col) < Y(i))
5373  Y(i) = Y(col);
5374  else
5375  Y(col) = Y(i);
5376  }
5377  else
5378  {
5379  Yproc(col) = Yproc(i);
5380  Y(col) = Y(i);
5381  }
5382  }
5383  }
5384  }
5385 
5386 
5389  template<class T1, class Prop1, class Allocator1>
5390  void MltMin(const Matrix<T1, Prop1,
5391  ArrayRowSymComplexSparse, Allocator1>& A,
5392  const IVect& global, IVect& Y, IVect& Yproc)
5393  {
5394  for (int i = 0; i < A.GetM(); i++)
5395  {
5396  for (int j = 0; j < A.GetRealRowSize(i); j++)
5397  {
5398  int col = A.IndexReal(i, j);
5399  if (Yproc(col) < Yproc(i))
5400  {
5401  Yproc(i) = Yproc(col);
5402  Y(i) = Y(col);
5403  }
5404  else if (Yproc(col) == Yproc(i))
5405  {
5406  if (Y(col) < Y(i))
5407  Y(i) = Y(col);
5408  else
5409  Y(col) = Y(i);
5410  }
5411  else
5412  {
5413  Yproc(col) = Yproc(i);
5414  Y(col) = Y(i);
5415  }
5416  }
5417 
5418  for (int j = 0; j < A.GetImagRowSize(i); j++)
5419  {
5420  int col = A.IndexImag(i, j);
5421  if (Yproc(col) < Yproc(i))
5422  {
5423  Yproc(i) = Yproc(col);
5424  Y(i) = Y(col);
5425  }
5426  else if (Yproc(col) == Yproc(i))
5427  {
5428  if (Y(col) < Y(i))
5429  Y(i) = Y(col);
5430  else
5431  Y(col) = Y(i);
5432  }
5433  else
5434  {
5435  Yproc(col) = Yproc(i);
5436  Y(col) = Y(i);
5437  }
5438  }
5439  }
5440  }
5441 #endif
5442 
5443 
5445  template<class T1, class Prop1, class Storage1, class Allocator1>
5446  void MltMin(const DistributedMatrix<T1, Prop1, Storage1, Allocator1>& M,
5447  IVect& Y, IVect& Yproc)
5448  {
5449  IVect Xcol, Xcol_proc;
5450  M.InitMltMin(Y, Yproc, Xcol, Xcol_proc);
5451 
5452  // local matrix
5453  const IVect& global = M.GetGlobalRowNumber();
5454  MltMin(static_cast<const Matrix<T1, Prop1, Storage1, Allocator1>& >(M),
5455  global, Y, Yproc);
5456 
5457  M.FinalizeMltMin(Y, Yproc, Xcol, Xcol_proc);
5458  }
5459 
5460 
5461  /**************************
5462  * Functions for matrices *
5463  **************************/
5464 
5465 
5467  template<class T0, class T1, class Prop1, class Storage1, class Allocator1,
5468  class T2, class Prop2, class Storage2, class Allocator2>
5469  void AddMatrix(const T0& alpha,
5470  const DistributedMatrix<T1, Prop1, Storage1, Allocator1>& A,
5471  DistributedMatrix<T2, Prop2, Storage2, Allocator2>& B)
5472  {
5473  // adding local part
5474  AddMatrix(alpha,
5475  static_cast<const Matrix<T1, Prop1, Storage1, Allocator1>& >(A),
5476  static_cast<Matrix<T2, Prop2, Storage2, Allocator2>& >(B));
5477 
5478  B.AddDistributedMatrix(alpha, A);
5479  }
5480 
5481 
5483 
5488  template<class T1, class Prop1, class Storage1, class Allocator1>
5489  typename ClassComplexType<T1>::Treal
5490  MaxAbs(const DistributedMatrix<T1, Prop1, Storage1, Allocator1>& A)
5491  {
5492  typename ClassComplexType<T1>::Treal res;
5493  res = MaxAbs(static_cast<const Matrix<T1, Prop1,
5494  Storage1, Allocator1>& >(A));
5495 
5496  A.GetMaxAbsDistant(res);
5497  return res;
5498  }
5499 
5500 
5502 
5506  template<class T0, class T, class Prop, class Storage, class Allocator>
5507  void GetRowSum(Vector<T0>& vec_sum,
5508  const DistributedMatrix<T, Prop, Storage, Allocator>& A)
5509  {
5510  GetRowSum(vec_sum,
5511  static_cast<const Matrix<T, Prop, Storage, Allocator>& >(A));
5512 
5513  A.AddRowSumDistant(vec_sum);
5514  }
5515 
5516 
5519 
5523  template<class T0, class T, class Prop, class Storage, class Allocator>
5524  void GetColSum(Vector<T0>& vec_sum,
5525  const DistributedMatrix<T, Prop, Storage, Allocator>& A)
5526  {
5527  GetColSum(vec_sum,
5528  static_cast<const Matrix<T, Prop, Storage, Allocator>& >(A));
5529 
5530  A.AddColSumDistant(vec_sum);
5531  }
5532 
5533 
5535 
5539  template<class T0, class T, class Prop, class Storage, class Allocator>
5540  void GetRowColSum(Vector<T0>& sum_row, Vector<T0>& sum_col,
5541  const DistributedMatrix<T, Prop, Storage, Allocator>& A)
5542  {
5543  GetRowColSum(sum_row, sum_col,
5544  static_cast<const Matrix<T, Prop, Storage, Allocator>& >(A));
5545 
5546  A.AddRowColSumDistant(sum_row, sum_col);
5547  }
5548 
5549 
5551 
5555  template<class T1, class Prop1, class Storage1, class Allocator1>
5556  typename ClassComplexType<T1>::Treal
5557  Norm1(const DistributedMatrix<T1, Prop1, Storage1, Allocator1>& A)
5558  {
5559  const MPI_Comm& comm = A.GetCommunicator();
5560  int nb_proc; MPI_Comm_size(comm, &nb_proc);
5561  if (nb_proc == 1)
5562  return Norm1(static_cast<const Matrix<T1, Prop1,
5563  Storage1, Allocator1>& >(A));
5564 
5565  Vector<typename ClassComplexType<T1>::Treal> sum_col;
5566  GetColSum(sum_col, A);
5567 
5568  typename ClassComplexType<T1>::Treal res, amax;
5569  amax = 0;
5570  for (int i = 0; i < sum_col.GetM(); i++)
5571  amax = max(amax, abs(sum_col(i)));
5572 
5573  Vector<int64_t> xtmp;
5574  MpiAllreduce(comm, &amax, xtmp, &res, 1, MPI_MAX);
5575  return res;
5576  }
5577 
5578 
5580 
5584  template<class T1, class Prop1, class Storage1, class Allocator1>
5585  typename ClassComplexType<T1>::Treal
5586  NormInf(const DistributedMatrix<T1, Prop1, Storage1, Allocator1>& A)
5587  {
5588  const MPI_Comm& comm = A.GetCommunicator();
5589  int nb_proc; MPI_Comm_size(comm, &nb_proc);
5590  if (nb_proc == 1)
5591  return NormInf(static_cast<const Matrix<T1, Prop1,
5592  Storage1, Allocator1>& >(A));
5593 
5594  Vector<typename ClassComplexType<T1>::Treal> sum_row;
5595  GetRowSum(sum_row, A);
5596 
5597  typename ClassComplexType<T1>::Treal res, amax;
5598  amax = 0;
5599  for (int i = 0; i < sum_row.GetM(); i++)
5600  amax = max(amax, abs(sum_row(i)));
5601 
5602  Vector<int64_t> xtmp;
5603  MpiAllreduce(comm, &amax, xtmp, &res, 1, MPI_MAX);
5604  return res;
5605  }
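  // In short, the two norms above compute
  //   Norm1(A)   = max_j \sum_i |a_ij|   (maximum assembled column sum),
  //   NormInf(A) = max_i \sum_j |a_ij|   (maximum assembled row sum),
  // the local maxima being reduced over all processors with MPI_MAX.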
5606 
5607 
5609  template<class T1, class Prop1, class Storage1, class Allocator1>
5610  void Transpose(DistributedMatrix<T1, Prop1, Storage1, Allocator1>& A)
5611  {
5612 
5613  // storing the distributed data
5614  Vector<Vector<T1, VectSparse>, VectFull,
5615  NewAlloc<Vector<T1, VectSparse> > > dist_row, dist_col;
5616 
5617  bool local_number_distant_values;
5618  int smax_row, smax_col;
5619  Vector<IVect> proc_row, proc_col;
5620 
5621  IVect global_row_to_recv, global_col_to_recv;
5622  IVect ptr_global_row_to_recv, ptr_global_col_to_recv;
5623  Vector<IVect> local_row_to_send, local_col_to_send;
5624  IVect proc_col_to_recv, proc_col_to_send,
5625  proc_row_to_recv, proc_row_to_send;
5626 
5627  // distributed data is retrieved without copy
5628  A.ExchangeParallelData(smax_row, smax_col, local_number_distant_values,
5629  dist_row, dist_col, proc_row, proc_col,
5630  global_row_to_recv, global_col_to_recv,
5631  ptr_global_row_to_recv, ptr_global_col_to_recv,
5632  local_row_to_send, local_col_to_send,
5633  proc_row_to_recv, proc_col_to_recv,
5634  proc_row_to_send, proc_col_to_send);
5635 
5636  // local matrix is transposed (A may be erased during the process)
5637  Transpose(static_cast<Matrix<T1, Prop1, Storage1, Allocator1>& >(A));
5638 
5639  // transposing the distributed data
5640  A.ExchangeParallelData(smax_col, smax_row, local_number_distant_values,
5641  dist_col, dist_row, proc_col, proc_row,
5642  global_col_to_recv, global_row_to_recv,
5643  ptr_global_col_to_recv, ptr_global_row_to_recv,
5644  local_col_to_send, local_row_to_send,
5645  proc_col_to_recv, proc_row_to_recv,
5646  proc_col_to_send, proc_row_to_send);
5647  }
5648 
5649 
5651  template<class T1, class Prop1, class Storage1, class Allocator1>
5652  void Conjugate(DistributedMatrix<T1, Prop1, Storage1, Allocator1>& A)
5653  {
5654  Conjugate(static_cast<Matrix<T1, Prop1, Storage1, Allocator1>& >(A));
5655 
5656  A.ConjugateDistant();
5657  }
5658 
5659 
5661  template<class T1, class Prop1, class Storage1, class Allocator1>
5662  void Transpose(const DistributedMatrix<T1, Prop1, Storage1, Allocator1>& A,
5663  DistributedMatrix<T1, Prop1, Storage1, Allocator1>& B)
5664  {
5665  Transpose(static_cast<const Matrix<T1, Prop1, Storage1, Allocator1>& >(A),
5666  static_cast<Matrix<T1, Prop1, Storage1, Allocator1>& >(B));
5667 
5668  B.TransposeDistant(A);
5669  }
5670 
5671 
5673  template<class T, class Prop, class Storage, class Allocator>
5674  void TransposeConj(const DistributedMatrix<T, Prop, Storage, Allocator>& A,
5675  DistributedMatrix<T, Prop, Storage, Allocator>& B)
5676  {
5677  Transpose(A, B);
5678  Conjugate(B);
5679  }
5680 
5681 
5683  template<class T, class Prop, class Storage, class Allocator>
5684  void TransposeConj(DistributedMatrix<T, Prop, Storage, Allocator>& A)
5685  {
5686  Transpose(A);
5687  Conjugate(A);
5688  }
5689 
5690 
5691  /**************************
5692  * Matrix-matrix products *
5693  **************************/
5694 
5695 
5697  template<class T1, class Prop1, class Storage1, class Allocator1,
5698  class T2, class Prop2, class Storage2, class Allocator2,
5699  class T4, class Prop4, class Storage4, class Allocator4>
5700  void MltMatrix(const DistributedMatrix<T1, Prop1, Storage1, Allocator1>& A,
5701  const DistributedMatrix<T2, Prop2, Storage2, Allocator2>& B,
5702  DistributedMatrix<T4, Prop4, Storage4, Allocator4>& C)
5703  {
5704  const MPI_Comm& comm = A.GetCommunicator();
5705  int nb_proc; MPI_Comm_size(comm, &nb_proc);
5706  if (nb_proc == 1)
5707  return
5708  MltMatrix(static_cast<const Matrix<T1, Prop1, Storage1, Allocator1>& >(A),
5709  static_cast<const Matrix<T2, Prop2, Storage2, Allocator2>& >(B),
5710  static_cast<Matrix<T4, Prop4, Storage4, Allocator4>& >(C));
5711 
5712  cout << "Mlt not implemented for distributed matrices" << endl;
5713  abort();
5714  }
5715 
5716 
5718  template<class T0,
5719  class T1, class Prop1, class Storage1, class Allocator1,
5720  class T2, class Prop2, class Storage2, class Allocator2,
5721  class T3,
5722  class T4, class Prop4, class Storage4, class Allocator4>
5723  void MltAddMatrix(const T0& alpha,
5724  const DistributedMatrix<T1, Prop1, Storage1, Allocator1>& A,
5725  const DistributedMatrix<T2, Prop2, Storage2, Allocator2>& B,
5726  const T3& beta,
5727  DistributedMatrix<T4, Prop4, Storage4, Allocator4>& C)
5728  {
5729  const MPI_Comm& comm = A.GetCommunicator();
5730  int nb_proc; MPI_Comm_size(comm, &nb_proc);
5731  if (nb_proc == 1)
5732  return
5733  MltAddMatrix(alpha,
5734  static_cast<const Matrix<T1, Prop1,Storage1,Allocator1>& >(A),
5735  static_cast<const Matrix<T2, Prop2,Storage2,Allocator2>& >(B),
5736  beta,
5737  static_cast<Matrix<T4, Prop4, Storage4, Allocator4>& >(C));
5738 
5739  cout << "MltAdd not implemented for distributed matrices" << endl;
5740  abort();
5741  }
5742 
5743 
5745  template<class T0,
5746  class T1, class Prop1, class Storage1, class Allocator1,
5747  class T2, class Prop2, class Storage2, class Allocator2,
5748  class T3,
5749  class T4, class Prop4, class Storage4, class Allocator4>
5750  void MltAddMatrix(const T0& alpha, const SeldonTranspose& transA,
5751  const DistributedMatrix<T1, Prop1, Storage1, Allocator1>& A,
5752  const SeldonTranspose& transB,
5753  const DistributedMatrix<T2, Prop2, Storage2, Allocator2>& B,
5754  const T3& beta,
5755  DistributedMatrix<T4, Prop4, Storage4, Allocator4>& C)
5756  {
5757  const MPI_Comm& comm = A.GetCommunicator();
5758  int nb_proc; MPI_Comm_size(comm, &nb_proc);
5759  if (nb_proc == 1)
5760  return MltAddMatrix(alpha, transA,
5761  static_cast<const Matrix<T1, Prop1,
5762  Storage1, Allocator1>& >(A),
5763  transB,
5764  static_cast<const Matrix<T2, Prop2,
5765  Storage2, Allocator2>& >(B),
5766  beta,
5767  static_cast<Matrix<T4, Prop4,
5768  Storage4, Allocator4>& >(C));
5769 
5770  cout << "MltAdd not implemented for distributed matrices" << endl;
5771  abort();
5772  }
5773 
5774 
5775  /********************
5776  * Matrix functions *
5777  ********************/
5778 
5779 
5781  template<class T0, class Prop0, class Storage0, class Allocator0,
5782  class T1, class Allocator1>
5783  void GetRow(const DistributedMatrix<T0, Prop0, Storage0, Allocator0>& A,
5784  int i, Vector<T1, VectSparse, Allocator1>& X)
5785  {
5786  const MPI_Comm& comm = A.GetCommunicator();
5787  int nb_proc; MPI_Comm_size(comm, &nb_proc);
5788  if (nb_proc == 1)
5789  return GetRow(static_cast<const Matrix<T0, Prop0,
5790  Storage0, Allocator0>& >(A), i, X);
5791 
5792  cout << "GetRow not implemented for distributed matrices" << endl;
5793  abort();
5794  }
5795 
5796 
5798  template<class T0, class Prop0, class Storage0, class Allocator0,
5799  class T1, class Allocator1>
5800  void GetCol(const DistributedMatrix<T0, Prop0, Storage0, Allocator0>& A,
5801  int i, Vector<T1, VectSparse, Allocator1>& X)
5802  {
5803  const MPI_Comm& comm = A.GetCommunicator();
5804  int nb_proc; MPI_Comm_size(comm, &nb_proc);
5805  if (nb_proc == 1)
5806  return GetCol(static_cast<const Matrix<T0, Prop0,
5807  Storage0, Allocator0>& >(A), i, X);
5808 
5809  cout << "GetCol not implemented for distributed matrices" << endl;
5810  abort();
5811  }
5812 
5813 
5815  template<class T0, class Prop0, class Storage0, class Allocator0,
5816  class T1, class Allocator1>
5817  void SetRow(const Vector<T1, VectSparse, Allocator1>& X,
5818  int i, DistributedMatrix<T0, Prop0, Storage0, Allocator0>& A)
5819  {
5820  MPI_Comm& comm = A.GetCommunicator();
5821  int nb_proc; MPI_Comm_size(comm, &nb_proc);
5822  if (nb_proc == 1)
5823  return SetRow(X, i, static_cast<Matrix<T0, Prop0,
5824  Storage0, Allocator0>& >(A));
5825 
5826  cout << "SetRow not implemented for distributed matrices" << endl;
5827  abort();
5828  }
5829 
5830 
5832  template<class T0, class Prop0, class Storage0, class Allocator0,
5833  class T1, class Allocator1>
5834  void SetCol(const Vector<T1, VectSparse, Allocator1>& X,
5835  int i, DistributedMatrix<T0, Prop0, Storage0, Allocator0>& A)
5836  {
5837  MPI_Comm& comm = A.GetCommunicator();
5838  int nb_proc; MPI_Comm_size(comm, &nb_proc);
5839  if (nb_proc == 1)
5840  return SetCol(X, i, static_cast<Matrix<T0, Prop0,
5841  Storage0, Allocator0>& >(A));
5842 
5843  cout << "SetCol not implemented for distributed matrices" << endl;
5844  abort();
5845  }
5846 
5847 
5849  template<class T, class Prop, class Storage, class Allocator>
5850  void ApplyPermutation(DistributedMatrix<T, Prop, Storage, Allocator>& A,
5851  const Vector<int>& row_perm,
5852  const Vector<int>& col_perm)
5853  {
5854  MPI_Comm& comm = A.GetCommunicator();
5855  int nb_proc; MPI_Comm_size(comm, &nb_proc);
5856  if (nb_proc == 1)
5857  return ApplyPermutation(static_cast<Matrix<T, Prop,
5858  Storage, Allocator>& >(A),
5859  row_perm, col_perm);
5860 
5861  cout << "ApplyPermutation not implemented for distributed matrices"
5862  << endl;
5863  abort();
5864  }
5865 
5866 
5868  template<class T, class Prop, class Storage, class Allocator>
5869  void ApplyInversePermutation(DistributedMatrix<T, Prop,
5870  Storage, Allocator>& A,
5871  const Vector<int>& row_perm,
5872  const Vector<int>& col_perm)
5873  {
5874  MPI_Comm& comm = A.GetCommunicator();
5875  int nb_proc; MPI_Comm_size(comm, &nb_proc);
5876  if (nb_proc == 1)
5877  return ApplyInversePermutation(static_cast<Matrix<T, Prop, Storage,
5878  Allocator>& >(A),
5879  row_perm, col_perm);
5880 
5881  cout << "ApplyInversePermutation not implemented for distributed matrices"
5882  << endl;
5883  abort();
5884  }
5885 
5886 
5888  template <class T0, class Prop0, class Storage0, class Allocator0,
5889  class T1, class Storage1, class Allocator1,
5890  class T2, class Storage2, class Allocator2, class T3>
5891  void SorVector(const DistributedMatrix<T0, Prop0, Storage0, Allocator0>& A,
5892  Vector<T2, Storage2, Allocator2>& X,
5893  const Vector<T1, Storage1, Allocator1>& B,
5894  const T3& omega, int iter, int type_ssor)
5895  {
5896  const MPI_Comm& comm = A.GetCommunicator();
5897  int nb_proc; MPI_Comm_size(comm, &nb_proc);
5898  if (nb_proc == 1)
5899  return SorVector(static_cast<const Matrix<T0, Prop0,
5900  Storage0, Allocator0>& >(A),
5901  X, B, omega, iter, type_ssor);
5902 
5903  cout << "SOR not implemented for distributed matrices" << endl;
5904  abort();
5905  }
5906 
5907 
5909  template <class T0, class Prop0, class Storage0, class Allocator0,
5910  class T1, class Storage1, class Allocator1,
5911  class T2, class Storage2, class Allocator2, class T3>
5912  void SorVector(const SeldonTranspose& transM,
5913  const DistributedMatrix<T0, Prop0, Storage0, Allocator0>& A,
5914  Vector<T2, Storage2, Allocator2>& X,
5915  const Vector<T1, Storage1, Allocator1>& B,
5916  const T3& omega, int iter, int type_ssor)
5917  {
5918  if (transM.NoTrans())
5919  return SorVector(A, X, B, omega, iter, type_ssor);
5920 
5921  const MPI_Comm& comm = A.GetCommunicator();
5922  int nb_proc; MPI_Comm_size(comm, &nb_proc);
5923  if (nb_proc == 1)
5924  return SOR(transM,
5925  static_cast<const Matrix<T0, Prop0,
5926  Storage0, Allocator0>& >(A),
5927  X, B, omega, iter, type_ssor);
5928 
5929  cout << "SOR not implemented for distributed matrices" << endl;
5930  abort();
5931  }
5932 
5933 
5935  template<class T1, class Prop, class Storage, class Allocator,
5936  class T2, class Allocator2, class Allocator3>
5937  void GetCol(const DistributedMatrix<T1, Prop, Storage, Allocator>& A,
5938  const IVect& col_number,
5939  Vector<Vector<T2, VectSparse, Allocator2>,
5940  VectSparse, Allocator3>& V)
5941  {
5942  const MPI_Comm& comm = A.GetCommunicator();
5943  int nb_proc; MPI_Comm_size(comm, &nb_proc);
5944  if (nb_proc == 1)
5945  return GetCol(static_cast<const Matrix<T1, Prop,
5946  Storage, Allocator>& >(A),
5947  col_number, V);
5948 
5949  cout << "GetCol not implemented for distributed matrices" << endl;
5950  abort();
5951  }
5952 
5953 
5955  template<class T, class Prop1, class Storage1, class Allocator1,
5956  class Prop2, class Storage2, class Allocator2>
5957  void Copy(const DistributedMatrix<T, Prop1, Storage1, Allocator1>& A,
5958  DistributedMatrix<T, Prop2, Storage2, Allocator2>& B)
5959  {
5960  B = A;
5961  }
5962 
5963 
5965  template<class T, class Prop1, class Storage1, class Allocator1,
5966  class Prop2, class Storage2, class Allocator2>
5967  void Copy(const DistributedMatrix<T, Prop1, Storage1, Allocator1>& A,
5968  DistributedMatrix<complex<T>, Prop2, Storage2, Allocator2>& B)
5969  {
5970  B = A;
5971  }
5972 
5973 
5975  template<class T1, class Prop1, class Storage1, class Allocator1,
5976  class T2, class Prop2, class Storage2, class Allocator2>
5977  void CopyReal(const DistributedMatrix<T1, Prop1, Storage1, Allocator1>& A,
5978  DistributedMatrix<T2, Prop2, Storage2, Allocator2>& B)
5979  {
5980  MPI_Comm& comm = B.GetCommunicator();
5981  int nb_proc; MPI_Comm_size(comm, &nb_proc);
5982  if (nb_proc == 1)
5983  return CopyReal(static_cast<const Matrix<T1, Prop1,
5984  Storage1, Allocator1>& >(A),
5985  static_cast<Matrix<T2, Prop2,
5986  Storage2, Allocator2>& >(B));
5987 
5988  cout << "CopyReal not implemented for distributed matrices" << endl;
5989  abort();
5990  }
5991 
5992 
5994  template<class T, class Prop, class Storage, class Allocator>
5995  typename ClassComplexType<T>::Treal
5996  NormFro(const DistributedMatrix<T, Prop, Storage, Allocator>& A)
5997  {
5998  const MPI_Comm& comm = A.GetCommunicator();
5999  int nb_proc; MPI_Comm_size(comm, &nb_proc);
6000  if (nb_proc == 1)
6001  return NormFro(static_cast<const Matrix<T, Prop,
6002  Storage, Allocator>& >(A));
6003 
6004  cout << "NormFro not implemented for distributed matrices" << endl;
6005  abort();
6006  }
6007 
6008 
6010  template<class T, class Storage, class Allocator,
6011  class T1, class Allocator1>
6012  void ScaleLeftMatrix(DistributedMatrix<T, General, Storage, Allocator>& A,
6013  const Vector<T1, VectFull, Allocator1>& Drow)
6014  {
6015  ScaleLeftMatrix(static_cast<Matrix<T, General,
6016  Storage, Allocator>& >(A), Drow);
6017 
6018  A.ScaleLeftDistant(Drow);
6019  }
6020 
6021 
6023  template<class T, class Storage, class Allocator,
6024  class T1, class Allocator1>
6025  void ScaleRightMatrix(DistributedMatrix<T, General, Storage, Allocator>& A,
6026  const Vector<T1, VectFull, Allocator1>& Dcol)
6027  {
6028  ScaleRightMatrix(static_cast<Matrix<T, General,
6029  Storage, Allocator>& >(A), Dcol);
6030 
6031  A.ScaleRightDistant(Dcol);
6032  }
6033 
6034 
6036  template<class T, class Prop, class Storage, class Allocator,
6037  class T1, class Allocator1, class T2, class Allocator2>
6038  void ScaleMatrix(DistributedMatrix<T, Prop, Storage, Allocator>& A,
6039  const Vector<T1, VectFull, Allocator1>& Drow,
6040  const Vector<T2, VectFull, Allocator2>& Dcol)
6041  {
6042  ScaleMatrix(static_cast<Matrix<T, Prop, Storage, Allocator>& >(A),
6043  Drow, Dcol);
6044 
6045  A.ScaleDistant(Drow, Dcol);
6046  }
6047 
6048 
6050 
6062  template<class Prop, class Storage, class Alloc, class Tint0, class Tint1, class T>
6063  void AssembleDistributed(DistributedMatrix<T, Prop, Storage, Alloc>& A,
6064  Symmetric& sym, const MPI_Comm& comm,
6065  IVect& row_numbers, IVect& local_row_numbers,
6066  Vector<Tint0>& PtrA, Vector<Tint1>& IndA,
6067  Vector<T>& ValA, bool sym_pattern, bool reorder)
6068  {
6069  PtrA.Clear(); IndA.Clear(); ValA.Clear();
6070 
6071  // we convert A to ArrayRowSparse format
6072  Matrix<T, General, ArrayRowSparse> B;
6073  Vector<IVect> procB;
6074  if (reorder)
6075  procB.Reallocate(A.GetM());
6076 
6077  A.GetDistributedRows(B, procB);
6078 
6079  // local matrix is cleared
6080  A.ClearLocal();
6081 
6082  // then calling AssembleParallel
6083  IVect OverlappedCol;
6084  A.AssembleParallel(B, procB, sym, row_numbers, local_row_numbers,
6085  OverlappedCol, sym_pattern, reorder);
6086 
6087  // final conversion with type Tint
6088  A.ConvertToCSR(B, OverlappedCol, PtrA, IndA, ValA);
6089  }
6090 
6091 
6093 
6105  template<class Prop, class Storage, class Alloc, class Tint0, class Tint1, class T>
6106  void AssembleDistributed(DistributedMatrix<T, Prop, Storage, Alloc>& A,
6107  General& prop, const MPI_Comm& comm,
6108  IVect& col_numbers, IVect& local_col_numbers,
6109  Vector<Tint0>& PtrA, Vector<Tint1>& IndA,
6110  Vector<T>& ValA, bool sym_pattern, bool reorder)
6111  {
6112  PtrA.Clear(); IndA.Clear(); ValA.Clear();
6113 
6114  // we convert A to CSC format
6115  Matrix<T, General, ArrayColSparse> B;
6116  Vector<IVect> procB;
6117  if (reorder)
6118  procB.Reallocate(A.GetN());
6119 
6120  A.GetDistributedColumns(B, procB, sym_pattern);
6121 
6122  // local matrix is cleared
6123  A.ClearLocal();
6124 
6125  // then AssembleParallel is called
6126  IVect OverlappedCol;
6127  A.AssembleParallel(B, procB, prop, col_numbers, local_col_numbers,
6128  OverlappedCol, sym_pattern, reorder);
6129 
6130  // final conversion with type Tint
6131  A.ConvertToCSC(B, OverlappedCol, PtrA, IndA, ValA);
6132  }
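  // Usage (a minimal sketch, assuming a distributed matrix A with
  // general storage and a direct solver that expects a distributed CSC
  // matrix with consecutive column numbers on each processor):
  //
  //   General prop; IVect col_num, local_col_num;
  //   Vector<long> Ptr; Vector<int> Ind; Vector<double> Val;
  //   AssembleDistributed(A, prop, A.GetCommunicator(), col_num,
  //                       local_col_num, Ptr, Ind, Val, false, true);
  //
  // on exit, A is cleared and (Ptr, Ind, Val) holds the assembled local
  // columns, globally renumbered according to col_num; the Symmetric
  // overload above proceeds the same way with rows (CSR) instead of
  // columns.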
6133 
6134 
6136 
6142  template<class T1, class Prop1, class Storage1, class Allocator1>
6143  void EraseCol(const IVect& num,
6144  DistributedMatrix<T1, Prop1, Storage1, Allocator1>& A)
6145  {
6146  // erasing columns of the local entries
6147  EraseCol(num, static_cast<Matrix<T1, Prop1, Storage1, Allocator1>& >(A));
6148 
6149  A.EraseColDistant(num, IsSymmetricMatrix(A));
6150  }
6151 
6152 
6154 
6160  template<class T1, class Prop1, class Storage1, class Allocator1>
6161  void EraseRow(const IVect& num,
6162  DistributedMatrix<T1, Prop1, Storage1, Allocator1>& A)
6163  {
6164  // erasing rows of the local entries
6165  EraseRow(num, static_cast<Matrix<T1, Prop1, Storage1, Allocator1>& >(A));
6166 
6167  A.EraseRowDistant(num, IsSymmetricMatrix(A));
6168  }
6169 
6170 
6172  template<class T0, class Prop0, class Storage0, class Allocator0,
6173  class T1, class Prop1, class Storage1, class Allocator1>
6174  void
6175  CopySubMatrix(const DistributedMatrix<T0, Prop0, Storage0, Allocator0>& A,
6176  const IVect& row, const IVect& col,
6177  DistributedMatrix<T1, Prop1, Storage1, Allocator1>& B)
6178  {
6179  CopySubMatrix(static_cast<const Matrix<T0, Prop0,
6180  Storage0, Allocator0>& >(A),
6181  row, col, static_cast<Matrix<T1, Prop1,
6182  Storage1, Allocator1>& >(B));
6183 
6184  B.CopySubDistant(A, row, col, IsSymmetricMatrix(B));
6185  }
6186 
6187 }
6188 
6189 #define SELDON_FILE_DISTRIBUTED_MATRIX_CXX
6190 #endif
6191 
Seldon::GetBaseString
string GetBaseString(const string &nom)
returns base of a string
Definition: Common.cxx:43
Seldon::DistributedMatrix_Base::global_row_to_recv
IVect global_row_to_recv
global row/col numbers (needed for MltAdd)
Definition: DistributedMatrix.hxx:158
Seldon::DistributedMatrix::operator*=
DistributedMatrix< T, Prop, Storage, Allocator > & operator*=(const T0 &x)
multiplication by a scalar
Definition: DistributedMatrix.cxx:4885
Seldon::DistributedMatrix_Base::PrepareMltAdd
void PrepareMltAdd()
prepares a matrix vector product with the distributed matrix
Definition: DistributedMatrix.cxx:956
Seldon::DistributedMatrix_Base::InitMltAdd
void InitMltAdd(bool &proceed_distant_row, bool &proceed_distant_col, const Vector< T2 > &X, Vector< T2 > &Xcol, const T3 &beta, Vector< T4, Storage4, Allocator4 > &Y, Vector< T4, Storage4, Allocator4 > &Yres) const
Initializes the matrix-vector product
Definition: DistributedMatrix.cxx:2434
Seldon::DistributedMatrix_Base::dist_col
Vector< Vector< T, VectSparse >, VectFull, NewAlloc< Vector< T, VectSparse > > > dist_col
additional values on rows with non-local columns
Definition: DistributedMatrix.hxx:148
Seldon::DistributedMatrix_Base::EraseArrayForMltAdd
void EraseArrayForMltAdd()
erases any array used for MltAdd
Definition: DistributedMatrix.cxx:406
Seldon::TransposeConj
void TransposeConj(const Matrix< T, Prop, Storage, Allocator > &A, Matrix< T, Prop, Storage, Allocator > &B)
Matrix transposition and conjugation.
Definition: Functions_Matrix.cxx:2925
Seldon::DistributedMatrix_Base::GetLocalN
int GetLocalN() const
returns the local number of columns
Definition: DistributedMatrixInline.cxx:58
Seldon::DistributedMatrix::GetDistributedColumns
void GetDistributedColumns(Matrix< T0, General, ArrayColSparse, Allocator0 > &rows, Vector< IVect > &, bool sym_pattern) const
grouping all the local columns of the matrix into CSC form
Definition: DistributedMatrix.cxx:5189
Seldon::SeldonTranspose
Definition: MatrixFlag.hxx:32
Seldon::DistributedMatrix::Clear
void Clear()
Clears the matrix.
Definition: DistributedMatrix.cxx:4851
Seldon::DistributedMatrix_Base::AssembleValuesMin
void AssembleValuesMin(const IVect &Xcol, const IVect &Xcol_proc, const IVect &num_recv, const IVect &ptr_num_recv, const IVect &proc_recv, const Vector< IVect > &num_send, const IVect &proc_send, IVect &Y, IVect &Yproc) const
assembles the results for each row, by taking the minimum of Yproc then the minimum of Y
Definition: DistributedMatrix.cxx:764
Seldon::DistributedMatrix_Base::Zero
void Zero()
sets values of non-zero entries to 0
Definition: DistributedMatrix.cxx:2297
Seldon::DistributedMatrix_Base::GetRowSumDistantCol
void GetRowSumDistantCol(Vector< T0 > &vec_sum) const
adds contribution of dist_col for GetRowSum
Definition: DistributedMatrix.cxx:887
Seldon::DistributedMatrix::Read
void Read(string FileName)
reads the matrix in binary format
Definition: DistributedMatrix.cxx:5106
Seldon::DistributedMatrix_Base::ConvertToCSR
static void ConvertToCSR(Matrix< T, General, ArrayRowSparse > &B, IVect &OverlappedCol, Vector< Tint0 > &PtrA, Vector< Tint1 > &IndA, Vector< T > &ValA)
Fills PtrA, IndA and ValA from values contained in B.
Definition: DistributedMatrix.cxx:3885
Seldon::DistributedMatrix_Base::Init
void Init(int n, IVect *, IVect *, IVect *, int, int, IVect *, Vector< IVect > *, const MPI_Comm &)
Initialisation of pointers.
Definition: DistributedMatrix.cxx:1750
Seldon::Copy
void Copy(const DistributedMatrix< complex< T >, Prop1, Storage1, Allocator1 > &A, DistributedMatrix< T, Prop2, Storage2, Allocator2 > &B)
converts a distributed matrix to another one
Definition: DistributedMatrixInline.cxx:344
Seldon::DistributedMatrix_Base::GetDistributedColumns
void GetDistributedColumns(Matrix< T0, General, ArrayColSparse, Allocator0 > &B, Vector< IVect > &procB, Vector< long > &Ptr, IVect &Ind, Vector< T0 > &Val, bool sym_pattern) const
grouping all the local columns of the matrix into CSC form
Definition: DistributedMatrix.cxx:4703
Seldon::DistributedMatrix_Base::AddDistributedMatrix
void AddDistributedMatrix(const T0 &alpha, const DistributedMatrix_Base< T1 > &A)
Adds alpha A to the current matrix (only distant values)
Definition: DistributedMatrix.cxx:2748
Seldon::DistributedMatrix_Base::FillRand
void FillRand()
sets values of non-zero entries to random values
Definition: DistributedMatrix.cxx:2350
Seldon::DistributedMatrix_Base::Clear
void Clear()
matrix is cleared
Definition: DistributedMatrix.cxx:1916
Seldon::to_str
std::string to_str(const T &input)
Converts most types to string.
Definition: CommonInline.cxx:137
Seldon::Norm1
ClassComplexType< T >::Treal Norm1(const VectorExpression< T, E > &X)
returns 1-norm of an expression with vectors
Definition: Functions_BaseInline.cxx:140
Seldon::DistributedMatrix_Base::AddRowColSumDistant
void AddRowColSumDistant(Vector< T0 > &sum_row, Vector< T0 > &sum_col) const
Adds \sum |a_ij| to sum_row(i) and sum_col(j) for distant non-zero entries.
Definition: DistributedMatrix.cxx:2848
Seldon::DistributedMatrix::SetIdentity
void SetIdentity()
sets matrix to identity matrix
Definition: DistributedMatrix.cxx:4957
Seldon::DistributedMatrix::operator=
DistributedMatrix< T, Prop, Storage, Allocator > & operator=(const DistributedMatrix< T2, Prop2, Storage2, Allocator2 > &X)
equality *this = X
Definition: DistributedMatrix.cxx:4871
Seldon::DistributedMatrix_Base::MltAddRow
void MltAddRow(const SeldonTranspose &Trans, const Vector< T2, Storage2, Allocator2 > &X, Vector< T4, Storage4, Allocator4 > &Y) const
Y = Y + alpha A^T X with only distant rows of A.
Definition: DistributedMatrix.cxx:1107
Seldon::Vector
Definition: SeldonHeader.hxx:207
Seldon::DistributedMatrix_Base::AddRowSumDistant
void AddRowSumDistant(Vector< T0 > &vec_sum) const
Adds \sum |a_ij| to vec_sum(i) for distant non-zero entries.
Definition: DistributedMatrix.cxx:2800
Seldon::SwapPointer
void SwapPointer(Vector< T > &X, Vector< T > &Y)
Swaps X and Y with pointers.
Definition: Functions_Vector.cxx:377
Seldon::DistributedMatrix_Base::ConvertToCSC
static void ConvertToCSC(Matrix< T, General, ArrayColSparse > &B, IVect &OverlappedCol, Vector< Tint0 > &PtrA, Vector< Tint1 > &IndA, Vector< T > &ValA)
Fills PtrA, IndA and ValA from values contained in B.
Definition: DistributedMatrix.cxx:4528
Seldon::DistributedMatrix_Base::ReallocateDist
void ReallocateDist(int m, int n)
changing the size of the local matrix, previous values are lost
Definition: DistributedMatrix.cxx:1884
Seldon::DistributedMatrix::Write
void Write(string FileName) const
writes the matrix in binary format
Definition: DistributedMatrix.cxx:5023
Seldon::DistributedMatrix::FillRand
void FillRand()
sets values of non-zero entries to random values
Definition: DistributedMatrix.cxx:5014
Seldon::DistributedMatrix_Base::AddColSumDistant
void AddColSumDistant(Vector< T0 > &vec_sum) const
Adds \sum |a_ij| to vec_sum(j) for distant non-zero entries.
Definition: DistributedMatrix.cxx:2824
Seldon::DistributedMatrix_Base::InitMltMin
void InitMltMin(Vector< int > &Y, Vector< int > &Yproc, Vector< int > &Xcol, Vector< int > &Xcol_proc) const
Initializes the matrix-vector product MltMin
Definition: DistributedMatrix.cxx:2621
Seldon::DistributedMatrix_Base::OverlapRowNumbers
IVect * OverlapRowNumbers
row numbers shared with other processors
Definition: DistributedMatrix.hxx:114
Seldon::IsSymmetricMatrix
bool IsSymmetricMatrix(const Matrix< T, Prop, Storage, Allocator > &A)
returns true if the matrix is symmetric
Definition: Functions_BaseInline.cxx:778
Seldon::DistributedMatrix_Base::nodl_scalar_
int nodl_scalar_
number of "scalar" unknowns
Definition: DistributedMatrix.hxx:138
Seldon::DistributedMatrix_Base::comm_
MPI_Comm comm_
MPI communicator.
Definition: DistributedMatrix.hxx:144
Seldon::DistributedMatrix_Base::GetMemorySize
size_t GetMemorySize() const
returns the size of memory used to store the matrix
Definition: DistributedMatrix.cxx:2162
Seldon::DistributedMatrix_Base::GetColSumDistantRow
void GetColSumDistantRow(Vector< T0 > &vec_sum) const
adds contribution of dist_row for GetColSum
Definition: DistributedMatrix.cxx:936
Seldon::DistributedMatrix::RemoveSmallEntry
void RemoveSmallEntry(const T0 &epsilon)
removes small entries of the matrix
Definition: DistributedMatrix.cxx:4945
Seldon::DistributedMatrix_Base::GetOverlapRowNumber
IVect & GetOverlapRowNumber()
returns rows already counted in another processor
Definition: DistributedMatrix.cxx:2044
Seldon::Matrix
Definition: SeldonHeader.hxx:226
Seldon::DistributedMatrix_Base::AssembleVec
void AssembleVec(Vector< T2 > &) const
assembles the vector (adds values of shared rows)
Definition: DistributedMatrix.cxx:1050
Seldon::DistributedMatrix_Base::GetDistributedRows
void GetDistributedRows(Matrix< T0, General, ArrayRowSparse, Allocator0 > &rows, Vector< IVect > &proc, bool sym) const
grouping all the local rows of the matrix into CSR form
Definition: DistributedMatrix.cxx:4579
Seldon::DistributedMatrix_Base::ScatterValues
void ScatterValues(const Vector< T2 > &X, const IVect &num_recv, const IVect &, const IVect &proc_recv, const Vector< IVect > &num_send, const IVect &proc_send, Vector< T2 > &Xcol) const
internal function
Definition: DistributedMatrix.cxx:642
Seldon::DistributedMatrix_Base::GetNonZeros
long GetNonZeros() const
returns the number of non-zero entries stored in the matrix
Definition: DistributedMatrix.cxx:2207
Seldon::CopyReal
void CopyReal(const DistributedMatrix< T1, Prop1, Storage1, Allocator1 > &A, DistributedMatrix< T2, Prop2, Storage2, Allocator2 > &B)
extracts real part of a matrix
Definition: DistributedMatrix.cxx:5977
Seldon::DistributedMatrix::ReadText
void ReadText(string FileName, bool cplx=false)
reads the matrix in text format
Definition: DistributedMatrix.cxx:5136
Seldon::NewAlloc
Definition: Allocator.hxx:91
Seldon::DistributedMatrix_Base::GetGlobalRowNumber
IVect & GetGlobalRowNumber()
returns local to global numbering
Definition: DistributedMatrix.cxx:2014
Seldon::DistributedMatrixIntegerArray
class storing arrays needed for a distributed matrix
Definition: DistributedMatrix.hxx:26
Seldon::DistributedMatrix_Base::GetSharingRowNumbers
Vector< IVect > & GetSharingRowNumbers()
returns row numbers for each set of shared rows
Definition: DistributedMatrix.cxx:2133
Seldon::VectFull
Definition: StorageInline.cxx:74
Seldon::DistributedMatrix_Base::GetDataSize
long GetDataSize() const
returns the number of elements stored
Definition: DistributedMatrix.cxx:2228
Seldon::DistributedMatrix_Base::CopySubDistant
void CopySubDistant(const DistributedMatrix_Base< T1 > &A, const IVect &row, const IVect &col, bool sym)
Copies A(row, col) to the current matrix.
Definition: DistributedMatrix.cxx:3150
Seldon::DistributedMatrix_Base::AssembleValues
void AssembleValues(const Vector< T2 > &Xcol, const IVect &num_recv, const IVect &, const IVect &proc_recv, const Vector< IVect > &num_send, const IVect &proc_send, Vector< T2 > &X) const
internal function
Definition: DistributedMatrix.cxx:701
Seldon::RemoveSmallEntry
void RemoveSmallEntry(Matrix< T, Prop, RowSymSparse, Allocator > &A, const T0 &epsilon)
drops non-zero entries below epsilon
Definition: Functions_MatrixArray.cxx:2764
Seldon::DistributedMatrix_Base
Base class for distributed matrix over all the processors.
Definition: DistributedMatrix.hxx:103
Seldon::DistributedMatrix::Resize
void Resize(int m, int n)
changing the size of the local matrix, previous values are kept
Definition: DistributedMatrix.cxx:4842
Seldon::DistributedMatrix_Base::ProcSharingRows
IVect * ProcSharingRows
list of processors sharing rows with the current one
Definition: DistributedMatrix.hxx:127
Seldon::DistributedMatrix_Base::SameDistributedRows
bool SameDistributedRows(const DistributedMatrix_Base< T > &A)
returns true if the distributed rows of A coincide with the distributed rows of the current matrix
Definition: DistributedMatrix.cxx:3268
Seldon::DistributedMatrix::Fill
void Fill()
sets values of non-zero entries to 0, 1, 2, etc
Definition: DistributedMatrix.cxx:4991
Seldon::DistributedMatrix_Base::SwitchToGlobalNumbers
void SwitchToGlobalNumbers()
erases informations for matrix-vector product and reverts dist_row/dist_col to global numbers
Definition: DistributedMatrix.cxx:425
Seldon::DistributedMatrix_Base::RemoveSmallEntry
void RemoveSmallEntry(const T0 &epsilon)
removes small entries of the matrix
Definition: DistributedMatrix.cxx:2268
Seldon::Conjugate
void Conjugate(Matrix< T, Prop, Storage, Allocator > &A)
A is replaced by its conjugate.
Definition: Functions_Matrix.cxx:2915
Seldon::DistributedMatrix_Base::SortAndAssembleDistantInteractions
void SortAndAssembleDistantInteractions(TypeDist &dist_val, Vector< IVect > &dist_proc, IVect &glob_num, IVect &ptr_glob_num, IVect &proc_glob, Vector< IVect > &local_num, IVect &proc_local)
changes global numbers in proc_row to local numbers
Definition: DistributedMatrix.cxx:470
Seldon::DistributedMatrix_Base::ScatterColValues
void ScatterColValues(const Vector< T2 > &X, Vector< T2 > &Xcol) const
Sends/receives values of X on distant columns.
Definition: DistributedMatrix.cxx:1012
Seldon::DistributedMatrix::GetDistributedRows
void GetDistributedRows(Matrix< T0, General, ArrayRowSparse, Allocator0 > &rows, Vector< IVect > &proc) const
grouping all the local rows of the matrix into CSR form
Definition: DistributedMatrix.cxx:5172
Seldon::DistributedMatrix_Base::Fill
void Fill()
sets values of non-zero entries to 0, 1, 2, etc.
Definition: DistributedMatrix.cxx:2313
Seldon::DistributedMatrix::Reallocate
void Reallocate(int m, int n)
changing the size of the local matrix; previous values are lost
Definition: DistributedMatrix.cxx:4832
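The two sizing methods above differ only in what happens to stored values; a sketch on a hypothetical local matrix A:

A.Resize(m, n);       // new size, previously stored values are kept
A.Reallocate(m, n);   // new size, previous values are lost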
Seldon::DistributedMatrix_Base::local_number_distant_values
bool local_number_distant_values
if true, local numbers are stored in dist_row/dist_col instead of global numbers
Definition: DistributedMatrix.hxx:170
Seldon::DistributedMatrix_Base::Copy
void Copy(const DistributedMatrix_Base< T2 > &X)
Copies content of X in the current object.
Definition: DistributedMatrix.cxx:1952
Seldon::DistributedMatrix_Base::GetProcessorSharingRows
IVect & GetProcessorSharingRows()
returns processor numbers for each set of shared rows
Definition: DistributedMatrix.cxx:2103
Seldon::DistributedMatrix_Base::AssembleColValues
void AssembleColValues(const Vector< T2 > &Xrow, Vector< T2 > &X) const
Sends/receives values of Xcol on distant columns and adds them to X.
Definition: DistributedMatrix.cxx:1039
Seldon::DistributedMatrix_Base::FinalizeMltMin
void FinalizeMltMin(Vector< int > &Y, Vector< int > &Yproc, Vector< int > &Xcol, Vector< int > &Xcol_proc) const
Finalizes the matrix-vector product MltMin.
Definition: DistributedMatrix.cxx:2646
Seldon::DistributedMatrix_Base::RemoveSmallEntryDistant
void RemoveSmallEntryDistant(const T0 &, TypeDist &, Vector< IVect > &)
removes non-zero entries of dist_vec below epsilon; dist_proc is modified in the same way
Definition: DistributedMatrix.cxx:848
Seldon::DistributedMatrix_Base::AssembleRowValues
void AssembleRowValues(const Vector< T2 > &Xrow, Vector< T2 > &X) const
Sends/receives values of Xrow on distant rows and adds them to X.
Definition: DistributedMatrix.cxx:1028
Seldon::DistributedMatrix_Base::FinalizeMltAdd
void FinalizeMltAdd(bool proceed_distant_row, bool proceed_distant_col, const Vector< T2 > &X, Vector< T2 > &Xcol, const T3 &alpha, const T3 &beta, Vector< T4, Storage4, Allocator4 > &Y, Vector< T4, Storage4, Allocator4 > &Yres, bool assemble) const
Finalizes the matrix-vector product.
Definition: DistributedMatrix.cxx:2486
Seldon::DistributedMatrix_Base::GlobalRowNumbers
IVect * GlobalRowNumbers
global row numbers
Definition: DistributedMatrix.hxx:124
Seldon::DistributedMatrix_Base::WriteText
void WriteText(ostream &FileStream, Vector< int > &IndRow, Vector< int > &IndCol, Vector< T > &Value, bool cplx) const
writes the matrix in text format
Definition: DistributedMatrix.cxx:2363
Seldon::MltMatrix
void MltMatrix(const Matrix< T0, Prop0, RowSparse, Allocator0 > &A, const Matrix< T1, Prop1, RowSparse, Allocator1 > &B, Matrix< T2, Prop2, RowSparse, Allocator2 > &C)
Multiplies two row-major sparse matrices in Harwell-Boeing format.
Definition: Functions_Matrix.cxx:207
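A minimal sketch of the sparse product above (operands assumed of compatible sizes and already assembled in RowSparse storage):

Matrix<double, General, RowSparse> A, B, C;
// ... assemble A and B (omitted) ...
MltMatrix(A, B, C);   // C = A B in Harwell-Boeing (CSR-like) storage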
Seldon::DistributedMatrix_Base::GetColSumDistantCol
void GetColSumDistantCol(Vector< T0 > &vec_sum) const
adds contribution of dist_col for GetColSum
Definition: DistributedMatrix.cxx:917
Seldon::DistributedMatrix_Base::EraseRowDistant
void EraseRowDistant(const IVect &num, bool sym)
erases rows num(i)
Definition: DistributedMatrix.cxx:3116
Seldon::General
Definition: Properties.hxx:26
Seldon::DistributedMatrix_Base::local_row_to_send
Vector< IVect > local_row_to_send
local row/col numbers (needed for MltAdd)
Definition: DistributedMatrix.hxx:162
Seldon::DistributedMatrix_Base::ScaleLeftDistant
void ScaleLeftDistant(const Vector< T0 > &Drow)
Multiplies a_ij by Drow(i) for distant non-zero entries.
Definition: DistributedMatrix.cxx:2977
Seldon::DistributedMatrix_Base::proc_col
Vector< IVect > proc_col
distant processor for additional values
Definition: DistributedMatrix.hxx:155
Seldon::DistributedMatrix_Base::size_max_distant_row
long size_max_distant_row
number of distant non-zero entries
Definition: DistributedMatrix.hxx:173
Seldon::DistributedMatrix_Base::EraseColDistant
void EraseColDistant(const IVect &num, bool sym)
erases columns num(i)
Definition: DistributedMatrix.cxx:3083
Seldon::DistributedMatrix_Base::GetRowSumDistantRow
void GetRowSumDistantRow(Vector< T0 > &vec_sum) const
adds contribution of dist_row for GetRowSum
Definition: DistributedMatrix.cxx:898
Seldon::DistributedMatrix::WriteText
void WriteText(string FileName, bool cplx=false) const
writes the matrix in text format
Definition: DistributedMatrix.cxx:5053
Seldon::DistributedMatrix::GetMemorySize
size_t GetMemorySize() const
returns the size of memory used to store the matrix
Definition: DistributedMatrix.cxx:4897
Seldon::DistributedMatrix_Base::AssembleParallel
void AssembleParallel(Matrix< T, General, ArrayRowSparse > &B, Vector< IVect > &procB, Symmetric &sym, IVect &row_numbers, IVect &local_row_numbers, IVect &OverlappedCol, bool sym_pattern, bool reorder)
Method called by AssembleDistributed.
Definition: DistributedMatrix.cxx:3318
Seldon::DistributedMatrix_Base::SetIdentity
void SetIdentity()
sets the matrix to the identity matrix
Definition: DistributedMatrix.cxx:2277
Seldon::DistributedMatrix_Base::TransposeDistant
void TransposeDistant(const DistributedMatrix_Base< T > &A)
Computes *this = A^T for distant non-zero entries.
Definition: DistributedMatrix.cxx:2945
Seldon::DistributedMatrix::ClearLocal
void ClearLocal()
Clears the local matrix.
Definition: DistributedMatrix.cxx:4860
Seldon::AssembleVectorMin
void AssembleVectorMin(Vector< int > &X, Vector< int > &Xproc, const IVect &ProcNumber, const Vector< IVect > &DofNumber, const MPI_Comm &comm, int Nvol, int nb_u, int tag)
assembles the minima of two vectors
Definition: DistributedVector.cxx:190
Seldon::DistributedMatrix_Base::Resize
void Resize(int m, int n)
changing the size of the local matrix; previous values are kept
Definition: DistributedMatrix.cxx:1898
Seldon::DistributedMatrix_Base::ScaleRightDistant
void ScaleRightDistant(const Vector< T0 > &Dcol)
Multiplies a_ij by Dcol(j) for distant non-zero entries.
Definition: DistributedMatrix.cxx:3008
Seldon::DistributedMatrix_Base::operator=
DistributedMatrix_Base< T > & operator=(const DistributedMatrix_Base< T > &X)
assignment *this = X
Definition: DistributedMatrix.cxx:1942
Seldon::NormInf
ClassComplexType< T >::Treal NormInf(const Matrix< T, Prop, Storage, Allocator > &A)
Returns the infinity-norm of a matrix.
Definition: Functions_Matrix.cxx:2435
Seldon::DistributedMatrix_Base::ScatterRowValues
void ScatterRowValues(const Vector< T2 > &X, Vector< T2 > &Xcol) const
Sends/receives values of X on distant rows (similar to ScatterColValues)
Definition: DistributedMatrix.cxx:997
Seldon::Transpose
void Transpose(Matrix< T, Prop, Storage, Allocator > &A)
Matrix transposition.
Definition: Functions_Matrix.cxx:2699
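Together with the Conjugate entry listed earlier, the function above yields the conjugate transpose; a sketch on a complex matrix (assembly omitted, and assuming the usual Seldon headers bring std::complex into scope):

Matrix<complex<double>, General, ArrayRowSparse> A;
// ... assemble A (omitted) ...
Transpose(A);    // A <- A^T
Conjugate(A);    // A <- conj(A), so A now holds A^H of the original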
Seldon::DistributedMatrix::GetNonZeros
long GetNonZeros() const
returns the number of non-zero entries stored in the matrix
Definition: DistributedMatrix.cxx:4912
Seldon::DistributedMatrix_Base::SendAndReceiveDistributed
static void SendAndReceiveDistributed(const MPI_Comm &comm, IVect &nsend_int, Vector< IVect > &EntierToSend, Vector< Vector< T > > &FloatToSend, IVect &nrecv_int, Vector< IVect > &EntierToRecv, Vector< Vector< T > > &FloatToRecv)
Sends EntierToSend and FloatToSend to the required processors.
Definition: DistributedMatrix.cxx:1219
Seldon::DistributedMatrix_Base::ConjugateDistant
void ConjugateDistant()
Conjugates distant non-zero entries.
Definition: DistributedMatrix.cxx:2926
Seldon::DistributedMatrix
matrix distributed over all the processors
Definition: DistributedMatrix.hxx:506
Seldon::DistributedMatrix::GetDataSize
long GetDataSize() const
returns the number of elements stored
Definition: DistributedMatrix.cxx:4928
Seldon::DistributedMatrix_Base::OverlapProcNumbers
IVect * OverlapProcNumbers
processor where each shared row should be assembled
Definition: DistributedMatrix.hxx:121
Seldon::DistributedMatrix_Base::GetLocalM
int GetLocalM() const
returns the local number of rows
Definition: DistributedMatrixInline.cxx:50
Seldon::DistributedMatrix_Base::nglob_
int nglob_
total number of rows (on all processors)
Definition: DistributedMatrix.hxx:141
Seldon::VectSparse
Definition: StorageInline.cxx:79
Seldon::DistributedMatrix_Base::SharingRowNumbers
Vector< IVect > * SharingRowNumbers
for each processor sharing rows, list of "local numbers" shared
Definition: DistributedMatrix.hxx:135
Seldon::DistributedMatrix::Zero
void Zero()
sets values of non-zero entries to 0
Definition: DistributedMatrix.cxx:4978
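Zero and Fill only touch values that are already stored, leaving the sparsity pattern intact; a sketch on a hypothetical matrix A:

A.Fill();   // stored entries become 0, 1, 2, ... (handy for debugging)
A.Zero();   // stored entries are reset to 0, the pattern is kept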
Seldon::DistributedMatrix_Base::EraseDistantEntries
static void EraseDistantEntries(MPI_Comm &comm, const Vector< bool > &IsRowDropped, const Vector< bool > &IsRowDroppedDistant, TypeDist &dist_row_, Vector< IVect > &proc_row_, TypeDist &dist_col_, Vector< IVect > &proc_col_)
clears distant numbers in a sparse matrix
Definition: DistributedMatrix.cxx:1639
Seldon::DistributedMatrix_Base::GetOverlapProcNumber
IVect & GetOverlapProcNumber()
returns processor numbers of the original rows
Definition: DistributedMatrix.cxx:2073
Seldon::DistributedMatrix_Base::ExchangeParallelData
void ExchangeParallelData(int &smax_row, int &smax_col, bool &local_number, Vector< Vector< T, VectSparse >, VectFull, NewAlloc< Vector< T, VectSparse > > > &dist_row_, Vector< Vector< T, VectSparse >, VectFull, NewAlloc< Vector< T, VectSparse > > > &dist_col_, Vector< IVect > &proc_row_, Vector< IVect > &proc_col_, IVect &global_row_to_recv_, IVect &global_col_to_recv_, IVect &ptr_global_row_to_recv_, IVect &ptr_global_col_to_recv_, Vector< IVect > &local_row_to_send_, Vector< IVect > &local_col_to_send_, IVect &proc_row_to_recv_, IVect &proc_col_to_recv_, IVect &proc_row_to_send_, IVect &proc_col_to_send_)
Swaps arrays contained in the current structure with arrays given as parameters.
Definition: DistributedMatrix.cxx:2878
Seldon::Symmetric
Definition: Properties.hxx:30
Seldon::MaxAbs
ClassComplexType< T >::Treal MaxAbs(const Matrix< T, Prop, Storage, Allocator > &A)
Returns the maximum (in absolute value) of a matrix.
Definition: Functions_Matrix.cxx:2386
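MaxAbs and the NormInf entry listed earlier are often used together to check the scaling of a matrix; sketched for a real-valued A (for complex T both return the associated real type):

double max_abs  = MaxAbs(A);    // max over |a_ij|
double norm_inf = NormInf(A);   // max over i of sum_j |a_ij|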
Seldon::SorVector
void SorVector(const Matrix< T0, Prop0, Storage0, Allocator0 > &M, Vector< T2, Storage2, Allocator2 > &Y, const Vector< T1, Storage1, Allocator1 > &X, const T3 &omega, int iter, int type_ssor)
Solves M Y = X with the S.O.R. method.
Definition: Functions_MatVect.cxx:1638
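A hedged sketch of the relaxation solver above; omega and iter are arbitrary here, and the meaning of type_ssor (selection of forward/backward/symmetric sweeps) is an assumption to be checked against Functions_MatVect.cxx:

double omega = 1.2;                           // relaxation parameter
int iter = 20, type_ssor = 0;                 // sweep count and sweep type (assumption)
SorVector(M, Y, X, omega, iter, type_ssor);   // applies iter S.O.R. sweeps to M Y = X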
Seldon::DistributedMatrix_Base::proc_col_to_recv
IVect proc_col_to_recv
processor numbers (needed for MltAdd)
Definition: DistributedMatrix.hxx:165
Seldon::DistributedMatrix_Base::ScaleDistant
void ScaleDistant(const Vector< T0 > &Drow, const Vector< T1 > &Dcol)
Multiplies a_ij by Drow(i) Dcol(j) for distant non-zero entries.
Definition: DistributedMatrix.cxx:3040
Seldon::ArrayRowSparse
Definition: Storage.hxx:124
Seldon::GetExtension
string GetExtension(const string &nom)
returns extension of a string
Definition: Common.cxx:31
Seldon
Seldon namespace.
Definition: Array.cxx:24
Seldon::MltAddMatrix
void MltAddMatrix(const T0 &alpha, const Matrix< T1, Prop1, Storage1, Allocator1 > &A, const Matrix< T2, Prop2, Storage2, Allocator2 > &B, const T3 &beta, Matrix< T4, Prop4, Storage4, Allocator4 > &C)
Multiplies two matrices, and adds the result to a third matrix.
Definition: Functions_Matrix.cxx:617
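The matrix-matrix counterpart of MltAdd; a sketch with hypothetical, already assembled operands A, B, C of compatible sizes:

double alpha = 2.0, beta = 1.0;
MltAddMatrix(alpha, A, B, beta, C);   // C <- beta C + alpha A B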
Seldon::IOError
Definition: Errors.hxx:150
Seldon::DistributedMatrix_Base::dist_row
Vector< Vector< T, VectSparse >, VectFull, NewAlloc< Vector< T, VectSparse > > > dist_row
additional values on columns with non-local rows
Definition: DistributedMatrix.hxx:152
Seldon::DistributedMatrix_Base::GetMaxAbsDistant
void GetMaxAbsDistant(typename ClassComplexType< T >::Treal &res) const
Computes res = max(res, maximum absolute value of non-zero entries)
Definition: DistributedMatrix.cxx:2774
Seldon::AddMatrix
void AddMatrix(const T0 &alpha, const Matrix< T1, Prop1, Storage1, Allocator1 > &A, Matrix< T2, Prop2, Storage2, Allocator2 > &B)
Adds two matrices.
Definition: Functions_Matrix.cxx:1619
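A one-line sketch of the entry above, again with hypothetical operands A and B of the same size:

AddMatrix(alpha, A, B);   // B <- B + alpha A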
Seldon::DistributedMatrix_Base::AssembleVecMin
void AssembleVecMin(Vector< int > &X, Vector< int > &Xproc) const
assembles the vector by taking the minimum instead of summing (as in AssembleVec)
Definition: DistributedMatrix.cxx:837
Seldon::DistributedMatrix_Base::GetCommunicator
MPI_Comm & GetCommunicator()
returns MPI communicator (processors that will share the matrix)
Definition: DistributedMatrixInline.cxx:34
Seldon::DistributedMatrix_Base::operator*=
DistributedMatrix_Base< T > & operator*=(const T0 &x)
multiplication by a scalar
Definition: DistributedMatrix.cxx:1999
Seldon::DistributedMatrix_Base::IsReadyForMltAdd
bool IsReadyForMltAdd() const
returns true if the matrix is ready to perform a matrix-vector product
Definition: DistributedMatrixInline.cxx:149
Seldon::SetComplexReal
void SetComplexReal(int n, std::complex< T > &number)
Sets a complex number to (n, 0).
Definition: CommonInline.cxx:234
Seldon::DistributedMatrix_Base::AddReceivedInteractions
static void AddReceivedInteractions(const MPI_Comm &comm, Matrix< T, General, ArrayRowSparse > &B, Vector< IVect > &EntierToRecv, Vector< Vector< T > > &FloatToRecv, IVect &nrecv_int, Vector< IVect > &EntierToSend, Vector< Vector< T > > &FloatToSend, IVect &nsend_int, IVect &Glob_to_local, const IVect &OverlappedCol, const IVect &OverlapProcNumber, Vector< IVect > &procB, bool reorder)
Adds received non-zero entries to the matrix B.
Definition: DistributedMatrix.cxx:1313
Seldon::DistributedMatrix_Base::AddDistantValue
static void AddDistantValue(Vector< T, VectSparse > &dist_col_, IVect &proc_col_, int jglob, int proc2, const T &val)
adding a non-zero entry between a local row/column and a non-local row/column
Definition: DistributedMatrix.cxx:1164
Seldon::DistributedMatrix_Base::MltAddCol
void MltAddCol(const SeldonTranspose &Trans, const Vector< T2, Storage2, Allocator2 > &X, Vector< T4, Storage4, Allocator4 > &Y) const
Y = Y + alpha A X with only distant columns of A.
Definition: DistributedMatrix.cxx:1062
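Finally, a hedged sketch tying several accessors of this index together, assuming A is a DistributedMatrix that has already been distributed and assembled (set-up omitted) and that the output file name is hypothetical:

IVect& glob = A.GetGlobalRowNumber();   // local-to-global row numbering
long nnz = A.GetNonZeros();             // non-zeros stored on this processor
long size = A.GetDataSize();            // total number of stored elements
if (A.IsReadyForMltAdd())               // exchange structures for MltAdd are built
  A.WriteText("A_local.dat");           // writes the local part in text format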