DistributedVector.cxx
// Copyright (C) 2014 INRIA
// Author(s): Marc Duruflé
//
// This file is part of the linear-algebra library Seldon,
// http://seldon.sourceforge.net/.
//
// Seldon is free software; you can redistribute it and/or modify it under the
// terms of the GNU Lesser General Public License as published by the Free
// Software Foundation; either version 2.1 of the License, or (at your option)
// any later version.
//
// Seldon is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
// more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with Seldon. If not, see http://www.gnu.org/licenses/.

#ifndef SELDON_FILE_DISTRIBUTED_VECTOR_CXX

#include "DistributedVector.hxx"

namespace Seldon
{

  //! returns X.Y for two distributed vectors
  /*!
    Rows overlapping with other processors are counted only once,
    then the partial results are summed over all the processors
    of the communicator.
  */
  template<class T1, class Allocator1>
  T1 DotProdVector(const DistributedVector<T1, Allocator1>& X,
                   const DistributedVector<T1, Allocator1>& Y)
  {
    T1 value;
    SetComplexZero(value);

    // contribution of the rows shared with other processors
    int nb_over = X.GetNbOverlap();
    for (int i = 0; i < nb_over; i++)
      value += X(X.GetOverlapRow(i)) * Y(Y.GetOverlapRow(i));

    // this contribution is subtracted from the sequential scalar product
    // so that each row is counted exactly once
    value = -value + DotProd(static_cast<const Vector<T1, VectFull,
                                                      Allocator1>& >(X),
                             static_cast<const Vector<T1, VectFull,
                                                      Allocator1>& >(Y));

    // summing the local results over all the processors
    const MPI_Comm& comm = X.GetCommunicator();
    int nb_proc; MPI_Comm_size(comm, &nb_proc);
    if (nb_proc > 1)
      {
        T1 sum; SetComplexZero(sum);
        Vector<int64_t> xtmp;
        MpiAllreduce(comm, &value, xtmp, &sum, 1, MPI_SUM);
        return sum;
      }

    return value;
  }


  //! returns conj(X).Y for two distributed vectors
  /*!
    Rows overlapping with other processors are counted only once,
    then the partial results are summed over all the processors
    of the communicator.
  */
  template<class T1, class Allocator1>
  T1 DotProdConjVector(const DistributedVector<T1, Allocator1>& X,
                       const DistributedVector<T1, Allocator1>& Y)
  {
    T1 value;
    SetComplexZero(value);

    // contribution of the rows shared with other processors
    int nb_over = X.GetNbOverlap();
    for (int i = 0; i < nb_over; i++)
      value += conjugate(X(X.GetOverlapRow(i))) * Y(Y.GetOverlapRow(i));

    value = -value +
      DotProdConj(static_cast<const Vector<T1, VectFull, Allocator1>& >(X),
                  static_cast<const Vector<T1, VectFull, Allocator1>& >(Y));

    // summing the local results over all the processors
    const MPI_Comm& comm = X.GetCommunicator();
    int nb_proc; MPI_Comm_size(comm, &nb_proc);
    if (nb_proc > 1)
      {
        T1 sum; SetComplexZero(sum);
        Vector<int64_t> xtmp;
        MpiAllreduce(comm, &value, xtmp, &sum, 1, MPI_SUM);
        return sum;
      }

    return value;
  }


  //! returns the 2-norm of a distributed complex vector x
  template<class T, class Allocator>
  T Norm2(const DistributedVector<complex<T>, Allocator>& x)
  {
    T scal = abs(DotProdConj(x, x));
    return sqrt(scal);
  }


  //! returns the 2-norm of a distributed real vector x
  template<class T, class Allocator>
  T Norm2(const DistributedVector<T, Allocator>& x)
  {
    T scal = DotProd(x, x);
    return sqrt(scal);
  }
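

  /* Illustrative usage sketch (not part of Seldon itself).  It assumes a
     DistributedVector built from the list of rows overlapped with other
     processors and an MPI communicator; see DistributedVector.hxx for the
     exact constructor.

       IVect overlap_row;                  // rows already owned by another processor
       // ... fill overlap_row ...
       DistributedVector<double> U(overlap_row, MPI_COMM_WORLD);
       DistributedVector<double> V(overlap_row, MPI_COMM_WORLD);
       U.Reallocate(n);  V.Reallocate(n);
       // ... fill local values (shared rows hold the same value on every processor) ...
       double scal = DotProdVector(U, V);  // global scalar product
       double norm = Norm2(U);             // global 2-norm
  */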


  //! minimum of two real numbers
  template<class T>
  T minComplex(const T& x, const T& y)
  {
    return min(x, y);
  }


  //! minimum of two complex numbers (the one with the smallest modulus)
  template<class T>
  complex<T> minComplex(const complex<T>& x, const complex<T>& y)
  {
    if (abs(x) > abs(y))
      return y;

    return x;
  }


  //! maximum of two real numbers
  template<class T>
  T maxComplex(const T& x, const T& y)
  {
    return max(x, y);
  }


  //! maximum of two complex numbers (the one with the largest modulus)
  template<class T>
  complex<T> maxComplex(const complex<T>& x, const complex<T>& y)
  {
    if (abs(x) > abs(y))
      return x;

    return y;
  }
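

  /* Note: minComplex / maxComplex give a meaning to MPI_MIN and MPI_MAX in
     AssembleVector for complex-valued vectors by comparing moduli; for real
     types they reduce to min / max. */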


  //! extracts the local shared numbers from the original shared numbers
  /*!
    From the arrays MatchingNumber_Subdomain / MatchingDofOrig_Subdomain
    (for each interacting processor, the list of shared degrees of freedom),
    this function keeps only the degrees of freedom flagged as local in
    is_local and stores the result in MatchingNumber_Local /
    MatchingDof_Local.
  */
  void ExtractDistributedSharedNumbers(const IVect& MatchingNumber_Subdomain,
                                       const Vector<IVect>& MatchingDofOrig_Subdomain,
                                       const Vector<bool>& is_local, int nb_ddl_local,
                                       IVect& MatchingNumber_Local,
                                       Vector<IVect>& MatchingDof_Local)
  {
    MatchingNumber_Local.Clear();
    MatchingDof_Local.Clear();
    for (int i = 0; i < MatchingNumber_Subdomain.GetM(); i++)
      {
        int proc = MatchingNumber_Subdomain(i);

        // counting the degrees of freedom shared with processor proc
        // that are local to the current processor
        int nb_dof = 0;
        for (int j = 0; j < MatchingDofOrig_Subdomain(i).GetM(); j++)
          if ((nb_ddl_local > 0) &&
              (is_local(MatchingDofOrig_Subdomain(i)(j))))
            nb_dof++;

        if (nb_dof > 0)
          {
            // storing the local degrees of freedom shared with processor proc
            IVect dof_num(nb_dof);
            nb_dof = 0;
            for (int j = 0; j < MatchingDofOrig_Subdomain(i).GetM(); j++)
              if (is_local(MatchingDofOrig_Subdomain(i)(j)))
                {
                  dof_num(nb_dof) = MatchingDofOrig_Subdomain(i)(j);
                  nb_dof++;
                }

            MatchingNumber_Local.PushBack(proc);
            MatchingDof_Local.PushBack(dof_num);
          }
      }
  }
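

  /* Example of what ExtractDistributedSharedNumbers computes (illustration
     only): if the current processor shares the degrees of freedom {3, 7, 12}
     with processor 2 and is_local flags only 3 and 12 as local, the function
     returns MatchingNumber_Local = (2) and MatchingDof_Local = ((3, 12)).
     Processors for which no shared dof is local are dropped altogether. */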


  //! assembles minimums of two vectors
  /*!
    \param[in,out] X vector of column numbers
    \param[in,out] Xproc vector of processor numbers
    \param[in] ProcNumber processors interacting with the current one
    \param[in] DofNumber for each processor, rows shared with that processor
    \param[in] comm MPI communicator
    \param[in] Nvol number of rows for a single unknown
    \param[in] nb_u number of unknowns
    \param[in] tag MPI tag used for the exchange
    For each shared row, the pair (Xproc, X) is replaced by the smallest pair
    over all the processors sharing that row (smallest processor number, then
    smallest column number).
  */
  void AssembleVectorMin(Vector<int>& X, Vector<int>& Xproc,
                         const IVect& ProcNumber,
                         const Vector<IVect>& DofNumber,
                         const MPI_Comm& comm, int Nvol, int nb_u, int tag)
  {
    int nb_proc; MPI_Comm_size(comm, &nb_proc);
    if (nb_proc == 1)
      return;

    // number of processors interacting with the current processor
    int nb_dom = DofNumber.GetM();
    Vector<MPI_Request> request_send(nb_dom), request_recv(nb_dom);
    MPI_Status status;
    Vector<Vector<int> > xsend(nb_dom);
    Vector<Vector<int64_t> > xsend_tmp(nb_dom);

    // sending information to the other processors
    for (int i = 0; i < nb_dom; i++)
      {
        int j = ProcNumber(i);
        int nb = DofNumber(i).GetM();
        if (nb > 0)
          {
            // packing X and Xproc for the rows shared with processor j
            xsend(i).Reallocate(2*nb_u*nb);
            for (int m = 0; m < nb_u; m++)
              for (int k = 0; k < nb; k++)
                {
                  xsend(i)(nb_u*k+m) = X(DofNumber(i)(k) + m*Nvol);
                  xsend(i)(nb_u*k+m+nb*nb_u)
                    = Xproc(DofNumber(i)(k) + m*Nvol);
                }

            // sending the values to the corresponding processor
            MPI_Isend(&xsend(i)(0), 2*nb*nb_u,
                      MPI_INTEGER, j, tag, comm, &request_send(i));
          }
      }

    // receiving the information
    Vector<Vector<int> > xdom(nb_dom);
    for (int i = 0; i < nb_dom; i++)
      {
        int j = ProcNumber(i);
        int nb = DofNumber(i).GetM();
        if (nb > 0)
          {
            xdom(i).Reallocate(2*nb_u*nb);
            xdom(i).Fill(0);

            // receiving the values of domain j
            MPI_Irecv(&xdom(i)(0), 2*nb*nb_u,
                      MPI_INTEGER, j, tag, comm, &request_recv(i));
          }
      }

    // waiting until all the communications are completed
    for (int i = 0; i < nb_dom; i++)
      if (DofNumber(i).GetM() > 0)
        MPI_Wait(&request_recv(i), &status);

    for (int i = 0; i < nb_dom; i++)
      if (DofNumber(i).GetM() > 0)
        MPI_Wait(&request_send(i), &status);

    // final reduction step
    for (int i = 0; i < nb_dom; i++)
      {
        for (int m = 0; m < nb_u; m++)
          for (int k = 0; k < DofNumber(i).GetM(); k++)
            {
              int nb = DofNumber(i).GetM();
              int p = DofNumber(i)(k) + m*Nvol;
              int proc = xdom(i)(k*nb_u+m+nb*nb_u);
              int col = xdom(i)(k*nb_u+m);
              // keeping the smallest pair (processor number, column number)
              if (proc < Xproc(p))
                {
                  Xproc(p) = proc;
                  X(p) = col;
                }
              else if (proc == Xproc(p))
                {
                  if (col < X(p))
                    X(p) = col;
                }
            }
      }
  }
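

  /* Illustrative sketch (an assumption about typical use, not part of
     Seldon): AssembleVectorMin is handy when each processor proposes an
     owner and a global column number for its rows, and the smallest
     (processor, column) pair must win on shared rows, e.g.

       Vector<int> col_number(n), proc_owner(n);
       // ... fill with local guesses ...
       AssembleVectorMin(col_number, proc_owner, ProcNumber, DofNumber,
                         comm, n, 1, 13);

     where ProcNumber / DofNumber describe the rows shared with each
     neighbouring processor and 13 is an arbitrary MPI tag. */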


  //! assembles a distributed vector
  /*!
    \param[in,out] X vector to assemble
    \param[in] oper reduction operation (MPI_SUM, MPI_MIN or MPI_MAX)
    \param[in] ProcNumber processors interacting with the current one
    \param[in] DofNumber for each processor, rows shared with that processor
    \param[in] comm MPI communicator
    \param[in] Nvol number of rows for a single unknown
    \param[in] nb_u number of unknowns
    \param[in] tag MPI tag used for the exchange
    The values of the shared rows are exchanged between the processors and
    combined with the operation oper.
  */
  template<class T>
  void AssembleVector(Vector<T>& X, const MPI_Op& oper,
                      const IVect& ProcNumber, const Vector<IVect>& DofNumber,
                      const MPI_Comm& comm, int Nvol, int nb_u, int tag)
  {
    int nb_proc; MPI_Comm_size(comm, &nb_proc);
    if (nb_proc == 1)
      return;

    // number of processors interacting with the current processor
    int nb_dom = DofNumber.GetM();
    Vector<MPI_Request> request_send(nb_dom), request_recv(nb_dom);
    MPI_Status status;
    Vector<Vector<T> > xsend(nb_dom);
    Vector<Vector<int64_t> > xsend_tmp(nb_dom);

    // sending information to the other processors
    for (int i = 0; i < nb_dom; i++)
      {
        int j = ProcNumber(i);
        int nb = DofNumber(i).GetM();
        if (nb > 0)
          {
            // packing the values of the rows shared with processor j
            xsend(i).Reallocate(nb_u*nb);
            for (int m = 0; m < nb_u; m++)
              for (int k = 0; k < nb; k++)
                xsend(i)(nb_u*k+m) = X(DofNumber(i)(k) + m*Nvol);

            // sending the values to the corresponding processor
            request_send(i) = MpiIsend(comm, xsend(i), xsend_tmp(i),
                                       nb*nb_u, j, tag);
          }
      }

    // receiving the information
    T zero; SetComplexZero(zero);
    Vector<Vector<T> > xdom(nb_dom);
    Vector<Vector<int64_t> > xdom_tmp(nb_dom);
    for (int i = 0; i < nb_dom; i++)
      {
        int j = ProcNumber(i);
        int nb = DofNumber(i).GetM();
        if (nb > 0)
          {
            xdom(i).Reallocate(nb_u*nb);
            xdom(i).Fill(zero);

            // receiving the values of domain j
            request_recv(i) = MpiIrecv(comm, xdom(i), xdom_tmp(i),
                                       nb*nb_u, j, tag);
          }
      }

    // waiting until all the communications are completed
    for (int i = 0; i < nb_dom; i++)
      if (DofNumber(i).GetM() > 0)
        MPI_Wait(&request_send(i), &status);

    for (int i = 0; i < nb_dom; i++)
      if (DofNumber(i).GetM() > 0)
        MPI_Wait(&request_recv(i), &status);

    // completing the receive operations
    for (int i = 0; i < nb_dom; i++)
      if (DofNumber(i).GetM() > 0)
        MpiCompleteIrecv(xdom(i), xdom_tmp(i), DofNumber(i).GetM()*nb_u);

    // final reduction step
    for (int i = 0; i < nb_dom; i++)
      {
        if (oper == MPI_SUM)
          {
            for (int m = 0; m < nb_u; m++)
              for (int k = 0; k < DofNumber(i).GetM(); k++)
                X(DofNumber(i)(k) + m*Nvol) += xdom(i)(k*nb_u+m);
          }
        else if (oper == MPI_MIN)
          {
            for (int m = 0; m < nb_u; m++)
              for (int k = 0; k < DofNumber(i).GetM(); k++)
                X(DofNumber(i)(k) + m*Nvol)
                  = minComplex(X(DofNumber(i)(k) + m*Nvol),
                               xdom(i)(k*nb_u+m));
          }
        else if (oper == MPI_MAX)
          {
            for (int m = 0; m < nb_u; m++)
              for (int k = 0; k < DofNumber(i).GetM(); k++)
                X(DofNumber(i)(k) + m*Nvol)
                  = maxComplex(X(DofNumber(i)(k) + m*Nvol),
                               xdom(i)(k*nb_u+m));
          }
        else
          {
            cout << "Operation not implemented" << endl;
            abort();
          }
      }
  }
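

  /* Illustrative usage sketch (not part of Seldon): after each processor has
     added its local contributions to X, the shared rows can be summed so that
     every processor ends up with the fully assembled values, e.g.

       Vector<double> X(Nvol*nb_u);
       // ... add local contributions to X ...
       AssembleVector(X, MPI_SUM, ProcNumber, DofNumber,
                      MPI_COMM_WORLD, Nvol, nb_u, 17);

     ProcNumber and DofNumber are assumed to describe, for each neighbouring
     processor, the rows shared with it; 17 is an arbitrary MPI tag. */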


  //! exchanges a distributed vector
  /*!
    \param[in,out] X vector to exchange
    \param[in] ProcNumber processors interacting with the current one
    \param[in] DofNumber for each processor, rows shared with that processor
    \param[in] comm MPI communicator
    \param[in] Nvol number of rows for a single unknown
    \param[in] nb_u number of unknowns
    \param[in] tag MPI tag used for the exchange
    The values of the shared rows are exchanged between the processors:
    on each processor they are overwritten with the values received from
    the other processors (no reduction is performed).
  */
  template<class T>
  void ExchangeVector(Vector<T>& X,
                      const IVect& ProcNumber, const Vector<IVect>& DofNumber,
                      const MPI_Comm& comm, int Nvol, int nb_u, int tag)
  {
    int nb_proc; MPI_Comm_size(comm, &nb_proc);
    if (nb_proc == 1)
      return;

    // number of processors interacting with the current processor
    int nb_dom = DofNumber.GetM();
    Vector<MPI_Request> request_send(nb_dom), request_recv(nb_dom);
    MPI_Status status;
    Vector<Vector<T> > xsend(nb_dom);
    Vector<Vector<int64_t> > xsend_tmp(nb_dom);

    // sending information to the other processors
    for (int i = 0; i < nb_dom; i++)
      {
        int j = ProcNumber(i);
        int nb = DofNumber(i).GetM();
        if (nb > 0)
          {
            // packing the values of the rows shared with processor j
            xsend(i).Reallocate(nb_u*nb);
            for (int m = 0; m < nb_u; m++)
              for (int k = 0; k < nb; k++)
                xsend(i)(nb_u*k+m) = X(DofNumber(i)(k) + m*Nvol);

            // sending the values to the corresponding processor
            request_send(i) = MpiIsend(comm, xsend(i), xsend_tmp(i),
                                       nb*nb_u, j, tag);
          }
      }

    // receiving the information
    T zero; SetComplexZero(zero);
    Vector<Vector<T> > xdom(nb_dom);
    Vector<Vector<int64_t> > xdom_tmp(nb_dom);
    for (int i = 0; i < nb_dom; i++)
      {
        int j = ProcNumber(i);
        int nb = DofNumber(i).GetM();
        if (nb > 0)
          {
            xdom(i).Reallocate(nb_u*nb);
            xdom(i).Fill(zero);

            // receiving the values of domain j
            request_recv(i) = MpiIrecv(comm, xdom(i), xdom_tmp(i),
                                       nb*nb_u, j, tag);
          }
      }

    // waiting until all the communications are completed
    for (int i = 0; i < nb_dom; i++)
      if (DofNumber(i).GetM() > 0)
        MPI_Wait(&request_send(i), &status);

    for (int i = 0; i < nb_dom; i++)
      if (DofNumber(i).GetM() > 0)
        MPI_Wait(&request_recv(i), &status);

    // completing the receive operations
    for (int i = 0; i < nb_dom; i++)
      if (DofNumber(i).GetM() > 0)
        MpiCompleteIrecv(xdom(i), xdom_tmp(i), DofNumber(i).GetM()*nb_u);

    // overwriting the shared rows with the received values
    for (int i = 0; i < nb_dom; i++)
      {
        for (int m = 0; m < nb_u; m++)
          for (int k = 0; k < DofNumber(i).GetM(); k++)
            X(DofNumber(i)(k) + m*Nvol) = xdom(i)(k*nb_u+m);
      }
  }
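

  /* Note: ExchangeVector differs from AssembleVector in that received values
     replace the local ones instead of being combined with MPI_SUM / MPI_MIN /
     MPI_MAX.  If a row is shared by more than two processors, the value kept
     is the one received from the last neighbour listed in DofNumber. */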


  //! exchanges a distributed vector with relaxation
  /*!
    \param[in,out] X vector to exchange
    \param[in] omega relaxation parameter
    \param[in] proc rank of the processor sending its values
    \param[in] ProcNumber processors interacting with the current one
    \param[in] DofNumber for each processor, rows shared with that processor
    \param[in] comm MPI communicator
    \param[in] Nvol number of rows for a single unknown
    \param[in] nb_u number of unknowns
    \param[in] tag MPI tag used for the exchange
    The processor proc sends the values of its shared rows to the other
    processors, which update them as (1-omega) X + omega X_received.
  */
  template<class T, class Treal>
  void ExchangeRelaxVector(Vector<T>& X, const Treal& omega, int proc,
                           const IVect& ProcNumber,
                           const Vector<IVect>& DofNumber,
                           const MPI_Comm& comm, int Nvol, int nb_u, int tag)
  {
    int nb_proc; MPI_Comm_size(comm, &nb_proc);
    int rank_proc; MPI_Comm_rank(comm, &rank_proc);
    if (nb_proc == 1)
      return;

    // number of processors interacting with the current processor
    int nb_dom = DofNumber.GetM();
    MPI_Status status;
    Vector<T> xsend(nb_dom);
    Vector<int64_t> xsend_tmp(nb_dom);

    if (rank_proc == proc)
      {
        // processor proc is sending its values to the other processors
        for (int i = 0; i < nb_dom; i++)
          {
            int j = ProcNumber(i);
            int nb = DofNumber(i).GetM();
            if (nb > 0)
              {
                xsend.Reallocate(nb_u*nb);
                for (int m = 0; m < nb_u; m++)
                  for (int k = 0; k < nb; k++)
                    xsend(nb_u*k+m) = X(DofNumber(i)(k) + m*Nvol);

                // sending the values to the corresponding processor
                MpiSsend(comm, xsend, xsend_tmp, nb*nb_u, j, tag);
              }
          }
      }
    else
      {
        // the other processors are receiving the information
        Vector<T> xdom(nb_dom);
        Vector<int64_t> xdom_tmp(nb_dom);
        T zero; SetComplexZero(zero);
        for (int i = 0; i < nb_dom; i++)
          {
            int j = ProcNumber(i);
            if (j == proc)
              {
                int nb = DofNumber(i).GetM();
                if (nb > 0)
                  {
                    xdom.Reallocate(nb_u*nb);
                    xdom.Fill(zero);

                    // receiving the values of domain j
                    MpiRecv(comm, xdom, xdom_tmp, nb*nb_u, j, tag, status);

                    // relaxation of the shared rows
                    for (int m = 0; m < nb_u; m++)
                      for (int k = 0; k < DofNumber(i).GetM(); k++)
                        X(DofNumber(i)(k) + m*Nvol) =
                          (1.0-omega)* X(DofNumber(i)(k) + m*Nvol) +
                          omega*xdom(k*nb_u+m);
                  }
              }
          }
      }
  }
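

  /* Note: ExchangeRelaxVector is typically useful in relaxation-type
     iterations (e.g. block SOR over subdomains), where, after processor proc
     has updated its rows, the neighbouring processors blend their shared
     values with the newly computed ones through the parameter omega. */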

}

#define SELDON_FILE_DISTRIBUTED_VECTOR_CXX
#endif