GlobalLinSysIterative.cpp 22 KB
Newer Older
1 2
///////////////////////////////////////////////////////////////////////////////
//
3
// File: GlobalLinSysIterative.cpp
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37
//
// For more information, please see: http://www.nektar.info
//
// The MIT License
//
// Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA),
// Department of Aeronautics, Imperial College London (UK), and Scientific
// Computing and Imaging Institute, University of Utah (USA).
//
// License for the specific language governing rights and limitations under
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.
//
// Description: GlobalLinSysIterative definition
//
///////////////////////////////////////////////////////////////////////////////

#include <MultiRegions/GlobalLinSysIterative.h>

38 39
using namespace std;

40 41 42 43 44 45 46
namespace Nektar
{
    namespace MultiRegions
    {
        /**
         * @class GlobalLinSysIterative
         *
47
         * Solves a linear system using iterative methods.
48 49 50
         */

        /// Constructor for full direct matrix solve.
51 52
        GlobalLinSysIterative::GlobalLinSysIterative(
                const GlobalLinSysKey &pKey,
53
                const boost::weak_ptr<ExpList> &pExpList,
54
                const boost::shared_ptr<AssemblyMap>
55
                &pLocToGloMap)
56
                : GlobalLinSys(pKey, pExpList, pLocToGloMap),
57
                  m_rhs_magnitude(NekConstants::kNekUnsetDouble),
58
                  m_rhs_mag_sm(0.9),
59
                  m_precon(NullPreconditionerSharedPtr),
60 61
                  m_totalIterations(0),
                  m_useProjection(false),
62
                  m_numPrevSols(0)
63
        {
64
            m_tolerance = pLocToGloMap->GetIterativeTolerance();
65
            m_maxiter   = pLocToGloMap->GetMaxIterations();
66

67
            LibUtilities::CommSharedPtr vComm = m_expList.lock()->GetComm()->GetRowComm();
68
            m_root    = (vComm->GetRank())? false : true;
69

70 71 72
            int successiveRHS;
            
            if((successiveRHS = pLocToGloMap->GetSuccessiveRHS()))
73
            {
74
                m_prevLinSol.set_capacity(successiveRHS);
75 76
                m_useProjection = true;
            }
77
            else
78
            {
79
                m_useProjection = false;
80
            }
81 82 83 84 85 86
        }

        GlobalLinSysIterative::~GlobalLinSysIterative()
        {
        }

87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122

        /**
         * 
         */
        void GlobalLinSysIterative::v_SolveLinearSystem(
                    const int nGlobal,
                    const Array<OneD,const NekDouble> &pInput,
                          Array<OneD,      NekDouble> &pOutput,
                    const AssemblyMapSharedPtr &plocToGloMap,
                    const int nDir)
        {
            if (m_useProjection)
            {
                DoAconjugateProjection(nGlobal, pInput, pOutput, plocToGloMap, nDir);
            }
            else
            {
                // applying plain Conjugate Gradient
                DoConjugateGradient(nGlobal, pInput, pOutput, plocToGloMap, nDir);
            }
        }


        /**
         * This method implements A-conjugate projection technique
         * in order to speed up successive linear solves with
         * right-hand sides arising from time-dependent discretisations.
         * (P.F.Fischer, Comput. Methods Appl. Mech. Engrg. 163, 1998)
         */
        void GlobalLinSysIterative::DoAconjugateProjection(
                    const int nGlobal,
                    const Array<OneD,const NekDouble> &pInput,
                          Array<OneD,      NekDouble> &pOutput,
                    const AssemblyMapSharedPtr &plocToGloMap,
                    const int nDir)
        {
Pavel Burovskiy's avatar
Pavel Burovskiy committed
123 124 125 126
            // Get the communicator for performing data exchanges
            LibUtilities::CommSharedPtr vComm
                                = m_expList.lock()->GetComm()->GetRowComm();

127 128
            // Get vector sizes
            int nNonDir = nGlobal - nDir;
129
            Array<OneD, NekDouble> tmp;
130 131 132 133 134 135 136 137 138 139 140 141 142

            if (0 == m_numPrevSols)
            {
                // no previous solutions found, call CG

                DoConjugateGradient(nGlobal, pInput, pOutput, plocToGloMap, nDir);

                UpdateKnownSolutions(nGlobal, pOutput, nDir);
            }
            else
            {
                // Create NekVector wrappers for linear algebra operations
                NekVector<NekDouble> b     (nNonDir, pInput  + nDir, eWrapper);
143
                NekVector<NekDouble> x     (nNonDir, tmp = pOutput + nDir, eWrapper);
144 145 146

                // check the input vector (rhs) is not zero

Pavel Burovskiy's avatar
Pavel Burovskiy committed
147 148 149 150 151 152
                NekDouble rhsNorm = Vmath::Dot2(nNonDir,
                                                pInput + nDir,
                                                pInput + nDir,
                                                m_map + nDir);

                vComm->AllReduce(rhsNorm, Nektar::LibUtilities::ReduceSum);
153 154 155 156 157 158 159 160 161 162 163 164 165 166

                if (rhsNorm < NekConstants::kNekZeroTol)
                {
                    Array<OneD, NekDouble> tmp = pOutput+nDir;
                    Vmath::Zero(nNonDir, tmp, 1);
                    return;
                }

                // Allocate array storage
                Array<OneD, NekDouble> px_s       (nGlobal, 0.0);
                Array<OneD, NekDouble> pb_s       (nGlobal, 0.0);
                Array<OneD, NekDouble> tmpAx_s    (nGlobal, 0.0);
                Array<OneD, NekDouble> tmpx_s     (nGlobal, 0.0);

167 168 169 170
                NekVector<NekDouble> pb    (nNonDir, tmp = pb_s    + nDir, eWrapper);
                NekVector<NekDouble> px    (nNonDir, tmp = px_s    + nDir, eWrapper);
                NekVector<NekDouble> tmpAx (nNonDir, tmp = tmpAx_s + nDir, eWrapper);
                NekVector<NekDouble> tmpx  (nNonDir, tmp = tmpx_s  + nDir, eWrapper);
171 172 173 174 175 176


                // notation follows the paper cited:
                // \alpha_i = \tilda{x_i}^T b^n
                // projected x, px = \sum \alpha_i \tilda{x_i}

Pavel Burovskiy's avatar
Pavel Burovskiy committed
177
                Array<OneD, NekDouble> alpha     (m_prevLinSol.size(), 0.0);
178 179
                for (int i = 0; i < m_prevLinSol.size(); i++)
                {
Pavel Burovskiy's avatar
Pavel Burovskiy committed
180 181 182 183 184 185
                    alpha[i] = Vmath::Dot2(nNonDir,
                                           m_prevLinSol[i],
                                           pInput + nDir,
                                           m_map + nDir);
                }
                vComm->AllReduce(alpha, Nektar::LibUtilities::ReduceSum);
186

Pavel Burovskiy's avatar
Pavel Burovskiy committed
187 188 189
                for (int i = 0; i < m_prevLinSol.size(); i++)
                {
                    if (alpha[i] < NekConstants::kNekZeroTol)
190 191 192 193 194
                    {
                        continue;
                    }

                    NekVector<NekDouble> xi (nNonDir, m_prevLinSol[i], eWrapper);
Pavel Burovskiy's avatar
Pavel Burovskiy committed
195
                    px += alpha[i] * xi;
196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219
                }

                // pb = b^n - A px
                Vmath::Vcopy(nNonDir,
                             pInput.get() + nDir, 1,
                             pb_s.get()   + nDir, 1);

                v_DoMatrixMultiply(px_s, tmpAx_s);

                pb -= tmpAx;


                // solve the system with projected rhs
                DoConjugateGradient(nGlobal, pb_s, tmpx_s, plocToGloMap, nDir);


                // remainder solution + projection of previous solutions
                x = tmpx + px;

                // save the auxiliary solution to prev. known solutions
                UpdateKnownSolutions(nGlobal, tmpx_s, nDir);
            }
        }

220
        
221 222 223 224 225
        /**
         * Calculating A-norm of an input vector,
         * A-norm(x) := sqrt( < x, Ax > )
         */
        NekDouble GlobalLinSysIterative::CalculateAnorm(
226 227 228
                                                        const int nGlobal,
                                                        const Array<OneD,const NekDouble> &in,
                                                        const int nDir)
229
        {
Pavel Burovskiy's avatar
Pavel Burovskiy committed
230 231
            // Get the communicator for performing data exchanges
            LibUtilities::CommSharedPtr vComm
232
                = m_expList.lock()->GetComm()->GetRowComm();
Pavel Burovskiy's avatar
Pavel Burovskiy committed
233

234 235 236 237 238 239 240 241
            // Get vector sizes
            int nNonDir = nGlobal - nDir;

            // Allocate array storage
            Array<OneD, NekDouble> tmpAx_s    (nGlobal, 0.0);

            v_DoMatrixMultiply(in, tmpAx_s);

Pavel Burovskiy's avatar
Pavel Burovskiy committed
242 243 244 245 246
            NekDouble anorm_sq = Vmath::Dot2(nNonDir,
                                             in      + nDir,
                                             tmpAx_s + nDir,
                                             m_map   + nDir);
            vComm->AllReduce(anorm_sq, Nektar::LibUtilities::ReduceSum);
247 248 249 250 251 252 253 254
            return std::sqrt(anorm_sq);
        }

        /**
         * Updates the storage of previously known solutions.
         * Performs normalisation of input vector wrt A-norm.
         */
        void GlobalLinSysIterative::UpdateKnownSolutions(
255 256 257
                                                         const int nGlobal,
                                                         const Array<OneD,const NekDouble> &newX,
                                                         const int nDir)
258 259 260 261
        {
            // Get vector sizes
            int nNonDir = nGlobal - nDir;

Pavel Burovskiy's avatar
Pavel Burovskiy committed
262 263
            // Get the communicator for performing data exchanges
            LibUtilities::CommSharedPtr vComm
264
                = m_expList.lock()->GetComm()->GetRowComm();
265 266

            // Check the solution is non-zero
Pavel Burovskiy's avatar
Pavel Burovskiy committed
267 268 269 270 271
            NekDouble solNorm = Vmath::Dot2(nNonDir,
                                            newX + nDir,
                                            newX + nDir,
                                            m_map + nDir);
            vComm->AllReduce(solNorm, Nektar::LibUtilities::ReduceSum);
272 273 274 275 276 277 278 279 280 281 282 283 284

            if (solNorm < NekConstants::kNekZeroTol)
            {
                return;
            }


            // Allocate array storage
            Array<OneD, NekDouble> tmpAx_s    (nGlobal, 0.0);
            Array<OneD, NekDouble> px_s       (nGlobal, 0.0);
            Array<OneD, NekDouble> tmp1, tmp2;

            // Create NekVector wrappers for linear algebra operations
285 286
            NekVector<NekDouble> px           (nNonDir, tmp1 = px_s    + nDir, eWrapper);
            NekVector<NekDouble> tmpAx        (nNonDir, tmp2 = tmpAx_s + nDir, eWrapper);
287 288 289 290 291 292 293 294 295 296 297 298


            // calculating \tilda{x} - sum \alpha_i\tilda{x}_i

            Vmath::Vcopy(nNonDir,
                         tmp1 = newX + nDir, 1,
                         tmp2 = px_s + nDir, 1);

            if (m_prevLinSol.size() > 0)
            {
                v_DoMatrixMultiply(newX, tmpAx_s);
            }
Pavel Burovskiy's avatar
Pavel Burovskiy committed
299 300

            Array<OneD, NekDouble> alpha (m_prevLinSol.size(), 0.0);
301 302
            for (int i = 0; i < m_prevLinSol.size(); i++)
            {
Pavel Burovskiy's avatar
Pavel Burovskiy committed
303 304 305 306 307 308
                alpha[i] = Vmath::Dot2(nNonDir,
                                       m_prevLinSol[i],
                                       tmpAx_s + nDir,
                                       m_map + nDir);
            }
            vComm->AllReduce(alpha, Nektar::LibUtilities::ReduceSum);
309

Pavel Burovskiy's avatar
Pavel Burovskiy committed
310 311 312
            for (int i = 0; i < m_prevLinSol.size(); i++)
            {
                if (alpha[i] < NekConstants::kNekZeroTol)
313 314 315 316 317
                {
                    continue;
                }

                NekVector<NekDouble> xi (nNonDir, m_prevLinSol[i], eWrapper);
Pavel Burovskiy's avatar
Pavel Burovskiy committed
318
                px -= alpha[i] * xi;
319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341
            }


            // Some solutions generated by CG are identical zeros, see
            // solutions generated for Test_Tet_equitri.xml (IncNavierStokesSolver).
            // Not going to store identically zero solutions.

            NekDouble anorm = CalculateAnorm(nGlobal, px_s, nDir);
            if (anorm < NekConstants::kNekZeroTol)
            {
                return;
            }

            // normalisation of new solution
            Vmath::Smul(nNonDir, 1.0/anorm, px_s.get() + nDir, 1, px_s.get() + nDir, 1);

            // updating storage with non-Dirichlet-dof part of new solution vector
            m_prevLinSol.push_back(px_s + nDir);
            m_numPrevSols++;
        }



Pavel Burovskiy's avatar
Pavel Burovskiy committed
342 343 344 345 346 347 348 349 350 351 352 353
        /**  
         * Solve a global linear system using the conjugate gradient method.  
         * We solve only for the non-Dirichlet modes. The operator is evaluated  
         * using an auxiliary function v_DoMatrixMultiply defined by the  
         * specific solver. Distributed math routines are used to support  
         * parallel execution of the solver.  
         *  
         * The implemented algorithm uses a reduced-communication reordering of  
         * the standard PCG method (Demmel, Heath and Vorst, 1993)  
         *  
         * @param       pInput      Input residual  of all DOFs.  
         * @param       pOutput     Solution vector of all DOFs.  
354
         */
355
        void GlobalLinSysIterative::DoConjugateGradient(
356 357 358 359 360
            const int                          nGlobal,
            const Array<OneD,const NekDouble> &pInput,
                  Array<OneD,      NekDouble> &pOutput,
            const AssemblyMapSharedPtr        &plocToGloMap,
            const int                          nDir)
361
        {
362
            if (!m_precon)
363
            {
364
                v_UniqueMap();
365
                m_precon = CreatePrecon(plocToGloMap);
366
                m_precon->BuildPreconditioner();
367
            }
368

369
            // Get the communicator for performing data exchanges
370
            LibUtilities::CommSharedPtr vComm
371
                = m_expList.lock()->GetComm()->GetRowComm();
372 373 374 375 376

            // Get vector sizes
            int nNonDir = nGlobal - nDir;

            // Allocate array storage
377 378 379
            Array<OneD, NekDouble> w_A    (nGlobal, 0.0);
            Array<OneD, NekDouble> s_A    (nGlobal, 0.0);
            Array<OneD, NekDouble> p_A    (nNonDir, 0.0);
380
            Array<OneD, NekDouble> r_A    (nNonDir, 0.0);
381 382
            Array<OneD, NekDouble> q_A    (nNonDir, 0.0);
            Array<OneD, NekDouble> tmp;
383 384

            // Create NekVector wrappers for linear algebra operations
385 386 387 388 389 390 391
            NekVector<NekDouble> in (nNonDir,pInput  + nDir,      eWrapper);
            NekVector<NekDouble> out(nNonDir,tmp = pOutput + nDir,eWrapper);
            NekVector<NekDouble> w  (nNonDir,tmp = w_A + nDir,    eWrapper);
            NekVector<NekDouble> s  (nNonDir,tmp = s_A + nDir,    eWrapper);
            NekVector<NekDouble> p  (nNonDir,p_A,                 eWrapper);
            NekVector<NekDouble> r  (nNonDir,r_A,                 eWrapper);
            NekVector<NekDouble> q  (nNonDir,q_A,                 eWrapper);
392 393

            int k;
394 395
            NekDouble alpha, beta, rho, rho_new, mu, eps,  min_resid;
            Array<OneD, NekDouble> vExchange(3,0.0);
396

397
            // Copy initial residual from input
398
            r = in;
399 400 401 402 403
            // zero homogeneous out array ready for solution updates
            // Should not be earlier in case input vector is same as
            // output and above copy has been peformed
            Vmath::Zero(nNonDir,tmp = pOutput + nDir,1);

404 405 406 407 408 409 410 411 412 413

            // evaluate initial residual error for exit check
            vExchange[2] = Vmath::Dot2(nNonDir,
                                       r_A,
                                       r_A,
                                       m_map + nDir);

            vComm->AllReduce(vExchange, Nektar::LibUtilities::ReduceSum);
            
            eps       = vExchange[2];
Spencer Sherwin's avatar
Spencer Sherwin committed
414

415 416
            if(m_rhs_magnitude == NekConstants::kNekUnsetDouble)
            {
417 418
                NekVector<NekDouble> inGlob (nGlobal, pInput, eWrapper);
                Set_Rhs_Magnitude(inGlob);
419 420 421 422 423 424 425 426 427
            }

            // If input residual is less than tolerance skip solve.
            if (eps < m_tolerance * m_tolerance * m_rhs_magnitude)
            {
                if (m_verbose && m_root)
                {
                    cout << "CG iterations made = " << m_totalIterations 
                         << " using tolerance of "  << m_tolerance 
428 429 430
                         << " (error = " << sqrt(eps/m_rhs_magnitude) 
                         << ", rhs_mag = " << sqrt(m_rhs_magnitude) <<  ")" 
                         << endl;
431 432 433 434 435
                }
                return;
            }

            m_totalIterations = 1;
436
            m_precon->DoPreconditioner(r_A, tmp = w_A + nDir);
437

438
            v_DoMatrixMultiply(w_A, s_A);
439

440 441
            k = 0;

442 443 444 445 446 447 448 449 450 451
            vExchange[0] = Vmath::Dot2(nNonDir,
                                       r_A,
                                       w_A + nDir,
                                       m_map + nDir);

            vExchange[1] = Vmath::Dot2(nNonDir,
                                       s_A + nDir,
                                       w_A + nDir,
                                       m_map + nDir);

452
            vComm->AllReduce(vExchange, Nektar::LibUtilities::ReduceSum);
453 454 455

            rho       = vExchange[0];
            mu        = vExchange[1];
456
            min_resid = m_rhs_magnitude;
457 458
            beta      = 0.0;
            alpha     = rho/mu;
459

460 461 462
            // Continue until convergence
            while (true)
            {
463 464 465 466 467 468 469 470 471 472
                if(k >= m_maxiter)
                {
                    if (m_root)
                    {
                        cout << "CG iterations made = " << m_totalIterations 
                             << " using tolerance of "  << m_tolerance 
                             << " (error = " << sqrt(eps/m_rhs_magnitude)
                             << ", rhs_mag = " << sqrt(m_rhs_magnitude) <<  ")"
                             << endl;
                    }
473 474
                    ASSERTL0(false,
                             "Exceeded maximum number of iterations");
475
                }
476

477
                // Compute new search direction p_k, q_k
478 479
                Vmath::Svtvp(nNonDir, beta, &p_A[0], 1, &w_A[nDir], 1, &p_A[0], 1);
                Vmath::Svtvp(nNonDir, beta, &q_A[0], 1, &s_A[nDir], 1, &q_A[0], 1);
480 481

                // Update solution x_{k+1}
482
                Vmath::Svtvp(nNonDir, alpha, &p_A[0], 1, &pOutput[nDir], 1, &pOutput[nDir], 1);
483 484

                // Update residual vector r_{k+1}
485
                Vmath::Svtvp(nNonDir, -alpha, &q_A[0], 1, &r_A[0], 1, &r_A[0], 1);
486

487
                // Apply preconditioner
488 489 490 491
                m_precon->DoPreconditioner(r_A, tmp = w_A + nDir);

                // Perform the method-specific matrix-vector multiply operation.
                v_DoMatrixMultiply(w_A, s_A);
492

493
                // <r_{k+1}, w_{k+1}>
494
                vExchange[0] = Vmath::Dot2(nNonDir,
495 496 497
                                           r_A,
                                           w_A + nDir,
                                           m_map + nDir);
498
                // <s_{k+1}, w_{k+1}>
499
                vExchange[1] = Vmath::Dot2(nNonDir,
500 501 502
                                           s_A + nDir,
                                           w_A + nDir,
                                           m_map + nDir);
503

504
                // <r_{k+1}, r_{k+1}>
505
                vExchange[2] = Vmath::Dot2(nNonDir,
506 507 508
                                           r_A,
                                           r_A,
                                           m_map + nDir);
509

510
                // Perform inner-product exchanges
511 512
                vComm->AllReduce(vExchange, Nektar::LibUtilities::ReduceSum);

513 514 515
                rho_new = vExchange[0];
                mu      = vExchange[1];
                eps     = vExchange[2];
516
cout << "CG Iteration " << k << ", " << eps << ", " << m_rhs_magnitude << endl;
517
                m_totalIterations++;
518
                // test if norm is within tolerance
519
                if (eps < m_tolerance * m_tolerance * m_rhs_magnitude)
520
                {
521
                    if (m_verbose && m_root)
522
                    {
523 524
                        cout << "CG iterations made = " << m_totalIterations 
                             << " using tolerance of "  << m_tolerance 
525 526
                             << " (error = " << sqrt(eps/m_rhs_magnitude)
                             << ", rhs_mag = " << sqrt(m_rhs_magnitude) <<  ")"
527
                             << endl;
528
                    }
529 530
                    break;
                }
531 532 533 534 535 536 537 538 539 540
                min_resid = min(min_resid, eps);

                // Compute search direction and solution coefficients
                beta  = rho_new/rho;
                alpha = rho_new/(mu - rho_new*beta/alpha);
                rho   = rho_new;
                k++;
            }
        }

541 542
        void GlobalLinSysIterative::Set_Rhs_Magnitude(
            const NekVector<NekDouble> &pIn)
543
        {
544
            Array<OneD, NekDouble> vExchange(1);
545
            vExchange[0] = Vmath::Dot(pIn.GetDimension(),&pIn[0],&pIn[0]);
546

547 548
            m_expList.lock()->GetComm()->GetRowComm()->AllReduce(
                vExchange, Nektar::LibUtilities::ReduceSum);
549 550 551
cout << "Rank " << m_expList.lock()->GetComm()->GetRank() << ", vExchange[0] = " << vExchange[0] << endl;
int x;
cin >> x;
552 553 554 555
            // To ensure that very different rhs values are not being
            // used in subsequent solvers such as the velocit solve in
            // INC NS. If this works we then need to work out a better
            // way to control this.
556 557
            NekDouble new_rhs_mag = (vExchange[0] > 1e-6)? vExchange[0] : 1.0;

558 559 560 561 562 563
            if(m_rhs_magnitude == NekConstants::kNekUnsetDouble)
            {
                m_rhs_magnitude = new_rhs_mag;
            }
            else
            {
564 565
                m_rhs_magnitude = (m_rhs_mag_sm*(m_rhs_magnitude) + 
                                   (1.0-m_rhs_mag_sm)*new_rhs_mag); 
566
            }
567
        }
568

569 570
    }
}