#ifndef _INCLUDED_L3_MAT3_H #define _INCLUDED_L3_MAT3_H // Copyright (C) Krzysztof Bosak, 1999-10-28...2000-12-26 // All rights reserved. // kbosak@box43.pl // http://www.kbosak.prv.pl // *Mat3Diag with Gauss LU decomposition and Solve* // *Mat3DiagSym with Cholesky LDL^(T) decomposition and Solve* // *Mat3DiagCycl with Gauss LU decomposition and Solve* #include "l3_ptr.h" #include "l3_array.h" template class Mat3DiagCycl { // Cyclically tridiagonal matrix. // In comparison with Mat3Diag has 2 additional nonzero elements: // a(m,1) (UpperRightElement()==c0) and a(1,n) (LowerLeftElement()==r0). // Mat3DiagCycl::Solve() is about 50% slower than with Mat3Diag::Solve(). // // d0 ud0 0 c0 // ld0 d1 ud1 c1 // 0 ld1 d2 ud2 // r0 r1 ld2 d3 basearray _updiag; basearray _diag; basearray _lowdiag; basearray _column; basearray _row; int _order; bool _decomposed; public: explicit Mat3DiagCycl(int order) : _updiag(order-1), _diag(order), _lowdiag(order-1), _column(order-2), _row(order-2), _order(order), _decomposed(false) { assert(order>=3); _row.erase(); _column.erase(); } void Decompose() { // Gauss method: A=L*U matrix decomposition, no pivoting (nonzero diagonal needed). assert(_decomposed==false); _decomposed=true; const int maxi=_order-3; for(int i=0; i& result, const basearray& rhs) const { // This part of algorithm known as backsubstitution. // Assumes that matrix is already LDL^(T) decomposed. // result and rhs can be the same vector. assert(_decomposed==true); assert(_order==result.size()); assert(_order==rhs.size()); static basearray buffer(_order); buffer.setsize(_order); /* //Classic version buffer[0]=rhs[0]; int i; for(i=1; i<_order-1; i++) { buffer[i]=rhs[i]-buffer[i-1]*_lowdiag[i-1]; } real sum=0; for(i=0; i<_order-2; i++) { sum+=_row[i]*buffer[i]; } buffer[_order-1]=rhs[_order-1]-sum-_lowdiag[_order-2]*buffer[_order-2]; assert(_diag[_order-1]!=0); result[_order-1]=buffer[_order-1]/_diag[_order-1]; assert(_diag[_order-2]!=0); result[_order-2]=(buffer[_order-2]-_updiag[_order-2]*result[_order-1])/_diag[_order-2]; for(i=_order-3; i>=0; i--) { assert(_diag[i]!=0); result[i]=(buffer[i]-_updiag[i]*result[i+1]-_column[i]*result[_order-1])/_diag[i]; } */ const SmartPtr buffer_p(&buffer[0], &buffer[0], buffer.size()); const SmartPtr buffer_m1(&buffer[0]-1, &buffer[0], buffer.size()); const SmartPtrReadOnly lowdiag_m1(&_lowdiag[0]-1, &_lowdiag[0], _lowdiag.size()); const SmartPtrReadOnly rhs_p(&rhs[0], &rhs[0], rhs.size()); (*buffer_p)=(*rhs_p); int i=1; const int order_m4=_order-4; while(i row_p(&_row[0], &_row[0], _row.size()); const int order_m5=_order-5; while(i result_p(&result[0], &result[0], result.size()); const SmartPtr result_m1(&result[0]-1, &result[0], result.size()); const SmartPtr result_p1(&result[0]+1, &result[0], result.size()); const SmartPtrReadOnly updiag_p(&_updiag[0], &_updiag[0], _updiag.size()); const SmartPtrReadOnly column_p(&_column[0], &_column[0], _column.size()); const SmartPtrReadOnly diag_p(&_diag[0], &_diag[0], _diag.size()); while(i>=3) { assert(_diag[i]!=0); result_p[i]=(buffer_p[i]-updiag_p[i]*result_p1[i]-column_p[i]*result_m1[_order])/diag_p[i]; i--; assert(_diag[i]!=0); result_p[i]=(buffer_p[i]-updiag_p[i]*result_p1[i]-column_p[i]*result_m1[_order])/diag_p[i]; i--; assert(_diag[i]!=0); result_p[i]=(buffer_p[i]-updiag_p[i]*result_p1[i]-column_p[i]*result_m1[_order])/diag_p[i]; i--; assert(_diag[i]!=0); result_p[i]=(buffer_p[i]-updiag_p[i]*result_p1[i]-column_p[i]*result_m1[_order])/diag_p[i]; i--; } while(i>=0) { assert(_diag[i]!=0); result_p[i]=(buffer_p[i]-updiag_p[i]*result_p1[i]-column_p[i]*result_m1[_order])/diag_p[i]; i--; } } inline void Swap(Mat3DiagCycl& mat2) { assert(_order==mat2._order); _updiag.swap(mat2._updiag); _diag.swap(mat2._diag); _lowdiag.swap(mat2._lowdiag); _column.swap(mat2._column); _row.swap(mat2._row); } inline int Order() const { return _order; } inline bool Decomposed() const { return _decomposed; } inline const basearray& GetUpperDiagonal() const { return _updiag; } inline const basearray& GetDiagonal() const { return _diag; } inline const basearray& GetLowerDiagonal() const { return _lowdiag; } inline const basearray& GetRightColumn() const { return _column; } inline const basearray& GetLowerRow() const { return _row; } inline real& UpperRightElement() { assert(_decomposed==false); return _column[0]; } inline real& LowerLeftElement() { assert(_decomposed==false); return _row[0]; } inline basearray& UpperDiagonal() { assert(_decomposed==false); return _updiag; } inline basearray& Diagonal() { assert(_decomposed==false); return _diag; } inline basearray& LowerDiagonal() { assert(_decomposed==false); return _lowdiag; } real MaxLocalError(const basearray& x, const basearray& rhs) const {// Assumes that matrix is NOT LU decomposed! Calculates max of abs. val. of residual vector. assert(_decomposed==false); assert(x.size()==_order); assert(x.size()==rhs.size()); real maxerr=0; real err=_diag[0]*x[0]+_updiag[0]*x[1]+_column[0]*x[_order-1] - rhs[0]; if(err<0) { err=-err; } if(err>maxerr) { maxerr=err; } const int stepsm1=_order-1; for(int i=1; imaxerr) { maxerr=err; } } const int stepsm2=_order-2; err=_row[0]*x[0]+_lowdiag[stepsm2]*x[stepsm2]+_diag[stepsm1]*x[stepsm1] - rhs[stepsm1]; if(err<0) err=-err; if(err>maxerr) maxerr=err; return maxerr; } }; template class Mat3DiagSym { // Symmetric tridiagonal matrix. // Mat3DiagSym::Solve() is 11% faster...7% slower than with Mat3Diag::Solve(). // Mat3DiagSym uses 33% less memory than Mat3Diag. // // d0 od0 0 0 // od0 d1 od1 0 // 0 od1 d2 od2 // 0 0 od2 d3 basearray _offdiag; basearray _diag; int _order; bool _decomposed; public: explicit Mat3DiagSym(int order) : _offdiag(order-1), _diag(order), _order(order), _decomposed(false) { assert(order>=2); } void Decompose() { // Cholesky method: A=LDL^(T) matrix decomposition, no pivoting (nonzero diagonal needed). assert(_decomposed==false); _decomposed=true; real old_offdiag_value=_offdiag[0]; _offdiag[0]/=_diag[0]; const int maxi=_order-1; int i; for(i=1; i& result, const basearray& rhs) const { // This part of algorithm known as backsubstitution. // Assumes that matrix is already LDL^(T) decomposed. // result and rhs can be the same vector. assert(_decomposed==true); assert(_order==result.size()); assert(_order==rhs.size()); static basearray buffer(_order); buffer.setsize(_order); // Classic version /* buffer[0]=rhs[0]; int i; for(i=1; i<_order; i++) { buffer[i]=rhs[i]-_offdiag[i-1]*buffer[i-1]; } assert(_diag[_order-1]!=0); result[_order-1]=buffer[_order-1]/_diag[_order-1]; for(i=_order-2; i>=0; i--) { assert(_diag[i]!=0); result[i]=buffer[i]/_diag[i]-_offdiag[i]*result[i+1]; } */ buffer[0]=rhs[0]; const int order_m3=_order-3; const SmartPtr buffer_p(&buffer[0], &buffer[0], buffer.size()); const SmartPtrReadOnly rhs_p(&rhs[0], &rhs[0], rhs.size()); const SmartPtrReadOnly offdiag_m1(&_offdiag[0]-1, &_offdiag[0], _offdiag.size()); const SmartPtr buffer_m1(&buffer[0]-1, &buffer[0], buffer.size()); int i=1; while(i diag_p(&_diag[0], &_diag[0], _diag.size()); const SmartPtr result_p(&result[0], &result[0], result.size()); result_p[_order-1]=buffer_p[_order-1]/diag_p[_order-1]; const SmartPtr result_p1(&result[1], &result[0], result.size()); const SmartPtrReadOnly offdiag_p(&_offdiag[0], &_offdiag[0], _offdiag.size()); i=_order-2; while(i>=3) { result_p[i]=buffer_p[i]/diag_p[i]-offdiag_p[i]*result_p1[i]; i--; result_p[i]=buffer_p[i]/diag_p[i]-offdiag_p[i]*result_p1[i]; i--; result_p[i]=buffer_p[i]/diag_p[i]-offdiag_p[i]*result_p1[i]; i--; result_p[i]=buffer_p[i]/diag_p[i]-offdiag_p[i]*result_p1[i]; i--; } while(i>=0) { result_p[i]=buffer_p[i]/diag_p[i]-offdiag_p[i]*result_p1[i]; i--; } } inline void Swap(Mat3DiagSym& mat2) { assert(_order==mat2._order); _offdiag.swap(mat2._offdiag); _diag.swap(mat2._diag); } inline int Order() const { return _order; } inline bool Decomposed() const { return _decomposed; } inline const basearray& GetUpperDiagonal() const { return _offdiag; } inline const basearray& GetDiagonal() const { return _diag; } inline const basearray& GetLowerDiagonal() const { return _offdiag; } inline basearray& UpperDiagonal() { assert(_decomposed==false); return _offdiag; } inline basearray& Diagonal() { assert(_decomposed==false); return _diag; } inline basearray& LowerDiagonal() { assert(_decomposed==false); return _offdiag; } real MaxLocalError(const basearray& x, const basearray& rhs) const {// Assumes that matrix is NOT LDL^(T) decomposed! Calculates max of abs. val. of residual vector. assert(_decomposed==false); assert(x.size()==_order); assert(x.size()==rhs.size()); real maxerr=0; real err=_diag[0]*x[0]+_offdiag[0]*x[1]-rhs[0]; if(err<0) { err=-err; } if(err>maxerr) { maxerr=err; } const int stepsm1=_order-1; for(int i=1; imaxerr) { maxerr=err; } } const int stepsm2=_order-2; err=_offdiag[stepsm2]*x[stepsm2]+_diag[stepsm1]*x[stepsm1] - rhs[stepsm1]; if(err<0) { err=-err; } if(err>maxerr) { maxerr=err; } return maxerr; } }; template class Mat3Diag { // Tridiagonal matrix. // // d0 ud0 0 0 // ld0 d1 ud1 0 // 0 ld1 d2 ud2 // 0 0 ld2 d3 basearray _updiag; basearray _diag; basearray _lowdiag; int _order; bool _decomposed; public: explicit Mat3Diag(int order) : _updiag(order-1), _diag(order), _lowdiag(order-1), _order(order), _decomposed(false) { assert(order>=2); } void Decompose() { // Gauss method: A=L*U matrix decomposition, no pivoting (nonzero diagonal needed). assert(_decomposed==false); _decomposed=true; /* // Classic version const int maxi=_order-1; for(int i=0; i diag(&_diag[0], &_diag[0], _diag.size()); SmartPtr lowdiag(&_lowdiag[0], &_lowdiag[0], _lowdiag.size()); SmartPtr updiag(&_updiag[0], &_updiag[0], _updiag.size()); int i=_order-1; // Unrolled, redundant part begins while(i>3) { assert((*diag)!=0); (*lowdiag)/=(*diag); ++diag; (*diag)-=(*updiag)*(*lowdiag); ++updiag; ++lowdiag; i--; assert((*diag)!=0); (*lowdiag)/=(*diag); ++diag; (*diag)-=(*updiag)*(*lowdiag); ++updiag; ++lowdiag; i--; assert((*diag)!=0); (*lowdiag)/=(*diag); ++diag; (*diag)-=(*updiag)*(*lowdiag); ++updiag; ++lowdiag; i--; assert((*diag)!=0); (*lowdiag)/=(*diag); ++diag; (*diag)-=(*updiag)*(*lowdiag); ++updiag; ++lowdiag; i--; } // Unrolled, redundant part ends while(i>0) { assert((*diag)!=0); (*lowdiag)/=(*diag); ++diag; (*diag)-=(*updiag)*(*lowdiag); ++updiag; ++lowdiag; i--; } } void Solve(basearray& result, const basearray& rhs) const { // This part of algorithm known as backsubstitution. // Assumes that matrix is already LU decomposed. // result and rhs can be the same vector. assert(_decomposed==true); assert(_order==result.size()); assert(_order==rhs.size()); static basearray buffer(_order); buffer.setsize(_order); /* // Classic version buffer[0]=rhs[0]; int i=1; while(i<_order) { buffer[i]=rhs[i]-_lowdiag[i-1]*buffer[i-1]; i++; } result[_order-1]=buffer[_order-1]/_diag[_order-1]; i=_order-2; while(i>=0) { assert(_diag[i]!=0); result[i]=(buffer[i]-_updiag[i]*result[i+1])/_diag[i]; i--; } */ const SmartPtrReadOnly rhs_p(&rhs[0], &rhs[0], rhs.size()); const SmartPtr buffer_p(&buffer[0], &buffer[0], buffer.size()); (*buffer_p)=(*rhs_p); const SmartPtr buffer_m1(&buffer[0]-1, &buffer[0], buffer.size()); const SmartPtrReadOnly lowdiag_m1(&_lowdiag[0]-1, &_lowdiag[0], _lowdiag.size()); const SmartPtrReadOnly updiag_p(&_updiag[0], &_updiag[0], _updiag.size()); // Unrolled, redundant part begins const int order_m3=_order-3; int i=1; while(i result_p(&result[0], &result[0], result.size()); const SmartPtrReadOnly diag_p(&_diag[0], &_diag[0], _diag.size()); const int order_m1=_order-1; result_p[order_m1]=buffer_p[order_m1]/diag_p[order_m1]; i=_order-2; const SmartPtr result_p1(&result[1], &result[0], result.size()); // Unrolled, redundant part begins while(i>=3) { assert(diag_p[i]!=0); result_p[i]=(buffer_p[i]-updiag_p[i]*result_p1[i])/diag_p[i]; i--; assert(diag_p[i]!=0); result_p[i]=(buffer_p[i]-updiag_p[i]*result_p1[i])/diag_p[i]; i--; assert(diag_p[i]!=0); result_p[i]=(buffer_p[i]-updiag_p[i]*result_p1[i])/diag_p[i]; i--; assert(diag_p[i]!=0); result_p[i]=(buffer_p[i]-updiag_p[i]*result_p1[i])/diag_p[i]; i--; } // Unrolled, redundant part ends while(i>=0) { assert(diag_p[i]!=0); result_p[i]=(buffer_p[i]-updiag_p[i]*result_p1[i])/diag_p[i]; i--; } } inline void Swap(Mat3Diag& mat2) { assert(_order==mat2._order); _updiag.swap(mat2._updiag); _diag.swap(mat2._diag); _lowdiag.swap(mat2._lowdiag); } inline int Order() const { return _order; } inline bool Decomposed() const { return _decomposed; } inline const basearray& GetUpperDiagonal() const { return _updiag; } inline const basearray& GetDiagonal() const { return _diag; } inline const basearray& GetLowerDiagonal() const { return _lowdiag; } inline basearray& UpperDiagonal() { assert(_decomposed==false); return _updiag; } inline basearray& Diagonal() { assert(_decomposed==false); return _diag; } inline basearray& LowerDiagonal() { assert(_decomposed==false); return _lowdiag; } real MaxLocalError(const basearray& x, const basearray& rhs) const {// Assumes that matrix is NOT LU decomposed! Calculates max of abs. val. of residual vector. assert(_decomposed==false); assert(x.size()==_order); assert(x.size()==rhs.size()); real maxerr=0; real err=_diag[0]*x[0]+_updiag[0]*x[1]-rhs[0]; if(err<0) { err=-err; } if(err>maxerr) { maxerr=err; } const int stepsm1=_order-1; for(int i=1; imaxerr) { maxerr=err; } } const int stepsm2=_order-2; err=_lowdiag[stepsm2]*x[stepsm2]+_diag[stepsm1]*x[stepsm1] - rhs[stepsm1]; if(err<0) { err=-err; } if(err>maxerr) { maxerr=err; } return maxerr; } void Statistics() const; }; template void Mat3Diag::Statistics() const {// Shows info about possibility of solution. assert(_decomposed==false); cout<<"[ Matrix statistics ]\n"; int i; // Zero entries on diagonal: int numz=0, numfz=0; for(i=_order-1; i>=0; i--) { if(_diag[i]==0) { numz++; numfz=i; } } if(numz>0) { cout<<" - There are "<=0; i--) { if(_lowdiag[i]!=_updiag[i]) { notsym++; notsymf=i; } } if(notsym==0) { cout<<" - Matrix is symmetric, you should use 2 times more efficient LDL^-1 (class Mat3DiagSym) "; cout<<"decomposition!\n"; } else { cout<<" - Matrix is asymmetric, there is "<=0; i--) { if(ud!=_updiag[i]) { udconst=false; } if(ld!=_lowdiag[i]) { ldconst=false; } if(md!=_diag[i]) { mdconst=false; } } if(udconst==true) cout<<" - In this case all upper diagonal elements are constant, code can be optimized!\n"; else cout<<" - In this case upper diagonal elements are different, no further optimization possible.\n"; if(ldconst==true) cout<<" - In this case all lower diagonal elements are constant, code can be optimized!\n"; else cout<<" - In this case lower diagonal elements are different, no further optimization possible.\n"; if(mdconst==true) cout<<" - In this case all diagonal elements are constant, code can be optimized!\n"; else cout<<" - In this case diagonal elements are different, no further optimization possible.\n"; flush(cout); #define ABS(a) (a>=0 ? a: -a) // Column domination: int notcoldom=0; real coldomval=0; real test=ABS(_diag[0]) - ABS(_lowdiag[0]); if(test<0) { notcoldom++; } coldomval+=test; test=ABS(_diag[_order-1]) - ABS(_updiag[_order-2]); if(test<0) { notcoldom++; } coldomval+=test; for(i=1; i<_order-1; i++) { test=ABS(_diag[i]) - ( ABS(_updiag[i-1])+ABS(_lowdiag[i]) ); if(test<0) { notcoldom++; } coldomval+=test; } if(notcoldom==0) { cout<<" - Matrix is dominant by column, Mat3Diag* method is equivalent to LU with partial pivoting.\n"; } else { cout<<" - Matrix is not dominant by column, "<rowdepref)) { cout<<" - Matrix seems to be more dominant by row, it would be better to use Cholesky scheme "; } cout<<"(equal to LU with partial row pivoting, instead of equal to LU with column pivoting, used here)!\n"; int KBsize=(sizeof(real)*(_updiag.size()+_diag.size()+_lowdiag.size())+sizeof(*this)+1023)/1024; int KBDsolve=(sizeof(real)*(_updiag.size()+3*_diag.size()+_lowdiag.size())+sizeof(*this)+1023)/1024; int KBNDsolve=(sizeof(real)*(_updiag.size()+4*_diag.size()+_lowdiag.size())+sizeof(*this)+1023)/1024; int KBresult=(sizeof(real)*_diag.size()+sizeof(*this)+1023)/1024; cout<<" - This matrix uses "<> TestMatrixing done.\n"; flush(cout); #undef ABS } #endif //_INCLUDED_L3_MAT3_H