From 1531b7b29caf746a90eba6150a03cf9ef6bb8af6 Mon Sep 17 00:00:00 2001 From: Huanjing Gong <108850865+AroundPeking@users.noreply.github.com> Date: Tue, 23 Dec 2025 11:04:12 +0800 Subject: [PATCH] Feature: add exx_cs_inv_thr to get inverse Coulomb --- docs/advanced/input_files/input-main.md | 6 + .../module_external/lapack_connector.h | 23 +- source/source_hamilt/module_xc/exx_info.h | 1 + source/source_io/input_conv.cpp | 1 + .../module_parameter/input_parameter.h | 1 + source/source_io/read_input_item_exx_dftu.cpp | 6 + .../module_ri/ABFs_Construct-PCA.cpp | 217 ++++++++---- .../module_ri/ABFs_Construct-PCA.h | 19 +- source/source_lcao/module_ri/Inverse_Matrix.h | 32 +- .../source_lcao/module_ri/Inverse_Matrix.hpp | 325 ++++++++++++------ source/source_lcao/module_ri/LRI_CV.hpp | 13 +- source/source_lcao/module_ri/LRI_CV_Tools.h | 11 +- source/source_lcao/module_ri/LRI_CV_Tools.hpp | 18 +- 13 files changed, 458 insertions(+), 215 deletions(-) diff --git a/docs/advanced/input_files/input-main.md b/docs/advanced/input_files/input-main.md index 512c9bb524..350c30a13a 100644 --- a/docs/advanced/input_files/input-main.md +++ b/docs/advanced/input_files/input-main.md @@ -2971,6 +2971,12 @@ These variables are relevant when using hybrid functionals with *[basis_type](#b - **Description**: See also the entry [exx_pca_threshold](#exx_pca_threshold). Smaller components (less than exx_c_threshold) of the $C^{a}_{ik}$ matrix are neglected to accelerate calculation. The larger the threshold is, the faster the calculation and the lower the accuracy. A relatively safe choice of the value is 1e-4. - **Default**: 1E-4 +### exx_cs_inv_thr + +- **Type**: Real +- **Description**: By default, the Coulomb matrix inversion required for obtaining LRI coefficients is performed using LU decomposition. However, this approach may suffer from numerical instabilities when a large set of auxiliary basis functions (ABFs) is employed. When `exx_cs_inv_thr > 0`, the inversion is instead carried out via matrix diagonalization. Eigenvalues smaller than `exx_cs_inv_thr` are discarded to improve numerical stability. A relatively safe and commonly recommended value is `1e-5`. +- **Default**: -1 + ### exx_v_threshold - **Type**: Real diff --git a/source/source_base/module_external/lapack_connector.h b/source/source_base/module_external/lapack_connector.h index abd29c107c..5007aedabf 100644 --- a/source/source_base/module_external/lapack_connector.h +++ b/source/source_base/module_external/lapack_connector.h @@ -170,11 +170,24 @@ void zhegv_(const int* itype, const char* jobz, const char* uplo, const int* n, // === Standard Hermitian eigenproblem === -void dsyev_(const char* jobz, const char* uplo, const int* n, - double* a, const int* lda, - double* w, - double* work, const int* lwork, - int* info); + void ssyev_(const char* jobz, + const char* uplo, + const int* n, + float* a, + const int* lda, + float* w, + float* work, + const int* lwork, + int* info); + void dsyev_(const char* jobz, + const char* uplo, + const int* n, + double* a, + const int* lda, + double* w, + double* work, + const int* lwork, + int* info); void cheev_(const char* jobz, const char* uplo, const int* n, std::complex* a, const int* lda, diff --git a/source/source_hamilt/module_xc/exx_info.h b/source/source_hamilt/module_xc/exx_info.h index fa01daf46b..9cbd51c18f 100644 --- a/source/source_hamilt/module_xc/exx_info.h +++ b/source/source_hamilt/module_xc/exx_info.h @@ -67,6 +67,7 @@ struct Exx_Info double ccp_rmesh_times = 10; bool exx_symmetry_realspace = true; double kmesh_times = 4; + double Cs_inv_thr = -1; int abfs_Lmax = 0; // tmp diff --git a/source/source_io/input_conv.cpp b/source/source_io/input_conv.cpp index f4edf1da6e..8e3fb560ca 100644 --- a/source/source_io/input_conv.cpp +++ b/source/source_io/input_conv.cpp @@ -474,6 +474,7 @@ void Input_Conv::Convert() GlobalC::exx_info.info_ri.V_grad_R_threshold = PARAM.inp.exx_v_grad_r_threshold; GlobalC::exx_info.info_ri.ccp_rmesh_times = std::stod(PARAM.inp.exx_ccp_rmesh_times); GlobalC::exx_info.info_ri.exx_symmetry_realspace = PARAM.inp.exx_symmetry_realspace; + GlobalC::exx_info.info_ri.Cs_inv_thr = PARAM.inp.exx_cs_inv_thr; GlobalC::exx_info.info_opt_abfs.pca_threshold = PARAM.inp.exx_pca_threshold; GlobalC::exx_info.info_opt_abfs.abfs_Lmax = PARAM.inp.exx_opt_orb_lmax; diff --git a/source/source_io/module_parameter/input_parameter.h b/source/source_io/module_parameter/input_parameter.h index 605db6572f..d60df6c093 100644 --- a/source/source_io/module_parameter/input_parameter.h +++ b/source/source_io/module_parameter/input_parameter.h @@ -552,6 +552,7 @@ struct Input_para = true; ///< whether to reduce the real-space sector in when using symmetry=1 in EXX calculation double rpa_ccp_rmesh_times = 10.0; ///< how many times larger the radial mesh required for ///< calculating Columb potential is to that of atomic orbitals + double exx_cs_inv_thr = -1; ///< threshold to inverse Vq in abfs for generating Cs bool out_ri_cv = false; ///< Whether to output the coefficient tensor C and ABFs-representation Coulomb matrix V // ============== #Parameters (16.dft+u) ====================== // DFT+U Xin Qu added on 2020-10-29 diff --git a/source/source_io/read_input_item_exx_dftu.cpp b/source/source_io/read_input_item_exx_dftu.cpp index eec98263c1..d235710a36 100644 --- a/source/source_io/read_input_item_exx_dftu.cpp +++ b/source/source_io/read_input_item_exx_dftu.cpp @@ -243,6 +243,12 @@ void ReadInput::item_exx() read_sync_double(input.exx_pca_threshold); this->add_item(item); } + { + Input_Item item("exx_cs_inv_thr"); + item.annotation = "threshold to inverse Vq in abfs for generating Cs"; + read_sync_double(input.exx_cs_inv_thr); + this->add_item(item); + } { Input_Item item("exx_c_threshold"); item.annotation = "threshold to screen C matrix in exx"; diff --git a/source/source_lcao/module_ri/ABFs_Construct-PCA.cpp b/source/source_lcao/module_ri/ABFs_Construct-PCA.cpp index b96ad3169e..c4f5cd7f13 100644 --- a/source/source_lcao/module_ri/ABFs_Construct-PCA.cpp +++ b/source/source_lcao/module_ri/ABFs_Construct-PCA.cpp @@ -14,139 +14,226 @@ namespace ABFs_Construct { namespace PCA { - void tensor_dsyev(const char jobz, const char uplo, RI::Tensor & a, double*const w, int & info) +template <> +void tensor_syev(char jobz, char uplo, RI::Tensor& a, double* w, int& info) { - // reference: dsyev in lapack_connector.h (for ModuleBase::matrix) assert(a.shape.size() == 2); assert(a.shape[0] == a.shape[1]); - const int nr = a.shape[0]; - const int nc = a.shape[1]; - double work_tmp=0.0; + const int n = a.shape[0]; + const int lda = a.shape[1]; + + double work_query = 0.0; constexpr int minus_one = -1; - dsyev_(&jobz, &uplo, &nr, a.ptr(), &nc, w, &work_tmp, &minus_one, &info); // get best lwork - const int lwork = work_tmp; + dsyev_(&jobz, &uplo, &n, a.ptr(), &lda, w, &work_query, &minus_one, &info); + + const int lwork = static_cast(work_query); std::vector work(std::max(1, lwork)); - dsyev_(&jobz, &uplo, &nr, a.ptr(), &nc, w, work.data(), &lwork, &info); + + dsyev_(&jobz, &uplo, &n, a.ptr(), &lda, w, work.data(), &lwork, &info); +} + +template <> +void tensor_syev(char jobz, char uplo, RI::Tensor& a, float* w, int& info) +{ + assert(a.shape.size() == 2); + assert(a.shape[0] == a.shape[1]); + + const int n = a.shape[0]; + const int lda = a.shape[1]; + + float work_query = 0.0f; + constexpr int minus_one = -1; + + ssyev_(&jobz, &uplo, &n, a.ptr(), &lda, w, &work_query, &minus_one, &info); + + const int lwork = static_cast(work_query); + std::vector work(std::max(1, lwork)); + + ssyev_(&jobz, &uplo, &n, a.ptr(), &lda, w, work.data(), &lwork, &info); } - RI::Tensor get_sub_matrix( - const RI::Tensor & m, // size: (lcaos, lcaos, abfs) - const std::size_t & T, - const std::size_t & L, - const ModuleBase::Element_Basis_Index::Range & range, - const ModuleBase::Element_Basis_Index::IndexLNM & index ) +template <> +void tensor_syev>(char jobz, char uplo, RI::Tensor>& a, double* w, int& info) +{ + assert(a.shape.size() == 2); + assert(a.shape[0] == a.shape[1]); + + const int n = a.shape[0]; + const int lda = a.shape[1]; + + std::complex work_query; + constexpr int minus_one = -1; + + zheev_(&jobz, &uplo, &n, a.ptr(), &lda, w, &work_query, &minus_one, nullptr, &info); + + const int lwork = static_cast(work_query.real()); + std::vector> work(std::max(1, lwork)); + std::vector rwork(std::max(1, 3 * n - 2)); + + zheev_(&jobz, &uplo, &n, a.ptr(), &lda, w, work.data(), &lwork, rwork.data(), &info); +} + +template <> +void tensor_syev>(char jobz, char uplo, RI::Tensor>& a, float* w, int& info) +{ + assert(a.shape.size() == 2); + assert(a.shape[0] == a.shape[1]); + + const int n = a.shape[0]; + const int lda = a.shape[1]; + + std::complex work_query; + constexpr int minus_one = -1; + + cheev_(&jobz, &uplo, &n, a.ptr(), &lda, w, &work_query, &minus_one, nullptr, &info); + + const int lwork = static_cast(work_query.real()); + std::vector> work(std::max(1, lwork)); + std::vector rwork(std::max(1, 3 * n - 2)); + + cheev_(&jobz, &uplo, &n, a.ptr(), &lda, w, work.data(), &lwork, rwork.data(), &info); +} +// void tensor_dsyev(const char jobz, const char uplo, RI::Tensor & a, double*const w, int & info) +// { +// // reference: dsyev in lapack_connector.h (for ModuleBase::matrix) +// assert(a.shape.size() == 2); +// assert(a.shape[0] == a.shape[1]); +// const int nr = a.shape[0]; +// const int nc = a.shape[1]; + +// double work_tmp=0.0; +// constexpr int minus_one = -1; +// dsyev_(&jobz, &uplo, &nr, a.ptr(), &nc, w, &work_tmp, &minus_one, &info); // get best lwork + +// const int lwork = work_tmp; +// std::vector work(std::max(1, lwork)); +// dsyev_(&jobz, &uplo, &nr, a.ptr(), &nc, w, work.data(), &lwork, &info); +// } + +RI::Tensor get_sub_matrix(const RI::Tensor& m, // size: (lcaos, lcaos, abfs) + const std::size_t& T, + const std::size_t& L, + const ModuleBase::Element_Basis_Index::Range& range, + const ModuleBase::Element_Basis_Index::IndexLNM& index) { ModuleBase::TITLE("ABFs_Construct::PCA::get_sub_matrix"); assert(m.shape.size() == 3); - RI::Tensor m_sub({ m.shape[0], m.shape[1], range[T][L].N }); - for (std::size_t ir=0; ir!=m.shape[0]; ++ir) { - for (std::size_t jr=0; jr!=m.shape[1]; ++jr) { - for (std::size_t N=0; N!=range[T][L].N; ++N) { + RI::Tensor m_sub({m.shape[0], m.shape[1], range[T][L].N}); + for (std::size_t ir = 0; ir != m.shape[0]; ++ir) + { + for (std::size_t jr = 0; jr != m.shape[1]; ++jr) + { + for (std::size_t N = 0; N != range[T][L].N; ++N) + { m_sub(ir, jr, N) = m(ir, jr, index[T][L][N][0]); } } } - m_sub = m_sub.reshape({ m.shape[0] * m.shape[1], range[T][L].N }); + m_sub = m_sub.reshape({m.shape[0] * m.shape[1], range[T][L].N}); return m_sub; } - RI::Tensor get_column_mean0_matrix( const RI::Tensor & m ) +RI::Tensor get_column_mean0_matrix(const RI::Tensor& m) { ModuleBase::TITLE("ABFs_Construct::PCA::get_column_mean0_matrix"); - RI::Tensor m_new( m.shape); - for( std::size_t ic=0; ic!=m.shape[1]; ++ic ) + RI::Tensor m_new(m.shape); + for (std::size_t ic = 0; ic != m.shape[1]; ++ic) { - double sum=0; - for( std::size_t ir=0; ir!=m.shape[0]; ++ir ) { - sum += m(ir,ic); + double sum = 0; + for (std::size_t ir = 0; ir != m.shape[0]; ++ir) + { + sum += m(ir, ic); } - const double mean = sum/m.shape[0]; - for( std::size_t ir=0; ir!=m.shape[0]; ++ir ) { - m_new(ir,ic) = m(ir,ic) - mean; + const double mean = sum / m.shape[0]; + for (std::size_t ir = 0; ir != m.shape[0]; ++ir) + { + m_new(ir, ic) = m(ir, ic) - mean; } } return m_new; } std::vector, RI::Tensor>>> cal_PCA( - const UnitCell &ucell, + const UnitCell& ucell, const LCAO_Orbitals& orb, - const std::vector>> &lcaos, - const std::vector>> &abfs, - const double kmesh_times ) + const std::vector>>& lcaos, + const std::vector>>& abfs, + const double kmesh_times) { ModuleBase::TITLE("ABFs_Construct::PCA::cal_PCA"); - const ModuleBase::Element_Basis_Index::Range - range_lcaos = ModuleBase::Element_Basis_Index::construct_range( lcaos ); - const ModuleBase::Element_Basis_Index::IndexLNM - index_lcaos = ModuleBase::Element_Basis_Index::construct_index( range_lcaos ); + const ModuleBase::Element_Basis_Index::Range range_lcaos = ModuleBase::Element_Basis_Index::construct_range(lcaos); + const ModuleBase::Element_Basis_Index::IndexLNM index_lcaos + = ModuleBase::Element_Basis_Index::construct_index(range_lcaos); - const ModuleBase::Element_Basis_Index::Range - range_abfs = ModuleBase::Element_Basis_Index::construct_range( abfs ); - const ModuleBase::Element_Basis_Index::IndexLNM - index_abfs = ModuleBase::Element_Basis_Index::construct_index( range_abfs ); + const ModuleBase::Element_Basis_Index::Range range_abfs = ModuleBase::Element_Basis_Index::construct_range(abfs); + const ModuleBase::Element_Basis_Index::IndexLNM index_abfs + = ModuleBase::Element_Basis_Index::construct_index(range_abfs); const int Lmax_bak = GlobalC::exx_info.info_ri.abfs_Lmax; GlobalC::exx_info.info_ri.abfs_Lmax = std::numeric_limits::min(); - for( std::size_t T=0; T!=abfs.size(); ++T ) { - GlobalC::exx_info.info_ri.abfs_Lmax = std::max( GlobalC::exx_info.info_ri.abfs_Lmax, static_cast(abfs[T].size())-1 ); + for (std::size_t T = 0; T != abfs.size(); ++T) + { + GlobalC::exx_info.info_ri.abfs_Lmax + = std::max(GlobalC::exx_info.info_ri.abfs_Lmax, static_cast(abfs[T].size()) - 1); } Matrix_Orbs21 m_abfslcaos_lcaos; ORB_gaunt_table MGT; int Lmax; - m_abfslcaos_lcaos.init( 1, ucell , orb, kmesh_times, orb.get_Rmax(), Lmax ); + m_abfslcaos_lcaos.init(1, ucell, orb, kmesh_times, orb.get_Rmax(), Lmax); MGT.init_Gaunt_CH(Lmax); MGT.init_Gaunt(Lmax); - m_abfslcaos_lcaos.init_radial( abfs, lcaos, lcaos, MGT ); + m_abfslcaos_lcaos.init_radial(abfs, lcaos, lcaos, MGT); - std::map>> delta_R; - for( std::size_t it=0; it!=abfs.size(); ++it ) { + std::map>> delta_R; + for (std::size_t it = 0; it != abfs.size(); ++it) + { delta_R[it][it] = {0.0}; } m_abfslcaos_lcaos.init_radial_table(delta_R); GlobalC::exx_info.info_ri.abfs_Lmax = Lmax_bak; - std::vector,RI::Tensor>>> eig(abfs.size()); - for( std::size_t T=0; T!=abfs.size(); ++T ) + std::vector, RI::Tensor>>> eig(abfs.size()); + for (std::size_t T = 0; T != abfs.size(); ++T) { - const RI::Tensor A = m_abfslcaos_lcaos.cal_overlap_matrix( - T, + const RI::Tensor A = m_abfslcaos_lcaos.cal_overlap_matrix(T, T, - ModuleBase::Vector3{0,0,0}, - ModuleBase::Vector3{0,0,0}, + ModuleBase::Vector3{0, 0, 0}, + ModuleBase::Vector3{0, 0, 0}, index_abfs, index_lcaos, index_lcaos, Matrix_Orbs21::Matrix_Order::A2BA1); eig[T].resize(abfs[T].size()); - for( std::size_t L=0; L!=abfs[T].size(); ++L ) + for (std::size_t L = 0; L != abfs[T].size(); ++L) { - const RI::Tensor A_sub = get_sub_matrix( A, T, L, range_abfs, index_abfs ); + const RI::Tensor A_sub = get_sub_matrix(A, T, L, range_abfs, index_abfs); RI::Tensor mm = A_sub.transpose() * A_sub; std::vector eig_value(mm.shape[0]); - int info=1; + int info = 1; - tensor_dsyev('V', 'L', mm, eig_value.data(), info); + tensor_syev('V', 'L', mm, eig_value.data(), info); - if( info ) + if (info) { std::cout << std::endl << "info_dsyev = " << info << std::endl; - auto tensor_print = [](RI::Tensor& m, std::ostream& os, const double threshold) - { + auto tensor_print = [](RI::Tensor& m, std::ostream& os, const double threshold) { for (int ir = 0; ir != m.shape[0]; ++ir) { for (int ic = 0; ic != m.shape[1]; ++ic) { - if (std::abs(m(ir, ic)) > threshold) { + if (std::abs(m(ir, ic)) > threshold) + { os << m(ir, ic) << "\t"; - } else { + } + else + { os << 0 << "\t"; } } @@ -155,15 +242,15 @@ namespace PCA os << std::endl; }; tensor_print(mm, GlobalV::ofs_warning, 0.0); - std::cout<<"in file "<<__FILE__<<" line "<<__LINE__<,RI::Tensor>>> cal_PCA( +extern std::vector, RI::Tensor>>> cal_PCA( const UnitCell& ucell, - const LCAO_Orbitals &orb, - const std::vector>> &lcaos, - const std::vector>> &abfs, // abfs must be orthonormal - const double kmesh_times ); -} -} + const LCAO_Orbitals& orb, + const std::vector>>& lcaos, + const std::vector>>& abfs, // abfs must be orthonormal + const double kmesh_times); -#endif // ABFS_CONSTRUCT_PCA_H +template +void tensor_syev(char jobz, char uplo, RI::Tensor& a, RI::Global_Func::To_Real_t* w, int& info); +} // namespace PCA +} // namespace ABFs_Construct + +#endif // ABFS_CONSTRUCT_PCA_H diff --git a/source/source_lcao/module_ri/Inverse_Matrix.h b/source/source_lcao/module_ri/Inverse_Matrix.h index 48c65db627..141a09b39e 100644 --- a/source/source_lcao/module_ri/Inverse_Matrix.h +++ b/source/source_lcao/module_ri/Inverse_Matrix.h @@ -5,25 +5,33 @@ #pragma once +#include "ABFs_Construct-PCA.h" + #include #include -template +template class Inverse_Matrix { -public: - enum class Method{potrf}; //, syev}; - void cal_inverse(const Method &method); + public: + enum class Method + { + potrf, + syev + }; + void cal_inverse(const Method& method, const double& threshold_condition_number = 0.); - void input(const RI::Tensor &m); - void input(const std::vector>> &ms); - RI::Tensor output() const; - std::vector>> output(const std::vector &n0, const std::vector &n1) const; + void input(const RI::Tensor& m); + void input(const std::vector>>& ms); + RI::Tensor output() const; + std::vector>> output(const std::vector& n0, + const std::vector& n1) const; -private: - void using_potrf(); - void copy_down_triangle(); - RI::Tensor A; + private: + void using_potrf(); + void using_syev(const double& threshold_condition_number); + void copy_down_triangle(); + RI::Tensor A; }; #include "Inverse_Matrix.hpp" \ No newline at end of file diff --git a/source/source_lcao/module_ri/Inverse_Matrix.hpp b/source/source_lcao/module_ri/Inverse_Matrix.hpp index 20ef034239..b632d1a40c 100644 --- a/source/source_lcao/module_ri/Inverse_Matrix.hpp +++ b/source/source_lcao/module_ri/Inverse_Matrix.hpp @@ -8,152 +8,251 @@ #include "Inverse_Matrix.h" #include "source_base/module_external/lapack_connector.h" +#include "source_hamilt/module_xc/exx_info.h" #include -template -void Inverse_Matrix::cal_inverse( const Method &method ) +template +void Inverse_Matrix::cal_inverse(const Method& method, const double& threshold_condition_number) { - switch(method) - { - case Method::potrf: using_potrf(); break; -// case Method::syev: using_syev(1E-6); break; - } + switch (method) + { + case Method::potrf: + using_potrf(); + break; + case Method::syev: + using_syev(threshold_condition_number); + break; + } } -template +template void Inverse_Matrix::using_potrf() { - int info; - LapackConnector::potrf('U', A.shape[0], A.ptr(), A.shape[0], info); - if(info) - throw std::range_error("info="+std::to_string(info)+"\n"+std::string(__FILE__)+" line "+std::to_string(__LINE__)); + int info; + LapackConnector::potrf('U', A.shape[0], A.ptr(), A.shape[0], info); + if (info) + throw std::range_error("info=" + std::to_string(info) + "\n" + std::string(__FILE__) + " line " + + std::to_string(__LINE__)); - LapackConnector::potri('U', A.shape[0], A.ptr(), A.shape[0], info); - if(info) - throw std::range_error("info="+std::to_string(info)+"\n"+std::string(__FILE__)+" line "+std::to_string(__LINE__)); + LapackConnector::potri('U', A.shape[0], A.ptr(), A.shape[0], info); + if (info) + throw std::range_error("info=" + std::to_string(info) + "\n" + std::string(__FILE__) + " line " + + std::to_string(__LINE__)); - copy_down_triangle(); + copy_down_triangle(); +} + +template +struct InverseMatrixTraits; + +// double +template <> +struct InverseMatrixTraits +{ + using matrix_type = ModuleBase::matrix; + using value_type = double; + static void syev(RI::Tensor& A, value_type* w, int& info) + { + ABFs_Construct::PCA::tensor_syev('V', 'U', A, w, info); + } + static constexpr char gemm_trans = 'T'; +}; + +// complex +template <> +struct InverseMatrixTraits> +{ + using matrix_type = ModuleBase::ComplexMatrix; + using value_type = double; // eigenvalues are always real + static void syev(RI::Tensor>& A, value_type* w, int& info) + { + ABFs_Construct::PCA::tensor_syev('V', 'U', A, w, info); + } + static constexpr char gemm_trans = 'C'; +}; + +// float +template <> +struct InverseMatrixTraits +{ + using matrix_type = ModuleBase::matrix; + using value_type = float; + static void syev(RI::Tensor& A, value_type* w, int& info) + { + ABFs_Construct::PCA::tensor_syev('V', 'U', A, w, info); + } + static constexpr char gemm_trans = 'T'; +}; + +// complex +template <> +struct InverseMatrixTraits> +{ + using matrix_type = ModuleBase::ComplexMatrix; + using value_type = float; + static void syev(RI::Tensor>& A, value_type* w, int& info) + { + ABFs_Construct::PCA::tensor_syev('V', 'U', A, w, info); + } + static constexpr char gemm_trans = 'C'; +}; + +template +inline void Inverse_Matrix::using_syev(const double& threshold_condition_number) +{ + using traits = InverseMatrixTraits; + using val_t = typename traits::value_type; + + int info; + std::vector eigen_value(A.shape[0]); + + traits::syev(A, eigen_value.data(), info); + if (info) + throw std::range_error("info=" + std::to_string(info) + "\n" + std::string(__FILE__) + " line " + + std::to_string(__LINE__)); + + val_t eigen_value_max = 0; + for (const val_t& val: eigen_value) + eigen_value_max = std::max(val, eigen_value_max); + + const double threshold = eigen_value_max * threshold_condition_number; + + typename traits::matrix_type eA(A.shape[0], A.shape[1]); + + int ie = 0; + for (int i = 0; i != A.shape[0]; ++i) + { + if (eigen_value[i] > threshold) + { + BlasConnector::axpy(A.shape[1], + sqrt(1.0 / eigen_value[i]), + A.ptr() + i * A.shape[1], + 1, + eA.c + ie * eA.nc, + 1); + ++ie; + } + } + // std::cout << "No. of singularity: " << A.shape[0] - ie << std::endl; + + BlasConnector::gemm(traits::gemm_trans, 'N', eA.nc, eA.nc, ie, 1, eA.c, eA.nc, eA.c, eA.nc, 0, A.ptr(), A.shape[1]); } /* void Inverse_Matrix::using_syev( const double &threshold_condition_number ) { - std::vector eigen_value(A.nr); - LapackConnector::dsyev('V','U',A,eigen_value.data(),info); - - double eigen_value_max = 0; - for( const double &ie : eigen_value ) - eigen_value_max = std::max( ie, eigen_value_max ); - const double threshold = eigen_value_max * threshold_condition_number; - - ModuleBase::matrix eA( A.nr, A.nc ); - int ie=0; - for( int i=0; i!=A.nr; ++i ) - if( eigen_value[i] > threshold ) - { - BlasConnector::axpy( A.nc, sqrt(1.0/eigen_value[i]), A.c+i*A.nc,1, eA.c+ie*eA.nc,1 ); - ++ie; - } - BlasConnector::gemm( 'T','N', eA.nc,eA.nc,ie, 1, eA.c,eA.nc, eA.c,eA.nc, 0, A.c,A.nc ); + std::vector eigen_value(A.nr); + LapackConnector::dsyev('V','U',A,eigen_value.data(),info); + + double eigen_value_max = 0; + for( const double &ie : eigen_value ) + eigen_value_max = std::max( ie, eigen_value_max ); + const double threshold = eigen_value_max * threshold_condition_number; + + ModuleBase::matrix eA( A.nr, A.nc ); + int ie=0; + for( int i=0; i!=A.nr; ++i ) + if( eigen_value[i] > threshold ) + { + BlasConnector::axpy( A.nc, sqrt(1.0/eigen_value[i]), A.c+i*A.nc,1, eA.c+ie*eA.nc,1 ); + ++ie; + } + BlasConnector::gemm( 'T','N', eA.nc,eA.nc,ie, 1, eA.c,eA.nc, eA.c,eA.nc, 0, A.c,A.nc ); } */ -template -void Inverse_Matrix::input( const RI::Tensor &m ) +template +void Inverse_Matrix::input(const RI::Tensor& m) { - assert(m.shape.size()==2); - assert(m.shape[0]==m.shape[1]); - this->A = m.copy(); + assert(m.shape.size() == 2); + assert(m.shape[0] == m.shape[1]); + this->A = m.copy(); } - -template -void Inverse_Matrix::input(const std::vector>> &ms) +template +void Inverse_Matrix::input(const std::vector>>& ms) { - const size_t N0 = ms.size(); - assert(N0>0); - const size_t N1 = ms[0].size(); - assert(N1>0); - for(size_t Im0=0; Im0 n0(N0); - for(size_t Im0=0; Im0 n1(N1); - for(size_t Im1=0; Im1A = RI::Tensor({n_all, n_all}); - - std::vector n0_partial(N0+1); - std::partial_sum(n0.begin(), n0.end(), n0_partial.begin()+1); - std::vector n1_partial(N1+1); - std::partial_sum(n1.begin(), n1.end(), n1_partial.begin()+1); - - for(size_t Im0=0; Im0 &m_tmp = ms.at(Im0).at(Im1); - for(size_t im0=0; im0A(im0+n0_partial[Im0], im1+n1_partial[Im1]) = m_tmp(im0,im1); - } -} + const size_t N0 = ms.size(); + assert(N0 > 0); + const size_t N1 = ms[0].size(); + assert(N1 > 0); + for (size_t Im0 = 0; Im0 < N0; ++Im0) + assert(ms[Im0].size() == N1); + + for (size_t Im0 = 0; Im0 < N0; ++Im0) + for (size_t Im1 = 0; Im1 < N1; ++Im1) + assert(ms[Im0][Im1].shape.size() == 2); + + std::vector n0(N0); + for (size_t Im0 = 0; Im0 < N0; ++Im0) + n0[Im0] = ms[Im0][0].shape[0]; + std::vector n1(N1); + for (size_t Im1 = 0; Im1 < N1; ++Im1) + n1[Im1] = ms[0][Im1].shape[1]; + + for (size_t Im0 = 0; Im0 < N0; ++Im0) + for (size_t Im1 = 0; Im1 < N1; ++Im1) + assert((ms[Im0][Im1].shape[0] == n0[Im0]) && (ms[Im0][Im1].shape[1] == n1[Im1])); + const size_t n_all = std::accumulate(n0.begin(), n0.end(), 0); + assert(n_all == std::accumulate(n1.begin(), n1.end(), 0)); + this->A = RI::Tensor({n_all, n_all}); -template + std::vector n0_partial(N0 + 1); + std::partial_sum(n0.begin(), n0.end(), n0_partial.begin() + 1); + std::vector n1_partial(N1 + 1); + std::partial_sum(n1.begin(), n1.end(), n1_partial.begin() + 1); + + for (size_t Im0 = 0; Im0 < N0; ++Im0) + for (size_t Im1 = 0; Im1 < N1; ++Im1) + { + const RI::Tensor& m_tmp = ms.at(Im0).at(Im1); + for (size_t im0 = 0; im0 < m_tmp.shape[0]; ++im0) + for (size_t im1 = 0; im1 < m_tmp.shape[1]; ++im1) + this->A(im0 + n0_partial[Im0], im1 + n1_partial[Im1]) = m_tmp(im0, im1); + } +} + +template RI::Tensor Inverse_Matrix::output() const { - return this->A.copy(); + return this->A.copy(); } - -template -std::vector>> -Inverse_Matrix::output(const std::vector &n0, const std::vector &n1) const +template +std::vector>> Inverse_Matrix::output(const std::vector& n0, + const std::vector& n1) const { - assert( std::accumulate(n0.begin(), n0.end(), 0) == this->A.shape[0] ); - assert( std::accumulate(n1.begin(), n1.end(), 0) == this->A.shape[1] ); - - const size_t N0 = n0.size(); - const size_t N1 = n1.size(); - - std::vector n0_partial(N0+1); - std::partial_sum(n0.begin(), n0.end(), n0_partial.begin()+1); - std::vector n1_partial(N1+1); - std::partial_sum(n1.begin(), n1.end(), n1_partial.begin()+1); - - std::vector>> ms(N0, std::vector>(N1)); - for(size_t Im0=0; Im0 &m_tmp = ms[Im0][Im1] = RI::Tensor({n0[Im0], n1[Im1]}); - for(size_t im0=0; im0A(im0+n0_partial[Im0], im1+n1_partial[Im1]); - } - return ms; -} + assert(std::accumulate(n0.begin(), n0.end(), 0) == this->A.shape[0]); + assert(std::accumulate(n1.begin(), n1.end(), 0) == this->A.shape[1]); + + const size_t N0 = n0.size(); + const size_t N1 = n1.size(); + std::vector n0_partial(N0 + 1); + std::partial_sum(n0.begin(), n0.end(), n0_partial.begin() + 1); + std::vector n1_partial(N1 + 1); + std::partial_sum(n1.begin(), n1.end(), n1_partial.begin() + 1); + + std::vector>> ms(N0, std::vector>(N1)); + for (size_t Im0 = 0; Im0 < N0; ++Im0) + for (size_t Im1 = 0; Im1 < N1; ++Im1) + { + RI::Tensor& m_tmp = ms[Im0][Im1] = RI::Tensor({n0[Im0], n1[Im1]}); + for (size_t im0 = 0; im0 < n0[Im0]; ++im0) + for (size_t im1 = 0; im1 < n1[Im1]; ++im1) + m_tmp(im0, im1) = this->A(im0 + n0_partial[Im0], im1 + n1_partial[Im1]); + } + return ms; +} -template +template void Inverse_Matrix::copy_down_triangle() { - for( size_t i0=0; i0::DPcal_C_dC( it0, it1, {0,0,0}, {0,0,0}, this->index_abfs, this->index_lcaos, this->index_lcaos, Matrix_Orbs21::Matrix_Order::A1A2B); - const RI::Tensor V = this->DPcal_V( it0, it0, {0,0,0}, {{"writable_Vws",true}}); - const RI::Tensor L = LRI_CV_Tools::cal_I(V); + const RI::Tensor V = this->DPcal_V(it0, it0, {0, 0, 0}, {{"writable_Vws", true}}); + RI::Tensor L; + if (GlobalC::exx_info.info_ri.Cs_inv_thr > 0) + L = LRI_CV_Tools::cal_I(V, Inverse_Matrix::Method::syev, GlobalC::exx_info.info_ri.Cs_inv_thr); + else + L = LRI_CV_Tools::cal_I(V); const RI::Tensor C = RI::Global_Func::convert(0.5) * LRI_CV_Tools::mul1(L,A); // Attention 0.5! if(flags.at("writable_Cws")) @@ -412,7 +416,10 @@ LRI_CV::DPcal_C_dC( {DPcal_V(it1, it0, Rm, flags), DPcal_V(it1, it1, {0,0,0}, {{"writable_Vws",true}})}}; - const std::vector>> + std::vector>> L; + if (GlobalC::exx_info.info_ri.Cs_inv_thr > 0) + L = LRI_CV_Tools::cal_I(V, Inverse_Matrix::Method::syev, GlobalC::exx_info.info_ri.Cs_inv_thr); + else L = LRI_CV_Tools::cal_I(V); const std::vector> C = LRI_CV_Tools::mul2(L,A); diff --git a/source/source_lcao/module_ri/LRI_CV_Tools.h b/source/source_lcao/module_ri/LRI_CV_Tools.h index 21e94cbfa5..a87320a2ea 100644 --- a/source/source_lcao/module_ri/LRI_CV_Tools.h +++ b/source/source_lcao/module_ri/LRI_CV_Tools.h @@ -6,6 +6,7 @@ #ifndef LRI_CV_TOOLS_H #define LRI_CV_TOOLS_H +#include "Inverse_Matrix.h" #include "source_base/abfs-vector3_order.h" #include "source_lcao/module_ri/abfs.h" @@ -19,9 +20,15 @@ namespace LRI_CV_Tools { template -extern RI::Tensor cal_I(const RI::Tensor& m); +extern RI::Tensor cal_I(const RI::Tensor& m, + const typename Inverse_Matrix::Method method + = Inverse_Matrix::Method::potrf, + const double& threshold_condition_number = 0.); template -extern std::vector>> cal_I(const std::vector>>& ms); +extern std::vector>> cal_I(const std::vector>>& ms, + const typename Inverse_Matrix::Method method + = Inverse_Matrix::Method::potrf, + const double& threshold_condition_number = 0.); template inline RI::Tensor transform_Rm(const RI::Tensor& V); diff --git a/source/source_lcao/module_ri/LRI_CV_Tools.hpp b/source/source_lcao/module_ri/LRI_CV_Tools.hpp index 2f7951d94e..42567d58e0 100644 --- a/source/source_lcao/module_ri/LRI_CV_Tools.hpp +++ b/source/source_lcao/module_ri/LRI_CV_Tools.hpp @@ -16,21 +16,25 @@ #include "../../source_pw/module_pwdft/global.h" template -RI::Tensor LRI_CV_Tools::cal_I(const RI::Tensor& m) { +RI::Tensor LRI_CV_Tools::cal_I(const RI::Tensor& m, + const typename Inverse_Matrix::Method method, + const double& threshold_condition_number) +{ Inverse_Matrix I; I.input(m); - I.cal_inverse(Inverse_Matrix::Method::potrf); + I.cal_inverse(method, threshold_condition_number); return I.output(); } template -std::vector>> - LRI_CV_Tools::cal_I(const std::vector>>& ms) { +std::vector>> LRI_CV_Tools::cal_I(const std::vector>>& ms, + const typename Inverse_Matrix::Method method, + const double& threshold_condition_number) +{ Inverse_Matrix I; I.input(ms); - I.cal_inverse(Inverse_Matrix::Method::potrf); - return I.output({ms[0][0].shape[0], ms[1][0].shape[0]}, - {ms[0][0].shape[1], ms[0][1].shape[1]}); + I.cal_inverse(method, threshold_condition_number); + return I.output({ms[0][0].shape[0], ms[1][0].shape[0]}, {ms[0][0].shape[1], ms[0][1].shape[1]}); } template