From 1531b7b29caf746a90eba6150a03cf9ef6bb8af6 Mon Sep 17 00:00:00 2001
From: Huanjing Gong <108850865+AroundPeking@users.noreply.github.com>
Date: Tue, 23 Dec 2025 11:04:12 +0800
Subject: [PATCH] Feature: add exx_cs_inv_thr to get inverse Coulomb

---
 docs/advanced/input_files/input-main.md       |   6 +
 .../module_external/lapack_connector.h        |  23 +-
 source/source_hamilt/module_xc/exx_info.h     |   1 +
 source/source_io/input_conv.cpp               |   1 +
 .../module_parameter/input_parameter.h        |   1 +
 source/source_io/read_input_item_exx_dftu.cpp |   6 +
 .../module_ri/ABFs_Construct-PCA.cpp          | 217 ++++++++----
 .../module_ri/ABFs_Construct-PCA.h            |  19 +-
 source/source_lcao/module_ri/Inverse_Matrix.h |  32 +-
 .../source_lcao/module_ri/Inverse_Matrix.hpp  | 325 ++++++++++++------
 source/source_lcao/module_ri/LRI_CV.hpp       |  13 +-
 source/source_lcao/module_ri/LRI_CV_Tools.h   |  11 +-
 source/source_lcao/module_ri/LRI_CV_Tools.hpp |  18 +-
 13 files changed, 458 insertions(+), 215 deletions(-)
diff --git a/docs/advanced/input_files/input-main.md b/docs/advanced/input_files/input-main.md
index 512c9bb524..350c30a13a 100644
--- a/docs/advanced/input_files/input-main.md
+++ b/docs/advanced/input_files/input-main.md
@@ -2971,6 +2971,12 @@ These variables are relevant when using hybrid functionals with *[basis_type](#b
 - **Description**: See also the entry [exx_pca_threshold](#exx_pca_threshold). Smaller components (less than exx_c_threshold) of the $C^{a}_{ik}$ matrix are neglected to accelerate calculation. The larger the threshold is, the faster the calculation and the lower the accuracy. A relatively safe choice of the value is 1e-4.
 - **Default**: 1E-4
 
+### exx_cs_inv_thr
+
+- **Type**: Real
+- **Description**: By default, the Coulomb matrix inversion required for obtaining LRI coefficients is performed using LU decomposition. However, this approach may suffer from numerical instabilities when a large set of auxiliary basis functions (ABFs) is employed. When `exx_cs_inv_thr > 0`, the inversion is instead carried out via matrix diagonalization. Eigenvalues smaller than `exx_cs_inv_thr` are discarded to improve numerical stability. A relatively safe and commonly recommended value is `1e-5`.
+- **Default**: -1
+
 ### exx_v_threshold
 
 - **Type**: Real
diff --git a/source/source_base/module_external/lapack_connector.h b/source/source_base/module_external/lapack_connector.h
index abd29c107c..5007aedabf 100644
--- a/source/source_base/module_external/lapack_connector.h
+++ b/source/source_base/module_external/lapack_connector.h
@@ -170,11 +170,24 @@ void zhegv_(const int* itype, const char* jobz, const char* uplo, const int* n,
 
 // === Standard Hermitian eigenproblem ===
 
-void dsyev_(const char* jobz, const char* uplo, const int* n,
-            double* a, const int* lda,
-            double* w,
-            double* work, const int* lwork,
-            int* info);
+    void ssyev_(const char* jobz,
+                const char* uplo,
+                const int* n,
+                float* a,
+                const int* lda,
+                float* w,
+                float* work,
+                const int* lwork,
+                int* info);
+    void dsyev_(const char* jobz,
+                const char* uplo,
+                const int* n,
+                double* a,
+                const int* lda,
+                double* w,
+                double* work,
+                const int* lwork,
+                int* info);
 
 void cheev_(const char* jobz, const char* uplo, const int* n,
             std::complex<float>* a, const int* lda,
diff --git a/source/source_hamilt/module_xc/exx_info.h b/source/source_hamilt/module_xc/exx_info.h
index fa01daf46b..9cbd51c18f 100644
--- a/source/source_hamilt/module_xc/exx_info.h
+++ b/source/source_hamilt/module_xc/exx_info.h
@@ -67,6 +67,7 @@ struct Exx_Info
         double ccp_rmesh_times = 10;
         bool exx_symmetry_realspace = true;
         double kmesh_times = 4;
+        double Cs_inv_thr = -1;
 
         int abfs_Lmax = 0; // tmp
 
diff --git a/source/source_io/input_conv.cpp b/source/source_io/input_conv.cpp
index f4edf1da6e..8e3fb560ca 100644
--- a/source/source_io/input_conv.cpp
+++ b/source/source_io/input_conv.cpp
@@ -474,6 +474,7 @@ void Input_Conv::Convert()
         GlobalC::exx_info.info_ri.V_grad_R_threshold = PARAM.inp.exx_v_grad_r_threshold;
         GlobalC::exx_info.info_ri.ccp_rmesh_times = std::stod(PARAM.inp.exx_ccp_rmesh_times);
         GlobalC::exx_info.info_ri.exx_symmetry_realspace = PARAM.inp.exx_symmetry_realspace;
+        GlobalC::exx_info.info_ri.Cs_inv_thr = PARAM.inp.exx_cs_inv_thr;
 
         GlobalC::exx_info.info_opt_abfs.pca_threshold = PARAM.inp.exx_pca_threshold;
         GlobalC::exx_info.info_opt_abfs.abfs_Lmax  = PARAM.inp.exx_opt_orb_lmax;
diff --git a/source/source_io/module_parameter/input_parameter.h b/source/source_io/module_parameter/input_parameter.h
index 605db6572f..d60df6c093 100644
--- a/source/source_io/module_parameter/input_parameter.h
+++ b/source/source_io/module_parameter/input_parameter.h
@@ -552,6 +552,7 @@ struct Input_para
         = true; ///< whether to reduce the real-space sector in when using symmetry=1 in EXX calculation
     double rpa_ccp_rmesh_times = 10.0; ///< how many times larger the radial mesh required for
                                        ///< calculating Columb potential is to that of atomic orbitals
+    double exx_cs_inv_thr = -1;        ///< threshold to inverse Vq in abfs for generating Cs
     bool out_ri_cv = false; ///< Whether to output the coefficient tensor C and ABFs-representation Coulomb matrix V
     // ==============   #Parameters (16.dft+u) ======================
     //    DFT+U       Xin Qu added on 2020-10-29
diff --git a/source/source_io/read_input_item_exx_dftu.cpp b/source/source_io/read_input_item_exx_dftu.cpp
index eec98263c1..d235710a36 100644
--- a/source/source_io/read_input_item_exx_dftu.cpp
+++ b/source/source_io/read_input_item_exx_dftu.cpp
@@ -243,6 +243,12 @@ void ReadInput::item_exx()
         read_sync_double(input.exx_pca_threshold);
         this->add_item(item);
     }
+    {
+        Input_Item item("exx_cs_inv_thr");
+        item.annotation = "threshold to inverse Vq in abfs for generating Cs";
+        read_sync_double(input.exx_cs_inv_thr);
+        this->add_item(item);
+    }
     {
         Input_Item item("exx_c_threshold");
         item.annotation = "threshold to screen C matrix in exx";
diff --git a/source/source_lcao/module_ri/ABFs_Construct-PCA.cpp b/source/source_lcao/module_ri/ABFs_Construct-PCA.cpp
index b96ad3169e..c4f5cd7f13 100644
--- a/source/source_lcao/module_ri/ABFs_Construct-PCA.cpp
+++ b/source/source_lcao/module_ri/ABFs_Construct-PCA.cpp
@@ -14,139 +14,226 @@ namespace ABFs_Construct
 {
 namespace PCA
 {
-	void tensor_dsyev(const char jobz, const char uplo, RI::Tensor<double> & a, double*const w, int & info)
+template <>
+void tensor_syev<double>(char jobz, char uplo, RI::Tensor<double>& a, double* w, int& info)
 	{
-		// reference: dsyev in lapack_connector.h (for ModuleBase::matrix)
 		assert(a.shape.size() == 2);
 		assert(a.shape[0] == a.shape[1]);
-		const int nr = a.shape[0];
-		const int nc = a.shape[1];
 
-		double work_tmp=0.0;
+    const int n = a.shape[0];
+    const int lda = a.shape[1];
+
+    double work_query = 0.0;
 		constexpr int minus_one = -1;
-		dsyev_(&jobz, &uplo, &nr, a.ptr(), &nc, w, &work_tmp, &minus_one, &info);		// get best lwork
 
-		const int lwork = work_tmp;
+    dsyev_(&jobz, &uplo, &n, a.ptr(), &lda, w, &work_query, &minus_one, &info);
+
+    const int lwork = static_cast<int>(work_query);
 		std::vector<double> work(std::max(1, lwork));
-		dsyev_(&jobz, &uplo, &nr, a.ptr(), &nc, w, work.data(), &lwork, &info);
+
+    dsyev_(&jobz, &uplo, &n, a.ptr(), &lda, w, work.data(), &lwork, &info);
+}
+
+template <>
+void tensor_syev<float>(char jobz, char uplo, RI::Tensor<float>& a, float* w, int& info)
+{
+    assert(a.shape.size() == 2);
+    assert(a.shape[0] == a.shape[1]);
+
+    const int n = a.shape[0];
+    const int lda = a.shape[1];
+
+    float work_query = 0.0f;
+    constexpr int minus_one = -1;
+
+    ssyev_(&jobz, &uplo, &n, a.ptr(), &lda, w, &work_query, &minus_one, &info);
+
+    const int lwork = static_cast<int>(work_query);
+    std::vector<float> work(std::max(1, lwork));
+
+    ssyev_(&jobz, &uplo, &n, a.ptr(), &lda, w, work.data(), &lwork, &info);
 	}
 
-	RI::Tensor<double> get_sub_matrix( 
-		const RI::Tensor<double> & m,		// size: (lcaos, lcaos, abfs)
-		const std::size_t & T,
-		const std::size_t & L,
-		const ModuleBase::Element_Basis_Index::Range & range,
-		const ModuleBase::Element_Basis_Index::IndexLNM & index )
+template <>
+void tensor_syev<std::complex<double>>(char jobz, char uplo, RI::Tensor<std::complex<double>>& a, double* w, int& info)
+{
+    assert(a.shape.size() == 2);
+    assert(a.shape[0] == a.shape[1]);
+
+    const int n = a.shape[0];
+    const int lda = a.shape[1];
+
+    std::complex<double> work_query;
+    constexpr int minus_one = -1;
+
+    zheev_(&jobz, &uplo, &n, a.ptr(), &lda, w, &work_query, &minus_one, nullptr, &info);
+
+    const int lwork = static_cast<int>(work_query.real());
+    std::vector<std::complex<double>> work(std::max(1, lwork));
+    std::vector<double> rwork(std::max(1, 3 * n - 2));
+
+    zheev_(&jobz, &uplo, &n, a.ptr(), &lda, w, work.data(), &lwork, rwork.data(), &info);
+}
+
+template <>
+void tensor_syev<std::complex<float>>(char jobz, char uplo, RI::Tensor<std::complex<float>>& a, float* w, int& info)
+{
+    assert(a.shape.size() == 2);
+    assert(a.shape[0] == a.shape[1]);
+
+    const int n = a.shape[0];
+    const int lda = a.shape[1];
+
+    std::complex<float> work_query;
+    constexpr int minus_one = -1;
+
+    cheev_(&jobz, &uplo, &n, a.ptr(), &lda, w, &work_query, &minus_one, nullptr, &info);
+
+    const int lwork = static_cast<int>(work_query.real());
+    std::vector<std::complex<float>> work(std::max(1, lwork));
+    std::vector<float> rwork(std::max(1, 3 * n - 2));
+
+    cheev_(&jobz, &uplo, &n, a.ptr(), &lda, w, work.data(), &lwork, rwork.data(), &info);
+}
+// void tensor_dsyev(const char jobz, const char uplo, RI::Tensor<double> & a, double*const w, int & info)
+// {
+// 	// reference: dsyev in lapack_connector.h (for ModuleBase::matrix)
+// 	assert(a.shape.size() == 2);
+// 	assert(a.shape[0] == a.shape[1]);
+// 	const int nr = a.shape[0];
+// 	const int nc = a.shape[1];
+
+// 	double work_tmp=0.0;
+// 	constexpr int minus_one = -1;
+// 	dsyev_(&jobz, &uplo, &nr, a.ptr(), &nc, w, &work_tmp, &minus_one, &info);		// get best lwork
+
+// 	const int lwork = work_tmp;
+// 	std::vector<double> work(std::max(1, lwork));
+// 	dsyev_(&jobz, &uplo, &nr, a.ptr(), &nc, w, work.data(), &lwork, &info);
+// }
+
+RI::Tensor<double> get_sub_matrix(const RI::Tensor<double>& m, // size: (lcaos, lcaos, abfs)
+                                  const std::size_t& T,
+                                  const std::size_t& L,
+                                  const ModuleBase::Element_Basis_Index::Range& range,
+                                  const ModuleBase::Element_Basis_Index::IndexLNM& index)
 	{
 		ModuleBase::TITLE("ABFs_Construct::PCA::get_sub_matrix");		
 		assert(m.shape.size() == 3);
-		RI::Tensor<double> m_sub({ m.shape[0], m.shape[1], range[T][L].N });
-		for (std::size_t ir=0; ir!=m.shape[0]; ++ir) {
-			for (std::size_t jr=0; jr!=m.shape[1]; ++jr) {
-				for (std::size_t N=0; N!=range[T][L].N; ++N) {
+    RI::Tensor<double> m_sub({m.shape[0], m.shape[1], range[T][L].N});
+    for (std::size_t ir = 0; ir != m.shape[0]; ++ir)
+    {
+        for (std::size_t jr = 0; jr != m.shape[1]; ++jr)
+        {
+            for (std::size_t N = 0; N != range[T][L].N; ++N)
+            {
 					m_sub(ir, jr, N) = m(ir, jr, index[T][L][N][0]);
 }
 }
 }
-		m_sub = m_sub.reshape({ m.shape[0] * m.shape[1], range[T][L].N });
+    m_sub = m_sub.reshape({m.shape[0] * m.shape[1], range[T][L].N});
 		return m_sub;
 	}
 
-	RI::Tensor<double> get_column_mean0_matrix( const RI::Tensor<double> & m )
+RI::Tensor<double> get_column_mean0_matrix(const RI::Tensor<double>& m)
 	{
 		ModuleBase::TITLE("ABFs_Construct::PCA::get_column_mean0_matrix");
-		RI::Tensor<double> m_new( m.shape);
-		for( std::size_t ic=0; ic!=m.shape[1]; ++ic )
+    RI::Tensor<double> m_new(m.shape);
+    for (std::size_t ic = 0; ic != m.shape[1]; ++ic)
 		{
-			double sum=0;
-			for( std::size_t ir=0; ir!=m.shape[0]; ++ir ) {
-				sum += m(ir,ic);
+        double sum = 0;
+        for (std::size_t ir = 0; ir != m.shape[0]; ++ir)
+        {
+            sum += m(ir, ic);
 }
-			const double mean = sum/m.shape[0];
-			for( std::size_t ir=0; ir!=m.shape[0]; ++ir ) {
-				m_new(ir,ic) = m(ir,ic) - mean;
+        const double mean = sum / m.shape[0];
+        for (std::size_t ir = 0; ir != m.shape[0]; ++ir)
+        {
+            m_new(ir, ic) = m(ir, ic) - mean;
 }
 		}
 		return m_new;
 	}
 
 	std::vector<std::vector<std::pair<std::vector<double>, RI::Tensor<double>>>> cal_PCA(
-		const UnitCell &ucell,
+    const UnitCell& ucell,
         const LCAO_Orbitals& orb,
-		const std::vector<std::vector<std::vector<Numerical_Orbital_Lm>>> &lcaos, 
-		const std::vector<std::vector<std::vector<Numerical_Orbital_Lm>>> &abfs,
-		const double kmesh_times )
+    const std::vector<std::vector<std::vector<Numerical_Orbital_Lm>>>& lcaos,
+    const std::vector<std::vector<std::vector<Numerical_Orbital_Lm>>>& abfs,
+    const double kmesh_times)
 	{
 		ModuleBase::TITLE("ABFs_Construct::PCA::cal_PCA");
 		
-		const ModuleBase::Element_Basis_Index::Range
-			range_lcaos = ModuleBase::Element_Basis_Index::construct_range( lcaos );
-		const ModuleBase::Element_Basis_Index::IndexLNM
-			index_lcaos = ModuleBase::Element_Basis_Index::construct_index( range_lcaos );
+    const ModuleBase::Element_Basis_Index::Range range_lcaos = ModuleBase::Element_Basis_Index::construct_range(lcaos);
+    const ModuleBase::Element_Basis_Index::IndexLNM index_lcaos
+        = ModuleBase::Element_Basis_Index::construct_index(range_lcaos);
 
-		const ModuleBase::Element_Basis_Index::Range
-			range_abfs = ModuleBase::Element_Basis_Index::construct_range( abfs );
-		const ModuleBase::Element_Basis_Index::IndexLNM
-			index_abfs = ModuleBase::Element_Basis_Index::construct_index( range_abfs );
+    const ModuleBase::Element_Basis_Index::Range range_abfs = ModuleBase::Element_Basis_Index::construct_range(abfs);
+    const ModuleBase::Element_Basis_Index::IndexLNM index_abfs
+        = ModuleBase::Element_Basis_Index::construct_index(range_abfs);
 
 		const int Lmax_bak = GlobalC::exx_info.info_ri.abfs_Lmax;
 		GlobalC::exx_info.info_ri.abfs_Lmax = std::numeric_limits<int>::min();
-		for( std::size_t T=0; T!=abfs.size(); ++T ) {
-			GlobalC::exx_info.info_ri.abfs_Lmax = std::max( GlobalC::exx_info.info_ri.abfs_Lmax, static_cast<int>(abfs[T].size())-1 );
+    for (std::size_t T = 0; T != abfs.size(); ++T)
+    {
+        GlobalC::exx_info.info_ri.abfs_Lmax
+            = std::max(GlobalC::exx_info.info_ri.abfs_Lmax, static_cast<int>(abfs[T].size()) - 1);
 }
 
 		Matrix_Orbs21 m_abfslcaos_lcaos;
 		ORB_gaunt_table MGT;
 		int Lmax;
-		m_abfslcaos_lcaos.init( 1, ucell , orb, kmesh_times, orb.get_Rmax(), Lmax );
+    m_abfslcaos_lcaos.init(1, ucell, orb, kmesh_times, orb.get_Rmax(), Lmax);
 		MGT.init_Gaunt_CH(Lmax);
         MGT.init_Gaunt(Lmax);
-		m_abfslcaos_lcaos.init_radial( abfs, lcaos, lcaos, MGT );
+    m_abfslcaos_lcaos.init_radial(abfs, lcaos, lcaos, MGT);
 
-		std::map<std::size_t,std::map<std::size_t,std::set<double>>> delta_R;
-		for( std::size_t it=0; it!=abfs.size(); ++it ) {
+    std::map<std::size_t, std::map<std::size_t, std::set<double>>> delta_R;
+    for (std::size_t it = 0; it != abfs.size(); ++it)
+    {
 			delta_R[it][it] = {0.0};
 }
 		m_abfslcaos_lcaos.init_radial_table(delta_R);
 
 		GlobalC::exx_info.info_ri.abfs_Lmax = Lmax_bak;
 		
-		std::vector<std::vector<std::pair<std::vector<double>,RI::Tensor<double>>>> eig(abfs.size());
-		for( std::size_t T=0; T!=abfs.size(); ++T )
+    std::vector<std::vector<std::pair<std::vector<double>, RI::Tensor<double>>>> eig(abfs.size());
+    for (std::size_t T = 0; T != abfs.size(); ++T)
 		{
-			const RI::Tensor<double> A = m_abfslcaos_lcaos.cal_overlap_matrix<double>(  
-				T, 
+        const RI::Tensor<double> A = m_abfslcaos_lcaos.cal_overlap_matrix<double>(T,
 				T, 
-				ModuleBase::Vector3<double>{0,0,0},
-				ModuleBase::Vector3<double>{0,0,0},  
+                                                                                  ModuleBase::Vector3<double>{0, 0, 0},
+                                                                                  ModuleBase::Vector3<double>{0, 0, 0},
 				index_abfs, 
 				index_lcaos,
 				index_lcaos,
 				Matrix_Orbs21::Matrix_Order::A2BA1);
 			
 			eig[T].resize(abfs[T].size());
-			for( std::size_t L=0; L!=abfs[T].size(); ++L )
+        for (std::size_t L = 0; L != abfs[T].size(); ++L)
 			{
-				const RI::Tensor<double> A_sub = get_sub_matrix( A, T, L, range_abfs, index_abfs );
+            const RI::Tensor<double> A_sub = get_sub_matrix(A, T, L, range_abfs, index_abfs);
 				RI::Tensor<double> mm = A_sub.transpose() * A_sub;
 				std::vector<double> eig_value(mm.shape[0]);
 				
-				int info=1;
+            int info = 1;
 
-				tensor_dsyev('V', 'L', mm, eig_value.data(), info);
+            tensor_syev<double>('V', 'L', mm, eig_value.data(), info);
 
-				if( info )
+            if (info)
 				{
 					std::cout << std::endl << "info_dsyev = " << info << std::endl;
-					auto tensor_print = [](RI::Tensor<double>& m, std::ostream& os, const double threshold)
-					{
+                auto tensor_print = [](RI::Tensor<double>& m, std::ostream& os, const double threshold) {
 						for (int ir = 0; ir != m.shape[0]; ++ir)
 						{
 							for (int ic = 0; ic != m.shape[1]; ++ic)
 							{
-								if (std::abs(m(ir, ic)) > threshold) {
+                            if (std::abs(m(ir, ic)) > threshold)
+                            {
 									os << m(ir, ic) << "\t";
-								} else {
+                            }
+                            else
+                            {
 									os << 0 << "\t";
 }
 							}
@@ -155,15 +242,15 @@ namespace PCA
 						os << std::endl;
 					};
 					tensor_print(mm, GlobalV::ofs_warning, 0.0);
-					std::cout<<"in file "<<__FILE__<<" line "<<__LINE__<<std::endl;
+                std::cout << "in file " << __FILE__ << " line " << __LINE__ << std::endl;
 					ModuleBase::QUIT();
 				}
-				eig[T][L] = std::make_pair( eig_value, mm );
+            eig[T][L] = std::make_pair(eig_value, mm);
 			}
 		}
 		
 		return eig;
 	}
 
-}	// namespace ABFs_Construct::PCA
-}	// namespace ABFs_Construct
+} // namespace PCA
+} // namespace ABFs_Construct
diff --git a/source/source_lcao/module_ri/ABFs_Construct-PCA.h b/source/source_lcao/module_ri/ABFs_Construct-PCA.h
index a8cbed30fd..b090d2ec1d 100644
--- a/source/source_lcao/module_ri/ABFs_Construct-PCA.h
+++ b/source/source_lcao/module_ri/ABFs_Construct-PCA.h
@@ -15,13 +15,16 @@ namespace ABFs_Construct
 {
 namespace PCA
 {
-	extern std::vector<std::vector<std::pair<std::vector<double>,RI::Tensor<double>>>> cal_PCA( 
+extern std::vector<std::vector<std::pair<std::vector<double>, RI::Tensor<double>>>> cal_PCA(
 		const UnitCell& ucell,
-        const LCAO_Orbitals &orb,
-		const std::vector<std::vector<std::vector<Numerical_Orbital_Lm>>> &lcaos, 
-		const std::vector<std::vector<std::vector<Numerical_Orbital_Lm>>> &abfs,		// abfs must be orthonormal
-		const double kmesh_times );
-}
-}
+    const LCAO_Orbitals& orb,
+    const std::vector<std::vector<std::vector<Numerical_Orbital_Lm>>>& lcaos,
+    const std::vector<std::vector<std::vector<Numerical_Orbital_Lm>>>& abfs, // abfs must be orthonormal
+    const double kmesh_times);
 
-#endif	// ABFS_CONSTRUCT_PCA_H
+template <typename T>
+void tensor_syev(char jobz, char uplo, RI::Tensor<T>& a, RI::Global_Func::To_Real_t<T>* w, int& info);
+} // namespace PCA
+} // namespace ABFs_Construct
+
+#endif // ABFS_CONSTRUCT_PCA_H
diff --git a/source/source_lcao/module_ri/Inverse_Matrix.h b/source/source_lcao/module_ri/Inverse_Matrix.h
index 48c65db627..141a09b39e 100644
--- a/source/source_lcao/module_ri/Inverse_Matrix.h
+++ b/source/source_lcao/module_ri/Inverse_Matrix.h
@@ -5,25 +5,33 @@
 
 #pragma once
 
+#include "ABFs_Construct-PCA.h"
+
 #include <RI/global/Tensor.h>
 #include <vector>
 
-template<typename Tdata>
+template <typename Tdata>
 class Inverse_Matrix
 {
-public:
-	enum class Method{potrf};	//, syev};
-	void cal_inverse(const Method &method);
+  public:
+    enum class Method
+    {
+        potrf,
+        syev
+    };
+    void cal_inverse(const Method& method, const double& threshold_condition_number = 0.);
 
-	void input(const RI::Tensor<Tdata> &m);
-	void input(const std::vector<std::vector<RI::Tensor<Tdata>>> &ms);
-	RI::Tensor<Tdata> output() const;
-	std::vector<std::vector<RI::Tensor<Tdata>>> output(const std::vector<size_t> &n0, const std::vector<size_t> &n1) const;
+    void input(const RI::Tensor<Tdata>& m);
+    void input(const std::vector<std::vector<RI::Tensor<Tdata>>>& ms);
+    RI::Tensor<Tdata> output() const;
+    std::vector<std::vector<RI::Tensor<Tdata>>> output(const std::vector<size_t>& n0,
+                                                       const std::vector<size_t>& n1) const;
 
-private:
-	void using_potrf();
-	void copy_down_triangle();
-	RI::Tensor<Tdata> A;
+  private:
+    void using_potrf();
+    void using_syev(const double& threshold_condition_number);
+    void copy_down_triangle();
+    RI::Tensor<Tdata> A;
 };
 
 #include "Inverse_Matrix.hpp"
\ No newline at end of file
diff --git a/source/source_lcao/module_ri/Inverse_Matrix.hpp b/source/source_lcao/module_ri/Inverse_Matrix.hpp
index 20ef034239..b632d1a40c 100644
--- a/source/source_lcao/module_ri/Inverse_Matrix.hpp
+++ b/source/source_lcao/module_ri/Inverse_Matrix.hpp
@@ -8,152 +8,251 @@
 
 #include "Inverse_Matrix.h"
 #include "source_base/module_external/lapack_connector.h"
+#include "source_hamilt/module_xc/exx_info.h"
 
 #include <cassert>
 
-template<typename Tdata>
-void Inverse_Matrix<Tdata>::cal_inverse( const Method &method )
+template <typename Tdata>
+void Inverse_Matrix<Tdata>::cal_inverse(const Method& method, const double& threshold_condition_number)
 {
-	switch(method)
-	{
-		case Method::potrf:		using_potrf();			break;
-//		case Method::syev:		using_syev(1E-6);		break;
-	}
+    switch (method)
+    {
+    case Method::potrf:
+        using_potrf();
+        break;
+    case Method::syev:
+        using_syev(threshold_condition_number);
+        break;
+    }
 }
 
-template<typename Tdata>
+template <typename Tdata>
 void Inverse_Matrix<Tdata>::using_potrf()
 {
-	int info;
-	LapackConnector::potrf('U', A.shape[0], A.ptr(), A.shape[0], info);
-	if(info)
-		throw std::range_error("info="+std::to_string(info)+"\n"+std::string(__FILE__)+" line "+std::to_string(__LINE__));
+    int info;
+    LapackConnector::potrf('U', A.shape[0], A.ptr(), A.shape[0], info);
+    if (info)
+        throw std::range_error("info=" + std::to_string(info) + "\n" + std::string(__FILE__) + " line "
+                               + std::to_string(__LINE__));
 
-	LapackConnector::potri('U', A.shape[0], A.ptr(), A.shape[0], info);
-	if(info)
-		throw std::range_error("info="+std::to_string(info)+"\n"+std::string(__FILE__)+" line "+std::to_string(__LINE__));
+    LapackConnector::potri('U', A.shape[0], A.ptr(), A.shape[0], info);
+    if (info)
+        throw std::range_error("info=" + std::to_string(info) + "\n" + std::string(__FILE__) + " line "
+                               + std::to_string(__LINE__));
 
-	copy_down_triangle();
+    copy_down_triangle();
+}
+
+template <typename T>
+struct InverseMatrixTraits;
+
+// double
+template <>
+struct InverseMatrixTraits<double>
+{
+    using matrix_type = ModuleBase::matrix;
+    using value_type = double;
+    static void syev(RI::Tensor<double>& A, value_type* w, int& info)
+    {
+        ABFs_Construct::PCA::tensor_syev('V', 'U', A, w, info);
+    }
+    static constexpr char gemm_trans = 'T';
+};
+
+// complex<double>
+template <>
+struct InverseMatrixTraits<std::complex<double>>
+{
+    using matrix_type = ModuleBase::ComplexMatrix;
+    using value_type = double; // eigenvalues are always real
+    static void syev(RI::Tensor<std::complex<double>>& A, value_type* w, int& info)
+    {
+        ABFs_Construct::PCA::tensor_syev('V', 'U', A, w, info);
+    }
+    static constexpr char gemm_trans = 'C';
+};
+
+// float
+template <>
+struct InverseMatrixTraits<float>
+{
+    using matrix_type = ModuleBase::matrix;
+    using value_type = float;
+    static void syev(RI::Tensor<float>& A, value_type* w, int& info)
+    {
+        ABFs_Construct::PCA::tensor_syev('V', 'U', A, w, info);
+    }
+    static constexpr char gemm_trans = 'T';
+};
+
+// complex<float>
+template <>
+struct InverseMatrixTraits<std::complex<float>>
+{
+    using matrix_type = ModuleBase::ComplexMatrix;
+    using value_type = float;
+    static void syev(RI::Tensor<std::complex<float>>& A, value_type* w, int& info)
+    {
+        ABFs_Construct::PCA::tensor_syev('V', 'U', A, w, info);
+    }
+    static constexpr char gemm_trans = 'C';
+};
+
+template <typename T>
+inline void Inverse_Matrix<T>::using_syev(const double& threshold_condition_number)
+{
+    using traits = InverseMatrixTraits<T>;
+    using val_t = typename traits::value_type;
+
+    int info;
+    std::vector<val_t> eigen_value(A.shape[0]);
+
+    traits::syev(A, eigen_value.data(), info);
+    if (info)
+        throw std::range_error("info=" + std::to_string(info) + "\n" + std::string(__FILE__) + " line "
+                               + std::to_string(__LINE__));
+
+    val_t eigen_value_max = 0;
+    for (const val_t& val: eigen_value)
+        eigen_value_max = std::max(val, eigen_value_max);
+
+    const double threshold = eigen_value_max * threshold_condition_number;
+
+    typename traits::matrix_type eA(A.shape[0], A.shape[1]);
+
+    int ie = 0;
+    for (int i = 0; i != A.shape[0]; ++i)
+    {
+        if (eigen_value[i] > threshold)
+        {
+            BlasConnector::axpy(A.shape[1],
+                                sqrt(1.0 / eigen_value[i]),
+                                A.ptr() + i * A.shape[1],
+                                1,
+                                eA.c + ie * eA.nc,
+                                1);
+            ++ie;
+        }
+    }
+    // std::cout << "No. of singularity: " << A.shape[0] - ie << std::endl;
+
+    BlasConnector::gemm(traits::gemm_trans, 'N', eA.nc, eA.nc, ie, 1, eA.c, eA.nc, eA.c, eA.nc, 0, A.ptr(), A.shape[1]);
 }
 
 /*
 void Inverse_Matrix::using_syev( const double &threshold_condition_number )
 {
-	std::vector<double> eigen_value(A.nr);
-	LapackConnector::dsyev('V','U',A,eigen_value.data(),info);
-
-	double eigen_value_max = 0;
-	for( const double &ie : eigen_value )
-		eigen_value_max = std::max( ie, eigen_value_max );
-	const double threshold = eigen_value_max * threshold_condition_number;
-
-	ModuleBase::matrix eA( A.nr, A.nc );
-	int ie=0;
-	for( int i=0; i!=A.nr; ++i )
-		if( eigen_value[i] > threshold )
-		{
-			BlasConnector::axpy( A.nc, sqrt(1.0/eigen_value[i]), A.c+i*A.nc,1, eA.c+ie*eA.nc,1 );
-			++ie;
-		}
-	BlasConnector::gemm( 'T','N', eA.nc,eA.nc,ie, 1, eA.c,eA.nc, eA.c,eA.nc, 0, A.c,A.nc );
+    std::vector<double> eigen_value(A.nr);
+    LapackConnector::dsyev('V','U',A,eigen_value.data(),info);
+
+    double eigen_value_max = 0;
+    for( const double &ie : eigen_value )
+        eigen_value_max = std::max( ie, eigen_value_max );
+    const double threshold = eigen_value_max * threshold_condition_number;
+
+    ModuleBase::matrix eA( A.nr, A.nc );
+    int ie=0;
+    for( int i=0; i!=A.nr; ++i )
+        if( eigen_value[i] > threshold )
+        {
+            BlasConnector::axpy( A.nc, sqrt(1.0/eigen_value[i]), A.c+i*A.nc,1, eA.c+ie*eA.nc,1 );
+            ++ie;
+        }
+    BlasConnector::gemm( 'T','N', eA.nc,eA.nc,ie, 1, eA.c,eA.nc, eA.c,eA.nc, 0, A.c,A.nc );
 }
 */
 
-template<typename Tdata>
-void Inverse_Matrix<Tdata>::input( const RI::Tensor<Tdata> &m )
+template <typename Tdata>
+void Inverse_Matrix<Tdata>::input(const RI::Tensor<Tdata>& m)
 {
-	assert(m.shape.size()==2);
-	assert(m.shape[0]==m.shape[1]);
-	this->A = m.copy();
+    assert(m.shape.size() == 2);
+    assert(m.shape[0] == m.shape[1]);
+    this->A = m.copy();
 }
 
-
-template<typename Tdata>
-void Inverse_Matrix<Tdata>::input(const std::vector<std::vector<RI::Tensor<Tdata>>> &ms)
+template <typename Tdata>
+void Inverse_Matrix<Tdata>::input(const std::vector<std::vector<RI::Tensor<Tdata>>>& ms)
 {
-	const size_t N0 = ms.size();
-	assert(N0>0);
-	const size_t N1 = ms[0].size();
-	assert(N1>0);
-	for(size_t Im0=0; Im0<N0; ++Im0)
-		assert(ms[Im0].size()==N1);
-
-	for(size_t Im0=0; Im0<N0; ++Im0)
-		for(size_t Im1=0; Im1<N1; ++Im1)
-			assert(ms[Im0][Im1].shape.size()==2);
-
-	std::vector<size_t> n0(N0);
-	for(size_t Im0=0; Im0<N0; ++Im0)
-		n0[Im0] = ms[Im0][0].shape[0];
-	std::vector<size_t> n1(N1);
-	for(size_t Im1=0; Im1<N1; ++Im1)
-		n1[Im1] = ms[0][Im1].shape[1];
-
-	for(size_t Im0=0; Im0<N0; ++Im0)
-		for(size_t Im1=0; Im1<N1; ++Im1)
-			assert((ms[Im0][Im1].shape[0]==n0[Im0]) && (ms[Im0][Im1].shape[1]==n1[Im1]));
-
-	const size_t n_all = std::accumulate(n0.begin(), n0.end(), 0);
-	assert(n_all == std::accumulate(n1.begin(), n1.end(), 0));
-	this->A = RI::Tensor<Tdata>({n_all, n_all});
-
-	std::vector<size_t> n0_partial(N0+1);
-	std::partial_sum(n0.begin(), n0.end(), n0_partial.begin()+1);
-	std::vector<size_t> n1_partial(N1+1);
-	std::partial_sum(n1.begin(), n1.end(), n1_partial.begin()+1);
-
-	for(size_t Im0=0; Im0<N0; ++Im0)
-		for(size_t Im1=0; Im1<N1; ++Im1)
-		{
-			const RI::Tensor<Tdata> &m_tmp = ms.at(Im0).at(Im1);
-			for(size_t im0=0; im0<m_tmp.shape[0]; ++im0)
-				for(size_t im1=0; im1<m_tmp.shape[1]; ++im1)
-					this->A(im0+n0_partial[Im0], im1+n1_partial[Im1]) = m_tmp(im0,im1);
-		}
-}
+    const size_t N0 = ms.size();
+    assert(N0 > 0);
+    const size_t N1 = ms[0].size();
+    assert(N1 > 0);
+    for (size_t Im0 = 0; Im0 < N0; ++Im0)
+        assert(ms[Im0].size() == N1);
+
+    for (size_t Im0 = 0; Im0 < N0; ++Im0)
+        for (size_t Im1 = 0; Im1 < N1; ++Im1)
+            assert(ms[Im0][Im1].shape.size() == 2);
+
+    std::vector<size_t> n0(N0);
+    for (size_t Im0 = 0; Im0 < N0; ++Im0)
+        n0[Im0] = ms[Im0][0].shape[0];
+    std::vector<size_t> n1(N1);
+    for (size_t Im1 = 0; Im1 < N1; ++Im1)
+        n1[Im1] = ms[0][Im1].shape[1];
+
+    for (size_t Im0 = 0; Im0 < N0; ++Im0)
+        for (size_t Im1 = 0; Im1 < N1; ++Im1)
+            assert((ms[Im0][Im1].shape[0] == n0[Im0]) && (ms[Im0][Im1].shape[1] == n1[Im1]));
 
+    const size_t n_all = std::accumulate(n0.begin(), n0.end(), 0);
+    assert(n_all == std::accumulate(n1.begin(), n1.end(), 0));
+    this->A = RI::Tensor<Tdata>({n_all, n_all});
 
-template<typename Tdata>
+    std::vector<size_t> n0_partial(N0 + 1);
+    std::partial_sum(n0.begin(), n0.end(), n0_partial.begin() + 1);
+    std::vector<size_t> n1_partial(N1 + 1);
+    std::partial_sum(n1.begin(), n1.end(), n1_partial.begin() + 1);
+
+    for (size_t Im0 = 0; Im0 < N0; ++Im0)
+        for (size_t Im1 = 0; Im1 < N1; ++Im1)
+        {
+            const RI::Tensor<Tdata>& m_tmp = ms.at(Im0).at(Im1);
+            for (size_t im0 = 0; im0 < m_tmp.shape[0]; ++im0)
+                for (size_t im1 = 0; im1 < m_tmp.shape[1]; ++im1)
+                    this->A(im0 + n0_partial[Im0], im1 + n1_partial[Im1]) = m_tmp(im0, im1);
+        }
+}
+
+template <typename Tdata>
 RI::Tensor<Tdata> Inverse_Matrix<Tdata>::output() const
 {
-	return this->A.copy();
+    return this->A.copy();
 }
 
-
-template<typename Tdata>
-std::vector<std::vector<RI::Tensor<Tdata>>>
-Inverse_Matrix<Tdata>::output(const std::vector<size_t> &n0, const std::vector<size_t> &n1) const
+template <typename Tdata>
+std::vector<std::vector<RI::Tensor<Tdata>>> Inverse_Matrix<Tdata>::output(const std::vector<size_t>& n0,
+                                                                          const std::vector<size_t>& n1) const
 {
-	assert( std::accumulate(n0.begin(), n0.end(), 0) == this->A.shape[0] );
-	assert( std::accumulate(n1.begin(), n1.end(), 0) == this->A.shape[1] );
-
-	const size_t N0 = n0.size();
-	const size_t N1 = n1.size();
-
-	std::vector<size_t> n0_partial(N0+1);
-	std::partial_sum(n0.begin(), n0.end(), n0_partial.begin()+1);
-	std::vector<size_t> n1_partial(N1+1);
-	std::partial_sum(n1.begin(), n1.end(), n1_partial.begin()+1);
-
-	std::vector<std::vector<RI::Tensor<Tdata>>> ms(N0, std::vector<RI::Tensor<Tdata>>(N1));
-	for(size_t Im0=0; Im0<N0; ++Im0)
-		for(size_t Im1=0; Im1<N1; ++Im1)
-		{
-			RI::Tensor<Tdata> &m_tmp = ms[Im0][Im1] = RI::Tensor<Tdata>({n0[Im0], n1[Im1]});
-			for(size_t im0=0; im0<n0[Im0]; ++im0)
-				for(size_t im1=0; im1<n1[Im1]; ++im1)
-					m_tmp(im0,im1) = this->A(im0+n0_partial[Im0], im1+n1_partial[Im1]);
-		}
-	return ms;
-}
+    assert(std::accumulate(n0.begin(), n0.end(), 0) == this->A.shape[0]);
+    assert(std::accumulate(n1.begin(), n1.end(), 0) == this->A.shape[1]);
+
+    const size_t N0 = n0.size();
+    const size_t N1 = n1.size();
 
+    std::vector<size_t> n0_partial(N0 + 1);
+    std::partial_sum(n0.begin(), n0.end(), n0_partial.begin() + 1);
+    std::vector<size_t> n1_partial(N1 + 1);
+    std::partial_sum(n1.begin(), n1.end(), n1_partial.begin() + 1);
+
+    std::vector<std::vector<RI::Tensor<Tdata>>> ms(N0, std::vector<RI::Tensor<Tdata>>(N1));
+    for (size_t Im0 = 0; Im0 < N0; ++Im0)
+        for (size_t Im1 = 0; Im1 < N1; ++Im1)
+        {
+            RI::Tensor<Tdata>& m_tmp = ms[Im0][Im1] = RI::Tensor<Tdata>({n0[Im0], n1[Im1]});
+            for (size_t im0 = 0; im0 < n0[Im0]; ++im0)
+                for (size_t im1 = 0; im1 < n1[Im1]; ++im1)
+                    m_tmp(im0, im1) = this->A(im0 + n0_partial[Im0], im1 + n1_partial[Im1]);
+        }
+    return ms;
+}
 
-template<typename Tdata>
+template <typename Tdata>
 void Inverse_Matrix<Tdata>::copy_down_triangle()
 {
-	for( size_t i0=0; i0<A.shape[0]; ++i0 )
-		for( size_t i1=0; i1<i0; ++i1 )
-			A(i0,i1) = A(i1,i0);
+    for (size_t i0 = 0; i0 < A.shape[0]; ++i0)
+        for (size_t i1 = 0; i1 < i0; ++i1)
+            A(i0, i1) = A(i1, i0);
 }
 
 #endif
\ No newline at end of file
diff --git a/source/source_lcao/module_ri/LRI_CV.hpp b/source/source_lcao/module_ri/LRI_CV.hpp
index c3a7177147..53f9931fd9 100644
--- a/source/source_lcao/module_ri/LRI_CV.hpp
+++ b/source/source_lcao/module_ri/LRI_CV.hpp
@@ -363,8 +363,12 @@ LRI_CV<Tdata>::DPcal_C_dC(
 						it0, it1, {0,0,0}, {0,0,0},
 						this->index_abfs, this->index_lcaos, this->index_lcaos,
 						Matrix_Orbs21::Matrix_Order::A1A2B);
-			const RI::Tensor<Tdata> V = this->DPcal_V( it0, it0, {0,0,0}, {{"writable_Vws",true}});
-			const RI::Tensor<Tdata> L = LRI_CV_Tools::cal_I(V);
+            const RI::Tensor<Tdata> V = this->DPcal_V(it0, it0, {0, 0, 0}, {{"writable_Vws", true}});
+            RI::Tensor<Tdata> L;
+            if (GlobalC::exx_info.info_ri.Cs_inv_thr > 0)
+                L = LRI_CV_Tools::cal_I(V, Inverse_Matrix<Tdata>::Method::syev, GlobalC::exx_info.info_ri.Cs_inv_thr);
+            else
+                L = LRI_CV_Tools::cal_I(V);
 
 			const RI::Tensor<Tdata> C = RI::Global_Func::convert<Tdata>(0.5) * LRI_CV_Tools::mul1(L,A);					// Attention 0.5!
 			if(flags.at("writable_Cws"))
@@ -412,7 +416,10 @@ LRI_CV<Tdata>::DPcal_C_dC(
 				     {DPcal_V(it1, it0, Rm,      flags),
 				      DPcal_V(it1, it1, {0,0,0}, {{"writable_Vws",true}})}};
 
-			const std::vector<std::vector<RI::Tensor<Tdata>>>
+            std::vector<std::vector<RI::Tensor<Tdata>>> L;
+            if (GlobalC::exx_info.info_ri.Cs_inv_thr > 0)
+                L = LRI_CV_Tools::cal_I(V, Inverse_Matrix<Tdata>::Method::syev, GlobalC::exx_info.info_ri.Cs_inv_thr);
+            else
 				L = LRI_CV_Tools::cal_I(V);
 
 			const std::vector<RI::Tensor<Tdata>> C = LRI_CV_Tools::mul2(L,A);
diff --git a/source/source_lcao/module_ri/LRI_CV_Tools.h b/source/source_lcao/module_ri/LRI_CV_Tools.h
index 21e94cbfa5..a87320a2ea 100644
--- a/source/source_lcao/module_ri/LRI_CV_Tools.h
+++ b/source/source_lcao/module_ri/LRI_CV_Tools.h
@@ -6,6 +6,7 @@
 #ifndef LRI_CV_TOOLS_H
 #define LRI_CV_TOOLS_H
 
+#include "Inverse_Matrix.h"
 #include "source_base/abfs-vector3_order.h"
 #include "source_lcao/module_ri/abfs.h"
 
@@ -19,9 +20,15 @@
 namespace LRI_CV_Tools
 {
 template <typename Tdata>
-extern RI::Tensor<Tdata> cal_I(const RI::Tensor<Tdata>& m);
+extern RI::Tensor<Tdata> cal_I(const RI::Tensor<Tdata>& m,
+                               const typename Inverse_Matrix<Tdata>::Method method
+                               = Inverse_Matrix<Tdata>::Method::potrf,
+                               const double& threshold_condition_number = 0.);
 template <typename Tdata>
-extern std::vector<std::vector<RI::Tensor<Tdata>>> cal_I(const std::vector<std::vector<RI::Tensor<Tdata>>>& ms);
+extern std::vector<std::vector<RI::Tensor<Tdata>>> cal_I(const std::vector<std::vector<RI::Tensor<Tdata>>>& ms,
+                                                         const typename Inverse_Matrix<Tdata>::Method method
+                                                         = Inverse_Matrix<Tdata>::Method::potrf,
+                                                         const double& threshold_condition_number = 0.);
 
 template <typename Tdata>
 inline RI::Tensor<Tdata> transform_Rm(const RI::Tensor<Tdata>& V);
diff --git a/source/source_lcao/module_ri/LRI_CV_Tools.hpp b/source/source_lcao/module_ri/LRI_CV_Tools.hpp
index 2f7951d94e..42567d58e0 100644
--- a/source/source_lcao/module_ri/LRI_CV_Tools.hpp
+++ b/source/source_lcao/module_ri/LRI_CV_Tools.hpp
@@ -16,21 +16,25 @@
 #include "../../source_pw/module_pwdft/global.h"
 
 template <typename Tdata>
-RI::Tensor<Tdata> LRI_CV_Tools::cal_I(const RI::Tensor<Tdata>& m) {
+RI::Tensor<Tdata> LRI_CV_Tools::cal_I(const RI::Tensor<Tdata>& m,
+                                      const typename Inverse_Matrix<Tdata>::Method method,
+                                      const double& threshold_condition_number)
+{
     Inverse_Matrix<Tdata> I;
     I.input(m);
-    I.cal_inverse(Inverse_Matrix<Tdata>::Method::potrf);
+    I.cal_inverse(method, threshold_condition_number);
     return I.output();
 }
 
 template <typename Tdata>
-std::vector<std::vector<RI::Tensor<Tdata>>>
-    LRI_CV_Tools::cal_I(const std::vector<std::vector<RI::Tensor<Tdata>>>& ms) {
+std::vector<std::vector<RI::Tensor<Tdata>>> LRI_CV_Tools::cal_I(const std::vector<std::vector<RI::Tensor<Tdata>>>& ms,
+                                                                const typename Inverse_Matrix<Tdata>::Method method,
+                                                                const double& threshold_condition_number)
+{
     Inverse_Matrix<Tdata> I;
     I.input(ms);
-    I.cal_inverse(Inverse_Matrix<Tdata>::Method::potrf);
-    return I.output({ms[0][0].shape[0], ms[1][0].shape[0]},
-                    {ms[0][0].shape[1], ms[0][1].shape[1]});
+    I.cal_inverse(method, threshold_condition_number);
+    return I.output({ms[0][0].shape[0], ms[1][0].shape[0]}, {ms[0][0].shape[1], ms[0][1].shape[1]});
 }
 
 template <typename Tdata>