From fb3557d10dda3ba45d84e264d459beb93a796fc7 Mon Sep 17 00:00:00 2001 From: Chris Bielow Date: Wed, 14 May 2025 09:09:47 +0200 Subject: [PATCH 01/31] Show file load/store progress in all TOPP tools (#8041) * correctly forward TOPP's log_type to FileHandler calls * augment CHANGELOG --- CHANGELOG | 9 ++++---- src/topp/AssayGeneratorMetabo.cpp | 6 +++--- src/topp/FileConverter.cpp | 24 ++++++++++----------- src/topp/FileFilter.cpp | 2 +- src/topp/HighResPrecursorMassCorrector.cpp | 4 ++-- src/topp/IDFileConverter.cpp | 2 +- src/topp/IDMapper.cpp | 4 ++-- src/topp/IDSplitter.cpp | 2 +- src/topp/IonMobilityBinning.cpp | 2 +- src/topp/MRMMapper.cpp | 6 +++--- src/topp/MapAlignerIdentification.cpp | 12 +++++------ src/topp/MapAlignerPoseClustering.cpp | 12 +++++------ src/topp/MapRTTransformer.cpp | 16 +++++++------- src/topp/MetaboliteSpectralMatcher.cpp | 4 ++-- src/topp/MultiplexResolver.cpp | 8 +++---- src/topp/MzMLSplitter.cpp | 4 ++-- src/topp/OpenSwathChromatogramExtractor.cpp | 2 +- src/topp/OpenSwathMzMLFileCacher.cpp | 4 ++-- src/topp/ProteomicsLFQ.cpp | 16 +++++++------- src/topp/QCCalculator.cpp | 8 +++---- src/topp/QualityControl.cpp | 8 +++---- src/topp/RNPxlXICFilter.cpp | 6 +++--- src/topp/SeedListGenerator.cpp | 10 ++++----- src/topp/SpecLibCreator.cpp | 4 ++-- src/topp/TextExporter.cpp | 8 +++---- 25 files changed, 91 insertions(+), 92 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 77b2d8503f1..b8b738dd5cc 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -18,9 +18,8 @@ General: Dependencies: -OpenSwath: - Misc: + - show load/store progress for files in all TOPP tools (#8041) Fixes: @@ -28,11 +27,11 @@ Library: - Removed `assignRanks` and `sortByRanks` in PeptideIdentifications and sort and filter by score instead. Also removed `updateHitRanks` in IDFilter (#7991) - Remove ranke member in PeptideHit and store ranks as meta value (for backwards compatibility). 
(#7997) -- removed tools: +Removed tools: ------------------------------------------------------------------------------------------ ----- OpenMS 3.4.0 (under development) ---- +---- OpenMS 3.4.0 (May 2025) ---- ------------------------------------------------------------------------------------------ General: @@ -75,7 +74,7 @@ Library: - made FASTA file reader more robust in presence of whitespaces (#7960) - add 3' cyclophosphate version of RNAse 4, fix handling of cleavage gains (#7928) -- removed tools: +Removed tools: - XTandemAdapter - MascotAdapter (note: MascotAdapterOnline still exists) (#7927) diff --git a/src/topp/AssayGeneratorMetabo.cpp b/src/topp/AssayGeneratorMetabo.cpp index 3bab1a0d923..b8ba8740f8d 100644 --- a/src/topp/AssayGeneratorMetabo.cpp +++ b/src/topp/AssayGeneratorMetabo.cpp @@ -213,11 +213,11 @@ class TOPPAssayGeneratorMetabo : { // load mzML PeakMap spectra; - FileHandler().loadExperiment(in[file_counter], spectra, {FileTypes::MZML}); + FileHandler().loadExperiment(in[file_counter], spectra, {FileTypes::MZML}, log_type_); // load featurexml FeatureMap feature_map; - FileHandler().loadFeatures(id[file_counter], feature_map, {FileTypes::FEATUREXML}); + FileHandler().loadFeatures(id[file_counter], feature_map, {FileTypes::FEATUREXML}, log_type_); // check if featureXML corresponds to mzML StringList featurexml_primary_path; @@ -435,7 +435,7 @@ class TOPPAssayGeneratorMetabo : // validate OpenMS::TransitionTSVFile::validateTargetedExperiment(t_exp); // write traML - FileHandler().storeTransitions(out, t_exp, {FileTypes::TRAML}); + FileHandler().storeTransitions(out, t_exp, {FileTypes::TRAML}, log_type_); } else if (extension == "pqp") { diff --git a/src/topp/FileConverter.cpp b/src/topp/FileConverter.cpp index d76eb9b411a..1e97221983a 100644 --- a/src/topp/FileConverter.cpp +++ b/src/topp/FileConverter.cpp @@ -247,7 +247,7 @@ class TOPPFileConverter : if (in_type == FileTypes::CONSENSUSXML) { - FileHandler().loadConsensusFeatures(in, cm, 
{FileTypes::CONSENSUSXML}); + FileHandler().loadConsensusFeatures(in, cm, {FileTypes::CONSENSUSXML}, log_type_); cm.sortByPosition(); if ((out_type != FileTypes::FEATUREXML) && (out_type != FileTypes::CONSENSUSXML) && @@ -306,7 +306,7 @@ class TOPPFileConverter : } else if (in_type == FileTypes::EDTA) { - FileHandler().loadConsensusFeatures(in, cm, {FileTypes::EDTA}); + FileHandler().loadConsensusFeatures(in, cm, {FileTypes::EDTA}, log_type_); cm.sortByPosition(); if ((out_type != FileTypes::FEATUREXML) && (out_type != FileTypes::CONSENSUSXML)) @@ -478,7 +478,7 @@ class TOPPFileConverter : } ChromatogramTools().convertSpectraToChromatograms(exp, true, convert_to_chromatograms); - mzmlFile.storeExperiment(out, exp, {FileTypes::MZML}); + mzmlFile.storeExperiment(out, exp, {FileTypes::MZML}, log_type_); } else if (out_type == FileTypes::MZDATA) { @@ -492,7 +492,7 @@ class TOPPFileConverter : addDataProcessing_(exp, getProcessingInfo_(DataProcessing:: CONVERSION_MZDATA)); ChromatogramTools().convertChromatogramsToSpectra(exp); - FileHandler().storeExperiment(out, exp, {FileTypes::MZDATA}); + FileHandler().storeExperiment(out, exp, {FileTypes::MZDATA}, log_type_); } else if (out_type == FileTypes::MZXML) { @@ -565,7 +565,7 @@ class TOPPFileConverter : } else if (in_type == FileTypes::OMS) { - FileHandler().loadFeatures(in, fm, {FileTypes::OMS}); + FileHandler().loadFeatures(in, fm, {FileTypes::OMS}, log_type_); IdentificationDataConverter::exportFeatureIDs(fm); } else // not loaded as feature map or consensus map @@ -594,7 +594,7 @@ class TOPPFileConverter : addDataProcessing_(fm, getProcessingInfo_(DataProcessing:: FORMAT_CONVERSION)); - FileHandler().storeFeatures(out, fm, {FileTypes::FEATUREXML}); + FileHandler().storeFeatures(out, fm, {FileTypes::FEATUREXML}, log_type_); } else if (out_type == FileTypes::CONSENSUSXML) { @@ -626,7 +626,7 @@ class TOPPFileConverter : addDataProcessing_(cm, getProcessingInfo_(DataProcessing:: FORMAT_CONVERSION)); - 
FileHandler().storeConsensusFeatures(out, cm, {FileTypes::CONSENSUSXML}); + FileHandler().storeConsensusFeatures(out, cm, {FileTypes::CONSENSUSXML}, log_type_); } else if (out_type == FileTypes::EDTA) { @@ -642,11 +642,11 @@ class TOPPFileConverter : } if (!fm.empty()) { - FileHandler().storeFeatures(out, fm, {FileTypes::EDTA}); + FileHandler().storeFeatures(out, fm, {FileTypes::EDTA}, log_type_); } else if (!cm.empty()) { - FileHandler().storeConsensusFeatures(out, cm, {FileTypes::EDTA}); + FileHandler().storeConsensusFeatures(out, cm, {FileTypes::EDTA}, log_type_); } } else if (out_type == FileTypes::CACHEDMZML) @@ -679,19 +679,19 @@ class TOPPFileConverter : } else if (out_type == FileTypes::SQMASS) { - FileHandler().storeExperiment(out, exp, {FileTypes::SQMASS}); + FileHandler().storeExperiment(out, exp, {FileTypes::SQMASS}, log_type_); } else if (out_type == FileTypes::OMS) { if (in_type == FileTypes::FEATUREXML) { IdentificationDataConverter::importFeatureIDs(fm); - FileHandler().storeFeatures(out, fm, {FileTypes::OMS}); + FileHandler().storeFeatures(out, fm, {FileTypes::OMS}, log_type_); } else if (in_type == FileTypes::CONSENSUSXML) { IdentificationDataConverter::importConsensusIDs(cm); - FileHandler().storeConsensusFeatures(out, cm, {FileTypes::OMS}); + FileHandler().storeConsensusFeatures(out, cm, {FileTypes::OMS}, log_type_); } else { diff --git a/src/topp/FileFilter.cpp b/src/topp/FileFilter.cpp index 2ecea6feaa4..f26df5fe16b 100644 --- a/src/topp/FileFilter.cpp +++ b/src/topp/FileFilter.cpp @@ -946,7 +946,7 @@ class TOPPFileFilter : bool is_blacklist = getStringOption_("spectra:blackorwhitelist:blacklist") == "true" ? 
true : false; PeakMap lib_file; - FileHandler().loadExperiment(lib_file_name, lib_file, {FileTypes::MZML}); + FileHandler().loadExperiment(lib_file_name, lib_file, {FileTypes::MZML}, log_type_); int ret = filterByBlackOrWhiteList(is_blacklist, exp, lib_file, tol_rt, tol_mz, tol_sim, is_ppm); if (ret != EXECUTION_OK) diff --git a/src/topp/HighResPrecursorMassCorrector.cpp b/src/topp/HighResPrecursorMassCorrector.cpp index 05178803474..217bd5ae9b4 100644 --- a/src/topp/HighResPrecursorMassCorrector.cpp +++ b/src/topp/HighResPrecursorMassCorrector.cpp @@ -135,7 +135,7 @@ class TOPPHiResPrecursorMassCorrector : const bool highest_intensity_peak_ppm = getStringOption_("highest_intensity_peak:mz_tolerance_unit") == "ppm" ? true : false; PeakMap exp; - FileHandler().loadExperiment(in_mzml, exp, {FileTypes::MZML}); + FileHandler().loadExperiment(in_mzml, exp, {FileTypes::MZML}, log_type_); cout << setprecision(12); @@ -170,7 +170,7 @@ class TOPPHiResPrecursorMassCorrector : if (!in_feature.empty()) { FeatureMap features; - FileHandler().loadFeatures(in_feature, features); + FileHandler().loadFeatures(in_feature, features, {}, log_type_); corrected_to_nearest_feature = PrecursorCorrection::correctToNearestFeature(features, exp, rt_tolerance, mz_tolerance, mz_unit_ppm, believe_charge, keep_original, assign_all_matching, max_trace, debug_level_); corrected_precursors.insert(corrected_to_nearest_feature.begin(), corrected_to_nearest_feature.end()); } diff --git a/src/topp/IDFileConverter.cpp b/src/topp/IDFileConverter.cpp index 3e40a9b7266..af9af9bf76d 100644 --- a/src/topp/IDFileConverter.cpp +++ b/src/topp/IDFileConverter.cpp @@ -160,7 +160,7 @@ class TOPPIDFileConverter : bool ret = true; PeakMap expmap; SpectrumLookup lookup; - FileHandler().loadExperiment(filename, expmap); + FileHandler().loadExperiment(filename, expmap, {}, log_type_); lookup.readSpectra(expmap.getSpectra()); #pragma omp parallel for diff --git a/src/topp/IDMapper.cpp b/src/topp/IDMapper.cpp index 
400904d97e7..ac332103444 100644 --- a/src/topp/IDMapper.cpp +++ b/src/topp/IDMapper.cpp @@ -193,7 +193,7 @@ class TOPPIDMapper : public TOPPBase PeakMap exp; if (!spectra.empty()) { - FileHandler().loadExperiment(spectra, exp, {FileTypes::MZML}); + FileHandler().loadExperiment(spectra, exp, {FileTypes::MZML}, log_type_); } bool measure_from_subelements = getFlag_("consensus:use_subelements"); @@ -224,7 +224,7 @@ class TOPPIDMapper : public TOPPBase if (!spectra.empty()) { - FileHandler().loadExperiment(spectra, exp, {FileTypes::MZML}); + FileHandler().loadExperiment(spectra, exp, {FileTypes::MZML}, log_type_); } mapper.annotate(map, peptide_ids, protein_ids, (getStringOption_("feature:use_centroid_rt") == "true"), (getStringOption_("feature:use_centroid_mz") == "true"), exp); diff --git a/src/topp/IDSplitter.cpp b/src/topp/IDSplitter.cpp index 1b1d643a297..baaddaa5d10 100644 --- a/src/topp/IDSplitter.cpp +++ b/src/topp/IDSplitter.cpp @@ -104,7 +104,7 @@ class TOPPIDSplitter : if (in_type == FileTypes::MZML) { PeakMap experiment; - FileHandler().loadExperiment(in, experiment, {FileTypes::MZML}); + FileHandler().loadExperiment(in, experiment, {FileTypes::MZML}, log_type_); // what about unassigned peptide IDs? for (PeakMap::Iterator exp_it = experiment.begin(); exp_it != experiment.end(); ++exp_it) diff --git a/src/topp/IonMobilityBinning.cpp b/src/topp/IonMobilityBinning.cpp index 28e3843933c..9ee4ced3654 100644 --- a/src/topp/IonMobilityBinning.cpp +++ b/src/topp/IonMobilityBinning.cpp @@ -75,7 +75,7 @@ class TOPPIonMobilityBinning : MZ_UNITS mz_binning_width_unit = getStringOption_("SpectraMerging:mz_binning_width_unit") == "Da" ? 
MZ_UNITS::DA : MZ_UNITS::PPM; PeakMap experiment; - FileHandler().loadExperiment(input_file, experiment, {FileTypes::MZML}); + FileHandler().loadExperiment(input_file, experiment, {FileTypes::MZML}, log_type_); auto [mzML_bins, im_ranges] = IMDataConverter::splitExperimentByIonMobility(std::move(experiment), bins, bin_extension_abs, mz_binning_width, mz_binning_width_unit); diff --git a/src/topp/MRMMapper.cpp b/src/topp/MRMMapper.cpp index 6ce07dbf612..e24d191024b 100644 --- a/src/topp/MRMMapper.cpp +++ b/src/topp/MRMMapper.cpp @@ -127,8 +127,8 @@ class TOPPMRMMapper OpenMS::PeakMap chromatogram_map; OpenMS::PeakMap output; - FileHandler().loadTransitions(tr_file, targeted_exp, {FileTypes::TRAML}); - FileHandler().loadExperiment(in, chromatogram_map, {FileTypes::MZML}); + FileHandler().loadTransitions(tr_file, targeted_exp, {FileTypes::TRAML}, log_type_); + FileHandler().loadExperiment(in, chromatogram_map, {FileTypes::MZML}, log_type_); Param param = getParam_().copy("algorithm:", true); @@ -146,7 +146,7 @@ class TOPPMRMMapper } output.setChromatograms(chromatograms); - FileHandler().storeExperiment(out, output, {FileTypes::MZML}); + FileHandler().storeExperiment(out, output, {FileTypes::MZML}, log_type_); return EXECUTION_OK; } diff --git a/src/topp/MapAlignerIdentification.cpp b/src/topp/MapAlignerIdentification.cpp index fd2a63eb9c2..a94603a4208 100644 --- a/src/topp/MapAlignerIdentification.cpp +++ b/src/topp/MapAlignerIdentification.cpp @@ -213,21 +213,21 @@ class TOPPMapAlignerIdentification : case FileTypes::MZML: { PeakMap experiment; - FileHandler().loadExperiment(reference_file, experiment, {FileTypes::MZML}); + FileHandler().loadExperiment(reference_file, experiment, {FileTypes::MZML}, log_type_); algorithm.setReference(experiment); } break; case FileTypes::FEATUREXML: { FeatureMap features; - FileHandler().loadFeatures(reference_file, features); + FileHandler().loadFeatures(reference_file, features, {}, log_type_); algorithm.setReference(features); } 
break; case FileTypes::CONSENSUSXML: { ConsensusMap consensus; - FileHandler().loadConsensusFeatures(reference_file, consensus); + FileHandler().loadConsensusFeatures(reference_file, consensus, {}, log_type_); algorithm.setReference(consensus); } break; @@ -235,7 +235,7 @@ class TOPPMapAlignerIdentification : { vector proteins; vector peptides; - FileHandler().loadIdentifications(reference_file, proteins, peptides); + FileHandler().loadIdentifications(reference_file, proteins, peptides, {}, log_type_); algorithm.setReference(peptides); } break; @@ -447,7 +447,7 @@ class TOPPMapAlignerIdentification : for (Size i = 0; i < input_files.size(); ++i) { progresslogger.setProgress(i); - idxml_file.loadIdentifications(input_files[i], protein_ids[i], peptide_ids[i], {FileTypes::IDXML}); + idxml_file.loadIdentifications(input_files[i], protein_ids[i], peptide_ids[i], {FileTypes::IDXML}, log_type_); } progresslogger.endProgress(); @@ -462,7 +462,7 @@ class TOPPMapAlignerIdentification : for (Size i = 0; i < output_files.size(); ++i) { progresslogger.setProgress(i); - idxml_file.storeIdentifications(output_files[i], protein_ids[i], peptide_ids[i], {FileTypes::IDXML}); + idxml_file.storeIdentifications(output_files[i], protein_ids[i], peptide_ids[i], {FileTypes::IDXML}, log_type_); } progresslogger.endProgress(); } diff --git a/src/topp/MapAlignerPoseClustering.cpp b/src/topp/MapAlignerPoseClustering.cpp index 4ee5c8483f7..6b08fcab96a 100644 --- a/src/topp/MapAlignerPoseClustering.cpp +++ b/src/topp/MapAlignerPoseClustering.cpp @@ -160,7 +160,7 @@ class TOPPMapAlignerPoseClustering : else if (in_type == FileTypes::MZML) // this is expensive! 
{ PeakMap exp; - FileHandler().loadExperiment(in_files[i], exp, {FileTypes::MZML}); + FileHandler().loadExperiment(in_files[i], exp, {FileTypes::MZML}, log_type_); exp.updateRanges(1); s = exp.getSize(); } @@ -186,13 +186,13 @@ class TOPPMapAlignerPoseClustering : FileHandler f_fxml_tmp; // for the reference, we never need CH or subordinates f_fxml_tmp.getFeatOptions().setLoadConvexHull(false); f_fxml_tmp.getFeatOptions().setLoadSubordinates(false); - f_fxml_tmp.loadFeatures(file, map_ref, {FileTypes::FEATUREXML}); + f_fxml_tmp.loadFeatures(file, map_ref, {FileTypes::FEATUREXML}, log_type_); algorithm.setReference(map_ref); } else if (in_type == FileTypes::MZML) { PeakMap map_ref; - FileHandler().loadExperiment(file, map_ref); + FileHandler().loadExperiment(file, map_ref, {}, log_type_); algorithm.setReference(map_ref); } @@ -239,13 +239,13 @@ class TOPPMapAlignerPoseClustering : MapAlignmentTransformer::transformRetentionTimes(map, trafo); // annotate output with data processing info addDataProcessing_(map, getProcessingInfo_(DataProcessing::ALIGNMENT)); - f_fxml_tmp.storeFeatures(out_files[i], map, {FileTypes::FEATUREXML}); + f_fxml_tmp.storeFeatures(out_files[i], map, {FileTypes::FEATUREXML}, log_type_); } } else if (in_type == FileTypes::MZML) { PeakMap map; - FileHandler().loadExperiment(in_files[i], map, {FileTypes::MZML}); + FileHandler().loadExperiment(in_files[i], map, {FileTypes::MZML}, log_type_); if (i == static_cast(reference_index)) { trafo.fitModel("identity"); @@ -259,7 +259,7 @@ class TOPPMapAlignerPoseClustering : MapAlignmentTransformer::transformRetentionTimes(map, trafo); // annotate output with data processing info addDataProcessing_(map, getProcessingInfo_(DataProcessing::ALIGNMENT)); - FileHandler().storeExperiment(out_files[i], map, {FileTypes::MZML}); + FileHandler().storeExperiment(out_files[i], map, {FileTypes::MZML}, log_type_); } } diff --git a/src/topp/MapRTTransformer.cpp b/src/topp/MapRTTransformer.cpp index 49401678980..c767fdb4126 
100644 --- a/src/topp/MapRTTransformer.cpp +++ b/src/topp/MapRTTransformer.cpp @@ -166,35 +166,35 @@ class TOPPMapRTTransformer : if (in_type == FileTypes::MZML) { PeakMap map; - FileHandler().loadExperiment(in, map, {FileTypes::MZML}); + FileHandler().loadExperiment(in, map, {FileTypes::MZML}, log_type_); applyTransformation_( trafo, map); - FileHandler().storeExperiment(out, map, {FileTypes::MZML}); + FileHandler().storeExperiment(out, map, {FileTypes::MZML}, log_type_); } else if (in_type == FileTypes::FEATUREXML) { FeatureMap map; - FileHandler().loadFeatures(in, map, {FileTypes::FEATUREXML}); + FileHandler().loadFeatures(in, map, {FileTypes::FEATUREXML}, log_type_); applyTransformation_( trafo, map); - FileHandler().storeFeatures(out, map, {FileTypes::FEATUREXML}); + FileHandler().storeFeatures(out, map, {FileTypes::FEATUREXML}, log_type_); } else if (in_type == FileTypes::CONSENSUSXML) { ConsensusMap map; - FileHandler().loadConsensusFeatures(in, map, {FileTypes::CONSENSUSXML}); + FileHandler().loadConsensusFeatures(in, map, {FileTypes::CONSENSUSXML}, log_type_); applyTransformation_( trafo, map); - FileHandler().storeConsensusFeatures(out, map, {FileTypes::CONSENSUSXML}); + FileHandler().storeConsensusFeatures(out, map, {FileTypes::CONSENSUSXML}, log_type_); } else if (in_type == FileTypes::IDXML) { vector proteins; vector peptides; - FileHandler().loadIdentifications(in, proteins, peptides, {FileTypes::IDXML}); + FileHandler().loadIdentifications(in, proteins, peptides, {FileTypes::IDXML}, log_type_); bool store_original_rt = getFlag_("store_original_rt"); MapAlignmentTransformer::transformRetentionTimes(peptides, trafo, store_original_rt); // no "data processing" section in idXML - FileHandler().storeIdentifications(out, proteins, peptides, {FileTypes::IDXML}); + FileHandler().storeIdentifications(out, proteins, peptides, {FileTypes::IDXML}, log_type_); } } diff --git a/src/topp/MetaboliteSpectralMatcher.cpp b/src/topp/MetaboliteSpectralMatcher.cpp index 
5e59e7b209f..2cbee2a5741 100644 --- a/src/topp/MetaboliteSpectralMatcher.cpp +++ b/src/topp/MetaboliteSpectralMatcher.cpp @@ -119,7 +119,7 @@ class TOPPMetaboliteSpectralMatcher : mz_file.getOptions().setMSLevels(ms_level); PeakMap ms_peakmap; - mz_file.loadExperiment(in, ms_peakmap, {FileTypes::MZML}); + mz_file.loadExperiment(in, ms_peakmap, {FileTypes::MZML}, log_type_); if (ms_peakmap.empty()) { @@ -141,7 +141,7 @@ class TOPPMetaboliteSpectralMatcher : // load database //------------------------------------------------------------- PeakMap spec_db; - FileHandler().loadExperiment(spec_db_filename, spec_db, {FileTypes::MSP, FileTypes::MZML, FileTypes::MGF}); + FileHandler().loadExperiment(spec_db_filename, spec_db, {FileTypes::MSP, FileTypes::MZML, FileTypes::MGF}, log_type_); if (spec_db.empty()) { diff --git a/src/topp/MultiplexResolver.cpp b/src/topp/MultiplexResolver.cpp index 288c649ee8d..5cf2a692ac1 100644 --- a/src/topp/MultiplexResolver.cpp +++ b/src/topp/MultiplexResolver.cpp @@ -572,14 +572,14 @@ class TOPPMultiplexResolver : * load consensus map */ ConsensusMap map_in; - FileHandler().loadConsensusFeatures(in_, map_in, {FileTypes::CONSENSUSXML}); + FileHandler().loadConsensusFeatures(in_, map_in, {FileTypes::CONSENSUSXML}, log_type_); /** * load (optional) blacklist */ if (!(in_blacklist_.empty())) { - FileHandler().loadExperiment(in_blacklist_, exp_blacklist_, {FileTypes::MZML}); + FileHandler().loadExperiment(in_blacklist_, exp_blacklist_, {FileTypes::MZML}, log_type_); } /** @@ -603,10 +603,10 @@ class TOPPMultiplexResolver : /** * store consensus maps */ - FileHandler().storeConsensusFeatures(out_, map_out, {FileTypes::CONSENSUSXML}); + FileHandler().storeConsensusFeatures(out_, map_out, {FileTypes::CONSENSUSXML}, log_type_); if (!out_conflicts_.empty()) { - FileHandler().storeConsensusFeatures(out_conflicts_, map_conflicts, {FileTypes::CONSENSUSXML}); + FileHandler().storeConsensusFeatures(out_conflicts_, map_conflicts, {FileTypes::CONSENSUSXML}, 
log_type_); } return EXECUTION_OK; diff --git a/src/topp/MzMLSplitter.cpp b/src/topp/MzMLSplitter.cpp index 4b5f5ebd1a1..3d25e78788a 100644 --- a/src/topp/MzMLSplitter.cpp +++ b/src/topp/MzMLSplitter.cpp @@ -112,7 +112,7 @@ class TOPPMzMLSplitter : public TOPPBase writeLogInfo_("Splitting file into " + String(parts) + " parts..."); PeakMap experiment; - FileHandler().loadExperiment(in, experiment, {FileTypes::MZML}); + FileHandler().loadExperiment(in, experiment, {FileTypes::MZML}, log_type_); vector spectra; vector chromatograms; @@ -171,7 +171,7 @@ class TOPPMzMLSplitter : public TOPPBase chrom_start += n_chrom; writeLogInfo_("Part " + String(counter) + ": " + String(n_spec) + " spectra, " + String(n_chrom) + " chromatograms"); - FileHandler().storeExperiment(out_name.str(), part, {FileTypes::MZML}); + FileHandler().storeExperiment(out_name.str(), part, {FileTypes::MZML}, log_type_); } return EXECUTION_OK; diff --git a/src/topp/OpenSwathChromatogramExtractor.cpp b/src/topp/OpenSwathChromatogramExtractor.cpp index 75731461ff1..6ccee83190e 100644 --- a/src/topp/OpenSwathChromatogramExtractor.cpp +++ b/src/topp/OpenSwathChromatogramExtractor.cpp @@ -221,7 +221,7 @@ class TOPPOpenSwathChromatogramExtractor // Find the transitions to extract and extract them MapType tmp_out; OpenMS::TargetedExperiment transition_exp_used; - FileHandler().loadExperiment(file_list[i], *exp, {FileTypes::MZML}); + FileHandler().loadExperiment(file_list[i], *exp, {FileTypes::MZML}, log_type_); if (exp->empty()) { continue; // if empty, go on diff --git a/src/topp/OpenSwathMzMLFileCacher.cpp b/src/topp/OpenSwathMzMLFileCacher.cpp index 32c18390229..31a5c414d6f 100644 --- a/src/topp/OpenSwathMzMLFileCacher.cpp +++ b/src/topp/OpenSwathMzMLFileCacher.cpp @@ -153,7 +153,7 @@ class TOPPOpenSwathMzMLFileCacher MapType exp; SqMassFile sqfile; sqfile.load(in, exp); - FileHandler().storeExperiment(out, exp, {FileTypes::MZML}); + FileHandler().storeExperiment(out, exp, {FileTypes::MZML}, log_type_); 
return EXECUTION_OK; } else if (in_type == FileTypes::MZML && out_type == FileTypes::SQMASS && process_lowmemory) @@ -186,7 +186,7 @@ class TOPPOpenSwathMzMLFileCacher sqfile.setConfig(config); MapType exp; - FileHandler().loadExperiment(in, exp, {FileTypes::MZML}); + FileHandler().loadExperiment(in, exp, {FileTypes::MZML}, log_type_); sqfile.store(out, exp); return EXECUTION_OK; } diff --git a/src/topp/ProteomicsLFQ.cpp b/src/topp/ProteomicsLFQ.cpp index 6273b2a9948..948a5ccf84d 100644 --- a/src/topp/ProteomicsLFQ.cpp +++ b/src/topp/ProteomicsLFQ.cpp @@ -362,7 +362,7 @@ class ProteomicsLFQ : // load raw file PeakMap ms_raw; - FileHandler().loadExperiment(mz_file, ms_raw, {FileTypes::MZML}); + FileHandler().loadExperiment(mz_file, ms_raw, {FileTypes::MZML}, log_type_); ms_raw.clearMetaDataArrays(); ms_raw.updateRanges(); @@ -819,7 +819,7 @@ class ProteomicsLFQ : { const String& mz_file_abs_path = File::absolutePath(mz_file); - FileHandler().loadIdentifications(id_file_abs_path, protein_ids, peptide_ids, {FileTypes::IDXML}); + FileHandler().loadIdentifications(id_file_abs_path, protein_ids, peptide_ids, {FileTypes::IDXML}, log_type_); ExitCodes e = checkSingleRunPerID_(protein_ids, id_file_abs_path); if (e != EXECUTION_OK) return e; @@ -1092,7 +1092,7 @@ class ProteomicsLFQ : calculateSeeds_(ms_centroided, seeds, median_fwhm); if (debug_level_ > 666) { - FileHandler().storeFeatures("debug_seeds_fraction_" + String(ms_files.first) + "_" + String(fraction_group) + ".featureXML", seeds, {FileTypes::FEATUREXML}); + FileHandler().storeFeatures("debug_seeds_fraction_" + String(ms_files.first) + "_" + String(fraction_group) + ".featureXML", seeds, {FileTypes::FEATUREXML}, log_type_); } } @@ -1309,12 +1309,12 @@ class ProteomicsLFQ : if (debug_level_ > 666) { - FileHandler().storeFeatures("debug_fraction_" + String(ms_files.first) + "_" + String(fraction_group) + ".featureXML", feature_maps.back(), {FileTypes::FEATUREXML}); + FileHandler().storeFeatures("debug_fraction_" + 
String(ms_files.first) + "_" + String(fraction_group) + ".featureXML", feature_maps.back(), {FileTypes::FEATUREXML}, log_type_); } if (debug_level_ > 10000) { - FileHandler().storeExperiment("debug_fraction_" + String(ms_files.first) + "_" + String(fraction_group) + "_chroms.mzML", ffi.getChromatograms(), {FileTypes::MZML}); + FileHandler().storeExperiment("debug_fraction_" + String(ms_files.first) + "_" + String(fraction_group) + "_chroms.mzML", ffi.getChromatograms(), {FileTypes::MZML}, log_type_); } ++fraction_group; @@ -1393,7 +1393,7 @@ class ProteomicsLFQ : if (debug_level_ >= 666) { - FileHandler().storeConsensusFeatures("debug_fraction_" + String(ms_files.first) + ".consensusXML", consensus_fraction, {FileTypes::CONSENSUSXML}); + FileHandler().storeConsensusFeatures("debug_fraction_" + String(ms_files.first) + ".consensusXML", consensus_fraction, {FileTypes::CONSENSUSXML}, log_type_); writeDebug_("to produce a consensus map with: " + String(consensus_fraction.getColumnHeaders().size()) + " columns.", 1); } @@ -1784,7 +1784,7 @@ class ProteomicsLFQ : if (debug_level_ >= 666) { - FileHandler().storeConsensusFeatures("debug_after_normalization.consensusXML", consensus, {FileTypes::CONSENSUSXML}); + FileHandler().storeConsensusFeatures("debug_after_normalization.consensusXML", consensus, {FileTypes::CONSENSUSXML}, log_type_); } } else if (getStringOption_("quantification_method") == "spectral_counting") @@ -1939,7 +1939,7 @@ class ProteomicsLFQ : { // Note: idXML and consensusXML doesn't support writing quantification at protein groups // (they are nevertheless stored and passed to mzTab for proper export) - FileHandler().storeConsensusFeatures(getStringOption_("out_cxml"), consensus, {FileTypes::CONSENSUSXML}); + FileHandler().storeConsensusFeatures(getStringOption_("out_cxml"), consensus, {FileTypes::CONSENSUSXML}, log_type_); } // Fill MzTab with meta data and quants annotated in identification data structure diff --git a/src/topp/QCCalculator.cpp 
b/src/topp/QCCalculator.cpp index a789c4065ad..0fa0d40a5f8 100644 --- a/src/topp/QCCalculator.cpp +++ b/src/topp/QCCalculator.cpp @@ -132,7 +132,7 @@ class TOPPQCCalculator : // prepare input cout << "Reading mzML file..." << endl; MSExperiment exp; - FileHandler().loadExperiment(inputfile_name, exp, {FileTypes::MZML}); + FileHandler().loadExperiment(inputfile_name, exp, {FileTypes::MZML}, log_type_); exp.sortSpectra(); exp.updateRanges(); @@ -140,7 +140,7 @@ class TOPPQCCalculator : if (!inputfile_feature.empty()) { cout << "Reading featureXML file..." << endl; - FileHandler().loadFeatures(inputfile_feature, feature_map, {FileTypes::FEATUREXML}); + FileHandler().loadFeatures(inputfile_feature, feature_map, {FileTypes::FEATUREXML}, log_type_); feature_map.updateRanges(); feature_map.sortByRT(); } @@ -149,7 +149,7 @@ class TOPPQCCalculator : if (!inputfile_consensus.empty()) { cout << "Reading consensusXML file..." << endl; - FileHandler().loadConsensusFeatures(inputfile_consensus, consensus_map, {FileTypes::CONSENSUSXML}); + FileHandler().loadConsensusFeatures(inputfile_consensus, consensus_map, {FileTypes::CONSENSUSXML}, log_type_); } vector prot_ids; @@ -157,7 +157,7 @@ class TOPPQCCalculator : if (!inputfile_id.empty()) { cout << "Reading idXML file..." << endl; - FileHandler().loadIdentifications(inputfile_id, prot_ids, pep_ids, {FileTypes::IDXML}); + FileHandler().loadIdentifications(inputfile_id, prot_ids, pep_ids, {FileTypes::IDXML}, log_type_); } // collect QC data and store according to output file extension diff --git a/src/topp/QualityControl.cpp b/src/topp/QualityControl.cpp index 119f73ab7a6..a80f855aa6d 100644 --- a/src/topp/QualityControl.cpp +++ b/src/topp/QualityControl.cpp @@ -275,7 +275,7 @@ class TOPPQualityControl : public TOPPBase //------------------------------------------------------------- if (i < in_raw.size()) { // we either have 'n' or 1 mzML ... 
use the correct one in each iteration - FileHandler().loadExperiment(in_raw[i], exp, {FileTypes::MZML}); + FileHandler().loadExperiment(in_raw[i], exp, {FileTypes::MZML}, log_type_); spec_map.calculateMap(exp); } @@ -283,7 +283,7 @@ class TOPPQualityControl : public TOPPBase FeatureMap fmap_local; if (!in_postFDR.empty()) { - FileHandler().loadFeatures(in_postFDR[i], fmap_local, {FileTypes::FEATUREXML}); + FileHandler().loadFeatures(in_postFDR[i], fmap_local, {FileTypes::FEATUREXML}, log_type_); fmap = &fmap_local; } else @@ -387,7 +387,7 @@ class TOPPQualityControl : public TOPPBase StringList out_feat = getStringList_("out_feat"); if (!out_feat.empty()) { - FileHandler().storeFeatures(out_feat[i], *fmap, {FileTypes::FEATUREXML}); + FileHandler().storeFeatures(out_feat[i], *fmap, {FileTypes::FEATUREXML}, log_type_); } //------------------------------------------------------------- // Annotate calculated meta values from FeatureMap to given ConsensusMap @@ -460,7 +460,7 @@ class TOPPQualityControl : public TOPPBase String out_cm = getStringOption_("out_cm"); if (!out_cm.empty()) { - FileHandler().storeConsensusFeatures(out_cm, cmap, {FileTypes::CONSENSUSXML}); + FileHandler().storeConsensusFeatures(out_cm, cmap, {FileTypes::CONSENSUSXML}, log_type_); } String out = getStringOption_("out"); diff --git a/src/topp/RNPxlXICFilter.cpp b/src/topp/RNPxlXICFilter.cpp index 1de44b1a0e2..ed5204a44ed 100644 --- a/src/topp/RNPxlXICFilter.cpp +++ b/src/topp/RNPxlXICFilter.cpp @@ -199,10 +199,10 @@ class TOPPRNPxlXICFilter : // load experiments PeakMap exp_control; - FileHandler().loadExperiment(control_mzml, exp_control, {FileTypes::MZML}); + FileHandler().loadExperiment(control_mzml, exp_control, {FileTypes::MZML}, log_type_); PeakMap exp_treatment; - FileHandler().loadExperiment(treatment_mzml, exp_treatment, {FileTypes::MZML}); + FileHandler().loadExperiment(treatment_mzml, exp_treatment, {FileTypes::MZML}, log_type_); // extract precursor mz and rts vector pc_mzs; @@ -261,7 
+261,7 @@ class TOPPRNPxlXICFilter : } } - FileHandler().storeExperiment(out_mzml, exp_out, {FileTypes::MZML}); + FileHandler().storeExperiment(out_mzml, exp_out, {FileTypes::MZML}, log_type_); return EXECUTION_OK; } diff --git a/src/topp/SeedListGenerator.cpp b/src/topp/SeedListGenerator.cpp index 2d0be2271aa..c999644c88e 100644 --- a/src/topp/SeedListGenerator.cpp +++ b/src/topp/SeedListGenerator.cpp @@ -129,7 +129,7 @@ namespace OpenMS if (in_type == FileTypes::CONSENSUSXML) { ConsensusMap consensus; - FileHandler().loadConsensusFeatures(in, consensus, {FileTypes::CONSENSUSXML}); + FileHandler().loadConsensusFeatures(in, consensus, {FileTypes::CONSENSUSXML}, log_type_); num_maps = consensus.getColumnHeaders().size(); ConsensusMap::ColumnHeaders ch = consensus.getColumnHeaders(); size_t map_count = 0; @@ -157,21 +157,21 @@ namespace OpenMS else if (in_type == FileTypes::MZML) { PeakMap experiment; - FileHandler().loadExperiment(in, experiment, {FileTypes::MZML}); + FileHandler().loadExperiment(in, experiment, {FileTypes::MZML}, log_type_); seed_gen.generateSeedList(experiment, seed_lists[0]); } else if (in_type == FileTypes::IDXML) { vector proteins; vector peptides; - FileHandler().loadIdentifications(in, proteins, peptides, {FileTypes::IDXML}); + FileHandler().loadIdentifications(in, proteins, peptides, {FileTypes::IDXML}, log_type_); seed_gen.generateSeedList(peptides, seed_lists[0], getFlag_("use_peptide_mass")); } else if (in_type == FileTypes::FEATUREXML) { FeatureMap features; - FileHandler().loadFeatures(in, features, {FileTypes::FEATUREXML}); + FileHandler().loadFeatures(in, features, {FileTypes::FEATUREXML}, log_type_); seed_gen.generateSeedList( features.getUnassignedPeptideIdentifications(), seed_lists[0]); } @@ -187,7 +187,7 @@ namespace OpenMS addDataProcessing_(features, getProcessingInfo_( DataProcessing::DATA_PROCESSING)); OPENMS_LOG_INFO << "Writing " << features.size() << " seeds to " << out[num_maps] << endl; - 
FileHandler().storeFeatures(out[num_maps], features, {FileTypes::FEATUREXML}); + FileHandler().storeFeatures(out[num_maps], features, {FileTypes::FEATUREXML}, log_type_); } return EXECUTION_OK; diff --git a/src/topp/SpecLibCreator.cpp b/src/topp/SpecLibCreator.cpp index 358851d2ecb..75add9ffb3a 100644 --- a/src/topp/SpecLibCreator.cpp +++ b/src/topp/SpecLibCreator.cpp @@ -171,7 +171,7 @@ class TOPPSpecLibCreator : } else if (in_type == FileTypes::MZDATA || in_type == FileTypes::MZXML) { - FileHandler().loadExperiment(spec, msexperiment, {FileTypes::MZDATA, FileTypes::MZXML}); + FileHandler().loadExperiment(spec, msexperiment, {FileTypes::MZDATA, FileTypes::MZXML}, log_type_); } if (msexperiment.getMinRT() == 0) { @@ -240,7 +240,7 @@ class TOPPSpecLibCreator : // writing output //------------------------------------------------------------- in_type = fh.getType(out); - FileHandler().storeExperiment(out, library, {FileTypes::MZDATA, FileTypes::MZXML, FileTypes::MSP}); + FileHandler().storeExperiment(out, library, {FileTypes::MZDATA, FileTypes::MZXML, FileTypes::MSP}, log_type_); return EXECUTION_OK; } diff --git a/src/topp/TextExporter.cpp b/src/topp/TextExporter.cpp index 3d96cecfc66..72689e2e3de 100644 --- a/src/topp/TextExporter.cpp +++ b/src/topp/TextExporter.cpp @@ -666,7 +666,7 @@ namespace OpenMS //------------------------------------------------------------- FeatureMap feature_map; - FileHandler().loadFeatures(in, feature_map, {FileTypes::FEATUREXML}); + FileHandler().loadFeatures(in, feature_map, {FileTypes::FEATUREXML}, log_type_); // extract common id and hit meta values StringList peptide_id_meta_keys; @@ -828,7 +828,7 @@ namespace OpenMS ConsensusMap consensus_map; - FileHandler().loadConsensusFeatures(in, consensus_map, {FileTypes::CONSENSUSXML}); + FileHandler().loadConsensusFeatures(in, consensus_map, {FileTypes::CONSENSUSXML}, log_type_); // for optional export of ConsensusFeature meta values, collect all possible meta value keys std::set 
meta_value_keys; @@ -1335,7 +1335,7 @@ namespace OpenMS { vector prot_ids; vector pep_ids; - FileHandler().loadIdentifications(in, prot_ids, pep_ids, {FileTypes::IDXML}); + FileHandler().loadIdentifications(in, prot_ids, pep_ids, {FileTypes::IDXML}, log_type_); StringList peptide_id_meta_keys; StringList peptide_hit_meta_keys; StringList protein_hit_meta_keys; @@ -1436,7 +1436,7 @@ namespace OpenMS else if (in_type == FileTypes::MZML) { PeakMap exp; - FileHandler().loadExperiment(in, exp, {FileTypes::MZML}, ProgressLogger::NONE, false, false); + FileHandler().loadExperiment(in, exp, {FileTypes::MZML}, log_type_, false, false); if (exp.getSpectra().empty() && exp.getChromatograms().empty()) { From 6499d8dc489c80e7e0fdbed0c87f74012417ffaa Mon Sep 17 00:00:00 2001 From: Bela Bennet Pfeffer Date: Fri, 28 Mar 2025 15:04:53 +0100 Subject: [PATCH 02/31] Potential Runtime Improvement of GaussTraceFilter.cpp^ --- .../source/FEATUREFINDER/FeatureFinderAlgorithmPicked.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/openms/source/FEATUREFINDER/FeatureFinderAlgorithmPicked.cpp b/src/openms/source/FEATUREFINDER/FeatureFinderAlgorithmPicked.cpp index e819450dfcf..9dbe24853a8 100644 --- a/src/openms/source/FEATUREFINDER/FeatureFinderAlgorithmPicked.cpp +++ b/src/openms/source/FEATUREFINDER/FeatureFinderAlgorithmPicked.cpp @@ -1007,7 +1007,7 @@ namespace OpenMS { //store map of abort reasons for failed seeds FeatureMap abort_map; - abort_map.reserve(abort_reasons_.size()); + abort_map.reserve( abort_reasons_.size()); Size counter = 0; for (std::map::iterator it2 = abort_reasons_.begin(); it2 != abort_reasons_.end(); ++it2, ++counter) { From 60442f0554a8d029bdc1848679da8c205353f897 Mon Sep 17 00:00:00 2001 From: Bela Bennet Pfeffer Date: Fri, 28 Mar 2025 15:26:16 +0100 Subject: [PATCH 03/31] patched From 44fefeb34fa59767483b4e92356ae419371bcdf0 Mon Sep 17 00:00:00 2001 From: Bela Bennet Pfeffer Date: Wed, 9 Apr 2025 12:34:56 +0200 Subject: [PATCH 04/31] 
modified XMLHandler.h/cpp Changed Implementation of appendASCII added some helping functions to class StringManager --- .../OpenMS/FORMAT/HANDLERS/XMLHandler.h | 19 +++- .../source/FORMAT/HANDLERS/XMLHandler.cpp | 101 +++++++++++++++--- 2 files changed, 101 insertions(+), 19 deletions(-) diff --git a/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h b/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h index 9d4f98665ce..38279edbb9f 100644 --- a/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h +++ b/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h @@ -224,8 +224,18 @@ namespace OpenMS // Converts from a wide-character string to a narrow-character string. inline static String toNative_(const XMLCh* str) - { - return String(unique_xerces_ptr(xercesc::XMLString::transcode(str)).get()); + { + String r; + XMLSize_t l = xercesc::XMLString::stringLen(str); + if(isASCII(str, l)) + { + appendASCII(str,l,r); + } + else + { + r = (unique_xerces_ptr(xercesc::XMLString::transcode(str)).get()); + } + return r; } // Converts from a wide-character string to a narrow-character string. 
@@ -283,7 +293,12 @@ namespace OpenMS { return toNative_(str); } + /// Checks if supplied if chars in XMLCh* can be encoded with ASCII + static bool isASCII(const XMLCh * chars, const XMLSize_t length); + /// Compresses eight 8x16bit Chars in XMLCh* to 8x8bit Chars by cutting upper byte + static void compress64 (const XMLCh * input_it, char* output_it); + /** * @brief Transcodes the supplied XMLCh* and appends it to the OpenMS String * diff --git a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp index 6af312971b6..8298d78afa5 100644 --- a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp +++ b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -302,7 +303,7 @@ namespace OpenMS::Internal } // no value, although there should be a numerical value else if (term.xref_type != ControlledVocabulary::CVTerm::NONE && term.xref_type != ControlledVocabulary::CVTerm::XSD_STRING && // should be numerical - !cv.isChildOf(accession, "MS:1000513") // here the value type relates to the binary data array, not the 'value=' attribute! + !cv.isChildOf(accession, "MS:1000513") // here the value type relates to the bits data array, not the 'value=' attribute! ) { warning(LOAD, String("The CV term '") + accession + " - " + term.name + "' used in tag '" + parent_tag + "' should have a numerical value. 
The value is '" + value + "'."); @@ -425,34 +426,100 @@ namespace OpenMS::Internal StringManager::~StringManager() = default; + void StringManager::compress64 (const XMLCh* input_it, char* output_it) { + alignas(16) simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)input_it); + simde__m128i compressed = simde_mm_packus_epi16(bits, simde_mm_setzero_si128()); + simde_mm_storel_epi64((simde__m128i*)output_it, compressed); + } + + bool StringManager::isASCII(const XMLCh * chars, const XMLSize_t length) { + + + std::div_t quotient_and_remainder = std::div(length, 8); + size_t quotient = quotient_and_remainder.quot; // Ganzzahliger Quotient + size_t remainder = quotient_and_remainder.rem; + // std::cout << "Remainer: " << remainder << std::endl; + // std::cout << "Quotient: " << quotient << std::endl; + // cout << "length: " << length << endl; + + const XMLCh* it = chars; + const XMLCh* end = it + (quotient * 8); + simde__m128i mask = simde_mm_set1_epi16(0xFF00); + bool bitmask = true; + while (it != end && bitmask){ + simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)it); + simde__m128i zero = simde_mm_setzero_si128(); + simde__m128i andOP = simde_mm_and_si128(bits, mask); + simde__m128i cmp = simde_mm_cmpeq_epi16(andOP, zero); + bitmask = simde_mm_movemask_epi8(cmp) == 0xFFFF; + // bitmask = simde_mm_testz_si128(bits, mask); + it+=8; + } + + end += remainder; + while (it != end && bitmask){ + bitmask = *it & 0xFF00; + it++; + } + + return bitmask; + } + void StringManager::appendASCII(const XMLCh * chars, const XMLSize_t length, String & result) { - // XMLCh are characters in UTF16 (usually stored as 16bit unsigned - // short but this is not guaranteed). - // We know that the Base64 string here can only contain plain ASCII - // and all bytes except the least significant one will be zero. Thus - // we can convert to char directly (only keeping the least - // significant byte). 
+ // XMLCh are characters in UTF16 (usually stored as 16bit unsigned + // short but this is not guaranteed). + // We know that the Base64 string here can only contain plain ASCII + // and all bytes except the least significant one will be zero. Thus + // we can convert to char directly (only keeping the least + // significant byte). + + + + + std::div_t quotient_and_remainder = std::div(length, 8); + size_t quotient = quotient_and_remainder.quot; // Ganzzahliger Quotient + size_t remainder = quotient_and_remainder.rem; + // std::cout << "Remainer: " << remainder << std::endl; + // std::cout << "Quotient: " << quotient << std::endl; + // cout << "length: " << length << endl; + const XMLCh* it = chars; - const XMLCh* end = it + length; + const XMLCh* end = it + (quotient * 8); + // std::cout << "Anzahl der Elemente zwischen it1 und it2: " + // << std::distance(it, end) << std::endl; size_t curr_size = result.size(); result.resize(curr_size + length); std::string::iterator str_it = result.begin(); std::advance(str_it, curr_size); + // int i = 0; + + //Copy Block of 8 chars at a time. Then jumps to the next eight Blocks while (it!=end) - { - *str_it = (char)*it; - ++str_it; - ++it; + { + // std::cout << "Aktueller Wert: " << *it << std::endl; + + compress64(it, &(*str_it)); + // printf("loop: %d\n", i); + str_it += 8; + it += 8; + // i++; } - // This is ca. 
50 % faster than - // for (size_t i = 0; i < length; i++) - // { - // result[curr_size + i] = (char)chars[i]; - // } + + + end = it + remainder; + + while (it!=end) + { + *str_it = static_cast(*it & 0xFF); + // std::cout << "Aktueller Wert: " << *str_it << std::endl; + str_it ++; + it ++; + // i++; + } } From d35a0435d973e749e83035d7240d802678df3af0 Mon Sep 17 00:00:00 2001 From: Bela Bennet Pfeffer Date: Fri, 11 Apr 2025 15:28:52 +0200 Subject: [PATCH 05/31] added test for XMLHeader, Debugged it --- .../source/FORMAT/HANDLERS/XMLHandler.cpp | 16 +- .../class_tests/openms/executables.cmake | 1 + .../openms/source/XMLHandler_test.cpp | 154 ++++++++++++++++++ 3 files changed, 166 insertions(+), 5 deletions(-) create mode 100644 src/tests/class_tests/openms/source/XMLHandler_test.cpp diff --git a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp index 8298d78afa5..b9094061287 100644 --- a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp +++ b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp @@ -438,14 +438,20 @@ namespace OpenMS::Internal std::div_t quotient_and_remainder = std::div(length, 8); size_t quotient = quotient_and_remainder.quot; // Ganzzahliger Quotient size_t remainder = quotient_and_remainder.rem; - // std::cout << "Remainer: " << remainder << std::endl; - // std::cout << "Quotient: " << quotient << std::endl; - // cout << "length: " << length << endl; + std::cout << "Remainer: " << remainder << std::endl; + std::cout << "Quotient: " << quotient << std::endl; + std::cout << "length: " << length << endl; const XMLCh* it = chars; const XMLCh* end = it + (quotient * 8); simde__m128i mask = simde_mm_set1_epi16(0xFF00); bool bitmask = true; + + if (length == 0) + { + return false; + } + while (it != end && bitmask){ simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)it); simde__m128i zero = simde_mm_setzero_si128(); @@ -458,10 +464,10 @@ namespace OpenMS::Internal end += remainder; while (it != end && 
bitmask){ - bitmask = *it & 0xFF00; + bitmask = !(*it & 0xFF00); it++; } - + std::cout << "bitmask: " << bitmask << std::endl; return bitmask; } diff --git a/src/tests/class_tests/openms/executables.cmake b/src/tests/class_tests/openms/executables.cmake index 656625cea4c..83b2508d6b0 100644 --- a/src/tests/class_tests/openms/executables.cmake +++ b/src/tests/class_tests/openms/executables.cmake @@ -251,6 +251,7 @@ set(format_executables_list UnimodXMLFile_test XMassFile_test XMLFile_test + XMLHandler_test XMLValidator_test XQuestResultXMLFile_test XTandemInfile_test diff --git a/src/tests/class_tests/openms/source/XMLHandler_test.cpp b/src/tests/class_tests/openms/source/XMLHandler_test.cpp new file mode 100644 index 00000000000..41232f755e4 --- /dev/null +++ b/src/tests/class_tests/openms/source/XMLHandler_test.cpp @@ -0,0 +1,154 @@ + +#include +#include +#include +#include +#include + +#include + +using namespace OpenMS::Internal; + + + + + +START_TEST(StringManager, "$Id$") + + +const XMLCh russianHello[] = { + 0x041F, 0x0440, 0x0438, 0x0432, 0x0435, 0x0442, 0x043C, + 0x0438, 0x0440, // "Привет мир" (Hello World in Russian) +}; +XMLSize_t r_length = xercesc::XMLString::stringLen(russianHello); + +const XMLCh ascii[] = { + 0x0048,0x0065,0x006C,0x006C,0x006F,0x002C,0x0057,0x006F, + 0x0072,0x006C,0x0064,0x0021}; +XMLSize_t a_length = xercesc::XMLString::stringLen(ascii); + +const XMLCh mixed[] = { + 0x0048, 0x0065,0x0432, 0x0435, 0x0442, 0x043C, 0x006F, + 0x0072,0x006C,0x0064, 0x0021 }; +XMLSize_t m_length = xercesc::XMLString::stringLen(mixed); + +const XMLCh empty[] = {0}; +XMLSize_t e_length = xercesc::XMLString::stringLen(empty); +std::cout << e_length << std::endl; + +const XMLCh upperBoundary [] = {0x00FF,0x00FF}; +XMLSize_t u_length = xercesc::XMLString::stringLen(upperBoundary); + +bool isAscii = false; + +START_SECTION(if input is ascii) + isAscii = StringManager::isASCII(ascii,a_length); + std::cout << "1 \n"; + TEST_TRUE(isAscii) + isAscii = 
StringManager::isASCII(russianHello,r_length); + std::cout << "2 \n"; + TEST_FALSE(isAscii) + isAscii = StringManager::isASCII(mixed,m_length); + std::cout << "3 \n"; + TEST_FALSE(isAscii) + isAscii = StringManager::isASCII(empty,e_length); + std::cout << "4 \n"; + TEST_FALSE(isAscii) + isAscii = StringManager::isASCII(upperBoundary,u_length); + std::cout << "5 \n"; + TEST_TRUE(isAscii) +END_SECTION + +const XMLCh eight_block_negative[] = {0xFFFF,0xFFFE,0xFFFB,0xFFF6,0xFFEC,0xFFCE,0xFF9C,0xFE00}; + +const XMLCh eight_block[] = {0x0048,0x0065,0x006C,0x006C,0x006F,0x002C,0x0057,0x006F}; + +const XMLCh eight_block_mixed[] ={0x0042,0x0045,0x004C,0x0041,0xFFFF,0xFFFE,0xFFFB,0xFFF6}; + +const XMLCh eight_block_kadabra[] = { + 0x004B, // K + 0x0041, // A + 0x0044, // D + 0x0041, // A + 0x0042, // B + 0x0052, // R + 0x0041, // A + 0x0021 // ! +}; + +START_SECTION(if Utf16 to Ascii Compression works right) + char* output1 = new char [9]; + output1[8] = '\0'; + StringManager::compress64(eight_block,output1); + std::string res1_str = "Hello,Wo"; + std::string o1_str (output1); + TEST_STRING_EQUAL(o1_str,res1_str); + delete[] output1; + + + char* output2 = new char [9]; + output2 [8] = '\0'; + char res2 [9] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00}; + res2[8] = '\0'; + StringManager::compress64(eight_block_negative,output2); + std::string res2_str (res2); + std::string o2_str(output2); + TEST_STRING_EQUAL(o2_str, res2_str); + delete[] output2; + + char* output3 = new char [9]; + output3 [8] = '\0'; + char res3 [9] = {0x42,0x45,0x4C,0x41,0x00,0x00,0x00,0x00}; + res3[8] = '\0'; + StringManager::compress64(eight_block_mixed,output3); + std::string res3_str (res3); + std::string o3_str(output3); + TEST_STRING_EQUAL(o3_str, res3_str); + delete[] output3; + + char* output4 = new char [13]; + output4 [0] ='A'; + output4 [1] ='B'; + output4 [2] ='R'; + output4 [3] ='A'; + output3 [12] = '\0'; + + StringManager::compress64(eight_block_kadabra,(output4+4)); + std::string res4_str = 
"ABRAKADABRA!"; + std::string o4_str(output4); + TEST_STRING_EQUAL(o4_str, res4_str); + delete[] output4; + +END_SECTION + +//Tests Number of Chars not Dividable by 8 +OpenMS::String o5_str; +std::string res5_str = "Hello,World!"; + +//Checks how the Function handles Data thats already stored in Output string +OpenMS::String o6_str = "Gruess Gott und "; +std::string res6_str = "Gruess Gott und Hello,World!"; + +OpenMS::String o7_str; +std::string res7_str = ""; + + +START_SECTION(if appendASCII works) + + StringManager::appendASCII(ascii,a_length,o5_str); + TEST_STRING_EQUAL(o5_str, res5_str); + + StringManager::appendASCII(ascii,a_length,o6_str); + TEST_STRING_EQUAL(o6_str, res6_str); + + StringManager::appendASCII(empty,e_length,o7_str); + TEST_STRING_EQUAL(o7_str, res7_str); + std::cout << o7_str.size() << std::endl; + +END_SECTION + +END_TEST + + + + From 9a2d0b4f8d258969b25a54ccf375dc8a79639ef1 Mon Sep 17 00:00:00 2001 From: Bela Bennet Pfeffer Date: Mon, 14 Apr 2025 14:35:54 +0200 Subject: [PATCH 06/31] t rid of print statements in function --- src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp index b9094061287..81c7862b30b 100644 --- a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp +++ b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp @@ -438,9 +438,9 @@ namespace OpenMS::Internal std::div_t quotient_and_remainder = std::div(length, 8); size_t quotient = quotient_and_remainder.quot; // Ganzzahliger Quotient size_t remainder = quotient_and_remainder.rem; - std::cout << "Remainer: " << remainder << std::endl; - std::cout << "Quotient: " << quotient << std::endl; - std::cout << "length: " << length << endl; + // std::cout << "Remainer: " << remainder << std::endl; + // std::cout << "Quotient: " << quotient << std::endl; + // std::cout << "length: " << length << endl; const XMLCh* it = 
chars; const XMLCh* end = it + (quotient * 8); @@ -467,7 +467,7 @@ namespace OpenMS::Internal bitmask = !(*it & 0xFF00); it++; } - std::cout << "bitmask: " << bitmask << std::endl; + // std::cout << "bitmask: " << bitmask << std::endl; return bitmask; } From ee6bd12d64111936733b172e0888f1d89f60cabf Mon Sep 17 00:00:00 2001 From: Bela Bennet Pfeffer Date: Tue, 15 Apr 2025 13:50:43 +0200 Subject: [PATCH 07/31] Moved the function in the right part of the String Manager Class --- .../source/FORMAT/HANDLERS/XMLHandler.cpp | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp index 81c7862b30b..41225fbb038 100644 --- a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp +++ b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp @@ -418,14 +418,6 @@ namespace OpenMS::Internal } } - //******************************************************************************************************************* - - StringManager::StringManager() - = default; - - StringManager::~StringManager() - = default; - void StringManager::compress64 (const XMLCh* input_it, char* output_it) { alignas(16) simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)input_it); simde__m128i compressed = simde_mm_packus_epi16(bits, simde_mm_setzero_si128()); @@ -467,7 +459,6 @@ namespace OpenMS::Internal bitmask = !(*it & 0xFF00); it++; } - // std::cout << "bitmask: " << bitmask << std::endl; return bitmask; } @@ -526,7 +517,17 @@ namespace OpenMS::Internal it ++; // i++; } - } + //******************************************************************************************************************* + + StringManager::StringManager() + = default; + + StringManager::~StringManager() + = default; + + + + } // namespace OpenMS // namespace Internal From c3155c4f778ac7dbe304c9eac1583baf37194e54 Mon Sep 17 00:00:00 2001 From: Bela Bennet Pfeffer Date: Thu, 17 Apr 2025 13:42:47 +0200 
Subject: [PATCH 08/31] fferent Impelmentation of append/isAscii with potentially silghtly less overhead --- .../source/FORMAT/HANDLERS/XMLHandler.cpp | 58 +++++++------------ 1 file changed, 21 insertions(+), 37 deletions(-) diff --git a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp index 41225fbb038..01f4f335b09 100644 --- a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp +++ b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp @@ -427,15 +427,10 @@ namespace OpenMS::Internal bool StringManager::isASCII(const XMLCh * chars, const XMLSize_t length) { - std::div_t quotient_and_remainder = std::div(length, 8); - size_t quotient = quotient_and_remainder.quot; // Ganzzahliger Quotient - size_t remainder = quotient_and_remainder.rem; - // std::cout << "Remainer: " << remainder << std::endl; - // std::cout << "Quotient: " << quotient << std::endl; - // std::cout << "length: " << length << endl; - - const XMLCh* it = chars; - const XMLCh* end = it + (quotient * 8); + size_t quotient = length / 8; // Ganzzahliger Quotient + size_t remainder = length % 8; + + const XMLCh* input_ptr = chars; simde__m128i mask = simde_mm_set1_epi16(0xFF00); bool bitmask = true; @@ -444,20 +439,19 @@ namespace OpenMS::Internal return false; } - while (it != end && bitmask){ - simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)it); + for (size_t i = 0; i < quotient && bitmask; i++) + { + simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)input_ptr); simde__m128i zero = simde_mm_setzero_si128(); simde__m128i andOP = simde_mm_and_si128(bits, mask); simde__m128i cmp = simde_mm_cmpeq_epi16(andOP, zero); bitmask = simde_mm_movemask_epi8(cmp) == 0xFFFF; - // bitmask = simde_mm_testz_si128(bits, mask); - it+=8; + input_ptr+=8; } - end += remainder; - while (it != end && bitmask){ - bitmask = !(*it & 0xFF00); - it++; + for (size_t i = 0; i < remainder && bitmask; i++) + { + bitmask = !(input_ptr[i] & 0xFF00); } return bitmask; } @@ -471,7 
+465,10 @@ namespace OpenMS::Internal // we can convert to char directly (only keeping the least // significant byte). + size_t quotient = length / 8; + size_t remainder = length % 8; + const XMLCh* input_ptr = chars; std::div_t quotient_and_remainder = std::div(length, 8); @@ -489,33 +486,20 @@ namespace OpenMS::Internal size_t curr_size = result.size(); result.resize(curr_size + length); - std::string::iterator str_it = result.begin(); - std::advance(str_it, curr_size); - // int i = 0; + char* output_ptr = &result[curr_size]; //Copy Block of 8 chars at a time. Then jumps to the next eight Blocks - while (it!=end) + for (size_t i = 0; i < quotient; i++) { - // std::cout << "Aktueller Wert: " << *it << std::endl; - - compress64(it, &(*str_it)); - // printf("loop: %d\n", i); - str_it += 8; - it += 8; - // i++; + compress64(input_ptr, output_ptr); + input_ptr += 8; + output_ptr += 8; } - - end = it + remainder; - - while (it!=end) + for (size_t i = 0; i < remainder; i++) { - *str_it = static_cast(*it & 0xFF); - // std::cout << "Aktueller Wert: " << *str_it << std::endl; - str_it ++; - it ++; - // i++; + output_ptr[i] = static_cast(input_ptr[i] & 0xFF); } } From 109411ae2a00539bba6eb448ff60e79685168ea0 Mon Sep 17 00:00:00 2001 From: Bela Bennet Pfeffer Date: Thu, 17 Apr 2025 14:53:22 +0200 Subject: [PATCH 09/31] Test formatted to Conventions --- src/tests/class_tests/openms/source/XMLHandler_test.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/tests/class_tests/openms/source/XMLHandler_test.cpp b/src/tests/class_tests/openms/source/XMLHandler_test.cpp index 41232f755e4..d3fed30bd98 100644 --- a/src/tests/class_tests/openms/source/XMLHandler_test.cpp +++ b/src/tests/class_tests/openms/source/XMLHandler_test.cpp @@ -41,7 +41,7 @@ XMLSize_t u_length = xercesc::XMLString::stringLen(upperBoundary); bool isAscii = false; -START_SECTION(if input is ascii) +START_SECTION(isASCII(const XMLCh * chars, const XMLSize_t length)) isAscii = 
StringManager::isASCII(ascii,a_length); std::cout << "1 \n"; TEST_TRUE(isAscii) @@ -76,7 +76,7 @@ const XMLCh eight_block_kadabra[] = { 0x0021 // ! }; -START_SECTION(if Utf16 to Ascii Compression works right) +START_SECTION(compress64 (const XMLCh* input_it, char* output_it)) char* output1 = new char [9]; output1[8] = '\0'; StringManager::compress64(eight_block,output1); @@ -133,7 +133,7 @@ OpenMS::String o7_str; std::string res7_str = ""; -START_SECTION(if appendASCII works) +START_SECTION(appendASCII(const XMLCh * chars, const XMLSize_t length, String & result)) StringManager::appendASCII(ascii,a_length,o5_str); TEST_STRING_EQUAL(o5_str, res5_str); From 9229e841deb25821ae743ea3adcdc3b2b60810ae Mon Sep 17 00:00:00 2001 From: Bela Bennet Pfeffer Date: Tue, 22 Apr 2025 16:49:09 +0200 Subject: [PATCH 10/31] Changed the packus_epi16 function with the shuffle function for improved performance inside the compress64 function. Edited the the XMLHandler_test to work with new implemantation and got rid of new/delete: --- .../source/FORMAT/HANDLERS/XMLHandler.cpp | 5 +- .../openms/source/XMLHandler_test.cpp | 62 +++++++------------ 2 files changed, 28 insertions(+), 39 deletions(-) diff --git a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp index 01f4f335b09..259eee24f23 100644 --- a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp +++ b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp @@ -420,7 +420,10 @@ namespace OpenMS::Internal void StringManager::compress64 (const XMLCh* input_it, char* output_it) { alignas(16) simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)input_it); - simde__m128i compressed = simde_mm_packus_epi16(bits, simde_mm_setzero_si128()); + // simde__m128i compressed = simde_mm_packus_epi16(bits, simde_mm_setzero_si128()); + const simde__m128i shuffleMask = simde_mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, + -1, -1, -1, -1, -1, -1, -1, -1); + simde__m128i compressed = 
simde_mm_shuffle_epi8(bits,shuffleMask); simde_mm_storel_epi64((simde__m128i*)output_it, compressed); } diff --git a/src/tests/class_tests/openms/source/XMLHandler_test.cpp b/src/tests/class_tests/openms/source/XMLHandler_test.cpp index d3fed30bd98..19e56614144 100644 --- a/src/tests/class_tests/openms/source/XMLHandler_test.cpp +++ b/src/tests/class_tests/openms/source/XMLHandler_test.cpp @@ -24,19 +24,18 @@ XMLSize_t r_length = xercesc::XMLString::stringLen(russianHello); const XMLCh ascii[] = { 0x0048,0x0065,0x006C,0x006C,0x006F,0x002C,0x0057,0x006F, - 0x0072,0x006C,0x0064,0x0021}; + 0x0072,0x006C,0x0064,0x0021, 0x0000}; XMLSize_t a_length = xercesc::XMLString::stringLen(ascii); const XMLCh mixed[] = { 0x0048, 0x0065,0x0432, 0x0435, 0x0442, 0x043C, 0x006F, - 0x0072,0x006C,0x0064, 0x0021 }; + 0x0072,0x006C,0x0064, 0x0021, 0x0000 }; XMLSize_t m_length = xercesc::XMLString::stringLen(mixed); const XMLCh empty[] = {0}; XMLSize_t e_length = xercesc::XMLString::stringLen(empty); -std::cout << e_length << std::endl; -const XMLCh upperBoundary [] = {0x00FF,0x00FF}; +const XMLCh upperBoundary [] = {0x00FF,0x00FF,0x0000}; XMLSize_t u_length = xercesc::XMLString::stringLen(upperBoundary); bool isAscii = false; @@ -59,11 +58,11 @@ START_SECTION(isASCII(const XMLCh * chars, const XMLSize_t length)) TEST_TRUE(isAscii) END_SECTION -const XMLCh eight_block_negative[] = {0xFFFF,0xFFFE,0xFFFB,0xFFF6,0xFFEC,0xFFCE,0xFF9C,0xFE00}; +const XMLCh eight_block_negative[] = {0x0148,0x0165,0x016C,0x016C,0x016F,0x012C,0x0157,0x016F}; const XMLCh eight_block[] = {0x0048,0x0065,0x006C,0x006C,0x006F,0x002C,0x0057,0x006F}; -const XMLCh eight_block_mixed[] ={0x0042,0x0045,0x004C,0x0041,0xFFFF,0xFFFE,0xFFFB,0xFFF6}; +const XMLCh eight_block_mixed[] ={0x0042,0x0045,0x004C,0x0041,0x0142,0x0145,0x014C,0x0141}; const XMLCh eight_block_kadabra[] = { 0x004B, // K @@ -77,47 +76,34 @@ const XMLCh eight_block_kadabra[] = { }; START_SECTION(compress64 (const XMLCh* input_it, char* output_it)) - char* 
output1 = new char [9]; - output1[8] = '\0'; - StringManager::compress64(eight_block,output1); + std::string o1_str(8,'\0'); + StringManager::compress64(eight_block,o1_str.data()); std::string res1_str = "Hello,Wo"; - std::string o1_str (output1); TEST_STRING_EQUAL(o1_str,res1_str); - delete[] output1; - char* output2 = new char [9]; - output2 [8] = '\0'; - char res2 [9] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00}; - res2[8] = '\0'; - StringManager::compress64(eight_block_negative,output2); - std::string res2_str (res2); - std::string o2_str(output2); + std::string o2_str(8,'\0'); + StringManager::compress64(eight_block_negative,o2_str.data()); + std::string res2_str = res1_str; TEST_STRING_EQUAL(o2_str, res2_str); - delete[] output2; - - char* output3 = new char [9]; - output3 [8] = '\0'; - char res3 [9] = {0x42,0x45,0x4C,0x41,0x00,0x00,0x00,0x00}; - res3[8] = '\0'; - StringManager::compress64(eight_block_mixed,output3); - std::string res3_str (res3); - std::string o3_str(output3); + + + std::string o3_str(8,'\0'); + // char res3 [9] = {0x42,0x45,0x4C,0x41,0x42,0x45,0x4C,0x41}; + // res3[8] = '\0'; + StringManager::compress64(eight_block_mixed,o3_str.data()); + std::string res3_str = {0x42,0x45,0x4C,0x41,0x42,0x45,0x4C,0x41}; TEST_STRING_EQUAL(o3_str, res3_str); - delete[] output3; - - char* output4 = new char [13]; - output4 [0] ='A'; - output4 [1] ='B'; - output4 [2] ='R'; - output4 [3] ='A'; - output3 [12] = '\0'; + + std::string o4_str(12,'\0'); + o4_str [0] ='A'; + o4_str [1] ='B'; + o4_str [2] ='R'; + o4_str [3] ='A'; - StringManager::compress64(eight_block_kadabra,(output4+4)); + StringManager::compress64(eight_block_kadabra,((o4_str.data())+4)); std::string res4_str = "ABRAKADABRA!"; - std::string o4_str(output4); TEST_STRING_EQUAL(o4_str, res4_str); - delete[] output4; END_SECTION From 7faf6a151b3743713c2a271873de25304e8d6267 Mon Sep 17 00:00:00 2001 From: Bela Bennet Pfeffer Date: Tue, 13 May 2025 11:17:14 +0200 Subject: [PATCH 11/31] Implemented 
strLength function with simde for potential runtime improvement --- .../OpenMS/FORMAT/HANDLERS/XMLHandler.h | 4 ++- .../source/FORMAT/HANDLERS/XMLHandler.cpp | 25 +++++++++++++++++++ .../openms/source/XMLHandler_test.cpp | 9 +++++++ 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h b/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h index 38279edbb9f..6124c6cb206 100644 --- a/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h +++ b/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h @@ -226,7 +226,7 @@ namespace OpenMS inline static String toNative_(const XMLCh* str) { String r; - XMLSize_t l = xercesc::XMLString::stringLen(str); + XMLSize_t l = strLength(str); if(isASCII(str, l)) { appendASCII(str,l,r); @@ -252,6 +252,8 @@ namespace OpenMS /// Destructor ~StringManager(); + static int strLength(const XMLCh* input_ptr); + /// Transcode the supplied C string to a xerces string inline static XercesString convert(const char * str) { diff --git a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp index 259eee24f23..16546ba55ff 100644 --- a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp +++ b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp @@ -418,6 +418,31 @@ namespace OpenMS::Internal } } + int StringManager::strLength(const XMLCh* input_ptr) { + size_t processedChars = 0; + XMLCh* pos_ptr = const_cast(input_ptr); + + while (true) { + simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)pos_ptr); + simde__m128i zero = simde_mm_setzero_si128(); + simde__m128i cmpZero = simde_mm_cmpeq_epi16(bits, zero); + uint16_t zeroMask = simde_mm_movemask_epi8(cmpZero); + + if (zeroMask != 0x0000) { + int bytePosZero = __builtin_ctz(zeroMask); + int charPosZero = bytePosZero / 2; + pos_ptr += charPosZero; + return processedChars + charPosZero; + } + + pos_ptr += 8; + processedChars += 8; + } + + // Reached max length without finding null terminator + 
return 0; + } + void StringManager::compress64 (const XMLCh* input_it, char* output_it) { alignas(16) simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)input_it); // simde__m128i compressed = simde_mm_packus_epi16(bits, simde_mm_setzero_si128()); diff --git a/src/tests/class_tests/openms/source/XMLHandler_test.cpp b/src/tests/class_tests/openms/source/XMLHandler_test.cpp index 19e56614144..b55c6ad27d7 100644 --- a/src/tests/class_tests/openms/source/XMLHandler_test.cpp +++ b/src/tests/class_tests/openms/source/XMLHandler_test.cpp @@ -133,6 +133,15 @@ START_SECTION(appendASCII(const XMLCh * chars, const XMLSize_t length, String & END_SECTION +START_SECTION(appendASCII(const XMLCh * chars, const XMLSize_t length, String & result)) + int o_length = StringManager::strLength(ascii); + TEST_EQUAL(o_length, a_length); + o_length = StringManager::strLength(empty); + TEST_EQUAL(o_length, e_length); + o_length = StringManager::strLength(upperBoundary); + TEST_EQUAL(o_length, u_length); +END_SECTION + END_TEST From 8c2a1b6952ff7bbfb42d07cd9d7c80972ad61ac5 Mon Sep 17 00:00:00 2001 From: Bela Bennet Pfeffer Date: Tue, 13 May 2025 16:11:49 +0200 Subject: [PATCH 12/31] Added strLength Method using simde for Potential runtime improvement. 
Added a for loop to prevent this method from crossing page Boundaries --- .../OpenMS/FORMAT/HANDLERS/XMLHandler.h | 1 + .../source/FORMAT/HANDLERS/MzMLHandler.cpp | 5 +++- .../source/FORMAT/HANDLERS/XMLHandler.cpp | 28 +++++++++++++++---- 3 files changed, 28 insertions(+), 6 deletions(-) diff --git a/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h b/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h index 6124c6cb206..5f4dcc89777 100644 --- a/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h +++ b/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h @@ -21,6 +21,7 @@ #include #include +#include #include #include diff --git a/src/openms/source/FORMAT/HANDLERS/MzMLHandler.cpp b/src/openms/source/FORMAT/HANDLERS/MzMLHandler.cpp index f643342819c..23b62e46c8a 100644 --- a/src/openms/source/FORMAT/HANDLERS/MzMLHandler.cpp +++ b/src/openms/source/FORMAT/HANDLERS/MzMLHandler.cpp @@ -18,8 +18,11 @@ #include #include + #include +using namespace std::literals; + namespace OpenMS::Internal { @@ -267,7 +270,7 @@ namespace OpenMS::Internal UInt meta_string_array_index = 0; for (Size i = 0; i < input_data.size(); i++) //loop over all binary data arrays { - if (input_data[i].meta.getName() != "m/z array" && input_data[i].meta.getName() != "intensity array") // is meta data array? + if (input_data[i].meta.getName() != "m/z array"sv && input_data[i].meta.getName() != "intensity array"sv) // is meta data array? 
{ if (input_data[i].data_type == MzMLHandlerHelper::BinaryData::DT_FLOAT) { diff --git a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp index 16546ba55ff..8abee4b900e 100644 --- a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp +++ b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp @@ -421,8 +421,20 @@ namespace OpenMS::Internal int StringManager::strLength(const XMLCh* input_ptr) { size_t processedChars = 0; XMLCh* pos_ptr = const_cast(input_ptr); + size_t align = (size_t)pos_ptr % 16; + // Prevents Page boundary crossing + for (size_t i = 0; i < align; i++) + { + if (pos_ptr[i] == 0) + { + return processedChars + i; + } + processedChars++; + pos_ptr++; + }; - while (true) { + while (true) + { simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)pos_ptr); simde__m128i zero = simde_mm_setzero_si128(); simde__m128i cmpZero = simde_mm_cmpeq_epi16(bits, zero); @@ -443,8 +455,9 @@ namespace OpenMS::Internal return 0; } - void StringManager::compress64 (const XMLCh* input_it, char* output_it) { - alignas(16) simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)input_it); + void StringManager::compress64 (const XMLCh* input_it, char* output_it) + { + simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)input_it); // simde__m128i compressed = simde_mm_packus_epi16(bits, simde_mm_setzero_si128()); const simde__m128i shuffleMask = simde_mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1); @@ -467,13 +480,18 @@ namespace OpenMS::Internal return false; } - for (size_t i = 0; i < quotient && bitmask; i++) + for (size_t i = 0; i < quotient; i++) { + simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)input_ptr); simde__m128i zero = simde_mm_setzero_si128(); simde__m128i andOP = simde_mm_and_si128(bits, mask); simde__m128i cmp = simde_mm_cmpeq_epi16(andOP, zero); - bitmask = simde_mm_movemask_epi8(cmp) == 0xFFFF; + + if (simde_mm_movemask_epi8(cmp) != 0xFFFF) + { + bitmask = false; + } 
input_ptr+=8; } From c0e1284dc44499b825035c65a97a8b9687f61e82 Mon Sep 17 00:00:00 2001 From: Bela Bennet Pfeffer Date: Tue, 13 May 2025 16:44:33 +0200 Subject: [PATCH 13/31] Added Description of strLength method for documentation --- src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h b/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h index 5f4dcc89777..51ef962aa9f 100644 --- a/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h +++ b/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h @@ -253,6 +253,7 @@ namespace OpenMS /// Destructor ~StringManager(); + /// Calculates the length of a XMLCh* string using SIMDe static int strLength(const XMLCh* input_ptr); /// Transcode the supplied C string to a xerces string From fdf5629224201e84628e030cf6397bb02e75d58c Mon Sep 17 00:00:00 2001 From: Bela Bennet Pfeffer Date: Tue, 13 May 2025 23:27:28 +0200 Subject: [PATCH 14/31] code is now up to the coding conventions --- .../OpenMS/FORMAT/HANDLERS/XMLHandler.h | 2 +- .../source/FORMAT/HANDLERS/XMLHandler.cpp | 197 +++++++++--------- 2 files changed, 99 insertions(+), 100 deletions(-) diff --git a/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h b/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h index 51ef962aa9f..0563d765809 100644 --- a/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h +++ b/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h @@ -11,7 +11,7 @@ #include #include -#include // StringList + #include #include #include diff --git a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp index 8abee4b900e..6441a048bbc 100644 --- a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp +++ b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp @@ -418,134 +418,133 @@ namespace OpenMS::Internal } } - int StringManager::strLength(const XMLCh* input_ptr) { - size_t processedChars = 0; + int 
StringManager::strLength(const XMLCh* input_ptr) + { + size_t processed_chars = 0; XMLCh* pos_ptr = const_cast(input_ptr); size_t align = (size_t)pos_ptr % 16; - // Prevents Page boundary crossing - for (size_t i = 0; i < align; i++) + + // Prevent crossing page boundaries + for (size_t i = 0; i < align; ++i) { if (pos_ptr[i] == 0) { - return processedChars + i; + return processed_chars + i; } - processedChars++; - pos_ptr++; - }; + ++processed_chars; + ++pos_ptr; + } - while (true) + while (true) { - simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)pos_ptr); - simde__m128i zero = simde_mm_setzero_si128(); - simde__m128i cmpZero = simde_mm_cmpeq_epi16(bits, zero); - uint16_t zeroMask = simde_mm_movemask_epi8(cmpZero); + simde__m128i bits = simde_mm_loadu_si128(reinterpret_cast(pos_ptr)); + simde__m128i zero = simde_mm_setzero_si128(); + simde__m128i cmp_zero = simde_mm_cmpeq_epi16(bits, zero); + uint16_t zero_mask = simde_mm_movemask_epi8(cmp_zero); - if (zeroMask != 0x0000) { - int bytePosZero = __builtin_ctz(zeroMask); - int charPosZero = bytePosZero / 2; - pos_ptr += charPosZero; - return processedChars + charPosZero; - } + if (zero_mask != 0x0000) + { + int byte_pos_zero = __builtin_ctz(zero_mask); + int char_pos_zero = byte_pos_zero / 2; + pos_ptr += char_pos_zero; + return processed_chars + char_pos_zero; + } - pos_ptr += 8; - processedChars += 8; + pos_ptr += 8; + processed_chars += 8; } + } + + void StringManager::compress64(const XMLCh* inputIt, char* outputIt) + { + simde__m128i bits = simde_mm_loadu_si128(reinterpret_cast(inputIt)); + + // Select every second byte (little-endian lower byte of each UTF-16 character) + const simde__m128i shuffleMask = simde_mm_setr_epi8( + 0, 2, 4, 6, 8, 10, 12, 14, + -1, -1, -1, -1, -1, -1, -1, -1 + ); + + simde__m128i compressed = simde_mm_shuffle_epi8(bits, shuffleMask); - // Reached max length without finding null terminator - return 0; + // Store the lower 64 bits (8 ASCII characters) + 
simde_mm_storel_epi64(reinterpret_cast(outputIt), compressed); } - void StringManager::compress64 (const XMLCh* input_it, char* output_it) + bool StringManager::isASCII(const XMLCh* chars, const XMLSize_t length) + { + if (length == 0) { - simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)input_it); - // simde__m128i compressed = simde_mm_packus_epi16(bits, simde_mm_setzero_si128()); - const simde__m128i shuffleMask = simde_mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, - -1, -1, -1, -1, -1, -1, -1, -1); - simde__m128i compressed = simde_mm_shuffle_epi8(bits,shuffleMask); - simde_mm_storel_epi64((simde__m128i*)output_it, compressed); + return false; } - bool StringManager::isASCII(const XMLCh * chars, const XMLSize_t length) { + Size quotient = length / 8; + Size remainder = length % 8; - - size_t quotient = length / 8; // Ganzzahliger Quotient - size_t remainder = length % 8; + const XMLCh* inputPtr = chars; + simde__m128i mask = simde_mm_set1_epi16(0xFF00); + bool bitmask = true; - const XMLCh* input_ptr = chars; - simde__m128i mask = simde_mm_set1_epi16(0xFF00); - bool bitmask = true; + // Process blocks of 8 UTF-16 characters using SIMD + for (Size i = 0; i < quotient; ++i) + { + simde__m128i bits = simde_mm_loadu_si128(reinterpret_cast(inputPtr)); + simde__m128i zero = simde_mm_setzero_si128(); + simde__m128i andOp = simde_mm_and_si128(bits, mask); + simde__m128i cmp = simde_mm_cmpeq_epi16(andOp, zero); - if (length == 0) + if (simde_mm_movemask_epi8(cmp) != 0xFFFF) { - return false; + bitmask = false; + break; } - for (size_t i = 0; i < quotient; i++) - { - - simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)input_ptr); - simde__m128i zero = simde_mm_setzero_si128(); - simde__m128i andOP = simde_mm_and_si128(bits, mask); - simde__m128i cmp = simde_mm_cmpeq_epi16(andOP, zero); - - if (simde_mm_movemask_epi8(cmp) != 0xFFFF) - { - bitmask = false; - } - input_ptr+=8; - } - - for (size_t i = 0; i < remainder && bitmask; i++) + inputPtr += 8; + } + + // Check 
remaining characters individually + for (Size i = 0; i < remainder && bitmask; ++i) + { + if (inputPtr[i] & 0xFF00) { - bitmask = !(input_ptr[i] & 0xFF00); + bitmask = false; + break; } - return bitmask; } - void StringManager::appendASCII(const XMLCh * chars, const XMLSize_t length, String & result) + return bitmask; + } + + void StringManager::appendASCII(const XMLCh* chars, const XMLSize_t length, String& result) { - // XMLCh are characters in UTF16 (usually stored as 16bit unsigned - // short but this is not guaranteed). - // We know that the Base64 string here can only contain plain ASCII - // and all bytes except the least significant one will be zero. Thus - // we can convert to char directly (only keeping the least - // significant byte). - - size_t quotient = length / 8; - size_t remainder = length % 8; - - const XMLCh* input_ptr = chars; - - - std::div_t quotient_and_remainder = std::div(length, 8); - size_t quotient = quotient_and_remainder.quot; // Ganzzahliger Quotient - size_t remainder = quotient_and_remainder.rem; - // std::cout << "Remainer: " << remainder << std::endl; - // std::cout << "Quotient: " << quotient << std::endl; - // cout << "length: " << length << endl; - - - const XMLCh* it = chars; - const XMLCh* end = it + (quotient * 8); - // std::cout << "Anzahl der Elemente zwischen it1 und it2: " - // << std::distance(it, end) << std::endl; - - size_t curr_size = result.size(); - result.resize(curr_size + length); - char* output_ptr = &result[curr_size]; - - //Copy Block of 8 chars at a time. Then jumps to the next eight Blocks - for (size_t i = 0; i < quotient; i++) - { - compress64(input_ptr, output_ptr); - input_ptr += 8; - output_ptr += 8; + // XMLCh are characters in UTF16 (usually stored as 16-bit unsigned + // short but this is not guaranteed). + // We know that the Base64 string here can only contain plain ASCII + // and all bytes except the least significant one will be zero. 
Thus + // we can convert to char directly (only keeping the least + // significant byte). + + Size quotient = length / 8; + Size remainder = length % 8; + + const XMLCh* inputPtr = chars; + + Size currentSize = result.size(); + result.resize(currentSize + length); + char* outputPtr = &result[currentSize]; + + // Copy blocks of 8 characters at a time + for (Size i = 0; i < quotient; ++i) + { + compress64(inputPtr, outputPtr); + inputPtr += 8; + outputPtr += 8; } - - - for (size_t i = 0; i < remainder; i++) - { - output_ptr[i] = static_cast(input_ptr[i] & 0xFF); + + // Copy any remaining characters individually + for (Size i = 0; i < remainder; ++i) + { + outputPtr[i] = static_cast(inputPtr[i] & 0xFF); } } From 99e5c3dacc96ede7389b947df9a157bad94a1363 Mon Sep 17 00:00:00 2001 From: Bela Bennet Pfeffer Date: Wed, 14 May 2025 15:50:56 +0200 Subject: [PATCH 15/31] Fixed the bug that led to several tests to fail. It was an implicit conversion from a size_t to an int which led to an overflow --- .../OpenMS/FORMAT/HANDLERS/XMLHandler.h | 2 +- .../source/FORMAT/HANDLERS/XMLHandler.cpp | 76 ++++++++++--------- 2 files changed, 43 insertions(+), 35 deletions(-) diff --git a/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h b/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h index 0563d765809..ca351fad24f 100644 --- a/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h +++ b/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h @@ -254,7 +254,7 @@ namespace OpenMS ~StringManager(); /// Calculates the length of a XMLCh* string using SIMDe - static int strLength(const XMLCh* input_ptr); + static XMLSize_t strLength(const XMLCh* input_ptr); /// Transcode the supplied C string to a xerces string inline static XercesString convert(const char * str) diff --git a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp index 6441a048bbc..1984dab42d3 100644 --- a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp +++ 
b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp @@ -418,42 +418,50 @@ namespace OpenMS::Internal } } - int StringManager::strLength(const XMLCh* input_ptr) - { - size_t processed_chars = 0; - XMLCh* pos_ptr = const_cast(input_ptr); - size_t align = (size_t)pos_ptr % 16; - - // Prevent crossing page boundaries - for (size_t i = 0; i < align; ++i) - { - if (pos_ptr[i] == 0) - { - return processed_chars + i; - } - ++processed_chars; - ++pos_ptr; + size_t StringManager::strLength(const XMLCh* input_ptr) { + if (input_ptr == nullptr) { + return 0; } - - while (true) - { - simde__m128i bits = simde_mm_loadu_si128(reinterpret_cast(pos_ptr)); - simde__m128i zero = simde_mm_setzero_si128(); - simde__m128i cmp_zero = simde_mm_cmpeq_epi16(bits, zero); - uint16_t zero_mask = simde_mm_movemask_epi8(cmp_zero); - - if (zero_mask != 0x0000) - { - int byte_pos_zero = __builtin_ctz(zero_mask); - int char_pos_zero = byte_pos_zero / 2; - pos_ptr += char_pos_zero; - return processed_chars + char_pos_zero; - } - - pos_ptr += 8; - processed_chars += 8; + + XMLSize_t processed_chars = 0; + const XMLCh* pos_ptr = input_ptr; + + // Verarbeite einzelne Zeichen, bis der Pointer 16-Byte-aligned ist + uintptr_t ptr_value = reinterpret_cast(pos_ptr); + size_t misalignment = ptr_value & 0xF; // Berechnet Misalignment als (Adresswert) mod 16 + size_t chars_to_align = misalignment ? 
(16 - misalignment) / sizeof(XMLCh) : 0; + + // Vorverarbeitung einzelner Zeichen bis zum Alignment oder bis zum Ende des Strings + for (size_t i = 0; i < chars_to_align; ++i) { + if (*pos_ptr == 0) { + return processed_chars; + } + ++pos_ptr; + ++processed_chars; } - } + + // Hauptschleife mit SIMD-Operationen + while (true) { + // SIMD-Operation + simde__m128i bits = simde_mm_loadu_si128(reinterpret_cast(pos_ptr)); + simde__m128i zero = simde_mm_setzero_si128(); + simde__m128i cmp_zero = simde_mm_cmpeq_epi16(bits, zero); + uint16_t zero_mask = simde_mm_movemask_epi8(cmp_zero); + + if (zero_mask != 0x0000) { + int byte_pos_zero = __builtin_ctz(zero_mask); + int char_pos_zero = byte_pos_zero / 2; + return processed_chars + static_cast<>(char_pos_zero); + } + + // 8 Zeichen (16 Bytes) wurden verarbeitet, keine Null gefunden + pos_ptr += 8; + processed_chars += 8; + } + + // Diese Zeile wird nie erreicht + return processed_chars; + } void StringManager::compress64(const XMLCh* inputIt, char* outputIt) { From e26fb11566da5dc6fd08fde2fa2f4cf9f10cc832 Mon Sep 17 00:00:00 2001 From: Timo Sachsenberg Date: Thu, 15 May 2025 07:17:52 +0200 Subject: [PATCH 16/31] Add doc (#8044) * add missing kernel doc * more doc --- .../include/OpenMS/DATASTRUCTURES/Compomer.h | 231 +++++++++--- .../OpenMS/DATASTRUCTURES/IsotopeCluster.h | 52 ++- .../include/OpenMS/DATASTRUCTURES/LPWrapper.h | 338 +++++++++++++++--- .../OpenMS/DATASTRUCTURES/MassExplainer.h | 164 +++++++-- src/openms/include/OpenMS/KERNEL/DimMapper.h | 43 +++ src/openms/include/OpenMS/KERNEL/MSSpectrum.h | 10 + 6 files changed, 701 insertions(+), 137 deletions(-) diff --git a/src/openms/include/OpenMS/DATASTRUCTURES/Compomer.h b/src/openms/include/OpenMS/DATASTRUCTURES/Compomer.h index 6733739050c..25860f7583b 100644 --- a/src/openms/include/OpenMS/DATASTRUCTURES/Compomer.h +++ b/src/openms/include/OpenMS/DATASTRUCTURES/Compomer.h @@ -23,77 +23,196 @@ namespace OpenMS /** @brief Holds information on an edge connecting two 
features from a (putative) charge ladder - - A compomer is storing information on the adducts used on LEFT and RIGHT nodes (Features) that are connected by the edge (i.e. ChargePair) - holding the compomer. Additionally meta information on the edge (net_charge, edge score, id) - which is kept up-to-date when adducts are added to either side is stored. - + + A Compomer represents the chemical composition difference between two mass spectrometry features. + It stores information about the adducts (ions, molecules, or atoms) that explain the mass and + charge differences between these features. + + The Compomer has two sides: + - LEFT side: adducts subtracted from the first feature + - RIGHT side: adducts added to the first feature + + This model allows representing the relationship between features that correspond to the same + analyte but with different adduct compositions or charge states. + + The Compomer maintains metadata such as: + - Net charge (difference between right and left sides) + - Total mass difference + - Probability score of this adduct combination + - Expected RT shift caused by the adducts + + This class is used extensively in the feature decharging and adduct annotation processes. 
+ + @ingroup Datastructures */ class OPENMS_DLLAPI Compomer { public: - /// side of compomer (LEFT ^ subtract; RIGHT ^ add) + /** + @brief Enumeration for specifying which side of the compomer to operate on + + - LEFT: The left side (adducts subtracted from the first feature) + - RIGHT: The right side (adducts added to the first feature) + - BOTH: Both sides of the compomer + */ enum SIDE {LEFT, RIGHT, BOTH}; - typedef std::map CompomerSide; ///< adducts and their abundance etc - typedef std::vector CompomerComponents; ///< container for the two sides [0]=left, [1]=right + /// Type definition for one side of a compomer (maps adduct labels to Adduct objects) + typedef std::map CompomerSide; + + /** + @brief Container for both sides of a compomer + + Vector with exactly two elements: + - [0] = left side (adducts subtracted) + - [1] = right side (adducts added) + */ + typedef std::vector CompomerComponents; - /// Default Constructor + /** + @brief Default Constructor + + Initializes an empty compomer with zero net charge, mass, and probability. 
+ */ Compomer(); - /// Constructor with net-charge and mass + /** + @brief Constructor with net-charge, mass, and probability + + @param net_charge Net charge of the compomer (right side - left side) + @param mass Mass difference represented by the compomer + @param log_p Log probability of this adduct combination + */ Compomer(Int net_charge, double mass, double log_p); - /// Copy C'tor + /** + @brief Copy constructor + + @param p Source compomer to copy from + */ Compomer(const Compomer& p); - /// Assignment Operator + /** + @brief Assignment Operator + + @param source Source compomer to assign from + @return Reference to this object + */ Compomer& operator=(const Compomer& source); - /// Add a.amount of Adduct @param a to Compomer's @param side and update its properties + /** + @brief Add an adduct to a specific side of the compomer + + Adds the specified amount of the adduct to the given side and + updates the compomer's properties (net charge, mass, etc.). + + @param a The adduct to add + @param side Which side to add the adduct to (0=LEFT, 1=RIGHT) + */ void add(const Adduct& a, UInt side); /** - * indicates if these two compomers can coexist for one feature - * @param cmp The other Compomer we compare to - * @param side_this Indicates which "side"(negative or positive adducts) we are looking at. Negative adducts belong to the left side of the ChargePair. - * @param side_other See above. + @brief Determines if two compomers conflict with each other + + Checks if these two compomers can coexist for one feature by examining + if they have conflicting adduct compositions on the specified sides. 
+ + @param cmp The other Compomer to compare against + @param side_this Which side of this compomer to check (0=LEFT, 1=RIGHT) + @param side_other Which side of the other compomer to check (0=LEFT, 1=RIGHT) + @return True if the compomers conflict (cannot coexist), false otherwise */ bool isConflicting(const Compomer& cmp, UInt side_this, UInt side_other) const; - /// set an Id which allows unique identification of a compomer + /** + @brief Set a unique identifier for this compomer + + @param id The unique ID to assign + */ void setID(const Size& id); - /// return Id which allows unique identification of this compomer + + /** + @brief Get the unique identifier of this compomer + + @return The unique ID of this compomer + */ const Size& getID() const; - /// left and right adducts of this compomer + + /** + @brief Get both sides (left and right) of this compomer + + @return Reference to the compomer components (left and right sides) + */ const CompomerComponents& getComponent() const; - /// net charge of compomer (i.e. difference between left and right side of compomer) + /** + @brief Get the net charge of this compomer + + The net charge is calculated as the difference between the right and left sides. 
+ + @return Net charge value + */ const Int& getNetCharge() const; - /// mass of all contained adducts + /** + @brief Get the total mass difference represented by this compomer + + @return Mass difference in Da + */ const double& getMass() const; - /// summed positive charges of contained adducts + /** + @brief Get the sum of positive charges in this compomer + + @return Total positive charges + */ const Int& getPositiveCharges() const; - /// summed negative charges of contained adducts + /** + @brief Get the sum of negative charges in this compomer + + @return Total negative charges + */ const Int& getNegativeCharges() const; - /// return log probability + /** + @brief Get the log probability of this adduct combination + + Higher values indicate more likely combinations. + + @return Log probability value + */ const double& getLogP() const; - /// return log probability + /** + @brief Get the expected retention time shift caused by this compomer + + @return Expected RT shift value + */ const double& getRTShift() const; - /// get adducts with their abundance as compact string for both sides + /** + @brief Get a string representation of all adducts in this compomer + + @return String representation of adducts on both sides + */ String getAdductsAsString() const; - /// get adducts with their abundance as compact string (amounts are absolute unless side=BOTH) - /// @param side Use LEFT for left, RIGHT for right + /** + @brief Get a string representation of adducts on a specific side + + @param side Which side to get adducts for (LEFT, RIGHT, or BOTH) + @return String representation of adducts on the specified side + */ String getAdductsAsString(UInt side) const; - /// check if Compomer only contains a single adduct on side @p side + /** + @brief Check if the compomer contains only a single adduct on the specified side + + @param a Output parameter that will contain the adduct if found + @param side Which side to check (LEFT or RIGHT) + @return True if only a single 
adduct is present on the specified side + */ bool isSingleAdduct(Adduct& a, const UInt side) const; /** @@ -121,28 +240,56 @@ namespace OpenMS StringList getLabels(const UInt side) const; - /// Adds @p add_side to this compomer. + /** + @brief Add a complete set of adducts to a specific side of the compomer + + @param add_side The set of adducts to add + @param side Which side to add the adducts to (LEFT or RIGHT) + */ void add(const CompomerSide& add_side, UInt side); - /// Sort compomer by (in order of importance): net-charge, mass, probability + /** + @brief Comparison operator for sorting compomers + + Sorts compomers by (in order of importance): + 1. Net charge + 2. Mass + 3. Probability + + @param c1 First compomer to compare + @param c2 Second compomer to compare + @return True if c1 should be ordered before c2 + */ friend OPENMS_DLLAPI bool operator<(const Compomer& c1, const Compomer& c2); - /// Print the contents of a Compomer to a stream. + /** + @brief Output stream operator for printing compomer contents + + @param os Output stream to write to + @param cmp Compomer to print + @return Reference to the output stream + */ friend OPENMS_DLLAPI std::ostream& operator<<(std::ostream& os, const Compomer& cmp); - /// Comparator + /** + @brief Equality comparison operator + + @param a First compomer to compare + @param b Second compomer to compare + @return True if the compomers are equal + */ friend OPENMS_DLLAPI bool operator==(const Compomer& a, const Compomer& b); private: - CompomerComponents cmp_; ///< adducts of left and right side - Int net_charge_; ///< net charge (right - left) - double mass_; ///< net mass (right - left) - Int pos_charges_; ///< net charges on the right - Int neg_charges_; ///< net charges on the left - double log_p_; ///< log probability of compomer - double rt_shift_; ///< expected net RT shift of compomer (-shift_leftside + shift_rightside) - Size id_; + CompomerComponents cmp_; ///< Adducts of left and right side + Int 
net_charge_; ///< Net charge (right - left) + double mass_; ///< Net mass (right - left) + Int pos_charges_; ///< Sum of positive charges + Int neg_charges_; ///< Sum of negative charges + double log_p_; ///< Log probability of this adduct combination + double rt_shift_; ///< Expected net RT shift (-shift_leftside + shift_rightside) + Size id_; ///< Unique identifier for this compomer }; // \Compomer diff --git a/src/openms/include/OpenMS/DATASTRUCTURES/IsotopeCluster.h b/src/openms/include/OpenMS/DATASTRUCTURES/IsotopeCluster.h index 31134c75a5d..8a8504436ed 100644 --- a/src/openms/include/OpenMS/DATASTRUCTURES/IsotopeCluster.h +++ b/src/openms/include/OpenMS/DATASTRUCTURES/IsotopeCluster.h @@ -14,37 +14,75 @@ namespace OpenMS { - ///Stores information about an isotopic cluster (i.e. potential peptide charge variants) + /** + @brief Stores information about an isotopic cluster (i.e. potential peptide charge variants) + + An isotopic cluster represents a group of related peaks that likely originate from the same + peptide but with different isotopic compositions. This structure stores the indices of these + peaks and the scans they appear in, along with charge state information when available. + + The structure is typically used in mass spectrometry data analysis to group related peaks + and track their charge states for further processing. + + @ingroup Datastructures + */ struct OPENMS_DLLAPI IsotopeCluster { - /// An index e.g. in an MSExperiment + /** + @brief An index pair typically representing (scan_index, peak_index) in an MSExperiment + + The first value usually refers to the scan/spectrum index, while the second value + refers to the peak index within that scan/spectrum. + */ typedef std::pair IndexPair; - /// A set of index pairs, usually referring to an MSExperiment. 
+ + /** + @brief A set of index pairs, usually referring to peaks in an MSExperiment + + This collection stores unique pairs of indices that point to specific peaks + in specific scans of a mass spectrometry experiment. + */ typedef std::set IndexSet; - ///index set with associated charge estimate + /** + @brief Index set with associated charge estimate + + Extends the basic IndexSet with charge state information for the peaks. + This allows tracking which peaks belong to the same isotopic pattern + and what charge state they represent. + */ struct ChargedIndexSet : public IndexSet { + /** + @brief Default constructor + + Initializes the charge to 0, which by convention means "no charge estimate" + */ ChargedIndexSet() : charge(0) { } - /// charge estimate (convention: zero means "no charge estimate") + /// Charge estimate (convention: zero means "no charge estimate") Int charge; }; + /** + @brief Default constructor + + Initializes an empty isotope cluster with no peaks and no scans + */ IsotopeCluster() : peaks(), scans() { } - /// peaks in this cluster + /// Peaks in this cluster, with their charge state information ChargedIndexSet peaks; - /// the scans of this cluster + /// The scan indices where this cluster appears std::vector scans; }; diff --git a/src/openms/include/OpenMS/DATASTRUCTURES/LPWrapper.h b/src/openms/include/OpenMS/DATASTRUCTURES/LPWrapper.h index 21a96f9213a..6aa37d62d43 100644 --- a/src/openms/include/OpenMS/DATASTRUCTURES/LPWrapper.h +++ b/src/openms/include/OpenMS/DATASTRUCTURES/LPWrapper.h @@ -41,14 +41,44 @@ class CoinModel; namespace OpenMS { + /** + @brief A wrapper class for linear programming (LP) solvers + + This class provides a unified interface to different linear programming solvers, + including GLPK (GNU Linear Programming Kit) and COIN-OR (if available). + + Linear programming is a method to find the best outcome in a mathematical model + whose requirements are represented by linear relationships. 
It is used for + optimization problems where the objective function and constraints are linear. + + LPWrapper allows you to: + - Create and manipulate LP problems (add rows, columns, set bounds) + - Set objective functions and constraints + - Solve the LP problem using different solvers + - Access the solution and status information + + The class supports both continuous and integer variables, allowing for + mixed-integer linear programming (MILP) problems. + + @ingroup Datastructures + */ class OPENMS_DLLAPI LPWrapper { public: /** @brief Struct that holds the parameters of the LP solver + + This structure contains various parameters that control the behavior of the LP solver, + including algorithm selection, cut generation, heuristics, and output control. + + Most parameters have reasonable defaults and don't need to be modified for basic use cases. + Advanced users can tune these parameters to improve performance for specific problem types. */ struct SolverParam { + /** + @brief Default constructor that initializes all parameters with reasonable defaults + */ SolverParam() : message_level(3), branching_tech(4), backtrack_tech(3), preprocessing_tech(2), enable_feas_pump_heuristic(true), enable_gmi_cuts(true), @@ -58,77 +88,138 @@ namespace OpenMS { } - Int message_level; - Int branching_tech; - Int backtrack_tech; - Int preprocessing_tech; - bool enable_feas_pump_heuristic; - bool enable_gmi_cuts; - bool enable_mir_cuts; - bool enable_cov_cuts; - bool enable_clq_cuts; - double mip_gap; - Int time_limit; - Int output_freq; - Int output_delay; - bool enable_presolve; - bool enable_binarization; ///< only with presolve + Int message_level; ///< Controls verbosity of solver output (0-3) + Int branching_tech; ///< Branching technique for MIP problems + Int backtrack_tech; ///< Backtracking technique for MIP problems + Int preprocessing_tech; ///< Preprocessing technique + bool enable_feas_pump_heuristic; ///< Enable feasibility pump heuristic + bool enable_gmi_cuts; 
///< Enable Gomory mixed-integer cuts + bool enable_mir_cuts; ///< Enable mixed-integer rounding cuts + bool enable_cov_cuts; ///< Enable cover cuts + bool enable_clq_cuts; ///< Enable clique cuts + double mip_gap; ///< Relative gap tolerance for MIP problems + Int time_limit; ///< Time limit in milliseconds + Int output_freq; ///< Output frequency in milliseconds + Int output_delay; ///< Output delay in milliseconds + bool enable_presolve; ///< Enable presolve techniques + bool enable_binarization; ///< Enable binarization (only with presolve) }; + /** + @brief Enumeration for variable/constraint bound types + + Defines the type of bounds applied to variables or constraints in the LP problem. + */ enum Type { - UNBOUNDED = 1, - LOWER_BOUND_ONLY, - UPPER_BOUND_ONLY, - DOUBLE_BOUNDED, - FIXED + UNBOUNDED = 1, ///< No bounds (free variable) + LOWER_BOUND_ONLY, ///< Only lower bound is specified + UPPER_BOUND_ONLY, ///< Only upper bound is specified + DOUBLE_BOUNDED, ///< Both lower and upper bounds are specified + FIXED ///< Lower bound equals upper bound (fixed value) }; + /** + @brief Enumeration for variable types in the LP problem + + Defines whether variables are continuous or discrete (integer/binary). + */ enum VariableType { - CONTINUOUS = 1, - INTEGER, - BINARY + CONTINUOUS = 1, ///< Continuous variable (can take any real value within bounds) + INTEGER, ///< Integer variable (can only take integer values within bounds) + BINARY ///< Binary variable (can only take values 0 or 1) }; + /** + @brief Enumeration for optimization direction + + Defines whether the objective function should be minimized or maximized. + */ enum Sense { - MIN = 1, - MAX + MIN = 1, ///< Minimize the objective function + MAX ///< Maximize the objective function }; + /** + @brief Enumeration for LP problem file formats + + Defines the file format used when writing LP problems to disk. 
+ */ enum WriteFormat { - FORMAT_LP = 0, - FORMAT_MPS, - FORMAT_GLPK + FORMAT_LP = 0, ///< LP format (human-readable) + FORMAT_MPS, ///< MPS format (industry standard) + FORMAT_GLPK ///< GLPK's native format }; + /** + @brief Enumeration for available LP solvers + + Defines which solver backend to use for solving LP problems. + */ enum SOLVER { - SOLVER_GLPK = 0 + SOLVER_GLPK = 0 ///< GNU Linear Programming Kit solver #ifdef OPENMS_HAS_COINOR - , SOLVER_COINOR + , SOLVER_COINOR ///< COIN-OR solver (if available) #endif }; + /** + @brief Enumeration for solver status after solving an LP problem + + Defines the possible outcomes after attempting to solve an LP problem. + */ enum SolverStatus { - UNDEFINED = 1, - OPTIMAL = 5, - FEASIBLE = 2, - NO_FEASIBLE_SOL = 4 + UNDEFINED = 1, ///< Status is undefined (e.g., solver not run yet) + OPTIMAL = 5, ///< Optimal solution found + FEASIBLE = 2, ///< Feasible solution found (but not necessarily optimal) + NO_FEASIBLE_SOL = 4 ///< No feasible solution exists for the problem }; + /** + @brief Default constructor + + Initializes a new LP problem with the default solver (GLPK or COIN-OR if available). + */ LPWrapper(); + + /** + @brief Virtual destructor + + Frees all resources associated with the LP problem. 
+ */ virtual ~LPWrapper(); // problem creation/manipulation - /// adds a row to the LP matrix, returns index + /** + @brief Adds a row to the LP matrix + + @param row_indices Indices of the columns that have non-zero coefficients in this row + @param row_values Values of the non-zero coefficients in this row + @param name Name of the row (for identification purposes) + @return Index of the newly added row + */ Int addRow(const std::vector& row_indices, const std::vector& row_values, const String& name); - /// adds an empty column to the LP matrix, returns index + + /** + @brief Adds an empty column to the LP matrix + + @return Index of the newly added column + */ Int addColumn(); - /// adds a column to the LP matrix, returns index + + /** + @brief Adds a column to the LP matrix + + @param column_indices Indices of the rows that have non-zero coefficients in this column + @param column_values Values of the non-zero coefficients in this column + @param name Name of the column (for identification purposes) + @return Index of the newly added column + */ Int addColumn(const std::vector& column_indices, const std::vector& column_values, const String& name); /** @@ -158,27 +249,91 @@ namespace OpenMS */ Int addColumn(const std::vector& column_indices, const std::vector& column_values, const String& name, double lower_bound, double upper_bound, Type type); - /// delete index-th row + /** + @brief Delete the row at the specified index + + @param index Index of the row to delete + */ void deleteRow(Int index); - /// sets name of the index-th column + + /** + @brief Set the name of a column + + @param index Index of the column to rename + @param name New name for the column + */ void setColumnName(Int index, const String& name); - /// gets name of the index-th column + + /** + @brief Get the name of a column + + @param index Index of the column + @return Name of the column + */ String getColumnName(Int index); - /// sets name of the index-th row + + /** + @brief Get the name 
of a row + + @param index Index of the row + @return Name of the row + */ String getRowName(Int index); - /// gets index of the row with name + + /** + @brief Find the index of a row by its name + + @param name Name of the row to find + @return Index of the row with the given name + */ Int getRowIndex(const String& name); - /// gets index of the column with name + + /** + @brief Find the index of a column by its name + + @param name Name of the column to find + @return Index of the column with the given name + */ Int getColumnIndex(const String& name); - /// gets column's upper bound + + /** + @brief Get the upper bound of a column + + @param index Index of the column + @return Upper bound value of the column + */ double getColumnUpperBound(Int index); - /// gets column's lower bound + + /** + @brief Get the lower bound of a column + + @param index Index of the column + @return Lower bound value of the column + */ double getColumnLowerBound(Int index); - /// gets row's upper bound + + /** + @brief Get the upper bound of a row + + @param index Index of the row + @return Upper bound value of the row + */ double getRowUpperBound(Int index); - /// gets row's lower bound + + /** + @brief Get the lower bound of a row + + @param index Index of the row + @return Lower bound value of the row + */ double getRowLowerBound(Int index); - /// sets name of the index-th row + + /** + @brief Set the name of a row + + @param index Index of the row to rename + @param name New name for the row + */ void setRowName(Int index, const String& name); /** @@ -217,9 +372,20 @@ namespace OpenMS */ VariableType getColumnType(Int index); - /// set objective value for column with index + /** + @brief Set the objective coefficient for a column/variable + + @param index Index of the column/variable + @param obj_value Coefficient value in the objective function + */ void setObjective(Int index, double obj_value); - /// get objective value for column with index + + /** + @brief Get the objective 
coefficient for a column/variable + + @param index Index of the column/variable + @return Coefficient value in the objective function + */ double getObjective(Int index); /** @@ -228,14 +394,43 @@ namespace OpenMS @param sense 1- minimize, 2- maximize */ void setObjectiveSense(Sense sense); + /** + @brief Get the current objective direction + + @return Current optimization direction (MIN or MAX) + */ Sense getObjectiveSense(); - /// get number of columns + /** + @brief Get the number of columns/variables in the LP problem + + @return Number of columns in the LP matrix + */ Int getNumberOfColumns(); - /// get number of rows + + /** + @brief Get the number of rows/constraints in the LP problem + + @return Number of rows in the LP matrix + */ Int getNumberOfRows(); + /** + @brief Set the value of a matrix element at the specified position + + @param row_index Index of the row + @param column_index Index of the column + @param value Value to set at the specified position + */ void setElement(Int row_index, Int column_index, double value); + + /** + @brief Get the value of a matrix element at the specified position + + @param row_index Index of the row + @param column_index Index of the column + @return Value at the specified position + */ double getElement(Int row_index, Int column_index); // problem reading/writing @@ -275,24 +470,53 @@ namespace OpenMS SolverStatus getStatus(); // solution access + /** + @brief Get the objective function value of the solution + + @return Value of the objective function at the optimal solution + */ double getObjectiveValue(); + + /** + @brief Get the value of a variable in the solution + + @param index Index of the column/variable + @return Value of the variable in the optimal solution + */ double getColumnValue(Int index); + /** + @brief Get the number of non-zero entries in a specific row + + @param idx Index of the row + @return Number of non-zero coefficients in the row + */ Int getNumberOfNonZeroEntriesInRow(Int idx); + + /** + 
@brief Get the indices of non-zero entries in a specific row + + @param idx Index of the row + @param indexes Vector to store the column indices of non-zero entries + */ void getMatrixRow(Int idx, std::vector& indexes); - /// get currently active solver + /** + @brief Get the currently active solver backend + + @return Currently active solver (GLPK or COIN-OR) + */ SOLVER getSolver() const; protected: #ifdef OPENMS_HAS_COINOR - CoinModel * model_ = nullptr; - std::vector solution_; + CoinModel * model_ = nullptr; ///< COIN-OR model object for the LP problem + std::vector solution_; ///< Solution vector when using COIN-OR #else - glp_prob * lp_problem_ = nullptr; + glp_prob * lp_problem_ = nullptr; ///< GLPK problem object for the LP problem #endif - SOLVER solver_; + SOLVER solver_; ///< Currently active solver backend }; // class diff --git a/src/openms/include/OpenMS/DATASTRUCTURES/MassExplainer.h b/src/openms/include/OpenMS/DATASTRUCTURES/MassExplainer.h index aeba31c3154..1632afd0b51 100644 --- a/src/openms/include/OpenMS/DATASTRUCTURES/MassExplainer.h +++ b/src/openms/include/OpenMS/DATASTRUCTURES/MassExplainer.h @@ -20,9 +20,19 @@ namespace OpenMS class Compomer; /** - @brief computes empirical formulas for given mass differences using a set of allowed elements - - + @brief Computes empirical formulas for given mass differences using a set of allowed elements + + MassExplainer is used to explain observed mass differences between features by + determining the most likely combination of adducts that could cause such differences. + + The class works by: + 1. Taking a set of allowed adducts (elements, molecules, or ions) + 2. Computing all possible combinations of these adducts that could explain observed mass differences + 3. 
Providing a query interface to search for explanations for specific mass differences + + This is particularly useful in mass spectrometry data analysis for identifying + related features that represent the same analyte but with different adducts or charge states. + @ingroup Datastructures */ class OPENMS_DLLAPI MassExplainer @@ -30,30 +40,73 @@ namespace OpenMS public: - typedef Adduct::AdductsType AdductsType; //vector + /// Type definition for a vector of Adduct objects + typedef Adduct::AdductsType AdductsType; + + /// Type definition for an iterator over Compomer objects typedef std::vector::const_iterator CompomerIterator; ///@name Constructors and destructor //@{ - /// Default constructor + /** + @brief Default constructor + + Initializes with default parameters: + - No adducts + - Charge range from -2 to +4 + - Maximum charge span of 4 + - Log probability threshold of -5.0 + - Maximum number of neutral adducts of 2 + */ MassExplainer(); - /// Constructor + /** + @brief Constructor with custom adduct base + + @param adduct_base Set of allowed adducts to use for mass difference explanations + */ MassExplainer(AdductsType adduct_base); - /// Constructor + /** + @brief Constructor with custom charge parameters + + @param q_min Minimum charge state to consider + @param q_max Maximum charge state to consider + @param max_span Maximum allowed charge span between related features + @param thresh_logp Minimum log probability threshold for accepting explanations + */ MassExplainer(Int q_min, Int q_max, Int max_span, double thresh_logp); - /// Constructor + /** + @brief Constructor with all custom parameters + + @param adduct_base Set of allowed adducts to use for mass difference explanations + @param q_min Minimum charge state to consider + @param q_max Maximum charge state to consider + @param max_span Maximum allowed charge span between related features + @param thresh_logp Minimum log probability threshold for accepting explanations + @param max_neutrals Maximum 
number of neutral adducts allowed in an explanation + */ MassExplainer(AdductsType adduct_base, Int q_min, Int q_max, Int max_span, double thresh_logp, Size max_neutrals); private: - /// check consistency of input - /// @param init_thresh_p set default threshold (set to "false" to keep current value) + /** + @brief Check consistency of input parameters and initialize internal data structures + + This method validates the input parameters and sets default values where needed. + + @param init_thresh_p Whether to initialize the probability threshold with default value + (set to "false" to keep current value) + */ void init_(bool init_thresh_p); public: - /// Assignment operator + /** + @brief Assignment operator + + @param rhs Source object to assign from + @return Reference to this object + */ MassExplainer& operator=(const MassExplainer& rhs); /// Destructor @@ -61,30 +114,61 @@ namespace OpenMS //@} - /// fill map with possible mass-differences along with their explanation + /** + @brief Compute all possible mass differences and their explanations + + This method generates all possible combinations of adducts from the adduct base + and stores them internally for later querying. This must be called after + changing any parameters and before performing queries. + */ void compute(); //@name Accessors //@{ - /// Sets the set of possible adducts + /** + @brief Set the base set of allowed adducts + + @param adduct_base Vector of adducts to use for explanations + */ void setAdductBase(AdductsType adduct_base); - /// Returns the set of adducts + + /** + @brief Get the current set of allowed adducts + + @return Vector of adducts currently used for explanations + */ AdductsType getAdductBase() const; - /// return a compomer by its Id (useful after a query() ). + /** + @brief Get a specific compomer by its ID + + This is typically used after a query() to retrieve detailed information + about a specific explanation. 
+ + @param id ID of the compomer to retrieve + @return Reference to the requested compomer + */ const Compomer& getCompomerById(Size id) const; //@} - /// search the mass database for explanations - /// @param net_charge net charge of compomer seeked - /// @param mass_to_explain mass in Da that needs explanation - /// @param mass_delta allowed deviation from exact mass - /// @param thresh_log_p minimal log probability required - /// @param firstExplanation begin range with candidates according to net_charge and mass - /// @param lastExplanation end range + /** + @brief Search for explanations of a given mass difference + + This method searches the precomputed explanations for those that match + the given mass difference within the specified tolerance and have the + required net charge. + + @param net_charge Net charge of the compomer being sought + @param mass_to_explain Mass difference in Da that needs explanation + @param mass_delta Allowed deviation from exact mass (tolerance) + @param thresh_log_p Minimum log probability required for explanations + @param firstExplanation Output iterator to the beginning of matching explanations + @param lastExplanation Output iterator to the end of matching explanations + @return Number of explanations found, or negative value if no explanations found + */ SignedSize query(const Int net_charge, const float mass_to_explain, const float mass_delta, @@ -93,25 +177,43 @@ namespace OpenMS std::vector::const_iterator& lastExplanation) const; protected: - ///check if the generated compomer is valid judged by its probability, charges etc + /** + @brief Check if a generated compomer is valid based on its probability, charges, etc. 
+ + @param cmp The compomer to validate + @return True if the compomer is valid, false otherwise + */ bool compomerValid_(const Compomer& cmp) const; - /// create a proper adduct from formula and charge and probability + /** + @brief Create a proper adduct from formula, charge, and probability + + @param formula Chemical formula of the adduct + @param charge Charge of the adduct + @param p Probability of the adduct + @return Adduct object with the specified properties + */ Adduct createAdduct_(const String& formula, const Int charge, const double p) const; - /// store possible explanations (as formula) for a certain ChargeDifference and MassDifference + /// Vector storing all possible explanations for mass differences std::vector explanations_; - /// all allowed adducts, whose combination explains the mass difference + + /// Set of allowed adducts that can be combined to explain mass differences AdductsType adduct_base_; - /// minimal expected charge + + /// Minimum charge state to consider in explanations Int q_min_; - /// maximal expected charge + + /// Maximum charge state to consider in explanations Int q_max_; - /// maximal span (in terms of charge) for co-features, e.g. 
a cluster with q={3,6} has span=4 + + /// Maximum allowed charge span between related features (e.g., a cluster with q={3,6} has span=4) Int max_span_; - /// minimum required probability of a compound (all other compounds are discarded) + + /// Minimum required probability threshold for accepting explanations double thresh_p_; - /// Maximum number of neutral(q=0) adducts + + /// Maximum number of neutral (q=0) adducts allowed in an explanation Size max_neutrals_; }; diff --git a/src/openms/include/OpenMS/KERNEL/DimMapper.h b/src/openms/include/OpenMS/KERNEL/DimMapper.h index c288260e130..ee25985aecb 100644 --- a/src/openms/include/OpenMS/KERNEL/DimMapper.h +++ b/src/openms/include/OpenMS/KERNEL/DimMapper.h @@ -150,6 +150,16 @@ namespace OpenMS + /** + @brief Dimension implementation for retention time values. + + This class implements the DimBase interface for the retention time dimension. + It provides methods to access RT values from various data structures and + convert between RT values and generic dimension values. + + @see DimBase + @ingroup Kernel + */ class OPENMS_DLLAPI DimRT final : public DimBase { public: @@ -260,6 +270,16 @@ namespace OpenMS } }; + /** + @brief Dimension implementation for m/z values. + + This class implements the DimBase interface for the mass-to-charge ratio dimension. + It provides methods to access m/z values from various data structures and + convert between m/z values and generic dimension values. + + @see DimBase + @ingroup Kernel + */ class OPENMS_DLLAPI DimMZ final : public DimBase { public: @@ -371,6 +391,16 @@ namespace OpenMS } }; + /** + @brief Dimension implementation for intensity values. + + This class implements the DimBase interface for the intensity dimension. + It provides methods to access intensity values from various data structures and + convert between intensity values and generic dimension values. 
+ + @see DimBase + @ingroup Kernel + */ class OPENMS_DLLAPI DimINT final : public DimBase { public: @@ -488,6 +518,19 @@ namespace OpenMS } }; + /** + @brief Dimension implementation for ion mobility values. + + This class implements the DimBase interface for the ion mobility dimension. + It provides methods to access ion mobility values from various data structures and + convert between ion mobility values and generic dimension values. + + Ion mobility dimensions support different units such as FAIMS compensation voltage, + linear ion mobility spectrometry, and trapped ion mobility spectrometry. + + @see DimBase + @ingroup Kernel + */ class OPENMS_DLLAPI DimIM final : public DimBase { public: diff --git a/src/openms/include/OpenMS/KERNEL/MSSpectrum.h b/src/openms/include/OpenMS/KERNEL/MSSpectrum.h index a664a87f7f3..47ab7b531fa 100644 --- a/src/openms/include/OpenMS/KERNEL/MSSpectrum.h +++ b/src/openms/include/OpenMS/KERNEL/MSSpectrum.h @@ -64,6 +64,16 @@ namespace OpenMS } }; + /** + * @brief Container for organizing and managing multiple chunks in a spectrum. + * + * This structure is used to track multiple chunks (segments) within a spectrum. + * Each chunk represents a portion of the spectrum that may or may not be sorted. + * This information is used to optimize sorting operations on spectra, particularly + * when only parts of the spectrum need to be sorted or have been modified. 
+ * + * @see Chunk + */ struct Chunks { public: Chunks(const MSSpectrum& s) : spec_(s) {} From 597aaf86d7d010edadaca76b06a1dbe06ed485b4 Mon Sep 17 00:00:00 2001 From: Chris Bielow Date: Thu, 15 May 2025 07:34:33 +0200 Subject: [PATCH 17/31] avoid "unknown pragma" warnings on MSVC (#8045) --- .../include/OpenMS/ANALYSIS/ID/IDBoostGraph.h | 4 + .../IMS/IMSIsotopeDistribution.h | 8 +- .../include/OpenMS/DATASTRUCTURES/DPosition.h | 358 ++++++++---------- src/openms/include/OpenMS/FORMAT/MzTab.h | 7 +- .../include/OpenMS/KERNEL/ChromatogramPeak.h | 8 +- .../include/OpenMS/KERNEL/MobilityPeak1D.h | 8 +- .../include/OpenMS/KERNEL/MobilityPeak2D.h | 5 +- src/openms/include/OpenMS/KERNEL/Peak1D.h | 8 +- src/openms/include/OpenMS/KERNEL/Peak2D.h | 8 +- .../include/OpenMS/METADATA/ProteinHit.h | 27 +- .../source/FORMAT/HANDLERS/MzDataHandler.cpp | 5 +- 11 files changed, 184 insertions(+), 262 deletions(-) diff --git a/src/openms/include/OpenMS/ANALYSIS/ID/IDBoostGraph.h b/src/openms/include/OpenMS/ANALYSIS/ID/IDBoostGraph.h index df11d7c0abe..f027fcd9e4e 100644 --- a/src/openms/include/OpenMS/ANALYSIS/ID/IDBoostGraph.h +++ b/src/openms/include/OpenMS/ANALYSIS/ID/IDBoostGraph.h @@ -59,8 +59,10 @@ namespace OpenMS public: // boost has a weird extra semicolon in their strong typedef + #ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wextra-semi" + #endif /// placeholder for peptides with the same parent proteins or protein groups BOOST_STRONG_TYPEDEF(boost::blank, PeptideCluster); @@ -82,7 +84,9 @@ namespace OpenMS /// in which charge state a PSM was observed BOOST_STRONG_TYPEDEF(int, Charge); + #ifdef __clang__ #pragma clang diagnostic pop + #endif //typedefs //TODO rename ProteinGroup type since it collides with the actual OpenMS ProteinGroup diff --git a/src/openms/include/OpenMS/CHEMISTRY/MASSDECOMPOSITION/IMS/IMSIsotopeDistribution.h b/src/openms/include/OpenMS/CHEMISTRY/MASSDECOMPOSITION/IMS/IMSIsotopeDistribution.h index 
e91eee01169..953f7b37145 100644 --- a/src/openms/include/OpenMS/CHEMISTRY/MASSDECOMPOSITION/IMS/IMSIsotopeDistribution.h +++ b/src/openms/include/OpenMS/CHEMISTRY/MASSDECOMPOSITION/IMS/IMSIsotopeDistribution.h @@ -78,13 +78,7 @@ namespace OpenMS mass(local_mass), abundance(local_abundance) {} - bool operator==(const Peak & peak) const - { -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wfloat-equal" - return peak.mass == mass && peak.abundance == abundance; -#pragma clang diagnostic pop - } + bool operator==(const Peak& peak) const = default; mass_type mass; abundance_type abundance; diff --git a/src/openms/include/OpenMS/DATASTRUCTURES/DPosition.h b/src/openms/include/OpenMS/DATASTRUCTURES/DPosition.h index c7edc4a4093..5e5486cd690 100644 --- a/src/openms/include/OpenMS/DATASTRUCTURES/DPosition.h +++ b/src/openms/include/OpenMS/DATASTRUCTURES/DPosition.h @@ -13,6 +13,7 @@ #include #include +#include #include // for std::abs on integrals and floats #include #include @@ -25,225 +26,193 @@ namespace OpenMS @ingroup Datastructures */ template - class DPosition - { +class DPosition +{ public: + /// Coordinate type + using CoordinateType = TCoordinateType; - /// Coordinate type - typedef TCoordinateType CoordinateType; - /// Mutable iterator - typedef CoordinateType* Iterator; - /// Non-mutable iterator - typedef const CoordinateType* ConstIterator; - /// Dimensions - enum - { - DIMENSION = D - }; - /** - @name STL compatibility type definitions - */ - //@{ - typedef CoordinateType value_type; - typedef CoordinateType& reference; - typedef CoordinateType* pointer; - typedef CoordinateType* iterator; - typedef const CoordinateType* const_iterator; - //@} - - /** - @name Constructors and Destructor - */ - //@{ - /** - @brief Default constructor. - - Creates a position with all coordinates zero. 
- */ - DPosition() = default; - - /// Constructor that fills all dimensions with the value @p x - DPosition(CoordinateType x) - { - std::fill(&(coordinate_[0]), &(coordinate_[D]), x); - } + using DataType = std::array; - /// Constructor only for DPosition<2> that takes two Coordinates. - DPosition(CoordinateType x, CoordinateType y) - { - static_assert(D == 2, "DPosition:DPosition(x,y): index overflow!"); - coordinate_[0] = x; - coordinate_[1] = y; - } + /// Dimensions + enum + { + DIMENSION = D + }; + /** + @name STL compatibility type definitions + */ + //@{ + typedef CoordinateType value_type; + typedef CoordinateType& reference; + typedef CoordinateType* pointer; + typedef CoordinateType* iterator; + typedef const CoordinateType* const_iterator; + //@} - /// Constructor only for DPosition<3> that takes three Coordinates. - DPosition(CoordinateType x, CoordinateType y, CoordinateType z) - { - static_assert(D == 3, "DPosition:DPosition(x,y,z): index overflow!"); - coordinate_[0] = x; - coordinate_[1] = y; - coordinate_[2] = z; - } + /** + @name Constructors and Destructor + */ + //@{ + /** + @brief Default constructor. - /// Copy constructor - DPosition(const DPosition& pos) = default; + Creates a position with all coordinates zero. + */ + DPosition() = default; - /// Move constructor - DPosition(DPosition&& rhs) noexcept = default; + /// Constructor that fills all dimensions with the value @p x + DPosition(CoordinateType x) + { + std::fill(coordinate_.begin(), coordinate_.end(), x); + } - /// Assignment operator - DPosition& operator=(const DPosition& source) = default; + /// Constructor only for DPosition<2> that takes two Coordinates. 
+ DPosition(CoordinateType x, CoordinateType y) + { + static_assert(D == 2, "DPosition:DPosition(x,y): index overflow!"); + coordinate_[0] = x; + coordinate_[1] = y; + } - /// Move Assignment operator - DPosition& operator=(DPosition&& source) noexcept = default; + /// Constructor only for DPosition<3> that takes three Coordinates. + DPosition(CoordinateType x, CoordinateType y, CoordinateType z) + { + static_assert(D == 3, "DPosition:DPosition(x,y,z): index overflow!"); + coordinate_[0] = x; + coordinate_[1] = y; + coordinate_[2] = z; + } - /// Destructor (not-virtual as this will save a lot of space!) - ~DPosition() noexcept = default; + /// Copy constructor + DPosition(const DPosition& pos) = default; - //@} + /// Move constructor + DPosition(DPosition&& rhs) noexcept = default; - /// Swap the two objects - void swap(DPosition& rhs) noexcept - { - for (Size i = 0; i < D; ++i) - { - std::swap(coordinate_[i], rhs.coordinate_[i]); - } - } + /// Assignment operator + DPosition& operator=(const DPosition& source) = default; - /// Make all dimension values positive - DPosition& abs() noexcept - { - for (Size i = 0; i < D; ++i) - { - coordinate_[i] = std::abs(coordinate_[i]); - } - return *this; - } + /// Move Assignment operator + DPosition& operator=(DPosition&& source) noexcept = default; - /**@name Accessors */ - //@{ + /// Destructor (not-virtual as this will save a lot of space!) 
+ ~DPosition() noexcept = default; - ///Const accessor for the dimensions - CoordinateType operator[](Size index) const - { - OPENMS_PRECONDITION(index < D, "DPosition:operator [] (Position): index overflow!"); - return coordinate_[index]; - } + //@} - ///Accessor for the dimensions - CoordinateType& operator[](Size index) - { - OPENMS_PRECONDITION(index < D, "DPosition:operator [] (Position): index overflow!"); - return coordinate_[index]; - } + /// Swap the two objects + void swap(DPosition& rhs) noexcept + { + std::swap(coordinate_, rhs.coordinate_); + } - ///Name accessor for the first dimension. Only for DPosition<2>, for visualization. - CoordinateType getX() const + /// Make all dimension values positive + DPosition& abs() noexcept + { + for (Size i = 0; i < D; ++i) { - OPENMS_PRECONDITION(D == 2, "DPosition:getX(): index overflow!"); - return coordinate_[0]; + coordinate_[i] = std::abs(coordinate_[i]); } + return *this; + } - ///Name accessor for the second dimension. Only for DPosition<2>, for visualization. - CoordinateType getY() const - { - OPENMS_PRECONDITION(D == 2, "DPosition:getY(): index overflow!"); - return coordinate_[1]; - } + /**@name Accessors */ + //@{ - ///Name mutator for the first dimension. Only for DPosition<2>, for visualization. - void setX(CoordinateType c) - { - OPENMS_PRECONDITION(D == 2, "DPosition:setX(): index overflow!"); - coordinate_[0] = c; - } + /// Const accessor for the dimensions + CoordinateType operator[](Size index) const + { + OPENMS_PRECONDITION(index < D, "DPosition:operator [] (Position): index overflow!"); + return coordinate_[index]; + } - ///Name mutator for the second dimension. Only for DPosition<2>, for visualization. 
- void setY(CoordinateType c) - { - OPENMS_PRECONDITION(D == 2, "DPosition:setY(): index overflow!"); - coordinate_[1] = c; - } + /// Accessor for the dimensions + CoordinateType& operator[](Size index) + { + OPENMS_PRECONDITION(index < D, "DPosition:operator [] (Position): index overflow!"); + return coordinate_[index]; + } - /// Equality operator - bool operator==(const DPosition& point) const - { - for (Size i = 0; i < D; i++) - { -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wfloat-equal" - if (coordinate_[i] != point.coordinate_[i]) return false; + /// Name accessor for the first dimension. Only for DPosition<2>, for visualization. + CoordinateType getX() const + { + OPENMS_PRECONDITION(D == 2, "DPosition:getX(): index overflow!"); + return coordinate_[0]; + } -#pragma clang diagnostic pop - } - return true; - } + /// Name accessor for the second dimension. Only for DPosition<2>, for visualization. + CoordinateType getY() const + { + OPENMS_PRECONDITION(D == 2, "DPosition:getY(): index overflow!"); + return coordinate_[1]; + } - /// Equality operator - bool operator!=(const DPosition& point) const - { - return !(operator==(point)); - } + /// Name mutator for the first dimension. Only for DPosition<2>, for visualization. + void setX(CoordinateType c) + { + OPENMS_PRECONDITION(D == 2, "DPosition:setX(): index overflow!"); + coordinate_[0] = c; + } - /** - @brief Lexicographical less than operator. - Lexicographical comparison from dimension 0 to dimension D-1 is done. - */ - bool operator<(const DPosition& point) const - { - for (Size i = 0; i < D; i++) - { - if (coordinate_[i] < point.coordinate_[i]) return true; + /// Name mutator for the second dimension. Only for DPosition<2>, for visualization. 
+ void setY(CoordinateType c) + { + OPENMS_PRECONDITION(D == 2, "DPosition:setY(): index overflow!"); + coordinate_[1] = c; + } - if (coordinate_[i] > point.coordinate_[i]) return false; - } - return false; - } + /// Equality operator + bool operator==(const DPosition& point) const = default; - /// Lexicographical greater less or equal operator. - bool operator<=(const DPosition& point) const - { - for (Size i = 0; i < D; i++) - { - if (coordinate_[i] < point.coordinate_[i]) return true; + /// Equality operator + bool operator!=(const DPosition& point) const = default; + /** + @brief Lexicographical less than operator. + Lexicographical comparison from dimension 0 to dimension D-1 is done. + */ + bool operator<(const DPosition& point) const + { + return coordinate_ < point.coordinate_; + } - if (coordinate_[i] > point.coordinate_[i]) return false; - } - return true; - } + /// Lexicographical greater less or equal operator. + bool operator<=(const DPosition& point) const + { + return coordinate_ <= point.coordinate_; + } - /// Spatially (geometrically) less or equal operator. All coordinates must be "<=". - bool spatiallyLessEqual(const DPosition& point) const + /// Spatially (geometrically) less or equal operator. All coordinates must be "<=". + bool spatiallyLessEqual(const DPosition& point) const + { + for (Size i = 0; i < D; i++) { - for (Size i = 0; i < D; i++) - { - if (coordinate_[i] > point.coordinate_[i]) return false; - } - return true; + if (coordinate_[i] > point.coordinate_[i]) return false; } + return true; + } - /// Spatially (geometrically) greater or equal operator. All coordinates must be ">=". - bool spatiallyGreaterEqual(const DPosition& point) const + /// Spatially (geometrically) greater or equal operator. All coordinates must be ">=". 
+ bool spatiallyGreaterEqual(const DPosition& point) const + { + for (Size i = 0; i < D; i++) { - for (Size i = 0; i < D; i++) - { - if (coordinate_[i] < point.coordinate_[i]) return false; - } - return true; + if (coordinate_[i] < point.coordinate_[i]) return false; } + return true; + } - /// Lexicographical greater than operator. - bool operator>(const DPosition& point) const - { - return !(operator<=(point)); - } + /// Lexicographical greater than operator. + bool operator>(const DPosition& point) const + { + return coordinate_ > point.coordinate_; + } - /// Lexicographical greater or equal operator. - bool operator>=(const DPosition& point) const - { - return !operator<(point); - } + /// Lexicographical greater or equal operator. + bool operator>=(const DPosition& point) const + { + return coordinate_ >= point.coordinate_; + } /// Addition (a bit inefficient) DPosition operator+(const DPosition& point) const @@ -338,10 +307,7 @@ namespace OpenMS /// Set all dimensions to zero void clear() { - for (Size i = 0; i < D; ++i) - { - coordinate_[i] = static_cast(0); - } + coordinate_.fill(0); } //@} @@ -377,33 +343,33 @@ namespace OpenMS /** @name Iteration */ //@{ /// Non-mutable begin iterator - ConstIterator begin() const + const_iterator begin() const { - return &(coordinate_[0]); + return &coordinate_[0]; } /// Non-mutable end iterator - ConstIterator end() const + const_iterator end() const { - return &(coordinate_[0]) + D; + return &coordinate_[0] + D; } /// Mutable begin iterator - Iterator begin() + iterator begin() { - return &(coordinate_[0]); + return &coordinate_[0]; } /// Mutable end iterator - Iterator end() + iterator end() { - return &(coordinate_[0]) + D; + return &coordinate_[0] + D; } //@} protected: - CoordinateType coordinate_[D]{}; + DataType coordinate_{}; }; // DPosition /// Scalar multiplication (a bit inefficient) diff --git a/src/openms/include/OpenMS/FORMAT/MzTab.h b/src/openms/include/OpenMS/FORMAT/MzTab.h index 9096cbe2ee5..6931e40783d 
100644 --- a/src/openms/include/OpenMS/FORMAT/MzTab.h +++ b/src/openms/include/OpenMS/FORMAT/MzTab.h @@ -18,8 +18,10 @@ #include +#ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wnon-virtual-dtor" +#endif namespace OpenMS { @@ -874,5 +876,6 @@ namespace OpenMS }; } // namespace OpenMS - -#pragma clang diagnostic pop +#ifdef __clang__ + #pragma clang diagnostic pop +#endif diff --git a/src/openms/include/OpenMS/KERNEL/ChromatogramPeak.h b/src/openms/include/OpenMS/KERNEL/ChromatogramPeak.h index 0e014182594..8a1748180df 100644 --- a/src/openms/include/OpenMS/KERNEL/ChromatogramPeak.h +++ b/src/openms/include/OpenMS/KERNEL/ChromatogramPeak.h @@ -141,13 +141,7 @@ namespace OpenMS } /// Equality operator - inline bool operator==(const ChromatogramPeak & rhs) const - { -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wfloat-equal" - return intensity_ == rhs.intensity_ && position_ == rhs.position_; -#pragma clang diagnostic pop - } + inline bool operator==(const ChromatogramPeak& rhs) const = default; /// Equality operator inline bool operator!=(const ChromatogramPeak & rhs) const diff --git a/src/openms/include/OpenMS/KERNEL/MobilityPeak1D.h b/src/openms/include/OpenMS/KERNEL/MobilityPeak1D.h index 3f1c655d57b..407ac6a4784 100644 --- a/src/openms/include/OpenMS/KERNEL/MobilityPeak1D.h +++ b/src/openms/include/OpenMS/KERNEL/MobilityPeak1D.h @@ -131,13 +131,7 @@ namespace OpenMS ///@} /// Equality operator - bool operator==(const MobilityPeak1D& rhs) const - { -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wfloat-equal" - return intensity_ == rhs.intensity_ && position_ == rhs.position_; -#pragma clang diagnostic pop - } + bool operator==(const MobilityPeak1D& rhs) const = default; /// Equality operator bool operator!=(const MobilityPeak1D& rhs) const diff --git a/src/openms/include/OpenMS/KERNEL/MobilityPeak2D.h b/src/openms/include/OpenMS/KERNEL/MobilityPeak2D.h index 482ce4b47be..e0687d91598 
100644 --- a/src/openms/include/OpenMS/KERNEL/MobilityPeak2D.h +++ b/src/openms/include/OpenMS/KERNEL/MobilityPeak2D.h @@ -193,10 +193,7 @@ namespace OpenMS /// Equality operator bool operator==(const MobilityPeak2D & rhs) const { -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wfloat-equal" - return intensity_ == rhs.intensity_ && position_ == rhs.position_; -#pragma clang diagnostic pop + return std::tie(intensity_, position_) == std::tie(rhs.intensity_, rhs.position_); } /// Equality operator diff --git a/src/openms/include/OpenMS/KERNEL/Peak1D.h b/src/openms/include/OpenMS/KERNEL/Peak1D.h index 5bcc1c6660a..52c269ee55e 100644 --- a/src/openms/include/OpenMS/KERNEL/Peak1D.h +++ b/src/openms/include/OpenMS/KERNEL/Peak1D.h @@ -128,13 +128,7 @@ namespace OpenMS ///@} /// Equality operator - bool operator==(const Peak1D & rhs) const - { -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wfloat-equal" - return intensity_ == rhs.intensity_ && position_ == rhs.position_; -#pragma clang diagnostic pop - } + bool operator==(const Peak1D& rhs) const = default; /// Equality operator bool operator!=(const Peak1D & rhs) const diff --git a/src/openms/include/OpenMS/KERNEL/Peak2D.h b/src/openms/include/OpenMS/KERNEL/Peak2D.h index b901250431d..135cd99f1a6 100644 --- a/src/openms/include/OpenMS/KERNEL/Peak2D.h +++ b/src/openms/include/OpenMS/KERNEL/Peak2D.h @@ -195,13 +195,7 @@ namespace OpenMS ///@} /// Equality operator - bool operator==(const Peak2D & rhs) const - { -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wfloat-equal" - return intensity_ == rhs.intensity_ && position_ == rhs.position_; -#pragma clang diagnostic pop - } + bool operator==(const Peak2D& rhs) const = default; /// Equality operator bool operator!=(const Peak2D & rhs) const diff --git a/src/openms/include/OpenMS/METADATA/ProteinHit.h b/src/openms/include/OpenMS/METADATA/ProteinHit.h index 379f4d52784..1b8f26ff6ea 100644 --- 
a/src/openms/include/OpenMS/METADATA/ProteinHit.h +++ b/src/openms/include/OpenMS/METADATA/ProteinHit.h @@ -63,20 +63,12 @@ namespace OpenMS /// Greater predicate for scores of hits class OPENMS_DLLAPI ScoreMore { -public: - template - bool operator()(const Arg & a, const Arg & b) + public: + template + bool operator()(const Arg& a, const Arg& b) const { -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wfloat-equal" - if (a.getScore() != b.getScore()) -#pragma clang diagnostic pop - { - return a.getScore() > b.getScore(); - } - return a.getAccession() > b.getAccession(); + return std::make_tuple(a.getScore(), a.getAccession()) > std::make_tuple(b.getScore(), b.getAccession()); } - }; /// Lesser predicate for scores of hits @@ -84,16 +76,9 @@ namespace OpenMS { public: template - bool operator()(const Arg & a, const Arg & b) + bool operator()(const Arg & a, const Arg & b) const { -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wfloat-equal" - if (a.getScore() != b.getScore()) -#pragma clang diagnostic pop - { - return a.getScore() < b.getScore(); - } - return a.getAccession() < b.getAccession(); + return std::make_tuple(a.getScore(), a.getAccession()) < std::make_tuple(b.getScore(), b.getAccession()); } }; diff --git a/src/openms/source/FORMAT/HANDLERS/MzDataHandler.cpp b/src/openms/source/FORMAT/HANDLERS/MzDataHandler.cpp index 9ca2ddd485e..643090db81a 100644 --- a/src/openms/source/FORMAT/HANDLERS/MzDataHandler.cpp +++ b/src/openms/source/FORMAT/HANDLERS/MzDataHandler.cpp @@ -591,10 +591,7 @@ namespace OpenMS::Internal << sm.getName() << "\n"; -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wconversion" - if (!sm.getNumber().empty() || sm.getState() || sm.getMass() || sm.getVolume() || sm.getConcentration() || !sm.isMetaEmpty()) -#pragma clang diagnostic pop + if (! sm.getNumber().empty() || sm.getState() != Sample::SAMPLENULL || sm.getMass() || sm.getVolume() || sm.getConcentration() || ! 
sm.isMetaEmpty()) { os << "\t\t\t\n"; writeCVS_(os, sm.getNumber(), "1000001", "SampleNumber"); From e5c3cb24a334707621457171bcf78695782458fe Mon Sep 17 00:00:00 2001 From: Timo Sachsenberg Date: Thu, 15 May 2025 09:28:52 +0200 Subject: [PATCH 18/31] refactor: FFIDmoves many of the external id related functionality in separate class. (#8016) * start separating external id stuff * move svm code into externid class * moved more * simplify a bit more * nop * removed unused functions * Adds private method suffix for consistency Renames private methods in FFIDAlgoExternalIDHandler with a trailing underscore to follow naming conventions and improve code readability. No functional changes introduced. * Namespaces FFIDAlgoExternalIDHandler for clarity Encapsulates FFIDAlgoExternalIDHandler within the Internal namespace to improve code organization and clarify its intended scope of use. Updates relevant references and adds the required namespace import. Improves code modularity and maintainability. 
* Fix namespace closure for FFIDAlgoExternalIDHandler * Update src/openms/source/FEATUREFINDER/FFIDAlgoExternalIDHandler.cpp Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --------- Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- .../FEATUREFINDER/FFIDAlgoExternalIDHandler.h | 172 +++++ .../FeatureFinderIdentificationAlgorithm.h | 36 +- .../OpenMS/FEATUREFINDER/sources.cmake | 1 + .../FFIDAlgoExternalIDHandler.cpp | 678 ++++++++++++++++++ .../FeatureFinderIdentificationAlgorithm.cpp | 573 ++------------- src/openms/source/FEATUREFINDER/sources.cmake | 1 + 6 files changed, 929 insertions(+), 532 deletions(-) create mode 100644 src/openms/include/OpenMS/FEATUREFINDER/FFIDAlgoExternalIDHandler.h create mode 100644 src/openms/source/FEATUREFINDER/FFIDAlgoExternalIDHandler.cpp diff --git a/src/openms/include/OpenMS/FEATUREFINDER/FFIDAlgoExternalIDHandler.h b/src/openms/include/OpenMS/FEATUREFINDER/FFIDAlgoExternalIDHandler.h new file mode 100644 index 00000000000..e969a3e3aad --- /dev/null +++ b/src/openms/include/OpenMS/FEATUREFINDER/FFIDAlgoExternalIDHandler.h @@ -0,0 +1,172 @@ +// Copyright (c) 2002-present, OpenMS Inc. 
-- EKU Tuebingen, ETH Zurich, and FU Berlin +// SPDX-License-Identifier: BSD-3-Clause +// +// -------------------------------------------------------------------------- +// $Maintainer: Timo Sachsenberg $ +// $Authors: Hendrik Weisser $ +// -------------------------------------------------------------------------- + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace OpenMS +{ +namespace Internal +{ + /** + * @brief Class for handling external peptide identifications in feature finding + * + * This class encapsulates all functionality related to external peptide IDs in the + * feature finding process, including storage, RT transformation, and feature annotation. + */ + class OPENMS_DLLAPI FFIDAlgoExternalIDHandler + { + public: + /// RTMap for external data structure storage + typedef std::multimap ExternalRTMap; + + /// Charge to External RTMap mapping + typedef std::map ExternalChargeMap; + + /// Sequence to External Charge Map mapping + typedef std::map ExternalPeptideMap; + + /// Default constructor + FFIDAlgoExternalIDHandler(); + + /// Reset the handler's state + void reset(); + + /// Add an external peptide to the handler's map + void addExternalPeptide(PeptideIdentification& peptide); + + /// Process external peptide IDs + void processExternalPeptides(std::vector& peptides_ext); + + /// Align internal and external IDs to estimate RT shifts and return RT uncertainty + double alignInternalAndExternalIDs( + const std::vector& peptides_internal, + const std::vector& peptides_external, + double rt_quantile); + + /// Transform RT from internal to external scale + double transformRT(double rt) const; + + /// Check if we have RT transformation data + bool hasRTTransformation() const; + + /// Get the RT transformation + const TransformationDescription& getRTTransformation() const; + + /// Classify features using SVM + void classifyFeaturesWithSVM(FeatureMap& 
features, const Param& param); + + /// Filter classified features + void filterClassifiedFeatures(FeatureMap& features, double quality_cutoff); + + /// Calculate FDR for classified features + void calculateFDR(FeatureMap& features); + + /// Get SVM probabilities for internal features + const std::map >& getSVMProbsInternal() const; + + private: + /// Add external peptide to charge map (merged version for compatibility) + void addExternalPeptideToMap_(PeptideIdentification& peptide, + std::map, + std::multimap>>>& peptide_map); + + /// Fill an external RTMap from our data for a specific peptide and charge + bool fillExternalRTMap_(const AASequence& sequence, Int charge, + std::multimap& rt_map); + + /// Check and set feature class based on external data + void annotateFeatureWithExternalIDs_(Feature& feature); + + /// Initialize SVM parameters + void initSVMParameters_(const Param& param); + + /// Finalize assay features + void finalizeAssayFeatures_(Feature& best_feature, double best_quality, double quality_cutoff); + + /// Get random sample for SVM training + void getRandomSample_(std::map& training_labels); + + /// Check observation counts for SVM + void checkNumObservations_(Size n_pos, Size n_neg, const String& note = "") const; + + /// Get unbiased sample for SVM training + void getUnbiasedSample_(const std::multimap >& valid_obs, + std::map& training_labels); + + /// Add dummy peptide identification from external data + void addDummyPeptideID_(Feature& feature, const PeptideIdentification* ext_id); + + /// Handle external feature probability + void handleExternalFeature_(Feature& feature, double prob_positive, double quality_cutoff); + + /// Adjust FDR calculation for external features + void adjustFDRForExternalFeatures_(std::vector& fdr_probs, + std::vector& fdr_qvalues, + Size n_internal_features); + + /// External peptide storage + ExternalPeptideMap external_peptide_map_; + + /// RT transformation description + TransformationDescription 
rt_transformation_; + + /// Number of external peptides + Size n_external_peptides_; + + /// Number of external features + Size n_external_features_; + + /// SVM probabilities for external features + std::multiset svm_probs_external_; + + /// SVM probabilities for internal features + std::map > svm_probs_internal_; + + /// SVM number of parts for cross-validation + Size svm_n_parts_; + + /// SVM number of samples for training + Size svm_n_samples_; + + /// SVM minimum probability threshold + double svm_min_prob_; + + /// SVM quality cutoff + double svm_quality_cutoff; + + /// SVM predictor names + std::vector svm_predictor_names_; + + /// SVM cross-validation output file + String svm_xval_out_; + + /// Debug level + Int debug_level_; + + /// Number of internal features + Size n_internal_features_; + }; + +} // namespace Internal +} // namespace OpenMS diff --git a/src/openms/include/OpenMS/FEATUREFINDER/FeatureFinderIdentificationAlgorithm.h b/src/openms/include/OpenMS/FEATUREFINDER/FeatureFinderIdentificationAlgorithm.h index 32d4b45ef57..de710bffc68 100644 --- a/src/openms/include/OpenMS/FEATUREFINDER/FeatureFinderIdentificationAlgorithm.h +++ b/src/openms/include/OpenMS/FEATUREFINDER/FeatureFinderIdentificationAlgorithm.h @@ -6,8 +6,7 @@ // $Authors: Hendrik Weisser $ // -------------------------------------------------------------------------- -#ifndef OPENMS_TRANSFORMATIONS_FEATUREFINDER_FEATUREFINDERIDENTIFICATIONALGORITHM_H -#define OPENMS_TRANSFORMATIONS_FEATUREFINDER_FEATUREFINDERIDENTIFICATIONALGORITHM_H +#pragma once #include #include @@ -16,23 +15,19 @@ #include #include #include +#include #include #include #include -namespace OpenMS -{ - class IsotopeDistribution; - - - +namespace OpenMS { class OPENMS_DLLAPI FeatureFinderIdentificationAlgorithm : public DefaultParamHandler { public: /// default constructor - FeatureFinderIdentificationAlgorithm(); + FeatureFinderIdentificationAlgorithm(); /// Main method for actual FeatureFinder /// External IDs (@p 
peptides_ext, @p proteins_ext) may be empty, @@ -207,9 +202,9 @@ class OPENMS_DLLAPI FeatureFinderIdentificationAlgorithm : Size n_internal_features_; ///< internal feature counter (for FDR calculation) Size n_external_features_; ///< external feature counter (for FDR calculation) /// TransformationDescription trafo_; // RT transformation (to range 0-1) - TransformationDescription trafo_external_; ///< transform. to external RT scale std::map isotope_probs_; ///< isotope probabilities of transitions MRMFeatureFinderScoring feat_finder_; ///< OpenSWATH feature finder + Internal::FFIDAlgoExternalIDHandler external_id_handler_; ///< Handler for external peptide IDs ProgressLogger prog_log_; @@ -249,22 +244,8 @@ class OPENMS_DLLAPI FeatureFinderIdentificationAlgorithm : PeptideMap& peptide_map, bool external = false); - void checkNumObservations_(Size n_pos, Size n_neg, const String& note = "") const; - - void getUnbiasedSample_(const std::multimap >& valid_obs, - std::map& training_labels); - - void getRandomSample_(std::map& training_labels) const; - - void classifyFeatures_(FeatureMap& features); - - void filterFeaturesFinalizeAssay_(Feature& best_feature, double best_quality, - const double quality_cutoff); - void filterFeatures_(FeatureMap& features, bool classified); - void calculateFDR_(FeatureMap& features); - // seeds for untargeted extraction Size addSeeds_(std::vector& peptides, const FeatureMap& seeds); @@ -313,9 +294,6 @@ class OPENMS_DLLAPI FeatureFinderIdentificationAlgorithm : return chunks; } -}; - +}; // namespace OpenMS } // namespace OpenMS - -#endif - + \ No newline at end of file diff --git a/src/openms/include/OpenMS/FEATUREFINDER/sources.cmake b/src/openms/include/OpenMS/FEATUREFINDER/sources.cmake index d50777d64be..96ac355aa37 100644 --- a/src/openms/include/OpenMS/FEATUREFINDER/sources.cmake +++ b/src/openms/include/OpenMS/FEATUREFINDER/sources.cmake @@ -21,6 +21,7 @@ FeatureFinderIdentificationAlgorithm.h FeatureFinderAlgorithmMetaboIdent.h 
FeatureFinderMultiplexAlgorithm.h FeatureFindingMetabo.h +FFIDAlgoExternalIDHandler.h Fitter1D.h GaussFitter1D.h GaussModel.h diff --git a/src/openms/source/FEATUREFINDER/FFIDAlgoExternalIDHandler.cpp b/src/openms/source/FEATUREFINDER/FFIDAlgoExternalIDHandler.cpp new file mode 100644 index 00000000000..bb3158516db --- /dev/null +++ b/src/openms/source/FEATUREFINDER/FFIDAlgoExternalIDHandler.cpp @@ -0,0 +1,678 @@ +// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin +// SPDX-License-Identifier: BSD-3-Clause +// +// -------------------------------------------------------------------------- +// $Maintainer: Timo Sachsenberg $ +// $Authors: Hendrik Weisser $ +// -------------------------------------------------------------------------- + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace OpenMS +{ +namespace Internal +{ + FFIDAlgoExternalIDHandler::FFIDAlgoExternalIDHandler() : + n_external_peptides_(0), + n_external_features_(0), + svm_n_parts_(3), + svm_n_samples_(0), + svm_min_prob_(0.0), + n_internal_features_(0) + { + } + + void FFIDAlgoExternalIDHandler::initSVMParameters_(const Param& param) + { + svm_min_prob_ = param.getValue("svm:min_prob"); + svm_n_parts_ = param.getValue("svm:xval"); + svm_n_samples_ = param.getValue("svm:samples"); + svm_xval_out_ = param.getValue("svm:xval_out").toString(); + svm_quality_cutoff = svm_min_prob_; + svm_predictor_names_ = ListUtils::create(param.getValue("svm:predictors").toString()); + debug_level_ = param.getValue("debug"); + } + + void FFIDAlgoExternalIDHandler::reset() + { + external_peptide_map_.clear(); + rt_transformation_ = TransformationDescription(); + n_external_peptides_ = 0; + n_external_features_ = 0; + svm_probs_external_.clear(); + svm_probs_internal_.clear(); + n_internal_features_ = 0; + } + + void FFIDAlgoExternalIDHandler::addExternalPeptide(PeptideIdentification& peptide) + { + if 
(peptide.getHits().empty()) + { + return; + } + + peptide.sort(); + PeptideHit& hit = peptide.getHits()[0]; + peptide.getHits().resize(1); + + Int charge = hit.getCharge(); + double rt = peptide.getRT(); + double mz = peptide.getMZ(); + + external_peptide_map_[hit.getSequence()][charge].emplace(rt, &peptide); + + OPENMS_LOG_DEBUG_NOFILE << "Adding peptide (external) " << hit.getSequence() + << "; CHG: " << charge << "; RT: " << rt + << "; MZ: " << mz << std::endl; + } + + void FFIDAlgoExternalIDHandler::processExternalPeptides(std::vector& peptides_ext) + { + for (PeptideIdentification& pep : peptides_ext) + { + addExternalPeptide(pep); + pep.setMetaValue("FFId_category", "external"); + } + + n_external_peptides_ = external_peptide_map_.size(); + } + + double FFIDAlgoExternalIDHandler::alignInternalAndExternalIDs( + const std::vector& peptides_internal, + const std::vector& peptides_external, + double rt_quantile) + { + // Reset the handler state + reset(); + + // Align internal and external IDs to estimate RT shifts: + MapAlignmentAlgorithmIdentification aligner; + aligner.setReference(peptides_external); // go from internal to external scale + std::vector> aligner_peptides(1, peptides_internal); + std::vector aligner_trafos; + + OPENMS_LOG_INFO << "Realigning internal and external IDs..."; + aligner.align(aligner_peptides, aligner_trafos); + rt_transformation_ = aligner_trafos[0]; + + std::vector aligned_diffs; + rt_transformation_.getDeviations(aligned_diffs); + + // Calculate RT uncertainty based on quantile + std::sort(aligned_diffs.begin(), aligned_diffs.end()); + Size index = std::clamp(Size(rt_quantile * aligned_diffs.size()), + Size(0), aligned_diffs.size() - 1); + double rt_uncertainty = aligned_diffs[index]; + + try + { + aligner_trafos[0].fitModel("lowess"); + rt_transformation_ = aligner_trafos[0]; + } + catch (Exception::BaseException& e) + { + OPENMS_LOG_ERROR << "Error: Failed to align RTs of internal/external peptides. 
" + << "RT information will not be considered in the SVM classification. " + << "The original error message was:\n" << e.what() << std::endl; + } + + return rt_uncertainty; + } + + double FFIDAlgoExternalIDHandler::transformRT(double rt) const + { + return rt_transformation_.apply(rt); + } + + bool FFIDAlgoExternalIDHandler::hasRTTransformation() const + { + return !rt_transformation_.getDataPoints().empty(); + } + + const TransformationDescription& FFIDAlgoExternalIDHandler::getRTTransformation() const + { + return rt_transformation_; + } + + void FFIDAlgoExternalIDHandler::addExternalPeptideToMap_(PeptideIdentification& peptide, + std::map, + std::multimap>>>& peptide_map) + { + if (peptide.getHits().empty()) return; + + peptide.sort(); + PeptideHit& hit = peptide.getHits()[0]; + peptide.getHits().resize(1); + + Int charge = hit.getCharge(); + double rt = peptide.getRT(); + + // Add to the external map (second in the pair) + peptide_map[hit.getSequence()][charge].second.emplace(rt, &peptide); + } + + bool FFIDAlgoExternalIDHandler::fillExternalRTMap_(const AASequence& sequence, Int charge, + std::multimap& rt_map) + { + auto seq_it = external_peptide_map_.find(sequence); + if (seq_it == external_peptide_map_.end()) return false; + + auto charge_it = seq_it->second.find(charge); + if (charge_it == seq_it->second.end()) return false; + + rt_map.insert(charge_it->second.begin(), charge_it->second.end()); + return true; + } + + void FFIDAlgoExternalIDHandler::annotateFeatureWithExternalIDs_(Feature& feature) + { + feature.setMetaValue("n_total_ids", 0); + feature.setMetaValue("n_matching_ids", -1); + feature.setMetaValue("feature_class", "unknown"); + } + + void FFIDAlgoExternalIDHandler::addDummyPeptideID_(Feature& feature, const PeptideIdentification* ext_id) + { + if (!ext_id) return; + + PeptideIdentification id = *ext_id; + id.clearMetaInfo(); + id.setMetaValue("FFId_category", "implied"); + id.setRT(feature.getRT()); + id.setMZ(feature.getMZ()); + // Only one 
peptide hit per ID - see function "addPeptideToMap_": + PeptideHit& hit = id.getHits()[0]; + hit.clearMetaInfo(); + hit.setScore(0.0); + feature.getPeptideIdentifications().push_back(id); + } + + void FFIDAlgoExternalIDHandler::handleExternalFeature_(Feature& feature, double prob_positive, double quality_cutoff) + { + svm_probs_external_.insert(prob_positive); + + if (prob_positive >= quality_cutoff) + { + feature.setOverallQuality(prob_positive); + ++n_external_features_; + } + } + + void FFIDAlgoExternalIDHandler::adjustFDRForExternalFeatures_(std::vector& fdr_probs, + std::vector& fdr_qvalues, + Size n_internal_features) + { + std::multiset::reverse_iterator ext_it = svm_probs_external_.rbegin(); + Size external_count = 0; + + for (Int i = fdr_probs.size() - 1; i >= 0; --i) + { + double cutoff = fdr_probs[i]; + while ((ext_it != svm_probs_external_.rend()) && (*ext_it >= cutoff)) + { + ++external_count; + ++ext_it; + } + fdr_qvalues[i] = (fdr_qvalues[i] * external_count) / + (external_count + n_internal_features); + } + } + + void FFIDAlgoExternalIDHandler::checkNumObservations_(Size n_pos, Size n_neg, const String& note) const + { + if (n_pos < svm_n_parts_) + { + String msg = "Not enough positive observations for " + + String(svm_n_parts_) + "-fold cross-validation" + note + "."; + throw Exception::MissingInformation(__FILE__, __LINE__, + OPENMS_PRETTY_FUNCTION, msg); + } + if (n_neg < svm_n_parts_) + { + String msg = "Not enough negative observations for " + + String(svm_n_parts_) + "-fold cross-validation" + note + "."; + throw Exception::MissingInformation(__FILE__, __LINE__, + OPENMS_PRETTY_FUNCTION, msg); + } + } +void FFIDAlgoExternalIDHandler::getUnbiasedSample_(const std::multimap >& valid_obs, + std::map& training_labels) + { + // Create an unbiased training sample: + // - same number of pos./neg. observations (approx.), + // - same intensity distribution of pos./neg. observations. 
+ // We use a sliding window over the set of observations, ordered by + // intensity. At each step, we examine the proportion of both pos./neg. + // observations in the window and select the middle element with according + // probability. (We use an even window size, to cover the ideal case where + // the two classes are balanced.) + const Size window_size = 8; + const Size half_win_size = window_size / 2; + if (valid_obs.size() < half_win_size + 1) + { + String msg = "Not enough observations for intensity-bias filtering."; + throw Exception::MissingInformation(__FILE__, __LINE__, + OPENMS_PRETTY_FUNCTION, msg); + } + srand(time(nullptr)); // seed random number generator + Size n_obs[2] = {0, 0}; // counters for neg./pos. observations + Size counts[2] = {0, 0}; // pos./neg. counts in current window + // iterators to begin, middle and past-the-end of sliding window: + std::multimap >::const_iterator begin, middle, end; + begin = middle = end = valid_obs.begin(); + // initialize ("middle" is at beginning of sequence, so no full window): + for (Size i = 0; i <= half_win_size; ++i, ++end) + { + ++counts[end->second.second]; // increase counter for pos./neg. obs. + } + // "i" is the index of one of the two middle values of the sliding window: + // - in the left half of the sequence, "i" is left-middle, + // - in the right half of the sequence, "i" is right-middle. + // The counts are updated as "i" and the sliding window move to the right. + for (Size i = 0; i < valid_obs.size(); ++i, ++middle) + { + // if count for either class is zero, we don't select anything: + if ((counts[0] > 0) && (counts[1] > 0)) + { + // probability thresholds for neg./pos. observations: + double thresholds[2] = {counts[1] / float(counts[0]), + counts[0] / float(counts[1])}; + // check middle values: + double rnd = rand() / double(RAND_MAX); // random num. 
in range 0-1 + if (rnd < thresholds[middle->second.second]) + { + training_labels[middle->second.first] = Int(middle->second.second); + ++n_obs[middle->second.second]; + } + } + // update sliding window and class counts; + // when we reach the middle of the sequence, we keep the window in place + // for one step, to change from "left-middle" to "right-middle": + if (i != valid_obs.size() / 2) + { + // only move "begin" when "middle" has advanced far enough: + if (i > half_win_size) + { + --counts[begin->second.second]; + ++begin; + } + // don't increment "end" beyond the defined range: + if (end != valid_obs.end()) + { + ++counts[end->second.second]; + ++end; + } + } + } + checkNumObservations_(n_obs[1], n_obs[0], " after bias filtering"); + } + + void FFIDAlgoExternalIDHandler::getRandomSample_(std::map& training_labels) + { + // Pick a random subset of size "svm_n_samples_" for training: Shuffle the whole + // sequence, then select the first "svm_n_samples_" elements. + std::vector selection; + selection.reserve(training_labels.size()); + for (auto it = training_labels.begin(); it != training_labels.end(); ++it) + { + selection.push_back(it->first); + } + Math::RandomShuffler shuffler; + shuffler.portable_random_shuffle(selection.begin(), selection.end()); + // However, ensure that at least "svm_n_parts_" pos./neg. observations are + // included (for cross-validation) - there must be enough, otherwise + // "checkNumObservations" would have thrown an error. To this end, move + // "svm_n_parts_" pos. observations to the beginning of sequence, followed by + // "svm_n_parts_" neg. observations (pos. first - see reason below): + Size n_obs[2] = {0, 0}; // counters for neg./pos. 
observations + for (Int label = 1; label >= 0; --label) + { + for (Size i = n_obs[1]; i < selection.size(); ++i) + { + Size obs_index = selection[i]; + if (training_labels[obs_index] == label) + { + std::swap(selection[i], selection[n_obs[label]]); + ++n_obs[label]; + } + if (n_obs[label] == svm_n_parts_) + { + break; + } + } + } + selection.resize(svm_n_samples_); + // copy the selected subset back: + std::map temp; + for (std::vector::iterator it = selection.begin(); it != selection.end(); + ++it) + { + temp[*it] = training_labels[*it]; + } + training_labels.swap(temp); + } + + void FFIDAlgoExternalIDHandler::classifyFeaturesWithSVM(FeatureMap& features, const Param& param) + { + // Initialize SVM parameters in the external ID handler + initSVMParameters_(param); + + if (features.empty()) + { + return; + } + if (features[0].metaValueExists("rt_delta")) // include RT feature + { + if (std::find(svm_predictor_names_.begin(), svm_predictor_names_.end(), "rt_delta") == svm_predictor_names_.end()) + { + svm_predictor_names_.push_back("rt_delta"); + } + } + // values for all features per predictor (this way around to simplify scaling + // of predictors): + SimpleSVM::PredictorMap predictors; + for (const String& pred : svm_predictor_names_) + { + predictors[pred].reserve(features.size()); + for (Feature& feat : features) + { + if (!feat.metaValueExists(pred)) + { + OPENMS_LOG_ERROR << "Meta value '" << pred << "' missing for feature '" + << feat.getUniqueId() << "'" << std::endl; + predictors.erase(pred); + break; + } + predictors[pred].push_back(feat.getMetaValue(pred)); + } + } + + // get labels for SVM: + std::map training_labels; + bool no_selection = param.getValue("svm:no_selection") == "true"; + // mapping (for bias correction): intensity -> (index, positive?) + std::multimap > valid_obs; + Size n_obs[2] = {0, 0}; // counters for neg./pos. 
observations + for (Size feat_index = 0; feat_index < features.size(); ++feat_index) + { + String feature_class = features[feat_index].getMetaValue("feature_class"); + int label = -1; + if (feature_class == "positive") + { + label = 1; + } + else if (feature_class == "negative") + { + label = 0; + } + if (label != -1) + { + ++n_obs[label]; + if (!no_selection) + { + double intensity = features[feat_index].getIntensity(); + valid_obs.insert(std::make_pair(intensity, std::make_pair(feat_index, + bool(label)))); + } + else + { + training_labels[feat_index] = (double)label; + } + } + } + checkNumObservations_(n_obs[1], n_obs[0]); + + if (!no_selection) + { + getUnbiasedSample_(valid_obs, training_labels); + } + if (svm_n_samples_ > 0) // limited number of samples for training + { + if (training_labels.size() < svm_n_samples_) + { + OPENMS_LOG_WARN << "Warning: There are only " << training_labels.size() + << " valid observations for training." << std::endl; + } + else if (training_labels.size() > svm_n_samples_) + { + getRandomSample_(training_labels); + } + } + + SimpleSVM svm; + // set (only) the relevant parameters: + Param svm_params = svm.getParameters(); + Logger::LogStream no_log; // suppress warnings about additional parameters + svm_params.update(param.copy("svm:", true), false, no_log); + svm.setParameters(svm_params); + svm.setup(predictors, training_labels); + if (!svm_xval_out_.empty()) + { + svm.writeXvalResults(svm_xval_out_); + } + if ((debug_level_ > 0) && svm_params.getValue("kernel") == "linear") + { + std::map feature_weights; + svm.getFeatureWeights(feature_weights); + OPENMS_LOG_DEBUG << "SVM feature weights:" << std::endl; + for (std::map::iterator it = feature_weights.begin(); + it != feature_weights.end(); ++it) + { + OPENMS_LOG_DEBUG << "- " << it->first << ": " << it->second << std::endl; + } + } + + std::vector predictions; + svm.predict(predictions); + OPENMS_POSTCONDITION(predictions.size() == features.size(), + "SVM predictions for all 
features expected"); + for (Size i = 0; i < features.size(); ++i) + { + features[i].setMetaValue("predicted_class", predictions[i].outcome); + double prob_positive = predictions[i].probabilities[1]; + features[i].setMetaValue("predicted_probability", prob_positive); + // @TODO: store previous (OpenSWATH) overall quality in a meta value? + features[i].setOverallQuality(prob_positive); + } + } + + void FFIDAlgoExternalIDHandler::finalizeAssayFeatures_(Feature& best_feature, double best_quality, double quality_cutoff) + { + const String& feature_class = best_feature.getMetaValue("feature_class"); + if (feature_class == "positive") // true positive prediction + { + svm_probs_internal_[best_quality].first++; + } + else if ((feature_class == "negative") || // false positive prediction + (feature_class == "ambiguous")) // let's be strict about this + { + svm_probs_internal_[best_quality].second++; + } + else if (feature_class == "unknown") + { + svm_probs_external_.insert(best_quality); + if (best_quality >= quality_cutoff) + { + best_feature.setOverallQuality(best_quality); + ++n_external_features_; + } + } + } + + void FFIDAlgoExternalIDHandler::filterClassifiedFeatures(FeatureMap& features, double quality_cutoff) + { + if (features.empty()) + { + return; + } + + // Remove features with class "negative" or "ambiguous", keep "positive". + // For class "unknown", for every assay (meta value "PeptideRef"), keep + // the feature with highest "predicted_probability" (= overall quality), + // subject to the "svm:min_prob" threshold. + // We mark features for removal by setting their overall quality to zero. 
+ n_internal_features_ = 0; + n_external_features_ = 0; + FeatureMap::Iterator best_it = features.begin(); + double best_quality = 0.0; + String previous_ref; + for (FeatureMap::Iterator it = features.begin(); it != features.end(); ++it) + { + // features from same assay (same "PeptideRef") appear consecutively; + // if this is a new assay, finalize the previous one: + String peptide_ref = it->getMetaValue("PeptideRef"); + // remove region number, if present: + Size pos_slash = peptide_ref.rfind('/'); + Size pos_colon = peptide_ref.find(':', pos_slash + 2); + peptide_ref = peptide_ref.substr(0, pos_colon); + + if (peptide_ref != previous_ref) + { + if (!previous_ref.empty()) + { + finalizeAssayFeatures_(*best_it, best_quality, quality_cutoff); + best_quality = 0.0; + } + previous_ref = peptide_ref; + } + + // update qualities: + if ((it->getOverallQuality() > best_quality) || + // break ties by intensity: + ((it->getOverallQuality() == best_quality) && + (it->getIntensity() > best_it->getIntensity()))) + { + best_it = it; + best_quality = it->getOverallQuality(); + } + if (it->getMetaValue("feature_class") == "positive") + { + n_internal_features_++; + } + else + { + it->setOverallQuality(0.0); // gets overwritten for "best" candidate + } + } + // set of features from the last assay: + finalizeAssayFeatures_(*best_it, best_quality, quality_cutoff); + + features.erase(std::remove_if(features.begin(), features.end(), + [](const Feature& f) { + return f.getOverallQuality() == 0.0; + }), + features.end()); + } + + void FFIDAlgoExternalIDHandler::calculateFDR(FeatureMap& features) + { + if (getSVMProbsInternal().empty()) return; + + // cumulate the true/false positive counts, in decreasing probability order: + Size n_false = 0, n_true = 0; + for (std::map >::reverse_iterator prob_it = + svm_probs_internal_.rbegin(); prob_it != svm_probs_internal_.rend(); + ++prob_it) + { + n_true += prob_it->second.first; + n_false += prob_it->second.second; + prob_it->second.first = 
n_true; + prob_it->second.second = n_false; + } + + // print FDR for features that made the cut-off: + std::map >::iterator prob_it = + svm_probs_internal_.lower_bound(svm_min_prob_); + if (prob_it != svm_probs_internal_.end()) + { + float fdr = float(prob_it->second.second) / (prob_it->second.first + + prob_it->second.second); + OPENMS_LOG_INFO << "Estimated FDR of features detected based on 'external' IDs: " + << fdr * 100.0 << "%" << std::endl; + fdr = (fdr * n_external_features_) / (n_external_features_ + + n_internal_features_); + OPENMS_LOG_INFO << "Estimated FDR of all detected features: " << fdr * 100.0 + << "%" << std::endl; + } + + // calculate q-values: + std::vector qvalues; + qvalues.reserve(svm_probs_internal_.size()); + double min_fdr = 1.0; + for (prob_it = svm_probs_internal_.begin(); + prob_it != svm_probs_internal_.end(); ++prob_it) + { + double fdr = double(prob_it->second.second) / (prob_it->second.first + + prob_it->second.second); + if (fdr < min_fdr) + { + min_fdr = fdr; + } + qvalues.push_back(min_fdr); + } + // record only probabilities where q-value changes: + std::vector fdr_probs, fdr_qvalues; + std::vector::iterator qv_it = qvalues.begin(); + double previous_qvalue = -1.0; + for (prob_it = svm_probs_internal_.begin(); + prob_it != svm_probs_internal_.end(); ++prob_it, ++qv_it) + { + if (*qv_it != previous_qvalue) + { + fdr_probs.push_back(prob_it->first); + fdr_qvalues.push_back(*qv_it); + previous_qvalue = *qv_it; + } + } + features.setMetaValue("FDR_probabilities", fdr_probs); + features.setMetaValue("FDR_qvalues_raw", fdr_qvalues); + + // FDRs are estimated from "internal" features, but apply only to "external" + // ones. "Internal" features are considered "correct" by definition. 
+ // We need to adjust the q-values to take this into account: + adjustFDRForExternalFeatures_(fdr_probs, fdr_qvalues, n_internal_features_); + features.setMetaValue("FDR_qvalues_corrected", fdr_qvalues); + + // @TODO: should we use "1 - qvalue" as overall quality for features? + // assign q-values to features: + for (Feature& feat : features) + { + if (feat.getMetaValue("feature_class") == "positive") + { + feat.setMetaValue("q-value", 0.0); + } + else + { + double prob = feat.getOverallQuality(); + // find the highest FDR prob. that is less-or-equal to the feature prob.: + std::vector::iterator pos = std::upper_bound(fdr_probs.begin(), + fdr_probs.end(), prob); + if (pos != fdr_probs.begin()) + { + --pos; + } + Size dist = std::distance(fdr_probs.begin(), pos); + feat.setMetaValue("q-value", fdr_qvalues[dist]); + } + } + } + + const std::map >& FFIDAlgoExternalIDHandler::getSVMProbsInternal() const + { + return svm_probs_internal_; + } + +} // namespace Internal +} // namespace OpenMS \ No newline at end of file diff --git a/src/openms/source/FEATUREFINDER/FeatureFinderIdentificationAlgorithm.cpp b/src/openms/source/FEATUREFINDER/FeatureFinderIdentificationAlgorithm.cpp index a831171019b..b9f1a4e6036 100644 --- a/src/openms/source/FEATUREFINDER/FeatureFinderIdentificationAlgorithm.cpp +++ b/src/openms/source/FEATUREFINDER/FeatureFinderIdentificationAlgorithm.cpp @@ -7,7 +7,9 @@ // -------------------------------------------------------------------------- #include +#include #include + #include #include #include @@ -36,9 +38,11 @@ #endif using namespace std; +using namespace OpenMS::Internal; namespace OpenMS { + FeatureFinderIdentificationAlgorithm::FeatureFinderIdentificationAlgorithm() : DefaultParamHandler("FeatureFinderIdentificationAlgorithm") { @@ -426,7 +430,6 @@ namespace OpenMS // to use MS1 Swath scores: feat_finder_.setMS1Map(SimpleOpenMSSpectraFactory::getSpectrumAccessOpenMSPtr(boost::make_shared(ms_data_))); - double rt_uncertainty(0); bool 
with_external_ids = !peptides_ext.empty(); if (with_external_ids && !seeds.empty()) @@ -438,30 +441,11 @@ namespace OpenMS "Using seeds and external ids is currently not supported."); } + double rt_uncertainty(0); if (with_external_ids) { - // align internal and external IDs to estimate RT shifts: - MapAlignmentAlgorithmIdentification aligner; - aligner.setReference(peptides_ext); // go from internal to external scale - vector > aligner_peptides(1, peptides); - vector aligner_trafos; - - OPENMS_LOG_INFO << "Realigning internal and external IDs..."; - aligner.align(aligner_peptides, aligner_trafos); - trafo_external_ = aligner_trafos[0]; - vector aligned_diffs; - trafo_external_.getDeviations(aligned_diffs); - Size index = std::max(Size(0), Size(rt_quantile_ * static_cast(aligned_diffs.size())) - 1); - rt_uncertainty = aligned_diffs[index]; - try - { - aligner_trafos[0].fitModel("lowess"); - trafo_external_ = aligner_trafos[0]; - } - catch (Exception::BaseException& e) - { - OPENMS_LOG_ERROR << "Error: Failed to align RTs of internal/external peptides. RT information will not be considered in the SVM classification. 
The original error message was:\n" << e.what() << endl; - } + // Use the external ID handler to align internal and external IDs + rt_uncertainty = external_id_handler_.alignInternalAndExternalIDs(peptides, peptides_ext, rt_quantile_); } if (rt_window_ == 0.0) @@ -511,12 +495,17 @@ namespace OpenMS } n_internal_peps_ = peptide_map_.size(); - for (PeptideIdentification& pep : peptides_ext) + + if (with_external_ids) { - addPeptideToMap_(pep, peptide_map_, true); - pep.setMetaValue("FFId_category", "external"); + // Process and add external peptides + for (PeptideIdentification& pep : peptides_ext) + { + addPeptideToMap_(pep, peptide_map_, true); + pep.setMetaValue("FFId_category", "external"); + } + n_external_peps_ = peptide_map_.size() - n_internal_peps_; } - n_external_peps_ = peptide_map_.size() - n_internal_peps_; boost::shared_ptr shared = boost::make_shared(ms_data_); OpenSwath::SpectrumAccessPtr spec_temp = @@ -679,11 +668,11 @@ namespace OpenMS void FeatureFinderIdentificationAlgorithm::postProcess_( FeatureMap & features, bool with_external_ids) - { + { // don't do SVM stuff unless we have external data to apply the model to: if (with_external_ids) { - classifyFeatures_(features); + external_id_handler_.classifyFeaturesWithSVM(features, param_); } // make sure proper unique ids get assigned to all features features.ensureUniqueId(); @@ -694,15 +683,26 @@ namespace OpenMS FileHandler().storeFeatures(candidates_out_, features); } - filterFeatures_(features, with_external_ids); - OPENMS_LOG_INFO << features.size() << " features left after filtering." << endl; + // Use ExternalIDHandler for feature filtering + if (with_external_ids) + { + external_id_handler_.filterClassifiedFeatures(features, external_id_handler_.getSVMProbsInternal().empty() ? 0.0 : double(param_.getValue("svm:min_prob"))); + OPENMS_LOG_INFO << features.size() << " features left after filtering." 
<< endl; + } + else + { + filterFeatures_(features, with_external_ids); + OPENMS_LOG_INFO << features.size() << " features left after filtering." << endl; + } if (features.empty()) return; // elution model fit throws on empty features - if (!svm_probs_internal_.empty()) + // Calculate FDR if we have external IDs + if (with_external_ids) { - calculateFDR_(features); - } + external_id_handler_.calculateFDR(features); + } + //TODO MRMFeatureFinderScoring already does an ElutionModel scoring. It uses EMG fitting. // Would be nice if we could only do the fitting once, since it is one of the bottlenecks. // What is the intention of this post-processing here anyway? Does it filter anything? @@ -1106,24 +1106,6 @@ namespace OpenMS } } - void FeatureFinderIdentificationAlgorithm::checkNumObservations_(Size n_pos, Size n_neg, const String& note) const - { - if (n_pos < svm_n_parts_) - { - String msg = "Not enough positive observations for " + - String(svm_n_parts_) + "-fold cross-validation" + note + "."; - throw Exception::MissingInformation(__FILE__, __LINE__, - OPENMS_PRETTY_FUNCTION, msg); - } - if (n_neg < svm_n_parts_) - { - String msg = "Not enough negative observations for " + - String(svm_n_parts_) + "-fold cross-validation" + note + "."; - throw Exception::MissingInformation(__FILE__, __LINE__, - OPENMS_PRETTY_FUNCTION, msg); - } - } - void FeatureFinderIdentificationAlgorithm::annotateFeaturesFinalizeAssay_( FeatureMap& features, map >& feat_ids, RTMap& rt_internal) @@ -1269,41 +1251,46 @@ namespace OpenMS } else // only external IDs -> no validation possible { + // Set feature class to unknown feat.setMetaValue("n_total_ids", 0); feat.setMetaValue("n_matching_ids", -1); feat.setMetaValue("feature_class", "unknown"); - // add "dummy" peptide identification: - PeptideIdentification id = *(rt_external.begin()->second); - id.clearMetaInfo(); - id.setMetaValue("FFId_category", "implied"); - id.setRT(feat.getRT()); - id.setMZ(feat.getMZ()); - // only one peptide hit 
per ID - see function "addPeptideToMap_": - PeptideHit& hit = id.getHits()[0]; - hit.clearMetaInfo(); - hit.setScore(0.0); - feat.getPeptideIdentifications().push_back(id); + + // Add a dummy peptide identification from external data + if (!rt_external.empty()) + { + PeptideIdentification id = *(rt_external.begin()->second); + id.clearMetaInfo(); + id.setMetaValue("FFId_category", "implied"); + id.setRT(feat.getRT()); + id.setMZ(feat.getMZ()); + // only one peptide hit per ID - see function "addPeptideToMap_": + PeptideHit& hit = id.getHits()[0]; + hit.clearMetaInfo(); + hit.setScore(0.0); + feat.getPeptideIdentifications().push_back(id); + } } // distance from feature to closest peptide ID: - if (!trafo_external_.getDataPoints().empty()) + if (external_id_handler_.hasRTTransformation()) { // use external IDs if available, otherwise RT-transformed internal IDs // (but only compute the transform if necessary, once per assay!): if (rt_external.empty() && (transformed_internal.empty() || - (peptide_ref != previous_ref))) + (peptide_ref != previous_ref))) { transformed_internal.clear(); for (RTMap::const_iterator it = rt_internal.begin(); it != rt_internal.end(); ++it) { - double transformed_rt = trafo_external_.apply(it->first); + double transformed_rt = external_id_handler_.transformRT(it->first); RTMap::value_type pair = make_pair(transformed_rt, it->second); transformed_internal.insert(transformed_internal.end(), pair); } } const RTMap& rt_ref = (rt_external.empty() ? 
transformed_internal : - rt_external); + rt_external); double rt_min = feat.getMetaValue("leftWidth"); double rt_max = feat.getMetaValue("rightWidth"); @@ -1398,7 +1385,7 @@ namespace OpenMS if (!quantify_decoys_) { if (hit.metaValueExists("target_decoy") && hit.getMetaValue("target_decoy") == "decoy") - { + { unassignedIDs_.push_back(peptide); return; } @@ -1417,7 +1404,13 @@ namespace OpenMS Int charge = hit.getCharge(); double rt = peptide.getRT(); double mz = peptide.getMZ(); - if (!external) + + if (external) + { + OPENMS_LOG_DEBUG_NOFILE << "Adding peptide (external) " << hit.getSequence() << "; CHG: " << charge << "; RT: " << rt << "; MZ: " << mz << endl; + peptide_map[hit.getSequence()][charge].second.emplace(rt, &peptide); + } + else { if (peptide.metaValueExists("SeedFeatureID")) { @@ -1429,11 +1422,6 @@ namespace OpenMS } peptide_map[hit.getSequence()][charge].first.emplace(rt, &peptide); } - else - { - OPENMS_LOG_DEBUG_NOFILE << "Adding peptide (external) " << hit.getSequence() << "; CHG: " << charge << "; RT: " << rt << "; MZ: " << mz << endl; - peptide_map[hit.getSequence()][charge].second.emplace(rt, &peptide); - } } void FeatureFinderIdentificationAlgorithm::updateMembers_() @@ -1477,444 +1465,23 @@ namespace OpenMS add_mass_offset_peptides_ = double(param_.getValue("add_mass_offset_peptides")); } - void FeatureFinderIdentificationAlgorithm::getUnbiasedSample_(const multimap >& valid_obs, - map& training_labels) - { - // Create an unbiased training sample: - // - same number of pos./neg. observations (approx.), - // - same intensity distribution of pos./neg. observations. - // We use a sliding window over the set of observations, ordered by - // intensity. At each step, we examine the proportion of both pos./neg. - // observations in the window and select the middle element with according - // probability. (We use an even window size, to cover the ideal case where - // the two classes are balanced.) 
- const Size window_size = 8; - const Size half_win_size = window_size / 2; - if (valid_obs.size() < half_win_size + 1) - { - String msg = "Not enough observations for intensity-bias filtering."; - throw Exception::MissingInformation(__FILE__, __LINE__, - OPENMS_PRETTY_FUNCTION, msg); - } - srand(time(nullptr)); // seed random number generator - Size n_obs[2] = {0, 0}; // counters for neg./pos. observations - Size counts[2] = {0, 0}; // pos./neg. counts in current window - // iterators to begin, middle and past-the-end of sliding window: - multimap >::const_iterator begin, middle, end; - begin = middle = end = valid_obs.begin(); - // initialize ("middle" is at beginning of sequence, so no full window): - for (Size i = 0; i <= half_win_size; ++i, ++end) - { - ++counts[end->second.second]; // increase counter for pos./neg. obs. - } - // "i" is the index of one of the two middle values of the sliding window: - // - in the left half of the sequence, "i" is left-middle, - // - in the right half of the sequence, "i" is right-middle. - // The counts are updated as "i" and the sliding window move to the right. - for (Size i = 0; i < valid_obs.size(); ++i, ++middle) - { - // if count for either class is zero, we don't select anything: - if ((counts[0] > 0) && (counts[1] > 0)) - { - // probability thresholds for neg./pos. observations: - double thresholds[2] = {counts[1] / float(counts[0]), - counts[0] / float(counts[1])}; - // check middle values: - double rnd = rand() / double(RAND_MAX); // random num. 
in range 0-1 - if (rnd < thresholds[middle->second.second]) - { - training_labels[middle->second.first] = Int(middle->second.second); - ++n_obs[middle->second.second]; - } - } - // update sliding window and class counts; - // when we reach the middle of the sequence, we keep the window in place - // for one step, to change from "left-middle" to "right-middle": - if (i != valid_obs.size() / 2) - { - // only move "begin" when "middle" has advanced far enough: - if (i > half_win_size) - { - --counts[begin->second.second]; - ++begin; - } - // don't increment "end" beyond the defined range: - if (end != valid_obs.end()) - { - ++counts[end->second.second]; - ++end; - } - } - } - checkNumObservations_(n_obs[1], n_obs[0], " after bias filtering"); - } - - - void FeatureFinderIdentificationAlgorithm::getRandomSample_(std::map& training_labels) const - { - // @TODO: can this be done with less copying back and forth of data? - // Pick a random subset of size "svm_n_samples_" for training: Shuffle the whole - // sequence, then select the first "svm_n_samples_" elements. - std::vector selection; - selection.reserve(training_labels.size()); - for (auto it = training_labels.begin(); it != training_labels.end(); ++it) - { - selection.push_back(it->first); - } - //TODO check how often this is potentially called and move out the initialization - Math::RandomShuffler shuffler; - shuffler.portable_random_shuffle(selection.begin(), selection.end()); - // However, ensure that at least "svm_n_parts_" pos./neg. observations are - // included (for cross-validation) - there must be enough, otherwise - // "checkNumObservations_" would have thrown an error. To this end, move - // "svm_n_parts_" pos. observations to the beginning of sequence, followed by - // "svm_n_parts_" neg. observations (pos. first - see reason below): - Size n_obs[2] = {0, 0}; // counters for neg./pos. 
observations - for (Int label = 1; label >= 0; --label) - { - for (Size i = n_obs[1]; i < selection.size(); ++i) - { - Size obs_index = selection[i]; - if (training_labels[obs_index] == label) - { - std::swap(selection[i], selection[n_obs[label]]); - ++n_obs[label]; - } - if (n_obs[label] == svm_n_parts_) - { - break; - } - } - } - selection.resize(svm_n_samples_); - // copy the selected subset back: - std::map temp; - for (vector::iterator it = selection.begin(); it != selection.end(); - ++it) - { - temp[*it] = training_labels[*it]; - } - training_labels.swap(temp); - } - - void FeatureFinderIdentificationAlgorithm::classifyFeatures_(FeatureMap& features) - { - if (features.empty()) - { - return; - } - if (features[0].metaValueExists("rt_delta")) // include RT feature - { - if (std::find(svm_predictor_names_.begin(), svm_predictor_names_.end(), "rt_delta") == svm_predictor_names_.end()) - { - svm_predictor_names_.push_back("rt_delta"); - } - } - // values for all features per predictor (this way around to simplify scaling - // of predictors): - SimpleSVM::PredictorMap predictors; - for (const String& pred : svm_predictor_names_) - { - predictors[pred].reserve(features.size()); - for (Feature& feat : features) - { - if (!feat.metaValueExists(pred)) - { - OPENMS_LOG_ERROR << "Meta value '" << pred << "' missing for feature '" - << feat.getUniqueId() << "'" << endl; - predictors.erase(pred); - break; - } - predictors[pred].push_back(feat.getMetaValue(pred)); - } - } - - // get labels for SVM: - std::map training_labels; - bool no_selection = param_.getValue("svm:no_selection") == "true"; - // mapping (for bias correction): intensity -> (index, positive?) - std::multimap > valid_obs; - Size n_obs[2] = {0, 0}; // counters for neg./pos. 
observations - for (Size feat_index = 0; feat_index < features.size(); ++feat_index) - { - String feature_class = features[feat_index].getMetaValue("feature_class"); - int label = -1; - if (feature_class == "positive") - { - label = 1; - } - else if (feature_class == "negative") - { - label = 0; - } - if (label != -1) - { - ++n_obs[label]; - if (!no_selection) - { - double intensity = features[feat_index].getIntensity(); - valid_obs.insert(make_pair(intensity, make_pair(feat_index, - bool(label)))); - } - else - { - training_labels[feat_index] = (double)label; - } - } - } - checkNumObservations_(n_obs[1], n_obs[0]); - - if (!no_selection) - { - getUnbiasedSample_(valid_obs, training_labels); - } - if (svm_n_samples_ > 0) // limited number of samples for training - { - if (training_labels.size() < svm_n_samples_) - { - OPENMS_LOG_WARN << "Warning: There are only " << training_labels.size() - << " valid observations for training." << endl; - } - else if (training_labels.size() > svm_n_samples_) - { - getRandomSample_(training_labels); - } - } - - SimpleSVM svm; - // set (only) the relevant parameters: - Param svm_params = svm.getParameters(); - Logger::LogStream no_log; // suppress warnings about additional parameters - svm_params.update(param_.copy("svm:", true), false, no_log); - svm.setParameters(svm_params); - svm.setup(predictors, training_labels); - if (!svm_xval_out_.empty()) - { - svm.writeXvalResults(svm_xval_out_); - } - if ((debug_level_ > 0) && svm_params.getValue("kernel") == "linear") - { - std::map feature_weights; - svm.getFeatureWeights(feature_weights); - OPENMS_LOG_DEBUG << "SVM feature weights:" << endl; - for (std::map::iterator it = feature_weights.begin(); - it != feature_weights.end(); ++it) - { - OPENMS_LOG_DEBUG << "- " << it->first << ": " << it->second << endl; - } - } - - std::vector predictions; - svm.predict(predictions); - OPENMS_POSTCONDITION(predictions.size() == features.size(), - "SVM predictions for all features expected"); - for 
(Size i = 0; i < features.size(); ++i) - { - features[i].setMetaValue("predicted_class", predictions[i].outcome); - double prob_positive = predictions[i].probabilities[1]; - features[i].setMetaValue("predicted_probability", prob_positive); - // @TODO: store previous (OpenSWATH) overall quality in a meta value? - features[i].setOverallQuality(prob_positive); - } - } - - - void FeatureFinderIdentificationAlgorithm::filterFeaturesFinalizeAssay_(Feature& best_feature, double best_quality, - const double quality_cutoff) - { - const String& feature_class = best_feature.getMetaValue("feature_class"); - if (feature_class == "positive") // true positive prediction - { - svm_probs_internal_[best_quality].first++; - } - else if ((feature_class == "negative") || // false positive prediction - (feature_class == "ambiguous")) // let's be strict about this - { - svm_probs_internal_[best_quality].second++; - } - else if (feature_class == "unknown") - { - svm_probs_external_.insert(best_quality); - if (best_quality >= quality_cutoff) - { - best_feature.setOverallQuality(best_quality); - ++n_external_features_; - } - } - } - - void FeatureFinderIdentificationAlgorithm::filterFeatures_(FeatureMap& features, bool classified) + + void FeatureFinderIdentificationAlgorithm::filterFeatures_(OpenMS::FeatureMap& features, bool classified) { if (features.empty()) { return; } - if (classified) - { - // Remove features with class "negative" or "ambiguous", keep "positive". - // For class "unknown", for every assay (meta value "PeptideRef"), keep - // the feature with highest "predicted_probability" (= overall quality), - // subject to the "svm:min_prob" threshold. - // We mark features for removal by setting their overall quality to zero. 
- n_internal_features_ = n_external_features_ = 0; - FeatureMap::Iterator best_it = features.begin(); - double best_quality = 0.0; - String previous_ref; - for (FeatureMap::Iterator it = features.begin(); it != features.end(); - ++it) - { - // features from same assay (same "PeptideRef") appear consecutively; - // if this is a new assay, finalize the previous one: - String peptide_ref = it->getMetaValue("PeptideRef"); - // remove region number, if present: - Size pos_slash = peptide_ref.rfind('/'); - Size pos_colon = peptide_ref.find(':', pos_slash + 2); - peptide_ref = peptide_ref.substr(0, pos_colon); - - if (peptide_ref != previous_ref) - { - if (!previous_ref.empty()) - { - filterFeaturesFinalizeAssay_(*best_it, best_quality, - svm_quality_cutoff); - best_quality = 0.0; - } - previous_ref = peptide_ref; - } - - // update qualities: - if ((it->getOverallQuality() > best_quality) || - // break ties by intensity: - ((it->getOverallQuality() == best_quality) && - (it->getIntensity() > best_it->getIntensity()))) - { - best_it = it; - best_quality = it->getOverallQuality(); - } - if (it->getMetaValue("feature_class") == "positive") - { - n_internal_features_++; - } - else - { - it->setOverallQuality(0.0); // gets overwritten for "best" candidate - } - } - // set of features from the last assay: - filterFeaturesFinalizeAssay_(*best_it, best_quality, svm_quality_cutoff); - - features.erase(remove_if(features.begin(), features.end(), - feature_filter_quality_), features.end()); - } - else + + // For non-classified features, we still use the original filtering + if (!classified) { // remove features without ID (or pseudo ID from seeds) - features.erase(remove_if(features.begin(), features.end(), + features.erase(std::remove_if(features.begin(), features.end(), feature_filter_peptides_), features.end()); } + // Note: The classified case is now handled by ExternalIDHandler::filterClassifiedFeatures + // in the postProcess_ method } - - void 
FeatureFinderIdentificationAlgorithm::calculateFDR_(FeatureMap& features) - { - // cumulate the true/false positive counts, in decreasing probability order: - Size n_false = 0, n_true = 0; - for (std::map >::reverse_iterator prob_it = - svm_probs_internal_.rbegin(); prob_it != svm_probs_internal_.rend(); - ++prob_it) - { - n_true += prob_it->second.first; - n_false += prob_it->second.second; - prob_it->second.first = n_true; - prob_it->second.second = n_false; - } - - // print FDR for features that made the cut-off: - std::map >::iterator prob_it = - svm_probs_internal_.lower_bound(svm_min_prob_); - if (prob_it != svm_probs_internal_.end()) - { - float fdr = float(prob_it->second.second) / (prob_it->second.first + - prob_it->second.second); - OPENMS_LOG_INFO << "Estimated FDR of features detected based on 'external' IDs: " - << fdr * 100.0 << "%" << endl; - fdr = (fdr * n_external_features_) / (n_external_features_ + - n_internal_features_); - OPENMS_LOG_INFO << "Estimated FDR of all detected features: " << fdr * 100.0 - << "%" << endl; - } - - // calculate q-values: - std::vector qvalues; - qvalues.reserve(svm_probs_internal_.size()); - double min_fdr = 1.0; - for (prob_it = svm_probs_internal_.begin(); - prob_it != svm_probs_internal_.end(); ++prob_it) - { - double fdr = double(prob_it->second.second) / (prob_it->second.first + - prob_it->second.second); - if (fdr < min_fdr) - { - min_fdr = fdr; - } - qvalues.push_back(min_fdr); - } - // record only probabilities where q-value changes: - std::vector fdr_probs, fdr_qvalues; - std::vector::iterator qv_it = qvalues.begin(); - double previous_qvalue = -1.0; - for (prob_it = svm_probs_internal_.begin(); - prob_it != svm_probs_internal_.end(); ++prob_it, ++qv_it) - { - if (*qv_it != previous_qvalue) - { - fdr_probs.push_back(prob_it->first); - fdr_qvalues.push_back(*qv_it); - previous_qvalue = *qv_it; - } - } - features.setMetaValue("FDR_probabilities", fdr_probs); - features.setMetaValue("FDR_qvalues_raw", 
fdr_qvalues); - - // FDRs are estimated from "internal" features, but apply only to "external" - // ones. "Internal" features are considered "correct" by definition. - // We need to adjust the q-values to take this into account: - std::multiset::reverse_iterator ext_it = svm_probs_external_.rbegin(); - Size external_count = 0; - for (Int i = fdr_probs.size() - 1; i >= 0; --i) - { - double cutoff = fdr_probs[i]; - while ((ext_it != svm_probs_external_.rend()) && (*ext_it >= cutoff)) - { - ++external_count; - ++ext_it; - } - fdr_qvalues[i] = (fdr_qvalues[i] * external_count) / - (external_count + n_internal_features_); - } - features.setMetaValue("FDR_qvalues_corrected", fdr_qvalues); - - // @TODO: should we use "1 - qvalue" as overall quality for features? - // assign q-values to features: - for (Feature& feat : features) - { - if (feat.getMetaValue("feature_class") == "positive") - { - feat.setMetaValue("q-value", 0.0); - } - else - { - double prob = feat.getOverallQuality(); - // find the highest FDR prob. 
that is less-or-equal to the feature prob.: - std::vector::iterator pos = upper_bound(fdr_probs.begin(), - fdr_probs.end(), prob); - if (pos != fdr_probs.begin()) - { - --pos; - } - Size dist = distance(fdr_probs.begin(), pos); - feat.setMetaValue("q-value", fdr_qvalues[dist]); - } - } - } } diff --git a/src/openms/source/FEATUREFINDER/sources.cmake b/src/openms/source/FEATUREFINDER/sources.cmake index 0f7f6c389f5..eee7581e3f1 100644 --- a/src/openms/source/FEATUREFINDER/sources.cmake +++ b/src/openms/source/FEATUREFINDER/sources.cmake @@ -20,6 +20,7 @@ FeatureFinderIdentificationAlgorithm.cpp FeatureFinderAlgorithmMetaboIdent.cpp FeatureFinderMultiplexAlgorithm.cpp FeatureFindingMetabo.cpp +FFIDAlgoExternalIDHandler.cpp Fitter1D.cpp GaussFitter1D.cpp GaussModel.cpp From 4a57f59fbd4a48a4966d350c72877323b5ce9861 Mon Sep 17 00:00:00 2001 From: Samuel Wein Date: Thu, 15 May 2025 09:52:30 +0200 Subject: [PATCH 19/31] Cleanup tags (#8036) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Update openms_ci_matrix_full.yml Fix destination paths for KNIME artifact downloads. 
* Update package_mac_productbuild.cmake Change logo scaling for installer to "tofit" * Update FLASHDeconvWizardBase.cpp (#8034) updated link to flashdeconv web site * Replace neckarfront with logo * Update package_mac_productbuild.cmake Disable mac installer logo * Fix debian installer (#8035) * add libyaml dependency * proper fix * Update openms_ci_matrix_full.yml fix download-artifact issue * rework tag creation to be less cryptic, add action to publish draft release * fix knime makr latest target * Update openms_ci_matrix_full.yml --------- Co-authored-by: Tjeerd Dijkstra Co-authored-by: Tom David Müller <57191390+t0mdavid-m@users.noreply.github.com> Co-authored-by: Timo Sachsenberg --- .github/workflows/openms_ci_matrix_full.yml | 90 ++++++++++++++------- 1 file changed, 59 insertions(+), 31 deletions(-) diff --git a/.github/workflows/openms_ci_matrix_full.yml b/.github/workflows/openms_ci_matrix_full.yml index 8d8f8c89b79..204a4f63126 100644 --- a/.github/workflows/openms_ci_matrix_full.yml +++ b/.github/workflows/openms_ci_matrix_full.yml @@ -670,7 +670,7 @@ jobs: - name: run GHA release action. id: create_release if: inputs.do_release - uses: ncipollo/release-action@v1.14.0 + uses: ncipollo/release-action@v1.16.0 with: bodyFile: RELEASE_TEXT_GH.md tag: ${{ github.ref_name }} @@ -941,7 +941,7 @@ jobs: mkdir -p ~/.ssh/ echo "$PASS" > ~/.ssh/private.key sudo chmod 600 ~/.ssh/private.key - ln -s ./$folder latest #create link to the release folder + ln -s ../$folder latest #create link to the release folder rsync --progress -avz -e "ssh -i ~/.ssh/private.key -p $PORT -o StrictHostKeyChecking=no" latest "$USER@$HOST:/knime-plugin/updateSite/release" do-release: @@ -963,36 +963,65 @@ jobs: shell: bash run: echo "RUN_NAME=${{ github.event.pull_request && github.event.number || github.ref_name }}" >> $GITHUB_ENV - # NB we create the tag for the OpenMS repo next in a separate action. + # We created the draft release during deploy-installer step. 
Now we want to publish it. + - name: Publish OpenMS release + id: publish_release + if: inputs.do_release + uses: ncipollo/release-action@v1.16.0 + with: + tag: ${{ github.ref_name }} + draft: false + allowUpdates: true + artifactErrorsFailBuild: true + makeLatest: ${{ inputs.mark_as_latest }} + omitBodyDuringUpdate: true + omitNameDuringUpdate: true + updateOnlyUnreleased: true + + + # NB we create the tag for the OpenMS repo next in a separate action. + # SPW TODO: Move the script here to a file once its stable - id: bash_create_tags name: create tags for other repos shell: bash env: GH_TOKEN: ${{ steps.app-token.outputs.token }} run: | - function createGitTag() { + function handleGitTag() { REPO=$1 SHA=$2 - gh api \ - --method POST \ + TAG_NAME="${{ env.RUN_NAME }}" + + # Check if the tag exists + TAG_EXISTS=$(gh api \ + --method GET \ -H "Accept: application/vnd.github+json" \ -H "X-GitHub-Api-Version: 2022-11-28" \ - /repos/${REPO}/git/refs \ - -f ref="refs/tags/${{ env.RUN_NAME }}" \ - -f sha="${SHA}" - } - - function updateGitTag() { - REPO=$1 - SHA=$2 - - gh api \ - --method PATCH \ - -H "Accept: application/vnd.github+json" \ - -H "X-GitHub-Api-Version: 2022-11-28" \ - /repos/${REPO}/git/refs/tags/${{ env.RUN_NAME }} \ - -f sha="${SHA}" \ - -F force=true + /repos/${REPO}/git/refs/tags/${TAG_NAME} 2>/dev/null || echo "TAG_NOT_FOUND") + + if [[ "$TAG_EXISTS" == "TAG_NOT_FOUND" ]]; then + # Tag doesn't exist, create it + echo "Creating tag ${TAG_NAME} in ${REPO} pointing to ${SHA}" + gh api \ + --method POST \ + -H "Accept: application/vnd.github+json" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + /repos/${REPO}/git/refs \ + -f ref="refs/tags/${TAG_NAME}" \ + -f sha="${SHA}" + else + # Tag exists, check if it points to the same SHA + EXISTING_SHA=$(echo $TAG_EXISTS | jq -r '.object.sha') + + if [[ "$EXISTING_SHA" == "$SHA" ]]; then + # Tag already points to the correct SHA, do nothing + echo "Tag ${TAG_NAME} already exists in ${REPO} and points to the correct 
SHA: ${SHA}" + else + # Tag exists but points to a different SHA, throw an error + echo "Error: Tag ${TAG_NAME} already exists in ${REPO} but points to ${EXISTING_SHA} instead of ${SHA}. Manual intervention required." + exit 1 + fi + fi } DEOPENMS_SHA=$(curl -s -X GET https://api.github.com/repos/OpenMS/de.openms.knime/git/ref/heads/develop |jq -r '.object.sha') GKN_SHA=$(curl -s -X GET https://api.github.com/repos/genericworkflownodes/GenericKnimeNodes/git/ref/heads/develop |jq -r '.object.sha') @@ -1003,15 +1032,14 @@ jobs: TUTORIAL_SHA=$(curl https://api.github.com/repos/OpenMS/Tutorials/git/refs/heads/master | jq -r ".object.sha") DOCS_SHA=$(curl https://api.github.com/repos/OpenMS/OpenMS-docs/git/refs/heads/develop | jq -r ".object.sha") - createGitTag OpenMS/contrib $CONTRIB_SHA || updateGitTag OpenMS/contrib $CONTRIB_SHA - createGitTag OpenMS/pyopenms-docs $PYDOCS_SHA || updateGitTag OpenMS/pyopenms-docs $PYDOCS_SHA - createGitTag OpenMS/THIRDPARTY $THIRDPARTY_SHA || updateGitTag OpenMS/THIRDPARTY $THIRDPARTY_SHA - createGitTag OpenMS/Tutorials $TUTORIAL_SHA || updateGitTag OpenMS/Tutorials $TUTORIAL_SHA - createGitTag OpenMS/OpenMS-docs $DOCS_SHA || updateGitTag OpenMS/OpenMS-docs $DOCS_SHA - createGitTag OpenMS/de.openms.knime $DEOPENMS_SHA || updateGitTag OpenMS/de.openms.knime $DEOPENMS_SHA - - #FIXME reenable these after we get the correct access permissions - #createGitTag genericworkflownodes/de.openms.knime.dynamicJSViewers $JSViewer_SHA || updateGitTag genericworkflownodes/de.openms.knime.dynamicJSViewers $GKN_SHA + handleGitTag OpenMS/contrib $CONTRIB_SHA + handleGitTag OpenMS/pyopenms-docs $PYDOCS_SHA + handleGitTag OpenMS/THIRDPARTY $THIRDPARTY_SHA + handleGitTag OpenMS/Tutorials $TUTORIAL_SHA + handleGitTag OpenMS/OpenMS-docs $DOCS_SHA + handleGitTag OpenMS/de.openms.knime $DEOPENMS_SHA + # Uncomment when permissions are fixed + # handleGitTag genericworkflownodes/de.openms.knime.dynamicJSViewers $JSViewer_SHA - name: Merge to Develop if: 
inputs.mark_as_latest From 0a3375a02c287c9c9ba125a06b90154ed66640e2 Mon Sep 17 00:00:00 2001 From: Bela Bennet Pfeffer Date: Thu, 15 May 2025 14:04:55 +0200 Subject: [PATCH 20/31] Added a test to the XMLHandler_test that would cause the previous version of strLength to generate a SEGFAULT and the new version to pass the test. The error was caused because, when given a nullptr, the function didn't check whether input_ptr is a nullptr before dereferencing it and immediately attempted to perform pointer operations. Patched it with an if statement. Also changed some int datatypes to size_t datatypes to prevent potential errors with overflow and to improve consistency --- src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp | 6 +++--- src/tests/class_tests/openms/source/XMLHandler_test.cpp | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp index 1984dab42d3..204b0bf6194 100644 --- a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp +++ b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp @@ -449,9 +449,9 @@ namespace OpenMS::Internal uint16_t zero_mask = simde_mm_movemask_epi8(cmp_zero); if (zero_mask != 0x0000) { - int byte_pos_zero = __builtin_ctz(zero_mask); - int char_pos_zero = byte_pos_zero / 2; - return processed_chars + static_cast<>(char_pos_zero); + size_t byte_pos_zero = __builtin_ctz(zero_mask); + size_t char_pos_zero = byte_pos_zero / 2; + return processed_chars + char_pos_zero; } // 8 Zeichen (16 Bytes) wurden verarbeitet, keine Null gefunden diff --git a/src/tests/class_tests/openms/source/XMLHandler_test.cpp b/src/tests/class_tests/openms/source/XMLHandler_test.cpp index b55c6ad27d7..021863619e6 100644 --- a/src/tests/class_tests/openms/source/XMLHandler_test.cpp +++ b/src/tests/class_tests/openms/source/XMLHandler_test.cpp @@ -132,7 +132,7 @@ START_SECTION(appendASCII(const XMLCh * chars, const XMLSize_t length, String & std::cout << o7_str.size()
<< std::endl; END_SECTION - +XMLCh* nullPointer = nullptr; START_SECTION(appendASCII(const XMLCh * chars, const XMLSize_t length, String & result)) int o_length = StringManager::strLength(ascii); TEST_EQUAL(o_length, a_length); @@ -140,6 +140,7 @@ START_SECTION(appendASCII(const XMLCh * chars, const XMLSize_t length, String & TEST_EQUAL(o_length, e_length); o_length = StringManager::strLength(upperBoundary); TEST_EQUAL(o_length, u_length); + o_length = StringManager::strLength(nullPointer); END_SECTION END_TEST From acea49e24a0c230b579f455af18ff7e9f0ff61a7 Mon Sep 17 00:00:00 2001 From: Bela Bennet Pfeffer Date: Thu, 15 May 2025 14:54:49 +0200 Subject: [PATCH 21/31] Beschreibung von strLength test angepasst --- src/tests/class_tests/openms/source/XMLHandler_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tests/class_tests/openms/source/XMLHandler_test.cpp b/src/tests/class_tests/openms/source/XMLHandler_test.cpp index 021863619e6..46e912498a5 100644 --- a/src/tests/class_tests/openms/source/XMLHandler_test.cpp +++ b/src/tests/class_tests/openms/source/XMLHandler_test.cpp @@ -133,7 +133,7 @@ START_SECTION(appendASCII(const XMLCh * chars, const XMLSize_t length, String & END_SECTION XMLCh* nullPointer = nullptr; -START_SECTION(appendASCII(const XMLCh * chars, const XMLSize_t length, String & result)) +START_SECTION(strLength(const XMLCh* input_ptr)) int o_length = StringManager::strLength(ascii); TEST_EQUAL(o_length, a_length); o_length = StringManager::strLength(empty); From 970bfbefb20293ec9af934716d8fe2f58d7206c1 Mon Sep 17 00:00:00 2001 From: Chris Bielow Date: Thu, 15 May 2025 18:11:23 +0200 Subject: [PATCH 22/31] TOPPView: fixes a few error message boxes (in IM frames and 2D projections) (#8047) * avoid "unknown pragma" warnings on MSVC * fix TV error message box when right-clicking on a 2D mobilogram (which has no RT information) * avoid error in TV when activating 2D projections (for any type of data) * fix: allow projections
for IM frames from MS2 data (would otherwise be empty) --- .../include/OpenMS/VISUAL/Plot2DCanvas.h | 4 +- .../source/VISUAL/LayerDataPeak.cpp | 52 +++-- src/openms_gui/source/VISUAL/Plot2DCanvas.cpp | 200 ++++++++---------- 3 files changed, 125 insertions(+), 131 deletions(-) diff --git a/src/openms_gui/include/OpenMS/VISUAL/Plot2DCanvas.h b/src/openms_gui/include/OpenMS/VISUAL/Plot2DCanvas.h index 0960c6756d6..39b1c61540b 100644 --- a/src/openms_gui/include/OpenMS/VISUAL/Plot2DCanvas.h +++ b/src/openms_gui/include/OpenMS/VISUAL/Plot2DCanvas.h @@ -109,8 +109,8 @@ protected slots: // Docu in base class bool finishAdding_() override; - /// Collects fragment ion scans in the indicated RT/mz area and adds them to the indicated action - bool collectFragmentScansInArea_(const RangeType& range, QAction* a, QMenu* msn_scans, QMenu* msn_meta); + /// Collects fragment ion scans in the indicated RT/mz area and adds them to the menus + bool collectFragmentScansInArea_(const RangeType& range, QMenu* msn_scans, QMenu* msn_meta); /// Draws the coordinates (or coordinate deltas) to the widget's upper left corner void drawCoordinates_(QPainter& painter, const PeakIndex& peak); diff --git a/src/openms_gui/source/VISUAL/LayerDataPeak.cpp b/src/openms_gui/source/VISUAL/LayerDataPeak.cpp index f571a1a2629..f8555487101 100644 --- a/src/openms_gui/source/VISUAL/LayerDataPeak.cpp +++ b/src/openms_gui/source/VISUAL/LayerDataPeak.cpp @@ -79,11 +79,12 @@ namespace OpenMS MSSpectrum projection_mz; Mobilogram projection_im; MSChromatogram projection_rt; - - for (auto i = getPeakData()->areaBeginConst(area); i != getPeakData()->areaEndConst(); ++i) + const auto& exp = *getPeakData(); + auto lvls = exp.getMSLevels(); // use for smallest MS level in the data (IM frames may have all level 1, or all level 2) + for (auto i = exp.areaBeginConst(area, lvls[0]); i != exp.areaEndConst(); ++i) { PeakIndex pi = i.getPeakIndex(); - if (filters.passes((*getPeakData())[pi.spectrum], pi.peak)) + if 
(filters.passes(exp[pi.spectrum], pi.peak)) { // summary stats ++peak_count; @@ -104,25 +105,42 @@ namespace OpenMS } } - // write to spectra/chrom projection_mz.resize(mzint.size() + 2); - projection_mz[0].setMZ(area.getMinMZ()); - projection_mz[0].setIntensity(0.0); - projection_mz.back().setMZ(area.getMaxMZ()); - projection_mz.back().setIntensity(0.0); + // write to spectra/chrom + try + { // may throw if m/z is not in area + projection_mz[0].setMZ(area.getMinMZ()); + projection_mz[0].setIntensity(0.0); + projection_mz.back().setMZ(area.getMaxMZ()); + projection_mz.back().setIntensity(0.0); + } + catch (...) { } + projection_im.resize(mobility.size() + 2); - projection_im[0].setMobility(area.getMinMobility()); - projection_im[0].setIntensity(0.0); - projection_im.back().setMobility(area.getMaxMobility()); - projection_im.back().setIntensity(0.0); - + try + { // may throw if IM is not in area + projection_im[0].setMobility(area.getMinMobility()); + projection_im[0].setIntensity(0.0); + projection_im.back().setMobility(area.getMaxMobility()); + projection_im.back().setIntensity(0.0); + } + catch (...) + { + } projection_rt.resize(rt.size() + 2); - projection_rt[0].setRT(area.getMinRT()); - projection_rt[0].setIntensity(0.0); - projection_rt.back().setRT(area.getMaxRT()); - projection_rt.back().setIntensity(0.0); + try + { // may throw if RT is not in area + projection_rt[0].setRT(area.getMinRT()); + projection_rt[0].setIntensity(0.0); + projection_rt.back().setRT(area.getMaxRT()); + projection_rt.back().setIntensity(0.0); + } + catch (...) 
+ { + } + Size i = 1; auto intit = mzint.begin(); diff --git a/src/openms_gui/source/VISUAL/Plot2DCanvas.cpp b/src/openms_gui/source/VISUAL/Plot2DCanvas.cpp index 0b78b3c649a..aa45ce71cd4 100644 --- a/src/openms_gui/source/VISUAL/Plot2DCanvas.cpp +++ b/src/openms_gui/source/VISUAL/Plot2DCanvas.cpp @@ -884,132 +884,107 @@ namespace OpenMS settings_menu->addAction("Show/hide projections"); settings_menu->addAction("Show/hide MS/MS precursors"); - //add surrounding survey scans - //find nearest survey scan - SignedSize size = lp->getPeakData()->size(); - Int current = lp->getPeakData()->RTBegin(e_units.getMinRT()) - lp->getPeakData()->begin(); - if (current == size) // if the user clicked right of the last MS1 scan + // in a IM-frame (IM vs. m/z), the RT is empty in `e_units`, and showing neighbouring RT scans is not possible (this layer only has this IM frame) + // --> skip entries for RT neighbours. + if (!e_units.RangeRT::isEmpty()) { - current = std::max(SignedSize{0}, size - 1); // we want the rightmost valid scan index - } - - SignedSize i = 0; - while (current + i < size || current - i >= 0) - { - if (current + i < size && (*lp->getPeakData())[current + i].getMSLevel() == 1) - { - current += i; - break; - } - if (current - i >= 0 && (*lp->getPeakData())[current - i].getMSLevel() == 1) + // add surrounding survey scans + // find nearest survey scan + SignedSize size = lp->getPeakData()->size(); + Int current = lp->getPeakData()->RTBegin(e_units.getMinRT()) - lp->getPeakData()->begin(); + if (current == size) // if the user clicked right of the last MS1 scan { - current -= i; - break; + current = std::max(SignedSize {0}, size - 1); // we want the rightmost valid scan index } - ++i; - } - // search for four scans in both directions - vector indices; - indices.push_back(current); - i = 1; - while (current - i >= 0 && indices.size() < 5) - { - if ((*lp->getPeakData())[current - i].getMSLevel() == 1) + + SignedSize i = 0; + while (current + i < size || current - i >= 
0) { - indices.push_back(current - i); + if (current + i < size && (*lp->getPeakData())[current + i].getMSLevel() == 1) + { + current += i; + break; + } + if (current - i >= 0 && (*lp->getPeakData())[current - i].getMSLevel() == 1) + { + current -= i; + break; + } + ++i; } - ++i; - } - i = 1; - while (current + i < size && indices.size() < 9) - { - if ((*lp->getPeakData())[current + i].getMSLevel() == 1) + // search for four scans in both directions + vector indices; + indices.push_back(current); + i = 1; + while (current - i >= 0 && indices.size() < 5) { - indices.push_back(current + i); + if ((*lp->getPeakData())[current - i].getMSLevel() == 1) { indices.push_back(current - i); } + ++i; } - ++i; - } - sort(indices.rbegin(), indices.rend()); - QMenu* ms1_scans = context_menu->addMenu("Survey scan in 1D"); - QMenu* ms1_meta = context_menu->addMenu("Survey scan meta data"); - context_menu->addSeparator(); - for (i = 0; i < (Int)indices.size(); ++i) - { - if (indices[i] == current) + i = 1; + while (current + i < size && indices.size() < 9) { - ms1_scans->addSeparator(); + if ((*lp->getPeakData())[current + i].getMSLevel() == 1) { indices.push_back(current + i); } + ++i; } - a = ms1_scans->addAction(QString("RT: ") + QString::number((*lp->getPeakData())[indices[i]].getRT())); - a->setData(indices[i]); - if (indices[i] == current) + sort(indices.rbegin(), indices.rend()); + QMenu* ms1_scans = context_menu->addMenu("Survey scan in 1D"); + QMenu* ms1_meta = context_menu->addMenu("Survey scan meta data"); + context_menu->addSeparator(); + for (auto idx : indices) { - ms1_scans->addSeparator(); + if (idx == current) { ms1_scans->addSeparator(); } + ms1_scans->addAction(QString("RT: ") + QString::number((*lp->getPeakData())[idx].getRT()), + [=]() { emit showSpectrumAsNew1D(idx); }); + if (idx == current) { ms1_scans->addSeparator(); } + + if (idx == current) { ms1_meta->addSeparator(); } + ms1_meta->addAction(QString("RT: ") + 
QString::number((*lp->getPeakData())[idx].getRT()), + [=]() { showMetaData(true, idx); }); + if (idx == current) { ms1_meta->addSeparator(); } } - - if (indices[i] == current) + // add surrounding fragment scans + // - We first attempt to look at the position where the user clicked + // - Next we look within the +/- 5 scans around that position + // - Next we look within the whole visible area + QMenu* msn_scans = new QMenu("fragment scan in 1D"); + QMenu* msn_meta = new QMenu("fragment scan meta data"); + bool item_added = collectFragmentScansInArea_(check_area, msn_scans, msn_meta); + if (! item_added) { - ms1_meta->addSeparator(); + // Now simply go for the 5 closest points in RT and check whether there + // are any scans. + // NOTE: that if we go for the visible area, we run the + // risk of iterating through *all* the scans. + check_area.RangeMZ::extend((RangeMZ)visible_area_.getAreaUnit()); + const auto& specs = lp->getPeakData()->getSpectra(); + check_area.RangeRT::operator=(RangeRT(specs[indices.back()].getRT(), specs[indices.front()].getRT())); + item_added = collectFragmentScansInArea_(check_area, msn_scans, msn_meta); + + if (! item_added) + { // OK, now lets search the whole visible area (may be large!) 
+ item_added = collectFragmentScansInArea_(visible_area_.getAreaUnit(), msn_scans, msn_meta); + } } - a = ms1_meta->addAction(QString("RT: ") + QString::number((*lp->getPeakData())[indices[i]].getRT())); - a->setData(indices[i]); - if (indices[i] == current) + if (item_added) { - ms1_meta->addSeparator(); + context_menu->addMenu(msn_scans); + context_menu->addMenu(msn_meta); + context_menu->addSeparator(); } - } - // add surrounding fragment scans - // - We first attempt to look at the position where the user clicked - // - Next we look within the +/- 5 scans around that position - // - Next we look within the whole visible area - QMenu* msn_scans = new QMenu("fragment scan in 1D"); - QMenu* msn_meta = new QMenu("fragment scan meta data"); - bool item_added = collectFragmentScansInArea_(check_area, a, msn_scans, msn_meta); - if (!item_added) - { - // Now simply go for the 5 closest points in RT and check whether there - // are any scans. - // NOTE: that if we go for the visible area, we run the - // risk of iterating through *all* the scans. - check_area.RangeMZ::extend((RangeMZ)visible_area_.getAreaUnit()); - const auto& specs = lp->getPeakData()->getSpectra(); - check_area.RangeRT::operator=(RangeRT(specs[indices.back()].getRT(), specs[indices.front()].getRT())); - item_added = collectFragmentScansInArea_(check_area, a, msn_scans, msn_meta); - - if (!item_added) - { // OK, now lets search the whole visible area (may be large!) 
- item_added = collectFragmentScansInArea_(visible_area_.getAreaUnit(), a, msn_scans, msn_meta); + auto it_closest_MS = lp->getPeakData()->getClosestSpectrumInRT(e_units.getMinRT()); + if (it_closest_MS->containsIMData()) + { + context_menu->addAction( + ("Switch to ion mobility view (MSLevel: " + String(it_closest_MS->getMSLevel()) + ";RT: " + String(it_closest_MS->getRT(), false) + ")") + .c_str(), + [=]() { emit showCurrentPeaksAsIonMobility(*it_closest_MS); }); } - } - if (item_added) - { - context_menu->addMenu(msn_scans); - context_menu->addMenu(msn_meta); - context_menu->addSeparator(); - } - - auto it_closest_MS = lp->getPeakData()->getClosestSpectrumInRT(e_units.getMinRT()); - if (it_closest_MS->containsIMData()) - { - context_menu->addAction(("Switch to ion mobility view (MSLevel: " + String(it_closest_MS->getMSLevel()) + ";RT: " + String(it_closest_MS->getRT(), false) + ")").c_str(), - [&]() {emit showCurrentPeaksAsIonMobility(*it_closest_MS); }); - } - + } // end of hasRT finishContextMenu_(context_menu, settings_menu); - - // evaluate menu - if ((result = context_menu->exec(mapToGlobal(e->pos())))) - { - if (result->parent() == ms1_scans || result->parent() == msn_scans) - { - emit showSpectrumAsNew1D(result->data().toInt()); - } - else if (result->parent() == ms1_meta || result->parent() == msn_meta) - { - showMetaData(true, result->data().toInt()); - } - } + context_menu->exec(mapToGlobal(e->pos())); } //-------------------FEATURES---------------------------------- else if (auto* lf = dynamic_cast(&layer)) @@ -1548,7 +1523,7 @@ namespace OpenMS resetZoom(true); } - bool Plot2DCanvas::collectFragmentScansInArea_(const RangeType& range, QAction* a, QMenu* msn_scans, QMenu* msn_meta) + bool Plot2DCanvas::collectFragmentScansInArea_(const RangeType& range, QMenu* msn_scans, QMenu* msn_meta) { auto& layer = dynamic_cast(getCurrentLayer()); bool item_added = false; @@ -1561,10 +1536,11 @@ namespace OpenMS double mz = it->getPrecursors()[0].getMZ(); if 
(it->getMSLevel() > 1 && range.containsMZ(mz)) { - a = msn_scans->addAction(QString("RT: ") + QString::number(it->getRT()) + " mz: " + QString::number(mz)); - a->setData((int)(it - layer.getPeakData()->begin())); - a = msn_meta->addAction(QString("RT: ") + QString::number(it->getRT()) + " mz: " + QString::number(mz)); - a->setData((int)(it - layer.getPeakData()->begin())); + msn_scans->addAction(QString("RT: ") + QString::number(it->getRT()) + " mz: " + QString::number(mz), + [=]() { emit showSpectrumAsNew1D(it - layer.getPeakData()->begin()); }); + + msn_meta->addAction(QString("RT: ") + QString::number(it->getRT()) + " mz: " + QString::number(mz), + [=]() { showMetaData(true, it - layer.getPeakData()->begin()); }); item_added = true; } } From 36744dbb38b9eb1ae37976109667d4f3042e2627 Mon Sep 17 00:00:00 2001 From: Timo Sachsenberg Date: Fri, 16 May 2025 14:51:13 +0200 Subject: [PATCH 23/31] refactor: separate ranges (#8021) * added update ranges to other file formats * explorative attempt Introduces dual-range management for spectra and chromatograms Replaces the single RangeManager with separate SpectrumRangeManager and ChromatogramRangeManager, enabling independent handling of ranges for MS spectra and chromatograms. Maintains backward compatibility using a combined range manager. Adds extensive tests to validate the new dual-range system and its integration with existing functionality. This change aims to improve modularity and clarity in range handling, while preserving legacy behavior for seamless integration. * some progress * fix * more fixes * Adds detailed range reporting for MSExperiment data Introduces specialized templates for human-readable and machine-readable range reporting in MSExperiment, covering retention time, mass-to-charge, ion mobility, and intensity across combined, spectrum-specific, and chromatogram ranges. Enhances the clarity and granularity of data outputs. 
* more fixes * more fixes * some fixes * compiles again * fix tests * add file * add cos * add python bindings * make private * simplify interface * simplify a bit more * update test * address reviewer comments * remove non const access * don't treat chromatograms as metadata * exclude precursors from range calculation --- doc/code_examples/Tutorial_MSExperiment.cpp | 19 +- .../OpenMS/KERNEL/ChromatogramRangeManager.h | 38 ++ .../include/OpenMS/KERNEL/MSExperiment.h | 113 ++++- .../OpenMS/KERNEL/SpectrumRangeManager.h | 156 +++++++ .../include/OpenMS/KERNEL/sources.cmake | 2 + .../FeatureFinderAlgorithmPicked.cpp | 14 +- .../FEATUREFINDER/MultiplexClustering.cpp | 18 +- src/openms/source/FORMAT/MzQCFile.cpp | 4 +- .../source/IONMOBILITY/IMDataConverter.cpp | 6 +- .../KERNEL/ChromatogramRangeManager.cpp | 14 + src/openms/source/KERNEL/ConversionHelper.cpp | 2 +- src/openms/source/KERNEL/MSExperiment.cpp | 187 ++++---- .../source/KERNEL/SpectrumRangeManager.cpp | 14 + src/openms/source/KERNEL/sources.cmake | 2 + .../include/OpenMS/VISUAL/LayerDataChrom.h | 2 +- .../include/OpenMS/VISUAL/LayerDataPeak.h | 2 +- .../VISUAL/APPLICATIONS/TOPPViewBase.cpp | 6 +- .../pxds/ChromatogramRangeManager.pxd | 29 ++ src/pyOpenMS/pxds/MSExperiment.pxd | 12 +- src/pyOpenMS/pxds/RangeManager.pxd | 15 + src/pyOpenMS/pxds/SpectrumRangeManager.pxd | 40 ++ .../FeatureFinderAlgorithmPicked_test.cpp | 2 +- .../FeatureFinderMultiplexAlgorithm_test.cpp | 2 +- .../openms/source/MSExperiment_test.cpp | 402 ++++++++++++++++-- .../openms/source/MzMLFile_test.cpp | 6 +- src/tests/topp/FileFilter_47_output.mzML | 66 +-- src/tests/topp/FileFilter_48_output.mzML | 58 +-- src/tests/topp/FileInfo_19_output.txt | 25 +- src/tests/topp/FileInfo_1_output.txt | 20 +- src/tests/topp/FileInfo_2_output.txt | 20 +- src/tests/topp/FileInfo_4_output.txt | 25 +- src/tests/topp/FileInfo_5_output.txt | 25 +- src/tests/topp/FileInfo_6_output.txt | 19 +- src/tests/topp/FileInfo_9_output.txt | 25 +- 
src/tests/topp/QCCalculator_2_output.mzQC | 8 +- src/topp/FileInfo.cpp | 368 +++++++++++++++- src/topp/FileMerger.cpp | 2 +- src/topp/ImageCreator.cpp | 9 +- src/topp/MapAlignerPoseClustering.cpp | 3 +- src/topp/Resampler.cpp | 6 +- 40 files changed, 1477 insertions(+), 309 deletions(-) create mode 100644 src/openms/include/OpenMS/KERNEL/ChromatogramRangeManager.h create mode 100644 src/openms/include/OpenMS/KERNEL/SpectrumRangeManager.h create mode 100644 src/openms/source/KERNEL/ChromatogramRangeManager.cpp create mode 100644 src/openms/source/KERNEL/SpectrumRangeManager.cpp create mode 100644 src/pyOpenMS/pxds/ChromatogramRangeManager.pxd create mode 100644 src/pyOpenMS/pxds/SpectrumRangeManager.pxd diff --git a/doc/code_examples/Tutorial_MSExperiment.cpp b/doc/code_examples/Tutorial_MSExperiment.cpp index ddf586b4ef7..6ac6133674a 100644 --- a/doc/code_examples/Tutorial_MSExperiment.cpp +++ b/doc/code_examples/Tutorial_MSExperiment.cpp @@ -52,12 +52,23 @@ int main() } } - // update the data ranges for all dimensions (RT, m/z, int, IM) and print them: + // updateRanges provides a fast way to update the ranges of all spectra and chromatograms in the experiment. + // Once updated, the data ranges for all dimensions (RT, m/z, int, IM) can be printed. 
exp.updateRanges(); std::cout << "Data ranges:\n"; - exp.printRange(std::cout); - std::cout << "\nGet maximum intensity on its own: " << exp.getMaxIntensity() << '\n'; - exp.getMinRT(); + exp.spectrumRanges().printRange(std::cout); + std::cout << "\nGet maximum intensity on its own: " << exp.spectrumRanges().getMaxIntensity() << '\n'; + std::cout << "Get minimum RT on its own: " << exp.spectrumRanges().getMinRT() << '\n'; + std::cout << "Get maximum RT on its own: " << exp.spectrumRanges().getMaxRT() << '\n'; + std::cout << "Get minimum m/z on its own: " << exp.spectrumRanges().getMinMZ() << '\n'; + std::cout << "Get maximum m/z on its own: " << exp.spectrumRanges().getMaxMZ() << '\n'; + + // Printing the IM ranges is only possible if the spectra contain IM data (would throw an exception otherwise) + if (!exp.spectrumRanges().RangeMobility::isEmpty()) + { + std::cout << "Get minimum IM on its own: " << exp.spectrumRanges().getMinMobility() << '\n'; + std::cout << "Get maximum IM on its own: " << exp.spectrumRanges().getMaxMobility() << '\n'; + } // Store the spectra to a mzML file with: FileHandler fh; diff --git a/src/openms/include/OpenMS/KERNEL/ChromatogramRangeManager.h b/src/openms/include/OpenMS/KERNEL/ChromatogramRangeManager.h new file mode 100644 index 00000000000..8c9c19e261b --- /dev/null +++ b/src/openms/include/OpenMS/KERNEL/ChromatogramRangeManager.h @@ -0,0 +1,38 @@ +// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin +// SPDX-License-Identifier: BSD-3-Clause +// +// -------------------------------------------------------------------------- +// $Maintainer: Timo Sachsenberg $ +// $Authors: Administrator $ +// -------------------------------------------------------------------------- + +#pragma once + +#include + +namespace OpenMS +{ + /** + @brief Range manager for chromatograms + + This class manages retention time, m/z, and intensity ranges for multiple chromatograms. 
+ It extends the basic RangeManager to provide specialized functionality for chromatogram data. + + The ChromatogramRangeManager is used in conjunction with the SpectrumRangeManager in MSExperiment + to provide separate range tracking for chromatograms and spectra. This separation allows for + more efficient and targeted range operations on specific data types. + + @see RangeManager + @see SpectrumRangeManager + @see MSExperiment + @ingroup Kernel + */ + class OPENMS_DLLAPI ChromatogramRangeManager : public RangeManager + { + public: + /// Base type + using BaseType = RangeManager; + + }; + +} // namespace OpenMS \ No newline at end of file diff --git a/src/openms/include/OpenMS/KERNEL/MSExperiment.h b/src/openms/include/OpenMS/KERNEL/MSExperiment.h index 8cb6986c3a3..832a321ad11 100644 --- a/src/openms/include/OpenMS/KERNEL/MSExperiment.h +++ b/src/openms/include/OpenMS/KERNEL/MSExperiment.h @@ -10,8 +10,10 @@ #include #include +#include #include #include +#include #include #include @@ -43,8 +45,7 @@ namespace OpenMS @ingroup Kernel */ - class OPENMS_DLLAPI MSExperiment final : public RangeManagerContainer, - public ExperimentalSettings + class OPENMS_DLLAPI MSExperiment final : public ExperimentalSettings { public: @@ -61,10 +62,14 @@ namespace OpenMS typedef PeakType::CoordinateType CoordinateType; /// Intensity type of peaks typedef PeakType::IntensityType IntensityType; - /// RangeManager type + /// Combined RangeManager type to store the overall range of all spectra and chromatograms (for backward compatibility) typedef RangeManager RangeManagerType; - /// RangeManager type - typedef RangeManagerContainer RangeManagerContainerType; + + /// Spectrum range manager type for tracking ranges with MS level separation + typedef SpectrumRangeManager SpectrumRangeManagerType; + + /// Chromatogram range manager type for tracking chromatogram-specific ranges + typedef ChromatogramRangeManager ChromatogramRangeManagerType; /// Spectrum Type typedef MSSpectrum 
SpectrumType; /// Chromatogram type @@ -1067,16 +1072,56 @@ std::vector extractXICs( @note The range values (min, max, etc.) are not updated automatically. Call updateRanges() to update the values! */ ///@{ - // Docu in base class - void updateRanges() override; - + /// Delegate methods for backward compatibility + /** - @brief Updates the m/z, intensity, and retention time ranges of all spectra with a certain ms level + * @brief Clear all ranges in all range managers + * + * This clears the ranges in the combined range manager, the spectrum range manager, + * and the chromatogram range manager. + */ + void clearRanges() + { + combined_ranges_.clearRanges(); + spectrum_ranges_.clearRanges(); + chromatogram_ranges_.clearRanges(); + } + + /// Get the minimum RT value from the combined ranges (includes both chromatogram and spectra ranges) + double getMinRT() const { return combined_ranges_.getMinRT(); } + + /// Get the maximum RT value from the combined ranges (includes both chromatogram and spectra ranges) + double getMaxRT() const { return combined_ranges_.getMaxRT(); } + + /// Get the minimum m/z value from the combined ranges (includes both chromatogram and spectra ranges) + double getMinMZ() const { return combined_ranges_.getMinMZ(); } + + /// Get the maximum m/z value from the combined ranges (includes both chromatogram and spectra ranges) + double getMaxMZ() const { return combined_ranges_.getMaxMZ(); } + + /// Get the minimum intensity value from the combined ranges (includes both chromatogram and spectra ranges) + double getMinIntensity() const { return combined_ranges_.getMinIntensity(); } + + /// Get the maximum intensity value from the combined ranges (includes both chromatogram and spectra ranges) + double getMaxIntensity() const { return combined_ranges_.getMaxIntensity(); } + + /// Get the minimum mobility value from the combined ranges (includes both chromatogram and spectra ranges) + double getMinMobility() const { return 
combined_ranges_.getMinMobility(); } + + /// Get the maximum mobility value from the combined ranges (includes both chromatogram and spectra ranges) + double getMaxMobility() const { return combined_ranges_.getMaxMobility(); } + + /** + @brief Updates the m/z, intensity, mobility, and retention time ranges of all spectra and chromatograms - - @param ms_level MS level to consider for m/z range, RT range and intensity range (All MS levels if negative) + This method updates all three range managers: + - The spectrum range manager (for spectra ranges with MS level separation) + - The chromatogram range manager (for chromatogram ranges) + - The combined range manager (for overall ranges across both spectra and chromatograms) + + Call this method after modifying spectra or chromatograms to ensure that all range information is up-to-date. */ - void updateRanges(Int ms_level); + void updateRanges(); /// returns the total number of peaks (spectra and chromatograms included) UInt64 getSize() const; @@ -1286,8 +1331,49 @@ std::vector extractXICs( std::vector chromatograms_; /// spectra std::vector spectra_; + /// Spectrum range manager for tracking m/z, intensity, RT, and ion mobility ranges of spectra with MS level separation + SpectrumRangeManagerType spectrum_ranges_; + + /// Chromatogram range manager for tracking RT, intensity, and m/z ranges of chromatograms + ChromatogramRangeManagerType chromatogram_ranges_; + + /// Combined range manager that provides overall ranges across both spectra and chromatograms (maintained for backward compatibility) + RangeManagerType combined_ranges_; + + public: + /** + * @brief Returns a const reference to the spectrum range manager + * + * The spectrum range manager provides access to m/z, intensity, retention time, and ion mobility + * ranges for spectra, with separate tracking for different MS levels. 
+ * + * @return Const reference to the spectrum range manager + * @see SpectrumRangeManager + */ + const SpectrumRangeManagerType& spectrumRanges() const { return spectrum_ranges_; } + + /** + * @brief Returns a const reference to the chromatogram range manager + * + * The chromatogram range manager provides access to retention time, m/z, and intensity + * ranges for chromatograms. + * + * @return Const reference to the chromatogram range manager + * @see ChromatogramRangeManager + */ + const ChromatogramRangeManagerType& chromatogramRanges() const { return chromatogram_ranges_; } + + /** + * @brief Returns a const reference to the combined range manager + * + * The combined range manager provides access to the overall ranges across both spectra and chromatograms. + * This is maintained for backward compatibility with code that expects a single range manager. + * + * @return Const reference to the combined range manager + */ + const RangeManagerType& combinedRanges() const { return combined_ranges_; } -private: + private: /// Helper class to add either general data points in set2DData or use mass traces from meta values template @@ -1348,6 +1434,7 @@ std::vector extractXICs( } }; + /* @brief Append a spectrum to current MSExperiment diff --git a/src/openms/include/OpenMS/KERNEL/SpectrumRangeManager.h b/src/openms/include/OpenMS/KERNEL/SpectrumRangeManager.h new file mode 100644 index 00000000000..dd8e64c8af8 --- /dev/null +++ b/src/openms/include/OpenMS/KERNEL/SpectrumRangeManager.h @@ -0,0 +1,156 @@ +// Copyright (c) 2002-present, OpenMS Inc. 
-- EKU Tuebingen, ETH Zurich, and FU Berlin +// SPDX-License-Identifier: BSD-3-Clause +// +// -------------------------------------------------------------------------- +// $Maintainer: Timo Sachsenberg $ +// $Authors: Administrator $ +// -------------------------------------------------------------------------- + +#pragma once + +#include +#include +#include +#include + +namespace OpenMS +{ + class MSSpectrum; // Forward declaration for MSSpectrum + /** + @brief Advanced range manager for MS spectra with separate ranges for each MS level + + This class extends the basic RangeManager to provide separate range tracking for different MS levels + (MS1, MS2, etc.). It manages four types of ranges: + - m/z (mass-to-charge ratio) + - intensity + - retention time (RT) + - ion mobility + + A global range is tracked for all MS levels, and additional ranges are maintained for each specific MS level. + This allows for efficient querying of ranges for specific MS levels, which is useful for visualization, + filtering, and processing operations that need to work with specific MS levels. + + The class inherits from RangeManager and adds MS level-specific functionality. The base RangeManager + functionality is used for the global ranges, while a map of MS levels to RangeManagers is used for + the MS level-specific ranges. 
+ + @see RangeManager + @see MSSpectrum + @see ChromatogramRangeManager + @see MSExperiment + @ingroup Kernel + */ + class OPENMS_DLLAPI SpectrumRangeManager : public RangeManager + { + public: + /// Base type + using BaseType = RangeManager; + + /// Default constructor + SpectrumRangeManager() = default; + + /// Copy constructor + SpectrumRangeManager(const SpectrumRangeManager& source) = default; + + /// Move constructor + SpectrumRangeManager(SpectrumRangeManager&& source) = default; + + /// Assignment operator + SpectrumRangeManager& operator=(const SpectrumRangeManager& source) = default; + + /// Move assignment operator + SpectrumRangeManager& operator=(SpectrumRangeManager&& source) = default; + + /// Destructor + ~SpectrumRangeManager() = default; + + /** + @brief Clears all ranges (global and MS level-specific) + */ + void clearRanges() + { + BaseType::clearRanges(); + ms_level_ranges_.clear(); + } + + /** + @brief Extends the ranges with the ranges of another range manager + + @param other The other range manager to extend from + @param ms_level The MS level for which to extend the ranges (0 for global ranges) + */ + void extend(const BaseType& other, UInt ms_level = 0) + { + ms_level == 0 ? 
BaseType::extend(other) : ms_level_ranges_[ms_level].extend(other); + } + + /** + @brief Gets the ranges for a specific MS level + + @param ms_level The MS level for which to retrieve the ranges + @return The ranges for the specified MS level + @throw Exception::InvalidValue if no ranges exist for the specified MS level + */ + const BaseType& byMSLevel(UInt ms_level = 0) const + { + if (auto it = ms_level_ranges_.find(ms_level); it != ms_level_ranges_.end()) + { + return it->second; + } + throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "No ranges for this MS level", String(ms_level)); + } + + /** + @brief Gets all MS levels for which specific ranges exist + + @return The set of MS levels + */ + std::set getMSLevels() const + { + std::set ms_levels; + for (const auto& [level, _] : ms_level_ranges_) + { + ms_levels.insert(level); + } + return ms_levels; + } + + /** + @brief Extends the RT range with an MS level parameter + + @param rt The RT value to extend with + @param ms_level The MS level for which to extend the RT range (0 for global range) + */ + void extendRT(double rt, UInt ms_level = 0) + { + ms_level == 0 ? BaseType::extendRT(rt) : ms_level_ranges_[ms_level].extendRT(rt); + } + + /** + @brief Extends the m/z range with an MS level parameter + + @param mz The m/z value to extend with + @param ms_level The MS level for which to extend the m/z range (0 for global range) + */ + void extendMZ(double mz, UInt ms_level = 0) + { + ms_level == 0 ? BaseType::extendMZ(mz) : ms_level_ranges_[ms_level].extendMZ(mz); + } + + /** + @brief Extends the ranges with the ranges of a spectrum using an MS level parameter + + @param spectrum The spectrum whose ranges to extend from + @param ms_level The MS level for which to extend the ranges (0 for global ranges) + */ + void extendUnsafe(const MSSpectrum& spectrum, UInt ms_level = 0) + { + ms_level == 0 ? 
BaseType::extendUnsafe(spectrum.getRange()) : ms_level_ranges_[ms_level].extendUnsafe(spectrum.getRange()); + } + + protected: + /// MS level-specific ranges + std::map ms_level_ranges_; + }; + +} // namespace OpenMS \ No newline at end of file diff --git a/src/openms/include/OpenMS/KERNEL/sources.cmake b/src/openms/include/OpenMS/KERNEL/sources.cmake index 0cfa31e9c54..1a60c9a8607 100644 --- a/src/openms/include/OpenMS/KERNEL/sources.cmake +++ b/src/openms/include/OpenMS/KERNEL/sources.cmake @@ -33,6 +33,8 @@ PeakIndex.h RangeManager.h RangeUtils.h RichPeak2D.h +SpectrumRangeManager.h +ChromatogramRangeManager.h StandardTypes.h SpectrumHelper.h ) diff --git a/src/openms/source/FEATUREFINDER/FeatureFinderAlgorithmPicked.cpp b/src/openms/source/FEATUREFINDER/FeatureFinderAlgorithmPicked.cpp index e819450dfcf..2234445368f 100644 --- a/src/openms/source/FEATUREFINDER/FeatureFinderAlgorithmPicked.cpp +++ b/src/openms/source/FEATUREFINDER/FeatureFinderAlgorithmPicked.cpp @@ -242,10 +242,10 @@ namespace OpenMS //new scope to make local variables disappear { startProgress(0, intensity_bins_ * intensity_bins_, "Precalculating intensity scores"); - double rt_start = map_.getMinRT(); - double mz_start = map_.getMinMZ(); - intensity_rt_step_ = (map_.getMaxRT() - rt_start) / (double)intensity_bins_; - intensity_mz_step_ = (map_.getMaxMZ() - mz_start) / (double)intensity_bins_; + double rt_start = map_.spectrumRanges().byMSLevel(1).getMinRT(); + double mz_start = map_.spectrumRanges().byMSLevel(1).getMinMZ(); + intensity_rt_step_ = (map_.spectrumRanges().byMSLevel(1).getMaxRT() - rt_start) / (double)intensity_bins_; + intensity_mz_step_ = (map_.spectrumRanges().byMSLevel(1).getMaxMZ() - mz_start) / (double)intensity_bins_; intensity_thresholds_.resize(intensity_bins_); for (Size rt = 0; rt < intensity_bins_; ++rt) { @@ -356,7 +356,7 @@ namespace OpenMS //--------------------------------------------------------------------------- //new scope to make local variables disappear { - 
double max_mass = map_.getMaxMZ() * charge_high; + double max_mass = map_.spectrumRanges().byMSLevel(1).getMaxMZ() * charge_high; Size num_isotopes = std::ceil(max_mass / mass_window_width_) + 1; startProgress(0, num_isotopes, "Precalculating isotope distributions"); @@ -1829,8 +1829,8 @@ namespace OpenMS double intensity = map_[spectrum][peak].getIntensity(); double rt = map_[spectrum].getRT(); double mz = map_[spectrum][peak].getMZ(); - double rt_min = map_.getMinRT(); - double mz_min = map_.getMinMZ(); + double rt_min = map_.spectrumRanges().byMSLevel(1).getMinRT(); + double mz_min = map_.spectrumRanges().byMSLevel(1).getMinMZ(); UInt rt_bin = std::min(2 * intensity_bins_ - 1, (UInt) std::floor((rt - rt_min) / intensity_rt_step_ * 2.0)); UInt mz_bin = std::min(2 * intensity_bins_ - 1, (UInt) std::floor((mz - mz_min) / intensity_mz_step_ * 2.0)); // determine mz bins diff --git a/src/openms/source/FEATUREFINDER/MultiplexClustering.cpp b/src/openms/source/FEATUREFINDER/MultiplexClustering.cpp index 7748112e961..f1cb08a8359 100644 --- a/src/openms/source/FEATUREFINDER/MultiplexClustering.cpp +++ b/src/openms/source/FEATUREFINDER/MultiplexClustering.cpp @@ -31,11 +31,11 @@ namespace OpenMS } // ranges of the experiment - double mz_min = exp_profile.getMinMZ(); - double mz_max = exp_profile.getMaxMZ(); - double rt_min = exp_profile.getMinRT(); - double rt_max = exp_profile.getMaxRT(); - + double mz_min = exp_profile.spectrumRanges().getMinMZ(); + double mz_max = exp_profile.spectrumRanges().getMaxMZ(); + double rt_min = exp_profile.spectrumRanges().getMinRT(); + double rt_max = exp_profile.spectrumRanges().getMaxRT(); + // extend the grid by a small absolute margin double mz_margin = 1e-2; double rt_margin = 1e-2; @@ -81,10 +81,10 @@ namespace OpenMS rt_typical_(rt_typical) { // ranges of the experiment - double mz_min = exp.getMinMZ(); - double mz_max = exp.getMaxMZ(); - double rt_min = exp.getMinRT(); - double rt_max = exp.getMaxRT(); + double mz_min = 
exp.spectrumRanges().byMSLevel(1).getMinMZ(); + double mz_max = exp.spectrumRanges().byMSLevel(1).getMaxMZ(); + double rt_min = exp.spectrumRanges().byMSLevel(1).getMinRT(); + double rt_max = exp.spectrumRanges().byMSLevel(1).getMaxRT(); if (!RangeMZ(0.0, 1.0e12).containsMZ({mz_min, mz_max}) || !RangeRT(-1.0e12, 1.0e12).containsRT({rt_min, rt_max}) ) diff --git a/src/openms/source/FORMAT/MzQCFile.cpp b/src/openms/source/FORMAT/MzQCFile.cpp index 19c9025455d..8a7f55bad05 100644 --- a/src/openms/source/FORMAT/MzQCFile.cpp +++ b/src/openms/source/FORMAT/MzQCFile.cpp @@ -107,9 +107,9 @@ namespace OpenMS // Number of chromatograms" addMetric("QC:4000135", exp.getChromatograms().size()); // Run time (RT duration) - addMetric("QC:4000053", UInt(exp.getMaxRT() - exp.getMinRT())); + addMetric("QC:4000053", UInt(exp.spectrumRanges().getMaxRT() - exp.spectrumRanges().getMinRT())); // MZ acquisition range - addMetric("QC:4000138", tuple{exp.getMinMZ(), exp.getMaxMZ()}); + addMetric("QC:4000138", tuple{exp.spectrumRanges().getMinMZ(), exp.spectrumRanges().getMaxMZ()}); // TICs if (tic.isRunnable(status)) { diff --git a/src/openms/source/IONMOBILITY/IMDataConverter.cpp b/src/openms/source/IONMOBILITY/IMDataConverter.cpp index dcb36573ac1..b85458fc099 100644 --- a/src/openms/source/IONMOBILITY/IMDataConverter.cpp +++ b/src/openms/source/IONMOBILITY/IMDataConverter.cpp @@ -134,7 +134,7 @@ namespace OpenMS std::vector results(number_of_bins); in.updateRanges(); // find the IM range - const auto range_IM = RangeMobility(in); + const auto range_IM = RangeMobility(in.spectrumRanges()); if (range_IM.getSpan() / number_of_bins < bin_extension_abs * 2) { throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, String("Bin size (") + String(range_IM.getSpan() / number_of_bins) + ") is smaller than the overlap.", String(bin_extension_abs*2)); @@ -146,7 +146,6 @@ namespace OpenMS // results for each IM-frame: all spectra per bin, to get merged MSExperiment binned_spectra; - 
SpectraMerger merger; auto p = merger.getParameters(); const auto ms_levels = in.getMSLevels(); @@ -168,7 +167,6 @@ namespace OpenMS MSExperiment frame_melt = IMDataConverter::reshapeIMFrameToMany(std::move(frame)); for (size_t i = 0; i < bins.size(); ++i) - { binned_spectra.clear(false); // check if spectrum goes into this bin @@ -208,7 +206,7 @@ namespace OpenMS term = &cv.getTerm("MS:1002816"); break; case DriftTimeUnit::VSSC: - term = &cv.getTerm("MS:1003008"); + term = &cv.getTerm("MS:1003008"); break; default: throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Unit cannot be converted into CV term.", toString(unit)); diff --git a/src/openms/source/KERNEL/ChromatogramRangeManager.cpp b/src/openms/source/KERNEL/ChromatogramRangeManager.cpp new file mode 100644 index 00000000000..0e8adaea8e0 --- /dev/null +++ b/src/openms/source/KERNEL/ChromatogramRangeManager.cpp @@ -0,0 +1,14 @@ +// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin +// SPDX-License-Identifier: BSD-3-Clause +// +// -------------------------------------------------------------------------- +// $Maintainer: Timo Sachsenberg $ +// $Authors: Administrator $ +// -------------------------------------------------------------------------- + +#include + +namespace OpenMS +{ + // Currently empty as the class only contains inline implementations +} \ No newline at end of file diff --git a/src/openms/source/KERNEL/ConversionHelper.cpp b/src/openms/source/KERNEL/ConversionHelper.cpp index f4347006683..e08fe9238b5 100644 --- a/src/openms/source/KERNEL/ConversionHelper.cpp +++ b/src/openms/source/KERNEL/ConversionHelper.cpp @@ -20,7 +20,7 @@ namespace OpenMS // see @todo above output_map.setUniqueId(); - input_map.updateRanges(1); + input_map.updateRanges(); if (n > input_map.getSize()) { n = input_map.getSize(); diff --git a/src/openms/source/KERNEL/MSExperiment.cpp b/src/openms/source/KERNEL/MSExperiment.cpp index a5d364ace9c..c6b0e8bad2f 100644 --- 
a/src/openms/source/KERNEL/MSExperiment.cpp +++ b/src/openms/source/KERNEL/MSExperiment.cpp @@ -25,8 +25,10 @@ namespace OpenMS { /// Constructor MSExperiment::MSExperiment() : - RangeManagerContainerType(), - ExperimentalSettings() + ExperimentalSettings(), + spectrum_ranges_(), + chromatogram_ranges_(), + combined_ranges_() {} /// Copy constructor @@ -39,14 +41,15 @@ namespace OpenMS { return *this; } - RangeManagerContainerType::operator=(source); ExperimentalSettings::operator=(source); chromatograms_ = source.chromatograms_; spectra_ = source.spectra_; - - //no need to copy the alloc?! - //alloc_ + + // Copy the range managers + spectrum_ranges_ = source.spectrum_ranges_; + chromatogram_ranges_ = source.chromatogram_ranges_; + combined_ranges_ = source.combined_ranges_; return *this; } @@ -214,91 +217,77 @@ namespace OpenMS /** @name Range methods - - @note The range values (min, max, etc.) are not updated automatically. Call updateRanges() to update the values! */ - ///@{ - // Docu in base class - void MSExperiment::updateRanges() - { - updateRanges(-1); - } /** - @brief Updates the m/z, intensity, retention time, ion mobility and MS level ranges of all spectra with a certain ms level - - @param ms_level MS level to consider for m/z range, RT range, intensity range and ion mobility (if negative, all MS levels are used) + @brief Updates the m/z, intensity, retention time, ion mobility ranges for all spectra and chromatograms */ - void MSExperiment::updateRanges(Int ms_level) + void MSExperiment::updateRanges() { #ifdef OPENMS_ASSERTIONS - double rt_min = RangeRT::isEmpty() ? 0 : getMinRT(); - double rt_max = RangeRT::isEmpty() ? 0 : getMaxRT(); - double mz_min = RangeMZ::isEmpty() ? 0 : getMinMZ(); - double mz_max = RangeMZ::isEmpty() ? 0 : getMaxMZ(); - double int_min = RangeIntensity::isEmpty() ? 0 : getMinIntensity(); - double int_max = RangeIntensity::isEmpty() ? 0 : getMaxIntensity(); - double im_min = RangeMobility::isEmpty() ? 
0 : getMinMobility(); - double im_max = RangeMobility::isEmpty() ? 0 : getMaxMobility(); + double rt_min = combined_ranges_.RangeRT::isEmpty() ? 0 : combined_ranges_.getMinRT(); + double rt_max = combined_ranges_.RangeRT::isEmpty() ? 0 : combined_ranges_.getMaxRT(); + double mz_min = combined_ranges_.RangeMZ::isEmpty() ? 0 : combined_ranges_.getMinMZ(); + double mz_max = combined_ranges_.RangeMZ::isEmpty() ? 0 : combined_ranges_.getMaxMZ(); + double int_min = combined_ranges_.RangeIntensity::isEmpty() ? 0 : combined_ranges_.getMinIntensity(); + double int_max = combined_ranges_.RangeIntensity::isEmpty() ? 0 : combined_ranges_.getMaxIntensity(); + double im_min = combined_ranges_.RangeMobility::isEmpty() ? 0 : combined_ranges_.getMinMobility(); + double im_max = combined_ranges_.RangeMobility::isEmpty() ? 0 : combined_ranges_.getMaxMobility(); #endif - // reset mz/rt/int range - this->clearRanges(); + // Reset all range managers + clearRanges(); - // empty + // Empty experiment if (spectra_.empty() && chromatograms_.empty()) { return; } - // update + // Update spectrum ranges for (Base::iterator it = spectra_.begin(); it != spectra_.end(); ++it) - { - if (ms_level < Int(0) || Int(it->getMSLevel()) == ms_level) - { - // ranges - this->extendRT(it->getRT()); // RT - // m/z, intensity and ion mobility from spectrum's range - it->updateRanges(); - this->extend(*it); - } - // for MS level = 1 we extend the range for all the MS2 precursors - if (ms_level == 1 && it->getMSLevel() == 2) - { - if (!it->getPrecursors().empty()) - { - this->extendRT(it->getRT()); - this->extendMZ(it->getPrecursors()[0].getMZ()); - } - } + { + // Update ranges for the spectrum itself + it->updateRanges(); + + // Update spectrum range manager with this spectrum's ranges + // Add to both general ranges and MS level-specific ranges + spectrum_ranges_.extendUnsafe(*it); + spectrum_ranges_.extendRT(it->getRT()); // RT is not part of the range of an individual spectrum + + 
spectrum_ranges_.extendUnsafe(*it, it->getMSLevel()); + spectrum_ranges_.extendRT(it->getRT(), it->getMSLevel()); // RT is not part of the range of an individual spectrum + } - if (this->chromatograms_.empty()) + // Update chromatogram ranges + if (!chromatograms_.empty()) { - return; + for (ChromatogramType& cp : chromatograms_) + { + // Update range of EACH chromatogram + cp.updateRanges(); + + // Add RT and intensity ranges to the chromatogram manager + chromatogram_ranges_.extend(cp.getRange()); + chromatogram_ranges_.extendMZ(cp.getMZ()); // MZ is not part of the range of an individual chromatogram + } } - // update intensity, m/z and RT according to chromatograms as well: - for (ChromatogramType& cp : chromatograms_) - { - // update range of EACH chrom, if we need them individually later - cp.updateRanges(); - - // ranges - this->extendMZ(cp.getMZ());// MZ - this->extend(cp);// RT and intensity from chroms's range - } + // Update the combined range manager with both spectrum and chromatogram ranges + combined_ranges_.extendUnsafe(spectrum_ranges_); + combined_ranges_.extendUnsafe(chromatogram_ranges_); #ifdef OPENMS_ASSERTIONS - // check if updateRanges() was necessary and find places where it was not - double im_min_new = RangeMobility::isEmpty() ? 0 : getMinMobility(); - double im_max_new = RangeMobility::isEmpty() ? 0 : getMaxMobility(); - double int_min_new = RangeIntensity::isEmpty() ? 0 : getMinIntensity(); - double int_max_new = RangeIntensity::isEmpty() ? 0 : getMaxIntensity(); - double rt_min_new = RangeRT::isEmpty() ? 0 : getMinRT(); - double rt_max_new = RangeRT::isEmpty() ? 0 : getMaxRT(); - double mz_min_new = RangeMZ::isEmpty() ? 0 : getMinMZ(); - double mz_max_new = RangeMZ::isEmpty() ? 0 : getMaxMZ(); + // check if updateRanges() was necessary to find places where it was not + double im_min_new = combined_ranges_.RangeMobility::isEmpty() ? 0 : combined_ranges_.getMinMobility(); + double im_max_new = combined_ranges_.RangeMobility::isEmpty() ? 
0 : combined_ranges_.getMaxMobility(); + double int_min_new = combined_ranges_.RangeIntensity::isEmpty() ? 0 : combined_ranges_.getMinIntensity(); + double int_max_new = combined_ranges_.RangeIntensity::isEmpty() ? 0 : combined_ranges_.getMaxIntensity(); + double rt_min_new = combined_ranges_.RangeRT::isEmpty() ? 0 : combined_ranges_.getMinRT(); + double rt_max_new = combined_ranges_.RangeRT::isEmpty() ? 0 : combined_ranges_.getMaxRT(); + double mz_min_new = combined_ranges_.RangeMZ::isEmpty() ? 0 : combined_ranges_.getMinMZ(); + double mz_max_new = combined_ranges_.RangeMZ::isEmpty() ? 0 : combined_ranges_.getMaxMZ(); if (im_min_new == im_min && im_max_new == im_max && int_min_new == int_min && int_max_new == int_max @@ -428,7 +417,7 @@ namespace OpenMS void MSExperiment::reset() { spectra_.clear(); //remove data - RangeManagerType::clearRanges(); //reset range manager + clearRanges(); // reset all ranges ExperimentalSettings::operator=(ExperimentalSettings()); //reset meta info } @@ -633,24 +622,22 @@ namespace OpenMS /// Swaps the content of this map with the content of @p from void MSExperiment::swap(MSExperiment & from) { - MSExperiment tmp; - - //swap range information - tmp.RangeManagerType::operator=(*this); - this->RangeManagerType::operator=(from); - from.RangeManagerType::operator=(tmp); + // Swap range managers + std::swap(spectrum_ranges_, from.spectrum_ranges_); + std::swap(chromatogram_ranges_, from.chromatogram_ranges_); + std::swap(combined_ranges_, from.combined_ranges_); - //swap experimental settings + // Swap experimental settings + ExperimentalSettings tmp; tmp.ExperimentalSettings::operator=(*this); this->ExperimentalSettings::operator=(from); from.ExperimentalSettings::operator=(tmp); - // swap chromatograms + // Swap chromatograms std::swap(chromatograms_, from.chromatograms_); - //swap peaks + // Swap spectra spectra_.swap(from.getSpectra()); - } /// sets the spectrum list @@ -834,46 +821,34 @@ namespace OpenMS void 
MSExperiment::clear(bool clear_meta_data) { spectra_.clear(); + chromatograms_.clear(); if (clear_meta_data) { - clearRanges(); + clearRanges(); // reset all ranges this->ExperimentalSettings::operator=(ExperimentalSettings()); // no "clear" method - chromatograms_.clear(); } } // static bool MSExperiment::containsScanOfLevel(size_t ms_level) const { - //test if no scans with MS-level 1 exist - for (const auto& spec : getSpectra()) - { - if (spec.getMSLevel() == ms_level) - { - return true; - } - } - return false; + // Check if any spectrum with the specified MS level exists + return std::any_of(getSpectra().begin(), getSpectra().end(), + [ms_level](const auto& spec) { return spec.getMSLevel() == ms_level; }); } bool MSExperiment::hasZeroIntensities(size_t ms_level) const { - for (const auto& spec : getSpectra()) - { - if (spec.getMSLevel() != ms_level) - { - continue; - } - for (const auto& p : spec) - { - if (p.getIntensity() == 0.0) - { - return true; - } - } - } - return false; + // Check if any spectrum of the specified MS level contains peaks with zero intensity + return std::any_of(getSpectra().begin(), getSpectra().end(), + [ms_level](const auto& spec) { + if (spec.getMSLevel() != ms_level) return false; // Skip spectra that don't match the requested MS level + + // Check if this spectrum has any zero intensity peaks + return std::any_of(spec.begin(), spec.end(), + [](const auto& peak) { return peak.getIntensity() == 0.0; }); + }); } bool MSExperiment::hasPeptideIdentifications() const diff --git a/src/openms/source/KERNEL/SpectrumRangeManager.cpp b/src/openms/source/KERNEL/SpectrumRangeManager.cpp new file mode 100644 index 00000000000..d940447792b --- /dev/null +++ b/src/openms/source/KERNEL/SpectrumRangeManager.cpp @@ -0,0 +1,14 @@ +// Copyright (c) 2002-present, OpenMS Inc. 
-- EKU Tuebingen, ETH Zurich, and FU Berlin +// SPDX-License-Identifier: BSD-3-Clause +// +// -------------------------------------------------------------------------- +// $Maintainer: Timo Sachsenberg $ +// $Authors: Administrator $ +// -------------------------------------------------------------------------- + +#include + +namespace OpenMS +{ + // Currently empty as the class only contains inline implementations +} \ No newline at end of file diff --git a/src/openms/source/KERNEL/sources.cmake b/src/openms/source/KERNEL/sources.cmake index f693ef7625b..804cbeaf5ee 100644 --- a/src/openms/source/KERNEL/sources.cmake +++ b/src/openms/source/KERNEL/sources.cmake @@ -32,6 +32,8 @@ PeakIndex.cpp RangeManager.cpp RichPeak2D.cpp SpectrumHelper.cpp +SpectrumRangeManager.cpp +ChromatogramRangeManager.cpp ) ### add path to the filenames diff --git a/src/openms_gui/include/OpenMS/VISUAL/LayerDataChrom.h b/src/openms_gui/include/OpenMS/VISUAL/LayerDataChrom.h index d75e1c2129f..48764602bbf 100644 --- a/src/openms_gui/include/OpenMS/VISUAL/LayerDataChrom.h +++ b/src/openms_gui/include/OpenMS/VISUAL/LayerDataChrom.h @@ -50,7 +50,7 @@ namespace OpenMS RangeAllType getRange() const override { RangeAllType r; - r.assign(*chromatogram_map_); + r.assign(chromatogram_map_->chromatogramRanges()); return r; } diff --git a/src/openms_gui/include/OpenMS/VISUAL/LayerDataPeak.h b/src/openms_gui/include/OpenMS/VISUAL/LayerDataPeak.h index 69003c07dd1..9c9991764c3 100644 --- a/src/openms_gui/include/OpenMS/VISUAL/LayerDataPeak.h +++ b/src/openms_gui/include/OpenMS/VISUAL/LayerDataPeak.h @@ -60,7 +60,7 @@ namespace OpenMS RangeAllType getRange() const override { RangeAllType r; - r.assign(*peak_map_); + r.assign(peak_map_->spectrumRanges()); return r; } diff --git a/src/openms_gui/source/VISUAL/APPLICATIONS/TOPPViewBase.cpp b/src/openms_gui/source/VISUAL/APPLICATIONS/TOPPViewBase.cpp index 0fc382ab6da..000642bfa6e 100644 --- a/src/openms_gui/source/VISUAL/APPLICATIONS/TOPPViewBase.cpp +++ 
b/src/openms_gui/source/VISUAL/APPLICATIONS/TOPPViewBase.cpp @@ -713,7 +713,7 @@ namespace OpenMS // sort for m/z and update ranges of newly loaded data peak_map_sptr->sortSpectra(true); - peak_map_sptr->updateRanges(1); + peak_map_sptr->updateRanges(); // try to add the data if (caption == "") @@ -2550,7 +2550,7 @@ namespace OpenMS lp->getPeakDataMuteable()->clear(true); } lp->getPeakDataMuteable()->sortSpectra(true); - lp->getPeakDataMuteable()->updateRanges(1); + lp->getPeakDataMuteable()->updateRanges(); } else if (auto* lp = dynamic_cast(&layer)) // feature data { @@ -2591,7 +2591,7 @@ namespace OpenMS lp->getChromatogramData()->clear(true); } lp->getChromatogramData()->sortChromatograms(true); - lp->getChromatogramData()->updateRanges(1); + lp->getChromatogramData()->updateRanges(); } // update all layers that need an update diff --git a/src/pyOpenMS/pxds/ChromatogramRangeManager.pxd b/src/pyOpenMS/pxds/ChromatogramRangeManager.pxd new file mode 100644 index 00000000000..3b2374d5f3a --- /dev/null +++ b/src/pyOpenMS/pxds/ChromatogramRangeManager.pxd @@ -0,0 +1,29 @@ +from Types cimport * +from RangeManager cimport * + +cdef extern from "" namespace "OpenMS": + + cdef cppclass ChromatogramRangeManager: + # wrap-doc: + # Range manager for chromatograms + # + # This class manages retention time, m/z, and intensity ranges for multiple chromatograms. + # It extends the basic RangeManager to provide specialized functionality for chromatogram data. 
+ # + # The template parameters for the base RangeManager are ordered differently than in SpectrumRangeManager: + # - RangeRT (retention time) is the first parameter, as it's the primary dimension for chromatograms + # - RangeIntensity is the second parameter + # - RangeMZ is the third parameter + + ChromatogramRangeManager() except + nogil + ChromatogramRangeManager(ChromatogramRangeManager &) except + nogil + + void clearRanges() except + nogil + + # Range accessors + double getMinRT() except + nogil + double getMaxRT() except + nogil + double getMinMZ() except + nogil + double getMaxMZ() except + nogil + double getMinIntensity() except + nogil + double getMaxIntensity() except + nogil \ No newline at end of file diff --git a/src/pyOpenMS/pxds/MSExperiment.pxd b/src/pyOpenMS/pxds/MSExperiment.pxd index bbdddcf0f6f..cbe1607616a 100644 --- a/src/pyOpenMS/pxds/MSExperiment.pxd +++ b/src/pyOpenMS/pxds/MSExperiment.pxd @@ -10,15 +10,16 @@ from ExperimentalSettings cimport * from DateTime cimport * from RangeManager cimport * from Matrix cimport * +from SpectrumRangeManager cimport * +from ChromatogramRangeManager cimport * # this class has addons, see the ./addons folder cdef extern from "" namespace "OpenMS": - cdef cppclass MSExperiment(ExperimentalSettings, RangeManagerRtMzInt): + cdef cppclass MSExperiment(ExperimentalSettings): # wrap-inherits: # ExperimentalSettings - # RangeManagerRtMzInt # # wrap-doc: # In-Memory representation of a mass spectrometry experiment. 
@@ -112,3 +113,10 @@ cdef extern from "" namespace "OpenMS": int getPrecursorSpectrum(int zero_based_index) except + nogil # wrap-doc:Returns the index of the precursor spectrum for spectrum at index @p zero_based_index + # Range manager accessors + SpectrumRangeManager& spectrumRanges() except + nogil # wrap-doc:Returns a reference to the spectrum range manager + const SpectrumRangeManager& spectrumRanges() const except + nogil # wrap-doc:Returns a const reference to the spectrum range manager + ChromatogramRangeManager& chromatogramRanges() except + nogil # wrap-doc:Returns a reference to the chromatogram range manager + const ChromatogramRangeManager& chromatogramRanges() const except + nogil # wrap-doc:Returns a const reference to the chromatogram range manager + RangeManagerRtMzIntMob& combinedRanges() except + nogil # wrap-doc:Returns a reference to the combined range manager (for backward compatibility) + const RangeManagerRtMzIntMob& combinedRanges() const except + nogil # wrap-doc:Returns a const reference to the combined range manager (for backward compatibility) diff --git a/src/pyOpenMS/pxds/RangeManager.pxd b/src/pyOpenMS/pxds/RangeManager.pxd index 7d1abff0038..db3a38d5210 100644 --- a/src/pyOpenMS/pxds/RangeManager.pxd +++ b/src/pyOpenMS/pxds/RangeManager.pxd @@ -118,3 +118,18 @@ cdef extern from "" namespace "OpenMS": void clearRanges() except + nogil # wrap-doc:Resets all range dimensions as empty + cdef cppclass RangeManagerRtMzIntMob "OpenMS::RangeManager": + # wrap-ignore + # no-pxd-import + RangeManagerRtMzIntMob() except + nogil + RangeManagerRtMzIntMob(RangeManagerRtMzIntMob &) except + nogil + + double getMinRT() except + nogil # wrap-doc:Returns the minimum RT + double getMaxRT() except + nogil # wrap-doc:Returns the maximum RT + double getMinMZ() except + nogil # wrap-doc:Returns the minimum m/z + double getMaxMZ() except + nogil # wrap-doc:Returns the maximum m/z + double getMinIntensity() except + nogil # wrap-doc:Returns the minimum 
intensity + double getMaxIntensity() except + nogil # wrap-doc:Returns the maximum intensity + double getMinMobility() except + nogil # wrap-doc:Returns the minimum mobility + double getMaxMobility() except + nogil # wrap-doc:Returns the maximum mobility + void clearRanges() except + nogil # wrap-doc:Resets all range dimensions as empty diff --git a/src/pyOpenMS/pxds/SpectrumRangeManager.pxd b/src/pyOpenMS/pxds/SpectrumRangeManager.pxd new file mode 100644 index 00000000000..e1d814ea7ea --- /dev/null +++ b/src/pyOpenMS/pxds/SpectrumRangeManager.pxd @@ -0,0 +1,40 @@ +from Types cimport * +from RangeManager cimport * +from MSSpectrum cimport * +from libcpp.set cimport set as libcpp_set + +cdef extern from "" namespace "OpenMS": + + cdef cppclass SpectrumRangeManager: + # wrap-doc: + # Advanced range manager for MS spectra with separate ranges for each MS level + # + # This class extends the basic RangeManager to provide separate range tracking for different MS levels + # (MS1, MS2, etc.). It manages four types of ranges: + # - m/z (mass-to-charge ratio) + # - intensity + # - retention time (RT) + # - ion mobility + # + # A global range is tracked for all MS levels, and additional ranges are maintained for each specific MS level. + # This allows for efficient querying of ranges for specific MS levels, which is useful for visualization, + # filtering, and processing operations that need to work with specific MS levels. 
+ + SpectrumRangeManager() except + nogil + SpectrumRangeManager(SpectrumRangeManager &) except + nogil + + void clearRanges() except + nogil + libcpp_set[UInt] getMSLevels() except + nogil + void extendRT(double rt, UInt ms_level) except + nogil + void extendMZ(double mz, UInt ms_level) except + nogil + void extendUnsafe(const MSSpectrum& spectrum, UInt ms_level) except + nogil + + # Range accessors + double getMinRT() except + nogil + double getMaxRT() except + nogil + double getMinMZ() except + nogil + double getMaxMZ() except + nogil + double getMinIntensity() except + nogil + double getMaxIntensity() except + nogil + double getMinMobility() except + nogil + double getMaxMobility() except + nogil \ No newline at end of file diff --git a/src/tests/class_tests/openms/source/FeatureFinderAlgorithmPicked_test.cpp b/src/tests/class_tests/openms/source/FeatureFinderAlgorithmPicked_test.cpp index b1346e22f29..b2d9daeb37f 100644 --- a/src/tests/class_tests/openms/source/FeatureFinderAlgorithmPicked_test.cpp +++ b/src/tests/class_tests/openms/source/FeatureFinderAlgorithmPicked_test.cpp @@ -47,7 +47,7 @@ START_SECTION((virtual void run())) MzMLFile mzml_file; mzml_file.getOptions().addMSLevel(1); mzml_file.load(OPENMS_GET_TEST_DATA_PATH("FeatureFinderAlgorithmPicked.mzML"),input); - input.updateRanges(1); + input.updateRanges(); FeatureMap output; //parameters diff --git a/src/tests/class_tests/openms/source/FeatureFinderMultiplexAlgorithm_test.cpp b/src/tests/class_tests/openms/source/FeatureFinderMultiplexAlgorithm_test.cpp index f16275b061a..d8ca80dd92e 100644 --- a/src/tests/class_tests/openms/source/FeatureFinderMultiplexAlgorithm_test.cpp +++ b/src/tests/class_tests/openms/source/FeatureFinderMultiplexAlgorithm_test.cpp @@ -44,7 +44,7 @@ START_SECTION((virtual void run())) mzml_file.getOptions().addMSLevel(1); mzml_file.load(OPENMS_GET_TEST_DATA_PATH("FeatureFinderMultiplex_1_input.mzML"), exp); - exp.updateRanges(1); + exp.updateRanges(); Param param; 
ParamXMLFile paramFile; diff --git a/src/tests/class_tests/openms/source/MSExperiment_test.cpp b/src/tests/class_tests/openms/source/MSExperiment_test.cpp index c4320ddfb09..a0349d13c01 100644 --- a/src/tests/class_tests/openms/source/MSExperiment_test.cpp +++ b/src/tests/class_tests/openms/source/MSExperiment_test.cpp @@ -421,7 +421,7 @@ END_SECTION START_SECTION((const MSExperiment::RangeManagerType& MSExperiment::getRange() const)) { PeakMap tmp; - TEST_EQUAL(tmp.getRange().hasRange() == HasRangeType::NONE, true) + TEST_EQUAL(tmp.combinedRanges().hasRange() == HasRangeType::NONE, true) } END_SECTION @@ -487,12 +487,12 @@ START_SECTION((virtual void updateRanges())) TEST_REAL_SIMILAR(tmp.getMinRT(),30.0) TEST_REAL_SIMILAR(tmp.getMaxRT(),50.0) - TEST_REAL_SIMILAR(tmp.getRange().getMinMZ(), 5.0) - TEST_REAL_SIMILAR(tmp.getRange().getMaxMZ(), 10.0) - TEST_REAL_SIMILAR(tmp.getRange().getMinRT(), 30.0) - TEST_REAL_SIMILAR(tmp.getRange().getMaxRT(), 50.0) - TEST_REAL_SIMILAR(tmp.getRange().getMinMobility(), 66) - TEST_REAL_SIMILAR(tmp.getRange().getMaxMobility(), 199) + TEST_REAL_SIMILAR(tmp.combinedRanges().getMinMZ(), 5.0) + TEST_REAL_SIMILAR(tmp.combinedRanges().getMaxMZ(), 10.0) + TEST_REAL_SIMILAR(tmp.combinedRanges().getMinRT(), 30.0) + TEST_REAL_SIMILAR(tmp.combinedRanges().getMaxRT(), 50.0) + TEST_REAL_SIMILAR(tmp.combinedRanges().getMinMobility(), 66) + TEST_REAL_SIMILAR(tmp.combinedRanges().getMaxMobility(), 199) TEST_EQUAL(tmp.getMSLevels().size(),2) TEST_EQUAL(tmp.getMSLevels()[0],1) @@ -500,27 +500,24 @@ START_SECTION((virtual void updateRanges())) TEST_EQUAL(tmp.getSize(),4) - //Update for MS level 1 - // Store initial MS levels std::vector initial_ms_levels = tmp.getMSLevels(); - tmp.updateRanges(1); - tmp.updateRanges(1); // Call twice to verify consistent behavior + // MS1 for (int l = 0; l < 2; ++l) { - TEST_REAL_SIMILAR(tmp.getMinMZ(),5.0) - TEST_REAL_SIMILAR(tmp.getMaxMZ(),7.0) - TEST_REAL_SIMILAR(tmp.getMinIntensity(), -7.0) - 
TEST_REAL_SIMILAR(tmp.getMaxIntensity(), -5.0) - TEST_REAL_SIMILAR(tmp.getMinRT(),30.0) - TEST_REAL_SIMILAR(tmp.getMaxRT(),40.0) - TEST_REAL_SIMILAR(tmp.getRange().getMinMobility(), 99) - TEST_REAL_SIMILAR(tmp.getRange().getMaxMobility(), 99) + TEST_REAL_SIMILAR(tmp.spectrumRanges().byMSLevel(1).getMinMZ(),5.0) + TEST_REAL_SIMILAR(tmp.spectrumRanges().byMSLevel(1).getMaxMZ(),7.0) + TEST_REAL_SIMILAR(tmp.spectrumRanges().byMSLevel(1).getMinIntensity(), -7.0) + TEST_REAL_SIMILAR(tmp.spectrumRanges().byMSLevel(1).getMaxIntensity(), -5.0) + TEST_REAL_SIMILAR(tmp.spectrumRanges().byMSLevel(1).getMinRT(),30.0) + TEST_REAL_SIMILAR(tmp.spectrumRanges().byMSLevel(1).getMaxRT(),40.0) + TEST_REAL_SIMILAR(tmp.spectrumRanges().byMSLevel(1).getMinMobility(), 99) + TEST_REAL_SIMILAR(tmp.spectrumRanges().byMSLevel(1).getMaxMobility(), 99) + // Verify MS levels remain unchanged TEST_EQUAL(tmp.getMSLevels() == initial_ms_levels, true) - TEST_EQUAL(tmp.getSize(),4) - tmp.updateRanges(1); + TEST_EQUAL(tmp.getSize(),4) } // test with only one peak @@ -534,26 +531,27 @@ START_SECTION((virtual void updateRanges())) s2.push_back(p2); s2.setDriftTime(99); tmp2.addSpectrum(s2); - tmp2.updateRanges(); - TEST_REAL_SIMILAR(tmp2.getMinMZ(),5.0) - TEST_REAL_SIMILAR(tmp2.getMaxMZ(),5.0) - TEST_REAL_SIMILAR(tmp2.getMinIntensity(), -5.0) - TEST_REAL_SIMILAR(tmp2.getMaxIntensity(), -5.0) - TEST_REAL_SIMILAR(tmp2.getMinRT(),30.0) - TEST_REAL_SIMILAR(tmp2.getMaxRT(),30.0) - TEST_REAL_SIMILAR(tmp.getRange().getMinMobility(), 99) - TEST_REAL_SIMILAR(tmp.getRange().getMaxMobility(), 99) - tmp2.updateRanges(1); + // check the overall ranges TEST_REAL_SIMILAR(tmp2.getMinMZ(),5.0) TEST_REAL_SIMILAR(tmp2.getMaxMZ(),5.0) TEST_REAL_SIMILAR(tmp2.getMinIntensity(), -5.0) TEST_REAL_SIMILAR(tmp2.getMaxIntensity(), -5.0) TEST_REAL_SIMILAR(tmp2.getMinRT(),30.0) TEST_REAL_SIMILAR(tmp2.getMaxRT(),30.0) - TEST_REAL_SIMILAR(tmp.getRange().getMinMobility(), 99) - TEST_REAL_SIMILAR(tmp.getRange().getMaxMobility(), 99) + 
TEST_REAL_SIMILAR(tmp2.getMinMobility(), 99) + TEST_REAL_SIMILAR(tmp2.getMaxMobility(), 99) + + // check the spectra specific ranges + TEST_REAL_SIMILAR(tmp2.spectrumRanges().getMinMZ(),5.0) + TEST_REAL_SIMILAR(tmp2.spectrumRanges().getMaxMZ(),5.0) + TEST_REAL_SIMILAR(tmp2.spectrumRanges().getMinIntensity(), -5.0) + TEST_REAL_SIMILAR(tmp2.spectrumRanges().getMaxIntensity(), -5.0) + TEST_REAL_SIMILAR(tmp2.spectrumRanges().getMinRT(),30.0) + TEST_REAL_SIMILAR(tmp2.spectrumRanges().getMaxRT(),30.0) + TEST_REAL_SIMILAR(tmp2.spectrumRanges().getMinMobility(), 99) + TEST_REAL_SIMILAR(tmp2.spectrumRanges().getMaxMobility(), 99) // test ranges with a chromatogram MSChromatogram chrom1, chrom2; @@ -583,12 +581,23 @@ START_SECTION((virtual void updateRanges())) tmp2.setChromatograms(chroms); tmp2.updateRanges(); + + // test the overall ranges TEST_REAL_SIMILAR(tmp2.getMinMZ(), 5.0) TEST_REAL_SIMILAR(tmp2.getMaxMZ(), 100.0) TEST_REAL_SIMILAR(tmp2.getMinIntensity(), -5.0) TEST_REAL_SIMILAR(tmp2.getMaxIntensity(), 10.4) TEST_REAL_SIMILAR(tmp2.getMinRT(), 0.1) - TEST_REAL_SIMILAR(tmp2.getMaxRT(), 30.0) + TEST_REAL_SIMILAR(tmp2.getMaxRT(), 30.0) // overall range still 30 + + // test the chromatogram ranges + TEST_REAL_SIMILAR(tmp2.chromatogramRanges().getMinMZ(), 80.0) + TEST_REAL_SIMILAR(tmp2.chromatogramRanges().getMaxMZ(), 100.0) + TEST_REAL_SIMILAR(tmp2.chromatogramRanges().getMinIntensity(), 10.0) + TEST_REAL_SIMILAR(tmp2.chromatogramRanges().getMaxIntensity(), 10.4) + TEST_REAL_SIMILAR(tmp2.chromatogramRanges().getMinRT(), 0.1) + TEST_REAL_SIMILAR(tmp2.chromatogramRanges().getMaxRT(), 0.3) // chromatogram range 0.1-0.3 + } END_SECTION @@ -1348,7 +1357,7 @@ START_SECTION((void swap(MSExperiment &from))) TEST_EQUAL(exp1.getComment(),"") TEST_EQUAL(exp1.size(),0) - TEST_EQUAL(exp1.getRange().hasRange() == HasRangeType::NONE, true) + TEST_EQUAL(exp1.combinedRanges().hasRange() == HasRangeType::NONE, true) TEST_EQUAL(exp1.getMSLevels().size(),0) TEST_EQUAL(exp1.getSize(),0); @@ 
-2623,8 +2632,331 @@ START_SECTION((template std::vector void store(const String& filename, co empty[0].getAcquisitionInfo().resize(1); std::string tmp_filename; - NEW_TMP_FILE(tmp_filename); + NEW_TMP_FILE(tmp_filename); + file.store(tmp_filename,empty); file.load(tmp_filename,exp); - TEST_EQUAL(exp==empty,true) + + TEST_EQUAL(exp == empty,true) //NOTE: If it does not work, use this code to find out where the difference is // TEST_EQUAL(exp.size()==empty.size(),true) diff --git a/src/tests/topp/FileFilter_47_output.mzML b/src/tests/topp/FileFilter_47_output.mzML index 70ba63aab61..30585e3d65e 100644 --- a/src/tests/topp/FileFilter_47_output.mzML +++ b/src/tests/topp/FileFilter_47_output.mzML @@ -113,7 +113,7 @@ - + @@ -126,9 +126,9 @@ - + - + @@ -141,7 +141,7 @@ - + @@ -152,7 +152,7 @@ - + @@ -168,16 +168,16 @@ - + - + - + - + @@ -401,49 +401,17 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + - 14572 - 18005 - 21346 - 24944 - 28880 - - - 32785 + 14587 + 18020 + 21361 + 24959 + 28895 -34237 +32746 0 - + \ No newline at end of file diff --git a/src/tests/topp/FileFilter_48_output.mzML b/src/tests/topp/FileFilter_48_output.mzML index aacdb4e202e..94eff3d51f9 100644 --- a/src/tests/topp/FileFilter_48_output.mzML +++ b/src/tests/topp/FileFilter_48_output.mzML @@ -113,7 +113,7 @@ - + @@ -126,9 +126,9 @@ - + - + @@ -141,7 +141,7 @@ - + @@ -152,7 +152,7 @@ - + @@ -168,16 +168,16 @@ - + - + - + - + @@ -233,45 +233,13 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + - 14572 - - - 18042 + 14587 -19494 +18003 0 - + \ No newline at end of file diff --git a/src/tests/topp/FileInfo_19_output.txt b/src/tests/topp/FileInfo_19_output.txt index e8af63fbdcd..b9b1723d9a0 100644 --- a/src/tests/topp/FileInfo_19_output.txt +++ b/src/tests/topp/FileInfo_19_output.txt @@ -10,12 +10,35 @@ MS levels: 1, 2 Total number of peaks: 2681 Number of spectra: 2 -Ranges: +Combined Ranges (spectra + chromatograms): retention time: 18.25 .. 
18.39 sec (0.0 min) mass-to-charge: 0.00 .. 1590.88 ion mobility: -65.00 .. -65.00 intensity: 0.00 .. 31878566.00 +Spectrum Ranges: + retention time: 18.25 .. 18.39 sec (0.0 min) + mass-to-charge: 110.07 .. 1590.88 + ion mobility: -65.00 .. -65.00 + intensity: 0.00 .. 5057372.00 + +MS Level 1 Ranges: + retention time: 18.25 .. 18.25 sec (0.0 min) + mass-to-charge: 371.27 .. 1590.88 + ion mobility: -65.00 .. -65.00 + intensity: 0.00 .. 5057372.00 + +MS Level 2 Ranges: + retention time: 18.39 .. 18.39 sec (0.0 min) + mass-to-charge: 110.07 .. 871.38 + ion mobility: -65.00 .. -65.00 + intensity: 1030.52 .. 15416.40 + +Chromatogram Ranges: + retention time: 18.25 .. 18.39 sec (0.0 min) + mass-to-charge: 0.00 .. 0.00 + intensity: 117547.13 .. 31878566.00 + Number of spectra per MS level: level 1: 1 level 2: 1 diff --git a/src/tests/topp/FileInfo_1_output.txt b/src/tests/topp/FileInfo_1_output.txt index 3407bd96077..f74048ce136 100644 --- a/src/tests/topp/FileInfo_1_output.txt +++ b/src/tests/topp/FileInfo_1_output.txt @@ -10,11 +10,29 @@ MS levels: 2 Total number of peaks: 57 Number of spectra: 1 -Ranges: +Combined Ranges (spectra + chromatograms): retention time: -1.00 .. -1.00 sec (0.0 min) mass-to-charge: 261.30 .. 783.50 ion mobility: .. intensity: 3672.00 .. 272411.00 + +Spectrum Ranges: + retention time: -1.00 .. -1.00 sec (0.0 min) + mass-to-charge: 261.30 .. 783.50 + ion mobility: .. + intensity: 3672.00 .. 272411.00 + +MS Level 2 Ranges: + retention time: -1.00 .. -1.00 sec (0.0 min) + mass-to-charge: 261.30 .. 783.50 + ion mobility: .. + intensity: 3672.00 .. 272411.00 + +Chromatogram Ranges: + retention time: .. sec ( min) + mass-to-charge: .. + intensity: .. 
+ Number of spectra per MS level: level 2: 1 diff --git a/src/tests/topp/FileInfo_2_output.txt b/src/tests/topp/FileInfo_2_output.txt index 8dc1cd22730..76bc8af57ea 100644 --- a/src/tests/topp/FileInfo_2_output.txt +++ b/src/tests/topp/FileInfo_2_output.txt @@ -10,11 +10,29 @@ MS levels: 1 Total number of peaks: 8 Number of spectra: 8 -Ranges: +Combined Ranges (spectra + chromatograms): retention time: 0.00 .. 6.00 sec (0.1 min) mass-to-charge: 500.00 .. 1100.00 ion mobility: .. intensity: 50.00 .. 400.00 + +Spectrum Ranges: + retention time: 0.00 .. 6.00 sec (0.1 min) + mass-to-charge: 500.00 .. 1100.00 + ion mobility: .. + intensity: 50.00 .. 400.00 + +MS Level 1 Ranges: + retention time: 0.00 .. 6.00 sec (0.1 min) + mass-to-charge: 500.00 .. 1100.00 + ion mobility: .. + intensity: 50.00 .. 400.00 + +Chromatogram Ranges: + retention time: .. sec ( min) + mass-to-charge: .. + intensity: .. + Number of spectra per MS level: level 1: 8 diff --git a/src/tests/topp/FileInfo_4_output.txt b/src/tests/topp/FileInfo_4_output.txt index 8defbec1941..631a82f142f 100644 --- a/src/tests/topp/FileInfo_4_output.txt +++ b/src/tests/topp/FileInfo_4_output.txt @@ -11,12 +11,35 @@ MS levels: 1, 2 Total number of peaks: 6864 Number of spectra: 20 -Ranges: +Combined Ranges (spectra + chromatograms): retention time: 0.26 .. 37.64 sec (0.6 min) mass-to-charge: 207.51 .. 1496.08 ion mobility: .. intensity: 12.15 .. 25903.89 +Spectrum Ranges: + retention time: 0.26 .. 37.64 sec (0.6 min) + mass-to-charge: 207.51 .. 1496.08 + ion mobility: .. + intensity: 12.15 .. 25903.89 + +MS Level 1 Ranges: + retention time: 0.26 .. 32.24 sec (0.5 min) + mass-to-charge: 402.48 .. 1496.08 + ion mobility: .. + intensity: 12.15 .. 25903.89 + +MS Level 2 Ranges: + retention time: 21.95 .. 37.64 sec (0.3 min) + mass-to-charge: 207.51 .. 1035.83 + ion mobility: .. + intensity: 13.39 .. 3633.01 + +Chromatogram Ranges: + retention time: .. sec ( min) + mass-to-charge: .. + intensity: .. 
+ Number of spectra per MS level: level 1: 14 level 2: 6 diff --git a/src/tests/topp/FileInfo_5_output.txt b/src/tests/topp/FileInfo_5_output.txt index c8b0d5cd9e8..a238096d66c 100644 --- a/src/tests/topp/FileInfo_5_output.txt +++ b/src/tests/topp/FileInfo_5_output.txt @@ -12,12 +12,35 @@ MS levels: 1, 2 Total number of peaks: 3149 Number of spectra: 10 -Ranges: +Combined Ranges (spectra + chromatograms): retention time: 20.87 .. 39.97 sec (0.3 min) mass-to-charge: 500.07 .. 1497.28 ion mobility: .. intensity: 16.30 .. 38920.12 +Spectrum Ranges: + retention time: 20.87 .. 39.97 sec (0.3 min) + mass-to-charge: 500.07 .. 1497.28 + ion mobility: .. + intensity: 16.30 .. 38920.12 + +MS Level 1 Ranges: + retention time: 20.87 .. 39.97 sec (0.3 min) + mass-to-charge: 500.07 .. 1497.28 + ion mobility: .. + intensity: 16.30 .. 38920.12 + +MS Level 2 Ranges: + retention time: 25.40 .. 39.11 sec (0.2 min) + mass-to-charge: 500.10 .. 850.08 + ion mobility: .. + intensity: 23.68 .. 729.68 + +Chromatogram Ranges: + retention time: .. sec ( min) + mass-to-charge: .. + intensity: .. + Number of spectra per MS level: level 1: 4 level 2: 6 diff --git a/src/tests/topp/FileInfo_6_output.txt b/src/tests/topp/FileInfo_6_output.txt index 0b7af16dba8..33e3f40118c 100644 --- a/src/tests/topp/FileInfo_6_output.txt +++ b/src/tests/topp/FileInfo_6_output.txt @@ -11,12 +11,29 @@ MS levels: 1 Total number of peaks: 9 Number of spectra: 2 -Ranges: +Combined Ranges (spectra + chromatograms): retention time: 474.56 .. 475.32 sec (0.0 min) mass-to-charge: 937.28 .. 941.20 ion mobility: .. intensity: 1639.00 .. 18025.00 +Spectrum Ranges: + retention time: 474.56 .. 475.32 sec (0.0 min) + mass-to-charge: 937.28 .. 941.20 + ion mobility: .. + intensity: 1639.00 .. 18025.00 + +MS Level 1 Ranges: + retention time: 474.56 .. 475.32 sec (0.0 min) + mass-to-charge: 937.28 .. 941.20 + ion mobility: .. + intensity: 1639.00 .. 18025.00 + +Chromatogram Ranges: + retention time: .. 
sec ( min) + mass-to-charge: .. + intensity: .. + Number of spectra per MS level: level 1: 2 diff --git a/src/tests/topp/FileInfo_9_output.txt b/src/tests/topp/FileInfo_9_output.txt index e1bd5e96ad0..a2c707860ef 100644 --- a/src/tests/topp/FileInfo_9_output.txt +++ b/src/tests/topp/FileInfo_9_output.txt @@ -12,12 +12,35 @@ MS levels: 1, 2 Total number of peaks: 40 Number of spectra: 4 -Ranges: +Combined Ranges (spectra + chromatograms): retention time: 5.10 .. 5.40 sec (0.0 min) mass-to-charge: 0.00 .. 18.00 ion mobility: .. intensity: 1.00 .. 20.00 +Spectrum Ranges: + retention time: 5.10 .. 5.40 sec (0.0 min) + mass-to-charge: 0.00 .. 18.00 + ion mobility: .. + intensity: 1.00 .. 20.00 + +MS Level 1 Ranges: + retention time: 5.10 .. 5.40 sec (0.0 min) + mass-to-charge: 0.00 .. 14.00 + ion mobility: .. + intensity: 1.00 .. 15.00 + +MS Level 2 Ranges: + retention time: 5.20 .. 5.20 sec (0.0 min) + mass-to-charge: 0.00 .. 18.00 + ion mobility: .. + intensity: 2.00 .. 20.00 + +Chromatogram Ranges: + retention time: .. sec ( min) + mass-to-charge: .. + intensity: .. 
+ Number of spectra per MS level: level 1: 3 level 2: 1 diff --git a/src/tests/topp/QCCalculator_2_output.mzQC b/src/tests/topp/QCCalculator_2_output.mzQC index 03429e90413..130ae90c4d9 100644 --- a/src/tests/topp/QCCalculator_2_output.mzQC +++ b/src/tests/topp/QCCalculator_2_output.mzQC @@ -1,6 +1,6 @@ { "mzQC": { - "creationDate": "2021-07-13T18:11:15", + "creationDate": "2025-05-16T14:22:42", "version": "1.0.0", "contactName": "name", "contactAddress": "address", @@ -11,7 +11,7 @@ "label": "label", "inputFiles": [ { - "location": "/home/axel/dev/OpenMS/src/tests/topp/QCCalculator_input.mzML", + "location": "/home/sachsenb/Development/OpenMS/src/tests/topp/QCCalculator_input.mzML", "name": "QCCalculator_input.mzML", "fileFormat": { "accession": "MS:10000584", @@ -40,7 +40,7 @@ { "accession": "MS:1009001", "name": "QCCalculator", - "version": "2.6.0", + "version": "3.5.0", "uri": "https://www.openms.de" } ] @@ -71,7 +71,7 @@ "name": "MZ acquisition range", "value": [ 0, - 678 + 18 ] }, { diff --git a/src/topp/FileInfo.cpp b/src/topp/FileInfo.cpp index e7a3e3e7e8e..6b3610ffc99 100644 --- a/src/topp/FileInfo.cpp +++ b/src/topp/FileInfo.cpp @@ -8,7 +8,6 @@ #include #include - #include #include @@ -33,11 +32,13 @@ #include #include #include +#include #include #include #include + #include #include #include @@ -165,6 +166,10 @@ class TOPPFileInfo : public TOPPBase registerFlag_("i", "Check whether a given mzML file contains valid indices (conforming to the indexedmzML standard)"); } + // Forward declare the specialized version for MSExperiment to avoid compiler errors + // template <> + // void writeRangesHumanReadable_(const MSExperiment& map, ostream &os); + template void writeRangesHumanReadable_(const Map& map, ostream &os) { @@ -208,7 +213,196 @@ class TOPPFileInfo : public TOPPBase else { os << " intensity: " << String::number(map.getMinIntensity(), 2) << " .. 
" << String::number(map.getMaxIntensity(), 2) << "\n\n"; - } + } + } + + void writeRangesHumanReadable_(const MSExperiment& exp, ostream &os) + { + // 1. Display Combined Ranges (same format as before for backward compatibility) + os << "Combined Ranges (spectra + chromatograms):" << '\n'; + // Use the combinedRanges() accessor + if (exp.combinedRanges().RangeRT::isEmpty()) + { + os << " retention time: .. sec ( min)\n"; + } + else + { + os << " retention time: " << String::number(exp.combinedRanges().getMinRT(), 2) << " .. " + << String::number(exp.combinedRanges().getMaxRT(), 2) << " sec (" + << String::number((exp.combinedRanges().getMaxRT() - exp.combinedRanges().getMinRT()) / 60, 1) << " min)\n"; + } + + // Display m/z range + if (exp.combinedRanges().RangeMZ::isEmpty()) + { + os << " mass-to-charge: .. \n"; + } + else + { + os << " mass-to-charge: " << String::number(exp.combinedRanges().getMinMZ(), 2) << " .. " + << String::number(exp.combinedRanges().getMaxMZ(), 2) << '\n'; + } + + // Display mobility range if present + if (exp.combinedRanges().RangeMobility::isEmpty()) + { + os << " ion mobility: .. \n"; + } + else + { + os << " ion mobility: " << String::number(exp.combinedRanges().getMinMobility(), 2) << " .. " + << String::number(exp.combinedRanges().getMaxMobility(), 2) << '\n'; + } + + // Display intensity range + if (exp.combinedRanges().RangeIntensity::isEmpty()) + { + os << " intensity: .. \n\n"; + } + else + { + os << " intensity: " << String::number(exp.combinedRanges().getMinIntensity(), 2) << " .. " + << String::number(exp.combinedRanges().getMaxIntensity(), 2) << "\n\n"; + } + + // 2. Display Spectrum Ranges (overall) + os << "Spectrum Ranges:" << '\n'; + // Use the spectrumRanges() accessor with MS level 0 for overall ranges + const auto& spec_ranges = exp.spectrumRanges(); + + if (spec_ranges.RangeRT::isEmpty()) + { + os << " retention time: .. 
sec ( min)\n"; + } + else + { + os << " retention time: " << String::number(spec_ranges.getMinRT(), 2) << " .. " + << String::number(spec_ranges.getMaxRT(), 2) << " sec (" + << String::number((spec_ranges.getMaxRT() - spec_ranges.getMinRT()) / 60, 1) << " min)\n"; + } + + // Display m/z range + if (spec_ranges.RangeMZ::isEmpty()) + { + os << " mass-to-charge: .. \n"; + } + else + { + os << " mass-to-charge: " << String::number(spec_ranges.getMinMZ(), 2) << " .. " + << String::number(spec_ranges.getMaxMZ(), 2) << '\n'; + } + + // Display mobility range if present + if (spec_ranges.RangeMobility::isEmpty()) + { + os << " ion mobility: .. \n"; + } + else + { + os << " ion mobility: " << String::number(spec_ranges.getMinMobility(), 2) << " .. " + << String::number(spec_ranges.getMaxMobility(), 2) << '\n'; + } + + // Display intensity range + if (spec_ranges.RangeIntensity::isEmpty()) + { + os << " intensity: .. \n\n"; + } + else + { + os << " intensity: " << String::number(spec_ranges.getMinIntensity(), 2) << " .. " + << String::number(spec_ranges.getMaxIntensity(), 2) << "\n\n"; + } + + // 3. Display Spectrum Ranges per MS Level + std::set ms_levels = exp.spectrumRanges().getMSLevels(); + for (UInt ms_level : ms_levels) + { + os << "MS Level " << ms_level << " Ranges:" << '\n'; + const auto& level_ranges = exp.spectrumRanges().byMSLevel(ms_level); + + // Output RT range for this MS level + if (level_ranges.RangeRT::isEmpty()) + { + os << " retention time: .. sec ( min)\n"; + } + else + { + os << " retention time: " << String::number(level_ranges.getMinRT(), 2) << " .. " + << String::number(level_ranges.getMaxRT(), 2) << " sec (" + << String::number((level_ranges.getMaxRT() - level_ranges.getMinRT()) / 60, 1) << " min)\n"; + } + + // Display m/z range for this MS level + if (level_ranges.RangeMZ::isEmpty()) + { + os << " mass-to-charge: .. \n"; + } + else + { + os << " mass-to-charge: " << String::number(level_ranges.getMinMZ(), 2) << " .. 
" + << String::number(level_ranges.getMaxMZ(), 2) << '\n'; + } + + // Display mobility range for this MS level if present + if (level_ranges.RangeMobility::isEmpty()) + { + os << " ion mobility: .. \n"; + } + else + { + os << " ion mobility: " << String::number(level_ranges.getMinMobility(), 2) << " .. " + << String::number(level_ranges.getMaxMobility(), 2) << '\n'; + } + + // Display intensity range for this MS level + if (level_ranges.RangeIntensity::isEmpty()) + { + os << " intensity: .. \n\n"; + } + else + { + os << " intensity: " << String::number(level_ranges.getMinIntensity(), 2) << " .. " + << String::number(level_ranges.getMaxIntensity(), 2) << "\n\n"; + } + } + + // 4. Display Chromatogram Ranges + os << "Chromatogram Ranges:" << '\n'; + const auto& chrom_ranges = exp.chromatogramRanges(); + + if (chrom_ranges.RangeRT::isEmpty()) + { + os << " retention time: .. sec ( min)\n"; + } + else + { + os << " retention time: " << String::number(chrom_ranges.getMinRT(), 2) << " .. " + << String::number(chrom_ranges.getMaxRT(), 2) << " sec (" + << String::number((chrom_ranges.getMaxRT() - chrom_ranges.getMinRT()) / 60, 1) << " min)\n"; + } + + // Display m/z range for chromatograms + if (chrom_ranges.RangeMZ::isEmpty()) + { + os << " mass-to-charge: .. \n"; + } + else + { + os << " mass-to-charge: " << String::number(chrom_ranges.getMinMZ(), 2) << " .. " + << String::number(chrom_ranges.getMaxMZ(), 2) << '\n'; + } + + // Display intensity range for chromatograms + if (chrom_ranges.RangeIntensity::isEmpty()) + { + os << " intensity: .. \n\n"; + } + else + { + os << " intensity: " << String::number(chrom_ranges.getMinIntensity(), 2) << " .. " + << String::number(chrom_ranges.getMaxIntensity(), 2) << "\n\n"; + } } template @@ -258,6 +452,174 @@ class TOPPFileInfo : public TOPPBase << "general: ranges: intensity: max" << '\t' << "" << '\n'; } } + + + void writeRangesMachineReadable_(const MSExperiment& exp, ostream &os) + { + // 1. 
Combined Ranges + if (!exp.combinedRanges().RangeRT::isEmpty()) + { + os << "general: combined ranges: retention time: min" << '\t' << String::number(exp.combinedRanges().getMinRT(), 2) << '\n' + << "general: combined ranges: retention time: max" << '\t' << String::number(exp.combinedRanges().getMaxRT(), 2) << '\n'; + } + else + { + os << "general: combined ranges: retention time: min" << '\t' << "" << '\n' + << "general: combined ranges: retention time: max" << '\t' << "" << '\n'; + } + + if (!exp.combinedRanges().RangeMZ::isEmpty()) + { + os << "general: combined ranges: mass-to-charge: min" << '\t' << String::number(exp.combinedRanges().getMinMZ(), 2) << '\n' + << "general: combined ranges: mass-to-charge: max" << '\t' << String::number(exp.combinedRanges().getMaxMZ(), 2) << '\n'; + } + else + { + os << "general: combined ranges: mass-to-charge: min" << '\t' << "" << '\n' + << "general: combined ranges: mass-to-charge: max" << '\t' << "" << '\n'; + } + + if (!exp.combinedRanges().RangeMobility::isEmpty()) + { + os << "general: combined ranges: ion-mobility: min" << '\t' << String::number(exp.combinedRanges().getMinMobility(), 2) << '\n' + << "general: combined ranges: ion-mobility: max" << '\t' << String::number(exp.combinedRanges().getMaxMobility(), 2) << '\n'; + } + + if (!exp.combinedRanges().RangeIntensity::isEmpty()) + { + os << "general: combined ranges: intensity: min" << '\t' << String::number(exp.combinedRanges().getMinIntensity(), 2) << '\n' + << "general: combined ranges: intensity: max" << '\t' << String::number(exp.combinedRanges().getMaxIntensity(), 2) << '\n'; + } + else + { + os << "general: combined ranges: intensity: min" << '\t' << "" << '\n' + << "general: combined ranges: intensity: max" << '\t' << "" << '\n'; + } + + // 2. 
Spectrum Ranges (overall) + const auto& spec_ranges = exp.spectrumRanges(); + if (!spec_ranges.RangeRT::isEmpty()) + { + os << "general: spectrum ranges: retention time: min" << '\t' << String::number(spec_ranges.getMinRT(), 2) << '\n' + << "general: spectrum ranges: retention time: max" << '\t' << String::number(spec_ranges.getMaxRT(), 2) << '\n'; + } + else + { + os << "general: spectrum ranges: retention time: min" << '\t' << "" << '\n' + << "general: spectrum ranges: retention time: max" << '\t' << "" << '\n'; + } + + // Similar code for m/z, mobility, intensity for spectrum ranges + if (!spec_ranges.RangeMZ::isEmpty()) + { + os << "general: spectrum ranges: mass-to-charge: min" << '\t' << String::number(spec_ranges.getMinMZ(), 2) << '\n' + << "general: spectrum ranges: mass-to-charge: max" << '\t' << String::number(spec_ranges.getMaxMZ(), 2) << '\n'; + } + else + { + os << "general: spectrum ranges: mass-to-charge: min" << '\t' << "" << '\n' + << "general: spectrum ranges: mass-to-charge: max" << '\t' << "" << '\n'; + } + + if (!spec_ranges.RangeMobility::isEmpty()) + { + os << "general: spectrum ranges: ion-mobility: min" << '\t' << String::number(spec_ranges.getMinMobility(), 2) << '\n' + << "general: spectrum ranges: ion-mobility: max" << '\t' << String::number(spec_ranges.getMaxMobility(), 2) << '\n'; + } + + if (!spec_ranges.RangeIntensity::isEmpty()) + { + os << "general: spectrum ranges: intensity: min" << '\t' << String::number(spec_ranges.getMinIntensity(), 2) << '\n' + << "general: spectrum ranges: intensity: max" << '\t' << String::number(spec_ranges.getMaxIntensity(), 2) << '\n'; + } + else + { + os << "general: spectrum ranges: intensity: min" << '\t' << "" << '\n' + << "general: spectrum ranges: intensity: max" << '\t' << "" << '\n'; + } + + // 3. 
MS Level-specific Ranges + std::set ms_levels = exp.spectrumRanges().getMSLevels(); + for (UInt ms_level : ms_levels) + { + const auto& level_ranges = exp.spectrumRanges().byMSLevel(ms_level); + if (!level_ranges.RangeRT::isEmpty()) + { + os << "general: MS" << ms_level << " ranges: retention time: min" << '\t' << String::number(level_ranges.getMinRT(), 2) << '\n' + << "general: MS" << ms_level << " ranges: retention time: max" << '\t' << String::number(level_ranges.getMaxRT(), 2) << '\n'; + } + else + { + os << "general: MS" << ms_level << " ranges: retention time: min" << '\t' << "" << '\n' + << "general: MS" << ms_level << " ranges: retention time: max" << '\t' << "" << '\n'; + } + + // Similar code for other dimensions + if (!level_ranges.RangeMZ::isEmpty()) + { + os << "general: MS" << ms_level << " ranges: mass-to-charge: min" << '\t' << String::number(level_ranges.getMinMZ(), 2) << '\n' + << "general: MS" << ms_level << " ranges: mass-to-charge: max" << '\t' << String::number(level_ranges.getMaxMZ(), 2) << '\n'; + } + else + { + os << "general: MS" << ms_level << " ranges: mass-to-charge: min" << '\t' << "" << '\n' + << "general: MS" << ms_level << " ranges: mass-to-charge: max" << '\t' << "" << '\n'; + } + + if (!level_ranges.RangeMobility::isEmpty()) + { + os << "general: MS" << ms_level << " ranges: ion-mobility: min" << '\t' << String::number(level_ranges.getMinMobility(), 2) << '\n' + << "general: MS" << ms_level << " ranges: ion-mobility: max" << '\t' << String::number(level_ranges.getMaxMobility(), 2) << '\n'; + } + + if (!level_ranges.RangeIntensity::isEmpty()) + { + os << "general: MS" << ms_level << " ranges: intensity: min" << '\t' << String::number(level_ranges.getMinIntensity(), 2) << '\n' + << "general: MS" << ms_level << " ranges: intensity: max" << '\t' << String::number(level_ranges.getMaxIntensity(), 2) << '\n'; + } + else + { + os << "general: MS" << ms_level << " ranges: intensity: min" << '\t' << "" << '\n' + << "general: MS" << ms_level 
<< " ranges: intensity: max" << '\t' << "" << '\n'; + } + } + + // 4. Chromatogram Ranges + const auto& chrom_ranges = exp.chromatogramRanges(); + if (!chrom_ranges.RangeRT::isEmpty()) + { + os << "general: chromatogram ranges: retention time: min" << '\t' << String::number(chrom_ranges.getMinRT(), 2) << '\n' + << "general: chromatogram ranges: retention time: max" << '\t' << String::number(chrom_ranges.getMaxRT(), 2) << '\n'; + } + else + { + os << "general: chromatogram ranges: retention time: min" << '\t' << "" << '\n' + << "general: chromatogram ranges: retention time: max" << '\t' << "" << '\n'; + } + + // Similar code for m/z and intensity for chromatogram ranges + if (!chrom_ranges.RangeMZ::isEmpty()) + { + os << "general: chromatogram ranges: mass-to-charge: min" << '\t' << String::number(chrom_ranges.getMinMZ(), 2) << '\n' + << "general: chromatogram ranges: mass-to-charge: max" << '\t' << String::number(chrom_ranges.getMaxMZ(), 2) << '\n'; + } + else + { + os << "general: chromatogram ranges: mass-to-charge: min" << '\t' << "" << '\n' + << "general: chromatogram ranges: mass-to-charge: max" << '\t' << "" << '\n'; + } + + if (!chrom_ranges.RangeIntensity::isEmpty()) + { + os << "general: chromatogram ranges: intensity: min" << '\t' << String::number(chrom_ranges.getMinIntensity(), 2) << '\n' + << "general: chromatogram ranges: intensity: max" << '\t' << String::number(chrom_ranges.getMaxIntensity(), 2) << '\n'; + } + else + { + os << "general: chromatogram ranges: intensity: min" << '\t' << "" << '\n' + << "general: chromatogram ranges: intensity: max" << '\t' << "" << '\n'; + } + } template void writeSummaryStatisticsMachineReadable_(const Math::SummaryStatistics &stats, ostream &os, String title) @@ -1722,7 +2084,7 @@ class TOPPFileInfo : public TOPPBase else //peaks { //copy intensities of MS-level 1 peaks - exp.updateRanges(1); + exp.updateRanges(); Size size = exp.getSize(); vector intensities; intensities.reserve(size); diff --git 
a/src/topp/FileMerger.cpp b/src/topp/FileMerger.cpp index aa5c425f646..ea180264a72 100644 --- a/src/topp/FileMerger.cpp +++ b/src/topp/FileMerger.cpp @@ -121,7 +121,7 @@ class TOPPFileMerger : TransformationDescription trafo; if (first_file) // no transformation necessary { - rt_offset_ = map.getMaxRT() + rt_gap_; + rt_offset_ = map.getMaxRT() + rt_gap_; // overall range for all spectra trafo.fitModel("identity"); } else // subsequent file -> apply transformation diff --git a/src/topp/ImageCreator.cpp b/src/topp/ImageCreator.cpp index 7e52f07289d..a96644f1153 100644 --- a/src/topp/ImageCreator.cpp +++ b/src/topp/ImageCreator.cpp @@ -269,11 +269,12 @@ class TOPPImageCreator : exp.getSpectra().erase(remove_if(exp.begin(), exp.end(), predicate), exp.end()); } - exp.updateRanges(1); + exp.updateRanges(); Size rows = getIntOption_("height"), cols = getIntOption_("width"); if (rows == 0) rows = exp.size(); - if (cols == 0) cols = UInt(ceil(exp.getMaxMZ() - exp.getMinMZ())); + if (cols == 0) cols = UInt(ceil( + exp.spectrumRanges().byMSLevel(1).getMaxMZ() - exp.spectrumRanges().byMSLevel(1).getMinMZ())); //---------------------------------------------------------------- //Do the actual resampling @@ -284,9 +285,9 @@ class TOPPImageCreator : if (!getFlag_("transpose")) { // scans run bottom-up: - bilip.setMapping_0(0, exp.getMaxRT(), rows - 1, exp.getMinRT()); + bilip.setMapping_0(0, exp.spectrumRanges().byMSLevel(1).getMaxRT(), rows - 1, exp.spectrumRanges().byMSLevel(1).getMinRT()); // peaks run left-right: - bilip.setMapping_1(0, exp.getMinMZ(), cols - 1, exp.getMaxMZ()); + bilip.setMapping_1(0, exp.spectrumRanges().byMSLevel(1).getMinMZ(), cols - 1, exp.spectrumRanges().byMSLevel(1).getMaxMZ()); for (PeakMap::Iterator spec_iter = exp.begin(); spec_iter != exp.end(); ++spec_iter) diff --git a/src/topp/MapAlignerPoseClustering.cpp b/src/topp/MapAlignerPoseClustering.cpp index 6b08fcab96a..dfb00003cad 100644 --- a/src/topp/MapAlignerPoseClustering.cpp +++ 
b/src/topp/MapAlignerPoseClustering.cpp @@ -160,8 +160,9 @@ class TOPPMapAlignerPoseClustering : else if (in_type == FileTypes::MZML) // this is expensive! { PeakMap exp; + FileHandler().loadExperiment(in_files[i], exp, {FileTypes::MZML}, log_type_); - exp.updateRanges(1); + exp.updateRanges(); s = exp.getSize(); } if (s > max_count) diff --git a/src/topp/Resampler.cpp b/src/topp/Resampler.cpp index 208304a803a..b6d04d026f9 100644 --- a/src/topp/Resampler.cpp +++ b/src/topp/Resampler.cpp @@ -116,15 +116,15 @@ class TOPPResampler : lin_resampler.raster(exp[i]); } } - else if(!exp.RangeRT::isEmpty()) + else if(!exp.spectrumRanges().RangeRT::isEmpty()) { // start with even position - auto start_pos = floor(exp.getMinRT()); + auto start_pos = floor(exp.spectrumRanges().getMinRT()); // resample every scan for (Size i = 0; i < exp.size(); ++i) { - lin_resampler.raster_align(exp[i], start_pos, exp.getMaxRT()); + lin_resampler.raster_align(exp[i], start_pos, exp.spectrumRanges().getMaxRT()); } } From dc7037fa0e48033c81539236cdaaf05403d5da53 Mon Sep 17 00:00:00 2001 From: "Peter J. 
Jones" Date: Fri, 16 May 2025 10:06:06 -0700 Subject: [PATCH 24/31] Pjones/backports (#8051) * [CI] Extract the version update code into its own script * [Docker] Use the correct THIRDPARTY directory name --------- Co-authored-by: Samuel Wein --- .github/workflows/update_version_numbers.yml | 50 +-------- dockerfiles/Dockerfile | 2 +- tools/update_version_numbers.sh | 101 +++++++++++++++++++ 3 files changed, 106 insertions(+), 47 deletions(-) create mode 100755 tools/update_version_numbers.sh diff --git a/.github/workflows/update_version_numbers.yml b/.github/workflows/update_version_numbers.yml index 77c0011edb7..6b06a9407df 100644 --- a/.github/workflows/update_version_numbers.yml +++ b/.github/workflows/update_version_numbers.yml @@ -21,52 +21,10 @@ jobs: # Update files with new package version numbers - name: update files run: | - # setting variables - package_version_major="${{ github.event.inputs.major }}" - package_version_minor="${{ github.event.inputs.minor }}" - package_version_patch="${{ github.event.inputs.patch }}" - package_version="${{ github.event.inputs.major }}.${{ github.event.inputs.minor }}.${{ github.event.inputs.patch }}" - echo "Setting version $package_version" - - # update main cmakelist - sed -i '' "s#.*set(OPENMS_PACKAGE_VERSION_MAJOR.*#set(OPENMS_PACKAGE_VERSION_MAJOR \"$package_version_major\")#" CMakeLists.txt - sed -i '' "s#.*set(OPENMS_PACKAGE_VERSION_MINOR.*#set(OPENMS_PACKAGE_VERSION_MINOR \"$package_version_minor\")#" CMakeLists.txt - sed -i '' "s#.*set(OPENMS_PACKAGE_VERSION_PATCH.*#set(OPENMS_PACKAGE_VERSION_PATCH \"$package_version_patch\")#" CMakeLists.txt - - # update version info test - sed -i '' "s#detail.version_major =.*#detail.version_major = $package_version_major;#" ./src/tests/class_tests/openms/source/VersionInfo_test.cpp - sed -i '' "s#detail.version_minor =.*#detail.version_minor = $package_version_minor;#" ./src/tests/class_tests/openms/source/VersionInfo_test.cpp - sed -i '' "s#detail.version_patch 
=.*#detail.version_patch = $package_version_patch;#" ./src/tests/class_tests/openms/source/VersionInfo_test.cpp - - # update vcpkg.json - sed -i '' "s/\"version-string\": \".*\"/\"version-string\": \"$package_version\"/" vcpkg.json - - # update test write ini out: - sed -i '' "s#&2 "ERROR: please provide three numbers (use --help for more info)" + exit 1 +fi + +################################################################################ +package_version_major=$1 +package_version_minor=$2 +package_version_patch=$3 +package_version="$1.$2.$3" + +echo "Setting version $package_version" + +################################################################################ +# update main cmakelist +sed -i -e "s#.*set(OPENMS_PACKAGE_VERSION_MAJOR.*#set(OPENMS_PACKAGE_VERSION_MAJOR \"$package_version_major\")#" CMakeLists.txt +sed -i -e "s#.*set(OPENMS_PACKAGE_VERSION_MINOR.*#set(OPENMS_PACKAGE_VERSION_MINOR \"$package_version_minor\")#" CMakeLists.txt +sed -i -e "s#.*set(OPENMS_PACKAGE_VERSION_PATCH.*#set(OPENMS_PACKAGE_VERSION_PATCH \"$package_version_patch\")#" CMakeLists.txt + +# update version info test +sed -i -e "s#detail.version_major =.*#detail.version_major = $package_version_major;#" ./src/tests/class_tests/openms/source/VersionInfo_test.cpp +sed -i -e "s#detail.version_minor =.*#detail.version_minor = $package_version_minor;#" ./src/tests/class_tests/openms/source/VersionInfo_test.cpp +sed -i -e "s#detail.version_patch =.*#detail.version_patch = $package_version_patch;#" ./src/tests/class_tests/openms/source/VersionInfo_test.cpp + +# update vcpkg.json +sed -i -e "s/\"version-string\": \".*\"/\"version-string\": \"$package_version\"/" vcpkg.json + +# update test write ini out: +sed -i -e "s# Date: Sat, 17 May 2025 04:17:15 -0400 Subject: [PATCH 25/31] fix: r^2 is zero (#8052) r^2 is zero because it is not computed. Causes downstream OpenSwath to fail. 
Ensure goodness of fit is set to true so r^2 is computed --- src/openms/source/ML/RANSAC/RANSACModelLinear.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/openms/source/ML/RANSAC/RANSACModelLinear.cpp b/src/openms/source/ML/RANSAC/RANSACModelLinear.cpp index 35e2dbe14ec..0210a0c2bb8 100644 --- a/src/openms/source/ML/RANSAC/RANSACModelLinear.cpp +++ b/src/openms/source/ML/RANSAC/RANSACModelLinear.cpp @@ -43,7 +43,7 @@ namespace OpenMS::Math } LinearRegression lin_reg; - lin_reg.computeRegression(0.95, x.begin(), x.end(), y.begin(), false); + lin_reg.computeRegression(0.95, x.begin(), x.end(), y.begin(), true); return lin_reg.getRSquared(); } From e88b12057c220219159457da333f936c6e5c63c0 Mon Sep 17 00:00:00 2001 From: Simon Gene Gottlieb Date: Tue, 20 May 2025 14:04:11 +0200 Subject: [PATCH 26/31] Feat/option to toggle tdl (#8055) * patch: rename ENABLE_CWL to ENABLE_CWL_GENERATION * feat: add ENABLE_TDL option to enable/disable TDL dependency --- CMakeLists.txt | 10 ++++++++-- src/openms/CMakeLists.txt | 14 ++++++++++++-- src/openms/extern/CMakeLists.txt | 5 ++++- src/openms/source/FORMAT/ParamCWLFile.cpp | 9 +++++++++ src/openms/source/FORMAT/ParamJSONFile.cpp | 8 ++++++++ tools/ci/cibuild.cmake | 6 +++--- 6 files changed, 44 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1cf3ce1207c..781ac0f079d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -83,7 +83,13 @@ option(ENABLE_DOCS "Indicates whether documentation should be built." ON) option(WITH_GUI "Build GUI parts of OpenMS (TOPPView&Co). This requires QtGui." ON) option(NO_WEBENGINE_WIDGETS "Do not use QtWebengineWidgets. Disables Javascript views in TOPPView." OFF) option(WITH_HDF5 "Build HDF5 parts of OpenMS." OFF) -option(ENABLE_CWL "Build and validate the CWL description files for all TOPP tools." OFF) +option(ENABLE_TDL "Load dependency and compile against TDL (required for CWL file support)." 
ON) +option(ENABLE_CWL_GENERATION "Build and validate the CWL description files for all TOPP tools (Requires ENABLE_TDL=ON)." OFF) + +if(ENABLE_CWL_GENERATION AND NOT ENABLE_TDL) + message(FATAL_ERROR "ENABLE_CWL_GENERATION requires ENABLE_TDL to be ON.") +endif() + if(MSVC) option(MT_ENABLE_NESTED_OPENMP "Enable nested parallelism." OFF) @@ -476,7 +482,7 @@ endif() #------------------------------------------------------------------------------ # CWL generation (updates openms/share/commonwl/*.cwl files for all TOPP tools) #------------------------------------------------------------------------------ -if (ENABLE_CWL) +if (ENABLE_CWL_GENERATION) include(${OPENMS_HOST_DIRECTORY}/cmake/cwl_generation.cmake) endif() diff --git a/src/openms/CMakeLists.txt b/src/openms/CMakeLists.txt index f486a834401..86493c02b23 100644 --- a/src/openms/CMakeLists.txt +++ b/src/openms/CMakeLists.txt @@ -57,7 +57,7 @@ include (${PROJECT_SOURCE_DIR}/includes.cmake) # all the dependency libraries are linked into libOpenMS.so set(OPENMS_DEP_LIBRARIES Evergreen LibSVM::LibSVM XercesC::XercesC Eigen3::Eigen Qt6::Core Qt6::Network) -## setup the argumentes to 'target_link_libraries(OpenMS PRIVATE ${OPENMS_DEP_PRIVATE_LIBRARIES})' +## setup the arguments to 'target_link_libraries(OpenMS PRIVATE ${OPENMS_DEP_PRIVATE_LIBRARIES})' set(OPENMS_DEP_PRIVATE_LIBRARIES $<$:HDF5::HDF5> ${LPTARGET} @@ -72,8 +72,13 @@ set(OPENMS_DEP_PRIVATE_LIBRARIES SQLiteCpp ZLIB::ZLIB nlohmann_json::nlohmann_json - tdl::tdl ) +if (ENABLE_TDL) +set(OPENMS_DEP_PRIVATE_LIBRARIES + ${OPENMS_DEP_PRIVATE_LIBRARIES} + tdl::tdl + ) +endif() # Xerces requires linking against CoreFoundation&CoreServices on macOS # TODO check if this is still the case @@ -114,6 +119,11 @@ if (MSVC) target_compile_options(OpenMS PRIVATE "/we4189") endif() +if (ENABLE_TDL) + target_compile_definitions(OpenMS PUBLIC ENABLE_TDL) +endif() + + #------------------------------------------------------------------------------ # since the share basically 
belongs to OpenMS core we control its installation # here diff --git a/src/openms/extern/CMakeLists.txt b/src/openms/extern/CMakeLists.txt index de0014b2bfa..d14fedae3ba 100644 --- a/src/openms/extern/CMakeLists.txt +++ b/src/openms/extern/CMakeLists.txt @@ -55,7 +55,10 @@ add_subdirectory(eol-bspline) add_subdirectory(IsoSpec) add_subdirectory(GTE) add_subdirectory(Quadtree) -add_subdirectory(tool_description_lib) + +if(ENABLE_TDL) + add_subdirectory(tool_description_lib) +endif() ## ## external packages (with fallback option to local version) diff --git a/src/openms/source/FORMAT/ParamCWLFile.cpp b/src/openms/source/FORMAT/ParamCWLFile.cpp index 1b9d865a368..80dc8473ed0 100644 --- a/src/openms/source/FORMAT/ParamCWLFile.cpp +++ b/src/openms/source/FORMAT/ParamCWLFile.cpp @@ -12,7 +12,12 @@ #include #include #include + +#if defined(ENABLE_TDL) #include +#else +#include +#endif using json = nlohmann::json; @@ -56,6 +61,7 @@ namespace OpenMS void ParamCWLFile::writeCWLToStream(std::ostream* os_ptr, const Param& param, const ToolInfo& tool_info) const { +#if defined(ENABLE_TDL) std::ostream& os = *os_ptr; os.precision(std::numeric_limits::digits10); @@ -316,5 +322,8 @@ namespace OpenMS "# SPDX-License-Identifier: Apache-2.0\n"; os << convertToCWL(tdl_tool_info) << "\n"; +#else + throw std::runtime_error{"TDL support is not available. 
Rebuild with -DENABLE_TDL=ON to enable this feature."}; +#endif } } // namespace OpenMS diff --git a/src/openms/source/FORMAT/ParamJSONFile.cpp b/src/openms/source/FORMAT/ParamJSONFile.cpp index 852895a780d..a2573846e01 100644 --- a/src/openms/source/FORMAT/ParamJSONFile.cpp +++ b/src/openms/source/FORMAT/ParamJSONFile.cpp @@ -11,7 +11,11 @@ #include #include #include +#if defined(ENABLE_TDL) #include +#else +#include +#endif using json = nlohmann::json; @@ -174,6 +178,7 @@ namespace OpenMS void ParamJSONFile::writeToStream(std::ostream* os_ptr, const Param& param) const { +#if defined(ENABLE_TDL) std::ostream& os = *os_ptr; // discover the name of the first nesting Level @@ -303,5 +308,8 @@ namespace OpenMS assert(stack.size() == 1); os << jsonDoc.dump(2); +#else + throw std::runtime_error{"TDL support is not available. Rebuild with -DENABLE_TDL=ON to enable this feature."}; +#endif } } // namespace OpenMS diff --git a/tools/ci/cibuild.cmake b/tools/ci/cibuild.cmake index 461347d0168..eaa093e2386 100644 --- a/tools/ci/cibuild.cmake +++ b/tools/ci/cibuild.cmake @@ -89,7 +89,7 @@ set(VARS_TO_LOAD "ENABLE_TOPP_TESTING" "ENABLE_PIPELINE_TESTING" "ENABLE_DOCS" - "ENABLE_CWL" + "ENABLE_CWL_GENERATION" "ENABLE_TUTORIALS" "ENABLE_UPDATE_CHECK" "MT_ENABLE_OPENMP" @@ -179,11 +179,11 @@ endif() if("$ENV{ENABLE_STYLE_TESTING}" STREQUAL "OFF") if("$ENV{PYOPENMS}" STREQUAL "ON") ctest_build(BUILD "${CTEST_BINARY_DIRECTORY}" TARGET "pyopenms" NUMBER_ERRORS _build_errors) - # Generate and valdiate the CWL files if "ENABLE_CWL" is set + # Generate and validate the CWL files if "ENABLE_CWL_GENERATION" is set else() ctest_build(BUILD "${CTEST_BINARY_DIRECTORY}" NUMBER_ERRORS _build_errors) endif() - if("$ENV{ENABLE_CWL}" STREQUAL "ON") + if("$ENV{ENABLE_CWL_GENERATION}" STREQUAL "ON") ctest_build(BUILD "${CTEST_BINARY_DIRECTORY}" TARGET "generate_cwl_files" NUMBER_ERRORS _build_errors) endif() else() From 8b30bf3c509faddb6c5b69e81623ab7bce168a2f Mon Sep 17 00:00:00 2001 From: Timo 
Sachsenberg Date: Wed, 21 May 2025 08:37:22 +0200 Subject: [PATCH 27/31] refactor: Store IDs outside of spectra. (#7974) * Add AnnotatedMSRawData and integrate it instead of MSExperiment where identification is needed * Various refactorings in order to integrate the new class "AnnotatedMSRawData" Build target "OpenMS" is successfully compiling now. * some fixes to tests * remove remnantsof protein identification at spectra * Apply suggestions from code review Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> * minor * fix test * class and tool tests pass * more fixes * rename * removed commented out code * Update src/openms_gui/source/VISUAL/PlotCanvas.cpp * Update src/openms/source/ANALYSIS/ID/IDMapper.cpp * nop * simplified interface a bit more * moved file out of kernel * remove mzML from IDSplitter * forgot one file * removed unused mapping class * compiles again * some TV fixes * More identification view fixes * peak annotations work again * more safety checks * more imp to cpp * mend * Comment out unused variable declarations * fix idfilter * fix doxygen * reverted ignored setting file * nop * merge * fix warning --------- Co-authored-by: dvdvgt Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- CHANGELOG | 7 + CMakeLists.txt | 1 - contrib | 2 +- doc/code_examples/Tutorial_GUI_Plot1D.cpp | 8 +- doc/doxygen/public/TOPP.doxygen | 3 - doc/doxygen/public/developer_tutorial.doxygen | 2 - .../include/OpenMS/ANALYSIS/ID/IDMapper.h | 5 +- .../MapAlignmentAlgorithmIdentification.h | 19 +- .../OpenMS/DATASTRUCTURES/ParamValue.h | 2 +- src/openms/include/OpenMS/FORMAT/MSPFile.h | 16 +- .../include/OpenMS/KERNEL/MSExperiment.h | 3 - .../include/OpenMS/METADATA/AnnotatedMSRun.h | 320 +++++++++ .../OpenMS/METADATA/ExperimentalSettings.h | 8 - .../OpenMS/METADATA/SpectrumSettings.h | 8 - .../include/OpenMS/METADATA/sources.cmake | 1 + .../include/OpenMS/PROCESSING/ID/IDFilter.h | 104 ++- 
src/openms/source/ANALYSIS/ID/IDMapper.cpp | 77 ++- .../MapAlignmentAlgorithmIdentification.cpp | 13 +- .../source/APPLICATIONS/ToolHandler.cpp | 3 - src/openms/source/FORMAT/MSPFile.cpp | 27 +- src/openms/source/KERNEL/MSExperiment.cpp | 2 + src/openms/source/METADATA/AnnotatedMSRun.cpp | 64 ++ src/openms/source/METADATA/AnnotatedMSRun.h | 285 +++++++++ .../source/METADATA/ExperimentalSettings.cpp | 16 - .../source/METADATA/SpectrumSettings.cpp | 19 - src/openms/source/METADATA/sources.cmake | 1 + .../include/OpenMS/VISUAL/LayerData1DChrom.h | 2 +- .../include/OpenMS/VISUAL/LayerData1DPeak.h | 2 +- .../include/OpenMS/VISUAL/LayerDataBase.h | 3 +- .../include/OpenMS/VISUAL/LayerDataChrom.h | 6 +- .../include/OpenMS/VISUAL/LayerDataPeak.h | 33 +- .../include/OpenMS/VISUAL/MetaDataBrowser.h | 1 - .../VISUAL/APPLICATIONS/TOPPViewBase.cpp | 80 +-- .../source/VISUAL/LayerData1DBase.cpp | 7 + .../source/VISUAL/LayerData1DPeak.cpp | 54 +- .../source/VISUAL/LayerDataBase.cpp | 38 +- .../source/VISUAL/LayerDataChrom.cpp | 24 +- .../source/VISUAL/LayerDataPeak.cpp | 13 +- .../source/VISUAL/MetaDataBrowser.cpp | 6 - .../source/VISUAL/Painter2DBase.cpp | 10 +- src/openms_gui/source/VISUAL/Plot1DCanvas.cpp | 16 +- src/openms_gui/source/VISUAL/Plot2DCanvas.cpp | 50 +- src/openms_gui/source/VISUAL/Plot3DCanvas.cpp | 4 +- .../source/VISUAL/Plot3DOpenGLCanvas.cpp | 21 +- src/openms_gui/source/VISUAL/PlotCanvas.cpp | 14 +- .../source/VISUAL/SpectraIDViewTab.cpp | 296 +++++---- .../source/VISUAL/SpectraTreeTab.cpp | 39 +- .../VISUAL/TVIdentificationViewController.cpp | 378 ++++++----- .../source/VISUAL/TVSpectraViewController.cpp | 4 +- src/pyOpenMS/pxds/AnnotatedMSRun.pxd | 52 ++ src/pyOpenMS/pxds/ExperimentalSettings.pxd | 6 - src/pyOpenMS/pxds/SpectrumSettings.pxd | 4 - .../class_tests/openms/executables.cmake | 1 + .../openms/source/AnnotatedMSRun_test.cpp | 343 ++++++++++ .../source/ExperimentalSettings_test.cpp | 65 +- .../openms/source/IDFilter_test.cpp | 104 --- 
.../openms/source/IDMapper_test.cpp | 104 +-- .../openms/source/MSPFile_test.cpp | 19 +- .../openms/source/SpectrumSettings_test.cpp | 59 -- src/tests/topp/CMakeLists.txt | 11 - src/tests/topp/SpecLibSearcher_1.MSP | 15 - src/tests/topp/SpecLibSearcher_1.MzData | 57 -- src/tests/topp/SpecLibSearcher_1.idXML | 20 - src/tests/topp/SpecLibSearcher_1.mzML | 147 ----- .../topp/SpecLibSearcher_1_parameters.ini | 49 -- src/topp/IDMassAccuracy.cpp | 478 -------------- src/topp/IDSplitter.cpp | 32 +- src/topp/MapAlignerIdentification.cpp | 7 - src/topp/SpecLibCreator.cpp | 258 -------- src/topp/SpecLibSearcher.cpp | 605 ------------------ src/topp/executables.cmake | 3 - vcpkg | 2 +- 72 files changed, 1949 insertions(+), 2609 deletions(-) create mode 100644 src/openms/include/OpenMS/METADATA/AnnotatedMSRun.h create mode 100644 src/openms/source/METADATA/AnnotatedMSRun.cpp create mode 100644 src/openms/source/METADATA/AnnotatedMSRun.h create mode 100644 src/pyOpenMS/pxds/AnnotatedMSRun.pxd create mode 100644 src/tests/class_tests/openms/source/AnnotatedMSRun_test.cpp delete mode 100644 src/tests/topp/SpecLibSearcher_1.MSP delete mode 100644 src/tests/topp/SpecLibSearcher_1.MzData delete mode 100644 src/tests/topp/SpecLibSearcher_1.idXML delete mode 100644 src/tests/topp/SpecLibSearcher_1.mzML delete mode 100644 src/tests/topp/SpecLibSearcher_1_parameters.ini delete mode 100644 src/topp/IDMassAccuracy.cpp delete mode 100644 src/topp/SpecLibCreator.cpp delete mode 100644 src/topp/SpecLibSearcher.cpp diff --git a/CHANGELOG b/CHANGELOG index b8b738dd5cc..42345418b31 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -10,6 +10,13 @@ Parameters - list of algorithm or TOPP tool parameters that changed in this rele PR - Pull Request (on GitHub), i.e. integration of a new feature or bugfix #, e.g. #4957 - a reference to an issue or pull request on GitHub, visit e.g. 
https://github.com/OpenMS/OpenMS/pull/XXXX (replace XXXX with number of interest) for details + +- Library: + ID information got factored out of MSSpectrum + +- removed outdated tools: + SpecLibCreator, SpecLibSearcher, IDMassAccuracy + ------------------------------------------------------------------------------------------ ---- OpenMS 3.5.0 (under development) ---- ------------------------------------------------------------------------------------------ diff --git a/CMakeLists.txt b/CMakeLists.txt index 781ac0f079d..7c4d6a9b8c3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -49,7 +49,6 @@ set(CMAKE_AUTOMOC_COMPILER_PREDEFINES OFF) # General CMake definitions & helper #------------------------------------------------------------------------------ SET(CMAKE_ALLOW_LOOSE_LOOP_CONSTRUCTS true) - #------------------------------------------------------------------------------ ## CMake sanity check: sometimes CMAKE_SIZEOF_VOID_P just vanishes when ## updating CMake. diff --git a/contrib b/contrib index e6fde7cfed8..3cdef5c7c7f 160000 --- a/contrib +++ b/contrib @@ -1 +1 @@ -Subproject commit e6fde7cfed8cde73c6625cd493ce3f82e21263cc +Subproject commit 3cdef5c7c7f98032f7d43c59ed642ebe5a1d56b1 diff --git a/doc/code_examples/Tutorial_GUI_Plot1D.cpp b/doc/code_examples/Tutorial_GUI_Plot1D.cpp index c9cccc8bbb6..c382c8eea1c 100644 --- a/doc/code_examples/Tutorial_GUI_Plot1D.cpp +++ b/doc/code_examples/Tutorial_GUI_Plot1D.cpp @@ -11,6 +11,8 @@ #include // exotic header for path to tutorial data #include +#include + using namespace OpenMS; using namespace std; @@ -20,13 +22,13 @@ Int main(int argc, const char** argv) QApplication app(argc, const_cast(argv)); - PeakMap exp; + AnnotatedMSRun exp; + auto exp_sptr = boost::make_shared(); MSSpectrum spec; // demonstrating how to load a single spectrum from file formats which only contain a single spec // alternatively: use FileHandler().loadExperiment() if you need an experiment anyway 
FileHandler().loadSpectrum(tutorial_data_path, spec, {FileTypes::DTA}); - exp.addSpectrum(spec); - LayerDataBase::ExperimentSharedPtrType exp_sptr(new PeakMap(exp)); + exp_sptr->getMSExperiment().addSpectrum(spec); LayerDataBase::ODExperimentSharedPtrType on_disc_exp_sptr(new OnDiscMSExperiment()); Plot1DWidget widget(Param(), DIM::Y, nullptr); widget.canvas()->addPeakLayer(exp_sptr, on_disc_exp_sptr); diff --git a/doc/doxygen/public/TOPP.doxygen b/doc/doxygen/public/TOPP.doxygen index e1eeeecc2cf..5e44e7c0e54 100755 --- a/doc/doxygen/public/TOPP.doxygen +++ b/doc/doxygen/public/TOPP.doxygen @@ -103,7 +103,6 @@ - @subpage TOPP_NovorAdapter - De novo sequencing from tandem mass spectrometry data. - @subpage TOPP_SageAdapter - Identifies MS/MS spectra using Sage (external). - @subpage TOPP_SimpleSearchEngine - A simple database search engine for annotating MS/MS spectra. - - @subpage TOPP_SpecLibSearcher - Identifies peptide MS/MS spectra by spectral matching with a searchable spectral library. - @subpage TOPP_SpectraSTSearchAdapter - An interface to the 'SEARCH' mode of the SpectraST program (external, beta). @@ -117,7 +116,6 @@ - @subpage TOPP_IDDecoyProbability - Estimates peptide probabilities using a decoy search strategy. WARNING: This utility is deprecated. - @subpage TOPP_IDExtractor - Extracts n peptides randomly or best n from idXML files. - @subpage TOPP_IDMapper - Assigns protein/peptide identifications to feature or consensus features. - - @subpage TOPP_IDMassAccuracy - Calculates a distribution of the mass error from given mass spectra and IDs. - @subpage TOPP_IDPosteriorErrorProbability - Estimates posterior error probabilities using a mixture model. - @subpage TOPP_IDScoreSwitcher - Switches between different scores of peptide or protein hits in identification data. - @subpage TOPP_PeptideIndexer - Refreshes the protein references for all peptide hits. 
@@ -126,7 +124,6 @@ - @subpage TOPP_ProteinInference - Infer proteins from a list of (high-confidence) peptides. - @subpage TOPP_PSMFeatureExtractor - Creates search engine specific features for PercolatorAdapter input. - @subpage TOPP_SequenceCoverageCalculator - Prints information about idXML files. - - @subpage TOPP_SpecLibCreator - Creates an MSP-formatted spectral library. - @subpage TOPP_StaticModification - Allows to attach a set of fixed modifications to an idXML file (MS/MS search results), e.g. to add 15N (N15) labeling post-hoc. diff --git a/doc/doxygen/public/developer_tutorial.doxygen b/doc/doxygen/public/developer_tutorial.doxygen index ed173458a59..380383ca34e 100644 --- a/doc/doxygen/public/developer_tutorial.doxygen +++ b/doc/doxygen/public/developer_tutorial.doxygen @@ -160,8 +160,6 @@ The extensible %OpenMS library implements common mass spectrometric data process - Database search: - Peptides (Tool %SimpleSearchEngine and its classes - started simple but is, by now, rather complete peptide identification engine) - Protein-Protein cross-links (Tool OpenPepXL) - - Spectral library search: - - Tool SpecLibSearcher and its classes - DeNovo: - Tool CompNovoCID and its classes - Quantification: diff --git a/src/openms/include/OpenMS/ANALYSIS/ID/IDMapper.h b/src/openms/include/OpenMS/ANALYSIS/ID/IDMapper.h index 6305f1732fe..37cec7f9d96 100644 --- a/src/openms/include/OpenMS/ANALYSIS/ID/IDMapper.h +++ b/src/openms/include/OpenMS/ANALYSIS/ID/IDMapper.h @@ -22,6 +22,7 @@ namespace OpenMS { + class AnnotatedMSRun; /** @brief Annotates an MSExperiment, FeatureMap or ConsensusMap with peptide identifications @@ -65,7 +66,7 @@ namespace OpenMS @exception Exception::MissingInformation is thrown if entries of @p peptide_ids do not contain 'MZ' and 'RT' information. 
*/ - void annotate(PeakMap& map, const std::vector& peptide_ids, const std::vector& protein_ids, const bool clear_ids = false, const bool map_ms1 = false); + void annotate(AnnotatedMSRun& map, const std::vector& peptide_ids, const std::vector& protein_ids, const bool clear_ids = false, const bool map_ms1 = false); /** @brief Mapping method for peak maps @@ -82,7 +83,7 @@ namespace OpenMS @param clear_ids Reset peptide and protein identifications of each scan before annotating @param map_ms1 attach Ids to MS1 spectra using RT mapping only (without precursor, without m/z) */ - void annotate(PeakMap& map, FeatureMap fmap, const bool clear_ids = false, const bool map_ms1 = false); + void annotate(AnnotatedMSRun& map, const FeatureMap& fmap, const bool clear_ids = false, const bool map_ms1 = false); /** @brief Mapping method for feature maps diff --git a/src/openms/include/OpenMS/ANALYSIS/MAPMATCHING/MapAlignmentAlgorithmIdentification.h b/src/openms/include/OpenMS/ANALYSIS/MAPMATCHING/MapAlignmentAlgorithmIdentification.h index 5c4e6037f7c..dc91164f99f 100644 --- a/src/openms/include/OpenMS/ANALYSIS/MAPMATCHING/MapAlignmentAlgorithmIdentification.h +++ b/src/openms/include/OpenMS/ANALYSIS/MAPMATCHING/MapAlignmentAlgorithmIdentification.h @@ -25,6 +25,12 @@ namespace OpenMS { + /* Concept for FeatureMap or ConsensusMap*/ + template + concept IsFCMap = std::same_as || std::same_as; + + class AnnotatedMSRun; + /** @brief A map alignment algorithm based on peptide identifications from MS2 spectra. @@ -74,9 +80,9 @@ namespace OpenMS } /** - @brief Align feature maps, consensus maps, peak maps, or peptide identifications. + @brief Align feature maps, consensus maps, or peptide identifications. - @param data Vector of input data (FeatureMap, ConsensusMap, PeakMap or @p vector) that should be aligned. + @param data Vector of input data (FeatureMap, ConsensusMap, or @p vector) that should be aligned. @param transformations Vector of RT transformations that will be computed. 
@param reference_index Index in @p data of the reference to align to, if any @@ -201,7 +207,7 @@ namespace OpenMS @return Are the RTs already sorted? (Here: false) */ - bool getRetentionTimes_(const PeakMap& experiment, SeqToList& rt_data); + bool getRetentionTimes_(const AnnotatedMSRun& experiment, SeqToList& rt_data); /** @brief Collect retention time data from peptide IDs contained in feature maps or consensus maps @@ -217,8 +223,8 @@ namespace OpenMS @return Are the RTs already sorted? (Here: true) */ - template - bool getRetentionTimes_(const MapType& features, SeqToList& rt_data) + + bool getRetentionTimes_(const IsFCMap auto& features, SeqToList& rt_data) { if (!score_cutoff_) { @@ -236,8 +242,7 @@ namespace OpenMS { return a <= b; }; } - for (typename MapType::ConstIterator feat_it = features.begin(); - feat_it != features.end(); ++feat_it) + for (auto feat_it = features.cbegin(); feat_it != features.cend(); ++feat_it) { if (use_feature_rt_) { diff --git a/src/openms/include/OpenMS/DATASTRUCTURES/ParamValue.h b/src/openms/include/OpenMS/DATASTRUCTURES/ParamValue.h index 6e60aec6ca3..301ef016dbe 100644 --- a/src/openms/include/OpenMS/DATASTRUCTURES/ParamValue.h +++ b/src/openms/include/OpenMS/DATASTRUCTURES/ParamValue.h @@ -349,7 +349,7 @@ namespace OpenMS /// Space to store the data union { - ptrdiff_t ssize_; + std::ptrdiff_t ssize_; double dou_; std::string* str_; std::vector* str_list_; diff --git a/src/openms/include/OpenMS/FORMAT/MSPFile.h b/src/openms/include/OpenMS/FORMAT/MSPFile.h index 6831e9f17d5..523a372accc 100644 --- a/src/openms/include/OpenMS/FORMAT/MSPFile.h +++ b/src/openms/include/OpenMS/FORMAT/MSPFile.h @@ -11,11 +11,13 @@ #include #include #include +#include #include namespace OpenMS { + class AnnotatedMSRun; /** @brief File adapter for MSP files (NIST spectra library) @@ -58,12 +60,24 @@ namespace OpenMS */ void load(const String & filename, std::vector & ids, PeakMap & exp); + /** + @brief Loads a map from a MSPFile file. 
+ + @param filename the filename of the experiment + @param annot_exp annotated experiment with spectra and ids + + @throw FileNotFound is thrown if the file could not be found + @throw ParseError is thrown if the given file could not be parsed + @throw ElementNotFound is thrown if a annotated modification cannot be found in ModificationsDB (PSI-MOD definitions) + */ + void load(const String & filename, AnnotatedMSRun & annot_exp); + /** @brief Stores a map in a MSPFile file. @throw UnableToCreateFile is thrown if the given file could not be created */ - void store(const String & filename, const PeakMap & exp) const; + void store(const String & filename, const AnnotatedMSRun & exp) const; protected: diff --git a/src/openms/include/OpenMS/KERNEL/MSExperiment.h b/src/openms/include/OpenMS/KERNEL/MSExperiment.h index 832a321ad11..fad5150e00c 100644 --- a/src/openms/include/OpenMS/KERNEL/MSExperiment.h +++ b/src/openms/include/OpenMS/KERNEL/MSExperiment.h @@ -1320,9 +1320,6 @@ std::vector extractXICs( /// returns true if any MS spectra of trthe specified level contain at least one peak with intensity of 0.0 bool hasZeroIntensities(size_t ms_level) const; - /// do any of the spectra have a PeptideID? - bool hasPeptideIdentifications() const; - /// Are all MSSpectra in this experiment part of an IM Frame? I.e. they all have the same RT, but different drift times bool isIMFrame() const; diff --git a/src/openms/include/OpenMS/METADATA/AnnotatedMSRun.h b/src/openms/include/OpenMS/METADATA/AnnotatedMSRun.h new file mode 100644 index 00000000000..59c38e47c0d --- /dev/null +++ b/src/openms/include/OpenMS/METADATA/AnnotatedMSRun.h @@ -0,0 +1,320 @@ +// Copyright (c) 2002-present, OpenMS Inc. 
-- EKU Tuebingen, ETH Zurich, and FU Berlin +// SPDX-License-Identifier: BSD-3-Clause +// +// -------------------------------------------------------------------------- +// $Maintainer: Timo Sachsenberg$ +// $Authors: David Voigt, Timo Sachsenberg $ +// ------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include +#include +#include + +#include + +namespace OpenMS +{ + class PeptideIdentification; + + class MSSpectrum; + + /** + * @brief Class for storing MS run data with peptide and protein identifications + * + * This class stores an MSExperiment (containing spectra) along with peptide and protein + * identifications. Each spectrum in the MSExperiment is associated with a single + * PeptideIdentification object. + * + * The class provides methods to access and modify these identifications, as well as + * iterators to traverse the spectra and their associated identifications together. + */ + class OPENMS_DLLAPI AnnotatedMSRun + { + public: + using SpectrumIdRef = std::pair; + using ConstSpectrumIdRef = std::pair; + using SpectrumType = MSExperiment::SpectrumType; + using ChromatogramType = MSExperiment::ChromatogramType; + + + /// Default constructor + AnnotatedMSRun() = default; + + /** + * @brief Move constructor for efficiently loading a MSExperiment without a deep copy + * @param experiment The MSExperiment to move into this object + */ + explicit AnnotatedMSRun(MSExperiment&& experiment) : data(std::move(experiment)) + {}; + + /// Move constructor + AnnotatedMSRun(AnnotatedMSRun&&) = default; + + /// Copy constructor + AnnotatedMSRun(const AnnotatedMSRun&) = default; + AnnotatedMSRun& operator=(const AnnotatedMSRun&) = default; + AnnotatedMSRun& operator=(AnnotatedMSRun&&) = default; + + /// Destructor + ~AnnotatedMSRun() = default; + + /** + * @brief Get the protein identification + * @return A reference to the protein identification + */ + std::vector& 
getProteinIdentifications() + { + return protein_ids_; + } + + /** + * @brief Get the protein identification (const version) + * @return A const reference to the protein identification + */ + const std::vector& getProteinIdentifications() const + { + return protein_ids_; + } + + /** + * @brief Get all peptide identifications for all spectra + * @return A reference to the vector of peptide identifications + */ + std::vector& getPeptideIdentifications(); + + /** + * @brief Get all peptide identifications for all spectra (const version) + * @return A const reference to the vector of peptide identifications + */ + const std::vector& getPeptideIdentifications() const; + + /** + * @brief Set all peptide identifications for all spectra + * @param ids Vector of peptide identifications + */ + void setPeptideIdentifications(std::vector&& ids); + + /** + * @brief Set all peptide identifications for all spectra + * @param ids Vector of peptide identifications + */ + void setPeptideIdentifications(const std::vector& ids); + + /** + * @brief Get the MSExperiment + * @return A reference to the MSExperiment + */ + MSExperiment& getMSExperiment(); + + /** + * @brief Get the MSExperiment (const version) + * @return A const reference to the MSExperiment + */ + const MSExperiment& getMSExperiment() const; + + /** + * @brief Set the MSExperiment + * @param experiment The MSExperiment to set + */ + void setMSExperiment(MSExperiment&& experiment); + + /** + * @brief Set the MSExperiment + * @param experiment The MSExperiment to set + */ + void setMSExperiment(const MSExperiment& experiment); + + /** + * @brief Get a const iterator to the beginning of the data + * @return A const iterator to the beginning + */ + inline auto cbegin() const + { + checkPeptideIdSize_(OPENMS_PRETTY_FUNCTION); + return PairIterator(data.getSpectra().cbegin(), peptide_ids_.cbegin()); + } + + /** + * @brief Get an iterator to the beginning of the data + * @return An iterator to the beginning + */ + inline auto 
begin() + { + checkPeptideIdSize_(OPENMS_PRETTY_FUNCTION); + return PairIterator(data.getSpectra().begin(), peptide_ids_.begin()); + } + + /** + * @brief Get a const iterator to the beginning of the data + * @return A const iterator to the beginning + */ + inline auto begin() const + { + checkPeptideIdSize_(OPENMS_PRETTY_FUNCTION); + return PairIterator(data.getSpectra().cbegin(), peptide_ids_.cbegin()); + } + + /** + * @brief Get an iterator to the end of the data + * @return An iterator to the end + */ + inline auto end() + { + return PairIterator(data.getSpectra().end(), peptide_ids_.end()); + } + + /** + * @brief Get a const iterator to the end of the data + * @return A const iterator to the end + */ + inline auto end() const + { + return PairIterator(data.getSpectra().end(), peptide_ids_.end()); + } + + /** + * @brief Get a const iterator to the end of the data + * @return A const iterator to the end + */ + inline auto cend() const + { + return PairIterator(data.getSpectra().cend(), peptide_ids_.cend()); + } + + /** + * @brief Access a spectrum and its associated peptide identification + * @param idx The index of the spectrum + * @return A pair of references to the spectrum and its peptide identification + */ + inline SpectrumIdRef operator[](size_t idx) + { + if (idx >= peptide_ids_.size()) + { + throw Exception::IndexOverflow(__FILE__, __LINE__, + OPENMS_PRETTY_FUNCTION, + idx, peptide_ids_.size()); + } + if (idx >= data.getSpectra().size()) + { + throw Exception::IndexOverflow(__FILE__, __LINE__, + OPENMS_PRETTY_FUNCTION, + idx, data.getSpectra().size()); + } + return {data.getSpectra()[idx], peptide_ids_[idx]}; + } + + /** + * @brief Access a spectrum and its associated peptide identification (const version) + * @param idx The index of the spectrum + * @return A pair of const references to the spectrum and its peptide identification + */ + inline ConstSpectrumIdRef operator[](size_t idx) const + { + if (idx >= peptide_ids_.size()) + { + throw 
Exception::IndexOverflow(__FILE__, __LINE__, + OPENMS_PRETTY_FUNCTION, + idx, peptide_ids_.size()); + } + if (idx >= data.getSpectra().size()) + { + throw Exception::IndexOverflow(__FILE__, __LINE__, + OPENMS_PRETTY_FUNCTION, + idx, data.getSpectra().size()); + } + return {data.getSpectra()[idx], peptide_ids_[idx]}; + } + + /** + * @brief Iterator for pairs of spectra and peptide identifications + * + * This iterator allows traversing the spectra and their associated peptide + * identifications together. + */ + template + struct PairIterator + { + using iterator_category = std::forward_iterator_tag; + using difference_type = std::ptrdiff_t; + + /** + * @brief Constructor + * @param ptr1 Iterator to the spectra + * @param ptr2 Iterator to the peptide identifications + */ + PairIterator(T1 ptr1, T2 ptr2) : m_ptr1(ptr1), m_ptr2(ptr2) + {} + + /** + * @brief Pre-increment operator + * @return Reference to this iterator after incrementing + */ + PairIterator& operator++() + { + ++m_ptr1; + ++m_ptr2; + return *this; + } + + /** + * @brief Post-increment operator + * @return Copy of this iterator before incrementing + */ + PairIterator operator++(int) + { + auto tmp(*this); + ++(*this); + return tmp; + } + + /** + * @brief Dereference operator + * @return A pair of references to the current spectrum and peptide identification + */ + auto operator*() + { + return std::make_pair(std::ref(*m_ptr1), std::ref(*m_ptr2)); + } + + /** + * @brief Equality operator + * @param a First iterator + * @param b Second iterator + * @return True if the iterators are equal + */ + inline friend bool operator==(const PairIterator& a, const PairIterator& b) + { + return a.m_ptr1 == b.m_ptr1 && a.m_ptr2 == b.m_ptr2; + } + + /** + * @brief Inequality operator + * @param a First iterator + * @param b Second iterator + * @return True if the iterators are not equal + */ + inline friend bool operator!=(const PairIterator& a, const PairIterator& b) + { + return !(a == b); + } + + private: + T1 
m_ptr1; + T2 m_ptr2; + }; + + typedef AnnotatedMSRun::PairIterator::iterator, std::vector::iterator> Iterator; + typedef AnnotatedMSRun::PairIterator::const_iterator, std::vector::const_iterator> ConstIterator; + + private: + + // Helper to enforce invariant + void checkPeptideIdSize_(const char* function_name) const; + + std::vector peptide_ids_; + std::vector protein_ids_; + MSExperiment data; + }; +} diff --git a/src/openms/include/OpenMS/METADATA/ExperimentalSettings.h b/src/openms/include/OpenMS/METADATA/ExperimentalSettings.h index 19a6cf64acd..b9dc867a6b4 100644 --- a/src/openms/include/OpenMS/METADATA/ExperimentalSettings.h +++ b/src/openms/include/OpenMS/METADATA/ExperimentalSettings.h @@ -100,13 +100,6 @@ namespace OpenMS /// sets the free-text comment void setComment(const String & comment); - /// returns a const reference to the protein ProteinIdentification vector - const std::vector & getProteinIdentifications() const; - /// returns a mutable reference to the protein ProteinIdentification vector - std::vector & getProteinIdentifications(); - /// sets the protein ProteinIdentification vector - void setProteinIdentifications(const std::vector & protein_identifications); - /// returns fraction identifier const String & getFractionIdentifier() const; /// sets the fraction identifier @@ -120,7 +113,6 @@ namespace OpenMS HPLC hplc_; DateTime datetime_; String comment_; - std::vector protein_identifications_; String fraction_identifier_; }; diff --git a/src/openms/include/OpenMS/METADATA/SpectrumSettings.h b/src/openms/include/OpenMS/METADATA/SpectrumSettings.h index 37c79c137db..5594cbd9170 100644 --- a/src/openms/include/OpenMS/METADATA/SpectrumSettings.h +++ b/src/openms/include/OpenMS/METADATA/SpectrumSettings.h @@ -123,13 +123,6 @@ namespace OpenMS /// sets the products void setProducts(const std::vector & products); - /// returns a const reference to the PeptideIdentification vector - const std::vector & getPeptideIdentifications() const; - /// returns 
a mutable reference to the PeptideIdentification vector - std::vector & getPeptideIdentifications(); - /// sets the PeptideIdentification vector - void setPeptideIdentifications(const std::vector & identifications); - /// sets the description of the applied processing void setDataProcessing(const std::vector< DataProcessingPtr > & data_processing); @@ -149,7 +142,6 @@ namespace OpenMS AcquisitionInfo acquisition_info_; std::vector precursors_; std::vector products_; - std::vector identification_; std::vector< DataProcessingPtr > data_processing_; }; diff --git a/src/openms/include/OpenMS/METADATA/sources.cmake b/src/openms/include/OpenMS/METADATA/sources.cmake index fa826e9e53f..754ca52abf6 100644 --- a/src/openms/include/OpenMS/METADATA/sources.cmake +++ b/src/openms/include/OpenMS/METADATA/sources.cmake @@ -5,6 +5,7 @@ set(directory include/OpenMS/METADATA) set(sources_list_h AbsoluteQuantitationStandards.h Acquisition.h +AnnotatedMSRun.h AcquisitionInfo.h CVTerm.h CVTermList.h diff --git a/src/openms/include/OpenMS/PROCESSING/ID/IDFilter.h b/src/openms/include/OpenMS/PROCESSING/ID/IDFilter.h index af9c1b1ff83..573fd44a37a 100644 --- a/src/openms/include/OpenMS/PROCESSING/ID/IDFilter.h +++ b/src/openms/include/OpenMS/PROCESSING/ID/IDFilter.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -30,6 +31,14 @@ namespace OpenMS { + template + concept IsPeptideOrProteinIdentification = + std::is_same_v || std::is_same_v; + + template + concept IsFeatureOrConsensusMap = + std::is_same_v || std::is_same_v; + /** @brief Collection of functions for filtering peptide and protein identifications. 
@@ -590,7 +599,7 @@ namespace OpenMS removeMatchingItemsUnroll(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred); } - template + template static void removeMatchingPeptideIdentifications(MapType& prot_and_pep_ids, Predicate& pred) { for (auto& feat : prot_and_pep_ids) @@ -776,7 +785,7 @@ namespace OpenMS ///@{ /// Removes peptide or protein identifications that have no hits in them - template + template static void removeEmptyIdentifications(std::vector& ids) { struct HasNoHits empty_filter; @@ -962,6 +971,20 @@ namespace OpenMS } } + /** + @brief Filters peptide or protein identifications according to the given proteins (positive). + + Hits with a matching protein accession in @p accessions are kept. + + @note The ranks of the hits may be invalidated. + */ + template + static void keepHitsMatchingProteins(IdentificationType& id, const std::set& accessions) + { + struct HasMatchingAccession acc_filter(accessions); + keepMatchingItems(id.getHits(), acc_filter); + } + /** @brief Filters peptide or protein identifications according to the given proteins (positive). 
@@ -972,11 +995,7 @@ namespace OpenMS template static void keepHitsMatchingProteins(std::vector& ids, const std::set& accessions) { - struct HasMatchingAccession acc_filter(accessions); - for (auto& id_it : ids) - { - keepMatchingItems(id_it.getHits(), acc_filter); - } + for (auto& id_it : ids) keepHitsMatchingProteins(id_it, accessions); } ///@} @@ -1095,46 +1114,58 @@ namespace OpenMS ///@} - /// @name Filter functions for MS/MS experiments + /// @name Filter functions for AnnotatedMSRun ///@{ - /// Filters an MS/MS experiment according to score thresholds - static void filterHitsByScore(PeakMap& experiment, double peptide_threshold_score, double protein_threshold_score) + /// Filters AnnotatedMSRun according to score thresholds + static void filterHitsByScore(AnnotatedMSRun& annotated_data, + double peptide_threshold_score, + double protein_threshold_score) { // filter protein hits: - filterHitsByScore(experiment.getProteinIdentifications(), protein_threshold_score); - // don't remove empty protein IDs - they contain search metadata and may + filterHitsByScore(annotated_data.getProteinIdentifications(), + protein_threshold_score); + // don't remove empty protein IDs - they contain search meta data and may // be referenced by peptide IDs (via run ID) // filter peptide hits: - for (PeakMap::Iterator exp_it = experiment.begin(); exp_it != experiment.end(); ++exp_it) + for (PeptideIdentification& peptide_id : annotated_data.getPeptideIdentifications()) { - filterHitsByScore(exp_it->getPeptideIdentifications(), peptide_threshold_score); - removeEmptyIdentifications(exp_it->getPeptideIdentifications()); - // TODO super-duper inefficient. - updateProteinReferences(exp_it->getPeptideIdentifications(), experiment.getProteinIdentifications()); + filterHitsByScore(peptide_id, peptide_threshold_score); } - // @TODO: remove proteins that aren't referenced by peptides any more? 
+ updateProteinReferences(annotated_data.getPeptideIdentifications(), annotated_data.getProteinIdentifications()); } - /// Filters an MS/MS experiment by keeping the N best peptide hits for every spectrum - static void keepNBestHits(PeakMap& experiment, Size n) + /// Filters AnnotatedMSRun by keeping the N best peptide hits for every spectrum + static void keepNBestHits(AnnotatedMSRun& annotated_data, Size n) { // don't filter the protein hits by "N best" here - filter the peptides // and update the protein hits! std::vector all_peptides; // IDs from all spectra - // filter peptide hits: - for (PeakMap::Iterator exp_it = experiment.begin(); exp_it != experiment.end(); ++exp_it) + for (PeptideIdentification& peptide_id : annotated_data.getPeptideIdentifications()) { - std::vector& peptides = exp_it->getPeptideIdentifications(); - keepNBestHits(peptides, n); - removeEmptyIdentifications(peptides); - updateProteinReferences(peptides, experiment.getProteinIdentifications()); - all_peptides.insert(all_peptides.end(), peptides.begin(), peptides.end()); + // Create a temporary vector with a single PeptideIdentification + std::vector temp_vec = {peptide_id}; + keepNBestHits(temp_vec, n); + // Copy back the filtered hits + if (!temp_vec.empty()) + { + peptide_id = temp_vec[0]; + } + else + { + peptide_id.getHits().clear(); + } + + // Since we're working with individual PeptideIdentifications, we don't need to remove empty ones + // but we still need to update protein references + temp_vec = {peptide_id}; + updateProteinReferences(temp_vec, annotated_data.getProteinIdentifications()); + all_peptides.push_back(peptide_id); } // update protein hits: - removeUnreferencedProteins(experiment.getProteinIdentifications(), all_peptides); + removeUnreferencedProteins(annotated_data.getProteinIdentifications(), all_peptides); } /// Filter identifications by "N best" PeptideIdentification objects (better PeptideIdentification means better [best] PeptideHit than other). 
@@ -1300,11 +1331,13 @@ namespace OpenMS } } - /// Filters an MS/MS experiment according to the given proteins - static void keepHitsMatchingProteins(PeakMap& experiment, const std::vector& proteins) + /// Filters AnnotatedMSRun according to the given proteins. + static void keepHitsMatchingProteins( + AnnotatedMSRun& experiment, + const std::vector& proteins) { std::set accessions; - for (std::vector::const_iterator it = proteins.begin(); it != proteins.end(); ++it) + for (auto it = proteins.begin(); it != proteins.end(); ++it) { accessions.insert(it->identifier); } @@ -1313,14 +1346,15 @@ namespace OpenMS keepHitsMatchingProteins(experiment.getProteinIdentifications(), accessions); // filter peptide hits: - for (PeakMap::Iterator exp_it = experiment.begin(); exp_it != experiment.end(); ++exp_it) + // std::pair + for (auto [spectrum, peptide_id] : experiment) { - if (exp_it->getMSLevel() == 2) + if (spectrum.getMSLevel() == 2) { - keepHitsMatchingProteins(exp_it->getPeptideIdentifications(), accessions); - removeEmptyIdentifications(exp_it->getPeptideIdentifications()); + keepHitsMatchingProteins(peptide_id, accessions); } } + removeEmptyIdentifications(experiment.getPeptideIdentifications()); } ///@} diff --git a/src/openms/source/ANALYSIS/ID/IDMapper.cpp b/src/openms/source/ANALYSIS/ID/IDMapper.cpp index 0809463cc58..7a7c074d4dc 100644 --- a/src/openms/source/ANALYSIS/ID/IDMapper.cpp +++ b/src/openms/source/ANALYSIS/ID/IDMapper.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -74,18 +75,19 @@ namespace OpenMS ignore_charge_ = param_.getValue("ignore_charge") == "true"; } - void IDMapper::annotate(PeakMap& map, const vector& peptide_ids, const vector& protein_ids, const bool clear_ids, const bool map_ms1) + void IDMapper::annotate(AnnotatedMSRun& map, + const vector& peptide_ids, + const vector& protein_ids, + const bool clear_ids, + const bool map_ms1) { checkHits_(peptide_ids); SpectrumLookup lookup; if (clear_ids) { // start with 
empty IDs - for (PeakMap::iterator it = map.begin(); it != map.end(); ++it) - { - it->setPeptideIdentifications({}); - } - map.setProteinIdentifications({}); + map.getPeptideIdentifications().clear(); + map.getProteinIdentifications().clear(); } if (peptide_ids.empty()) return; @@ -93,33 +95,41 @@ namespace OpenMS // append protein identifications map.getProteinIdentifications().insert(map.getProteinIdentifications().end(), protein_ids.begin(), protein_ids.end()); - lookup.readSpectra(map); + // AnnotatedMSRun will have one PeptideIdentification per spectrum (including ones without hits) + map.getPeptideIdentifications().resize(map.getMSExperiment().getSpectra().size()); + + // set up the lookup table for the spectra + lookup.readSpectra(map.getMSExperiment()); // remember which peptides were mapped (for stats later) unordered_set peptides_mapped; + // store mapping of identification RT to index (ignore empty hits) multimap identifications_precursors; for (Size i = 0; i < peptide_ids.size(); ++i) { - if (!peptide_ids[i].empty()) - { // mapping is done by either native id or by comparing peptide_id RT with experiment RT - if (!peptide_ids[i].metaValueExists(Constants::UserParam::SPECTRUM_REFERENCE)) - { // use RT for mapping + if (peptide_ids[i].empty()) continue; + // mapping is done by either native id or by comparing peptide_id RT with experiment RT + if (!peptide_ids[i].metaValueExists(Constants::UserParam::SPECTRUM_REFERENCE)) + { // use RT for mapping + identifications_precursors.insert(make_pair(peptide_ids[i].getRT(), i)); + } + else + { // use native id for mapping + DataValue native_id = peptide_ids[i].getMetaValue(Constants::UserParam::SPECTRUM_REFERENCE); + try + { // spectrum can be retrieved + Size spectrum_idx = lookup.findByNativeID(native_id); + // Since we now have only one PeptideIdentification per spectrum, we need to merge the hits + PeptideIdentification& existing_id = map.getPeptideIdentifications()[spectrum_idx]; + 
existing_id.getHits().insert(existing_id.getHits().end(), + peptide_ids[i].getHits().begin(), + peptide_ids[i].getHits().end()); + peptides_mapped.insert(i); + } + catch (const Exception::ElementNotFound& /*e*/) + { // use RT for mapping identifications_precursors.insert(make_pair(peptide_ids[i].getRT(), i)); - } - else - { // use native id for mapping - DataValue native_id = peptide_ids[i].getMetaValue(Constants::UserParam::SPECTRUM_REFERENCE); - try - { // spectrum can be retrieved - Size spectrum_idx = lookup.findByNativeID(native_id); - map[spectrum_idx].getPeptideIdentifications().push_back(peptide_ids[i]); - peptides_mapped.insert(i); - } - catch (const Exception::ElementNotFound& /*e*/) - { // use RT for mapping - identifications_precursors.insert(make_pair(peptide_ids[i].getRT(), i)); - } } } } @@ -128,9 +138,9 @@ namespace OpenMS { // store mapping of scan RT to index multimap experiment_precursors; - for (Size i = 0; i < map.size(); i++) + for (Size i = 0; i < map.getMSExperiment().size(); i++) { - experiment_precursors.insert(make_pair(map[i].getRT(), i)); + experiment_precursors.insert(make_pair(map.getMSExperiment()[i].getRT(), i)); } // note that mappings are sorted by key via multimap (we rely on that down below) @@ -174,7 +184,7 @@ namespace OpenMS bool success = map_ms1; if (!success) { - for (const auto& precursor : map[experiment_iterator->second].getPrecursors()) + for (const auto& precursor : map.getMSExperiment()[experiment_iterator->second].getPrecursors()) { if (isMatch_(0, peptide_ids[identifications_iterator->second].getMZ(), precursor.getMZ())) { @@ -183,9 +193,14 @@ namespace OpenMS } } } + if (success) { - map[experiment_iterator->second].getPeptideIdentifications().push_back(peptide_ids[identifications_iterator->second]); + // Since we have only one PeptideIdentification per spectrum, we need to merge the hits + PeptideIdentification& existing_id = map.getPeptideIdentifications()[experiment_iterator->second]; + 
existing_id.getHits().insert(existing_id.getHits().end(), + peptide_ids[identifications_iterator->second].getHits().begin(), + peptide_ids[identifications_iterator->second].getHits().end()); peptides_mapped.insert(identifications_iterator->second); } ++identifications_iterator; @@ -201,8 +216,7 @@ namespace OpenMS << " Unmapped (empty) peptides: " << peptide_ids.size() - identifications_precursors.size() << endl; } - - void IDMapper::annotate(PeakMap& map, FeatureMap fmap, const bool clear_ids, const bool map_ms1) + void IDMapper::annotate(AnnotatedMSRun& map, const FeatureMap& fmap, const bool clear_ids, const bool map_ms1) { const vector& protein_ids = fmap.getProteinIdentifications(); vector peptide_ids; @@ -1085,3 +1099,4 @@ namespace OpenMS } } // namespace OpenMS + diff --git a/src/openms/source/ANALYSIS/MAPMATCHING/MapAlignmentAlgorithmIdentification.cpp b/src/openms/source/ANALYSIS/MAPMATCHING/MapAlignmentAlgorithmIdentification.cpp index be605ca90b2..14fd6a31283 100644 --- a/src/openms/source/ANALYSIS/MAPMATCHING/MapAlignmentAlgorithmIdentification.cpp +++ b/src/openms/source/ANALYSIS/MAPMATCHING/MapAlignmentAlgorithmIdentification.cpp @@ -10,6 +10,7 @@ #include #include #include +#include using namespace std; @@ -93,8 +94,7 @@ namespace OpenMS bool MapAlignmentAlgorithmIdentification::getRetentionTimes_( const vector& peptides, SeqToList& rt_data) { - for (vector::const_iterator pep_it = peptides.begin(); - pep_it != peptides.end(); ++pep_it) + for (auto pep_it = peptides.cbegin(); pep_it != peptides.cend(); ++pep_it) { if (!pep_it->getHits().empty()) { @@ -178,14 +178,9 @@ namespace OpenMS // lists of peptide hits in "maps" will be sorted bool MapAlignmentAlgorithmIdentification::getRetentionTimes_( - const PeakMap& experiment, SeqToList& rt_data) + const AnnotatedMSRun& experiment, SeqToList& rt_data) { - for (PeakMap::ConstIterator exp_it = experiment.begin(); - exp_it != experiment.end(); ++exp_it) - { - 
getRetentionTimes_(exp_it->getPeptideIdentifications(), rt_data); - } - // duplicate annotations should not be possible -> no need to remove them + getRetentionTimes_(experiment.getPeptideIdentifications(), rt_data); return false; } diff --git a/src/openms/source/APPLICATIONS/ToolHandler.cpp b/src/openms/source/APPLICATIONS/ToolHandler.cpp index 417411d56b0..b602efa5baf 100644 --- a/src/openms/source/APPLICATIONS/ToolHandler.cpp +++ b/src/openms/source/APPLICATIONS/ToolHandler.cpp @@ -91,7 +91,6 @@ namespace OpenMS tools_map["IDFileConverter"] = Internal::ToolDescription("IDFileConverter", cat_file_converter); tools_map["IDFilter"] = Internal::ToolDescription("IDFilter", cat_file_filter_extract_merge); tools_map["IDMapper"] = Internal::ToolDescription("IDMapper", cat_ID_proc); - tools_map["IDMassAccuracy"] = Internal::ToolDescription("IDMassAccuracy", cat_ID_proc); tools_map["IDMerger"] = Internal::ToolDescription("IDMerger", cat_file_filter_extract_merge); tools_map["IDPosteriorErrorProbability"] = Internal::ToolDescription("IDPosteriorErrorProbability", cat_ID_proc); tools_map["IDRipper"] = Internal::ToolDescription("IDRipper", cat_file_filter_extract_merge); @@ -174,8 +173,6 @@ namespace OpenMS tools_map["SequenceCoverageCalculator"] = Internal::ToolDescription("SequenceCoverageCalculator", cat_ID_proc); tools_map["SimpleSearchEngine"] = Internal::ToolDescription("SimpleSearchEngine", cat_ID_search); tools_map["SiriusExport"] = Internal::ToolDescription("SiriusExport", cat_ID_MTX); - tools_map["SpecLibCreator"] = Internal::ToolDescription("SpecLibCreator", cat_ID_proc); - tools_map["SpecLibSearcher"] = Internal::ToolDescription("SpecLibSearcher", cat_ID_search); tools_map["SpectraFilterNLargest"] = Internal::ToolDescription("SpectraFilterNLargest", cat_signal_proc_smooth_normalize); tools_map["SpectraFilterNormalizer"] = Internal::ToolDescription("SpectraFilterNormalizer", cat_signal_proc_smooth_normalize); tools_map["SpectraFilterThresholdMower"] = 
Internal::ToolDescription("SpectraFilterThresholdMower", cat_signal_proc_smooth_normalize); diff --git a/src/openms/source/FORMAT/MSPFile.cpp b/src/openms/source/FORMAT/MSPFile.cpp index 13cd90d37a5..f8275eaa3cf 100644 --- a/src/openms/source/FORMAT/MSPFile.cpp +++ b/src/openms/source/FORMAT/MSPFile.cpp @@ -10,6 +10,7 @@ #include #include +#include #include #include @@ -28,7 +29,7 @@ namespace OpenMS defaults_.setValidStrings("parse_headers", parse_strings); defaults_.setValue("parse_peakinfo", "true", "Flag whether the peak annotation information should be parsed and stored for each peak"); defaults_.setValidStrings("parse_peakinfo", parse_strings); - defaults_.setValue("parse_firstpeakinfo_only", "true", "Flag whether only the first (default for 1:1 correspondence in SpecLibSearcher) or all peak annotation information should be parsed and stored for each peak."); + defaults_.setValue("parse_firstpeakinfo_only", "true", "Flag whether only the first or all peak annotation information should be parsed and stored for each peak."); defaults_.setValidStrings("parse_firstpeakinfo_only", parse_strings); defaults_.setValue("instrument", "", "If instrument given, only spectra of these type of instrument (Inst= in header) are parsed"); defaults_.setValidStrings("instrument", {"","it","qtof","toftof"}); @@ -322,6 +323,18 @@ namespace OpenMS } } + void MSPFile::load(const String & filename, AnnotatedMSRun & annot_exp) + { + // use existing load function + vector ids; + MSExperiment exp; + this->load(filename, ids, exp); + + // Convert to the new data structure (one PeptideIdentification per spectrum) + annot_exp.setPeptideIdentifications(std::move(ids)); + annot_exp.getMSExperiment() = std::move(exp); + } + void MSPFile::parseHeader_(const String & header, PeakSpectrum & spec) { // first header from std_protein of NIST spectra DB @@ -343,7 +356,7 @@ namespace OpenMS } //TODO adapt store to write new? 
format - void MSPFile::store(const String & filename, const PeakMap & exp) const + void MSPFile::store(const String & filename, const AnnotatedMSRun & exp) const { if (!FileHandler::hasValidExtension(filename, FileTypes::MSP)) { @@ -358,11 +371,11 @@ namespace OpenMS ofstream out(filename.c_str()); - for (const MSSpectrum& it : exp) + for (auto [spectrum, peptide_id] : exp) { - if (!it.getPeptideIdentifications().empty() && !it.getPeptideIdentifications().begin()->getHits().empty()) + if (!peptide_id.getHits().empty()) { - PeptideHit hit = *it.getPeptideIdentifications().begin()->getHits().begin(); + PeptideHit hit = peptide_id.getHits()[0]; String peptide; for (const Residue& pit : hit.getSequence()) { @@ -419,10 +432,10 @@ namespace OpenMS out << " Mods=0"; } out << " Inst=it\n"; // @improvement write instrument type, protein...and other information - out << "Num peaks: " << it.size() << "\n"; + out << "Num peaks: " << spectrum.size() << "\n"; // normalize to 10,000 - PeakSpectrum rich_spec = it; + PeakSpectrum rich_spec = spectrum; double max_int(0); for (const Peak1D& sit : rich_spec) { diff --git a/src/openms/source/KERNEL/MSExperiment.cpp b/src/openms/source/KERNEL/MSExperiment.cpp index c6b0e8bad2f..9a2161ba747 100644 --- a/src/openms/source/KERNEL/MSExperiment.cpp +++ b/src/openms/source/KERNEL/MSExperiment.cpp @@ -851,6 +851,7 @@ namespace OpenMS }); } + /* bool MSExperiment::hasPeptideIdentifications() const { for (const auto& spec : getSpectra()) @@ -862,6 +863,7 @@ namespace OpenMS } return false; } + */ bool MSExperiment::isIMFrame() const { diff --git a/src/openms/source/METADATA/AnnotatedMSRun.cpp b/src/openms/source/METADATA/AnnotatedMSRun.cpp new file mode 100644 index 00000000000..482c1592b84 --- /dev/null +++ b/src/openms/source/METADATA/AnnotatedMSRun.cpp @@ -0,0 +1,64 @@ +// Copyright (c) 2002-present, OpenMS Inc. 
-- EKU Tuebingen, ETH Zurich, and FU Berlin +// SPDX-License-Identifier: BSD-3-Clause +// +// -------------------------------------------------------------------------- +// $Maintainer: Timo Sachsenberg$ +// $Authors: David Voigt, Timo Sachsenberg $ +// -------------------------------------------------------------------------- +#include + + +namespace OpenMS +{ + std::vector& AnnotatedMSRun::getPeptideIdentifications() + { + return peptide_ids_; + } + + const std::vector& AnnotatedMSRun::getPeptideIdentifications() const + { + return peptide_ids_; + } + + void AnnotatedMSRun::setPeptideIdentifications(const std::vector& ids) + { + peptide_ids_ = ids; + } + + void AnnotatedMSRun::setPeptideIdentifications(std::vector&& ids) + { + peptide_ids_ = std::move(ids); + } + + MSExperiment& AnnotatedMSRun::getMSExperiment() + { + return data; + } + + const MSExperiment& AnnotatedMSRun::getMSExperiment() const + { + return data; + } + + void AnnotatedMSRun::setMSExperiment(MSExperiment&& experiment) + { + data = std::move(experiment); + } + + void AnnotatedMSRun::setMSExperiment(const MSExperiment& experiment) + { + data = experiment; + } + + void AnnotatedMSRun::checkPeptideIdSize_(const char* function_name) const + { + if (data.getSpectra().size() != peptide_ids_.size()) + { + throw Exception::InvalidValue(__FILE__, __LINE__, + function_name, // Use the provided function name + "Internal inconsistency: Number of spectra and peptide identifications do not match.", + String(data.getSpectra().size()) + " vs " + String(peptide_ids_.size())); + } + } +} + diff --git a/src/openms/source/METADATA/AnnotatedMSRun.h b/src/openms/source/METADATA/AnnotatedMSRun.h new file mode 100644 index 00000000000..b61cbbcc43b --- /dev/null +++ b/src/openms/source/METADATA/AnnotatedMSRun.h @@ -0,0 +1,285 @@ +// Copyright (c) 2002-present, OpenMS Inc. 
-- EKU Tuebingen, ETH Zurich, and FU Berlin +// SPDX-License-Identifier: BSD-3-Clause +// +// -------------------------------------------------------------------------- +// $Maintainer: Timo Sachsenberg$ +// $Authors: David Voigt $ +// ------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include +#include +#include + +#include + +namespace OpenMS +{ + class PeptideIdentification; + + class MSSpectrum; + + /** + * @brief Class for storing MS run data with peptide and protein identifications + * + * This class stores an MSExperiment (containing spectra) along with peptide and protein + * identifications. Each spectrum in the MSExperiment is associated with a single + * PeptideIdentification object. + * + * The class provides methods to access and modify these identifications, as well as + * iterators to traverse the spectra and their associated identifications together. + */ + class OPENMS_DLLAPI AnnotatedMSRun + { + public: + typedef std::pair Mapping; + typedef std::pair ConstMapping; + + /// Default constructor + AnnotatedMSRun() = default; + + /** + * @brief Construct from an MSExperiment by moving it in, avoiding a deep copy + * @param experiment The MSExperiment to move into this object + */ + explicit AnnotatedMSRun(MSExperiment&& experiment) : data(std::move(experiment)) + {}; + + /// Move constructor + AnnotatedMSRun(AnnotatedMSRun&&) = default; + + /// Destructor + ~AnnotatedMSRun() = default; + + /** + * @brief Get the protein identifications + * @return A reference to the vector of protein identifications + */ + std::vector& getProteinIdentifications() + { + return protein_ids_; + } + + /** + * @brief Get the protein identifications (const version) + * @return A const reference to the vector of protein identifications + */ + const std::vector& getProteinIdentifications() const + { + return protein_ids_; + } + + /** + * @brief Get all peptide identifications for all spectra 
+ * @return A reference to the vector of peptide identifications + */ + std::vector& getPeptideIdentifications(); + + /** + * @brief Get all peptide identifications for all spectra (const version) + * @return A const reference to the vector of peptide identifications + */ + const std::vector& getPeptideIdentifications() const; + + /** + * @brief Set all peptide identifications for all spectra + * @param ids Vector of peptide identifications + */ + void setPeptideIdentifications(std::vector&& ids); + + /** + * @brief Set all peptide identifications for all spectra + * @param ids Vector of peptide identifications + */ + void setPeptideIdentifications(const std::vector& ids); + + /** + * @brief Get the MSExperiment + * @return A reference to the MSExperiment + */ + MSExperiment& getMSExperiment(); + + /** + * @brief Get the MSExperiment (const version) + * @return A const reference to the MSExperiment + */ + const MSExperiment& getMSExperiment() const; + + /** + * @brief Set the MSExperiment + * @param experiment The MSExperiment to set + */ + void setMSExperiment(MSExperiment&& experiment); + + /** + * @brief Set the MSExperiment + * @param experiment The MSExperiment to set + */ + void setMSExperiment(const MSExperiment& experiment); + + /** + * @brief Get a const iterator to the beginning of the data + * @return A const iterator to the beginning + */ + inline auto cbegin() const + { + return PairIterator(data.getSpectra().cbegin(), peptide_ids_.cbegin()); + } + + /** + * @brief Get an iterator to the beginning of the data + * @return An iterator to the beginning + */ + inline auto begin() + { + return PairIterator(data.getSpectra().begin(), peptide_ids_.begin()); + } + + /** + * @brief Get a const iterator to the beginning of the data + * @return A const iterator to the beginning + */ + inline auto begin() const + { + return PairIterator(data.getSpectra().cbegin(), peptide_ids_.cbegin()); + } + + /** + * @brief Get an iterator to the end of the data + * @return An 
iterator to the end + */ + inline auto end() + { + return PairIterator(data.getSpectra().end(), peptide_ids_.end()); + } + + /** + * @brief Get a const iterator to the end of the data + * @return A const iterator to the end + */ + inline auto end() const + { + return PairIterator(data.getSpectra().end(), peptide_ids_.end()); + } + + /** + * @brief Get a const iterator to the end of the data + * @return A const iterator to the end + */ + inline auto cend() const + { + return PairIterator(data.getSpectra().cend(), peptide_ids_.cend()); + } + + /** + * @brief Access a spectrum and its associated peptide identification + * @param idx The index of the spectrum + * @return A pair of references to the spectrum and its peptide identification + */ + inline Mapping operator[](size_t idx) + { + return {data.getSpectra()[idx], peptide_ids_[idx]}; + } + + /** + * @brief Access a spectrum and its associated peptide identification (const version) + * @param idx The index of the spectrum + * @return A pair of const references to the spectrum and its peptide identification + */ + inline ConstMapping operator[](size_t idx) const + { + return {data.getSpectra()[idx], peptide_ids_[idx]}; + } + + /** + * @brief Iterator for pairs of spectra and peptide identifications + * + * This iterator allows traversing the spectra and their associated peptide + * identifications together. 
+ */ + template + struct PairIterator + { + // TODO add check that both vectors are of the same length + using iterator_category = std::forward_iterator_tag; + using difference_type = std::ptrdiff_t; + //using value_type = std::pair; + //using pointer = value_type*; + //using reference = value_type&; + + /** + * @brief Constructor + * @param ptr1 Iterator to the spectra + * @param ptr2 Iterator to the peptide identifications + */ + PairIterator(T1 ptr1, T2 ptr2) : m_ptr1(ptr1), m_ptr2(ptr2) + {} + + /** + * @brief Pre-increment operator + * @return Reference to this iterator after incrementing + */ + PairIterator& operator++() + { + ++m_ptr1; + ++m_ptr2; + return *this; + } + + /** + * @brief Post-increment operator + * @return Copy of this iterator before incrementing + */ + PairIterator operator++(int) + { + auto tmp(*this); + ++(*this); + return tmp; + } + + /** + * @brief Dereference operator + * @return A pair of references to the current spectrum and peptide identification + */ + auto operator*() + { + return std::make_pair(std::ref(*m_ptr1), std::ref(*m_ptr2)); + } + + /** + * @brief Equality operator + * @param a First iterator + * @param b Second iterator + * @return True if the iterators are equal + */ + inline friend bool operator==(const PairIterator& a, const PairIterator& b) + { + return a.m_ptr1 == b.m_ptr1 && a.m_ptr2 == b.m_ptr2; + } + + /** + * @brief Inequality operator + * @param a First iterator + * @param b Second iterator + * @return True if the iterators are not equal + */ + inline friend bool operator!=(const PairIterator& a, const PairIterator& b) + { + return !(a == b); + } + + private: + T1 m_ptr1; + T2 m_ptr2; + }; + + typedef AnnotatedMSRun::PairIterator::iterator, std::vector::iterator> Iterator; + typedef AnnotatedMSRun::PairIterator::const_iterator, std::vector::const_iterator> ConstIterator; + + private: + std::vector peptide_ids_; + std::vector protein_ids_; + MSExperiment data; + }; +} \ No newline at end of file diff --git 
a/src/openms/source/METADATA/ExperimentalSettings.cpp b/src/openms/source/METADATA/ExperimentalSettings.cpp index 5a9b634e958..c9ec2d7a1e6 100644 --- a/src/openms/source/METADATA/ExperimentalSettings.cpp +++ b/src/openms/source/METADATA/ExperimentalSettings.cpp @@ -25,7 +25,6 @@ namespace OpenMS instrument_ == rhs.instrument_ && hplc_ == rhs.hplc_ && datetime_ == rhs.datetime_ && - protein_identifications_ == rhs.protein_identifications_ && comment_ == rhs.comment_ && fraction_identifier_ == rhs.fraction_identifier_ && MetaInfoInterface::operator==(rhs) && @@ -129,21 +128,6 @@ namespace OpenMS return os; } - const vector & ExperimentalSettings::getProteinIdentifications() const - { - return protein_identifications_; - } - - vector & ExperimentalSettings::getProteinIdentifications() - { - return protein_identifications_; - } - - void ExperimentalSettings::setProteinIdentifications(const vector & protein_identifications) - { - protein_identifications_ = protein_identifications; - } - const String & ExperimentalSettings::getComment() const { return comment_; diff --git a/src/openms/source/METADATA/SpectrumSettings.cpp b/src/openms/source/METADATA/SpectrumSettings.cpp index d9a5b045359..266f2da360e 100644 --- a/src/openms/source/METADATA/SpectrumSettings.cpp +++ b/src/openms/source/METADATA/SpectrumSettings.cpp @@ -28,7 +28,6 @@ namespace OpenMS acquisition_info_(), precursors_(), products_(), - identification_(), data_processing_() { } @@ -46,7 +45,6 @@ namespace OpenMS source_file_ == rhs.source_file_ && precursors_ == rhs.precursors_ && products_ == rhs.products_ && - identification_ == rhs.identification_ && ( data_processing_.size() == rhs.data_processing_.size() && std::equal(data_processing_.begin(), data_processing_.end(), @@ -80,7 +78,6 @@ namespace OpenMS //source_file_ == rhs.source_file_ && precursors_.insert(precursors_.end(), rhs.precursors_.begin(), rhs.precursors_.end()); products_.insert(products_.end(), rhs.products_.begin(), rhs.products_.end()); - 
identification_.insert(identification_.end(), rhs.identification_.begin(), rhs.identification_.end()); data_processing_.insert(data_processing_.end(), rhs.data_processing_.begin(), rhs.data_processing_.end()); } @@ -186,21 +183,6 @@ namespace OpenMS return os; } - const std::vector & SpectrumSettings::getPeptideIdentifications() const - { - return identification_; - } - - std::vector & SpectrumSettings::getPeptideIdentifications() - { - return identification_; - } - - void SpectrumSettings::setPeptideIdentifications(const std::vector & identification) - { - identification_ = identification; - } - const String & SpectrumSettings::getNativeID() const { return native_id_; @@ -227,4 +209,3 @@ namespace OpenMS } } - diff --git a/src/openms/source/METADATA/sources.cmake b/src/openms/source/METADATA/sources.cmake index bd9d51d3624..dda918b3d89 100644 --- a/src/openms/source/METADATA/sources.cmake +++ b/src/openms/source/METADATA/sources.cmake @@ -5,6 +5,7 @@ set(directory source/METADATA) set(sources_list AbsoluteQuantitationStandards.cpp Acquisition.cpp +AnnotatedMSRun.cpp AcquisitionInfo.cpp CVTerm.cpp CVTermList.cpp diff --git a/src/openms_gui/include/OpenMS/VISUAL/LayerData1DChrom.h b/src/openms_gui/include/OpenMS/VISUAL/LayerData1DChrom.h index 5563a0074b5..bd835637a46 100644 --- a/src/openms_gui/include/OpenMS/VISUAL/LayerData1DChrom.h +++ b/src/openms_gui/include/OpenMS/VISUAL/LayerData1DChrom.h @@ -31,7 +31,7 @@ namespace OpenMS bool hasIndex(Size index) const override { - return index < chromatogram_map_->getNrChromatograms(); + return index < chromatogram_map_->getMSExperiment().getNrChromatograms(); } RangeAllType getRangeForArea(const RangeAllType partial_range) const override diff --git a/src/openms_gui/include/OpenMS/VISUAL/LayerData1DPeak.h b/src/openms_gui/include/OpenMS/VISUAL/LayerData1DPeak.h index a90d6fb99a8..2ab11eddbfb 100644 --- a/src/openms_gui/include/OpenMS/VISUAL/LayerData1DPeak.h +++ b/src/openms_gui/include/OpenMS/VISUAL/LayerData1DPeak.h @@ 
-34,7 +34,7 @@ namespace OpenMS bool hasIndex(Size index) const override { - return index < peak_map_->size(); + return index < peak_map_->getMSExperiment().size(); } RangeAllType getRangeForArea(const RangeAllType partial_range) const override diff --git a/src/openms_gui/include/OpenMS/VISUAL/LayerDataBase.h b/src/openms_gui/include/OpenMS/VISUAL/LayerDataBase.h index 2c13b8a71c6..c82f6367698 100644 --- a/src/openms_gui/include/OpenMS/VISUAL/LayerDataBase.h +++ b/src/openms_gui/include/OpenMS/VISUAL/LayerDataBase.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -120,7 +121,7 @@ namespace OpenMS typedef boost::shared_ptr ConsensusMapSharedPtrType; /// Main data type (experiment) - typedef PeakMap ExperimentType; + typedef AnnotatedMSRun ExperimentType; /// SharedPtr on MSExperiment typedef boost::shared_ptr ExperimentSharedPtrType; diff --git a/src/openms_gui/include/OpenMS/VISUAL/LayerDataChrom.h b/src/openms_gui/include/OpenMS/VISUAL/LayerDataChrom.h index 48764602bbf..e1ce9cbd08c 100644 --- a/src/openms_gui/include/OpenMS/VISUAL/LayerDataChrom.h +++ b/src/openms_gui/include/OpenMS/VISUAL/LayerDataChrom.h @@ -44,13 +44,13 @@ namespace OpenMS void updateRanges() override { - chromatogram_map_->updateRanges(); + chromatogram_map_->getMSExperiment().updateRanges(); } RangeAllType getRange() const override { RangeAllType r; - r.assign(chromatogram_map_->chromatogramRanges()); + r.assign(chromatogram_map_->getMSExperiment().chromatogramRanges()); return r; } @@ -62,7 +62,7 @@ namespace OpenMS const ExperimentType::ChromatogramType& getChromatogram(Size idx) const { - return chromatogram_map_->getChromatogram(idx); + return chromatogram_map_->getMSExperiment().getChromatogram(idx); } diff --git a/src/openms_gui/include/OpenMS/VISUAL/LayerDataPeak.h b/src/openms_gui/include/OpenMS/VISUAL/LayerDataPeak.h index 9c9991764c3..6a09b8951ba 100644 --- a/src/openms_gui/include/OpenMS/VISUAL/LayerDataPeak.h +++ 
b/src/openms_gui/include/OpenMS/VISUAL/LayerDataPeak.h @@ -53,14 +53,14 @@ namespace OpenMS void updateRanges() override { - peak_map_->updateRanges(); + peak_map_->getMSExperiment().updateRanges(); // on_disc_peaks_->updateRanges(); // note: this is not going to work since its on disk! We currently don't have a good way to access these ranges } RangeAllType getRange() const override { RangeAllType r; - r.assign(peak_map_->spectrumRanges()); + r.assign(peak_map_->getMSExperiment().spectrumRanges()); return r; } @@ -74,9 +74,9 @@ namespace OpenMS const ExperimentType::SpectrumType& getSpectrum(Size spectrum_idx) const { - if ((*peak_map_)[spectrum_idx].size() > 0) + if (peak_map_->getMSExperiment()[spectrum_idx].size() > 0) { - return (*peak_map_)[spectrum_idx]; + return peak_map_->getMSExperiment()[spectrum_idx]; } if (!on_disc_peaks_->empty()) { @@ -84,7 +84,7 @@ namespace OpenMS local_spec = on_disc_peaks_->getSpectrum(spectrum_idx); return local_spec; } - return (*peak_map_)[spectrum_idx]; + return peak_map_->getMSExperiment()[spectrum_idx]; } /** @@ -137,24 +137,30 @@ namespace OpenMS /// Check whether the current layer should be represented as ion mobility bool isIonMobilityData() const { - return this->getPeakData()->size() > 0 && this->getPeakData()->metaValueExists("is_ion_mobility") && this->getPeakData()->getMetaValue("is_ion_mobility").toBool(); + const MSExperiment& exp = this->getPeakData()->getMSExperiment(); + return exp.size() > 0 + && exp.metaValueExists("is_ion_mobility") + && exp.getMetaValue("is_ion_mobility").toBool(); } void labelAsIonMobilityData() const { - peak_map_->setMetaValue("is_ion_mobility", "true"); + peak_map_->getMSExperiment().setMetaValue("is_ion_mobility", "true"); } /// Check whether the current layer contains DIA (SWATH-MS) data bool isDIAData() const { - return this->getPeakData()->size() > 0 && this->getPeakData()->metaValueExists("is_dia_data") && this->getPeakData()->getMetaValue("is_dia_data").toBool(); + const 
MSExperiment& exp = this->getPeakData()->getMSExperiment(); + return exp.size() > 0 + && exp.metaValueExists("is_dia_data") + && exp.getMetaValue("is_dia_data").toBool(); } /// Label the current layer as DIA (SWATH-MS) data void labelAsDIAData() { - peak_map_->setMetaValue("is_dia_data", "true"); + peak_map_->getMSExperiment().setMetaValue("is_dia_data", "true"); } /** @@ -167,13 +173,16 @@ namespace OpenMS */ bool chromatogram_flag_set() const { - return this->getPeakData()->size() > 0 && this->getPeakData()->metaValueExists("is_chromatogram") && this->getPeakData()->getMetaValue("is_chromatogram").toBool(); + const MSExperiment& exp = this->getPeakData()->getMSExperiment(); + return exp.size() > 0 + && exp.metaValueExists("is_chromatogram") + && exp.getMetaValue("is_chromatogram").toBool(); } /// set the chromatogram flag void set_chromatogram_flag() { - peak_map_->setMetaValue("is_chromatogram", "true"); + peak_map_->getMSExperiment().setMetaValue("is_chromatogram", "true"); } /// remove the chromatogram flag @@ -181,7 +190,7 @@ namespace OpenMS { if (this->chromatogram_flag_set()) { - peak_map_->removeMetaValue("is_chromatogram"); + peak_map_->getMSExperiment().removeMetaValue("is_chromatogram"); } } diff --git a/src/openms_gui/include/OpenMS/VISUAL/MetaDataBrowser.h b/src/openms_gui/include/OpenMS/VISUAL/MetaDataBrowser.h index b52f5884385..a58cf0174b1 100644 --- a/src/openms_gui/include/OpenMS/VISUAL/MetaDataBrowser.h +++ b/src/openms_gui/include/OpenMS/VISUAL/MetaDataBrowser.h @@ -12,7 +12,6 @@ #include #include -#include #include #include diff --git a/src/openms_gui/source/VISUAL/APPLICATIONS/TOPPViewBase.cpp b/src/openms_gui/source/VISUAL/APPLICATIONS/TOPPViewBase.cpp index 000642bfa6e..47c8b1a1e2e 100644 --- a/src/openms_gui/source/VISUAL/APPLICATIONS/TOPPViewBase.cpp +++ b/src/openms_gui/source/VISUAL/APPLICATIONS/TOPPViewBase.cpp @@ -64,6 +64,7 @@ #include #include +#include using namespace std; @@ -658,44 +659,49 @@ namespace OpenMS // with actual 
spectra including raw data (allowing us to only // populate MS1 spectra with actual data). - peak_map_sptr = on_disc_peaks->getMetaData(); + peak_map_sptr.get()->getMSExperiment() = *on_disc_peaks->getMetaData(); for (Size k = 0; k < indexed_mzml_file.getNrSpectra() && !cache_ms1_on_disc; k++) { - if ( peak_map_sptr->getSpectrum(k).getMSLevel() == 1) + if ( peak_map_sptr->getMSExperiment().getSpectrum(k).getMSLevel() == 1) { - peak_map_sptr->getSpectrum(k) = on_disc_peaks->getSpectrum(k); + peak_map_sptr->getMSExperiment().getSpectrum(k) = on_disc_peaks->getSpectrum(k); } } for (Size k = 0; k < indexed_mzml_file.getNrChromatograms() && !cache_ms2_on_disc; k++) { - peak_map_sptr->getChromatogram(k) = on_disc_peaks->getChromatogram(k); + peak_map_sptr->getMSExperiment().getChromatogram(k) = on_disc_peaks->getChromatogram(k); } // Load at least one spectrum into memory (TOPPView assumes that at least one spectrum is in memory) - if (cache_ms1_on_disc && peak_map_sptr->getNrSpectra() > 0) peak_map_sptr->getSpectrum(0) = on_disc_peaks->getSpectrum(0); + if (cache_ms1_on_disc + && peak_map_sptr->getMSExperiment().getNrSpectra() > 0) + { + peak_map_sptr->getMSExperiment().getSpectrum(0) = on_disc_peaks->getSpectrum(0); + } } } // Load all data into memory if e.g. other file type than mzML if (!parsing_success) { - fh.loadExperiment(abs_filename, *peak_map_sptr, {file_type}, ProgressLogger::GUI, true, true); + fh.loadExperiment(abs_filename, peak_map_sptr->getMSExperiment(), {file_type}, ProgressLogger::GUI, true, true); } OPENMS_LOG_INFO << "INFO: done loading all " << std::endl; // a mzML file may contain both, chromatogram and peak data // -> this is handled in PlotCanvas::addPeakLayer FIXME: No it's not! 
- if (peak_map_sptr->getNrSpectra() > 0 && peak_map_sptr->getNrChromatograms() > 0) + if (peak_map_sptr->getMSExperiment().getNrSpectra() > 0 + && peak_map_sptr->getMSExperiment().getNrChromatograms() > 0) { OPENMS_LOG_WARN << "Your input data contains chromatograms and spectra, falling back to display spectra only." << std::endl; data_type = LayerDataBase::DT_PEAK; } - else if (peak_map_sptr->getNrChromatograms() > 0) + else if (peak_map_sptr->getMSExperiment().getNrChromatograms() > 0) { data_type = LayerDataBase::DT_CHROMATOGRAM; } - else if (peak_map_sptr->getNrSpectra() > 0) + else if (peak_map_sptr->getMSExperiment().getNrSpectra() > 0) { data_type = LayerDataBase::DT_PEAK; } @@ -712,8 +718,9 @@ namespace OpenMS } // sort for m/z and update ranges of newly loaded data - peak_map_sptr->sortSpectra(true); - peak_map_sptr->updateRanges(); + + peak_map_sptr->getMSExperiment().sortSpectra(true); + peak_map_sptr->getMSExperiment().updateRanges(); // try to add the data if (caption == "") @@ -801,7 +808,7 @@ namespace OpenMS (data_type == LayerDataBase::DT_IDENT)); // only one peak spectrum? disable 2D as default - if (peak_map->size() == 1) { maps_as_2d = false; } + if (peak_map->getMSExperiment().size() == 1) { maps_as_2d = false; } // set the window where (new layer) data could be opened in // get EnhancedTabBarWidget with given id @@ -872,7 +879,7 @@ namespace OpenMS // (ensures we will keep track of this flag from now on). 
if (is_dia_data) { - peak_map->setMetaValue("is_dia_data", "true"); + peak_map->getMSExperiment().setMetaValue("is_dia_data", "true"); } // determine the window to open the data in @@ -1927,10 +1934,10 @@ namespace OpenMS // spectrum is generated in the dialog, so just receive it here PeakSpectrum spectrum = spec_gen_dialog_.getSpectrum(); - PeakMap new_exp; - new_exp.addSpectrum(spectrum); - new_exp.updateRanges(); - ExperimentSharedPtrType new_exp_sptr(new PeakMap(new_exp)); + ExperimentSharedPtrType new_exp_sptr = boost::make_shared(); + new_exp_sptr->getMSExperiment().addSpectrum(spectrum); + new_exp_sptr->getMSExperiment().updateRanges(); + FeatureMapSharedPtrType f_dummy(new FeatureMapType()); ConsensusMapSharedPtrType c_dummy(new ConsensusMapType()); ODExperimentSharedPtrType od_dummy(new OnDiscMSExperiment()); @@ -2008,14 +2015,15 @@ namespace OpenMS { const LayerDataBase& layer = getActiveCanvas()->getCurrentLayer(); - ExperimentSharedPtrType exp(new MSExperiment(IMDataConverter::reshapeIMFrameToMany(spec))); + ExperimentSharedPtrType exp = boost::make_shared(); + exp.get()->getMSExperiment() = std::move(IMDataConverter::reshapeIMFrameToMany(spec)); // hack, but currently not avoidable, because 2D widget does not support IM natively yet... 
// for (auto& spec : exp->getSpectra()) spec.setRT(spec.getDriftTime()); // open new 2D widget Plot2DWidget* w = new Plot2DWidget(getCanvasParameters(2), &ws_); // map to IM + MZ - w->setMapper(DimMapper<2>({IMTypes::fromIMUnit(exp->getSpectra()[0].getDriftTimeUnit()), DIM_UNIT::MZ})); + w->setMapper(DimMapper<2>({IMTypes::fromIMUnit(exp->getMSExperiment().getSpectra()[0].getDriftTimeUnit()), DIM_UNIT::MZ})); // add data if (!w->canvas()->addPeakLayer(exp, PlotCanvas::ODExperimentSharedPtrType(new OnDiscMSExperiment()), layer.filename + " (IM Frame)")) @@ -2038,7 +2046,7 @@ namespace OpenMS } // Add spectra into a MSExperiment, sort and prepare it for display - ExperimentSharedPtrType tmpe(new OpenMS::MSExperiment() ); + ExperimentSharedPtrType tmpe = boost::make_shared(); // Collect all MS2 spectra with the same precursor as the current spectrum // (they are in the same SWATH window) @@ -2063,7 +2071,7 @@ namespace OpenMS // view MSSpectrum t = spec; t.setMSLevel(1); - tmpe->addSpectrum(t); + tmpe->getMSExperiment().addSpectrum(t); } else if (lp->getOnDiscPeakData()->getNrSpectra() > k) { @@ -2072,7 +2080,7 @@ namespace OpenMS // view MSSpectrum t = lp->getOnDiscPeakData()->getSpectrum(k); t.setMSLevel(1); - tmpe->addSpectrum(t); + tmpe->getMSExperiment().addSpectrum(t); } } } @@ -2080,8 +2088,8 @@ namespace OpenMS } caption_add = "(DIA window " + String(lower) + " - " + String(upper) + ")"; - tmpe->sortSpectra(); - tmpe->updateRanges(); + tmpe->getMSExperiment().sortSpectra(); + tmpe->getMSExperiment().updateRanges(); // open new 2D widget Plot2DWidget* w = new Plot2DWidget(getCanvasParameters(2), &ws_); @@ -2147,9 +2155,9 @@ namespace OpenMS { // Determine ion mobility unit (default is milliseconds) String unit = "ms"; - if (exp_sptr->metaValueExists("ion_mobility_unit")) + if (exp_sptr->getMSExperiment().metaValueExists("ion_mobility_unit")) { - unit = exp_sptr->getMetaValue("ion_mobility_unit"); + unit = 
exp_sptr->getMSExperiment().getMetaValue("ion_mobility_unit"); } String label = "Ion Mobility [" + unit + "]"; @@ -2344,7 +2352,7 @@ namespace OpenMS return; } MetaDataBrowser dlg(false, this); - dlg.add(exp); + dlg.add(exp.getMSExperiment()); dlg.exec(); } } @@ -2411,7 +2419,7 @@ namespace OpenMS else if (spec_view != nullptr) { ExperimentSharedPtrType new_exp_sptr(new ExperimentType()); - if (LayerDataBase::DataType current_type; spec_view->getSelectedScan(*new_exp_sptr, current_type)) + if (LayerDataBase::DataType current_type; spec_view->getSelectedScan(new_exp_sptr->getMSExperiment(), current_type)) { ODExperimentSharedPtrType od_dummy(new OnDiscMSExperiment()); FeatureMapSharedPtrType f_dummy(new FeatureMapType()); @@ -2542,15 +2550,15 @@ namespace OpenMS { try { - FileHandler().loadExperiment(layer.filename, *lp->getPeakDataMuteable(), {}, ProgressLogger::NONE, true, true); + FileHandler().loadExperiment(layer.filename, lp->getPeakDataMuteable()->getMSExperiment(), {}, ProgressLogger::NONE, true, true); } catch (Exception::BaseException& e) { QMessageBox::critical(this, "Error", (String("Error while loading file") + layer.filename + "\nError message: " + e.what()).toQString()); - lp->getPeakDataMuteable()->clear(true); + lp->getPeakDataMuteable()->getMSExperiment().clear(true); } - lp->getPeakDataMuteable()->sortSpectra(true); - lp->getPeakDataMuteable()->updateRanges(); + lp->getPeakDataMuteable()->getMSExperiment().sortSpectra(true); + lp->getPeakDataMuteable()->getMSExperiment().updateRanges(); } else if (auto* lp = dynamic_cast(&layer)) // feature data { @@ -2583,15 +2591,15 @@ namespace OpenMS // TODO CHROM try { - FileHandler().loadExperiment(layer.filename, *lp->getChromatogramData(), {}, ProgressLogger::NONE, true, true); + FileHandler().loadExperiment(layer.filename, lp->getChromatogramData()->getMSExperiment(), {}, ProgressLogger::NONE, true, true); } catch (Exception::BaseException& e) { QMessageBox::critical(this, "Error", (String("Error while 
loading file") + layer.filename + "\nError message: " + e.what()).toQString()); - lp->getChromatogramData()->clear(true); + lp->getChromatogramData()->getMSExperiment().clear(true); } - lp->getChromatogramData()->sortChromatograms(true); - lp->getChromatogramData()->updateRanges(); + lp->getChromatogramData()->getMSExperiment().sortChromatograms(true); + lp->getChromatogramData()->getMSExperiment().updateRanges(); } // update all layers that need an update diff --git a/src/openms_gui/source/VISUAL/LayerData1DBase.cpp b/src/openms_gui/source/VISUAL/LayerData1DBase.cpp index 06ea549796c..21ec56f768b 100644 --- a/src/openms_gui/source/VISUAL/LayerData1DBase.cpp +++ b/src/openms_gui/source/VISUAL/LayerData1DBase.cpp @@ -32,5 +32,12 @@ namespace OpenMS { annotations_1d_.resize(current_idx_ + 1); } + + // Clear peak colors to force reinitialization for the new spectrum + // Unlike annotations which persist across spectra, peak colors need to be regenerated + // to match the size of the new spectrum, preventing "Peak color array size doesn't + // match number of peaks" errors that occur when switching between spectra with + // different numbers of peaks + peak_colors_1d.clear(); } }// namespace OpenMS diff --git a/src/openms_gui/source/VISUAL/LayerData1DPeak.cpp b/src/openms_gui/source/VISUAL/LayerData1DPeak.cpp index 0f591cc6a12..172356edb04 100644 --- a/src/openms_gui/source/VISUAL/LayerData1DPeak.cpp +++ b/src/openms_gui/source/VISUAL/LayerData1DPeak.cpp @@ -102,14 +102,26 @@ namespace OpenMS void LayerData1DPeak::synchronizePeakAnnotations() { + #ifdef DEBUG_IDENTIFICATION_VIEW + std::cout << "synchronizePeakAnnotations." 
<< std::endl; + #endif + // Return if no valid peak layer attached - if (getPeakData() == nullptr || getPeakData()->empty() || type != LayerDataBase::DT_PEAK) + if (getPeakData() == nullptr + || getPeakData()->getMSExperiment().empty() + || type != LayerDataBase::DT_PEAK) + { + return; + } + + // no ID selected + if (peptide_id_index == -1 || peptide_hit_index == -1) { return; } // get mutable access to the spectrum - MSSpectrum& spectrum = getPeakDataMuteable()->getSpectrum(current_idx_); + MSSpectrum& spectrum = getPeakDataMuteable()->getMSExperiment().getSpectrum(current_idx_); int ms_level = spectrum.getMSLevel(); @@ -117,16 +129,14 @@ namespace OpenMS return; // store user fragment annotations - vector& pep_ids = spectrum.getPeptideIdentifications(); - - // no ID selected - if (peptide_id_index == -1 || peptide_hit_index == -1) - { - return; - } - + vector& pep_ids = getPeakDataMuteable()->getPeptideIdentifications(); + vector& prot_ids = getPeakDataMuteable()->getProteinIdentifications(); + if (!pep_ids.empty()) { + #ifdef DEBUG_IDENTIFICATION_VIEW + std::cout << "PeptideIdentifications found in the current spectrum." << std::endl; + #endif vector& hits = pep_ids[peptide_id_index].getHits(); if (!hits.empty()) @@ -141,8 +151,9 @@ namespace OpenMS hits.push_back(hit); } } - else // PeptideIdentifications are empty, create new PepIDs and PeptideHits to store the PeakAnnotations - { + else + { + std::cout << "No PeptideIdentifications found in the current spectrum." 
<< std::endl; // copy user annotations to fragment annotation vector const Annotations1DContainer& las = getAnnotations(current_idx_); @@ -166,7 +177,6 @@ namespace OpenMS pep_id.setIdentifier("Unknown"); // create a dummy ProteinIdentification for all ID-less PeakAnnotations - vector& prot_ids = getPeakDataMuteable()->getProteinIdentifications(); if (prot_ids.empty() || prot_ids.back().getIdentifier() != String("Unknown")) { ProteinIdentification prot_id; @@ -193,7 +203,7 @@ namespace OpenMS void LayerData1DPeak::removePeakAnnotationsFromPeptideHit(const std::vector& selected_annotations) { // Return if no valid peak layer attached - if (getPeakData() == nullptr || getPeakData()->empty() || type != LayerDataBase::DT_PEAK) + if (getPeakData() == nullptr || getPeakData()->getMSExperiment().empty() || type != LayerDataBase::DT_PEAK) { return; } @@ -205,7 +215,7 @@ namespace OpenMS } // get mutable access to the spectrum - MSSpectrum& spectrum = getPeakDataMuteable()->getSpectrum(current_idx_); + MSSpectrum& spectrum = getPeakDataMuteable()->getMSExperiment().getSpectrum(current_idx_); int ms_level = spectrum.getMSLevel(); // wrong MS level @@ -218,17 +228,9 @@ namespace OpenMS // that this function returns prematurely is unlikely, // since we are deleting existing annotations, // that have to be somewhere, but better make sure - vector& pep_ids = spectrum.getPeptideIdentifications(); - if (pep_ids.empty()) - { - return; - } - vector& hits = pep_ids[peptide_id_index].getHits(); - if (hits.empty()) - { - return; - } - PeptideHit& hit = hits[peptide_hit_index]; + PeptideIdentification& pep_ids = getPeakDataMuteable()->getPeptideIdentifications()[peptide_id_index]; + + PeptideHit& hit = pep_ids.getHits()[peptide_hit_index]; vector fas = hit.getPeakAnnotations(); if (fas.empty()) { diff --git a/src/openms_gui/source/VISUAL/LayerDataBase.cpp b/src/openms_gui/source/VISUAL/LayerDataBase.cpp index 3aef61c4703..8e7cbdfd2e1 100644 --- 
a/src/openms_gui/source/VISUAL/LayerDataBase.cpp +++ b/src/openms_gui/source/VISUAL/LayerDataBase.cpp @@ -53,6 +53,41 @@ namespace OpenMS return n; } + /* + void LayerDataBase::updateCache_() + { + if (peak_map_->getMSExperiment().getNrSpectra() > current_spectrum_idx_ && !(*peak_map_)[current_spectrum_idx_].first.empty()) + { + cached_spectrum_ = (*peak_map_)[current_spectrum_idx_].first; + } + else if (on_disc_peaks->getNrSpectra() > current_spectrum_idx_) + { + cached_spectrum_ = on_disc_peaks->getSpectrum(current_spectrum_idx_); + } + } + + + /// add annotation from an OSW sqlite file. + + + /// get annotation (e.g. to build a hierachical ID View) + /// Not const, because we might have incomplete data, which needs to be loaded from sql source + + LayerDataBase::OSWDataSharedPtrType& LayerDataBase::getChromatogramAnnotation() + { + return chrom_annotation_; + } + + const LayerDataBase::OSWDataSharedPtrType& LayerDataBase::getChromatogramAnnotation() const + { + return chrom_annotation_; + } + + void LayerDataBase::setChromatogramAnnotation(OSWData&& data) + { + chrom_annotation_ = OSWDataSharedPtrType(new OSWData(std::move(data))); + } +*/ bool LayerDataBase::annotate(const vector& identifications, const vector& protein_identifications) { @@ -82,7 +117,6 @@ namespace OpenMS return false; } - float LayerDataBase::getMinIntensity() const { return getRange().getMinIntensity(); @@ -231,7 +265,7 @@ namespace OpenMS OSWData data; oswf.readMinimal(data); // allow data to map from transition.id (=native.id) to a chromatogram index in MSExperiment - data.buildNativeIDResolver(*lp->getChromatogramData().get()); + data.buildNativeIDResolver(lp->getChromatogramData().get()->getMSExperiment()); lp->setChromatogramAnnotation(std::move(data)); return true; } diff --git a/src/openms_gui/source/VISUAL/LayerDataChrom.cpp b/src/openms_gui/source/VISUAL/LayerDataChrom.cpp index 14afb93726f..a4ea3bad0aa 100644 --- a/src/openms_gui/source/VISUAL/LayerDataChrom.cpp +++ 
b/src/openms_gui/source/VISUAL/LayerDataChrom.cpp @@ -18,7 +18,9 @@ #include #include #include +#include +#include using namespace std; @@ -41,14 +43,14 @@ namespace OpenMS std::unique_ptr LayerDataChrom::storeVisibleData(const RangeAllType& visible_range, const DataFilters& layer_filters) const { auto ret = make_unique(); - ret->storeVisibleExperiment(*chromatogram_map_.get(), visible_range, layer_filters); + ret->storeVisibleExperiment(chromatogram_map_.get()->getMSExperiment(), visible_range, layer_filters); return ret; } std::unique_ptr LayerDataChrom::storeFullData() const { auto ret = make_unique(); - ret->storeFullExperiment(*chromatogram_map_.get()); + ret->storeFullExperiment(chromatogram_map_.get()->getMSExperiment()); return ret; } @@ -133,15 +135,17 @@ namespace OpenMS // projection for m/z auto ptr_mz = make_unique(); - MSExperiment exp_mz; - exp_mz.addSpectrum(std::move(projection_mz)); - ptr_mz->setPeakData(ExperimentSharedPtrType(new ExperimentType(exp_mz))); + + ExperimentSharedPtrType exp_mz = boost::make_shared(); + exp_mz->getMSExperiment().addSpectrum(std::move(projection_mz)); + ptr_mz->setPeakData(exp_mz); // projection for RT auto ptr_rt = make_unique(); - MSExperiment exp_rt; - exp_mz.addChromatogram(std::move(projection_rt)); - ptr_rt->setChromData(ExperimentSharedPtrType(new ExperimentType(exp_rt))); + + exp_mz->getMSExperiment().addChromatogram(std::move(projection_rt)); + + ptr_rt->setChromData(boost::make_shared()); auto assign_axis = [&](auto unit, auto& layer) { switch (unit) @@ -165,7 +169,7 @@ namespace OpenMS PeakIndex LayerDataChrom::findHighestDataPoint(const RangeAllType& area) const { - const PeakMap& exp = *getChromatogramData(); + const PeakMap& exp = getChromatogramData().get()->getMSExperiment(); int count {-1}; for (const auto& chrom : exp.getChromatograms()) { @@ -231,6 +235,6 @@ namespace OpenMS std::unique_ptr LayerDataChrom::getStats() const { - return make_unique(*chromatogram_map_); + return 
make_unique(chromatogram_map_->getMSExperiment()); } } // namespace OpenMS diff --git a/src/openms_gui/source/VISUAL/LayerDataPeak.cpp b/src/openms_gui/source/VISUAL/LayerDataPeak.cpp index f8555487101..8ee478b9013 100644 --- a/src/openms_gui/source/VISUAL/LayerDataPeak.cpp +++ b/src/openms_gui/source/VISUAL/LayerDataPeak.cpp @@ -47,14 +47,14 @@ namespace OpenMS std::unique_ptr LayerDataPeak::storeVisibleData(const RangeAllType& visible_range, const DataFilters& layer_filters) const { auto ret = make_unique(); - ret->storeVisibleExperiment(*peak_map_.get(), visible_range, layer_filters); + ret->storeVisibleExperiment(peak_map_->getMSExperiment(), visible_range, layer_filters); return ret; } std::unique_ptr LayerDataPeak::storeFullData() const { auto ret = make_unique(); - ret->storeFullExperiment(*peak_map_.get()); + ret->storeFullExperiment(peak_map_->getMSExperiment()); return ret; } @@ -79,7 +79,8 @@ namespace OpenMS MSSpectrum projection_mz; Mobilogram projection_im; MSChromatogram projection_rt; - const auto& exp = *getPeakData(); + + const auto& exp = getPeakData()->getMSExperiment(); auto lvls = exp.getMSLevels(); // use for smallest MS level in the data (IM frames may have all level 1, or all level 2) for (auto i = exp.areaBeginConst(area, lvls[0]); i != exp.areaEndConst(); ++i) { @@ -227,11 +228,11 @@ namespace OpenMS auto max_int = numeric_limits::lowest(); PeakIndex max_pi; - const auto map = *getPeakData(); + const auto& map = getPeakData()->getMSExperiment(); // for IM data, use whatever is there. For RT/mz data, use MSlevel 1 const UInt MS_LEVEL = (! map.empty() && map.isIMFrame()) ? 
map[0].getMSLevel() : 1; - for (ExperimentType::ConstAreaIterator i = map.areaBeginConst(area, MS_LEVEL); i != map.areaEndConst(); ++i) + for (auto i = map.areaBeginConst(area, MS_LEVEL); i != map.areaEndConst(); ++i) { PeakIndex pi = i.getPeakIndex(); if (i->getIntensity() > max_int && filters.passes((map)[pi.spectrum], pi.peak)) @@ -279,7 +280,7 @@ namespace OpenMS std::unique_ptr LayerDataPeak::getStats() const { - return make_unique(*peak_map_); + return make_unique(peak_map_->getMSExperiment()); } bool LayerDataPeak::annotate(const vector& identifications, const vector& protein_identifications) diff --git a/src/openms_gui/source/VISUAL/MetaDataBrowser.cpp b/src/openms_gui/source/VISUAL/MetaDataBrowser.cpp index 25a055653f0..28f8af5f7b3 100644 --- a/src/openms_gui/source/VISUAL/MetaDataBrowser.cpp +++ b/src/openms_gui/source/VISUAL/MetaDataBrowser.cpp @@ -261,9 +261,6 @@ namespace OpenMS //check for Sample visualize_(meta.getSample(), item); - //check for ProteinIdentification - visualizeAll_(meta.getProteinIdentifications(), item); - //check for Instrument visualize_(meta.getInstrument(), item); @@ -846,9 +843,6 @@ namespace OpenMS //check for AcquisitionInfo visualize_(meta.getAcquisitionInfo(), item); - //check for PeptideIdentification - visualizeAll_(meta.getPeptideIdentifications(), item); - connectVisualizer_(visualizer); } diff --git a/src/openms_gui/source/VISUAL/Painter2DBase.cpp b/src/openms_gui/source/VISUAL/Painter2DBase.cpp index edd22a950c6..55f9ed18130 100644 --- a/src/openms_gui/source/VISUAL/Painter2DBase.cpp +++ b/src/openms_gui/source/VISUAL/Painter2DBase.cpp @@ -123,7 +123,7 @@ namespace OpenMS void Painter2DPeak::paint(QPainter* painter, Plot2DCanvas* canvas, int layer_index) { // renaming some values for readability - const auto& peak_map = *layer_->getPeakData(); + const auto& peak_map = layer_->getPeakData()->getMSExperiment(); // skip empty peak maps if (peak_map.empty()) @@ -241,7 +241,7 @@ namespace OpenMS QVector 
coloredPoints((int)layer_->gradient.precalculatedSize()); const double snap_factor = canvas->snap_factors_[layer_index]; - const auto& map = *layer_->getPeakData(); + const auto& map = layer_->getPeakData()->getMSExperiment();; const auto& area = canvas->visible_area_.getAreaUnit(); const auto end_area = map.areaEndConst(); // for IM data, use whatever is there. For RT/mz data, use MSlevel 1 @@ -343,7 +343,7 @@ namespace OpenMS // set painter to black (we operate directly on the pixels for all colored data) painter.setPen(Qt::black); const double snap_factor = canvas->snap_factors_[layer_index]; - const auto& map = *layer_->getPeakData(); + const auto& map = layer_->getPeakData()->getMSExperiment(); const auto& area = canvas->visible_area_.getAreaUnit(); // for IM data, use whatever is there. For RT/mz data, use MSlevel 1 @@ -450,7 +450,7 @@ namespace OpenMS void Painter2DPeak::paintPrecursorPeaks_(QPainter& painter, Plot2DCanvas* canvas) { - const auto& peak_map = *layer_->getPeakData(); + const auto& peak_map = layer_->getPeakData()->getMSExperiment(); QPen p; p.setColor(Qt::black); @@ -509,7 +509,7 @@ namespace OpenMS void Painter2DChrom::paint(QPainter* painter, Plot2DCanvas* canvas, int /*layer_index*/) { - const PeakMap& exp = *layer_->getChromatogramData(); + const PeakMap& exp = layer_->getChromatogramData()->getMSExperiment(); // TODO CHROM implement layer filters // paint chromatogram rt start and end as line diff --git a/src/openms_gui/source/VISUAL/Plot1DCanvas.cpp b/src/openms_gui/source/VISUAL/Plot1DCanvas.cpp index df20435c536..ad44d7e1dcc 100644 --- a/src/openms_gui/source/VISUAL/Plot1DCanvas.cpp +++ b/src/openms_gui/source/VISUAL/Plot1DCanvas.cpp @@ -37,6 +37,7 @@ #include #include +#include using namespace std; @@ -49,12 +50,13 @@ namespace OpenMS Plot1DCanvas::ExperimentSharedPtrType prepareChromatogram(Size index, const Plot1DCanvas::ExperimentSharedPtrType& exp_sptr, const Plot1DCanvas::ODExperimentSharedPtrType& ondisc_sptr) { // create a 
managed pointer fill it with a spectrum containing the chromatographic data - LayerDataBase::ExperimentSharedPtrType chrom_exp_sptr(new LayerDataBase::ExperimentType()); - chrom_exp_sptr->setMetaValue("is_chromatogram", "true"); //this is a hack to store that we have chromatogram data + auto chrom_exp_sptr = boost::make_shared(); + + chrom_exp_sptr->getMSExperiment().setMetaValue("is_chromatogram", "true"); //this is a hack to store that we have chromatogram data LayerDataBase::ExperimentType::SpectrumType spectrum; // retrieve chromatogram (either from in-memory or on-disc representation) - MSChromatogram current_chrom = exp_sptr->getChromatograms()[index]; + MSChromatogram current_chrom = exp_sptr->getMSExperiment().getChromatograms()[index]; if (current_chrom.empty()) { current_chrom = ondisc_sptr->getChromatogram(index); @@ -76,12 +78,12 @@ namespace OpenMS { spectrum.emplace_back(-1, 0); } - chrom_exp_sptr->addSpectrum(spectrum); + chrom_exp_sptr->getMSExperiment().addSpectrum(std::move(spectrum)); // store peptide_sequence if available if (current_chrom.getPrecursor().metaValueExists("peptide_sequence")) { - chrom_exp_sptr->setMetaValue("peptide_sequence", current_chrom.getPrecursor().getMetaValue("peptide_sequence")); + chrom_exp_sptr->getMSExperiment().setMetaValue("peptide_sequence", current_chrom.getPrecursor().getMetaValue("peptide_sequence")); } return chrom_exp_sptr; @@ -1048,7 +1050,7 @@ namespace OpenMS auto* peak_layer = dynamic_cast(&getCurrentLayer()); if (peak_layer) { - if (peak_layer->getPeakData()->containsScanOfLevel(1)) + if (peak_layer->getPeakData()->getMSExperiment().containsScanOfLevel(1)) { context_menu->addAction("Switch to 2D view", [&]() { emit showCurrentPeaksAs2D(); @@ -1069,7 +1071,7 @@ namespace OpenMS { auto l = dynamic_cast(&getCurrentLayer()); context_menu->addAction("Switch to DIA-MS view", [&]() { - emit showCurrentPeaksAsDIA(l->getCurrentSpectrum().getPrecursors()[0], *l->getPeakData().get()); + emit 
showCurrentPeaksAsDIA(l->getCurrentSpectrum().getPrecursors()[0], l->getPeakData()->getMSExperiment()); }); } } diff --git a/src/openms_gui/source/VISUAL/Plot2DCanvas.cpp b/src/openms_gui/source/VISUAL/Plot2DCanvas.cpp index aa45ce71cd4..55402e0c3ab 100644 --- a/src/openms_gui/source/VISUAL/Plot2DCanvas.cpp +++ b/src/openms_gui/source/VISUAL/Plot2DCanvas.cpp @@ -337,13 +337,14 @@ namespace OpenMS auto local_max = -numeric_limits::max(); if (auto* lp = dynamic_cast(&getLayer(i))) { - for (ExperimentType::ConstAreaIterator it = lp->getPeakData()->areaBeginConst(visible_area_.getAreaUnit().getMinRT(), visible_area_.getAreaUnit().getMaxRT(), + const MSExperiment& peak_data = lp->getPeakData()->getMSExperiment(); + for (auto it = peak_data.areaBeginConst(visible_area_.getAreaUnit().getMinRT(), visible_area_.getAreaUnit().getMaxRT(), visible_area_.getAreaUnit().getMinMZ(), visible_area_.getAreaUnit().getMaxMZ()); - it != lp->getPeakData()->areaEndConst(); + it != peak_data.areaEndConst(); ++it) { PeakIndex pi = it.getPeakIndex(); - if (it->getIntensity() > local_max && getLayer(i).filters.passes((*lp->getPeakData())[pi.spectrum], pi.peak)) + if (it->getIntensity() > local_max && getLayer(i).filters.passes(peak_data[pi.spectrum], pi.peak)) { local_max = it->getIntensity(); } @@ -720,7 +721,7 @@ namespace OpenMS else if (auto* lp = dynamic_cast(&getCurrentLayer())) { //meta info - const ExperimentType::SpectrumType & s = selected_peak_.getSpectrum(*lp->getPeakData()); + const ExperimentType::SpectrumType & s = selected_peak_.getSpectrum(lp->getPeakData()->getMSExperiment()); for (Size m = 0; m < s.getFloatDataArrays().size(); ++m) { if (selected_peak_.peak < s.getFloatDataArrays()[m].size()) @@ -884,14 +885,16 @@ namespace OpenMS settings_menu->addAction("Show/hide projections"); settings_menu->addAction("Show/hide MS/MS precursors"); + auto& exp = lp->getPeakData()->getMSExperiment(); + // in a IM-frame (IM vs. 
m/z), the RT is empty in `e_units`, and showing neighbouring RT scans is not possible (this layer only has this IM frame) // --> skip entries for RT neighbours. if (!e_units.RangeRT::isEmpty()) { // add surrounding survey scans // find nearest survey scan - SignedSize size = lp->getPeakData()->size(); - Int current = lp->getPeakData()->RTBegin(e_units.getMinRT()) - lp->getPeakData()->begin(); + SignedSize size = exp.size(); + Int current = exp.RTBegin(e_units.getMinRT()) - exp.begin(); if (current == size) // if the user clicked right of the last MS1 scan { current = std::max(SignedSize {0}, size - 1); // we want the rightmost valid scan index @@ -900,12 +903,12 @@ namespace OpenMS SignedSize i = 0; while (current + i < size || current - i >= 0) { - if (current + i < size && (*lp->getPeakData())[current + i].getMSLevel() == 1) + if (current + i < size && exp[current + i].getMSLevel() == 1) { current += i; break; } - if (current - i >= 0 && (*lp->getPeakData())[current - i].getMSLevel() == 1) + if (current - i >= 0 && exp[current - i].getMSLevel() == 1) { current -= i; break; @@ -918,28 +921,29 @@ namespace OpenMS i = 1; while (current - i >= 0 && indices.size() < 5) { - if ((*lp->getPeakData())[current - i].getMSLevel() == 1) { indices.push_back(current - i); } + if (exp[current - i].getMSLevel() == 1) { indices.push_back(current - i); } ++i; } i = 1; while (current + i < size && indices.size() < 9) { - if ((*lp->getPeakData())[current + i].getMSLevel() == 1) { indices.push_back(current + i); } + if (exp[current + i].getMSLevel() == 1) { indices.push_back(current + i); } ++i; } sort(indices.rbegin(), indices.rend()); QMenu* ms1_scans = context_menu->addMenu("Survey scan in 1D"); QMenu* ms1_meta = context_menu->addMenu("Survey scan meta data"); context_menu->addSeparator(); + for (auto idx : indices) { if (idx == current) { ms1_scans->addSeparator(); } - ms1_scans->addAction(QString("RT: ") + QString::number((*lp->getPeakData())[idx].getRT()), + 
ms1_scans->addAction(QString("RT: ") + QString::number(exp[idx].getRT()), [=]() { emit showSpectrumAsNew1D(idx); }); if (idx == current) { ms1_scans->addSeparator(); } if (idx == current) { ms1_meta->addSeparator(); } - ms1_meta->addAction(QString("RT: ") + QString::number((*lp->getPeakData())[idx].getRT()), + ms1_meta->addAction(QString("RT: ") + QString::number(exp[idx].getRT()), [=]() { showMetaData(true, idx); }); if (idx == current) { ms1_meta->addSeparator(); } } @@ -950,14 +954,15 @@ namespace OpenMS QMenu* msn_scans = new QMenu("fragment scan in 1D"); QMenu* msn_meta = new QMenu("fragment scan meta data"); bool item_added = collectFragmentScansInArea_(check_area, msn_scans, msn_meta); - if (! item_added) + if (!item_added) { // Now simply go for the 5 closest points in RT and check whether there // are any scans. // NOTE: that if we go for the visible area, we run the // risk of iterating through *all* the scans. check_area.RangeMZ::extend((RangeMZ)visible_area_.getAreaUnit()); - const auto& specs = lp->getPeakData()->getSpectra(); + const auto& exp = lp->getPeakData()->getMSExperiment(); + const auto& specs = exp.getSpectra(); check_area.RangeRT::operator=(RangeRT(specs[indices.back()].getRT(), specs[indices.front()].getRT())); item_added = collectFragmentScansInArea_(check_area, msn_scans, msn_meta); @@ -966,6 +971,7 @@ namespace OpenMS item_added = collectFragmentScansInArea_(visible_area_.getAreaUnit(), msn_scans, msn_meta); } } + if (item_added) { context_menu->addMenu(msn_scans); @@ -973,7 +979,7 @@ namespace OpenMS context_menu->addSeparator(); } - auto it_closest_MS = lp->getPeakData()->getClosestSpectrumInRT(e_units.getMinRT()); + auto it_closest_MS = lp->getPeakData()->getMSExperiment().getClosestSpectrumInRT(e_units.getMinRT()); if (it_closest_MS->containsIMData()) { context_menu->addAction( @@ -1072,7 +1078,7 @@ namespace OpenMS settings_menu->addAction("Show/hide projections"); settings_menu->addAction("Show/hide MS/MS precursors"); - const 
PeakMap& exp = *lc->getChromatogramData(); + const PeakMap& exp = lc->getChromatogramData()->getMSExperiment(); constexpr int CHROMATOGRAM_SHOW_MZ_RANGE = 10; auto search_area = e_units; @@ -1527,8 +1533,10 @@ namespace OpenMS { auto& layer = dynamic_cast(getCurrentLayer()); bool item_added = false; - const auto last_RT = layer.getPeakData()->RTEnd(range.getMaxRT()); - for (ExperimentType::ConstIterator it = layer.getPeakData()->RTBegin(range.getMinRT()); + const MSExperiment& peak_data = layer.getPeakData()->getMSExperiment(); + const auto last_RT = peak_data.RTEnd(range.getMaxRT()); + + for (auto it = peak_data.RTBegin(range.getMinRT()); it != last_RT; ++it) { if (it->getPrecursors().empty()) continue; @@ -1537,10 +1545,10 @@ namespace OpenMS if (it->getMSLevel() > 1 && range.containsMZ(mz)) { msn_scans->addAction(QString("RT: ") + QString::number(it->getRT()) + " mz: " + QString::number(mz), - [=]() { emit showSpectrumAsNew1D(it - layer.getPeakData()->begin()); }); - + [=]() { emit showSpectrumAsNew1D(it - peak_data.begin()); }); msn_meta->addAction(QString("RT: ") + QString::number(it->getRT()) + " mz: " + QString::number(mz), - [=]() { showMetaData(true, it - layer.getPeakData()->begin()); }); + [=]() { showMetaData(true, it - peak_data.begin()); }); + item_added = true; } } diff --git a/src/openms_gui/source/VISUAL/Plot3DCanvas.cpp b/src/openms_gui/source/VISUAL/Plot3DCanvas.cpp index a7b27659098..6ebd50440fc 100644 --- a/src/openms_gui/source/VISUAL/Plot3DCanvas.cpp +++ b/src/openms_gui/source/VISUAL/Plot3DCanvas.cpp @@ -89,8 +89,8 @@ namespace OpenMS // Abort if no data points are contained auto& layer = dynamic_cast(getCurrentLayer()); - - if (layer.getPeakData()->empty()) + const MSExperiment& peak_data = layer.getPeakData()->getMSExperiment(); + if (peak_data.empty()) { popIncompleteLayer_("Cannot add a dataset that contains no survey scans. 
Aborting!"); return false; diff --git a/src/openms_gui/source/VISUAL/Plot3DOpenGLCanvas.cpp b/src/openms_gui/source/VISUAL/Plot3DOpenGLCanvas.cpp index a3c12986151..9e798588371 100644 --- a/src/openms_gui/source/VISUAL/Plot3DOpenGLCanvas.cpp +++ b/src/openms_gui/source/VISUAL/Plot3DOpenGLCanvas.cpp @@ -526,8 +526,9 @@ namespace OpenMS } const auto area = canvas_3d_.visible_area_.getAreaUnit(); - auto begin_it = layer.getPeakData()->areaBeginConst(area.getMinRT(), area.getMaxRT(), area.getMinMZ(), area.getMaxMZ()); - auto end_it = layer.getPeakData()->areaEndConst(); + const MSExperiment& peak_data = layer.getPeakData()->getMSExperiment(); + auto begin_it = peak_data.areaBeginConst(area.getMinRT(), area.getMaxRT(), area.getMinMZ(), area.getMaxMZ()); + auto end_it = peak_data.areaEndConst(); // count peaks in area int count = std::distance(begin_it, end_it); @@ -555,7 +556,8 @@ namespace OpenMS } PeakIndex pi = it.getPeakIndex(); - if (layer.filters.passes((*layer.getPeakData())[pi.spectrum], pi.peak)) + const MSExperiment& peak_data = layer.getPeakData()->getMSExperiment(); + if (layer.filters.passes(peak_data[pi.spectrum], pi.peak)) { glBegin(GL_POINTS); double intensity = 0; @@ -615,8 +617,9 @@ namespace OpenMS glLineWidth(layer.param.getValue("dot:line_width")); const auto area = canvas_3d_.visible_area_.getAreaUnit(); - auto begin_it = layer.getPeakData()->areaBeginConst(area.getMinRT(), area.getMaxRT(), area.getMinMZ(), area.getMaxMZ()); - auto end_it = layer.getPeakData()->areaEndConst(); + const MSExperiment& peak_data = layer.getPeakData()->getMSExperiment(); + auto begin_it = peak_data.areaBeginConst(area.getMinRT(), area.getMaxRT(), area.getMinMZ(), area.getMaxMZ()); + auto end_it = peak_data.areaEndConst(); // count peaks in area int count = std::distance(begin_it, end_it); @@ -643,7 +646,8 @@ namespace OpenMS } PeakIndex pi = it.getPeakIndex(); - if (layer.filters.passes((*layer.getPeakData())[pi.spectrum], pi.peak)) + const MSExperiment& peak_data = 
layer.getPeakData()->getMSExperiment(); + if (layer.filters.passes(peak_data[pi.spectrum], pi.peak)) { glBegin(GL_LINES); double intensity = 0; @@ -1087,8 +1091,9 @@ namespace OpenMS for (Size i = 0; i < canvas_3d_.getLayerCount(); i++) { const auto& layer = dynamic_cast(canvas_3d_.getLayer(i)); - auto rt_begin_it = layer.getPeakData()->RTBegin(area.getMinRT()); - auto rt_end_it = layer.getPeakData()->RTEnd(area.getMaxRT()); + const MSExperiment& peak_data = layer.getPeakData()->getMSExperiment(); + auto rt_begin_it = peak_data.RTBegin(area.getMinRT()); + auto rt_end_it = peak_data.RTEnd(area.getMaxRT()); for (auto spec_it = rt_begin_it; spec_it != rt_end_it; ++spec_it) { diff --git a/src/openms_gui/source/VISUAL/PlotCanvas.cpp b/src/openms_gui/source/VISUAL/PlotCanvas.cpp index 2c06f2ad2b0..8e640dd7adc 100644 --- a/src/openms_gui/source/VISUAL/PlotCanvas.cpp +++ b/src/openms_gui/source/VISUAL/PlotCanvas.cpp @@ -424,7 +424,7 @@ namespace OpenMS const String& caption, const bool use_noise_cutoff) { - if (map->getSpectra().empty()) + if (map->getMSExperiment().getSpectra().empty()) { auto msg = "Your input data contains no spectra. 
Not adding layer."; OPENMS_LOG_WARN << msg << std::endl; @@ -446,14 +446,14 @@ namespace OpenMS // calculate noise if (use_noise_cutoff) { - auto cutoff = estimateNoiseFromRandomScans(*map, 1, 10, 5); // 5% of low intensity data is considered noise + auto cutoff = estimateNoiseFromRandomScans(map->getMSExperiment(), 1, 10, 5); // 5% of low intensity data is considered noise DataFilters filters; filters.add(DataFilters::DataFilter(DataFilters::INTENSITY, DataFilters::GREATER_EQUAL, cutoff)); initFilters(filters); } else // no mower, hide zeros if wanted { - if (map->hasZeroIntensities(1)) + if (map->getMSExperiment().hasZeroIntensities(1)) { DataFilters filters; filters.add(DataFilters::DataFilter(DataFilters::INTENSITY, DataFilters::GREATER_EQUAL, 0.001)); @@ -467,7 +467,7 @@ namespace OpenMS bool PlotCanvas::addChromLayer(const ExperimentSharedPtrType& map, ODExperimentSharedPtrType od_map, const String& filename, const String& caption) { - if (map->getChromatograms().empty()) + if (map->getMSExperiment().getChromatograms().empty()) { auto msg = "Your input data contains no chromatograms. 
Not adding layer."; OPENMS_LOG_WARN << msg << std::endl; @@ -747,11 +747,11 @@ namespace OpenMS { if (auto lp = dynamic_cast(&layer)) { - dlg.add(*lp->getPeakDataMuteable()); + dlg.add(lp->getPeakDataMuteable()->getMSExperiment()); // Exception for Plot1DCanvas, here we add the meta data of the one spectrum if (auto lp1 = dynamic_cast(&layer)) { - dlg.add((*lp1->getPeakDataMuteable())[lp1->getCurrentIndex()]); + dlg.add(lp1->getPeakDataMuteable()->getMSExperiment()[lp1->getCurrentIndex()]); } } if (auto lp = dynamic_cast(&layer)) @@ -775,7 +775,7 @@ namespace OpenMS { if (auto lp = dynamic_cast(&layer)) { - dlg.add((*lp->getPeakDataMuteable())[index]); + dlg.add(lp->getPeakDataMuteable()->getMSExperiment()[index]); } else if (auto lp = dynamic_cast(&layer)) { diff --git a/src/openms_gui/source/VISUAL/SpectraIDViewTab.cpp b/src/openms_gui/source/VISUAL/SpectraIDViewTab.cpp index 6abade58875..4421d531df0 100644 --- a/src/openms_gui/source/VISUAL/SpectraIDViewTab.cpp +++ b/src/openms_gui/source/VISUAL/SpectraIDViewTab.cpp @@ -33,6 +33,8 @@ #include #include +//#define DEBUG_SPECTRA_ID_VIEW 1 + using namespace std; ///@improvement write the visibility-status of the columns in toppview.ini and read at start @@ -159,35 +161,34 @@ namespace OpenMS if (is_first_time_loading_ && layer_) { - for (const auto& spec : *layer_->getPeakData()) + auto& annotated_peak_data = *layer_->getPeakData(); + + if (annotated_peak_data.getPeptideIdentifications().empty()) { - if (!spec.getPeptideIdentifications().empty()) + return; + } + + for (const auto& [spec, pepid] : annotated_peak_data) + { + const vector& pep_hits = pepid.getHits(); + //add id_accession as the key of the map and push the peptideID to the vector value- + for (const auto & pep_hit : pep_hits) { - const vector& peptide_ids = spec.getPeptideIdentifications(); + const vector& evidences = pep_hit.getPeptideEvidences(); - for (const auto& pepid : peptide_ids) + for (const auto & evidence : evidences) { - const vector& 
pep_hits = pepid.getHits(); - //add id_accession as the key of the map and push the peptideID to the vector value- - for (const auto & pep_hit : pep_hits) - { - const vector& evidences = pep_hit.getPeptideEvidences(); - - for (const auto & evidence : evidences) - { - const String& id_accession = evidence.getProteinAccession(); - protein_to_peptide_id_map[id_accession].push_back(&pepid); - } - } + const String& id_accession = evidence.getProteinAccession(); + protein_to_peptide_id_map[id_accession].push_back(&pepid); } - } + } } // set is_first_time_loading to false so that the map gets created only the first time! is_first_time_loading_ = false; } } - //extract required part of accession and open browser + // extract required part of accession and open browser QString SpectraIDViewTab::extractNumFromAccession_(const QString& full_accession) { // anchored (^...$) regex for matching accession @@ -407,7 +408,8 @@ namespace OpenMS } int current_spectrum_index = table_widget_->item(row, Clmn::SPEC_INDEX)->data(Qt::DisplayRole).toInt(); - const auto& exp = *layer_->getPeakData(); + const auto& annotated_exp = *layer_->getPeakData(); + const auto& exp = annotated_exp.getMSExperiment(); const auto& spec2 = exp[current_spectrum_index]; // @@ -416,16 +418,16 @@ namespace OpenMS // show precursor spectrum (usually MS1) if (column == Clmn::PRECURSOR_MZ) { - const auto prec_it = exp.getPrecursorSpectrum(exp.begin() + current_spectrum_index); - - if (prec_it != exp.end() && !spec2.getPrecursors().empty()) + const auto prec_it = exp.getPrecursorSpectrum(exp.getSpectra().begin() + current_spectrum_index); + + if (prec_it != exp.getSpectra().end() && !spec2.getPrecursors().empty()) { double precursor_mz = spec2.getPrecursors()[0].getMZ(); // determine start and stop of isolation window double isolation_window_lower_mz = precursor_mz - spec2.getPrecursors()[0].getIsolationWindowLowerOffset(); double isolation_window_upper_mz = precursor_mz + 
spec2.getPrecursors()[0].getIsolationWindowUpperOffset(); - emit spectrumSelected(std::distance(exp.begin(), prec_it), -1, -1);// no identification or hit selected (-1) + emit spectrumSelected(std::distance(exp.getSpectra().begin(), prec_it), -1, -1); // no identification or hit selected (-1) // zoom into precursor area emit requestVisibleArea1D(isolation_window_lower_mz - 50.0, isolation_window_upper_mz + 50.0); } @@ -453,12 +455,12 @@ namespace OpenMS auto item_pepid = table_widget_->item(row, Clmn::ID_NR); if (item_pepid)// might be null for MS1 spectra { - int current_identification_index = item_pepid->data(Qt::DisplayRole).toInt(); + // int current_identification_index = item_pepid->data(Qt::DisplayRole).toInt(); int current_peptide_hit_index = table_widget_->item(row, Clmn::PEPHIT_NR)->data(Qt::DisplayRole).toInt(); - const vector& peptide_ids = spec2.getPeptideIdentifications(); - const vector& pep_hits = peptide_ids[current_identification_index].getHits(); - const PeptideHit& hit = pep_hits[current_peptide_hit_index]; + const PeptideIdentification& peptide_id = annotated_exp.getPeptideIdentifications()[current_spectrum_index]; + const vector& phits = peptide_id.getHits(); + const PeptideHit& hit = phits[current_peptide_hit_index]; // initialize window, when the table is requested for the first time // afterwards the size will stay at the manually resized window size @@ -523,7 +525,7 @@ namespace OpenMS // want the list of unidentified MS2 spectra (obtained by unchecking the 'just hits' button). 
auto* ptr_peak = dynamic_cast(layer); bool no_data = (ptr_peak == nullptr - || (ptr_peak && ptr_peak->getPeakData()->empty())); + || (ptr_peak && ptr_peak->getPeakData()->getMSExperiment().empty())); return !no_data; } @@ -588,17 +590,12 @@ namespace OpenMS // only when checked, otherwise only highlights { int row = selected_spec_row_idx; - int spectrum_index = table_widget_->item(row, Clmn::SPEC_INDEX)->data(Qt::DisplayRole).toInt(); + //int spectrum_index = table_widget_->item(row, Clmn::SPEC_INDEX)->data(Qt::DisplayRole).toInt(); int num_id = table_widget_->item(row, Clmn::ID_NR)->data(Qt::DisplayRole).toInt(); - int num_ph = table_widget_->item(row, Clmn::PEPHIT_NR)->data(Qt::DisplayRole).toInt(); - const auto& spec = layer_->getPeakData()->operator[](spectrum_index); - const vector& pep_id = spec.getPeptideIdentifications(); - - if(!spec.getPeptideIdentifications().empty()) - { - const vector& hits = pep_id[num_id].getHits(); - if (!hits.empty()) accs = hits[num_ph].extractProteinAccessionsSet(); - } + int num_ph = table_widget_->item(row, Clmn::PEPHIT_NR)->data(Qt::DisplayRole).toInt(); + const PeptideIdentification& pep_id = layer_->getPeakData()->getPeptideIdentifications()[num_id]; + const vector& hits = pep_id.getHits(); + if (!hits.empty()) accs = hits[num_ph].extractProteinAccessionsSet(); } // create header labels (setting header labels must occur after fill) @@ -668,6 +665,9 @@ namespace OpenMS void SpectraIDViewTab::updateEntries_() { + #ifdef DEBUG_SPECTRA_ID_VIEW + cout << "Updating entries in SpectraIDViewTab" << endl; + #endif // no valid peak layer attached if (!hasData(layer_)) @@ -699,24 +699,20 @@ namespace OpenMS { std::vector> all_hits; - for (const auto& spec : layer_->getPeakData()->getSpectra()) + for (auto [spectrum, peptide_id] : *layer_->getPeakData()) { - UInt ms_level = spec.getMSLevel(); - const vector& peptide_ids = spec.getPeptideIdentifications(); + UInt ms_level = spectrum.getMSLevel(); - if (ms_level != 2 || 
peptide_ids.empty()) // skip non ms2 spectra and spectra with no identification + if (ms_level != 2) // skip non ms2 spectra and spectra with no identification { continue; } - for (const auto& pep_id : peptide_ids) + const vector& phits = peptide_id.getHits(); + all_hits.insert(all_hits.end(), phits.begin(), phits.end()); + if (!has_peak_annotations && !phits.empty() && !phits[0].getPeakAnnotations().empty()) { - const vector& phits = pep_id.getHits(); - all_hits.insert(all_hits.end(), phits.begin(), phits.end()); - if (!has_peak_annotations && !phits[0].getPeakAnnotations().empty()) - { - has_peak_annotations = true; - } + has_peak_annotations = true; } } @@ -748,25 +744,25 @@ namespace OpenMS // generate flat list int selected_row(-1); // index i is needed, so iterate the old way... - for (Size i = 0; i < layer_->getPeakData()->size(); ++i) + for (Size i = 0; i < layer_->getPeakData()->getMSExperiment().size(); ++i) { - const MSSpectrum& spectrum = (*layer_->getPeakData())[i]; + auto [spectrum, peptide_id] = (*layer_->getPeakData())[i]; const UInt ms_level = spectrum.getMSLevel(); - const vector& pi = spectrum.getPeptideIdentifications(); - const Size id_count = pi.size(); const vector & precursors = spectrum.getPrecursors(); + const Size id_count = peptide_id.getHits().size(); // allow only MS2 OR MS1 with peptideIDs (from Mass Fingerprinting) - if (ms_level != 2 && id_count == 0) + if (ms_level != 2) { continue; } // skip - if (hide_no_identification_->isChecked() && id_count == 0) + if (hide_no_identification_->isChecked() && id_count == 0) { continue; } + // set row background color QColor bg_color = (id_count == 0 ? 
Qt::white : QColor::fromRgb(127,255,148)); @@ -780,107 +776,113 @@ namespace OpenMS } else { - for (Size pi_idx = 0; pi_idx != id_count; ++pi_idx) + // get peptide identifications of current spectrum + #ifdef DEBUG_SPECTRA_ID_VIEW + cout << "Peptide hits: " << peptide_id.getHits().size() << endl; + #endif + + for (Size ph_idx = 0; ph_idx != peptide_id.getHits().size(); ++ph_idx) { - for (Size ph_idx = 0; ph_idx != pi[pi_idx].getHits().size(); ++ph_idx) - { - const PeptideHit& ph = pi[pi_idx].getHits()[ph_idx]; + #ifdef DEBUG_SPECTRA_ID_VIEW + cout << "Peptide hit index: " << ph_idx << endl; + cout << "Peptide hit: " << peptide_id.getHits()[ph_idx].getSequence().toString() << endl; + #endif + const PeptideHit& ph = peptide_id.getHits()[ph_idx]; - // add new row at the end of the table - table_widget_->insertRow(table_widget_->rowCount()); + // add new row at the end of the table + table_widget_->insertRow(table_widget_->rowCount()); - fillRow_(spectrum, i, bg_color); + fillRow_(spectrum, i, bg_color); - table_widget_->setAtBottomRow(ph.getScore(), Clmn::SCORE, bg_color); - table_widget_->setAtBottomRow((int)ph.getRank(), Clmn::RANK, bg_color); - table_widget_->setAtBottomRow(ph.getCharge(), Clmn::CHARGE, bg_color); + table_widget_->setAtBottomRow(ph.getScore(), Clmn::SCORE, bg_color); + table_widget_->setAtBottomRow((int)ph.getRank(), Clmn::RANK, bg_color); + table_widget_->setAtBottomRow(ph.getCharge(), Clmn::CHARGE, bg_color); - // sequence - String seq = ph.getSequence().toString(); - if (seq.empty()) - { - seq = ph.getMetaValue("label"); - } - table_widget_->setAtBottomRow(seq.toQString(), Clmn::SEQUENCE, bg_color); + // sequence + String seq = ph.getSequence().toString(); + if (seq.empty()) + { + seq = ph.getMetaValue("label"); + } + table_widget_->setAtBottomRow(seq.toQString(), Clmn::SEQUENCE, bg_color); - // accession - set protein_accessions = ph.extractProteinAccessionsSet(); - String accessions = ListUtils::concatenate(vector(protein_accessions.begin(), 
protein_accessions.end()), ", "); - table_widget_->setAtBottomRow(accessions.toQString(), Clmn::ACCESSIONS, bg_color); - table_widget_->setAtBottomRow((int)(pi_idx), Clmn::ID_NR, bg_color); - table_widget_->setAtBottomRow((int)(ph_idx), Clmn::PEPHIT_NR, bg_color); + // accession + set protein_accessions = ph.extractProteinAccessionsSet(); + String accessions = ListUtils::concatenate(vector(protein_accessions.begin(), protein_accessions.end()), ", "); + table_widget_->setAtBottomRow(accessions.toQString(), Clmn::ACCESSIONS, bg_color); + table_widget_->setAtBottomRow((int) i, Clmn::ID_NR, bg_color); // spectrum index + table_widget_->setAtBottomRow((int)(ph_idx), Clmn::PEPHIT_NR, bg_color); - bool selected(false); - if (ph.metaValueExists("selected")) + bool selected(false); + if (ph.metaValueExists("selected")) + { + selected = ph.getMetaValue("selected").toString() == "true"; + } + table_widget_->setAtBottomRow(selected, Clmn::CURATED, bg_color); + + // additional precursor infos, e.g. ppm error + if (!precursors.empty()) + { + const Precursor& first_precursor = precursors.front(); + double ppm_error(0); + // Protein:RNA cross-link, Protein-Protein cross-link, or other data with a precomputed precursor error + if (ph.metaValueExists(Constants::UserParam::PRECURSOR_ERROR_PPM_USERPARAM)) + { + ppm_error = fabs((double)ph.getMetaValue(Constants::UserParam::PRECURSOR_ERROR_PPM_USERPARAM)); + } + else if (ph.metaValueExists("OMS:precursor_mz_error_ppm")) // for legacy reasons added in OpenMS 2.5 { - selected = ph.getMetaValue("selected").toString() == "true"; + ppm_error = fabs((double)ph.getMetaValue("OMS:precursor_mz_error_ppm")); } - table_widget_->setAtBottomRow(selected, Clmn::CURATED, bg_color); + else if (!ph.getSequence().empty()) // works for normal linear fragments with the correct modifications included in the AASequence + { + double exp_precursor = first_precursor.getMZ(); + int charge = first_precursor.getCharge(); + double theo_precursor= 
ph.getSequence().getMZ(charge); + ppm_error = fabs((exp_precursor - theo_precursor) / exp_precursor / 1e-6); + } + table_widget_->setAtBottomRow(ppm_error, Clmn::PREC_PPM, bg_color); + } - // additional precursor infos, e.g. ppm error - if (!precursors.empty()) + // add additional meta value columns + if (create_rows_for_commmon_metavalue_->isChecked()) + { + Int current_col = Clmn::PEAK_ANNOTATIONS; + // add peak annotation column (part of meta-value assessment above) + if (has_peak_annotations) { - const Precursor& first_precursor = precursors.front(); - double ppm_error(0); - // Protein:RNA cross-link, Protein-Protein cross-link, or other data with a precomputed precursor error - if (ph.metaValueExists(Constants::UserParam::PRECURSOR_ERROR_PPM_USERPARAM)) - { - ppm_error = fabs((double)ph.getMetaValue(Constants::UserParam::PRECURSOR_ERROR_PPM_USERPARAM)); - } - else if (ph.metaValueExists("OMS:precursor_mz_error_ppm")) // for legacy reasons added in OpenMS 2.5 + // set hidden data for export to TSV + QString annotation; + for (const PeptideHit::PeakAnnotation& pa : ph.getPeakAnnotations()) { - ppm_error = fabs((double)ph.getMetaValue("OMS:precursor_mz_error_ppm")); + annotation += String(pa.mz).toQString() + "|" + + String(pa.intensity).toQString() + "|" + + String(pa.charge).toQString() + "|" + + pa.annotation.toQString() + ";"; } - else if (!ph.getSequence().empty()) // works for normal linear fragments with the correct modifications included in the AASequence - { - double exp_precursor = first_precursor.getMZ(); - int charge = first_precursor.getCharge(); - double theo_precursor= ph.getSequence().getMZ(charge); - ppm_error = fabs((exp_precursor - theo_precursor) / exp_precursor / 1e-6); - } - table_widget_->setAtBottomRow(ppm_error, Clmn::PREC_PPM, bg_color); + QTableWidgetItem* item = table_widget_->setAtBottomRow("show", current_col, bg_color, Qt::blue); + item->setData(Qt::UserRole, annotation); + ++current_col; } - - // add additional meta value columns - 
if (create_rows_for_commmon_metavalue_->isChecked()) + for (const auto& ck : common_keys) { - Int current_col = Clmn::PEAK_ANNOTATIONS; - // add peak annotation column (part of meta-value assessment above) - if (has_peak_annotations) + const DataValue& dv = ph.getMetaValue(ck); + if (dv.valueType() == DataValue::DOUBLE_VALUE) { - // set hidden data for export to TSV - QString annotation; - for (const PeptideHit::PeakAnnotation& pa : ph.getPeakAnnotations()) - { - annotation += String(pa.mz).toQString() + "|" + - String(pa.intensity).toQString() + "|" + - String(pa.charge).toQString() + "|" + - pa.annotation.toQString() + ";"; - } - QTableWidgetItem* item = table_widget_->setAtBottomRow("show", current_col, bg_color, Qt::blue); - item->setData(Qt::UserRole, annotation); - ++current_col; + table_widget_->setAtBottomRow(double(dv), current_col, bg_color); } - for (const auto& ck : common_keys) + else { - const DataValue& dv = ph.getMetaValue(ck); - if (dv.valueType() == DataValue::DOUBLE_VALUE) - { - table_widget_->setAtBottomRow(double(dv), current_col, bg_color); - } - else - { - table_widget_->setAtBottomRow(dv.toQString(), current_col, bg_color); - } - - ++current_col; + table_widget_->setAtBottomRow(dv.toQString(), current_col, bg_color); } + + ++current_col; } } } } - if ((int)i == restore_spec_index) + if ((int)restore_spec_index) { // get model index of selected spectrum, // as table_widget_->rowCount() returns rows starting from 1, selected row is 1 less than the returned row @@ -937,7 +939,7 @@ namespace OpenMS void SpectraIDViewTab::saveIDs_() { // no valid peak layer attached - if (layer_ == nullptr || layer_->getPeakData()->empty() || layer_->type != LayerDataBase::DT_PEAK) + if (layer_ == nullptr || layer_->getPeakData()->getMSExperiment().empty() || layer_->type != LayerDataBase::DT_PEAK) { return; } @@ -945,7 +947,7 @@ namespace OpenMS // synchronize PeptideHits with the annotations in the spectrum dynamic_cast(layer_)->synchronizePeakAnnotations(); - 
vector prot_id = (*layer_->getPeakData()).getProteinIdentifications(); + vector prot_id = layer_->getPeakData()->getProteinIdentifications(); vector all_pep_ids; // collect PeptideIdentifications from each spectrum, while making sure each spectrum is only considered once @@ -963,9 +965,8 @@ namespace OpenMS } added_spectra.insert(spectrum_index); - // collect all PeptideIdentifications from this spectrum - const vector& pep_id = (*layer_->getPeakData())[spectrum_index].getPeptideIdentifications(); - copy(pep_id.begin(), pep_id.end(), back_inserter(all_pep_ids)); + const PeptideIdentification& pep_id = (*layer_->getPeakData())[spectrum_index].second; + all_pep_ids.push_back(pep_id); } QString filename = GUIHelpers::getSaveFilename(this, "Save file", "", FileTypeList({FileTypes::IDXML, FileTypes::MZIDENTML}), true, FileTypes::IDXML); @@ -987,17 +988,17 @@ namespace OpenMS // extract position of the correct Spectrum, PeptideIdentification and PeptideHit from the table int row = item->row(); String selected = item->checkState() == Qt::Checked ? 
"true" : "false"; - int spectrum_index = table_widget_->item(row, Clmn::SPEC_INDEX)->data(Qt::DisplayRole).toInt(); + // int spectrum_index = table_widget_->item(row, Clmn::SPEC_INDEX)->data(Qt::DisplayRole).toInt(); int num_id = table_widget_->item(row, Clmn::ID_NR)->data(Qt::DisplayRole).toInt(); int num_ph = table_widget_->item(row, Clmn::PEPHIT_NR)->data(Qt::DisplayRole).toInt(); // maintain sortability of our checkbox column TableView::updateCheckBoxItem(item); - vector& pep_id = (*layer_->getPeakDataMuteable())[spectrum_index].getPeptideIdentifications(); + PeptideIdentification& pep_id = (*layer_->getPeakDataMuteable())[num_id].second; // update "selected" value in the correct PeptideHits - vector& hits = pep_id[num_id].getHits(); + vector& hits = pep_id.getHits(); // XL-MS specific case, both PeptideHits belong to the same cross-link if (hits[0].metaValueExists("xl_chain")) { @@ -1015,8 +1016,23 @@ namespace OpenMS void SpectraIDViewTab::fillRow_(const MSSpectrum& spectrum, const int spec_index, const QColor& background_color) { + // fill spectrum information in columns const vector& precursors = spectrum.getPrecursors(); + #ifdef DEBUG_SPECTRA_ID_VIEW + cout << "Filling row in SpectraIDViewTab" << endl; + cout << spectrum.getMSLevel() << endl + << "RT: " << spectrum.getRT() << endl + << "Scan mode: " << spectrum.getInstrumentSettings().getScanMode() << endl + << "Zoom scan: " << spectrum.getInstrumentSettings().getZoomScan() << endl + << "Spectrum index: " << spec_index << endl + << "Precursor MZ: " << (precursors.empty() ? 0 : precursors.front().getMZ()) << endl + << "Precursor charge: " << (precursors.empty() ? 0 : precursors.front().getCharge()) << endl + << "Precursor intensity: " << (precursors.empty() ? 
0 : precursors.front().getIntensity()) << endl + << endl; + #endif + + table_widget_->setAtBottomRow(QString::number(spectrum.getMSLevel()), Clmn::MS_LEVEL, background_color); table_widget_->setAtBottomRow(spec_index, Clmn::SPEC_INDEX, background_color); table_widget_->setAtBottomRow(spectrum.getRT(), Clmn::RT, background_color); diff --git a/src/openms_gui/source/VISUAL/SpectraTreeTab.cpp b/src/openms_gui/source/VISUAL/SpectraTreeTab.cpp index 8d9cd47d440..831a6eba57c 100644 --- a/src/openms_gui/source/VISUAL/SpectraTreeTab.cpp +++ b/src/openms_gui/source/VISUAL/SpectraTreeTab.cpp @@ -334,16 +334,16 @@ namespace OpenMS std::vector parent_stack; parent_stack.push_back(nullptr); bool fail = false; - last_peakmap_ = &(*cl.getPeakData()); + last_peakmap_ = &(cl.getPeakData()->getMSExperiment()); spectra_treewidget_->setHeaders(ClmnPeak::HEADER_NAMES); - for (Size i = 0; i < cl.getPeakData()->size(); ++i) + for (Size i = 0; i < cl.getPeakData()->getMSExperiment().size(); ++i) { - const MSSpectrum& current_spec = (*cl.getPeakData())[i]; + const MSSpectrum& current_spec = cl.getPeakData()->getMSExperiment()[i]; if (i > 0) { - const MSSpectrum& prev_spec = (*cl.getPeakData())[i-1]; + const MSSpectrum& prev_spec = cl.getPeakData()->getMSExperiment()[i-1]; // current MS level = previous MS level + 1 (e.g. 
current: MS2, previous: MS1) if (current_spec.getMSLevel() == prev_spec.getMSLevel() + 1) { @@ -412,9 +412,9 @@ namespace OpenMS spectra_treewidget_->clear(); toplevel_items.clear(); selected_item = nullptr; - for (Size i = 0; i < cl.getPeakData()->size(); ++i) + for (Size i = 0; i < cl.getPeakData()->getMSExperiment().size(); ++i) { - const MSSpectrum& current_spec = (*cl.getPeakData())[i]; + const MSSpectrum& current_spec = cl.getPeakData()->getMSExperiment()[i]; toplevel_item = new QTreeWidgetItem(); populatePeakDataRow_(toplevel_item, i, current_spec); @@ -438,9 +438,9 @@ namespace OpenMS spectra_treewidget_->setCurrentItem(selected_item); spectra_treewidget_->scrollToItem(selected_item); } - if (cl.getPeakData()->size() > 1) + if (cl.getPeakData()->getMSExperiment().size() > 1) { - more_than_one_spectrum = false; + more_than_one_spectrum = false; // why is this false if > 1??????? } } // Branch if the current layer is a chromatogram (either indicated by its @@ -450,13 +450,13 @@ namespace OpenMS const auto cl = *lp; LayerDataBase::ConstExperimentSharedPtrType exp = cl.getChromatogramData(); - if (last_peakmap_ == exp.get()) + if (last_peakmap_ == &exp->getMSExperiment()) { // underlying data did not change (which is ALWAYS the chromatograms, never peakdata!) // --> Do not update (could be many 10k entries for sqMass data and the lag would be unbearable ...) return; } - last_peakmap_ = exp.get(); + last_peakmap_ = &exp->getMSExperiment(); spectra_treewidget_->clear(); // New data: // We need to redraw the whole Widget because the we have changed all the layers. @@ -464,22 +464,23 @@ namespace OpenMS // whether multiple ones are selected. 
bool multiple_select = false; int this_selected_item = -1; - if (!cl.getChromatogramData()->empty()) + const MSExperiment& chrom_data = cl.getChromatogramData()->getMSExperiment(); + if (!chrom_data.empty()) { - if (cl.getChromatogramData()->metaValueExists("multiple_select")) + if (chrom_data.metaValueExists("multiple_select")) { - multiple_select = cl.getChromatogramData()->getMetaValue("multiple_select").toBool(); + multiple_select = chrom_data.getMetaValue("multiple_select").toBool(); } - if (cl.getChromatogramData()->metaValueExists("selected_chromatogram")) + if (chrom_data.metaValueExists("selected_chromatogram")) { - this_selected_item = (int)cl.getChromatogramData()->getMetaValue("selected_chromatogram"); + this_selected_item = (int)chrom_data.getMetaValue("selected_chromatogram"); } } // create a header list spectra_treewidget_->setHeaders(ClmnChrom::HEADER_NAMES); - if (exp->getChromatograms().size() > 1) + if (exp->getMSExperiment().getChromatograms().size() > 1) { more_than_one_spectrum = false; } @@ -491,9 +492,9 @@ namespace OpenMS std::map, Precursor::MZLess>& map_precursor_to_chrom_idx = map_precursor_to_chrom_idx_cache_[(size_t)(exp.get())]; if (!was_cached) { // create cache: collect all precursor that fall into the mz rt window - for (auto it = exp->getChromatograms().cbegin(); it != exp->getChromatograms().cend(); ++it) + for (auto it = exp->getMSExperiment().getChromatograms().cbegin(); it != exp->getMSExperiment().getChromatograms().cend(); ++it) { - map_precursor_to_chrom_idx[it->getPrecursor()].push_back(it - exp->getChromatograms().begin()); + map_precursor_to_chrom_idx[it->getPrecursor()].push_back(it - exp->getMSExperiment().getChromatograms().begin()); } } @@ -525,7 +526,7 @@ namespace OpenMS // Show single chromatogram: iterate over all chromatograms corresponding to the current precursor and add action for the single chromatogram for (const Size chrom_idx : indx) { - const MSChromatogram& current_chromatogram = 
exp->getChromatograms()[chrom_idx]; + const MSChromatogram& current_chromatogram = exp->getMSExperiment().getChromatograms()[chrom_idx]; // Children chromatogram entry QTreeWidgetItem* sub_item = new QTreeWidgetItem(toplevel_item); diff --git a/src/openms_gui/source/VISUAL/TVIdentificationViewController.cpp b/src/openms_gui/source/VISUAL/TVIdentificationViewController.cpp index 45744d01f15..d4d7638f769 100644 --- a/src/openms_gui/source/VISUAL/TVIdentificationViewController.cpp +++ b/src/openms_gui/source/VISUAL/TVIdentificationViewController.cpp @@ -28,8 +28,9 @@ #include #include - #include +#include + #include #include @@ -54,6 +55,9 @@ namespace OpenMS void TVIdentificationViewController::showSpectrumAsNew1D(int spectrum_index, int peptide_id_index, int peptide_hit_index) { + #ifdef DEBUG_IDENTIFICATION_VIEW + cout << "TVIdentificationViewController::showSpectrumAsNew1D() called" << endl; + #endif // basic behavior 1 auto& layer = tv_->getActiveCanvas()->getCurrentLayer(); @@ -97,44 +101,51 @@ namespace OpenMS // get peptide identification auto layer_1d_peak = dynamic_cast(&w->canvas()->getCurrentLayer()); - const vector& pis = layer_1d_peak->getCurrentSpectrum().getPeptideIdentifications(); + const auto& pids = layer_1d_peak->getPeakData()->getPeptideIdentifications(); + if (peptide_id_index >= static_cast(pids.size())) + { + OPENMS_LOG_FATAL_ERROR << "PeptideIdentification index out of bounds! Aborting!" << endl; + return; + } + const PeptideIdentification& pi = pids[peptide_id_index]; - if (!pis.empty()) + switch (layer_1d_peak->getCurrentSpectrum().getMSLevel()) { - switch (layer_1d_peak->getCurrentSpectrum().getMSLevel()) - { - // mass fingerprint annotation of name etc. - case 1: - { - addPeakAnnotations_(pis); - break; - } + // mass fingerprint annotation of name etc. 
+ case 1: + { + addPeakAnnotations_(std::vector(1, pi)); + break; + } - // annotation with stored fragments or synthesized theoretical spectrum - case 2: + // annotation with stored fragments or synthesized theoretical spectrum + case 2: + { + // check if index in bounds and hits are present + if (peptide_hit_index < static_cast(pi.getHits().size())) { - // check if index in bounds and hits are present - if (peptide_id_index < static_cast(pis.size()) - && peptide_hit_index < static_cast(pis[peptide_id_index].getHits().size())) + #ifdef DEBUG_IDENTIFICATION_VIEW + cout << "Creating annotations for PeptideIdentification index: " << peptide_id_index << endl; + cout << "PeptideHit index: " << peptide_hit_index << endl; + cout << "PeptideHit: " << pi.getHits()[peptide_hit_index].getSequence().toString() << endl; + #endif + // get hit + PeptideHit ph = pi.getHits()[peptide_hit_index]; + if (ph.getPeakAnnotations().empty()) { - // get hit - PeptideHit ph = pis[peptide_id_index].getHits()[peptide_hit_index]; - if (ph.getPeakAnnotations().empty()) - { - // if no fragment annotations are stored, create a theoretical spectrum - addTheoreticalSpectrumLayer_(ph); - } - else - { - // otherwise, use stored fragment annotations - addPeakAnnotationsFromID_(ph); - } + // if no fragment annotations are stored, create a theoretical spectrum + addTheoreticalSpectrumLayer_(ph); + } + else + { + // otherwise, use stored fragment annotations + addPeakAnnotationsFromID_(ph); } - break; } - default: - OPENMS_LOG_WARN << "Annotation of MS level > 2 not supported.!" << endl; + break; } + default: + OPENMS_LOG_WARN << "Annotation of MS level > 2 not supported.!" << endl; } // TODO Why would this need to trigger an update in e.g. the Tab Views?? 
@@ -148,6 +159,10 @@ namespace OpenMS void TVIdentificationViewController::addPeakAnnotations_(const vector& ph) { + #ifdef DEBUG_IDENTIFICATION_VIEW + cout << "TVIdentificationViewController::addPeakAnnotations() called" << endl; + #endif + // called anew for every click on a spectrum auto getCurrentLayer = [&]() -> LayerData1DPeak& { return dynamic_cast(tv_->getActive1DWidget()->canvas()->getCurrentLayer()); }; @@ -282,6 +297,10 @@ namespace OpenMS int peptide_id_index, int peptide_hit_index) { + #ifdef DEBUG_IDENTIFICATION_VIEW + cout << "TVIdentificationViewController::activate1DSpectrum() called" << endl; + #endif + Plot1DWidget* widget_1D = tv_->getActive1DWidget(); // if no active 1D widget is present @@ -297,35 +316,43 @@ namespace OpenMS auto current_layer = [&]() -> LayerData1DPeak& { return dynamic_cast(tv_->getActive1DWidget()->canvas()->getCurrentLayer()); }; widget_1D->canvas()->activateSpectrum(spectrum_index); - current_layer().peptide_id_index = peptide_id_index; + current_layer().peptide_id_index = peptide_id_index; // should always ne 0 current_layer().peptide_hit_index = peptide_hit_index; if (current_layer().type == LayerDataBase::DT_PEAK) { UInt ms_level = current_layer().getCurrentSpectrum().getMSLevel(); - const vector& pis = current_layer().getCurrentSpectrum().getPeptideIdentifications(); + const PeptideIdentification& pid = current_layer().getPeakData()->getPeptideIdentifications()[spectrum_index]; + #ifdef DEBUG_IDENTIFICATION_VIEW + cout << "PeptideIdentification index: " << peptide_id_index << endl; + cout << "PeptideHit index: " << peptide_hit_index << endl; + cout << "PeptideHit: " << pid.getHits()[peptide_hit_index].getSequence().toString() << endl; + cout << "MS level: " << ms_level << endl; + cout << "Spectrum index: " << spectrum_index << endl; + #endif + switch (ms_level) { case 1: // mass fingerprint annotation of name etc and precursor labels { - addPeakAnnotations_(pis); + addPeakAnnotations_(std::vector(1, pid)); vector 
precursors; // collect all MS2 spectra precursor till next MS1 spectrum is encountered - for (Size i = spectrum_index + 1; i < current_layer().getPeakData()->size(); ++i) + for (Size i = spectrum_index + 1; i < current_layer().getPeakData()->getMSExperiment().size(); ++i) { - if ((*current_layer().getPeakData())[i].getMSLevel() == 1) + if (current_layer().getPeakData()->getMSExperiment()[i].getMSLevel() == 1) { break; } // skip MS2 without precursor - if ((*current_layer().getPeakData())[i].getPrecursors().empty()) + if (current_layer().getPeakData()->getMSExperiment()[i].getPrecursors().empty()) { continue; } // there should be only one precursor per MS2 spectrum. - vector pcs = (*current_layer().getPeakData())[i].getPrecursors(); + vector pcs = current_layer().getPeakData()->getMSExperiment()[i].getPrecursors(); copy(pcs.begin(), pcs.end(), back_inserter(precursors)); } addPrecursorLabels1D_(precursors); @@ -333,134 +360,136 @@ namespace OpenMS } case 2: // annotation with stored fragments or synthesized theoretical spectrum { - // check if index in bounds and hits are present - if (peptide_id_index < static_cast(pis.size()) && peptide_hit_index < static_cast(pis[peptide_id_index].getHits().size())) - { - // get selected hit - PeptideHit ph = pis[peptide_id_index].getHits()[peptide_hit_index]; + // get selected hit + PeptideHit ph = pid.getHits()[peptide_hit_index]; - if (ph.getPeakAnnotations().empty()) - { - // if no fragment annotations are stored, create a theoretical spectrum - addTheoreticalSpectrumLayer_(ph); - - // synchronize PeptideHits with the annotations in the spectrum - current_layer().synchronizePeakAnnotations(); - // remove labels and theoretical spectrum (will be recreated using PH annotations) - removeGraphicalPeakAnnotations_(spectrum_index); - removeTheoreticalSpectrumLayer_(); + if (ph.getPeakAnnotations().empty()) + { + // if no fragment annotations are stored, create a theoretical spectrum + addTheoreticalSpectrumLayer_(ph); + + // 
synchronize PeptideHits with the annotations in the spectrum + current_layer().synchronizePeakAnnotations(); + // remove labels and theoretical spectrum (will be recreated using PH annotations) + removeGraphicalPeakAnnotations_(spectrum_index); + removeTheoreticalSpectrumLayer_(); + + // return if no active 1D widget is present + if (widget_1D == nullptr) + { + return; + } + // update current PeptideHit with the synchronized one + widget_1D->canvas()->activateSpectrum(spectrum_index); + const PeptideIdentification & pi2 = current_layer().getPeakData()->getPeptideIdentifications()[spectrum_index]; + ph = pi2.getHits()[peptide_hit_index]; - // return if no active 1D widget is present - if (widget_1D == nullptr) - { - return; - } - // update current PeptideHit with the synchronized one - widget_1D->canvas()->activateSpectrum(spectrum_index); - const vector& pis2 = current_layer().getCurrentSpectrum().getPeptideIdentifications(); - ph = pis2[peptide_id_index].getHits()[peptide_hit_index]; + } + // use stored fragment annotations + #ifdef DEBUG_IDENTIFICATION_VIEW + cout << "Creating annotations for PeptideIdentification index: " << peptide_id_index << endl; + cout << "PeptideHit index: " << peptide_hit_index << endl; + cout << "PeptideHit: " << ph.getSequence().toString() << endl; + #endif + addPeakAnnotationsFromID_(ph); + + if (ph.metaValueExists(Constants::UserParam::OPENPEPXL_XL_TYPE)) // if this meta value exists, this should be an XL-MS annotation + { + String box_text; + String vert_bar = "|"; + if (ph.getMetaValue(Constants::UserParam::OPENPEPXL_XL_TYPE) == "loop-link") + { + String hor_bar = "_"; + String seq_alpha = ph.getSequence().toUnmodifiedString(); + int xl_pos_alpha = String(ph.getMetaValue(Constants::UserParam::OPENPEPXL_XL_POS1)).toInt(); + int xl_pos_beta = String(ph.getMetaValue(Constants::UserParam::OPENPEPXL_XL_POS2)).toInt() - xl_pos_alpha - 1; + + String alpha_cov; + String beta_cov; + extractCoverageStrings(ph.getPeakAnnotations(), alpha_cov, 
beta_cov, seq_alpha.size(), 0); + + // String formatting + box_text += alpha_cov + "
" + seq_alpha + "
" + String(xl_pos_alpha, ' ') + vert_bar + n_times(xl_pos_beta, hor_bar) + vert_bar; + // cut out line: "
" + String(xl_pos_alpha, ' ') + vert_bar + String(xl_pos_beta, ' ') + vert_bar + } - // use stored fragment annotations - addPeakAnnotationsFromID_(ph); - - if (ph.metaValueExists(Constants::UserParam::OPENPEPXL_XL_TYPE)) // if this meta value exists, this should be an XL-MS annotation + else if (ph.getMetaValue(Constants::UserParam::OPENPEPXL_XL_TYPE) == "cross-link") { - String box_text; - String vert_bar = "|"; - - if (ph.getMetaValue(Constants::UserParam::OPENPEPXL_XL_TYPE) == "loop-link") - { - String hor_bar = "_"; - String seq_alpha = ph.getSequence().toUnmodifiedString(); - int xl_pos_alpha = String(ph.getMetaValue(Constants::UserParam::OPENPEPXL_XL_POS1)).toInt(); - int xl_pos_beta = String(ph.getMetaValue(Constants::UserParam::OPENPEPXL_XL_POS2)).toInt() - xl_pos_alpha - 1; - - String alpha_cov; - String beta_cov; - extractCoverageStrings(ph.getPeakAnnotations(), alpha_cov, beta_cov, seq_alpha.size(), 0); - - // String formatting - box_text += alpha_cov + "
" + seq_alpha + "
" + String(xl_pos_alpha, ' ') + vert_bar + n_times(xl_pos_beta, hor_bar) + vert_bar; - // cut out line: "
" + String(xl_pos_alpha, ' ') + vert_bar + String(xl_pos_beta, ' ') + vert_bar + - } - else if (ph.getMetaValue(Constants::UserParam::OPENPEPXL_XL_TYPE) == "cross-link") - { - String seq_alpha = ph.getSequence().toUnmodifiedString(); - String seq_beta = AASequence::fromString(ph.getMetaValue(Constants::UserParam::OPENPEPXL_BETA_SEQUENCE)).toUnmodifiedString(); - int xl_pos_alpha = String(ph.getMetaValue(Constants::UserParam::OPENPEPXL_XL_POS1)).toInt(); - int xl_pos_beta = String(ph.getMetaValue(Constants::UserParam::OPENPEPXL_XL_POS2)).toInt(); - - // String formatting - Size prefix_length = max(xl_pos_alpha, xl_pos_beta); - //Size suffix_length = max(seq_alpha.size() - xl_pos_alpha, seq_beta.size() - xl_pos_beta); - Size alpha_space = prefix_length - xl_pos_alpha; - Size beta_space = prefix_length - xl_pos_beta; - - String alpha_cov; - String beta_cov; - extractCoverageStrings(ph.getPeakAnnotations(), alpha_cov, beta_cov, seq_alpha.size(), seq_beta.size()); - - box_text += String(alpha_space, ' ') + alpha_cov + "
" + String(alpha_space, ' ') + seq_alpha + "
" + String(prefix_length, ' ') + vert_bar + "
" + String(beta_space, ' ') + seq_beta + "
" + String(beta_space, ' ') + beta_cov; - // color: - } - else // if (ph.getMetaValue(Constants::UserParam::OPENPEPXL_XL_TYPE) == "mono-link") - { - String seq_alpha = ph.getSequence().toUnmodifiedString(); - int xl_pos_alpha = String(ph.getMetaValue(Constants::UserParam::OPENPEPXL_XL_POS1)).toInt(); - Size prefix_length = xl_pos_alpha; + String seq_alpha = ph.getSequence().toUnmodifiedString(); + String seq_beta = AASequence::fromString(ph.getMetaValue(Constants::UserParam::OPENPEPXL_BETA_SEQUENCE)).toUnmodifiedString(); + int xl_pos_alpha = String(ph.getMetaValue(Constants::UserParam::OPENPEPXL_XL_POS1)).toInt(); + int xl_pos_beta = String(ph.getMetaValue(Constants::UserParam::OPENPEPXL_XL_POS2)).toInt(); + + // String formatting + Size prefix_length = max(xl_pos_alpha, xl_pos_beta); + //Size suffix_length = max(seq_alpha.size() - xl_pos_alpha, seq_beta.size() - xl_pos_beta); + Size alpha_space = prefix_length - xl_pos_alpha; + Size beta_space = prefix_length - xl_pos_beta; + + String alpha_cov; + String beta_cov; + extractCoverageStrings(ph.getPeakAnnotations(), alpha_cov, beta_cov, seq_alpha.size(), seq_beta.size()); + + box_text += String(alpha_space, ' ') + alpha_cov + "
" + String(alpha_space, ' ') + seq_alpha + "
" + String(prefix_length, ' ') + vert_bar + "
" + String(beta_space, ' ') + seq_beta + "
" + String(beta_space, ' ') + beta_cov; + // color: + } + else // if (ph.getMetaValue(Constants::UserParam::OPENPEPXL_XL_TYPE) == "mono-link") + { + String seq_alpha = ph.getSequence().toUnmodifiedString(); + int xl_pos_alpha = String(ph.getMetaValue(Constants::UserParam::OPENPEPXL_XL_POS1)).toInt(); + Size prefix_length = xl_pos_alpha; - String alpha_cov; - String beta_cov; - extractCoverageStrings(ph.getPeakAnnotations(), alpha_cov, beta_cov, seq_alpha.size(), 0); + String alpha_cov; + String beta_cov; + extractCoverageStrings(ph.getPeakAnnotations(), alpha_cov, beta_cov, seq_alpha.size(), 0); - box_text += alpha_cov + "
" + seq_alpha + "
" + String(prefix_length, ' ') + vert_bar; + box_text += alpha_cov + "
" + seq_alpha + "
" + String(prefix_length, ' ') + vert_bar; - } - box_text = R"(
)" + box_text + "
"; - widget_1D->canvas()->setTextBox(box_text.toQString()); } - else if (ph.getPeakAnnotations().empty()) // only write the sequence + box_text = R"(
)" + box_text + "
"; + widget_1D->canvas()->setTextBox(box_text.toQString()); + } + else if (ph.getPeakAnnotations().empty()) // only write the sequence + { + String seq = ph.getSequence().toString(); + if (seq.empty()) + { + seq = ph.getMetaValue("label"); // e.g. for RNA sequences + } + widget_1D->canvas()->setTextBox(seq.toQString()); + } + else if (widget_1D->canvas()->isIonLadderVisible()) + { + if (!ph.getSequence().empty()) // generate sequence diagram for a peptide { - String seq = ph.getSequence().toString(); - if (seq.empty()) - { - seq = ph.getMetaValue("label"); // e.g. for RNA sequences - } - widget_1D->canvas()->setTextBox(seq.toQString()); + // @TODO: read ion list from the input file (meta value) + static vector top_ions = ListUtils::create("a,b,c"); + static vector bottom_ions = ListUtils::create("x,y,z"); + String diagram = generateSequenceDiagram_( + ph.getSequence(), + ph.getPeakAnnotations(), + top_ions, + bottom_ions); + widget_1D->canvas()->setTextBox(diagram.toQString()); } - else if (widget_1D->canvas()->isIonLadderVisible()) + else if (ph.metaValueExists("label")) // generate sequence diagram for RNA { - if (!ph.getSequence().empty()) // generate sequence diagram for a peptide + try { // @TODO: read ion list from the input file (meta value) - static vector top_ions = ListUtils::create("a,b,c"); - static vector bottom_ions = ListUtils::create("x,y,z"); - String diagram = generateSequenceDiagram_( - ph.getSequence(), - ph.getPeakAnnotations(), - top_ions, - bottom_ions); + NASequence na_seq = NASequence::fromString(ph.getMetaValue("label")); + static vector top_ions = ListUtils::create("a-B,a,b,c,d"); + static vector bottom_ions = ListUtils::create("w,x,y,z"); + String diagram = generateSequenceDiagram_(na_seq, ph.getPeakAnnotations(), + top_ions, bottom_ions); widget_1D->canvas()->setTextBox(diagram.toQString()); } - else if (ph.metaValueExists("label")) // generate sequence diagram for RNA + catch (Exception::ParseError&) // label doesn't contain have a 
valid seq. { - try - { - // @TODO: read ion list from the input file (meta value) - NASequence na_seq = NASequence::fromString(ph.getMetaValue("label")); - static vector top_ions = ListUtils::create("a-B,a,b,c,d"); - static vector bottom_ions = ListUtils::create("w,x,y,z"); - String diagram = generateSequenceDiagram_(na_seq, ph.getPeakAnnotations(), - top_ions, bottom_ions); - widget_1D->canvas()->setTextBox(diagram.toQString()); - } - catch (Exception::ParseError&) // label doesn't contain have a valid seq. - { - } } } } + break; } default: @@ -906,6 +935,9 @@ namespace OpenMS void TVIdentificationViewController::addTheoreticalSpectrumLayer_(const PeptideHit& ph) { + #ifdef DEBUG_IDENTIFICATION_VIEW + cout << "Adding theoretical spectrum layer" << endl; + #endif PlotCanvas* current_canvas = tv_->getActive1DWidget()->canvas(); auto& current_layer = dynamic_cast(current_canvas->getCurrentLayer()); const SpectrumType& current_spectrum = current_layer.getCurrentSpectrum(); @@ -946,9 +978,8 @@ namespace OpenMS spec_id_view_->ignore_update = true; RAIICleanup cleanup([&]() { spec_id_view_->ignore_update = false; }); - PeakMap new_exp; - new_exp.addSpectrum(theo_spectrum); - ExperimentSharedPtrType new_exp_sptr(new PeakMap(new_exp)); + ExperimentSharedPtrType new_exp_sptr = boost::make_shared(); + new_exp_sptr->getMSExperiment().addSpectrum(theo_spectrum); LayerDataBase::ODExperimentSharedPtrType od_dummy(new OnDiscMSExperiment()); String layer_caption = aa_sequence.toString() + " (identification view)"; current_canvas->addPeakLayer(new_exp_sptr, od_dummy, layer_caption); @@ -1061,12 +1092,13 @@ namespace OpenMS void TVIdentificationViewController::removeGraphicalPeakAnnotations_(int spectrum_index) { + #ifdef DEBUG_IDENTIFICATION_VIEW + cout << "Removing graphical peak annotations." 
<< endl; + #endif + auto* widget_1D = tv_->getActive1DWidget(); auto& current_layer = widget_1D->canvas()->getCurrentLayer(); - #ifdef DEBUG_IDENTIFICATION_VIEW - cout << "Removing peak annotations." << endl; - #endif // remove all graphical peak annotations as these will be recreated from the stored peak annotations Annotations1DContainer& las = current_layer.getAnnotations(spectrum_index); auto new_end = remove_if(las.begin(), las.end(), @@ -1084,6 +1116,10 @@ namespace OpenMS void TVIdentificationViewController::deactivate1DSpectrum(int spectrum_index) { + #ifdef DEBUG_IDENTIFICATION_VIEW + cout << "Deactivating 1D spectrum with index: " << spectrum_index << endl; + #endif + // Retrieve active 1D widget Plot1DWidget* widget_1D = tv_->getActive1DWidget(); @@ -1096,11 +1132,11 @@ namespace OpenMS // Return if no valid peak layer attached auto* current_layer_ptr = dynamic_cast(¤t_layer); - if (!current_layer_ptr || current_layer_ptr->getPeakData()->empty()) + if (!current_layer_ptr || current_layer_ptr->getPeakData()->getMSExperiment().empty()) { return; } - MSSpectrum& spectrum = (*current_layer_ptr->getPeakDataMuteable())[spectrum_index]; + MSSpectrum& spectrum = (*current_layer_ptr->getPeakDataMuteable()).getMSExperiment()[spectrum_index]; int ms_level = spectrum.getMSLevel(); if (ms_level == 2) { @@ -1121,6 +1157,10 @@ namespace OpenMS void TVIdentificationViewController::addPeakAnnotationsFromID_(const PeptideHit& hit) { + #ifdef DEBUG_IDENTIFICATION_VIEW + cout << "Adding peak annotations from ID" << endl; + #endif + // get annotations and sequence const vector& annotations = hit.getPeakAnnotations(); @@ -1245,6 +1285,10 @@ namespace OpenMS void TVIdentificationViewController::removeTheoreticalSpectrumLayer_() { + #ifdef DEBUG_IDENTIFICATION_VIEW + cout << "Removing theoretical spectrum layer" << endl; + #endif + auto* spectrum_widget_1D = tv_->getActive1DWidget(); if (spectrum_widget_1D) { @@ -1270,6 +1314,10 @@ namespace OpenMS // override void 
TVIdentificationViewController::activateBehavior() { + #ifdef DEBUG_IDENTIFICATION_VIEW + cout << "Activating identification view" << endl; + #endif + Plot1DWidget* w = tv_->getActive1DWidget(); if (w == nullptr) { @@ -1283,16 +1331,24 @@ namespace OpenMS // find first MS2 spectrum with peptide identification and set current spectrum to it if (current_spectrum.getMSLevel() == 1) // no fragment spectrum { - for (Size i = 0; i < current_layer.getPeakData()->size(); ++i) + for (Size i = 0; i < current_layer.getPeakData()->getMSExperiment().size(); ++i) { - UInt ms_level = (*current_layer.getPeakData())[i].getMSLevel(); - const vector peptide_ids = (*current_layer.getPeakData())[i].getPeptideIdentifications(); - Size peptide_ids_count = peptide_ids.size(); + UInt ms_level = current_layer.getPeakData()->getMSExperiment()[i].getMSLevel(); + + if (ms_level != 2) continue; - if (ms_level != 2 || peptide_ids_count == 0) // skip non ms2 spectra and spectra with no identification + const vector& peptide_ids = current_layer.getPeakData()->getPeptideIdentifications(); + if (i >= peptide_ids.size()) + { + OPENMS_LOG_FATAL_ERROR << "Peptide identification index out of bounds!" 
<< endl; + } + const PeptideIdentification& peptide_id = peptide_ids[i]; + + if (peptide_id.getHits().empty()) // skip spectra with no identification { continue; } + OPENMS_LOG_DEBUG << "During activation, found first MS2 spectrum with peptide identification: " << i << endl; current_layer.setCurrentIndex(i); break; } @@ -1302,6 +1358,10 @@ namespace OpenMS // override void TVIdentificationViewController::deactivateBehavior() { + #ifdef DEBUG_IDENTIFICATION_VIEW + cout << "Deactivating identification view" << endl; + #endif + Plot1DWidget* widget_1D = tv_->getActive1DWidget(); // return if no active 1D widget is present diff --git a/src/openms_gui/source/VISUAL/TVSpectraViewController.cpp b/src/openms_gui/source/VISUAL/TVSpectraViewController.cpp index 361535b0c2e..86e7d720e34 100644 --- a/src/openms_gui/source/VISUAL/TVSpectraViewController.cpp +++ b/src/openms_gui/source/VISUAL/TVSpectraViewController.cpp @@ -74,9 +74,9 @@ namespace OpenMS { // get caption (either chromatogram idx or peptide sequence, if available) String basename_suffix; - if (chrom_exp_sptr->metaValueExists("peptide_sequence")) + if (chrom_exp_sptr->getMSExperiment().metaValueExists("peptide_sequence")) { - basename_suffix = String(chrom_exp_sptr->getMetaValue("peptide_sequence")); + basename_suffix = String(chrom_exp_sptr->getMSExperiment().getMetaValue("peptide_sequence")); } ((basename_suffix += "[") += index) += "]"; diff --git a/src/pyOpenMS/pxds/AnnotatedMSRun.pxd b/src/pyOpenMS/pxds/AnnotatedMSRun.pxd new file mode 100644 index 00000000000..3c5bedc915f --- /dev/null +++ b/src/pyOpenMS/pxds/AnnotatedMSRun.pxd @@ -0,0 +1,52 @@ +from libcpp.vector cimport vector as libcpp_vector +from libcpp.pair cimport pair as libcpp_pair +from libcpp cimport bool +from Types cimport * +from MSExperiment cimport * +from PeptideIdentification cimport * +from ProteinIdentification cimport * +from MSSpectrum cimport * + +cdef extern from "" namespace "OpenMS": + + cdef cppclass AnnotatedMSRun: + # wrap-doc: + 
# Class for storing MS run data with peptide and protein identifications + # + # This class stores an MSExperiment (containing spectra) along with peptide and protein + # identifications. Each spectrum in the MSExperiment is associated with a single + # PeptideIdentification object. Object gets typically not manually created but generated + # by the IDMapper class. + # + # + # Usage: + # + # .. code-block:: python + # + # run = AnnotatedMSRun() + # exp = MSExperiment() + # MzMLFile().load(path_to_file, exp) + # run.setMSExperiment(exp) + # run.setPeptideIdentifications(my_peptide_ids) + + AnnotatedMSRun() nogil except + + AnnotatedMSRun(MSExperiment) nogil except + + AnnotatedMSRun(AnnotatedMSRun) nogil except + + + # Protein identification methods + libcpp_vector[ProteinIdentification]& getProteinIdentifications() nogil except + + const libcpp_vector[ProteinIdentification]& getProteinIdentifications() nogil except + # wrap-ignore + + # Peptide identification methods + libcpp_vector[PeptideIdentification]& getPeptideIdentifications() nogil except + + const libcpp_vector[PeptideIdentification]& getPeptideIdentifications() nogil except + # wrap-ignore + void setPeptideIdentifications(libcpp_vector[PeptideIdentification]& ids) nogil except + + + # MSExperiment methods + MSExperiment& getMSExperiment() nogil except + + const MSExperiment& getMSExperiment() nogil except + # wrap-ignore + void setMSExperiment(MSExperiment& experiment) nogil except + + + # Access methods + libcpp_pair[MSSpectrum&, PeptideIdentification&] operator[](size_t idx) nogil except + # wrap-ignore + diff --git a/src/pyOpenMS/pxds/ExperimentalSettings.pxd b/src/pyOpenMS/pxds/ExperimentalSettings.pxd index c7f9ce0911b..b012fd40f85 100644 --- a/src/pyOpenMS/pxds/ExperimentalSettings.pxd +++ b/src/pyOpenMS/pxds/ExperimentalSettings.pxd @@ -58,12 +58,6 @@ cdef extern from "" namespace "OpenMS": String getComment() except + nogil # wrap-doc:Returns the free-text comment void setComment(String comment) 
except + nogil # wrap-doc:Sets the free-text comment - - - libcpp_vector[ProteinIdentification] getProteinIdentifications() except + nogil # wrap-doc:Returns a reference to the protein ProteinIdentification vector - - void setProteinIdentifications(libcpp_vector[ProteinIdentification] protein_identifications) except + nogil # wrap-doc:Sets the protein ProteinIdentification vector - String getFractionIdentifier() except + nogil # wrap-doc:Returns fraction identifier diff --git a/src/pyOpenMS/pxds/SpectrumSettings.pxd b/src/pyOpenMS/pxds/SpectrumSettings.pxd index d0382f8382f..5aadb9bd2d3 100644 --- a/src/pyOpenMS/pxds/SpectrumSettings.pxd +++ b/src/pyOpenMS/pxds/SpectrumSettings.pxd @@ -3,7 +3,6 @@ from String cimport * from Peak1D cimport * from InstrumentSettings cimport * from SourceFile cimport * -from PeptideIdentification cimport * from Precursor cimport * from DataProcessing cimport * from Product cimport * @@ -42,9 +41,6 @@ cdef extern from "" namespace "OpenMS": libcpp_vector[Product] getProducts() except + nogil # wrap-doc:Returns a const reference to the products void setProducts(libcpp_vector[Product]) except + nogil # wrap-doc:Sets the products - libcpp_vector[PeptideIdentification] getPeptideIdentifications() except + nogil # wrap-doc:Returns a const reference to the PeptideIdentification vector - void setPeptideIdentifications(libcpp_vector[PeptideIdentification]) except + nogil # wrap-doc:Sets the PeptideIdentification vector - libcpp_vector[ shared_ptr[DataProcessing] ] getDataProcessing() except + nogil void setDataProcessing(libcpp_vector[ shared_ptr[DataProcessing] ]) except + nogil diff --git a/src/tests/class_tests/openms/executables.cmake b/src/tests/class_tests/openms/executables.cmake index 656625cea4c..4d54f1235a8 100644 --- a/src/tests/class_tests/openms/executables.cmake +++ b/src/tests/class_tests/openms/executables.cmake @@ -78,6 +78,7 @@ set(datastructures_executables_list set(metadata_executables_list AcquisitionInfo_test 
Acquisition_test + AnnotatedMSRun_test CVTermList_test CVTermListInterface_test CVTerm_test diff --git a/src/tests/class_tests/openms/source/AnnotatedMSRun_test.cpp b/src/tests/class_tests/openms/source/AnnotatedMSRun_test.cpp new file mode 100644 index 00000000000..737c62f8b97 --- /dev/null +++ b/src/tests/class_tests/openms/source/AnnotatedMSRun_test.cpp @@ -0,0 +1,343 @@ +// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin +// SPDX-License-Identifier: BSD-3-Clause +// +// -------------------------------------------------------------------------- +// $Maintainer: Timo Sachsenberg$ +// $Authors: David Voigt $ +// -------------------------------------------------------------------------- + +#include +#include +#include +#include + +START_TEST(AnnotatedMSRun, "$Id$") + +using namespace OpenMS; + +// Default constructor +AnnotatedMSRun* ptr = nullptr; +AnnotatedMSRun* nullPointer = nullptr; + +START_SECTION((AnnotatedMSRun())) + ptr = new AnnotatedMSRun(); + TEST_NOT_EQUAL(ptr, nullPointer) +END_SECTION + +START_SECTION((~AnnotatedMSRun())) + delete ptr; +END_SECTION + +START_SECTION((explicit AnnotatedMSRun(MSExperiment&& experiment))) + MSExperiment exp; + MSSpectrum spec; + spec.setRT(42.0); + spec.setMSLevel(2); + exp.addSpectrum(spec); + + AnnotatedMSRun annotated_data(std::move(exp)); + TEST_EQUAL(annotated_data.getMSExperiment().size(), 1) + TEST_REAL_SIMILAR(annotated_data.getMSExperiment()[0].getRT(), 42.0) +END_SECTION + +START_SECTION((ProteinIdentification& getProteinIdentifications())) + AnnotatedMSRun annotated_data; + + auto& prot_id = annotated_data.getProteinIdentifications(); + prot_id.resize(1); + prot_id[0].setIdentifier("Test"); + TEST_EQUAL(annotated_data.getProteinIdentifications()[0].getIdentifier(), "Test") +END_SECTION + +START_SECTION((const ProteinIdentification& getProteinIdentifications() const)) + AnnotatedMSRun annotated_data; + auto& prot_id = annotated_data.getProteinIdentifications(); + 
prot_id.resize(1); + prot_id[0].setIdentifier("Test"); + + const AnnotatedMSRun& const_data = annotated_data; + TEST_EQUAL(const_data.getProteinIdentifications()[0].getIdentifier(), "Test") +END_SECTION + +START_SECTION((PeptideIdentification& getPeptideIdentification(size_t index))) + AnnotatedMSRun annotated_data; + MSExperiment exp; + MSSpectrum spec1, spec2; + exp.addSpectrum(spec1); + exp.addSpectrum(spec2); + annotated_data.getMSExperiment() = std::move(exp); + + // Resize peptide identifications to match spectra + annotated_data.getPeptideIdentifications().resize(2); + + // Add data to the first peptide identification + PeptideHit hit; + hit.setSequence(AASequence::fromString("PEPTIDE")); + annotated_data.getPeptideIdentifications()[0].insertHit(hit); + + TEST_EQUAL(annotated_data.getPeptideIdentifications()[0].getHits().size(), 1) + TEST_EQUAL(annotated_data.getPeptideIdentifications()[0].getHits()[0].getSequence().toString(), "PEPTIDE") +END_SECTION + +START_SECTION((const PeptideIdentification& getPeptideIdentification(size_t index) const)) + AnnotatedMSRun annotated_data; + MSExperiment exp; + MSSpectrum spec1, spec2; + exp.addSpectrum(spec1); + exp.addSpectrum(spec2); + annotated_data.getMSExperiment() = std::move(exp); + + // Resize peptide identifications to match spectra + annotated_data.getPeptideIdentifications().resize(2); + + // Add data to the first peptide identification + PeptideHit hit; + hit.setSequence(AASequence::fromString("PEPTIDE")); + annotated_data.getPeptideIdentifications()[0].insertHit(hit); + + const AnnotatedMSRun& const_data = annotated_data; + TEST_EQUAL(const_data.getPeptideIdentifications()[0].getHits().size(), 1) + TEST_EQUAL(const_data.getPeptideIdentifications()[0].getHits()[0].getSequence().toString(), "PEPTIDE") +END_SECTION + + +START_SECTION((std::vector& getPeptideIdentifications())) + AnnotatedMSRun annotated_data; + MSExperiment exp; + MSSpectrum spec1, spec2; + exp.addSpectrum(spec1); + exp.addSpectrum(spec2); + 
annotated_data.getMSExperiment() = std::move(exp); + + // Resize peptide identifications to match spectra + annotated_data.getPeptideIdentifications().resize(2); + + // Add data to the peptide identifications + PeptideHit hit1, hit2; + hit1.setSequence(AASequence("PEPTIDER")); + hit2.setSequence(AASequence("PEPTIDAR")); + annotated_data.getPeptideIdentifications()[0].insertHit(hit1); + annotated_data.getPeptideIdentifications()[1].insertHit(hit2); + + TEST_EQUAL(annotated_data.getPeptideIdentifications().size(), 2) + TEST_EQUAL(annotated_data.getPeptideIdentifications()[0].getHits().size(), 1) + TEST_EQUAL(annotated_data.getPeptideIdentifications()[1].getHits().size(), 1) + TEST_EQUAL(annotated_data.getPeptideIdentifications()[0].getHits()[0].getSequence().toString(), "PEPTIDER") + TEST_EQUAL(annotated_data.getPeptideIdentifications()[1].getHits()[0].getSequence().toString(), "PEPTIDAR") +END_SECTION + +START_SECTION((const std::vector& getPeptideIdentifications() const)) + AnnotatedMSRun annotated_data; + MSExperiment exp; + MSSpectrum spec1, spec2; + exp.addSpectrum(spec1); + exp.addSpectrum(spec2); + annotated_data.getMSExperiment() = std::move(exp); + + // Resize peptide identifications to match spectra + annotated_data.getPeptideIdentifications().resize(2); + + // Add data to the peptide identifications + PeptideHit hit1, hit2; + hit1.setSequence(AASequence::fromString("PEPTIDER")); + hit2.setSequence(AASequence::fromString("PEPTIDAR")); + annotated_data.getPeptideIdentifications()[0].insertHit(hit1); + annotated_data.getPeptideIdentifications()[1].insertHit(hit2); + + const AnnotatedMSRun& const_data = annotated_data; + TEST_EQUAL(const_data.getPeptideIdentifications().size(), 2) + TEST_EQUAL(const_data.getPeptideIdentifications()[0].getHits().size(), 1) + TEST_EQUAL(const_data.getPeptideIdentifications()[1].getHits().size(), 1) + TEST_EQUAL(const_data.getPeptideIdentifications()[0].getHits()[0].getSequence().toString(), "PEPTIDER") + 
TEST_EQUAL(const_data.getPeptideIdentifications()[1].getHits()[0].getSequence().toString(), "PEPTIDAR") +END_SECTION + + +START_SECTION((void setPeptideIdentification(PeptideIdentification&& id, size_t index))) + AnnotatedMSRun annotated_data; + MSExperiment exp; + MSSpectrum spec1, spec2; + exp.addSpectrum(spec1); + exp.addSpectrum(spec2); + annotated_data.getMSExperiment() = std::move(exp); + + // Resize peptide identifications to match spectra + annotated_data.getPeptideIdentifications().resize(2); + + // Create a peptide identification + PeptideIdentification pep_id; + PeptideHit hit; + hit.setSequence(AASequence::fromString("PEPTIDE")); + pep_id.insertHit(hit); + + // Set the peptide identification + annotated_data.getPeptideIdentifications()[0] = pep_id; + + TEST_EQUAL(annotated_data.getPeptideIdentifications()[0].getHits().size(), 1) + TEST_EQUAL(annotated_data.getPeptideIdentifications()[0].getHits()[0].getSequence().toString(), "PEPTIDE") +END_SECTION + + +START_SECTION((void setPeptideIdentifications(std::vector&& ids))) + AnnotatedMSRun annotated_data; + MSExperiment exp; + MSSpectrum spec1, spec2; + exp.addSpectrum(spec1); + exp.addSpectrum(spec2); + annotated_data.getMSExperiment() = std::move(exp); + + // Create a vector of peptide identifications + std::vector pep_ids; + PeptideIdentification pep_id1, pep_id2; + PeptideHit hit1, hit2; + hit1.setSequence(AASequence::fromString("PEPTIDER")); + hit2.setSequence(AASequence::fromString("PEPTIDAR")); + pep_id1.insertHit(hit1); + pep_id2.insertHit(hit2); + pep_ids.push_back(pep_id1); + pep_ids.push_back(pep_id2); + + // Set all peptide identifications + annotated_data.setPeptideIdentifications(std::move(pep_ids)); + + TEST_EQUAL(annotated_data.getPeptideIdentifications().size(), 2) + TEST_EQUAL(annotated_data.getPeptideIdentifications()[0].getHits().size(), 1) + TEST_EQUAL(annotated_data.getPeptideIdentifications()[1].getHits().size(), 1) + 
TEST_EQUAL(annotated_data.getPeptideIdentifications()[0].getHits()[0].getSequence().toString(), "PEPTIDER") + TEST_EQUAL(annotated_data.getPeptideIdentifications()[1].getHits()[0].getSequence().toString(), "PEPTIDAR") +END_SECTION + + +START_SECTION((void clearAllPeptideIdentifications())) + AnnotatedMSRun annotated_data; + MSExperiment exp; + MSSpectrum spec1, spec2; + exp.addSpectrum(spec1); + exp.addSpectrum(spec2); + annotated_data.getMSExperiment() = std::move(exp); + + // Resize peptide identifications to match spectra + annotated_data.getPeptideIdentifications().resize(2); + + // Add data to the peptide identifications + PeptideHit hit1, hit2; + hit1.setSequence(AASequence::fromString("PEPTIDER")); + hit2.setSequence(AASequence::fromString("PEPTIDAR")); + annotated_data.getPeptideIdentifications()[0].insertHit(hit1); + annotated_data.getPeptideIdentifications()[1].insertHit(hit2); + + // Clear all peptide identifications + annotated_data.getPeptideIdentifications().clear(); + + TEST_EQUAL(annotated_data.getPeptideIdentifications().size(), 0) +END_SECTION + +START_SECTION((MSExperiment& getMSExperiment())) + AnnotatedMSRun annotated_data; + MSExperiment exp; + MSSpectrum spec; + spec.setRT(42.0); + spec.setMSLevel(2); + exp.addSpectrum(spec); + + annotated_data.getMSExperiment() = std::move(exp); + TEST_EQUAL(annotated_data.getMSExperiment().size(), 1) + TEST_REAL_SIMILAR(annotated_data.getMSExperiment()[0].getRT(), 42.0) +END_SECTION + +START_SECTION((const MSExperiment& getMSExperiment() const)) + AnnotatedMSRun annotated_data; + MSExperiment exp; + MSSpectrum spec; + spec.setRT(42.0); + spec.setMSLevel(2); + exp.addSpectrum(spec); + + annotated_data.getMSExperiment() = std::move(exp); + + const AnnotatedMSRun& const_data = annotated_data; + TEST_EQUAL(const_data.getMSExperiment().size(), 1) + TEST_REAL_SIMILAR(const_data.getMSExperiment()[0].getRT(), 42.0) +END_SECTION + +START_SECTION((Iterator functionality)) + AnnotatedMSRun annotated_data; + 
MSExperiment exp; + MSSpectrum spec1, spec2; + spec1.setRT(10.0); + spec2.setRT(20.0); + exp.addSpectrum(spec1); + exp.addSpectrum(spec2); + annotated_data.getMSExperiment() = std::move(exp); + + // Resize peptide identifications to match spectra + annotated_data.getPeptideIdentifications().resize(2); + + // Add data to the peptide identifications + PeptideHit hit1, hit2; + hit1.setSequence(AASequence::fromString("PEPTIDER")); + hit2.setSequence(AASequence::fromString("PEPTIDAR")); + annotated_data.getPeptideIdentifications()[0].insertHit(hit1); + annotated_data.getPeptideIdentifications()[1].insertHit(hit2); + + // Test iterator functionality + size_t count = 0; + for (auto [spectrum, peptide_id] : annotated_data) + { + if (count == 0) + { + TEST_REAL_SIMILAR(spectrum.getRT(), 10.0) + TEST_EQUAL(peptide_id.getHits().size(), 1) + TEST_EQUAL(peptide_id.getHits()[0].getSequence().toString(), "PEPTIDER") + } + else if (count == 1) + { + TEST_REAL_SIMILAR(spectrum.getRT(), 20.0) + TEST_EQUAL(peptide_id.getHits().size(), 1) + TEST_EQUAL(peptide_id.getHits()[0].getSequence().toString(), "PEPTIDAR") + } + count++; + } + TEST_EQUAL(count, 2) +END_SECTION + +START_SECTION((Operator[] functionality)) + AnnotatedMSRun annotated_data; + MSExperiment exp; + MSSpectrum spec1, spec2; + spec1.setRT(10.0); + spec2.setRT(20.0); + exp.addSpectrum(spec1); + exp.addSpectrum(spec2); + annotated_data.getMSExperiment() = std::move(exp); + + // Resize peptide identifications to match spectra + annotated_data.getPeptideIdentifications().resize(2); + + // Add data to the peptide identifications + PeptideHit hit1, hit2; + hit1.setSequence(AASequence::fromString("PEPTIDER")); + hit2.setSequence(AASequence::fromString("PEPTIDAR")); + annotated_data.getPeptideIdentifications()[0].insertHit(hit1); + annotated_data.getPeptideIdentifications()[1].insertHit(hit2); + + // Test operator[] functionality + auto [spectrum, peptide_id] = annotated_data[0]; + TEST_REAL_SIMILAR(spectrum.getRT(), 10.0) + 
TEST_EQUAL(peptide_id.getHits().size(), 1) + TEST_EQUAL(peptide_id.getHits()[0].getSequence().toString(), "PEPTIDER") + + auto [spectrum2, peptide_id2] = annotated_data[1]; + TEST_REAL_SIMILAR(spectrum2.getRT(), 20.0) + TEST_EQUAL(peptide_id2.getHits().size(), 1) + TEST_EQUAL(peptide_id2.getHits()[0].getSequence().toString(), "PEPTIDAR") + + // Test const operator[] functionality + const AnnotatedMSRun& const_data = annotated_data; + auto [const_spectrum, const_peptide_id] = const_data[0]; + TEST_REAL_SIMILAR(const_spectrum.getRT(), 10.0) + TEST_EQUAL(const_peptide_id.getHits().size(), 1) + TEST_EQUAL(const_peptide_id.getHits()[0].getSequence().toString(), "PEPTIDER") +END_SECTION + +END_TEST \ No newline at end of file diff --git a/src/tests/class_tests/openms/source/ExperimentalSettings_test.cpp b/src/tests/class_tests/openms/source/ExperimentalSettings_test.cpp index 1f1beb6b945..28bed07629f 100644 --- a/src/tests/class_tests/openms/source/ExperimentalSettings_test.cpp +++ b/src/tests/class_tests/openms/source/ExperimentalSettings_test.cpp @@ -197,7 +197,7 @@ START_SECTION((ExperimentalSettings(const ExperimentalSettings& source))) tmp.getSample().setName("bla2"); tmp.getSourceFiles().resize(1); tmp.getContacts().resize(1); - tmp.getProteinIdentifications().push_back(id); + tmp.setMetaValue("label",String("label")); ExperimentalSettings tmp2(tmp); @@ -209,7 +209,7 @@ START_SECTION((ExperimentalSettings(const ExperimentalSettings& source))) TEST_EQUAL(tmp2.getSample().getName(),"bla2"); TEST_EQUAL(tmp2.getSourceFiles().size(),1); TEST_EQUAL(tmp2.getContacts().size(),1); - TEST_EQUAL(id == tmp2.getProteinIdentifications()[0], true); + TEST_EQUAL((String)(tmp2.getMetaValue("label")), "label"); END_SECTION @@ -231,7 +231,7 @@ START_SECTION((ExperimentalSettings& operator= (const ExperimentalSettings& sour tmp.getSample().setName("bla2"); tmp.getSourceFiles().resize(1); tmp.getContacts().resize(1); - tmp.getProteinIdentifications().push_back(id); + 
tmp.setMetaValue("label",String("label")); ExperimentalSettings tmp2; @@ -244,8 +244,6 @@ START_SECTION((ExperimentalSettings& operator= (const ExperimentalSettings& sour TEST_EQUAL(tmp2.getSample().getName(),"bla2"); TEST_EQUAL(tmp2.getSourceFiles().size(),1); TEST_EQUAL(tmp2.getContacts().size(),1); - TEST_EQUAL(tmp2.getProteinIdentifications().size(), 1); - TEST_EQUAL(id == tmp2.getProteinIdentifications()[0], true); TEST_EQUAL((String)(tmp2.getMetaValue("label")), "label"); tmp2 = ExperimentalSettings(); @@ -257,7 +255,6 @@ START_SECTION((ExperimentalSettings& operator= (const ExperimentalSettings& sour TEST_EQUAL(tmp2.getSample().getName(),""); TEST_EQUAL(tmp2.getSourceFiles().size(),0); TEST_EQUAL(tmp2.getContacts().size(),0); - TEST_EQUAL(tmp2.getProteinIdentifications().size(), 0); TEST_EQUAL(tmp2.getMetaValue("label").isEmpty(), true); END_SECTION @@ -292,10 +289,6 @@ START_SECTION((bool operator== (const ExperimentalSettings& rhs) const)) edit.getContacts().resize(1); TEST_EQUAL(edit==empty,false); - edit = empty; - edit.getProteinIdentifications().push_back(id); - TEST_EQUAL(edit==empty, false); - edit = empty; edit.setComment("bla"); TEST_EQUAL(edit==empty, false); @@ -352,63 +345,11 @@ START_SECTION((bool operator!= (const ExperimentalSettings& rhs) const)) edit.getContacts().resize(1); TEST_EQUAL(edit!=empty,true); - edit = empty; - edit.getProteinIdentifications().push_back(id); - TEST_FALSE(edit == empty); - edit = empty; edit.setMetaValue("label",String("label")); TEST_EQUAL(edit!=empty,true); END_SECTION -START_SECTION((const std::vector& getProteinIdentifications() const)) - ExperimentalSettings settings; - ProteinIdentification id; - ProteinHit protein_hit; - float protein_significance_threshold = 63.2f; - - id.setDateTime(DateTime::now()); - id.setSignificanceThreshold(protein_significance_threshold); - id.insertHit(protein_hit); - - settings.getProteinIdentifications().push_back(id); - const ProteinIdentification& test_id = 
settings.getProteinIdentifications()[0]; - TEST_TRUE(id == test_id) -END_SECTION - -START_SECTION((std::vector& getProteinIdentifications())) - ExperimentalSettings settings; - ProteinIdentification id; - ProteinHit protein_hit; - float protein_significance_threshold = 63.2f; - - id.setDateTime(DateTime::now()); - id.setSignificanceThreshold(protein_significance_threshold); - id.insertHit(protein_hit); - - settings.getProteinIdentifications().push_back(id); - ProteinIdentification& test_id = settings.getProteinIdentifications()[0]; - TEST_TRUE(id == test_id) -END_SECTION - -START_SECTION((void setProteinIdentifications(const std::vector& protein_identifications))) - ExperimentalSettings settings; - ProteinIdentification id; - ProteinHit protein_hit; - float protein_significance_threshold = 63.2f; - vector ids; - - id.setDateTime(DateTime::now()); - id.setSignificanceThreshold(protein_significance_threshold); - id.insertHit(protein_hit); - ids.push_back(id); - id.setSignificanceThreshold(21.f); - ids.push_back(id); - settings.setProteinIdentifications(ids); - TEST_EQUAL(ids == settings.getProteinIdentifications(), true) -END_SECTION - - ///////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////// END_TEST diff --git a/src/tests/class_tests/openms/source/IDFilter_test.cpp b/src/tests/class_tests/openms/source/IDFilter_test.cpp index 68bb3bbf649..dcea12a01dc 100644 --- a/src/tests/class_tests/openms/source/IDFilter_test.cpp +++ b/src/tests/class_tests/openms/source/IDFilter_test.cpp @@ -875,76 +875,6 @@ START_SECTION((static void removeDuplicatePeptideHits(vector static void filterHitsByScore(MSExperiment& experiment, double peptide_threshold_score, double protein_threshold_score))) -{ - PeakMap experiment; - vector ids(1, global_peptides[0]); - - ids[0].sort(); - - for (Size i = 0; i < 5; ++i) - { - experiment.addSpectrum(MSSpectrum()); - } - experiment[3].setMSLevel(2); - 
experiment[3].setPeptideIdentifications(ids); - - IDFilter::filterHitsByScore(experiment, 31.8621, 0); - PeptideIdentification& identification = experiment[3].getPeptideIdentifications()[0]; - TEST_EQUAL(identification.getScoreType(), "Mascot"); - - vector& peptide_hits = identification.getHits(); - TEST_EQUAL(peptide_hits.size(), 5); - TEST_EQUAL(peptide_hits[0].getSequence().toString(), - "FINFGVNVEVLSRFQTK"); - TEST_REAL_SIMILAR(peptide_hits[0].getScore(), 40); - TEST_EQUAL(peptide_hits[1].getSequence().toString(), - "MSLLSNMISIVKVGYNAR"); - TEST_REAL_SIMILAR(peptide_hits[1].getScore(), 40); - TEST_EQUAL(peptide_hits[2].getSequence().toString(), - "THPYGHAIVAGIERYPSK"); - TEST_REAL_SIMILAR(peptide_hits[2].getScore(), 39); - TEST_EQUAL(peptide_hits[3].getSequence().toString(), - "LHASGITVTEIPVTATNFK"); - TEST_REAL_SIMILAR(peptide_hits[3].getScore(), 34.85); - TEST_EQUAL(peptide_hits[4].getSequence().toString(), - "MRSLGYVAVISAVATDTDK"); - TEST_REAL_SIMILAR(peptide_hits[4].getScore(), 33.85); -} -END_SECTION - -START_SECTION((template static void keepNBestHits(MSExperiment& experiment, Size n))) -{ - PeakMap experiment; - vector ids(1, global_peptides[0]); - - ids[0].sort(); - - for (Size i = 0; i < 5; ++i) - { - experiment.addSpectrum(MSSpectrum()); - } - experiment[3].setMSLevel(2); - experiment[3].setPeptideIdentifications(ids); - - IDFilter::keepNBestHits(experiment, 3); - PeptideIdentification& identification = experiment[3].getPeptideIdentifications()[0]; - TEST_EQUAL(identification.getScoreType(), "Mascot"); - - vector& peptide_hits = identification.getHits(); - TEST_EQUAL(peptide_hits.size(), 3); - TEST_EQUAL(peptide_hits[0].getSequence().toString(), - "FINFGVNVEVLSRFQTK"); - TEST_REAL_SIMILAR(peptide_hits[0].getScore(), 40); - TEST_EQUAL(peptide_hits[1].getSequence().toString(), - "MSLLSNMISIVKVGYNAR"); - TEST_REAL_SIMILAR(peptide_hits[1].getScore(), 40); - TEST_EQUAL(peptide_hits[2].getSequence().toString(), - "THPYGHAIVAGIERYPSK"); - 
TEST_REAL_SIMILAR(peptide_hits[2].getScore(), 39); -} -END_SECTION - START_SECTION((static void keepNBestSpectra(std::vector& peptides, Size n))) { vector proteins; @@ -972,40 +902,6 @@ START_SECTION((static void keepNBestSpectra(std::vector& } END_SECTION -START_SECTION((template static void keepHitsMatchingProteins(MSExperiment& experiment, const vector& proteins))) -{ - PeakMap experiment; - vector proteins; - vector peptides = global_peptides; - - proteins.push_back(FASTAFile::FASTAEntry("Q824A5", "first desription", - "LHASGITVTEIPVTATNFK")); - proteins.push_back(FASTAFile::FASTAEntry("Q872T5", "second description", - "THPYGHAIVAGIERYPSK")); - - for (Size i = 0; i < 5; ++i) - { - experiment.addSpectrum(MSSpectrum()); - } - experiment[3].setMSLevel(2); - experiment[3].setPeptideIdentifications(peptides); - - IDFilter::keepHitsMatchingProteins(experiment, proteins); - TEST_EQUAL(experiment[3].getPeptideIdentifications()[0].getScoreType(), - "Mascot"); - - vector& peptide_hits = - experiment[3].getPeptideIdentifications()[0].getHits(); - TEST_EQUAL(peptide_hits.size(), 2); - TEST_EQUAL(peptide_hits[0].getSequence().toString(), - "LHASGITVTEIPVTATNFK"); - TEST_REAL_SIMILAR(peptide_hits[0].getScore(), 34.85); - TEST_EQUAL(peptide_hits[1].getSequence().toString(), - "MRSLGYVAVISAVATDTDK"); - TEST_REAL_SIMILAR(peptide_hits[1].getScore(), 33.85); -} -END_SECTION - ///////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////// diff --git a/src/tests/class_tests/openms/source/IDMapper_test.cpp b/src/tests/class_tests/openms/source/IDMapper_test.cpp index 2c87c1af49c..baf43441eb6 100644 --- a/src/tests/class_tests/openms/source/IDMapper_test.cpp +++ b/src/tests/class_tests/openms/source/IDMapper_test.cpp @@ -17,6 +17,7 @@ #include #include #include +#include /////////////////////////// @@ -80,13 +81,14 @@ START_SECTION((IDMapper& operator = (const IDMapper& rhs))) TEST_EQUAL(m2.getParameters(), p); END_SECTION 
-START_SECTION((template void annotate(MSExperiment& map, FeatureMap fmap, const bool clear_ids = false, const bool mapMS1 = false))) +/* +START_SECTION((void annotate(AnnotatedMSRun& map, FeatureMap fmap, const bool clear_ids = false, const bool mapMS1 = false))) // create id FeatureMap fm; Feature f; f.setMZ(900.0); f.setRT(9.0); - std::vector< PeptideIdentification > pids; + std::vector pids; PeptideIdentification pid; pid.setIdentifier("myID"); pid.setHits(std::vector(4)); @@ -96,11 +98,12 @@ START_SECTION((template void annotate(MSExperiment pids.push_back(pid); // with MZ&RT from PID f.setPeptideIdentifications(pids); fm.push_back(f); - std::vector< ProteinIdentification > prids(2); + std::vector prids(2); fm.setProteinIdentifications(prids); // create experiment - PeakMap experiment; + AnnotatedMSRun annotated_experiment; + MSExperiment& experiment = annotated_experiment.getMSExperiment(); MSSpectrum spectrum; Precursor precursor; precursor.setMZ(0); @@ -125,36 +128,39 @@ START_SECTION((template void annotate(MSExperiment p.setValue("ignore_charge", "true"); mapper.setParameters(p); - - mapper.annotate(experiment, fm, true, true); + mapper.annotate(annotated_experiment, fm, true, true); //test - TEST_EQUAL(experiment.getProteinIdentifications().size(), 2) + TEST_EQUAL(annotated_experiment.getProteinIdentifications().size(), 2) //scan 1 - TEST_EQUAL(experiment[0].getPeptideIdentifications().size(), 2) + TEST_EQUAL(annotated_experiment.getPeptideIdentifications()[0].getHits().size(), 2) //scan 2 - TEST_EQUAL(experiment[1].getPeptideIdentifications().size(), 2) - ABORT_IF(experiment[1].getPeptideIdentifications().size() != 2) - TEST_EQUAL(experiment[1].getPeptideIdentifications()[0].getHits().size(), 4) - TEST_EQUAL(experiment[1].getPeptideIdentifications()[0].getMZ(), 900.0) - TEST_EQUAL(experiment[1].getPeptideIdentifications()[1].getHits().size(), 4) - TEST_EQUAL(experiment[1].getPeptideIdentifications()[1].getMZ(), 800.0) + 
TEST_EQUAL(annotated_experiment.getPeptideIdentifications()[1].size(), 2) + ABORT_IF(annotated_experiment.getPeptideIdentifications(1).size() != 2) + TEST_EQUAL(annotated_experiment.getPeptideIdentifications(1)[0].getHits().size(), 4) + TEST_EQUAL(annotated_experiment.getPeptideIdentifications(1)[0].getMZ(), 900.0) + TEST_EQUAL(annotated_experiment.getPeptideIdentifications(1)[1].getHits().size(), 4) + TEST_EQUAL(annotated_experiment.getPeptideIdentifications(1)[1].getMZ(), 800.0) //scan 3 - TEST_EQUAL(experiment[2].getPeptideIdentifications().size(), 0) + TEST_EQUAL(annotated_experiment.getPeptideIdentifications(2).size(), 0) - mapper.annotate(experiment, fm, true, false); // no MS1 mapping. MZ threshold never fulfilled + std::cout << annotated_experiment.getProteinIdentifications().size() << std::endl; + std::cout << fm.getProteinIdentifications().size() << std::endl; + mapper.annotate(annotated_experiment, fm, true, false); // no MS1 mapping. MZ threshold never fulfilled + std::cout << annotated_experiment.getProteinIdentifications().size() << std::endl; //test - TEST_EQUAL(experiment.getProteinIdentifications().size(), 2) + TEST_EQUAL(annotated_experiment.getProteinIdentifications().size(), 2) //scan 1 - TEST_EQUAL(experiment[0].getPeptideIdentifications().size(), 0) + TEST_EQUAL(annotated_experiment.getPeptideIdentifications(0).size(), 0) //scan 2 - TEST_EQUAL(experiment[1].getPeptideIdentifications().size(), 0) + TEST_EQUAL(annotated_experiment.getPeptideIdentifications(1).size(), 0) //scan 3 - TEST_EQUAL(experiment[2].getPeptideIdentifications().size(), 0) + TEST_EQUAL(annotated_experiment.getPeptideIdentifications(2).size(), 0) END_SECTION +*/ -START_SECTION((template void annotate(MSExperiment& map, const std::vector& peptide_ids, const std::vector& protein_ids, const bool clear_ids = false, const bool mapMS1 = false))) +START_SECTION((void annotate(AnnotatedMSRun& map, const std::vector& peptide_ids, const std::vector& protein_ids, const bool clear_ids = 
false, const bool mapMS1 = false))) // load id vector identifications; vector protein_identifications; @@ -182,7 +188,8 @@ START_SECTION((template void annotate(MSExperiment // TEST RT MAPPING // create experiment - PeakMap experiment; + AnnotatedMSRun annotated_experiment; + MSExperiment & experiment = annotated_experiment.getMSExperiment(); MSSpectrum spectrum; Precursor precursor; precursor.setMZ(0); @@ -206,31 +213,30 @@ START_SECTION((template void annotate(MSExperiment p.setValue("mz_measure","Da"); p.setValue("ignore_charge", "true"); mapper.setParameters(p); - - mapper.annotate(experiment, identifications, protein_identifications); + + mapper.annotate(annotated_experiment, identifications, protein_identifications); //test - TEST_EQUAL(experiment.getProteinIdentifications().size(), 1) - TEST_EQUAL(experiment.getProteinIdentifications()[0].getHits().size(),2) - TEST_EQUAL(experiment.getProteinIdentifications()[0].getHits()[0].getAccession(),"ABCDE") - TEST_EQUAL(experiment.getProteinIdentifications()[0].getHits()[1].getAccession(),"FGHIJ") + TEST_EQUAL(annotated_experiment.getProteinIdentifications().size(), 1) + TEST_EQUAL(annotated_experiment.getProteinIdentifications()[0].getHits().size(),2) + TEST_EQUAL(annotated_experiment.getProteinIdentifications()[0].getHits()[0].getAccession(),"ABCDE") + TEST_EQUAL(annotated_experiment.getProteinIdentifications()[0].getHits()[1].getAccession(),"FGHIJ") //scan 1 - TEST_EQUAL(experiment[0].getPeptideIdentifications().size(), 1) - TEST_EQUAL(experiment[0].getPeptideIdentifications()[0].getHits().size(), 2) - TEST_EQUAL(experiment[0].getPeptideIdentifications()[0].getHits()[0].getSequence(), AASequence::fromString("LHASGITVTEIPVTATNFK")) - TEST_EQUAL(experiment[0].getPeptideIdentifications()[0].getHits()[1].getSequence(), AASequence::fromString("MRSLGYVAVISAVATDTDK")) + TEST_EQUAL(annotated_experiment.getPeptideIdentifications()[0].getHits().size(), 2) + 
TEST_EQUAL(annotated_experiment.getPeptideIdentifications()[0].getHits()[0].getSequence(), AASequence::fromString("LHASGITVTEIPVTATNFK")) + TEST_EQUAL(annotated_experiment.getPeptideIdentifications()[0].getHits()[1].getSequence(), AASequence::fromString("MRSLGYVAVISAVATDTDK")) //scan 2 - TEST_EQUAL(experiment[1].getPeptideIdentifications().size(), 0) + TEST_EQUAL(annotated_experiment.getPeptideIdentifications()[1].getHits().size(), 0) //scan 3 - TEST_EQUAL(experiment[2].getPeptideIdentifications().size(), 1) - TEST_EQUAL(experiment[2].getPeptideIdentifications()[0].getHits().size(), 1) - TEST_EQUAL(experiment[2].getPeptideIdentifications()[0].getHits()[0].getSequence(), AASequence::fromString("HSKLSAK")) + TEST_EQUAL(annotated_experiment.getPeptideIdentifications()[2].getHits().size(), 1) + TEST_EQUAL(annotated_experiment.getPeptideIdentifications()[2].getHits()[0].getSequence(), AASequence::fromString("HSKLSAK")) //----------------------------------------------------------------------------------- // TEST NATIVE_ID MAPPING // create experiment - PeakMap experiment2; + AnnotatedMSRun annotated_experiment2; + MSExperiment& experiment2 = annotated_experiment2.getMSExperiment(); MSSpectrum spectrum2; Precursor precursor2; precursor2.setMZ(0); @@ -257,24 +263,22 @@ START_SECTION((template void annotate(MSExperiment p2.setValue("ignore_charge", "true"); mapper2.setParameters(p2); - mapper2.annotate(experiment2, identifications2, protein_identifications2); + mapper2.annotate(annotated_experiment2, identifications2, protein_identifications2); //test - TEST_EQUAL(experiment2.getProteinIdentifications().size(), 1) - TEST_EQUAL(experiment2.getProteinIdentifications()[0].getHits().size(),2) - TEST_EQUAL(experiment2.getProteinIdentifications()[0].getHits()[0].getAccession(),"ABCDE") - TEST_EQUAL(experiment2.getProteinIdentifications()[0].getHits()[1].getAccession(),"FGHIJ") + TEST_EQUAL(annotated_experiment2.getProteinIdentifications().size(), 1) + 
TEST_EQUAL(annotated_experiment2.getProteinIdentifications()[0].getHits().size(),2) + TEST_EQUAL(annotated_experiment2.getProteinIdentifications()[0].getHits()[0].getAccession(),"ABCDE") + TEST_EQUAL(annotated_experiment2.getProteinIdentifications()[0].getHits()[1].getAccession(),"FGHIJ") //scan 1 - TEST_EQUAL(experiment2[0].getPeptideIdentifications().size(), 1) - TEST_EQUAL(experiment2[0].getPeptideIdentifications()[0].getHits().size(), 2) - TEST_EQUAL(experiment2[0].getPeptideIdentifications()[0].getHits()[0].getSequence(), AASequence::fromString("LHASGITVTEIPVTATNFK")) - TEST_EQUAL(experiment2[0].getPeptideIdentifications()[0].getHits()[1].getSequence(), AASequence::fromString("MRSLGYVAVISAVATDTDK")) + TEST_EQUAL(annotated_experiment2.getPeptideIdentifications()[0].getHits().size(), 2) + TEST_EQUAL(annotated_experiment2.getPeptideIdentifications()[0].getHits()[0].getSequence(), AASequence::fromString("LHASGITVTEIPVTATNFK")) + TEST_EQUAL(annotated_experiment2.getPeptideIdentifications()[0].getHits()[1].getSequence(), AASequence::fromString("MRSLGYVAVISAVATDTDK")) //scan 2 - TEST_EQUAL(experiment2[1].getPeptideIdentifications().size(), 0) + TEST_EQUAL(annotated_experiment2.getPeptideIdentifications()[1].getHits().size(), 0) //scan 3 - TEST_EQUAL(experiment2[2].getPeptideIdentifications().size(), 1) - TEST_EQUAL(experiment2[2].getPeptideIdentifications()[0].getHits().size(), 1) - TEST_EQUAL(experiment2[2].getPeptideIdentifications()[0].getHits()[0].getSequence(), AASequence::fromString("HSKLSAK")) + TEST_EQUAL(annotated_experiment2.getPeptideIdentifications()[2].getHits().size(), 1) + TEST_EQUAL(annotated_experiment2.getPeptideIdentifications()[2].getHits()[0].getSequence(), AASequence::fromString("HSKLSAK")) END_SECTION diff --git a/src/tests/class_tests/openms/source/MSPFile_test.cpp b/src/tests/class_tests/openms/source/MSPFile_test.cpp index 8a700ae29bb..ab078636685 100644 --- a/src/tests/class_tests/openms/source/MSPFile_test.cpp +++ 
b/src/tests/class_tests/openms/source/MSPFile_test.cpp @@ -117,21 +117,16 @@ START_SECTION(void load(const String &filename, std::vector< PeptideIdentificati END_SECTION -START_SECTION(void store(const String& filename, const PeakMap& exp) const) +START_SECTION(void store(const String& filename, const AnnotatedMSRun& annot_exp) const) MSPFile msp_file; - vector ids; - PeakMap exp; - msp_file.load(OPENMS_GET_TEST_DATA_PATH("MSPFile_test.msp"), ids, exp); - for (Size i = 0; i != ids.size(); ++i) - { - exp[i].getPeptideIdentifications().push_back(ids[i]); - } + AnnotatedMSRun annot_exp; + msp_file.load(OPENMS_GET_TEST_DATA_PATH("MSPFile_test.msp"), annot_exp); String filename; - NEW_TMP_FILE(filename) - msp_file.store(filename, exp); + NEW_TMP_FILE(filename); + msp_file.store(filename, annot_exp); - exp.clear(true); - ids.clear(); + PeakMap exp; + vector ids; msp_file.load(filename, ids, exp); TEST_EQUAL(ids.size(), 7) TEST_EQUAL(exp.size(), 7) diff --git a/src/tests/class_tests/openms/source/SpectrumSettings_test.cpp b/src/tests/class_tests/openms/source/SpectrumSettings_test.cpp index c6927e243c2..4322c5fbe75 100644 --- a/src/tests/class_tests/openms/source/SpectrumSettings_test.cpp +++ b/src/tests/class_tests/openms/source/SpectrumSettings_test.cpp @@ -177,33 +177,6 @@ START_SECTION((void setComment(const String& comment))) TEST_EQUAL(tmp.getComment(), "bla"); END_SECTION -START_SECTION((const std::vector& getPeptideIdentifications() const)) - SpectrumSettings tmp; - vector vec(tmp.getPeptideIdentifications()); - TEST_EQUAL(vec.size(),0); -END_SECTION - -START_SECTION((void setPeptideIdentifications(const std::vector& identifications))) - SpectrumSettings tmp; - vector vec; - - tmp.setPeptideIdentifications(vec); - TEST_EQUAL(tmp.getPeptideIdentifications().size(),0); - - PeptideIdentification dbs; - vec.push_back(dbs); - tmp.setPeptideIdentifications(vec); - TEST_EQUAL(tmp.getPeptideIdentifications().size(),1); -END_SECTION - -START_SECTION((std::vector& 
getPeptideIdentifications())) - SpectrumSettings tmp; - vector vec; - - tmp.getPeptideIdentifications().resize(1); - TEST_EQUAL(tmp.getPeptideIdentifications().size(),1); -END_SECTION - START_SECTION((SpectrumSettings& operator= (const SpectrumSettings& source))) SpectrumSettings tmp; tmp.setMetaValue("bla","bluff"); @@ -211,7 +184,6 @@ START_SECTION((SpectrumSettings& operator= (const SpectrumSettings& source))) tmp.getInstrumentSettings().getScanWindows().resize(1); tmp.getPrecursors().resize(1); tmp.getProducts().resize(1); - tmp.getPeptideIdentifications().resize(1); tmp.setType(SpectrumSettings::CENTROID); tmp.setComment("bla"); tmp.setNativeID("nid"); @@ -220,7 +192,6 @@ START_SECTION((SpectrumSettings& operator= (const SpectrumSettings& source))) SpectrumSettings tmp2(tmp); TEST_EQUAL(tmp2.getComment(), "bla"); TEST_EQUAL(tmp2.getType(), SpectrumSettings::CENTROID); - TEST_EQUAL(tmp2.getPeptideIdentifications().size(), 1); TEST_EQUAL(tmp2.getPrecursors().size(),1); TEST_EQUAL(tmp2.getProducts().size(),1); TEST_EQUAL(tmp2.getInstrumentSettings()==InstrumentSettings(), false); @@ -239,7 +210,6 @@ START_SECTION((SpectrumSettings(const SpectrumSettings& source))) tmp.getProducts().resize(1); tmp.setType(SpectrumSettings::CENTROID); tmp.setComment("bla"); - tmp.getPeptideIdentifications().resize(1); tmp.setNativeID("nid"); tmp.getDataProcessing().resize(1); tmp.setMetaValue("bla","bluff"); @@ -253,7 +223,6 @@ START_SECTION((SpectrumSettings(const SpectrumSettings& source))) TEST_EQUAL(tmp2.getInstrumentSettings()==InstrumentSettings(), false); TEST_EQUAL(tmp2.getAcquisitionInfo().empty(), true); TEST_EQUAL(tmp2.getAcquisitionInfo()==AcquisitionInfo(), false); - TEST_EQUAL(tmp2.getPeptideIdentifications().size(), 1); TEST_STRING_EQUAL(tmp2.getNativeID(),"nid"); TEST_EQUAL(tmp2.getDataProcessing().size(),1); TEST_STRING_EQUAL(tmp2.getMetaValue("bla"),"bluff"); @@ -266,7 +235,6 @@ START_SECTION((SpectrumSettings(const SpectrumSettings& source))) 
TEST_EQUAL(tmp2.getProducts().size(),0); TEST_EQUAL(tmp2.getInstrumentSettings()==InstrumentSettings(), true); TEST_EQUAL(tmp2.getAcquisitionInfo().empty(), true); - TEST_EQUAL(tmp2.getPeptideIdentifications().size(), 0); TEST_STRING_EQUAL(tmp2.getNativeID(),""); TEST_EQUAL(tmp2.getDataProcessing().size(),0); TEST_EQUAL(tmp2.metaValueExists("bla"),false); @@ -309,10 +277,6 @@ START_SECTION((bool operator== (const SpectrumSettings& rhs) const)) edit.getProducts().resize(1); TEST_EQUAL(edit==empty, false); - edit = empty; - edit.getPeptideIdentifications().resize(1); - TEST_EQUAL(edit==empty, false); - edit = empty; DataProcessingPtr dp = boost::shared_ptr(new DataProcessing); edit.getDataProcessing().push_back(dp); @@ -360,10 +324,6 @@ START_SECTION((bool operator!= (const SpectrumSettings& rhs) const)) edit.getProducts().resize(1); TEST_FALSE(edit == empty); - edit = empty; - edit.getPeptideIdentifications().resize(1); - TEST_FALSE(edit == empty); - edit = empty; DataProcessingPtr dp = boost::shared_ptr(new DataProcessing); edit.getDataProcessing().push_back(dp); @@ -411,15 +371,6 @@ START_SECTION((void unify(const SpectrumSettings &rhs))) appended_product.setMZ(2.0); appended.getProducts().push_back(appended_product); - // Identifications - PeptideIdentification org_ident; - org_ident.setIdentifier("org_ident"); - org.getPeptideIdentifications().push_back(org_ident); - - PeptideIdentification appended_ident; - appended_ident.setIdentifier("appended_ident"); - appended.getPeptideIdentifications().push_back(appended_ident); - // DataProcessings DataProcessingPtr org_processing = boost::shared_ptr(new DataProcessing); Software org_software; @@ -459,16 +410,6 @@ START_SECTION((void unify(const SpectrumSettings &rhs))) TEST_EQUAL(org.getProducts()[0].getMZ(), 1.0) TEST_EQUAL(org.getProducts()[1].getMZ(), 2.0) - // Identifications - TEST_EQUAL(org.getPeptideIdentifications().size(), 2) - ABORT_IF(org.getPeptideIdentifications().size()!=2) - - 
TEST_EQUAL(org.getPeptideIdentifications()[0].getIdentifier(), "org_ident") - TEST_EQUAL(org.getPeptideIdentifications()[1].getIdentifier(), "appended_ident") - - // Identifications - TEST_EQUAL(org.getDataProcessing().size(), 2) - ABORT_IF(org.getDataProcessing().size()!=2) TEST_EQUAL(org.getDataProcessing()[0]->getSoftware().getName(), "org_software") TEST_EQUAL(org.getDataProcessing()[1]->getSoftware().getName(), "appended_software") diff --git a/src/tests/topp/CMakeLists.txt b/src/tests/topp/CMakeLists.txt index 5b43d64b811..ad5bd0bd641 100644 --- a/src/tests/topp/CMakeLists.txt +++ b/src/tests/topp/CMakeLists.txt @@ -820,12 +820,6 @@ add_test("TOPP_IDRTCalibration_2" ${TOPP_BIN_PATH}/IDRTCalibration -test -in ${D add_test("TOPP_IDRTCalibration_2_out1" ${DIFF} -in1 IDRTCalibration_2_output.tmp.idXML -in2 ${DATA_DIR_TOPP}/IDRTCalibration_2_output.idXML ) set_tests_properties("TOPP_IDRTCalibration_2_out1" PROPERTIES DEPENDS "TOPP_IDRTCalibration_2") - -#------------------------------------------------------------------------------ -# IDMassAccuracy tests -add_test("TOPP_IDMassAccuracy_1" ${TOPP_BIN_PATH}/IDMassAccuracy -test -in ${DATA_DIR_TOPP}/THIRDPARTY/spectra.mzML -id_in ${DATA_DIR_TOPP}/THIRDPARTY/MSGFPlusAdapter_1_out.idXML -number_of_bins 10 -out_fragment IDMassAccuracy_1_out_fragment.tsv -out_fragment_fit IDMassAccuracy_1_out_fragment_fit.tsv -out_precursor IDMassAccuracy_1_out_precursor.tsv -out_precursor_fit IDMassAccuracy_1_out_precursor_fit.tsv) -# Currently just testing if the tool runs - #------------------------------------------------------------------------------ # IsobaricAnalyzer tests add_test("TOPP_IsobaricAnalyzer_1" ${TOPP_BIN_PATH}/IsobaricAnalyzer -test -in ${DATA_DIR_TOPP}/IsobaricAnalyzer_input_1.mzML -ini ${DATA_DIR_TOPP}/IsobaricAnalyzer.ini -out IsobaricAnalyzer_output_1.tmp.consensusXML) @@ -1281,11 +1275,6 @@ add_test("TOPP_MSstatsConverter_3" ${TOPP_BIN_PATH}/MSstatsConverter -test -in $ add_test("TOPP_MSstatsConverter_3_out1" 
${DIFF} -in1 MSstatsConverter_3_out.tmp.csv -in2 ${DATA_DIR_TOPP}/MSstatsConverter_3_out.csv ) set_tests_properties("TOPP_MSstatsConverter_3_out1" PROPERTIES DEPENDS "TOPP_MSstatsConverter_3") -### SpecLibSearcher tests -add_test("TOPP_SpecLibSearcher_1" ${TOPP_BIN_PATH}/SpecLibSearcher -test -ini ${DATA_DIR_TOPP}/SpecLibSearcher_1_parameters.ini -in ${DATA_DIR_TOPP}/SpecLibSearcher_1.mzML -lib ${DATA_DIR_TOPP}/SpecLibSearcher_1.MSP -out SpecLibSearcher_1.tmp.idXML) -add_test("TOPP_SpecLibSearcher_1_out1" ${DIFF} -in1 SpecLibSearcher_1.tmp.idXML -in2 ${DATA_DIR_TOPP}/SpecLibSearcher_1.idXML -whitelist "?xml-stylesheet" "IdentificationRun date" "db=") -set_tests_properties("TOPP_SpecLibSearcher_1_out1" PROPERTIES DEPENDS "TOPP_SpecLibSearcher_1") - if(NOT DISABLE_OPENSWATH) #------------------------------------------------------------------------------ # MRM / SWATH tests (from OpenSWATH) diff --git a/src/tests/topp/SpecLibSearcher_1.MSP b/src/tests/topp/SpecLibSearcher_1.MSP deleted file mode 100644 index d582e1fd7aa..00000000000 --- a/src/tests/topp/SpecLibSearcher_1.MSP +++ /dev/null @@ -1,15 +0,0 @@ -Name: AADDKEACFAVEGPK/2 -MW: 1608.745 -Comment: Spec=Consensus Pep=N-Semitryp_irreg/miss_good Fullname=C.AADDKEACFAVEGPK.L/2 Mods=1/7,C,Carbamidomethyl Parent=804.373 Inst=it Mz_diff=0.357 Mz_exact=804.3727 Mz_av=804.885 Protein="sp|P02769|ALBU_BOVIN Serum albumin precursor (Allergen Bos d 6) (BSA) - Bos taurus (Bovine)." Pseq=527 Organism="Protein" Se=1^I43:ex=0.0167/0.01974,dc=-0.756/0.4551,do=19.77/1.497,bs=0.0006,b2=0.0007,bd=-0.255 Sample=1/bsa_cam_different_voltages,43,1 Nreps=43/43 Missing=0.0642/0.0420 Parent_med=804.69/0.08 Max2med_orig=215.8/114.0 Dotfull=0.903/0.029 Dot_cons=0.948/0.034 Unassign_all=0.083 Unassigned=0.000 Dotbest=0.96 Flags=0,0,0 Naa=15 DUScorr=10/3.8/2.9 Dottheory=0.95 Pfin=1.3e+004 Probcorr=0.0067 Tfratio=2e+005 Pfract=0 -Num peaks: 10 -240.2 2 "b3-18/0.10 20/36 0.4" -359.2 2 "? 
39/43 0.7" -430.3 5 "y4/0.07 43/43 1.8" -560.4 2 "?i 27/42 0.6" -609.8 3 "y11-17^2/-0.01,y11-18^2/0.49 41/43 1.2" -713.5 4 "? 23/42 0.7" -861.3 5 "b8/-0.03,y8-46/-0.13 43/43 1.5" -978.4 5 "y9/-0.07 43/43 4.9" -1364.4 2 "b13/-0.17 43/43 1.0" -1480.6 3 "?i 19/36 0.5" - diff --git a/src/tests/topp/SpecLibSearcher_1.MzData b/src/tests/topp/SpecLibSearcher_1.MzData deleted file mode 100644 index 243c7e952ed..00000000000 --- a/src/tests/topp/SpecLibSearcher_1.MzData +++ /dev/null @@ -1,57 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - MzNwQ5qZs0NmJtdDmhkMRDNzGEQAYDJEM1NXRJqZdETNjKpEMxO5RA== - - - AAAAQAAAAEAAAKBAAAAAQAAAQEAAAIBAAACgQAAAoEAAAABAAABAQA== - - - - diff --git a/src/tests/topp/SpecLibSearcher_1.idXML b/src/tests/topp/SpecLibSearcher_1.idXML deleted file mode 100644 index ecea41a65df..00000000000 --- a/src/tests/topp/SpecLibSearcher_1.idXML +++ /dev/null @@ -1,20 +0,0 @@ - - - - - - - - - - - - - - - - - - - - diff --git a/src/tests/topp/SpecLibSearcher_1.mzML b/src/tests/topp/SpecLibSearcher_1.mzML deleted file mode 100644 index 0120911dbd9..00000000000 --- a/src/tests/topp/SpecLibSearcher_1.mzML +++ /dev/null @@ -1,147 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - AAAAYGYGbkAAAABAM3N2QAAAAMDM5HpAAAAAQDODgUAAAABgZg6DQAAAAAAATIZAAAAAYGbqikAAAABAM5OOQAAAAKCZUZVAAAAAYGYil0A= - - - - - - AAAAQAAAAEAAAKBAAAAAQAAAQEAAAIBAAACgQAAAoEAAAABAAABAQA== - - - - - - - - - 6840 - - -8886 -0 - diff --git a/src/tests/topp/SpecLibSearcher_1_parameters.ini b/src/tests/topp/SpecLibSearcher_1_parameters.ini deleted file mode 100644 index c28aa1b0933..00000000000 --- a/src/tests/topp/SpecLibSearcher_1_parameters.ini +++ /dev/null @@ -1,49 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - diff --git a/src/topp/IDMassAccuracy.cpp b/src/topp/IDMassAccuracy.cpp deleted file mode 100644 index e60e0a2cb5e..00000000000 --- a/src/topp/IDMassAccuracy.cpp +++ /dev/null @@ -1,478 +0,0 @@ -// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin -// SPDX-License-Identifier: BSD-3-Clause -// -// -------------------------------------------------------------------------- -// $Maintainer: Timo Sachsenberg $ -// $Authors: Andreas Bertsch $ -// -------------------------------------------------------------------------- - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include - -using namespace OpenMS; -using namespace std; -using namespace Math; - -//------------------------------------------------------------- -//Doxygen docu -//------------------------------------------------------------- - -/** -@page TOPP_IDMassAccuracy IDMassAccuracy - -@brief Calculates a distribution of the mass error from given mass spectra and IDs. - -@note Currently mzIdentML (mzid) is not directly supported as an input/output format of this tool. Convert mzid files to/from idXML using @ref TOPP_IDFileConverter if necessary. - -The command line parameters of this tool are: -@verbinclude TOPP_IDMassAccuracy.cli -INI file documentation of this tool: -@htmlinclude TOPP_IDMassAccuracy.html - -Given a number of peak maps and for each of the maps an idXML file which contains -peptide identifications the theoretical masses of the identifications and the peaks -of the spectra are compared. This can be done for precursor information stored in -the spectra as well as for fragment information. - -The result is a distribution of errors of experimental vs. theoretical masses. -Having such distributions given -the search parameters of the sequence database search can be adjusted to speed-up -the identification process and to get a higher performance. 
-*/ - -// We do not want this class to show up in the docu: -/// @cond TOPPCLASSES - -// simple struct which can hold the -// measured and expected masses -struct MassDifference -{ - double exp_mz = 0.0; - Int charge = 0; - double theo_mz = 0.0; - double intensity = 0.0; -}; - -class TOPPIDMassAccuracy : - public TOPPBase -{ -public: - TOPPIDMassAccuracy() : - TOPPBase("IDMassAccuracy", "Calculates a distribution of the mass error from given mass spectra and IDs.") - { - - } - -protected: - void registerOptionsAndFlags_() override - { - registerInputFileList_("in", "", StringList(), "Input mzML file list, containing the spectra."); - setValidFormats_("in", ListUtils::create("mzML")); - registerInputFileList_("id_in", "", StringList(), "Input idXML file list, containing the identifications."); - setValidFormats_("id_in", ListUtils::create("idXML")); - - registerOutputFile_("out_precursor", "", "", "Output file which contains the deviations from the precursors", false, false); - setValidFormats_("out_precursor", ListUtils::create("tsv")); - registerFlag_("precursor_error_ppm", "If this flag is used, the precursor mass tolerances are estimated in ppm instead of Da."); - - registerOutputFile_("out_fragment", "", "", "Output file which contains the fragment ion m/z deviations", false, false); - setValidFormats_("out_fragment", ListUtils::create("tsv")); - registerFlag_("fragment_error_ppm", "If this flag is used, the fragment mass tolerances are estimated in ppm instead of Da."); - - registerDoubleOption_("fragment_mass_tolerance", "", 0.5, "Maximal fragment mass tolerance which is allowed for MS/MS spectra, used for the calculation of matching ions.", false, false); - - registerIntOption_("number_of_bins", "<#bins>", 100, "Number of bins that should be used to calculate the histograms for the fitting.", false, true); - setMinInt_("number_of_bins", 10); - - registerOutputFile_("out_precursor_fit", "", "", "Gaussian fit to the histogram of mass deviations from the 
precursors.", false, true); - setValidFormats_("out_precursor_fit", ListUtils::create("tsv")); - - registerOutputFile_("out_fragment_fit", "", "", "Gaussian fit to the histogram of mass deviations from the fragments.", false, true); - setValidFormats_("out_fragment_fit", ListUtils::create("tsv")); - } - - double getMassDifference(double theo_mz, double exp_mz, bool use_ppm) - { - double error(exp_mz - theo_mz); - if (use_ppm) - { - error = error / theo_mz * (double)1e6; - } - return error; - } - - ExitCodes main_(int, const char **) override - { - //------------------------------------------------------------- - // parsing parameters - //------------------------------------------------------------- - - StringList id_in(getStringList_("id_in")); - StringList in_raw(getStringList_("in")); - Size number_of_bins((UInt)getIntOption_("number_of_bins")); - bool precursor_error_ppm(getFlag_("precursor_error_ppm")); - bool fragment_error_ppm(getFlag_("fragment_error_ppm")); - - if (in_raw.size() != id_in.size()) - { - writeLogError_("Number of spectrum files and identification files differs..."); - return ILLEGAL_PARAMETERS; - } - - //------------------------------------------------------------- - // reading input - //------------------------------------------------------------- - - vector > pep_ids; - vector > prot_ids; - pep_ids.resize(id_in.size()); - prot_ids.resize(id_in.size()); - - FileHandler idxmlfile; - for (Size i = 0; i != id_in.size(); ++i) - { - idxmlfile.loadIdentifications(id_in[i], prot_ids[i], pep_ids[i], {FileTypes::IDXML}); - } - - // read mzML files - vector maps_raw; - maps_raw.resize(in_raw.size()); - - FileHandler mzml_file; - for (Size i = 0; i != in_raw.size(); ++i) - { - mzml_file.loadExperiment(in_raw[i], maps_raw[i], {FileTypes::MZML}); - } - - //------------------------------------------------------------- - // calculations - //------------------------------------------------------------- - - // mapping ids - IDMapper mapper; - for (Size i = 0; 
i != maps_raw.size(); ++i) - { - mapper.annotate(maps_raw[i], pep_ids[i], prot_ids[i]); - } - - // normalize the spectra - Normalizer normalizer; - for (vector::iterator it1 = maps_raw.begin(); it1 != maps_raw.end(); ++it1) - { - for (PeakMap::Iterator it2 = it1->begin(); it2 != it1->end(); ++it2) - { - normalizer.filterSpectrum(*it2); - } - } - - // generate precursor statistics - vector precursor_diffs; - if (!getStringOption_("out_precursor").empty() || !getStringOption_("out_precursor_fit").empty()) - { - for (Size i = 0; i != maps_raw.size(); ++i) - { - for (Size j = 0; j != maps_raw[i].size(); ++j) - { - if (maps_raw[i][j].getPeptideIdentifications().empty()) - { - continue; - } - for (vector::const_iterator it = maps_raw[i][j].getPeptideIdentifications().begin(); it != maps_raw[i][j].getPeptideIdentifications().end(); ++it) - { - if (!it->getHits().empty()) - { - PeptideHit hit = *it->getHits().begin(); - MassDifference md; - Int charge = hit.getCharge(); - if (charge == 0) - { - charge = 1; - } - md.exp_mz = it->getMZ(); - md.theo_mz = hit.getSequence().getMonoWeight(Residue::Full, charge); - md.charge = charge; - precursor_diffs.push_back(md); - } - } - } - } - } - - // generate fragment ions statistics - vector fragment_diffs; - TheoreticalSpectrumGenerator tsg; - SpectrumAlignment sa; - double fragment_mass_tolerance(getDoubleOption_("fragment_mass_tolerance")); - Param sa_param(sa.getParameters()); - sa_param.setValue("tolerance", fragment_mass_tolerance); - sa.setParameters(sa_param); - - if (!getStringOption_("out_fragment").empty() || !getStringOption_("out_fragment_fit").empty()) - { - for (Size i = 0; i != maps_raw.size(); ++i) - { - for (Size j = 0; j != maps_raw[i].size(); ++j) - { - if (maps_raw[i][j].getPeptideIdentifications().empty()) - { - continue; - } - for (vector::const_iterator it = maps_raw[i][j].getPeptideIdentifications().begin(); it != maps_raw[i][j].getPeptideIdentifications().end(); ++it) - { - if (!it->getHits().empty()) - { - 
PeptideHit hit = *it->getHits().begin(); - - PeakSpectrum theo_spec; - tsg.getSpectrum(theo_spec, hit.getSequence(), 1, 1); - - vector > pairs; - sa.getSpectrumAlignment(pairs, theo_spec, maps_raw[i][j]); - //cerr << hit.getSequence() << " " << hit.getSequence().getSuffix(1).getFormula() << " " << hit.getSequence().getSuffix(1).getFormula().getMonoWeight() << endl; - for (vector >::const_iterator pit = pairs.begin(); pit != pairs.end(); ++pit) - { - MassDifference md; - md.exp_mz = maps_raw[i][j][pit->second].getMZ(); - md.theo_mz = theo_spec[pit->first].getMZ(); - //cerr.precision(15); - //cerr << md.exp_mz << " " << md.theo_mz << " " << md.exp_mz - md.theo_mz << endl; - md.intensity = maps_raw[i][j][pit->second].getIntensity(); - md.charge = hit.getCharge(); - fragment_diffs.push_back(md); - } - } - } - } - } - } - - //------------------------------------------------------------- - // writing output - //------------------------------------------------------------- - - String precursor_out_file(getStringOption_("out_precursor")); - if (!precursor_out_file.empty() || !getStringOption_("out_precursor_fit").empty()) - { - vector errors; - - double min_diff(numeric_limits::max()), max_diff(numeric_limits::min()); - for (Size i = 0; i != precursor_diffs.size(); ++i) - { - double diff = getMassDifference(precursor_diffs[i].theo_mz, precursor_diffs[i].exp_mz, precursor_error_ppm); - errors.push_back(diff); - - if (diff > max_diff) - { - max_diff = diff; - } - if (diff < min_diff) - { - min_diff = diff; - } - } - if (!precursor_out_file.empty()) - { - ofstream precursor_out(precursor_out_file.c_str()); - for (Size i = 0; i != errors.size(); ++i) - { - precursor_out << errors[i] << "\n"; - } - precursor_out.close(); - } - - // fill histogram with the collected values - double bin_size = (max_diff - min_diff) / (double)number_of_bins; - Histogram hist(min_diff, max_diff, bin_size); - for (Size i = 0; i != errors.size(); ++i) - { - hist.inc(errors[i], 1.0); - } - - 
writeDebug_("min_diff=" + String(min_diff) + ", max_diff=" + String(max_diff) + ", number_of_bins=" + String(number_of_bins), 1); - - // transform the histogram into a vector > for the fitting - vector > values; - for (Size i = 0; i != hist.size(); ++i) - { - DPosition<2> p; - p.setX((double)i / (double)number_of_bins * (max_diff - min_diff) + min_diff); - p.setY(hist[i]); - values.push_back(p); - } - - double mean = Math::mean(errors.begin(), errors.end()); - double abs_dev = Math::absdev(errors.begin(), errors.end(), mean); - double sdv = Math::sd(errors.begin(), errors.end(), mean); - sort(errors.begin(), errors.end()); - double median = errors[(Size)(errors.size() / 2.0)]; - - writeDebug_("Precursor mean error: " + String(mean), 1); - writeDebug_("Precursor abs. dev.: " + String(abs_dev), 1); - writeDebug_("Precursor std. dev.: " + String(sdv), 1); - writeDebug_("Precursor median error: " + String(median), 1); - - - // calculate histogram for gauss fitting - GaussFitter gf; - GaussFitter::GaussFitResult init_param (hist.maxValue(), median, sdv/500.0); - gf.setInitialParameters(init_param); - - try - { - gf.fit(values); - - // write fit data - String fit_out_file(getStringOption_("out_precursor_fit")); - if (!fit_out_file.empty()) - { - ofstream fit_out(fit_out_file.c_str()); - if (precursor_error_ppm) - { - fit_out << "error in ppm"; - } - else - { - fit_out << "error in Da"; - } - fit_out << "\tfrequency\n"; - - for (vector >::const_iterator it = values.begin(); it != values.end(); ++it) - { - fit_out << it->getX() << "\t" << it->getY() << "\n"; - } - fit_out.close(); - } - - } - catch (Exception::UnableToFit&) - { - writeLogWarn_("Unable to fit a Gaussian distribution to the precursor mass errors"); - } - } - - String fragment_out_file(getStringOption_("out_fragment")); - if (!fragment_out_file.empty() || !getStringOption_("out_fragment_fit").empty()) - { - vector errors; - double min_diff(numeric_limits::max()), max_diff(numeric_limits::min()); - for (Size i 
= 0; i != fragment_diffs.size(); ++i) - { - double diff = getMassDifference(fragment_diffs[i].theo_mz, fragment_diffs[i].exp_mz, fragment_error_ppm); - errors.push_back(diff); - - if (diff > max_diff) - { - max_diff = diff; - } - if (diff < min_diff) - { - min_diff = diff; - } - } - if (!fragment_out_file.empty()) - { - ofstream fragment_out(fragment_out_file.c_str()); - for (Size i = 0; i != errors.size(); ++i) - { - fragment_out << errors[i] << "\n"; - } - fragment_out.close(); - } - // fill histogram with the collected values - // here we use the intensities to scale the error - // low intensity peaks are likely to be random matches - double bin_size = (max_diff - min_diff) / (double)number_of_bins; - Histogram hist(min_diff, max_diff, bin_size); - for (Size i = 0; i != fragment_diffs.size(); ++i) - { - double diff = getMassDifference(fragment_diffs[i].theo_mz, fragment_diffs[i].exp_mz, fragment_error_ppm); - hist.inc(diff, fragment_diffs[i].intensity); - } - - writeDebug_("min_diff=" + String(min_diff) + ", max_diff=" + String(max_diff) + ", number_of_bins=" + String(number_of_bins), 1); - - // transform the histogram into a vector > for the fitting - vector > values; - for (Size i = 0; i != hist.size(); ++i) - { - DPosition<2> p; - p.setX((double)i / (double)number_of_bins * (max_diff - min_diff) + min_diff); - p.setY(hist[i]); - values.push_back(p); - } - - double mean = Math::mean(errors.begin(), errors.end()); - double abs_dev = Math::absdev(errors.begin(), errors.end(), mean); - double sdv = Math::sd(errors.begin(), errors.end(), mean); - sort(errors.begin(), errors.end()); - double median = errors[(Size)(errors.size() / 2.0)]; - - writeDebug_("Fragment mean error: " + String(mean), 1); - writeDebug_("Fragment abs. dev.: " + String(abs_dev), 1); - writeDebug_("Fragment std. 
dev.: " + String(sdv), 1); - writeDebug_("Fragment median error: " + String(median), 1); - - // calculate histogram for gauss fitting - GaussFitter gf; - GaussFitter::GaussFitResult init_param (hist.maxValue(), median, sdv / 100.0); - gf.setInitialParameters(init_param); - - try - { - gf.fit(values); - - // write fit data - String fit_out_file(getStringOption_("out_fragment_fit")); - if (!fit_out_file.empty()) - { - ofstream fit_out(fit_out_file.c_str()); - if (precursor_error_ppm) - { - fit_out << "error in ppm"; - } - else - { - fit_out << "error in Da"; - } - fit_out << "\tfrequency\n"; - - for (vector >::const_iterator it = values.begin(); it != values.end(); ++it) - { - fit_out << it->getX() << "\t" << it->getY() << "\n"; - } - fit_out.close(); - } - } - catch (Exception::UnableToFit&) - { - writeLogWarn_("Unable to fit a Gaussian distribution to the fragment mass errors"); - } - } - - return EXECUTION_OK; - } - -}; - - -int main(int argc, const char ** argv) -{ - TOPPIDMassAccuracy tool; - return tool.main(argc, argv); -} - -/// @endcond diff --git a/src/topp/IDSplitter.cpp b/src/topp/IDSplitter.cpp index baaddaa5d10..ccccfba51c7 100644 --- a/src/topp/IDSplitter.cpp +++ b/src/topp/IDSplitter.cpp @@ -12,6 +12,7 @@ #include #include #include +#include using namespace OpenMS; using namespace std; @@ -77,9 +78,9 @@ class TOPPIDSplitter : void registerOptionsAndFlags_() override { registerInputFile_("in", "", "", "Input file (data annotated with identifications)"); - setValidFormats_("in", ListUtils::create("mzML,featureXML,consensusXML")); + setValidFormats_("in", ListUtils::create("featureXML,consensusXML")); registerOutputFile_("out", "", "", "Output file (data without identifications). Either 'out' or 'id_out' are required. 
They can be used together.", false); - setValidFormats_("out", ListUtils::create("mzML,featureXML,consensusXML")); + setValidFormats_("out", ListUtils::create("featureXML,consensusXML")); registerOutputFile_("id_out", "", "", "Output file (identifications). Either 'out' or 'id_out' are required. They can be used together.", false); setValidFormats_("id_out", ListUtils::create("idXML")); } @@ -101,28 +102,7 @@ class TOPPIDSplitter : FileTypes::Type in_type = FileHandler::getType(in); - if (in_type == FileTypes::MZML) - { - PeakMap experiment; - FileHandler().loadExperiment(in, experiment, {FileTypes::MZML}, log_type_); - // what about unassigned peptide IDs? - for (PeakMap::Iterator exp_it = experiment.begin(); - exp_it != experiment.end(); ++exp_it) - { - peptides.insert(peptides.end(), - exp_it->getPeptideIdentifications().begin(), - exp_it->getPeptideIdentifications().end()); - exp_it->getPeptideIdentifications().clear(); - } - experiment.getProteinIdentifications().swap(proteins); - if (!out.empty()) - { - addDataProcessing_(experiment, - getProcessingInfo_(DataProcessing::FILTERING)); - FileHandler().storeExperiment(out, experiment, {FileTypes::MZML}); - } - } - else if (in_type == FileTypes::FEATUREXML) + if (in_type == FileTypes::FEATUREXML) { FeatureMap features; FileHandler().loadFeatures(in, features, {FileTypes::FEATUREXML}); @@ -168,8 +148,8 @@ class TOPPIDSplitter : if (!id_out.empty()) { // IDMapper can match a peptide ID to several overlapping features, - // resulting in duplicates; this shouldn't be the case for peak data - if (in_type != FileTypes::MZML) removeDuplicates_(peptides); + // resulting in duplicates + removeDuplicates_(peptides); FileHandler().storeIdentifications(id_out, proteins, peptides, {FileTypes::IDXML}); } diff --git a/src/topp/MapAlignerIdentification.cpp b/src/topp/MapAlignerIdentification.cpp index a94603a4208..ee7e3e6f96f 100644 --- a/src/topp/MapAlignerIdentification.cpp +++ b/src/topp/MapAlignerIdentification.cpp @@ -210,13 
+210,6 @@ class TOPPMapAlignerIdentification : FileTypes::Type filetype = FileHandler::getType(reference_file); switch (filetype) { - case FileTypes::MZML: - { - PeakMap experiment; - FileHandler().loadExperiment(reference_file, experiment, {FileTypes::MZML}, log_type_); - algorithm.setReference(experiment); - } - break; case FileTypes::FEATUREXML: { FeatureMap features; diff --git a/src/topp/SpecLibCreator.cpp b/src/topp/SpecLibCreator.cpp deleted file mode 100644 index 75add9ffb3a..00000000000 --- a/src/topp/SpecLibCreator.cpp +++ /dev/null @@ -1,258 +0,0 @@ -// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin -// SPDX-License-Identifier: BSD-3-Clause -// -// -------------------------------------------------------------------------- -// $Maintainer: Timo Sachsenberg $ -// $Authors: $ -// -------------------------------------------------------------------------- - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -using namespace OpenMS; -using namespace std; - -//------------------------------------------------------------- -//Doxygen docu -//------------------------------------------------------------- - -/** -@page TOPP_SpecLibCreator SpecLibCreator - -@brief creates with given data a .MSP format spectral library. - -Information file should have the following information: peptide, retention time, measured weight, charge state. -Extra information is allowed. - -@experimental This Utility is not well tested and some features might not work as expected. 
- -The command line parameters of this tool are: -@verbinclude TOPP_SpecLibCreator.cli -INI file documentation of this tool: -@htmlinclude TOPP_SpecLibCreator.html -*/ - -// We do not want this class to show up in the docu: -/// @cond TOPPCLASSES - -class TOPPSpecLibCreator : - public TOPPBase -{ -public: - TOPPSpecLibCreator() : - TOPPBase("SpecLibCreator", "Creates an MSP formatted spectral library.") - { - } - -protected: - void registerOptionsAndFlags_() override - { - registerInputFile_("info", "", "", "Holds id, peptide, retention time etc."); - setValidFormats_("info", ListUtils::create("csv")); - - registerStringOption_("itemseperator", "", ",", " Separator between items. e.g. ,", false); - registerStringOption_("itemenclosed", "", "false", "'true' or 'false' if true every item is enclosed e.g. '$peptide$,$run$...", false); - setValidStrings_("itemenclosed", ListUtils::create("true,false")); - - registerInputFile_("spec", "", "", "spectra"); - setValidFormats_("spec", ListUtils::create("mzData,mzXML")); - - registerOutputFile_("out", "", "", "output MSP formatted spectra library"); - setValidFormats_("out", ListUtils::create("msp")); - } - - ExitCodes main_(int, const char**) override - { - //------------------------------------------------------------- - // parameter handling - //------------------------------------------------------------- - - String info = getStringOption_("info"); - String itemseperator = getStringOption_("itemseperator"); - String out = getStringOption_("out"); - bool itemenclosed; - if (getStringOption_("itemenclosed") == "true") - { - itemenclosed = true; - } - else - { - itemenclosed = false; - } - - String spec = getStringOption_("spec"); - if (info == String::EMPTY) - { - throw Exception::RequiredParameterNotGiven(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "info"); - } - if (spec == String::EMPTY) - { - throw Exception::RequiredParameterNotGiven(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "spec"); - } - - - 
//------------------------------------------------------------- - // loading input - //------------------------------------------------------------- - Int retention_time = -1; - Int peptide = -1; - Int measured_weight = -1; - //UInt first_scan; - UInt charge_state(0), Experimental_id(0); //,found_by, track, comment, vaccination_peptid,epitope, confident, hlaallele; - const char* sepi = itemseperator.c_str(); - char sepo = *sepi; - CsvFile csv_file(info, sepo, itemenclosed); - vector list; - - list.resize(csv_file.rowCount()); - - for (UInt i = 0; i < csv_file.rowCount(); ++i) - { - csv_file.getRow(i, list[i]); - } - for (UInt i = 0; i < list[0].size(); ++i) - { - - if (list[0][i].toLower().removeWhitespaces().compare("retentiontime") == 0) - { - retention_time = i; - } - else if (list[0][i].toLower().hasSubstring("_id")) - { - Experimental_id = i; - } - else if (list[0][i].toLower() == "last scan") - { - // last_scan = i; - } - else if (list[0][i].toLower() == "modification") - { - // modification = i; - } - else if (list[0][i].toLower().removeWhitespaces().compare("chargestate") == 0 || list[0][i].toLower().removeWhitespaces().hasSubstring("charge")) - { - charge_state = i; - } - else if (list[0][i].toLower().trim().compare("peptide") == 0) - { - peptide = i; - } - else if (list[0][i].toLower().removeWhitespaces().hasSubstring("measuredweight") || list[0][i].removeWhitespaces().compare("measuredweight[M+nH]n+") == 0) - { - measured_weight = i; - } - } - if (retention_time == -1) - { - throw Exception::RequiredParameterNotGiven(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "unclear which parameter is retention time"); - } - if (peptide == -1) - { - throw Exception::RequiredParameterNotGiven(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "unclear which parameter is peptide"); - } - if (measured_weight == -1) - { - throw Exception::RequiredParameterNotGiven(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "unclear which parameter is measured weight"); - } - FileHandler fh; - 
FileTypes::Type in_type = fh.getType(spec); - PeakMap msexperiment; - - if (in_type == FileTypes::UNKNOWN) - { - writeLogWarn_("Warning: Could not determine input file type!"); - } - else if (in_type == FileTypes::MZDATA || in_type == FileTypes::MZXML) - { - FileHandler().loadExperiment(spec, msexperiment, {FileTypes::MZDATA, FileTypes::MZXML}, log_type_); - } - if (msexperiment.getMinRT() == 0) - { - throw Exception::RequiredParameterNotGiven(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "EMPTY??"); - } - PeakMap library; - - //------------------------------------------------------------- - // creating library - //------------------------------------------------------------- - UInt found_counter = 0; - - for (UInt i = 1; i < list.size(); ++i) - { - bool no_peptide = true; - double rt = (60 * (list[i][retention_time].toFloat())); // from minutes to seconds - double mz = list[i][measured_weight].toFloat(); - for (PeakMap::Iterator it = msexperiment.begin(); it < msexperiment.end(); ++it) - { - if ((abs(rt - it->getRT()) < 5) && (abs(mz - it->getPrecursors()[0].getMZ()) < 0.1)) - { - //if ( ceil(rt) == ceil(it->getRT()) || ceil(rt) == floor(it->getRT()) || floor(rt) == ceil(it->getRT()) || floor(rt) == floor(it->getRT())) - ++found_counter; - no_peptide = false; - cout << "Found Peptide " << list[i][peptide] << " with id: " << list[i][Experimental_id] << "\n"; - cout << "rt: " << it->getRT() << " and mz: " << it->getPrecursors()[0].getMZ() << "\n"; - - MSSpectrum speci; - speci.setRT(it->getRT()); - speci.setMSLevel(2); - speci.setPrecursors(it->getPrecursors()); - for (UInt j = 0; j < it->size(); ++j) - { - - Peak1D richy; - richy.setIntensity(it->operator[](j).getIntensity()); - richy.setPosition(it->operator[](j).getPosition()); - richy.setMZ(it->operator[](j).getMZ()); - richy.setPos(it->operator[](j).getPos()); //ALIAS for setMZ??? 
- - speci.push_back(richy); - } - PeptideHit hit; // = *it->getPeptideIdentifications().begin()->getHits().begin(); - AASequence aa = AASequence::fromString(list[i][peptide]); - hit.setSequence(aa); - hit.setCharge(list[i][charge_state].toInt()); - vector hits; - hits.push_back(hit); - vector pepi; - PeptideIdentification pep; - pep.setHits(hits); - pepi.push_back(pep); - speci.setPeptideIdentifications(pepi); - //it->getPeptideIdentifications().begin()->setHits(hits); - library.addSpectrum(speci); - } - } - if (no_peptide) - { - cout << "Peptide: " << list[i][peptide] << " not found\n"; - } - } - cout << "Found " << found_counter << " peptides\n"; - - //------------------------------------------------------------- - // writing output - //------------------------------------------------------------- - in_type = fh.getType(out); - FileHandler().storeExperiment(out, library, {FileTypes::MZDATA, FileTypes::MZXML, FileTypes::MSP}, log_type_); - return EXECUTION_OK; - } - -}; - - - - -int main(int argc, const char** argv) -{ - TOPPSpecLibCreator tool; - return tool.main(argc, argv); -} - -/// @endcond diff --git a/src/topp/SpecLibSearcher.cpp b/src/topp/SpecLibSearcher.cpp deleted file mode 100644 index a01a3762cd3..00000000000 --- a/src/topp/SpecLibSearcher.cpp +++ /dev/null @@ -1,605 +0,0 @@ -// Copyright (c) 2002-present, OpenMS Inc. 
-- EKU Tuebingen, ETH Zurich, and FU Berlin -// SPDX-License-Identifier: BSD-3-Clause -// -// -------------------------------------------------------------------------- -// $Maintainer: Timo Sachsenberg $ -// $Authors: David Wojnar, Timo Sachsenberg $ -// -------------------------------------------------------------------------- - -#include - -#include -#include -#include -#include -#include -#include -// TODO add ID support to Handler -#include -#include -#include -#include - -#include -#include -#include -#include -using namespace OpenMS; -using namespace std; - -//------------------------------------------------------------- -//Doxygen docu -//------------------------------------------------------------- - -/** -@page TOPP_SpecLibSearcher SpecLibSearcher - -@brief Identifies peptide MS/MS spectra by spectral matching with a searchable spectral library. - -
- - - - - - - - -
pot. predecessor tools - → SpecLibSearcher → pot. successor tools -
@ref TOPP_SpecLibCreator @ref TOPP_IDFilter or @n any protein/peptide processing tool
-
- -@experimental This TOPP-tool is not well tested and not all features might be properly implemented and tested. - -@note Currently mzIdentML (mzid) is not directly supported as an input/output format of this tool. Convert mzid files to/from idXML using @ref TOPP_IDFileConverter if necessary. - -The command line parameters of this tool are: -@verbinclude TOPP_SpecLibSearcher.cli -INI file documentation of this tool: -@htmlinclude TOPP_SpecLibSearcher.html -*/ - -// We do not want this class to show up in the docu: -/// @cond TOPPCLASSES - -class TOPPSpecLibSearcher : - public TOPPBase -{ -public: - TOPPSpecLibSearcher() : - TOPPBase("SpecLibSearcher", "Identifies peptide MS/MS spectra by spectral matching with a searchable spectral library.") - { - } - -protected: - void registerOptionsAndFlags_() override - { - registerInputFileList_("in", "", ListUtils::create(""), "Input files"); - setValidFormats_("in", ListUtils::create("mzML")); - registerInputFile_("lib", "", "", "searchable spectral library (MSP format)"); - setValidFormats_("lib", ListUtils::create("msp")); - registerOutputFileList_("out", "", ListUtils::create(""), "Output files. 
Have to be as many as input files"); - setValidFormats_("out", ListUtils::create("idXML")); - - registerTOPPSubsection_("precursor", "Precursor (Parent Ion) Options"); - registerDoubleOption_("precursor:mass_tolerance", "", 10.0, "Width of precursor mass tolerance window", false); - - StringList precursor_mass_tolerance_unit_valid_strings; - precursor_mass_tolerance_unit_valid_strings.push_back("ppm"); - precursor_mass_tolerance_unit_valid_strings.push_back("Da"); - - registerStringOption_("precursor:mass_tolerance_unit", "", "ppm", "Unit of precursor mass tolerance.", false, false); - setValidStrings_("precursor:mass_tolerance_unit", precursor_mass_tolerance_unit_valid_strings); - - registerIntOption_("precursor:min_charge", "", 2, "Minimum precursor charge to be considered.", false, true); - registerIntOption_("precursor:max_charge", "", 5, "Maximum precursor charge to be considered.", false, true); - - // consider one before annotated monoisotopic peak and the annotated one - IntList isotopes = {0, 1}; - registerIntList_("precursor:isotopes", "", isotopes, "Corrects for mono-isotopic peak misassignments. (E.g.: 1 = prec. 
may be misassigned to first isotopic peak)", false, false); - - registerTOPPSubsection_("fragment", "Fragments (Product Ion) Options"); - registerDoubleOption_("fragment:mass_tolerance", "", 10.0, "Fragment mass tolerance", false); - -// StringList fragment_mass_tolerance_unit_valid_strings; -// fragment_mass_tolerance_unit_valid_strings.push_back("ppm"); -// fragment_mass_tolerance_unit_valid_strings.push_back("Da"); - -// registerStringOption_("fragment:mass_tolerance_unit", "", "ppm", "Unit of fragment m", false, false); -// setValidStrings_("fragment:mass_tolerance_unit", fragment_mass_tolerance_unit_valid_strings); - - registerStringOption_("compare_function", "", "ZhangSimilarityScore", "function for similarity comparison", false); - setValidStrings_("compare_function", {"ZhangSimilarityScore", "SpectraSTSimilarityScore"}); - - registerTOPPSubsection_("report", "Reporting Options"); - registerIntOption_("report:top_hits", "", 10, "Maximum number of top scoring hits per spectrum that are reported.", false, true); - - addEmptyLine_(); - - registerTOPPSubsection_("filter", "Filtering options. Most are especially useful when the query spectra are raw."); - registerDoubleOption_("filter:remove_peaks_below_threshold", "", 2.01, "All peaks of a query spectrum with intensities below will be zeroed.", false); - registerIntOption_("filter:min_peaks", "", 5, "required minimum number of peaks for a query spectrum", false); - registerIntOption_("filter:max_peaks", "", 150, "Use only the top of peaks.", false); - registerIntOption_("filter:cut_peaks_below", "", 1000, "Remove all peaks which are lower than 1/ of the highest peaks. 
Default equals all peaks which are lower than 0.001 of the maximum intensity peak", false); - - registerTOPPSubsection_("modifications", "Modifications Options"); - vector all_mods; - ModificationsDB::getInstance()->getAllSearchModifications(all_mods); - registerStringList_("modifications:fixed", "", ListUtils::create(""), "Fixed modifications, specified using UniMod (www.unimod.org) terms, e.g. 'Carbamidomethyl (C)'", false); - setValidStrings_("modifications:fixed", all_mods); - registerStringList_("modifications:variable", "", ListUtils::create(""), "Variable modifications, specified using UniMod (www.unimod.org) terms, e.g. 'Oxidation (M)'", false); - setValidStrings_("modifications:variable", all_mods); - registerIntOption_("modifications:variable_max_per_peptide", "", 2, "Maximum number of residues carrying a variable modification per candidate peptide", false, false); - - addEmptyLine_(); - } - - using MapLibraryPrecursorToLibrarySpectrum = multimap; - - MapLibraryPrecursorToLibrarySpectrum annotateIdentificationsToSpectra_(const vector& ids, - const PeakMap& library, - StringList variable_modifications, - StringList fixed_modifications, - double remove_peaks_below_threshold) - { - MapLibraryPrecursorToLibrarySpectrum annotated_lib; - - ModificationsDB* mdb = ModificationsDB::getInstance(); - - - // iterate over library spectra and add associated annotations - PeakMap::const_iterator library_it = library.begin(); - vector::const_iterator id_it = ids.begin(); - for (; library_it < library.end(); ++library_it, ++id_it) - { - const MSSpectrum& lib_spec = *library_it; - const double& precursor_MZ = lib_spec.getPrecursors()[0].getMZ(); - - const PeptideIdentification& id = *id_it; - const AASequence& aaseq = id.getHits()[0].getSequence(); - - PeakSpectrum lib_entry; - bool variable_modifications_ok(true), fixed_modifications_ok(true); - - // check if each amino acid listed as modified in fixed modifications are modified - if (!fixed_modifications.empty()) - { - 
for (Size j = 0; j < aaseq.size(); ++j) - { - const Residue& mod = aaseq.getResidue(j); - for (Size k = 0; k < fixed_modifications.size(); ++k) - { - if (mod.getOneLetterCode()[0] == mdb->getModification(fixed_modifications[k])->getOrigin() && fixed_modifications[k] != mod.getModificationName()) - { - fixed_modifications_ok = false; - break; - } - } - } - } - - // check if each amino acid listed in variable modifications is either unmodified or modified with the corresponding modification - // Note: this code currently does not allow for multiple variable modifications with same origin - if (aaseq.isModified() && (!variable_modifications.empty())) - { - for (Size j = 0; j < aaseq.size(); ++j) - { - if (!aaseq[j].isModified()) { continue; } - - const Residue& mod = aaseq.getResidue(j); - for (Size k = 0; k < variable_modifications.size(); ++k) - { - if (mod.getOneLetterCode()[0] == mdb->getModification(variable_modifications[k])->getOrigin() && variable_modifications[k] != mod.getModificationName()) - { - variable_modifications_ok = false; - break; - } - } - } - } - - // TODO: check entries that don't adhere to this rule - if (!variable_modifications_ok || !fixed_modifications_ok) { continue; } - - // copy peptide identification over to spectrum meta data - lib_entry.getPeptideIdentifications().push_back(id); - lib_entry.setPrecursors(lib_spec.getPrecursors()); - - // empty array would segfault - if (id.getHits().empty() || id.getHits()[0].getPeakAnnotations().empty()) - { - throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Expected StringDataArray of type MSPeakInfo"); - } - - const vector& pa = id.getHits()[0].getPeakAnnotations(); - // library entry transformation - for (UInt l = 0; l < lib_spec.size(); ++l) - { - Peak1D peak; - if (lib_spec[l].getIntensity() > remove_peaks_below_threshold) - { - // this is the "MSPPeakInfo" array, see MSPFile which creates a single StringDataArray - const String& sa = pa[l].annotation; - - // TODO: 
check why this scaling is done for ? peaks (dubious peaks?) - if (sa[0] == '?') - { - peak.setIntensity(sqrt(0.2 * lib_spec[l].getIntensity())); - } - else - { - peak.setIntensity(sqrt(lib_spec[l].getIntensity())); - } - - peak.setMZ(lib_spec[l].getMZ()); - lib_entry.push_back(peak); - } - } - annotated_lib.insert(make_pair(precursor_MZ, lib_entry)); - } - return annotated_lib; - } - - ExitCodes main_(int, const char**) override - { - //------------------------------------------------------------- - // parameter handling - //------------------------------------------------------------- - StringList in_spec = getStringList_("in"); - StringList out = getStringList_("out"); - String in_lib = getStringOption_("lib"); - String compare_function = getStringOption_("compare_function"); - - float precursor_mass_tolerance = getDoubleOption_("precursor:mass_tolerance"); - bool precursor_mass_tolerance_unit_ppm = getStringOption_("precursor:mass_tolerance_unit") == "ppm" ? true : false; - - int pc_min_charge = getIntOption_("precursor:min_charge"); - int pc_max_charge = getIntOption_("precursor:max_charge"); - - // consider one before annotated monoisotopic peak and the annotated one - IntList isotopes = getIntList_("precursor:isotopes"); - -// float fragment_mass_tolerance = getDoubleOption_("fragment:mass_tolerance"); -// bool fragment_mass_tolerance_unit_ppm = getStringOption_("fragment:mass_tolerance_unit") == "ppm" ? 
true : false; - - int top_hits = getIntOption_("report:top_hits"); - - float remove_peaks_below_threshold = getDoubleOption_("filter:remove_peaks_below_threshold"); - UInt min_peaks = getIntOption_("filter:min_peaks"); - UInt max_peaks = getIntOption_("filter:max_peaks"); - Int cut_peaks_below = getIntOption_("filter:cut_peaks_below"); - - StringList fixed_modifications = getStringList_("modifications:fixed"); - StringList variable_modifications = getStringList_("modifications:variable"); - - if (top_hits < -1) - { - writeLogError_("top_hits (should be >= -1 )"); - return ILLEGAL_PARAMETERS; - } - - // ------------------------------------------------------------- - // loading input - // ------------------------------------------------------------- - if (out.size() != in_spec.size()) - { - writeLogError_("out (should be as many as input files)"); - return ILLEGAL_PARAMETERS; - } - - time_t prog_time = time(nullptr); - MSPFile spectral_library; - PeakMap query, library; - - time_t start_build_time = time(nullptr); - // ------------------------------------------------------------- - // building map for faster search - // ------------------------------------------------------------- - - // library containing already identified peptide spectra - vector ids; - spectral_library.load(in_lib, ids, library); - - /* - // Output bin histogram - BinnedSpectrum bin_frequency(0.01, 1, PeakSpectrum()); - for (auto const & s : library) - { - BinnedSpectrum b(0.01, 1, s); - // e.g.: bin_frequency.getBins() += b.getBins(); // sum up itensities - // e.g.: bin_frequency.getBins() += b.getBins().coeffs().cwiseMin(1.0f); // count occupied bins (by truncating intensities >= 1 to 1) - } - - for (BinnedSpectrum::SparseVectorIteratorType it(bin_frequency.getBins()); it; ++it) - { - // output m/z of bin start and average bin intensity - cout << it.index() * bin_frequency.getBinSize() << "\t" << static_cast(it.value()/library.size()) << "\n"; - cout << static_cast(it.value()) << "\n"; - cout 
<< static_cast(library.size()) << "\n"; - } - cout << endl; - */ - - MapLibraryPrecursorToLibrarySpectrum mslib = annotateIdentificationsToSpectra_(ids, library, variable_modifications, fixed_modifications, remove_peaks_below_threshold); - - time_t end_build_time = time(nullptr); - OPENMS_LOG_INFO << "Time needed for preprocessing data: " << (end_build_time - start_build_time) << "\n"; - - //compare function - std::unique_ptr comparator; - if (compare_function == "SpectraSTSimilarityScore") - { - comparator.reset(new SpectraSTSimilarityScore()); - } - else if (compare_function == "ZhangSimilarityScore") - { - comparator.reset(new ZhangSimilarityScore()); - } - else - { - writeLogError_("Unknown compare function"); - return ILLEGAL_PARAMETERS; - } - - //------------------------------------------------------------- - // calculations - //------------------------------------------------------------- - double score; - StringList::iterator in, out_file; - for (in = in_spec.begin(), out_file = out.begin(); in < in_spec.end(); ++in, ++out_file) - { - time_t start_time = time(nullptr); - FileHandler().loadExperiment(*in, query, {FileTypes::MZML}, log_type_); - - // results - vector peptide_ids; - vector protein_ids; - ProteinIdentification prot_id; - - //Parameters of identification - prot_id.setIdentifier("test"); - prot_id.setSearchEngineVersion("SpecLibSearcher"); - prot_id.setDateTime(DateTime::now()); - prot_id.setScoreType(compare_function); - - ProteinIdentification::SearchParameters search_parameters; - search_parameters.db = getStringOption_("lib"); - search_parameters.charges = String(getIntOption_("precursor:min_charge")) + ":" + String(getIntOption_("precursor:max_charge")); - - ProteinIdentification::PeakMassType mass_type = ProteinIdentification::MONOISOTOPIC; - search_parameters.mass_type = mass_type; - search_parameters.fixed_modifications = getStringList_("modifications:fixed"); - search_parameters.variable_modifications = 
getStringList_("modifications:variable"); - // search_parameters.missed_cleavages = getIntOption_("peptide:missed_cleavages"); - search_parameters.precursor_mass_tolerance = getDoubleOption_("precursor:mass_tolerance"); - search_parameters.precursor_mass_tolerance_ppm = getStringOption_("precursor:mass_tolerance_unit") == "ppm" ? true : false; -// search_parameters.fragment_mass_tolerance = getDoubleOption_("fragment:mass_tolerance"); -// search_parameters.fragment_mass_tolerance_ppm = getStringOption_("fragment:mass_tolerance_unit") == "ppm" ? true : false; - -//TODO: report an Enzyme? - - prot_id.setSearchParameters(search_parameters); - - - /***********SEARCH**********/ - for (UInt j = 0; j < query.size(); ++j) - { - //Set identifier for each identifications - PeptideIdentification pid; - pid.setIdentifier("test"); - pid.setScoreType(compare_function); - ProteinHit pr_hit; - pr_hit.setAccession(j); - prot_id.insertHit(pr_hit); - - // proper MS2? - if (query[j].empty() || query[j].getMSLevel() != 2) - { - continue; - } - - if (query[j].getPrecursors().empty()) - { - writeLogWarn_("Warning MS2 spectrum without precursor information"); - continue; - } - - // filter query spectrum - double max_intensity = std::max_element(query[j].begin(), query[j].end(), - [](const Peak1D& l, const Peak1D& r) - { - return (l.getIntensity() < r.getIntensity()); - })->getIntensity(); - - double min_high_intensity = max_intensity / cut_peaks_below; - - PeakSpectrum filtered_query; - for (UInt k = 0; k < query[j].size(); ++k) - { - if (query[j][k].getIntensity() >= remove_peaks_below_threshold - && query[j][k].getIntensity() >= min_high_intensity) - { - Peak1D peak; - peak.setIntensity(sqrt(query[j][k].getIntensity())); - peak.setMZ(query[j][k].getMZ()); - filtered_query.push_back(peak); - } - } - - // retain only top N peaks - if (filtered_query.size() > max_peaks) - { - filtered_query.sortByIntensity(true); - filtered_query.resize(max_peaks); - filtered_query.sortByPosition(); - } - 
- if (filtered_query.size() < min_peaks) - { - continue; - } - - const double& query_rt = query[j].getRT(); - const int& query_charge = query[j].getPrecursors()[0].getCharge(); - const double query_mz = query[j].getPrecursors()[0].getMZ(); - - if (query_charge > 0 && (query_charge < pc_min_charge || query_charge > pc_max_charge)) - { - continue; - } - - for (auto const & iso : isotopes) - { - // isotopic misassignment corrected query - const double ic_query_mz = query_mz - iso * Constants::C13C12_MASSDIFF_U; - - // if tolerance unit is ppm convert to m/z - const double precursor_mass_tolerance_mz = precursor_mass_tolerance_unit_ppm ? ic_query_mz * precursor_mass_tolerance * 1e-6 : precursor_mass_tolerance; - - // skip matching of isotopic misassignments if charge not annotated - if (iso != 0 && query_charge == 0) - { - continue; - } - - // skip matching of isotopic misassignments if search windows around isotopic peaks would overlap (resulting in more than one report of the same hit) - const double isotopic_peak_distance_mz = Constants::C13C12_MASSDIFF_U / query_charge; - if (iso != 0 && precursor_mass_tolerance_mz >= 0.5 * isotopic_peak_distance_mz) - { - continue; - } - - /* TODO: remove old code for charge estimation? 
- bool charge_one = false; - Int percent = (Int) Math::round((query[j].size() / 100.0) * 3.0); - Int margin = (Int) Math::round((query[j].size() / 100.0) * 1.0); - for (vector::iterator peak = query[j].end() - 1; percent >= 0; --peak, --percent) - { - if (peak->getMZ() < query_MZ) - { - break; - } - } - if (percent > margin) - { - charge_one = true; - } - */ - - - // determine MS2 precursors that match to the current peptide mass - MapLibraryPrecursorToLibrarySpectrum::const_iterator low_it, up_it; - - low_it = mslib.lower_bound(ic_query_mz - 0.5 * precursor_mass_tolerance_mz); - up_it = mslib.upper_bound(ic_query_mz + 0.5 * precursor_mass_tolerance_mz); - - // no matching precursor in data - if (low_it == up_it) - { - continue; - } - - for (; low_it != up_it; ++low_it) - { - const PeakSpectrum& lib_spec = low_it->second;; - PeptideHit hit = lib_spec.getPeptideIdentifications()[0].getHits()[0]; - const int& lib_charge = hit.getCharge(); - - // check if charge state between library and experimental spectrum match - if (query_charge > 0 && lib_charge != query_charge) - { - continue; - } - - // Special treatment for SpectraST score as it computes a score based on the whole library - if (compare_function == "SpectraSTSimilarityScore") - { - auto& sp = dynamic_cast(*comparator); - BinnedSpectrum quer_bin_spec = sp.transform(filtered_query); - BinnedSpectrum lib_bin_spec = sp.transform(lib_spec); - score = sp(filtered_query, lib_spec); //(*sp)(quer_bin,librar_bin); - double dot_bias = sp.dot_bias(quer_bin_spec, lib_bin_spec, score); - hit.setMetaValue("DOTBIAS", dot_bias); - } - else - { - score = (*comparator)(filtered_query, lib_spec); - } - - DataValue RT(lib_spec.getRT()); - DataValue MZ(lib_spec.getPrecursors()[0].getMZ()); - hit.setMetaValue("lib:RT", RT); - hit.setMetaValue("lib:MZ", MZ); - hit.setMetaValue(Constants::UserParam::ISOTOPE_ERROR, iso); - hit.setScore(score); - PeptideEvidence pe; - pe.setProteinAccession(pr_hit.getAccession()); - 
hit.addPeptideEvidence(pe); - pid.insertHit(hit); - } - } - - pid.setHigherScoreBetter(true); - pid.sort(); - - if (compare_function == "SpectraSTSimilarityScore") - { - if (!pid.empty() && !pid.getHits().empty()) - { - vector final_hits; - final_hits.resize(pid.getHits().size()); - auto& sp = dynamic_cast(*comparator); - Size runner_up = 1; - for (; runner_up < pid.getHits().size(); ++runner_up) - { - if (pid.getHits()[0].getSequence().toUnmodifiedString() != pid.getHits()[runner_up].getSequence().toUnmodifiedString() - || runner_up > 5) - { - break; - } - } - double delta_D = sp.delta_D(pid.getHits()[0].getScore(), pid.getHits()[runner_up].getScore()); - for (Size s = 0; s < pid.getHits().size(); ++s) - { - final_hits[s] = pid.getHits()[s]; - final_hits[s].setMetaValue("delta D", delta_D); - final_hits[s].setMetaValue("dot product", pid.getHits()[s].getScore()); - final_hits[s].setScore(sp.compute_F(pid.getHits()[s].getScore(), delta_D, pid.getHits()[s].getMetaValue("DOTBIAS"))); - } - pid.setHits(final_hits); - pid.sort(); - pid.setMZ(query[j].getPrecursors()[0].getMZ()); - pid.setRT(query_rt); - } - } - - if (top_hits != -1 && (UInt)top_hits < pid.getHits().size()) - { - pid.getHits().resize(top_hits); - } - peptide_ids.push_back(pid); - } - protein_ids.push_back(prot_id); - - //------------------------------------------------------------- - // writing output - //------------------------------------------------------------- - FileHandler().storeIdentifications(*out_file, protein_ids, peptide_ids, {FileTypes::IDXML}); - time_t end_time = time(nullptr); - OPENMS_LOG_INFO << "Search time: " << difftime(end_time, start_time) << " seconds for " << *in << "\n"; - } - time_t end_time = time(nullptr); - OPENMS_LOG_INFO << "Total time: " << difftime(end_time, prog_time) << " seconds\n"; - return EXECUTION_OK; - } - -}; - -int main(int argc, const char** argv) -{ - TOPPSpecLibSearcher tool; - return tool.main(argc, argv); -} - -/// @endcond diff --git 
a/src/topp/executables.cmake b/src/topp/executables.cmake index 776339e253e..4f16bf65fc4 100644 --- a/src/topp/executables.cmake +++ b/src/topp/executables.cmake @@ -49,7 +49,6 @@ IDExtractor IDFileConverter IDFilter IDMapper -IDMassAccuracy IDMerger IDPosteriorErrorProbability IDRipper @@ -123,8 +122,6 @@ SemanticValidator SequenceCoverageCalculator SimpleSearchEngine SiriusExport -SpecLibCreator -SpecLibSearcher SpectraFilterNLargest SpectraFilterNormalizer SpectraFilterThresholdMower diff --git a/vcpkg b/vcpkg index b2a47d316de..efdc0912145 160000 --- a/vcpkg +++ b/vcpkg @@ -1 +1 @@ -Subproject commit b2a47d316de1f3625ea43a7ca3e42dd28c52ece7 +Subproject commit efdc09121456bc4a5f96a71cbef4a41fb0100bd0 From ccfd1a0087e82060f9b3ffb07773bc7b4b82c9b6 Mon Sep 17 00:00:00 2001 From: Bela Bennet Pfeffer Date: Thu, 22 May 2025 11:11:13 +0200 Subject: [PATCH 28/31] mplemented all changes suggested in review --- .../OpenMS/FORMAT/HANDLERS/XMLHandler.h | 15 ++++---- .../source/FORMAT/HANDLERS/MzMLHandler.cpp | 3 +- .../source/FORMAT/HANDLERS/XMLHandler.cpp | 28 +++++++-------- .../openms/source/XMLHandler_test.cpp | 36 +++++++++++-------- 4 files changed, 44 insertions(+), 38 deletions(-) diff --git a/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h b/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h index ca351fad24f..164288671c4 100644 --- a/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h +++ b/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h @@ -21,7 +21,6 @@ #include #include -#include #include #include @@ -211,19 +210,19 @@ namespace OpenMS typedef std::basic_string XercesString; - // Converts from a narrow-character string to a wide-character string. + /// Converts from a narrow-character string to a wide-character string. inline static unique_xerces_ptr fromNative_(const char* str) { return unique_xerces_ptr(xercesc::XMLString::transcode(str)); } - // Converts from a narrow-character string to a wide-character string. 
+ /// Converts from a narrow-character string to a wide-character string. inline static unique_xerces_ptr fromNative_(const String& str) { return fromNative_(str.c_str()); } - // Converts from a wide-character string to a narrow-character string. + /// Converts from a wide-character string to a narrow-character string. inline static String toNative_(const XMLCh* str) { String r; @@ -239,12 +238,15 @@ namespace OpenMS return r; } - // Converts from a wide-character string to a narrow-character string. + /// Converts from a wide-character string to a narrow-character string. inline static String toNative_(const unique_xerces_ptr& str) { return toNative_(str.get()); } +protected: + /// Compresses eight 8x16bit Chars in XMLCh* to 8x8bit Chars by cutting upper byte + static void compress64_ (const XMLCh * input_it, char* output_it); public: /// Constructor @@ -300,8 +302,7 @@ namespace OpenMS /// Checks if supplied if chars in XMLCh* can be encoded with ASCII static bool isASCII(const XMLCh * chars, const XMLSize_t length); - /// Compresses eight 8x16bit Chars in XMLCh* to 8x8bit Chars by cutting upper byte - static void compress64 (const XMLCh * input_it, char* output_it); + /** * @brief Transcodes the supplied XMLCh* and appends it to the OpenMS String diff --git a/src/openms/source/FORMAT/HANDLERS/MzMLHandler.cpp b/src/openms/source/FORMAT/HANDLERS/MzMLHandler.cpp index 23b62e46c8a..79952c0f56c 100644 --- a/src/openms/source/FORMAT/HANDLERS/MzMLHandler.cpp +++ b/src/openms/source/FORMAT/HANDLERS/MzMLHandler.cpp @@ -21,7 +21,6 @@ #include -using namespace std::literals; namespace OpenMS::Internal { @@ -270,7 +269,7 @@ namespace OpenMS::Internal UInt meta_string_array_index = 0; for (Size i = 0; i < input_data.size(); i++) //loop over all binary data arrays { - if (input_data[i].meta.getName() != "m/z array"sv && input_data[i].meta.getName() != "intensity array"sv) // is meta data array? 
+ if (input_data[i].meta.getName() != "m/z array" && input_data[i].meta.getName() != "intensity array") // is meta data array? { if (input_data[i].data_type == MzMLHandlerHelper::BinaryData::DT_FLOAT) { diff --git a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp index 204b0bf6194..90fc915c0ec 100644 --- a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp +++ b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp @@ -434,17 +434,17 @@ namespace OpenMS::Internal // Vorverarbeitung einzelner Zeichen bis zum Alignment oder bis zum Ende des Strings for (size_t i = 0; i < chars_to_align; ++i) { if (*pos_ptr == 0) { - return processed_chars; + return i; } ++pos_ptr; - ++processed_chars; } + processed_chars = chars_to_align; // Hauptschleife mit SIMD-Operationen + const simde__m128i zero = simde_mm_setzero_si128(); while (true) { // SIMD-Operation - simde__m128i bits = simde_mm_loadu_si128(reinterpret_cast(pos_ptr)); - simde__m128i zero = simde_mm_setzero_si128(); + simde__m128i bits = simde_mm_load_si128(reinterpret_cast(pos_ptr)); simde__m128i cmp_zero = simde_mm_cmpeq_epi16(bits, zero); uint16_t zero_mask = simde_mm_movemask_epi8(cmp_zero); @@ -463,7 +463,7 @@ namespace OpenMS::Internal return processed_chars; } - void StringManager::compress64(const XMLCh* inputIt, char* outputIt) + void StringManager::compress64_(const XMLCh* inputIt, char* outputIt) { simde__m128i bits = simde_mm_loadu_si128(reinterpret_cast(inputIt)); @@ -483,10 +483,10 @@ namespace OpenMS::Internal { if (length == 0) { - return false; + return true; } - Size quotient = length / 8; + Size fullBlocks = length / 8; Size remainder = length % 8; const XMLCh* inputPtr = chars; @@ -494,7 +494,7 @@ namespace OpenMS::Internal bool bitmask = true; // Process blocks of 8 UTF-16 characters using SIMD - for (Size i = 0; i < quotient; ++i) + for (Size i = 0; i < fullBlocks; ++i) { simde__m128i bits = simde_mm_loadu_si128(reinterpret_cast(inputPtr)); simde__m128i 
zero = simde_mm_setzero_si128(); @@ -503,8 +503,7 @@ namespace OpenMS::Internal if (simde_mm_movemask_epi8(cmp) != 0xFFFF) { - bitmask = false; - break; + return false; } inputPtr += 8; @@ -515,8 +514,7 @@ namespace OpenMS::Internal { if (inputPtr[i] & 0xFF00) { - bitmask = false; - break; + return false; } } @@ -532,7 +530,7 @@ namespace OpenMS::Internal // we can convert to char directly (only keeping the least // significant byte). - Size quotient = length / 8; + Size fullBlocks = length / 8; Size remainder = length % 8; const XMLCh* inputPtr = chars; @@ -542,9 +540,9 @@ namespace OpenMS::Internal char* outputPtr = &result[currentSize]; // Copy blocks of 8 characters at a time - for (Size i = 0; i < quotient; ++i) + for (Size i = 0; i < fullBlocks; ++i) { - compress64(inputPtr, outputPtr); + compress64_(inputPtr, outputPtr); inputPtr += 8; outputPtr += 8; } diff --git a/src/tests/class_tests/openms/source/XMLHandler_test.cpp b/src/tests/class_tests/openms/source/XMLHandler_test.cpp index 46e912498a5..fc47e1e5476 100644 --- a/src/tests/class_tests/openms/source/XMLHandler_test.cpp +++ b/src/tests/class_tests/openms/source/XMLHandler_test.cpp @@ -1,12 +1,23 @@ #include #include -#include #include #include #include +class StringManager_test : public OpenMS::Internal::StringManager +{ +public: + StringManager_test() = default; + ~StringManager_test() = default; + + static void compress64(const XMLCh* input_it, char* output_it) + { + StringManager::compress64_(input_it, output_it); + } +}; + using namespace OpenMS::Internal; @@ -42,19 +53,18 @@ bool isAscii = false; START_SECTION(isASCII(const XMLCh * chars, const XMLSize_t length)) isAscii = StringManager::isASCII(ascii,a_length); - std::cout << "1 \n"; TEST_TRUE(isAscii) + isAscii = StringManager::isASCII(russianHello,r_length); - std::cout << "2 \n"; TEST_FALSE(isAscii) + isAscii = StringManager::isASCII(mixed,m_length); - std::cout << "3 \n"; TEST_FALSE(isAscii) + isAscii = StringManager::isASCII(empty,e_length); 
- std::cout << "4 \n"; - TEST_FALSE(isAscii) + TEST_TRUE(isAscii) + isAscii = StringManager::isASCII(upperBoundary,u_length); - std::cout << "5 \n"; TEST_TRUE(isAscii) END_SECTION @@ -77,21 +87,19 @@ const XMLCh eight_block_kadabra[] = { START_SECTION(compress64 (const XMLCh* input_it, char* output_it)) std::string o1_str(8,'\0'); - StringManager::compress64(eight_block,o1_str.data()); + StringManager_test::compress64(eight_block,o1_str.data()); std::string res1_str = "Hello,Wo"; TEST_STRING_EQUAL(o1_str,res1_str); std::string o2_str(8,'\0'); - StringManager::compress64(eight_block_negative,o2_str.data()); + StringManager_test::compress64(eight_block_negative,o2_str.data()); std::string res2_str = res1_str; TEST_STRING_EQUAL(o2_str, res2_str); std::string o3_str(8,'\0'); - // char res3 [9] = {0x42,0x45,0x4C,0x41,0x42,0x45,0x4C,0x41}; - // res3[8] = '\0'; - StringManager::compress64(eight_block_mixed,o3_str.data()); + StringManager_test::compress64(eight_block_mixed,o3_str.data()); std::string res3_str = {0x42,0x45,0x4C,0x41,0x42,0x45,0x4C,0x41}; TEST_STRING_EQUAL(o3_str, res3_str); @@ -101,7 +109,7 @@ START_SECTION(compress64 (const XMLCh* input_it, char* output_it)) o4_str [2] ='R'; o4_str [3] ='A'; - StringManager::compress64(eight_block_kadabra,((o4_str.data())+4)); + StringManager_test::compress64(eight_block_kadabra,((o4_str.data())+4)); std::string res4_str = "ABRAKADABRA!"; TEST_STRING_EQUAL(o4_str, res4_str); @@ -129,7 +137,7 @@ START_SECTION(appendASCII(const XMLCh * chars, const XMLSize_t length, String & StringManager::appendASCII(empty,e_length,o7_str); TEST_STRING_EQUAL(o7_str, res7_str); - std::cout << o7_str.size() << std::endl; + END_SECTION XMLCh* nullPointer = nullptr; From e53497c1b0adeab43b266897dd6b0d147784156a Mon Sep 17 00:00:00 2001 From: Timo Sachsenberg Date: Thu, 22 May 2025 16:45:28 +0200 Subject: [PATCH 29/31] Fix_pms (#8063) * some pyopenms fixes * fix * still error in pyx * more fixes * last fix * add to ci * add ci * Update 
src/pyOpenMS/pxds/ConsensusMap.pxd Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --------- Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- .github/workflows/openms_ci_matrix_full.yml | 1 + src/pyOpenMS/README_WRAPPING_NEW_CLASSES | 4 +-- src/pyOpenMS/addons/MzMLFile.pyx | 8 +++--- src/pyOpenMS/addons/SequestOutfile.pyx | 2 +- src/pyOpenMS/pxds/AnnotatedMSRun.pxd | 22 +++++++------- src/pyOpenMS/pxds/IDFilter.pxd | 9 +++--- src/pyOpenMS/pxds/IDMapper.pxd | 23 ++++++++------- src/pyOpenMS/pxds/MSExperiment.pxd | 9 ++---- src/pyOpenMS/pxds/MSPFile.pxd | 3 +- src/pyOpenMS/pxds/RangeManager.pxd | 1 - src/pyOpenMS/tests/unittests/test000.py | 32 ++++++++++----------- tools/PythonExtensionChecker.py | 20 ++++++------- tools/ci/cibuild.cmake | 7 ++++- 13 files changed, 71 insertions(+), 70 deletions(-) diff --git a/.github/workflows/openms_ci_matrix_full.yml b/.github/workflows/openms_ci_matrix_full.yml index 204a4f63126..ec9081a9207 100644 --- a/.github/workflows/openms_ci_matrix_full.yml +++ b/.github/workflows/openms_ci_matrix_full.yml @@ -413,6 +413,7 @@ jobs: BUILD_TYPE: "Release" OPENMP: "ON" BOOST_USE_STATIC: ${{ steps.set-vars.outputs.static_boost }} + COMPILE_PXDS: ${{ startsWith(matrix.os, 'ubuntu') && github.event_name == 'pull_request' && github.event.pull_request.base.ref == 'develop' && 'ON' || 'OFF' }} # test generation of pxds # BUILD_FLAGS: "-p:CL_MPCount=2" # For VS Generator and MSBuild BUILD_FLAGS: "-j${{ steps.cpu-cores.outputs.count }}" # Ninja will otherwise use all cores (doesn't go well in GHA). 
CMAKE_CCACHE_EXE: "ccache" diff --git a/src/pyOpenMS/README_WRAPPING_NEW_CLASSES b/src/pyOpenMS/README_WRAPPING_NEW_CLASSES index b000416feca..d7b11c48dda 100644 --- a/src/pyOpenMS/README_WRAPPING_NEW_CLASSES +++ b/src/pyOpenMS/README_WRAPPING_NEW_CLASSES @@ -32,8 +32,8 @@ cdef extern from "" namespace "OpenMS": # wrap-inherits: # DefaultParamHandler - ClassName() nogil except + - ClassName(ClassName) nogil except + + ClassName() except + nogil + ClassName(ClassName) except + nogil - make sure to use "ClassName:" instead of "ClassName(DefaultParamHandler)" to diff --git a/src/pyOpenMS/addons/MzMLFile.pyx b/src/pyOpenMS/addons/MzMLFile.pyx index e70617ca9f9..7c0cbc193e4 100644 --- a/src/pyOpenMS/addons/MzMLFile.pyx +++ b/src/pyOpenMS/addons/MzMLFile.pyx @@ -13,10 +13,10 @@ else: raise Exception('can not handle type of %s' % (args,)) - # void transform(const String&, IMSDataConsumer[Peak1D, ChromatogramPeak] *) nogil except + # wrap-ignore - # void transform(const String&, IMSDataConsumer[Peak1D, ChromatogramPeak] *, bool skip_full_count, bool skip_first_pass) nogil except + - # void transform(const String&, IMSDataConsumer[Peak1D, ChromatogramPeak] *, MSExperiment& e) nogil except + # wrap-ignore - # void transform(const String&, IMSDataConsumer[Peak1D, ChromatogramPeak] *, MSExperiment& e, bool skip_full_count, bool skip_first_pass) nogil except + # wrap-ignore + # void transform(const String&, IMSDataConsumer[Peak1D, ChromatogramPeak] *) except + nogil # wrap-ignore + # void transform(const String&, IMSDataConsumer[Peak1D, ChromatogramPeak] *, bool skip_full_count, bool skip_first_pass) except + nogil + # void transform(const String&, IMSDataConsumer[Peak1D, ChromatogramPeak] *, MSExperiment& e) except + nogil # wrap-ignore + # void transform(const String&, IMSDataConsumer[Peak1D, ChromatogramPeak] *, MSExperiment& e, bool skip_full_count, bool skip_first_pass) except + nogil # wrap-ignore def _transform_4(self, path, transformer, MSExperiment exp, bool 
skip_full_count, bool skip_first_pass): assert isinstance(exp, MSExperiment), 'arg exp wrong type' diff --git a/src/pyOpenMS/addons/SequestOutfile.pyx b/src/pyOpenMS/addons/SequestOutfile.pyx index f29bd14b75a..5c391ec0e95 100644 --- a/src/pyOpenMS/addons/SequestOutfile.pyx +++ b/src/pyOpenMS/addons/SequestOutfile.pyx @@ -7,7 +7,7 @@ from libcpp.map cimport map as libcpp_map # libcpp_map[ String, Size ] &ac_position_map, # libcpp_vector[ String ] &sequences, # libcpp_vector[ libcpp_pair[ String, Size ] ] &found, - # libcpp_map[ String, Size ] ¬_found) nogil except + + # libcpp_map[ String, Size ] ¬_found) except + nogil assert isinstance(database_filename, String), 'arg database_filename wrong type' assert isinstance(sequences, list) and all(isinstance(i, bytes) for i in sequences), 'arg sequences wrong type' diff --git a/src/pyOpenMS/pxds/AnnotatedMSRun.pxd b/src/pyOpenMS/pxds/AnnotatedMSRun.pxd index 3c5bedc915f..caed02e6463 100644 --- a/src/pyOpenMS/pxds/AnnotatedMSRun.pxd +++ b/src/pyOpenMS/pxds/AnnotatedMSRun.pxd @@ -29,24 +29,22 @@ cdef extern from "" namespace "OpenMS": # run.setMSExperiment(exp) # run.setPeptideIdentifications(my_peptide_ids) - AnnotatedMSRun() nogil except + - AnnotatedMSRun(MSExperiment) nogil except + - AnnotatedMSRun(AnnotatedMSRun) nogil except + + AnnotatedMSRun() except + nogil + AnnotatedMSRun(MSExperiment) except + nogil + AnnotatedMSRun(AnnotatedMSRun) except + nogil # Protein identification methods - libcpp_vector[ProteinIdentification]& getProteinIdentifications() nogil except + - const libcpp_vector[ProteinIdentification]& getProteinIdentifications() nogil except + # wrap-ignore + libcpp_vector[ProteinIdentification] getProteinIdentifications() except + nogil + void setProteinIdentifications(libcpp_vector[ProteinIdentification]& ids) except + nogil # Peptide identification methods - libcpp_vector[PeptideIdentification]& getPeptideIdentifications() nogil except + - const libcpp_vector[PeptideIdentification]& 
getPeptideIdentifications() nogil except + # wrap-ignore - void setPeptideIdentifications(libcpp_vector[PeptideIdentification]& ids) nogil except + + libcpp_vector[PeptideIdentification] getPeptideIdentifications() except + nogil + void setPeptideIdentifications(libcpp_vector[PeptideIdentification]& ids) except + nogil # MSExperiment methods - MSExperiment& getMSExperiment() nogil except + - const MSExperiment& getMSExperiment() nogil except + # wrap-ignore - void setMSExperiment(MSExperiment& experiment) nogil except + + MSExperiment getMSExperiment() except + nogil + void setMSExperiment(MSExperiment& experiment) except + nogil # Access methods - libcpp_pair[MSSpectrum&, PeptideIdentification&] operator[](size_t idx) nogil except + # wrap-ignore + libcpp_pair[MSSpectrum, PeptideIdentification] operator[](size_t idx) except + nogil # wrap-ignore diff --git a/src/pyOpenMS/pxds/IDFilter.pxd b/src/pyOpenMS/pxds/IDFilter.pxd index 4f1c506d442..c3fc0cd9def 100644 --- a/src/pyOpenMS/pxds/IDFilter.pxd +++ b/src/pyOpenMS/pxds/IDFilter.pxd @@ -11,6 +11,7 @@ from FASTAFile cimport * from ProteaseDigestion cimport * from MSExperiment cimport * +from AnnotatedMSRun cimport * from MSSpectrum cimport * from Peak1D cimport * from ChromatogramPeak cimport * @@ -77,8 +78,6 @@ cdef extern from "" namespace "OpenMS": # :param ignore_mods: Boolean operator default to false in case of any modifications in sequences during extraction # :return: Sequences - void updateHitRanks(libcpp_vector[ProteinIdentification]& identifications) except + nogil # wrap-doc:Updates the hit ranks on all peptide or protein IDs - void removeUnreferencedProteins(libcpp_vector[ProteinIdentification]& proteins, libcpp_vector[PeptideIdentification]& peptides) except + nogil # wrap-doc:Removes protein hits from the protein IDs in a 'cmap' that are not referenced by a peptide in the features or if requested in the unassigned peptide list void updateProteinReferences(libcpp_vector[PeptideIdentification]& peptides, 
libcpp_vector[ProteinIdentification]& proteins, bool remove_peptides_without_reference) except + nogil # wrap-doc:Removes references to missing proteins. Only PeptideEvidence entries that reference protein hits in 'proteins' are kept in the peptide hits @@ -177,15 +176,15 @@ cdef extern from "" namespace "OpenMS": void removeDuplicatePeptideHits(libcpp_vector[PeptideIdentification]& peptides) except + nogil # wrap-doc:Removes duplicate peptide hits from each peptide identification, keeping only unique hits (per ID) - void filterHitsByScore(MSExperiment& experiment, double peptide_threshold_score, double protein_threshold_score) except + nogil # wrap-doc:Filters an MS/MS experiment according to score thresholds + void filterHitsByScore(AnnotatedMSRun& experiment, double peptide_threshold_score, double protein_threshold_score) except + nogil # wrap-doc:Filters an MS/MS experiment according to score thresholds - void keepNBestHits(MSExperiment& experiment, Size n) except + nogil # wrap-doc:Filters an MS/MS experiment by keeping the N best peptide hits for every spectrum + void keepNBestHits(AnnotatedMSRun& experiment, Size n) except + nogil # wrap-doc:Filters an MS/MS experiment by keeping the N best peptide hits for every spectrum void keepBestPerPeptide(libcpp_vector[PeptideIdentification]& peptides, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum) except + nogil # wrap-doc:Filters PeptideHits from PeptideIdentification by keeping only the best peptide hits for every peptide sequence void keepBestPerPeptidePerRun(libcpp_vector[ProteinIdentification]& prot_ids, libcpp_vector[PeptideIdentification]& peptides, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum) except + nogil # wrap-doc:Filters PeptideHits from PeptideIdentification by keeping only the best peptide hits for every peptide sequence on a per run basis - void keepHitsMatchingProteins(MSExperiment& experiment, libcpp_vector[FASTAEntry]& proteins) except + nogil + void 
keepHitsMatchingProteins(AnnotatedMSRun& experiment, libcpp_vector[FASTAEntry]& proteins) except + nogil cdef extern from "" namespace "OpenMS::IDFilter": diff --git a/src/pyOpenMS/pxds/IDMapper.pxd b/src/pyOpenMS/pxds/IDMapper.pxd index a9084dbd70b..676a34273e9 100644 --- a/src/pyOpenMS/pxds/IDMapper.pxd +++ b/src/pyOpenMS/pxds/IDMapper.pxd @@ -8,6 +8,7 @@ from ProteinIdentification cimport * from PeptideIdentification cimport * from MSExperiment cimport * +from AnnotatedMSRun cimport * from Peak1D cimport * from ChromatogramPeak cimport * @@ -20,11 +21,11 @@ cdef extern from "" namespace "OpenMS": IDMapper() except + nogil # wrap-doc:Annotates an MSExperiment, FeatureMap or ConsensusMap with peptide identifications IDMapper(IDMapper &) except + nogil - void annotate(MSExperiment & map_, - libcpp_vector[PeptideIdentification] & ids, - libcpp_vector[ProteinIdentification] & protein_ids, - bool clear_ids, - bool mapMS1) except + nogil + void annotate(AnnotatedMSRun & map_, + libcpp_vector[PeptideIdentification] & ids, + libcpp_vector[ProteinIdentification] & protein_ids, + bool clear_ids, + bool mapMS1) except + nogil # wrap-doc: # Mapping method for peak maps\n # @@ -33,7 +34,7 @@ cdef extern from "" namespace "OpenMS": # Note that a PeptideIdentication is added to ALL spectra which are within the allowed RT and MZ boundaries # # - # :param map: MSExperiment to receive the identifications + # :param map: AnnotatedMSRun to receive the identifications # :param peptide_ids: PeptideIdentification for the MSExperiment # :param protein_ids: ProteinIdentification for the MSExperiment # :param clear_ids: Reset peptide and protein identifications of each scan before annotating @@ -41,10 +42,10 @@ cdef extern from "" namespace "OpenMS": # :raises: # Exception: MissingInformation is thrown if entries of 'peptide_ids' do not contain 'MZ' and 'RT' information - void annotate(MSExperiment & map_, - FeatureMap & fmap, - bool clear_ids, - bool mapMS1) except + nogil + void 
annotate(AnnotatedMSRun & map_, + FeatureMap & fmap, + bool clear_ids, + bool mapMS1) except + nogil # wrap-doc: # Mapping method for peak maps\n # @@ -55,7 +56,7 @@ cdef extern from "" namespace "OpenMS": # RT and m/z are taken from the peptides, or (if missing) from the feature itself # # - # :param map: MSExperiment to receive the identifications + # :param map: AnnotatedMSRun to receive the identifications # :param fmap: FeatureMap with PeptideIdentifications for the MSExperiment # :param clear_ids: Reset peptide and protein identifications of each scan before annotating # :param map_ms1: Attach Ids to MS1 spectra using RT mapping only (without precursor, without m/z) diff --git a/src/pyOpenMS/pxds/MSExperiment.pxd b/src/pyOpenMS/pxds/MSExperiment.pxd index cbe1607616a..e4bac5dca9d 100644 --- a/src/pyOpenMS/pxds/MSExperiment.pxd +++ b/src/pyOpenMS/pxds/MSExperiment.pxd @@ -114,9 +114,6 @@ cdef extern from "" namespace "OpenMS": int getPrecursorSpectrum(int zero_based_index) except + nogil # wrap-doc:Returns the index of the precursor spectrum for spectrum at index @p zero_based_index # Range manager accessors - SpectrumRangeManager& spectrumRanges() except + nogil # wrap-doc:Returns a reference to the spectrum range manager - const SpectrumRangeManager& spectrumRanges() const except + nogil # wrap-doc:Returns a const reference to the spectrum range manager - ChromatogramRangeManager& chromatogramRanges() except + nogil # wrap-doc:Returns a reference to the chromatogram range manager - const ChromatogramRangeManager& chromatogramRanges() const except + nogil # wrap-doc:Returns a const reference to the chromatogram range manager - RangeManagerRtMzIntMob& combinedRanges() except + nogil # wrap-doc:Returns a reference to the combined range manager (for backward compatibility) - const RangeManagerRtMzIntMob& combinedRanges() const except + nogil # wrap-doc:Returns a const reference to the combined range manager (for backward compatibility) + SpectrumRangeManager 
spectrumRanges() except + nogil # wrap-doc:Returns a reference to the spectrum range manager + ChromatogramRangeManager chromatogramRanges() except + nogil # wrap-doc:Returns a reference to the chromatogram range manager + RangeManagerRtMzIntMob combinedRanges() except + nogil # wrap-doc:Returns a reference to the combined range manager (for backward compatibility) diff --git a/src/pyOpenMS/pxds/MSPFile.pxd b/src/pyOpenMS/pxds/MSPFile.pxd index 809c4d7c711..7f4af27de40 100644 --- a/src/pyOpenMS/pxds/MSPFile.pxd +++ b/src/pyOpenMS/pxds/MSPFile.pxd @@ -2,6 +2,7 @@ from libcpp.vector cimport vector as libcpp_vector from String cimport * from Peak1D cimport * from MSExperiment cimport * +from AnnotatedMSRun cimport * cdef extern from "" namespace "OpenMS": @@ -10,7 +11,7 @@ cdef extern from "" namespace "OpenMS": MSPFile() except + nogil # wrap-doc:File adapter for MSP files (NIST spectra library) MSPFile(MSPFile &) except + nogil - void store(String filename, MSExperiment & exp) except + nogil # wrap-doc:Stores a map in a MSPFile file + void store(String filename, AnnotatedMSRun & exp) except + nogil # wrap-doc:Stores a map in a MSPFile file void load(String filename, libcpp_vector[PeptideIdentification] & ids, MSExperiment & exp) except + nogil # wrap-doc: # Loads a map from a MSPFile file diff --git a/src/pyOpenMS/pxds/RangeManager.pxd b/src/pyOpenMS/pxds/RangeManager.pxd index db3a38d5210..5728fd06696 100644 --- a/src/pyOpenMS/pxds/RangeManager.pxd +++ b/src/pyOpenMS/pxds/RangeManager.pxd @@ -119,7 +119,6 @@ cdef extern from "" namespace "OpenMS": cdef cppclass RangeManagerRtMzIntMob "OpenMS::RangeManager": - # wrap-ignore # no-pxd-import RangeManagerRtMzIntMob() except + nogil RangeManagerRtMzIntMob(RangeManagerRtMzIntMob &) except + nogil diff --git a/src/pyOpenMS/tests/unittests/test000.py b/src/pyOpenMS/tests/unittests/test000.py index e048517728f..87c73afd365 100644 --- a/src/pyOpenMS/tests/unittests/test000.py +++ b/src/pyOpenMS/tests/unittests/test000.py @@ 
-39,14 +39,14 @@ def _testMetaInfoInterface(what): #void getKeys(libcpp_vector[String] & keys) #void getKeys(libcpp_vector[unsigned int] & keys) - #DataValue getMetaValue(unsigned int) nogil except + - #DataValue getMetaValue(String) nogil except + - #void setMetaValue(unsigned int, DataValue) nogil except + - #void setMetaValue(String, DataValue) nogil except + - #bool metaValueExists(String) nogil except + - #bool metaValueExists(unsigned int) nogil except + - #void removeMetaValue(String) nogil except + - #void removeMetaValue(unsigned int) nogil except + + #DataValue getMetaValue(unsigned int) except + nogil + #DataValue getMetaValue(String) except + nogil + #void setMetaValue(unsigned int, DataValue) except + nogil + #void setMetaValue(String, DataValue) except + nogil + #bool metaValueExists(String) except + nogil + #bool metaValueExists(unsigned int) except + nogil + #void removeMetaValue(String) except + nogil + #void removeMetaValue(unsigned int) except + nogil what.setMetaValue("key", 42) what.setMetaValue("key2", 42) @@ -5369,9 +5369,9 @@ def testElementDB(): # not yet implemented # - # const Map[ String, Element * ] getNames() nogil except + - # const Map[ String, Element * ] getSymbols() nogil except + - # const Map[unsigned int, Element * ] getAtomicNumbers() nogil except + + # const Map[ String, Element * ] getNames() except + nogil + # const Map[ String, Element * ] getSymbols() except + nogil + # const Map[unsigned int, Element * ] getAtomicNumbers() except + nogil @report @@ -5475,11 +5475,11 @@ def testModificationsDB(): def testRNaseDB(): """ @tests: RNaseDB - const DigestionEnzymeRNA* getEnzyme(const String& name) nogil except + - const DigestionEnzymeRNA* getEnzymeByRegEx(const String& cleavage_regex) nogil except + - void getAllNames(libcpp_vector[ String ]& all_names) nogil except + - bool hasEnzyme(const String& name) nogil except + - bool hasRegEx(const String& cleavage_regex) nogil except + + const DigestionEnzymeRNA* getEnzyme(const 
String& name) except + nogil + const DigestionEnzymeRNA* getEnzymeByRegEx(const String& cleavage_regex) except + nogil + void getAllNames(libcpp_vector[ String ]& all_names) except + nogil + bool hasEnzyme(const String& name) except + nogil + bool hasRegEx(const String& cleavage_regex) except + nogil """ db = pyopenms.RNaseDB() names = [] diff --git a/tools/PythonExtensionChecker.py b/tools/PythonExtensionChecker.py index 6781aa533f2..57ea98b5a21 100755 --- a/tools/PythonExtensionChecker.py +++ b/tools/PythonExtensionChecker.py @@ -196,7 +196,7 @@ def handle_member_definition(mdef, pxd_class, cnt): tres.setMessage("Renamed constructor") else: tres.setPassed(False) - tres.setMessage(" -- TODO missing constructor in PXD: %s nogil except +" % mdef.format_definition_for_cython()) + tres.setMessage(" -- TODO missing constructor in PXD: %s except + nogil " % mdef.format_definition_for_cython()) elif (mdef.name.find("operator") != -1 or mdef.name.find("begin") != -1 or @@ -206,7 +206,7 @@ def handle_member_definition(mdef, pxd_class, cnt): tres.setMessage("Cannot wrap method with iterator/operator %s" % mdef.name) else: tres.setPassed(False) - tres.setMessage(" -- TODO missing function in PXD: %s nogil except +" % mdef.format_definition_for_cython()) + tres.setMessage(" -- TODO missing function in PXD: %s except + nogil " % mdef.format_definition_for_cython()) else: # It is neither public function/enum/variable tres.setPassed(True) @@ -486,10 +486,10 @@ def get_pxd_from_class(self, dfile, internal_file_name, xml_output_path): # assignment operator, cannot be overriden in Python continue if mdef.definition.find("static") != -1: - methods += " # TODO: static # %s nogil except +\n" % declaration - static_methods += " %s nogil except + # wrap-attach:%s\n" % (declaration, preferred_classname) + methods += " # TODO: static # %s except + nogil \n" % declaration + static_methods += " %s except + nogil # wrap-attach:%s\n" % (declaration, preferred_classname) continue - methods += 
" %s nogil except +\n" % declaration + methods += " %s except + nogil \n" % declaration # Build up the whole file res = DoxygenCppFunction.generate_imports(imports_needed) # add default cimport @@ -498,11 +498,11 @@ def get_pxd_from_class(self, dfile, internal_file_name, xml_output_path): # We need to create a default ctor in any case, however we do not need # to *wrap* the copy constructor even though we need to have one for Cython if True: # not default_ctor: - res += " %s() nogil except +\n" % comp_name.split("::")[-1] + res += " %s() except + nogil \n" % comp_name.split("::")[-1] if not copy_ctor: - res += " %s(%s) nogil except + #wrap-ignore\n" % (comp_name.split("::")[-1], comp_name.split("::")[-1]) + res += " %s(%s) except + nogil #wrap-ignore\n" % (comp_name.split("::")[-1], comp_name.split("::")[-1]) else: - res += " %s(%s) nogil except +\n" % (comp_name.split("::")[-1], comp_name.split("::")[-1]) + res += " %s(%s) except + nogil \n" % (comp_name.split("::")[-1], comp_name.split("::")[-1]) res += methods res += enum res += "\n" @@ -645,8 +645,8 @@ def format_definition_for_cython(self, replace_nogil=True): # Add nogil if replace_nogil: - cpp_def = cpp_def.replace(";", "nogil except +") - cpp_def = cpp_def.replace("const;", "nogil except +") + cpp_def = cpp_def.replace(";", "except + nogil ") + cpp_def = cpp_def.replace("const;", "except + nogil ") else: cpp_def = cpp_def.replace("const;", "") cpp_def = cpp_def.replace(";", "") diff --git a/tools/ci/cibuild.cmake b/tools/ci/cibuild.cmake index eaa093e2386..175f282bbd1 100644 --- a/tools/ci/cibuild.cmake +++ b/tools/ci/cibuild.cmake @@ -107,6 +107,7 @@ set(VARS_TO_LOAD "Python_FIND_STRATEGY" "WITH_GUI" "WITH_THERMORAWFILEPARSER_TEST" + "COMPILE_PXDS" ) message("tools/ci/cibuild.cmake: Loading the following vars from ENV if available: ${VARS_TO_LOAD}") @@ -182,9 +183,13 @@ if("$ENV{ENABLE_STYLE_TESTING}" STREQUAL "OFF") # Generate and validate the CWL files if "ENABLE_CWL_GENERATION" is set else() 
ctest_build(BUILD "${CTEST_BINARY_DIRECTORY}" NUMBER_ERRORS _build_errors) + endif() + # Only build compile_pxds if PYOPENMS is not ON (since it's already a subtarget of pyopenms) + if("$ENV{COMPILE_PXDS}" STREQUAL "ON" AND "$ENV{PYOPENMS}" STREQUAL "OFF") + ctest_build(BUILD "${CTEST_BINARY_DIRECTORY}" TARGET "compile_pxds" NUMBER_ERRORS _build_errors) endif() if("$ENV{ENABLE_CWL_GENERATION}" STREQUAL "ON") - ctest_build(BUILD "${CTEST_BINARY_DIRECTORY}" TARGET "generate_cwl_files" NUMBER_ERRORS _build_errors) + ctest_build(BUILD "${CTEST_BINARY_DIRECTORY}" TARGET "generate_cwl_files" NUMBER_ERRORS _build_errors) endif() else() set(_build_errors 0) From 35bd2c122e61fe21dfa7e4f4ddc0a9deb0cf606e Mon Sep 17 00:00:00 2001 From: Timo Sachsenberg Date: Thu, 22 May 2025 17:56:17 +0200 Subject: [PATCH 30/31] Update cibuild.cmake --- tools/ci/cibuild.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/ci/cibuild.cmake b/tools/ci/cibuild.cmake index 175f282bbd1..3d6d52d5b33 100644 --- a/tools/ci/cibuild.cmake +++ b/tools/ci/cibuild.cmake @@ -132,6 +132,7 @@ SEARCH_ENGINES_DIRECTORY=$ENV{SEARCH_ENGINES_DIRECTORY} ENABLE_TUTORIALS=Off ENABLE_GCC_WERROR=Off PYOPENMS=$ENV{PYOPENMS} +COMPILE_PXDS=$ENV{COMPILE_PXDS} MT_ENABLE_OPENMP=$ENV{OPENMP} PYTHON_EXECUTABLE:FILEPATH=$ENV{PYTHON_EXE} PY_NUM_THREADS=4 From 4c93e288f46fad267f8effb04fdfe199f10c60cc Mon Sep 17 00:00:00 2001 From: Bela Bennet Pfeffer Date: Tue, 27 May 2025 14:48:49 +0200 Subject: [PATCH 31/31] Implemented Changes provided by Mr. 
Bielow and Code Rabbit --- src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h | 2 +- src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp | 2 +- src/tests/class_tests/openms/source/XMLHandler_test.cpp | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h b/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h index 164288671c4..8d9fb50c544 100644 --- a/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h +++ b/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h @@ -299,7 +299,7 @@ namespace OpenMS { return toNative_(str); } - /// Checks if supplied if chars in XMLCh* can be encoded with ASCII + /// Checks if supplied chars in XMLCh* can be encoded with ASCII (i.e. the upper byte of each char is 0) static bool isASCII(const XMLCh * chars, const XMLSize_t length); diff --git a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp index 90fc915c0ec..e9375935e4b 100644 --- a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp +++ b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp @@ -303,7 +303,7 @@ namespace OpenMS::Internal } // no value, although there should be a numerical value else if (term.xref_type != ControlledVocabulary::CVTerm::NONE && term.xref_type != ControlledVocabulary::CVTerm::XSD_STRING && // should be numerical - !cv.isChildOf(accession, "MS:1000513") // here the value type relates to the bits data array, not the 'value=' attribute! + !cv.isChildOf(accession, "MS:1000513") // here the value type relates to the binary data array, not the 'value=' attribute! ) { warning(LOAD, String("The CV term '") + accession + " - " + term.name + "' used in tag '" + parent_tag + "' should have a numerical value. 
The value is '" + value + "'."); diff --git a/src/tests/class_tests/openms/source/XMLHandler_test.cpp b/src/tests/class_tests/openms/source/XMLHandler_test.cpp index fc47e1e5476..408a272bba4 100644 --- a/src/tests/class_tests/openms/source/XMLHandler_test.cpp +++ b/src/tests/class_tests/openms/source/XMLHandler_test.cpp @@ -29,7 +29,7 @@ START_TEST(StringManager, "$Id$") const XMLCh russianHello[] = { 0x041F, 0x0440, 0x0438, 0x0432, 0x0435, 0x0442, 0x043C, - 0x0438, 0x0440, // "Привет мир" (Hello World in Russian) + 0x0438, 0x0440,0x0000 // "Привет мир" (Hello World in Russian) }; XMLSize_t r_length = xercesc::XMLString::stringLen(russianHello); @@ -149,6 +149,7 @@ START_SECTION(strLength(const XMLCh* input_ptr)) o_length = StringManager::strLength(upperBoundary); TEST_EQUAL(o_length, u_length); o_length = StringManager::strLength(nullPointer); + TEST_EQUAL(o_length, 0); END_SECTION END_TEST