diff --git a/.github/workflows/openms_ci_matrix_full.yml b/.github/workflows/openms_ci_matrix_full.yml index 8d8f8c89b79..ec9081a9207 100644 --- a/.github/workflows/openms_ci_matrix_full.yml +++ b/.github/workflows/openms_ci_matrix_full.yml @@ -413,6 +413,7 @@ jobs: BUILD_TYPE: "Release" OPENMP: "ON" BOOST_USE_STATIC: ${{ steps.set-vars.outputs.static_boost }} + COMPILE_PXDS: ${{ startsWith(matrix.os, 'ubuntu') && github.event_name == 'pull_request' && github.event.pull_request.base.ref == 'develop' && 'ON' || 'OFF' }} # test generation of pxds # BUILD_FLAGS: "-p:CL_MPCount=2" # For VS Generator and MSBuild BUILD_FLAGS: "-j${{ steps.cpu-cores.outputs.count }}" # Ninja will otherwise use all cores (doesn't go well in GHA). CMAKE_CCACHE_EXE: "ccache" @@ -670,7 +671,7 @@ jobs: - name: run GHA release action. id: create_release if: inputs.do_release - uses: ncipollo/release-action@v1.14.0 + uses: ncipollo/release-action@v1.16.0 with: bodyFile: RELEASE_TEXT_GH.md tag: ${{ github.ref_name }} @@ -941,7 +942,7 @@ jobs: mkdir -p ~/.ssh/ echo "$PASS" > ~/.ssh/private.key sudo chmod 600 ~/.ssh/private.key - ln -s ./$folder latest #create link to the release folder + ln -s ../$folder latest #create link to the release folder rsync --progress -avz -e "ssh -i ~/.ssh/private.key -p $PORT -o StrictHostKeyChecking=no" latest "$USER@$HOST:/knime-plugin/updateSite/release" do-release: @@ -963,36 +964,65 @@ jobs: shell: bash run: echo "RUN_NAME=${{ github.event.pull_request && github.event.number || github.ref_name }}" >> $GITHUB_ENV - # NB we create the tag for the OpenMS repo next in a separate action. + # We created the draft release during deploy-installer step. Now we want to publish it. 
+ - name: Publish OpenMS release + id: publish_release + if: inputs.do_release + uses: ncipollo/release-action@v1.16.0 + with: + tag: ${{ github.ref_name }} + draft: false + allowUpdates: true + artifactErrorsFailBuild: true + makeLatest: ${{ inputs.mark_as_latest }} + omitBodyDuringUpdate: true + omitNameDuringUpdate: true + updateOnlyUnreleased: true + + + # NB we create the tag for the OpenMS repo next in a separate action. + # SPW TODO: Move the script here to a file once its stable - id: bash_create_tags name: create tags for other repos shell: bash env: GH_TOKEN: ${{ steps.app-token.outputs.token }} run: | - function createGitTag() { + function handleGitTag() { REPO=$1 SHA=$2 - gh api \ - --method POST \ + TAG_NAME="${{ env.RUN_NAME }}" + + # Check if the tag exists + TAG_EXISTS=$(gh api \ + --method GET \ -H "Accept: application/vnd.github+json" \ -H "X-GitHub-Api-Version: 2022-11-28" \ - /repos/${REPO}/git/refs \ - -f ref="refs/tags/${{ env.RUN_NAME }}" \ - -f sha="${SHA}" - } - - function updateGitTag() { - REPO=$1 - SHA=$2 - - gh api \ - --method PATCH \ - -H "Accept: application/vnd.github+json" \ - -H "X-GitHub-Api-Version: 2022-11-28" \ - /repos/${REPO}/git/refs/tags/${{ env.RUN_NAME }} \ - -f sha="${SHA}" \ - -F force=true + /repos/${REPO}/git/refs/tags/${TAG_NAME} 2>/dev/null || echo "TAG_NOT_FOUND") + + if [[ "$TAG_EXISTS" == "TAG_NOT_FOUND" ]]; then + # Tag doesn't exist, create it + echo "Creating tag ${TAG_NAME} in ${REPO} pointing to ${SHA}" + gh api \ + --method POST \ + -H "Accept: application/vnd.github+json" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + /repos/${REPO}/git/refs \ + -f ref="refs/tags/${TAG_NAME}" \ + -f sha="${SHA}" + else + # Tag exists, check if it points to the same SHA + EXISTING_SHA=$(echo $TAG_EXISTS | jq -r '.object.sha') + + if [[ "$EXISTING_SHA" == "$SHA" ]]; then + # Tag already points to the correct SHA, do nothing + echo "Tag ${TAG_NAME} already exists in ${REPO} and points to the correct SHA: ${SHA}" + else + # 
Tag exists but points to a different SHA, throw an error + echo "Error: Tag ${TAG_NAME} already exists in ${REPO} but points to ${EXISTING_SHA} instead of ${SHA}. Manual intervention required." + exit 1 + fi + fi } DEOPENMS_SHA=$(curl -s -X GET https://api.github.com/repos/OpenMS/de.openms.knime/git/ref/heads/develop |jq -r '.object.sha') GKN_SHA=$(curl -s -X GET https://api.github.com/repos/genericworkflownodes/GenericKnimeNodes/git/ref/heads/develop |jq -r '.object.sha') @@ -1003,15 +1033,14 @@ jobs: TUTORIAL_SHA=$(curl https://api.github.com/repos/OpenMS/Tutorials/git/refs/heads/master | jq -r ".object.sha") DOCS_SHA=$(curl https://api.github.com/repos/OpenMS/OpenMS-docs/git/refs/heads/develop | jq -r ".object.sha") - createGitTag OpenMS/contrib $CONTRIB_SHA || updateGitTag OpenMS/contrib $CONTRIB_SHA - createGitTag OpenMS/pyopenms-docs $PYDOCS_SHA || updateGitTag OpenMS/pyopenms-docs $PYDOCS_SHA - createGitTag OpenMS/THIRDPARTY $THIRDPARTY_SHA || updateGitTag OpenMS/THIRDPARTY $THIRDPARTY_SHA - createGitTag OpenMS/Tutorials $TUTORIAL_SHA || updateGitTag OpenMS/Tutorials $TUTORIAL_SHA - createGitTag OpenMS/OpenMS-docs $DOCS_SHA || updateGitTag OpenMS/OpenMS-docs $DOCS_SHA - createGitTag OpenMS/de.openms.knime $DEOPENMS_SHA || updateGitTag OpenMS/de.openms.knime $DEOPENMS_SHA - - #FIXME reenable these after we get the correct access permissions - #createGitTag genericworkflownodes/de.openms.knime.dynamicJSViewers $JSViewer_SHA || updateGitTag genericworkflownodes/de.openms.knime.dynamicJSViewers $GKN_SHA + handleGitTag OpenMS/contrib $CONTRIB_SHA + handleGitTag OpenMS/pyopenms-docs $PYDOCS_SHA + handleGitTag OpenMS/THIRDPARTY $THIRDPARTY_SHA + handleGitTag OpenMS/Tutorials $TUTORIAL_SHA + handleGitTag OpenMS/OpenMS-docs $DOCS_SHA + handleGitTag OpenMS/de.openms.knime $DEOPENMS_SHA + # Uncomment when permissions are fixed + # handleGitTag genericworkflownodes/de.openms.knime.dynamicJSViewers $JSViewer_SHA - name: Merge to Develop if: inputs.mark_as_latest diff 
--git a/.github/workflows/update_version_numbers.yml b/.github/workflows/update_version_numbers.yml index 77c0011edb7..6b06a9407df 100644 --- a/.github/workflows/update_version_numbers.yml +++ b/.github/workflows/update_version_numbers.yml @@ -21,52 +21,10 @@ jobs: # Update files with new package version numbers - name: update files run: | - # setting variables - package_version_major="${{ github.event.inputs.major }}" - package_version_minor="${{ github.event.inputs.minor }}" - package_version_patch="${{ github.event.inputs.patch }}" - package_version="${{ github.event.inputs.major }}.${{ github.event.inputs.minor }}.${{ github.event.inputs.patch }}" - echo "Setting version $package_version" - - # update main cmakelist - sed -i '' "s#.*set(OPENMS_PACKAGE_VERSION_MAJOR.*#set(OPENMS_PACKAGE_VERSION_MAJOR \"$package_version_major\")#" CMakeLists.txt - sed -i '' "s#.*set(OPENMS_PACKAGE_VERSION_MINOR.*#set(OPENMS_PACKAGE_VERSION_MINOR \"$package_version_minor\")#" CMakeLists.txt - sed -i '' "s#.*set(OPENMS_PACKAGE_VERSION_PATCH.*#set(OPENMS_PACKAGE_VERSION_PATCH \"$package_version_patch\")#" CMakeLists.txt - - # update version info test - sed -i '' "s#detail.version_major =.*#detail.version_major = $package_version_major;#" ./src/tests/class_tests/openms/source/VersionInfo_test.cpp - sed -i '' "s#detail.version_minor =.*#detail.version_minor = $package_version_minor;#" ./src/tests/class_tests/openms/source/VersionInfo_test.cpp - sed -i '' "s#detail.version_patch =.*#detail.version_patch = $package_version_patch;#" ./src/tests/class_tests/openms/source/VersionInfo_test.cpp - - # update vcpkg.json - sed -i '' "s/\"version-string\": \".*\"/\"version-string\": \"$package_version\"/" vcpkg.json - - # update test write ini out: - sed -i '' "s#, e.g. #4957 - a reference to an issue or pull request on GitHub, visit e.g. 
https://github.com/OpenMS/OpenMS/pull/XXXX (replace XXXX with number of interest) for details + +- Library: + ID information got factored out of MSSpectrum + +- removed outdated tools: + SpecLibCreator, SpecLibSearcher, IDMassAccuracy + ------------------------------------------------------------------------------------------ ---- OpenMS 3.5.0 (under development) ---- ------------------------------------------------------------------------------------------ @@ -18,9 +25,8 @@ General: Dependencies: -OpenSwath: - Misc: + - show load/store progress for files in all TOPP tools (#8041) Fixes: @@ -28,11 +34,11 @@ Library: - Removed `assignRanks` and `sortByRanks` in PeptideIdentifications and sort and filter by score instead. Also removed `updateHitRanks` in IDFilter (#7991) - Remove ranke member in PeptideHit and store ranks as meta value (for backwards compatibility). (#7997) -- removed tools: +Removed tools: ------------------------------------------------------------------------------------------ ----- OpenMS 3.4.0 (under development) ---- +---- OpenMS 3.4.0 (May 2025) ---- ------------------------------------------------------------------------------------------ General: @@ -75,7 +81,7 @@ Library: - made FASTA file reader more robust in presence of whitespaces (#7960) - add 3' cyclophosphate version of RNAse 4, fix handling of cleavage gains (#7928) -- removed tools: +Removed tools: - XTandemAdapter - MascotAdapter (note: MascotAdapterOnline still exists) (#7927) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1cf3ce1207c..7c4d6a9b8c3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -49,7 +49,6 @@ set(CMAKE_AUTOMOC_COMPILER_PREDEFINES OFF) # General CMake definitions & helper #------------------------------------------------------------------------------ SET(CMAKE_ALLOW_LOOSE_LOOP_CONSTRUCTS true) - #------------------------------------------------------------------------------ ## CMake sanity check: sometimes CMAKE_SIZEOF_VOID_P just vanishes when ## updating
CMake. @@ -83,7 +82,13 @@ option(ENABLE_DOCS "Indicates whether documentation should be built." ON) option(WITH_GUI "Build GUI parts of OpenMS (TOPPView&Co). This requires QtGui." ON) option(NO_WEBENGINE_WIDGETS "Do not use QtWebengineWidgets. Disables Javascript views in TOPPView." OFF) option(WITH_HDF5 "Build HDF5 parts of OpenMS." OFF) -option(ENABLE_CWL "Build and validate the CWL description files for all TOPP tools." OFF) +option(ENABLE_TDL "Load dependency and compile against TDL (required for CWL file support)." ON) +option(ENABLE_CWL_GENERATION "Build and validate the CWL description files for all TOPP tools (Requires ENABLE_TDL=ON)." OFF) + +if(ENABLE_CWL_GENERATION AND NOT ENABLE_TDL) + message(FATAL_ERROR "ENABLE_CWL_GENERATION requires ENABLE_TDL to be ON.") +endif() + if(MSVC) option(MT_ENABLE_NESTED_OPENMP "Enable nested parallelism." OFF) @@ -476,7 +481,7 @@ endif() #------------------------------------------------------------------------------ # CWL generation (updates openms/share/commonwl/*.cwl files for all TOPP tools) #------------------------------------------------------------------------------ -if (ENABLE_CWL) +if (ENABLE_CWL_GENERATION) include(${OPENMS_HOST_DIRECTORY}/cmake/cwl_generation.cmake) endif() diff --git a/contrib b/contrib index e6fde7cfed8..3cdef5c7c7f 160000 --- a/contrib +++ b/contrib @@ -1 +1 @@ -Subproject commit e6fde7cfed8cde73c6625cd493ce3f82e21263cc +Subproject commit 3cdef5c7c7f98032f7d43c59ed642ebe5a1d56b1 diff --git a/doc/code_examples/Tutorial_GUI_Plot1D.cpp b/doc/code_examples/Tutorial_GUI_Plot1D.cpp index c9cccc8bbb6..c382c8eea1c 100644 --- a/doc/code_examples/Tutorial_GUI_Plot1D.cpp +++ b/doc/code_examples/Tutorial_GUI_Plot1D.cpp @@ -11,6 +11,8 @@ #include // exotic header for path to tutorial data #include +#include + using namespace OpenMS; using namespace std; @@ -20,13 +22,13 @@ Int main(int argc, const char** argv) QApplication app(argc, const_cast(argv)); - PeakMap exp; + AnnotatedMSRun exp; + auto exp_sptr 
= boost::make_shared(); MSSpectrum spec; // demonstrating how to load a single spectrum from file formats which only contain a single spec // alternatively: use FileHandler().loadExperiment() if you need an experiment anyway FileHandler().loadSpectrum(tutorial_data_path, spec, {FileTypes::DTA}); - exp.addSpectrum(spec); - LayerDataBase::ExperimentSharedPtrType exp_sptr(new PeakMap(exp)); + exp_sptr->getMSExperiment().addSpectrum(spec); LayerDataBase::ODExperimentSharedPtrType on_disc_exp_sptr(new OnDiscMSExperiment()); Plot1DWidget widget(Param(), DIM::Y, nullptr); widget.canvas()->addPeakLayer(exp_sptr, on_disc_exp_sptr); diff --git a/doc/code_examples/Tutorial_MSExperiment.cpp b/doc/code_examples/Tutorial_MSExperiment.cpp index ddf586b4ef7..6ac6133674a 100644 --- a/doc/code_examples/Tutorial_MSExperiment.cpp +++ b/doc/code_examples/Tutorial_MSExperiment.cpp @@ -52,12 +52,23 @@ int main() } } - // update the data ranges for all dimensions (RT, m/z, int, IM) and print them: + // updateRanges provides a fast way to update the ranges of all spectra and chromatograms in the experiment. + // Once updated, the data ranges for all dimensions (RT, m/z, int, IM) can be printed. 
exp.updateRanges(); std::cout << "Data ranges:\n"; - exp.printRange(std::cout); - std::cout << "\nGet maximum intensity on its own: " << exp.getMaxIntensity() << '\n'; - exp.getMinRT(); + exp.spectrumRanges().printRange(std::cout); + std::cout << "\nGet maximum intensity on its own: " << exp.spectrumRanges().getMaxIntensity() << '\n'; + std::cout << "Get minimum RT on its own: " << exp.spectrumRanges().getMinRT() << '\n'; + std::cout << "Get maximum RT on its own: " << exp.spectrumRanges().getMaxRT() << '\n'; + std::cout << "Get minimum m/z on its own: " << exp.spectrumRanges().getMinMZ() << '\n'; + std::cout << "Get maximum m/z on its own: " << exp.spectrumRanges().getMaxMZ() << '\n'; + + // Printing the IM ranges is only possible if the spectra contain IM data (would throw an exception otherwise) + if (!exp.spectrumRanges().RangeMobility::isEmpty()) + { + std::cout << "Get minimum IM on its own: " << exp.spectrumRanges().getMinMobility() << '\n'; + std::cout << "Get maximum IM on its own: " << exp.spectrumRanges().getMaxMobility() << '\n'; + } // Store the spectra to a mzML file with: FileHandler fh; diff --git a/doc/doxygen/public/TOPP.doxygen b/doc/doxygen/public/TOPP.doxygen index e1eeeecc2cf..5e44e7c0e54 100755 --- a/doc/doxygen/public/TOPP.doxygen +++ b/doc/doxygen/public/TOPP.doxygen @@ -103,7 +103,6 @@ - @subpage TOPP_NovorAdapter - De novo sequencing from tandem mass spectrometry data. - @subpage TOPP_SageAdapter - Identifies MS/MS spectra using Sage (external). - @subpage TOPP_SimpleSearchEngine - A simple database search engine for annotating MS/MS spectra. - - @subpage TOPP_SpecLibSearcher - Identifies peptide MS/MS spectra by spectral matching with a searchable spectral library. - @subpage TOPP_SpectraSTSearchAdapter - An interface to the 'SEARCH' mode of the SpectraST program (external, beta). @@ -117,7 +116,6 @@ - @subpage TOPP_IDDecoyProbability - Estimates peptide probabilities using a decoy search strategy. WARNING: This utility is deprecated. 
- @subpage TOPP_IDExtractor - Extracts n peptides randomly or best n from idXML files. - @subpage TOPP_IDMapper - Assigns protein/peptide identifications to feature or consensus features. - - @subpage TOPP_IDMassAccuracy - Calculates a distribution of the mass error from given mass spectra and IDs. - @subpage TOPP_IDPosteriorErrorProbability - Estimates posterior error probabilities using a mixture model. - @subpage TOPP_IDScoreSwitcher - Switches between different scores of peptide or protein hits in identification data. - @subpage TOPP_PeptideIndexer - Refreshes the protein references for all peptide hits. @@ -126,7 +124,6 @@ - @subpage TOPP_ProteinInference - Infer proteins from a list of (high-confidence) peptides. - @subpage TOPP_PSMFeatureExtractor - Creates search engine specific features for PercolatorAdapter input. - @subpage TOPP_SequenceCoverageCalculator - Prints information about idXML files. - - @subpage TOPP_SpecLibCreator - Creates an MSP-formatted spectral library. - @subpage TOPP_StaticModification - Allows to attach a set of fixed modifications to an idXML file (MS/MS search results), e.g. to add 15N (N15) labeling post-hoc. 
diff --git a/doc/doxygen/public/developer_tutorial.doxygen b/doc/doxygen/public/developer_tutorial.doxygen index ed173458a59..380383ca34e 100644 --- a/doc/doxygen/public/developer_tutorial.doxygen +++ b/doc/doxygen/public/developer_tutorial.doxygen @@ -160,8 +160,6 @@ The extensible %OpenMS library implements common mass spectrometric data process - Database search: - Peptides (Tool %SimpleSearchEngine and its classes - started simple but is, by now, rather complete peptide identification engine) - Protein-Protein cross-links (Tool OpenPepXL) - - Spectral library search: - - Tool SpecLibSearcher and its classes - DeNovo: - Tool CompNovoCID and its classes - Quantification: diff --git a/dockerfiles/Dockerfile b/dockerfiles/Dockerfile index 3a41da91b7e..431b0f1f3ce 100644 --- a/dockerfiles/Dockerfile +++ b/dockerfiles/Dockerfile @@ -89,7 +89,7 @@ RUN <<-EOF # copying only the binaries that are relevant to Linux cp -r THIRDPARTY/All/* ${THIRDPARTY_DIR} - cp -r THIRDPARTY/Linux/64bit/* ${THIRDPARTY_DIR} + cp -r THIRDPARTY/Linux/x86_64/* ${THIRDPARTY_DIR} rm -rf THIRDPARTY EOF diff --git a/src/openms/CMakeLists.txt b/src/openms/CMakeLists.txt index f486a834401..86493c02b23 100644 --- a/src/openms/CMakeLists.txt +++ b/src/openms/CMakeLists.txt @@ -57,7 +57,7 @@ include (${PROJECT_SOURCE_DIR}/includes.cmake) # all the dependency libraries are linked into libOpenMS.so set(OPENMS_DEP_LIBRARIES Evergreen LibSVM::LibSVM XercesC::XercesC Eigen3::Eigen Qt6::Core Qt6::Network) -## setup the argumentes to 'target_link_libraries(OpenMS PRIVATE ${OPENMS_DEP_PRIVATE_LIBRARIES})' +## setup the arguments to 'target_link_libraries(OpenMS PRIVATE ${OPENMS_DEP_PRIVATE_LIBRARIES})' set(OPENMS_DEP_PRIVATE_LIBRARIES $<$:HDF5::HDF5> ${LPTARGET} @@ -72,8 +72,13 @@ set(OPENMS_DEP_PRIVATE_LIBRARIES SQLiteCpp ZLIB::ZLIB nlohmann_json::nlohmann_json - tdl::tdl ) +if (ENABLE_TDL) +set(OPENMS_DEP_PRIVATE_LIBRARIES + ${OPENMS_DEP_PRIVATE_LIBRARIES} + tdl::tdl + ) +endif() # Xerces requires linking 
against CoreFoundation&CoreServices on macOS # TODO check if this is still the case @@ -114,6 +119,11 @@ if (MSVC) target_compile_options(OpenMS PRIVATE "/we4189") endif() +if (ENABLE_TDL) + target_compile_definitions(OpenMS PUBLIC ENABLE_TDL) +endif() + + #------------------------------------------------------------------------------ # since the share basically belongs to OpenMS core we control its installation # here diff --git a/src/openms/extern/CMakeLists.txt b/src/openms/extern/CMakeLists.txt index de0014b2bfa..d14fedae3ba 100644 --- a/src/openms/extern/CMakeLists.txt +++ b/src/openms/extern/CMakeLists.txt @@ -55,7 +55,10 @@ add_subdirectory(eol-bspline) add_subdirectory(IsoSpec) add_subdirectory(GTE) add_subdirectory(Quadtree) -add_subdirectory(tool_description_lib) + +if(ENABLE_TDL) + add_subdirectory(tool_description_lib) +endif() ## ## external packages (with fallback option to local version) diff --git a/src/openms/include/OpenMS/ANALYSIS/ID/IDBoostGraph.h b/src/openms/include/OpenMS/ANALYSIS/ID/IDBoostGraph.h index df11d7c0abe..f027fcd9e4e 100644 --- a/src/openms/include/OpenMS/ANALYSIS/ID/IDBoostGraph.h +++ b/src/openms/include/OpenMS/ANALYSIS/ID/IDBoostGraph.h @@ -59,8 +59,10 @@ namespace OpenMS public: // boost has a weird extra semicolon in their strong typedef + #ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wextra-semi" + #endif /// placeholder for peptides with the same parent proteins or protein groups BOOST_STRONG_TYPEDEF(boost::blank, PeptideCluster); @@ -82,7 +84,9 @@ namespace OpenMS /// in which charge state a PSM was observed BOOST_STRONG_TYPEDEF(int, Charge); + #ifdef __clang__ #pragma clang diagnostic pop + #endif //typedefs //TODO rename ProteinGroup type since it collides with the actual OpenMS ProteinGroup diff --git a/src/openms/include/OpenMS/ANALYSIS/ID/IDMapper.h b/src/openms/include/OpenMS/ANALYSIS/ID/IDMapper.h index 6305f1732fe..37cec7f9d96 100644 --- 
a/src/openms/include/OpenMS/ANALYSIS/ID/IDMapper.h +++ b/src/openms/include/OpenMS/ANALYSIS/ID/IDMapper.h @@ -22,6 +22,7 @@ namespace OpenMS { + class AnnotatedMSRun; /** @brief Annotates an MSExperiment, FeatureMap or ConsensusMap with peptide identifications @@ -65,7 +66,7 @@ namespace OpenMS @exception Exception::MissingInformation is thrown if entries of @p peptide_ids do not contain 'MZ' and 'RT' information. */ - void annotate(PeakMap& map, const std::vector& peptide_ids, const std::vector& protein_ids, const bool clear_ids = false, const bool map_ms1 = false); + void annotate(AnnotatedMSRun& map, const std::vector& peptide_ids, const std::vector& protein_ids, const bool clear_ids = false, const bool map_ms1 = false); /** @brief Mapping method for peak maps @@ -82,7 +83,7 @@ namespace OpenMS @param clear_ids Reset peptide and protein identifications of each scan before annotating @param map_ms1 attach Ids to MS1 spectra using RT mapping only (without precursor, without m/z) */ - void annotate(PeakMap& map, FeatureMap fmap, const bool clear_ids = false, const bool map_ms1 = false); + void annotate(AnnotatedMSRun& map, const FeatureMap& fmap, const bool clear_ids = false, const bool map_ms1 = false); /** @brief Mapping method for feature maps diff --git a/src/openms/include/OpenMS/ANALYSIS/MAPMATCHING/MapAlignmentAlgorithmIdentification.h b/src/openms/include/OpenMS/ANALYSIS/MAPMATCHING/MapAlignmentAlgorithmIdentification.h index 5c4e6037f7c..dc91164f99f 100644 --- a/src/openms/include/OpenMS/ANALYSIS/MAPMATCHING/MapAlignmentAlgorithmIdentification.h +++ b/src/openms/include/OpenMS/ANALYSIS/MAPMATCHING/MapAlignmentAlgorithmIdentification.h @@ -25,6 +25,12 @@ namespace OpenMS { + /* Concept for FeatureMap or ConsensusMap*/ + template + concept IsFCMap = std::same_as || std::same_as; + + class AnnotatedMSRun; + /** @brief A map alignment algorithm based on peptide identifications from MS2 spectra. 
@@ -74,9 +80,9 @@ namespace OpenMS } /** - @brief Align feature maps, consensus maps, peak maps, or peptide identifications. + @brief Align feature maps, consensus maps, or peptide identifications. - @param data Vector of input data (FeatureMap, ConsensusMap, PeakMap or @p vector) that should be aligned. + @param data Vector of input data (FeatureMap, ConsensusMap, or @p vector) that should be aligned. @param transformations Vector of RT transformations that will be computed. @param reference_index Index in @p data of the reference to align to, if any @@ -201,7 +207,7 @@ namespace OpenMS @return Are the RTs already sorted? (Here: false) */ - bool getRetentionTimes_(const PeakMap& experiment, SeqToList& rt_data); + bool getRetentionTimes_(const AnnotatedMSRun& experiment, SeqToList& rt_data); /** @brief Collect retention time data from peptide IDs contained in feature maps or consensus maps @@ -217,8 +223,8 @@ namespace OpenMS @return Are the RTs already sorted? (Here: true) */ - template - bool getRetentionTimes_(const MapType& features, SeqToList& rt_data) + + bool getRetentionTimes_(const IsFCMap auto& features, SeqToList& rt_data) { if (!score_cutoff_) { @@ -236,8 +242,7 @@ namespace OpenMS { return a <= b; }; } - for (typename MapType::ConstIterator feat_it = features.begin(); - feat_it != features.end(); ++feat_it) + for (auto feat_it = features.cbegin(); feat_it != features.cend(); ++feat_it) { if (use_feature_rt_) { diff --git a/src/openms/include/OpenMS/CHEMISTRY/MASSDECOMPOSITION/IMS/IMSIsotopeDistribution.h b/src/openms/include/OpenMS/CHEMISTRY/MASSDECOMPOSITION/IMS/IMSIsotopeDistribution.h index e91eee01169..953f7b37145 100644 --- a/src/openms/include/OpenMS/CHEMISTRY/MASSDECOMPOSITION/IMS/IMSIsotopeDistribution.h +++ b/src/openms/include/OpenMS/CHEMISTRY/MASSDECOMPOSITION/IMS/IMSIsotopeDistribution.h @@ -78,13 +78,7 @@ namespace OpenMS mass(local_mass), abundance(local_abundance) {} - bool operator==(const Peak & peak) const - { -#pragma clang 
diagnostic push -#pragma clang diagnostic ignored "-Wfloat-equal" - return peak.mass == mass && peak.abundance == abundance; -#pragma clang diagnostic pop - } + bool operator==(const Peak& peak) const = default; mass_type mass; abundance_type abundance; diff --git a/src/openms/include/OpenMS/DATASTRUCTURES/Compomer.h b/src/openms/include/OpenMS/DATASTRUCTURES/Compomer.h index 6733739050c..25860f7583b 100644 --- a/src/openms/include/OpenMS/DATASTRUCTURES/Compomer.h +++ b/src/openms/include/OpenMS/DATASTRUCTURES/Compomer.h @@ -23,77 +23,196 @@ namespace OpenMS /** @brief Holds information on an edge connecting two features from a (putative) charge ladder - - A compomer is storing information on the adducts used on LEFT and RIGHT nodes (Features) that are connected by the edge (i.e. ChargePair) - holding the compomer. Additionally meta information on the edge (net_charge, edge score, id) - which is kept up-to-date when adducts are added to either side is stored. - + + A Compomer represents the chemical composition difference between two mass spectrometry features. + It stores information about the adducts (ions, molecules, or atoms) that explain the mass and + charge differences between these features. + + The Compomer has two sides: + - LEFT side: adducts subtracted from the first feature + - RIGHT side: adducts added to the first feature + + This model allows representing the relationship between features that correspond to the same + analyte but with different adduct compositions or charge states. + + The Compomer maintains metadata such as: + - Net charge (difference between right and left sides) + - Total mass difference + - Probability score of this adduct combination + - Expected RT shift caused by the adducts + + This class is used extensively in the feature decharging and adduct annotation processes. 
+ + @ingroup Datastructures */ class OPENMS_DLLAPI Compomer { public: - /// side of compomer (LEFT ^ subtract; RIGHT ^ add) + /** + @brief Enumeration for specifying which side of the compomer to operate on + + - LEFT: The left side (adducts subtracted from the first feature) + - RIGHT: The right side (adducts added to the first feature) + - BOTH: Both sides of the compomer + */ enum SIDE {LEFT, RIGHT, BOTH}; - typedef std::map CompomerSide; ///< adducts and their abundance etc - typedef std::vector CompomerComponents; ///< container for the two sides [0]=left, [1]=right + /// Type definition for one side of a compomer (maps adduct labels to Adduct objects) + typedef std::map CompomerSide; + + /** + @brief Container for both sides of a compomer + + Vector with exactly two elements: + - [0] = left side (adducts subtracted) + - [1] = right side (adducts added) + */ + typedef std::vector CompomerComponents; - /// Default Constructor + /** + @brief Default Constructor + + Initializes an empty compomer with zero net charge, mass, and probability. 
+ */ Compomer(); - /// Constructor with net-charge and mass + /** + @brief Constructor with net-charge, mass, and probability + + @param net_charge Net charge of the compomer (right side - left side) + @param mass Mass difference represented by the compomer + @param log_p Log probability of this adduct combination + */ Compomer(Int net_charge, double mass, double log_p); - /// Copy C'tor + /** + @brief Copy constructor + + @param p Source compomer to copy from + */ Compomer(const Compomer& p); - /// Assignment Operator + /** + @brief Assignment Operator + + @param source Source compomer to assign from + @return Reference to this object + */ Compomer& operator=(const Compomer& source); - /// Add a.amount of Adduct @param a to Compomer's @param side and update its properties + /** + @brief Add an adduct to a specific side of the compomer + + Adds the specified amount of the adduct to the given side and + updates the compomer's properties (net charge, mass, etc.). + + @param a The adduct to add + @param side Which side to add the adduct to (0=LEFT, 1=RIGHT) + */ void add(const Adduct& a, UInt side); /** - * indicates if these two compomers can coexist for one feature - * @param cmp The other Compomer we compare to - * @param side_this Indicates which "side"(negative or positive adducts) we are looking at. Negative adducts belong to the left side of the ChargePair. - * @param side_other See above. + @brief Determines if two compomers conflict with each other + + Checks if these two compomers can coexist for one feature by examining + if they have conflicting adduct compositions on the specified sides. 
+ + @param cmp The other Compomer to compare against + @param side_this Which side of this compomer to check (0=LEFT, 1=RIGHT) + @param side_other Which side of the other compomer to check (0=LEFT, 1=RIGHT) + @return True if the compomers conflict (cannot coexist), false otherwise */ bool isConflicting(const Compomer& cmp, UInt side_this, UInt side_other) const; - /// set an Id which allows unique identification of a compomer + /** + @brief Set a unique identifier for this compomer + + @param id The unique ID to assign + */ void setID(const Size& id); - /// return Id which allows unique identification of this compomer + + /** + @brief Get the unique identifier of this compomer + + @return The unique ID of this compomer + */ const Size& getID() const; - /// left and right adducts of this compomer + + /** + @brief Get both sides (left and right) of this compomer + + @return Reference to the compomer components (left and right sides) + */ const CompomerComponents& getComponent() const; - /// net charge of compomer (i.e. difference between left and right side of compomer) + /** + @brief Get the net charge of this compomer + + The net charge is calculated as the difference between the right and left sides. 
+ + @return Net charge value + */ const Int& getNetCharge() const; - /// mass of all contained adducts + /** + @brief Get the total mass difference represented by this compomer + + @return Mass difference in Da + */ const double& getMass() const; - /// summed positive charges of contained adducts + /** + @brief Get the sum of positive charges in this compomer + + @return Total positive charges + */ const Int& getPositiveCharges() const; - /// summed negative charges of contained adducts + /** + @brief Get the sum of negative charges in this compomer + + @return Total negative charges + */ const Int& getNegativeCharges() const; - /// return log probability + /** + @brief Get the log probability of this adduct combination + + Higher values indicate more likely combinations. + + @return Log probability value + */ const double& getLogP() const; - /// return log probability + /** + @brief Get the expected retention time shift caused by this compomer + + @return Expected RT shift value + */ const double& getRTShift() const; - /// get adducts with their abundance as compact string for both sides + /** + @brief Get a string representation of all adducts in this compomer + + @return String representation of adducts on both sides + */ String getAdductsAsString() const; - /// get adducts with their abundance as compact string (amounts are absolute unless side=BOTH) - /// @param side Use LEFT for left, RIGHT for right + /** + @brief Get a string representation of adducts on a specific side + + @param side Which side to get adducts for (LEFT, RIGHT, or BOTH) + @return String representation of adducts on the specified side + */ String getAdductsAsString(UInt side) const; - /// check if Compomer only contains a single adduct on side @p side + /** + @brief Check if the compomer contains only a single adduct on the specified side + + @param a Output parameter that will contain the adduct if found + @param side Which side to check (LEFT or RIGHT) + @return True if only a single 
adduct is present on the specified side + */ bool isSingleAdduct(Adduct& a, const UInt side) const; /** @@ -121,28 +240,56 @@ namespace OpenMS StringList getLabels(const UInt side) const; - /// Adds @p add_side to this compomer. + /** + @brief Add a complete set of adducts to a specific side of the compomer + + @param add_side The set of adducts to add + @param side Which side to add the adducts to (LEFT or RIGHT) + */ void add(const CompomerSide& add_side, UInt side); - /// Sort compomer by (in order of importance): net-charge, mass, probability + /** + @brief Comparison operator for sorting compomers + + Sorts compomers by (in order of importance): + 1. Net charge + 2. Mass + 3. Probability + + @param c1 First compomer to compare + @param c2 Second compomer to compare + @return True if c1 should be ordered before c2 + */ friend OPENMS_DLLAPI bool operator<(const Compomer& c1, const Compomer& c2); - /// Print the contents of a Compomer to a stream. + /** + @brief Output stream operator for printing compomer contents + + @param os Output stream to write to + @param cmp Compomer to print + @return Reference to the output stream + */ friend OPENMS_DLLAPI std::ostream& operator<<(std::ostream& os, const Compomer& cmp); - /// Comparator + /** + @brief Equality comparison operator + + @param a First compomer to compare + @param b Second compomer to compare + @return True if the compomers are equal + */ friend OPENMS_DLLAPI bool operator==(const Compomer& a, const Compomer& b); private: - CompomerComponents cmp_; ///< adducts of left and right side - Int net_charge_; ///< net charge (right - left) - double mass_; ///< net mass (right - left) - Int pos_charges_; ///< net charges on the right - Int neg_charges_; ///< net charges on the left - double log_p_; ///< log probability of compomer - double rt_shift_; ///< expected net RT shift of compomer (-shift_leftside + shift_rightside) - Size id_; + CompomerComponents cmp_; ///< Adducts of left and right side + Int 
net_charge_; ///< Net charge (right - left) + double mass_; ///< Net mass (right - left) + Int pos_charges_; ///< Sum of positive charges + Int neg_charges_; ///< Sum of negative charges + double log_p_; ///< Log probability of this adduct combination + double rt_shift_; ///< Expected net RT shift (-shift_leftside + shift_rightside) + Size id_; ///< Unique identifier for this compomer }; // \Compomer diff --git a/src/openms/include/OpenMS/DATASTRUCTURES/DPosition.h b/src/openms/include/OpenMS/DATASTRUCTURES/DPosition.h index c7edc4a4093..5e5486cd690 100644 --- a/src/openms/include/OpenMS/DATASTRUCTURES/DPosition.h +++ b/src/openms/include/OpenMS/DATASTRUCTURES/DPosition.h @@ -13,6 +13,7 @@ #include #include +#include #include // for std::abs on integrals and floats #include #include @@ -25,225 +26,193 @@ namespace OpenMS @ingroup Datastructures */ template - class DPosition - { +class DPosition +{ public: + /// Coordinate type + using CoordinateType = TCoordinateType; - /// Coordinate type - typedef TCoordinateType CoordinateType; - /// Mutable iterator - typedef CoordinateType* Iterator; - /// Non-mutable iterator - typedef const CoordinateType* ConstIterator; - /// Dimensions - enum - { - DIMENSION = D - }; - /** - @name STL compatibility type definitions - */ - //@{ - typedef CoordinateType value_type; - typedef CoordinateType& reference; - typedef CoordinateType* pointer; - typedef CoordinateType* iterator; - typedef const CoordinateType* const_iterator; - //@} - - /** - @name Constructors and Destructor - */ - //@{ - /** - @brief Default constructor. - - Creates a position with all coordinates zero. - */ - DPosition() = default; - - /// Constructor that fills all dimensions with the value @p x - DPosition(CoordinateType x) - { - std::fill(&(coordinate_[0]), &(coordinate_[D]), x); - } + using DataType = std::array; - /// Constructor only for DPosition<2> that takes two Coordinates. 
- DPosition(CoordinateType x, CoordinateType y) - { - static_assert(D == 2, "DPosition:DPosition(x,y): index overflow!"); - coordinate_[0] = x; - coordinate_[1] = y; - } + /// Dimensions + enum + { + DIMENSION = D + }; + /** + @name STL compatibility type definitions + */ + //@{ + typedef CoordinateType value_type; + typedef CoordinateType& reference; + typedef CoordinateType* pointer; + typedef CoordinateType* iterator; + typedef const CoordinateType* const_iterator; + //@} - /// Constructor only for DPosition<3> that takes three Coordinates. - DPosition(CoordinateType x, CoordinateType y, CoordinateType z) - { - static_assert(D == 3, "DPosition:DPosition(x,y,z): index overflow!"); - coordinate_[0] = x; - coordinate_[1] = y; - coordinate_[2] = z; - } + /** + @name Constructors and Destructor + */ + //@{ + /** + @brief Default constructor. - /// Copy constructor - DPosition(const DPosition& pos) = default; + Creates a position with all coordinates zero. + */ + DPosition() = default; - /// Move constructor - DPosition(DPosition&& rhs) noexcept = default; + /// Constructor that fills all dimensions with the value @p x + DPosition(CoordinateType x) + { + std::fill(coordinate_.begin(), coordinate_.end(), x); + } - /// Assignment operator - DPosition& operator=(const DPosition& source) = default; + /// Constructor only for DPosition<2> that takes two Coordinates. + DPosition(CoordinateType x, CoordinateType y) + { + static_assert(D == 2, "DPosition:DPosition(x,y): index overflow!"); + coordinate_[0] = x; + coordinate_[1] = y; + } - /// Move Assignment operator - DPosition& operator=(DPosition&& source) noexcept = default; + /// Constructor only for DPosition<3> that takes three Coordinates. + DPosition(CoordinateType x, CoordinateType y, CoordinateType z) + { + static_assert(D == 3, "DPosition:DPosition(x,y,z): index overflow!"); + coordinate_[0] = x; + coordinate_[1] = y; + coordinate_[2] = z; + } - /// Destructor (not-virtual as this will save a lot of space!) 
- ~DPosition() noexcept = default; + /// Copy constructor + DPosition(const DPosition& pos) = default; - //@} + /// Move constructor + DPosition(DPosition&& rhs) noexcept = default; - /// Swap the two objects - void swap(DPosition& rhs) noexcept - { - for (Size i = 0; i < D; ++i) - { - std::swap(coordinate_[i], rhs.coordinate_[i]); - } - } + /// Assignment operator + DPosition& operator=(const DPosition& source) = default; - /// Make all dimension values positive - DPosition& abs() noexcept - { - for (Size i = 0; i < D; ++i) - { - coordinate_[i] = std::abs(coordinate_[i]); - } - return *this; - } + /// Move Assignment operator + DPosition& operator=(DPosition&& source) noexcept = default; - /**@name Accessors */ - //@{ + /// Destructor (not-virtual as this will save a lot of space!) + ~DPosition() noexcept = default; - ///Const accessor for the dimensions - CoordinateType operator[](Size index) const - { - OPENMS_PRECONDITION(index < D, "DPosition:operator [] (Position): index overflow!"); - return coordinate_[index]; - } + //@} - ///Accessor for the dimensions - CoordinateType& operator[](Size index) - { - OPENMS_PRECONDITION(index < D, "DPosition:operator [] (Position): index overflow!"); - return coordinate_[index]; - } + /// Swap the two objects + void swap(DPosition& rhs) noexcept + { + std::swap(coordinate_, rhs.coordinate_); + } - ///Name accessor for the first dimension. Only for DPosition<2>, for visualization. - CoordinateType getX() const + /// Make all dimension values positive + DPosition& abs() noexcept + { + for (Size i = 0; i < D; ++i) { - OPENMS_PRECONDITION(D == 2, "DPosition:getX(): index overflow!"); - return coordinate_[0]; + coordinate_[i] = std::abs(coordinate_[i]); } + return *this; + } - ///Name accessor for the second dimension. Only for DPosition<2>, for visualization. 
- CoordinateType getY() const - { - OPENMS_PRECONDITION(D == 2, "DPosition:getY(): index overflow!"); - return coordinate_[1]; - } + /**@name Accessors */ + //@{ - ///Name mutator for the first dimension. Only for DPosition<2>, for visualization. - void setX(CoordinateType c) - { - OPENMS_PRECONDITION(D == 2, "DPosition:setX(): index overflow!"); - coordinate_[0] = c; - } + /// Const accessor for the dimensions + CoordinateType operator[](Size index) const + { + OPENMS_PRECONDITION(index < D, "DPosition:operator [] (Position): index overflow!"); + return coordinate_[index]; + } - ///Name mutator for the second dimension. Only for DPosition<2>, for visualization. - void setY(CoordinateType c) - { - OPENMS_PRECONDITION(D == 2, "DPosition:setY(): index overflow!"); - coordinate_[1] = c; - } + /// Accessor for the dimensions + CoordinateType& operator[](Size index) + { + OPENMS_PRECONDITION(index < D, "DPosition:operator [] (Position): index overflow!"); + return coordinate_[index]; + } - /// Equality operator - bool operator==(const DPosition& point) const - { - for (Size i = 0; i < D; i++) - { -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wfloat-equal" - if (coordinate_[i] != point.coordinate_[i]) return false; + /// Name accessor for the first dimension. Only for DPosition<2>, for visualization. + CoordinateType getX() const + { + OPENMS_PRECONDITION(D == 2, "DPosition:getX(): index overflow!"); + return coordinate_[0]; + } -#pragma clang diagnostic pop - } - return true; - } + /// Name accessor for the second dimension. Only for DPosition<2>, for visualization. + CoordinateType getY() const + { + OPENMS_PRECONDITION(D == 2, "DPosition:getY(): index overflow!"); + return coordinate_[1]; + } - /// Equality operator - bool operator!=(const DPosition& point) const - { - return !(operator==(point)); - } + /// Name mutator for the first dimension. Only for DPosition<2>, for visualization. 
+ void setX(CoordinateType c) + { + OPENMS_PRECONDITION(D == 2, "DPosition:setX(): index overflow!"); + coordinate_[0] = c; + } - /** - @brief Lexicographical less than operator. - Lexicographical comparison from dimension 0 to dimension D-1 is done. - */ - bool operator<(const DPosition& point) const - { - for (Size i = 0; i < D; i++) - { - if (coordinate_[i] < point.coordinate_[i]) return true; + /// Name mutator for the second dimension. Only for DPosition<2>, for visualization. + void setY(CoordinateType c) + { + OPENMS_PRECONDITION(D == 2, "DPosition:setY(): index overflow!"); + coordinate_[1] = c; + } - if (coordinate_[i] > point.coordinate_[i]) return false; - } - return false; - } + /// Equality operator + bool operator==(const DPosition& point) const = default; - /// Lexicographical greater less or equal operator. - bool operator<=(const DPosition& point) const - { - for (Size i = 0; i < D; i++) - { - if (coordinate_[i] < point.coordinate_[i]) return true; + /// Equality operator + bool operator!=(const DPosition& point) const = default; + /** + @brief Lexicographical less than operator. + Lexicographical comparison from dimension 0 to dimension D-1 is done. + */ + bool operator<(const DPosition& point) const + { + return coordinate_ < point.coordinate_; + } - if (coordinate_[i] > point.coordinate_[i]) return false; - } - return true; - } + /// Lexicographical greater less or equal operator. + bool operator<=(const DPosition& point) const + { + return coordinate_ <= point.coordinate_; + } - /// Spatially (geometrically) less or equal operator. All coordinates must be "<=". - bool spatiallyLessEqual(const DPosition& point) const + /// Spatially (geometrically) less or equal operator. All coordinates must be "<=". 
+ bool spatiallyLessEqual(const DPosition& point) const + { + for (Size i = 0; i < D; i++) { - for (Size i = 0; i < D; i++) - { - if (coordinate_[i] > point.coordinate_[i]) return false; - } - return true; + if (coordinate_[i] > point.coordinate_[i]) return false; } + return true; + } - /// Spatially (geometrically) greater or equal operator. All coordinates must be ">=". - bool spatiallyGreaterEqual(const DPosition& point) const + /// Spatially (geometrically) greater or equal operator. All coordinates must be ">=". + bool spatiallyGreaterEqual(const DPosition& point) const + { + for (Size i = 0; i < D; i++) { - for (Size i = 0; i < D; i++) - { - if (coordinate_[i] < point.coordinate_[i]) return false; - } - return true; + if (coordinate_[i] < point.coordinate_[i]) return false; } + return true; + } - /// Lexicographical greater than operator. - bool operator>(const DPosition& point) const - { - return !(operator<=(point)); - } + /// Lexicographical greater than operator. + bool operator>(const DPosition& point) const + { + return coordinate_ > point.coordinate_; + } - /// Lexicographical greater or equal operator. - bool operator>=(const DPosition& point) const - { - return !operator<(point); - } + /// Lexicographical greater or equal operator. 
+ bool operator>=(const DPosition& point) const + { + return coordinate_ >= point.coordinate_; + } /// Addition (a bit inefficient) DPosition operator+(const DPosition& point) const @@ -338,10 +307,7 @@ namespace OpenMS /// Set all dimensions to zero void clear() { - for (Size i = 0; i < D; ++i) - { - coordinate_[i] = static_cast(0); - } + coordinate_.fill(0); } //@} @@ -377,33 +343,33 @@ namespace OpenMS /** @name Iteration */ //@{ /// Non-mutable begin iterator - ConstIterator begin() const + const_iterator begin() const { - return &(coordinate_[0]); + return &coordinate_[0]; } /// Non-mutable end iterator - ConstIterator end() const + const_iterator end() const { - return &(coordinate_[0]) + D; + return &coordinate_[0] + D; } /// Mutable begin iterator - Iterator begin() + iterator begin() { - return &(coordinate_[0]); + return &coordinate_[0]; } /// Mutable end iterator - Iterator end() + iterator end() { - return &(coordinate_[0]) + D; + return &coordinate_[0] + D; } //@} protected: - CoordinateType coordinate_[D]{}; + DataType coordinate_{}; }; // DPosition /// Scalar multiplication (a bit inefficient) diff --git a/src/openms/include/OpenMS/DATASTRUCTURES/IsotopeCluster.h b/src/openms/include/OpenMS/DATASTRUCTURES/IsotopeCluster.h index 31134c75a5d..8a8504436ed 100644 --- a/src/openms/include/OpenMS/DATASTRUCTURES/IsotopeCluster.h +++ b/src/openms/include/OpenMS/DATASTRUCTURES/IsotopeCluster.h @@ -14,37 +14,75 @@ namespace OpenMS { - ///Stores information about an isotopic cluster (i.e. potential peptide charge variants) + /** + @brief Stores information about an isotopic cluster (i.e. potential peptide charge variants) + + An isotopic cluster represents a group of related peaks that likely originate from the same + peptide but with different isotopic compositions. This structure stores the indices of these + peaks and the scans they appear in, along with charge state information when available. 
+ + The structure is typically used in mass spectrometry data analysis to group related peaks + and track their charge states for further processing. + + @ingroup Datastructures + */ struct OPENMS_DLLAPI IsotopeCluster { - /// An index e.g. in an MSExperiment + /** + @brief An index pair typically representing (scan_index, peak_index) in an MSExperiment + + The first value usually refers to the scan/spectrum index, while the second value + refers to the peak index within that scan/spectrum. + */ typedef std::pair IndexPair; - /// A set of index pairs, usually referring to an MSExperiment. + + /** + @brief A set of index pairs, usually referring to peaks in an MSExperiment + + This collection stores unique pairs of indices that point to specific peaks + in specific scans of a mass spectrometry experiment. + */ typedef std::set IndexSet; - ///index set with associated charge estimate + /** + @brief Index set with associated charge estimate + + Extends the basic IndexSet with charge state information for the peaks. + This allows tracking which peaks belong to the same isotopic pattern + and what charge state they represent. 
+ */ struct ChargedIndexSet : public IndexSet { + /** + @brief Default constructor + + Initializes the charge to 0, which by convention means "no charge estimate" + */ ChargedIndexSet() : charge(0) { } - /// charge estimate (convention: zero means "no charge estimate") + /// Charge estimate (convention: zero means "no charge estimate") Int charge; }; + /** + @brief Default constructor + + Initializes an empty isotope cluster with no peaks and no scans + */ IsotopeCluster() : peaks(), scans() { } - /// peaks in this cluster + /// Peaks in this cluster, with their charge state information ChargedIndexSet peaks; - /// the scans of this cluster + /// The scan indices where this cluster appears std::vector scans; }; diff --git a/src/openms/include/OpenMS/DATASTRUCTURES/LPWrapper.h b/src/openms/include/OpenMS/DATASTRUCTURES/LPWrapper.h index 21a96f9213a..6aa37d62d43 100644 --- a/src/openms/include/OpenMS/DATASTRUCTURES/LPWrapper.h +++ b/src/openms/include/OpenMS/DATASTRUCTURES/LPWrapper.h @@ -41,14 +41,44 @@ class CoinModel; namespace OpenMS { + /** + @brief A wrapper class for linear programming (LP) solvers + + This class provides a unified interface to different linear programming solvers, + including GLPK (GNU Linear Programming Kit) and COIN-OR (if available). + + Linear programming is a method to find the best outcome in a mathematical model + whose requirements are represented by linear relationships. It is used for + optimization problems where the objective function and constraints are linear. + + LPWrapper allows you to: + - Create and manipulate LP problems (add rows, columns, set bounds) + - Set objective functions and constraints + - Solve the LP problem using different solvers + - Access the solution and status information + + The class supports both continuous and integer variables, allowing for + mixed-integer linear programming (MILP) problems. 
+ + @ingroup Datastructures + */ class OPENMS_DLLAPI LPWrapper { public: /** @brief Struct that holds the parameters of the LP solver + + This structure contains various parameters that control the behavior of the LP solver, + including algorithm selection, cut generation, heuristics, and output control. + + Most parameters have reasonable defaults and don't need to be modified for basic use cases. + Advanced users can tune these parameters to improve performance for specific problem types. */ struct SolverParam { + /** + @brief Default constructor that initializes all parameters with reasonable defaults + */ SolverParam() : message_level(3), branching_tech(4), backtrack_tech(3), preprocessing_tech(2), enable_feas_pump_heuristic(true), enable_gmi_cuts(true), @@ -58,77 +88,138 @@ namespace OpenMS { } - Int message_level; - Int branching_tech; - Int backtrack_tech; - Int preprocessing_tech; - bool enable_feas_pump_heuristic; - bool enable_gmi_cuts; - bool enable_mir_cuts; - bool enable_cov_cuts; - bool enable_clq_cuts; - double mip_gap; - Int time_limit; - Int output_freq; - Int output_delay; - bool enable_presolve; - bool enable_binarization; ///< only with presolve + Int message_level; ///< Controls verbosity of solver output (0-3) + Int branching_tech; ///< Branching technique for MIP problems + Int backtrack_tech; ///< Backtracking technique for MIP problems + Int preprocessing_tech; ///< Preprocessing technique + bool enable_feas_pump_heuristic; ///< Enable feasibility pump heuristic + bool enable_gmi_cuts; ///< Enable Gomory mixed-integer cuts + bool enable_mir_cuts; ///< Enable mixed-integer rounding cuts + bool enable_cov_cuts; ///< Enable cover cuts + bool enable_clq_cuts; ///< Enable clique cuts + double mip_gap; ///< Relative gap tolerance for MIP problems + Int time_limit; ///< Time limit in milliseconds + Int output_freq; ///< Output frequency in milliseconds + Int output_delay; ///< Output delay in milliseconds + bool enable_presolve; ///< Enable 
presolve techniques + bool enable_binarization; ///< Enable binarization (only with presolve) }; + /** + @brief Enumeration for variable/constraint bound types + + Defines the type of bounds applied to variables or constraints in the LP problem. + */ enum Type { - UNBOUNDED = 1, - LOWER_BOUND_ONLY, - UPPER_BOUND_ONLY, - DOUBLE_BOUNDED, - FIXED + UNBOUNDED = 1, ///< No bounds (free variable) + LOWER_BOUND_ONLY, ///< Only lower bound is specified + UPPER_BOUND_ONLY, ///< Only upper bound is specified + DOUBLE_BOUNDED, ///< Both lower and upper bounds are specified + FIXED ///< Lower bound equals upper bound (fixed value) }; + /** + @brief Enumeration for variable types in the LP problem + + Defines whether variables are continuous or discrete (integer/binary). + */ enum VariableType { - CONTINUOUS = 1, - INTEGER, - BINARY + CONTINUOUS = 1, ///< Continuous variable (can take any real value within bounds) + INTEGER, ///< Integer variable (can only take integer values within bounds) + BINARY ///< Binary variable (can only take values 0 or 1) }; + /** + @brief Enumeration for optimization direction + + Defines whether the objective function should be minimized or maximized. + */ enum Sense { - MIN = 1, - MAX + MIN = 1, ///< Minimize the objective function + MAX ///< Maximize the objective function }; + /** + @brief Enumeration for LP problem file formats + + Defines the file format used when writing LP problems to disk. + */ enum WriteFormat { - FORMAT_LP = 0, - FORMAT_MPS, - FORMAT_GLPK + FORMAT_LP = 0, ///< LP format (human-readable) + FORMAT_MPS, ///< MPS format (industry standard) + FORMAT_GLPK ///< GLPK's native format }; + /** + @brief Enumeration for available LP solvers + + Defines which solver backend to use for solving LP problems. 
+ */ enum SOLVER { - SOLVER_GLPK = 0 + SOLVER_GLPK = 0 ///< GNU Linear Programming Kit solver #ifdef OPENMS_HAS_COINOR - , SOLVER_COINOR + , SOLVER_COINOR ///< COIN-OR solver (if available) #endif }; + /** + @brief Enumeration for solver status after solving an LP problem + + Defines the possible outcomes after attempting to solve an LP problem. + */ enum SolverStatus { - UNDEFINED = 1, - OPTIMAL = 5, - FEASIBLE = 2, - NO_FEASIBLE_SOL = 4 + UNDEFINED = 1, ///< Status is undefined (e.g., solver not run yet) + OPTIMAL = 5, ///< Optimal solution found + FEASIBLE = 2, ///< Feasible solution found (but not necessarily optimal) + NO_FEASIBLE_SOL = 4 ///< No feasible solution exists for the problem }; + /** + @brief Default constructor + + Initializes a new LP problem with the default solver (GLPK or COIN-OR if available). + */ LPWrapper(); + + /** + @brief Virtual destructor + + Frees all resources associated with the LP problem. + */ virtual ~LPWrapper(); // problem creation/manipulation - /// adds a row to the LP matrix, returns index + /** + @brief Adds a row to the LP matrix + + @param row_indices Indices of the columns that have non-zero coefficients in this row + @param row_values Values of the non-zero coefficients in this row + @param name Name of the row (for identification purposes) + @return Index of the newly added row + */ Int addRow(const std::vector& row_indices, const std::vector& row_values, const String& name); - /// adds an empty column to the LP matrix, returns index + + /** + @brief Adds an empty column to the LP matrix + + @return Index of the newly added column + */ Int addColumn(); - /// adds a column to the LP matrix, returns index + + /** + @brief Adds a column to the LP matrix + + @param column_indices Indices of the rows that have non-zero coefficients in this column + @param column_values Values of the non-zero coefficients in this column + @param name Name of the column (for identification purposes) + @return Index of the newly added column 
+ */ Int addColumn(const std::vector& column_indices, const std::vector& column_values, const String& name); /** @@ -158,27 +249,91 @@ namespace OpenMS */ Int addColumn(const std::vector& column_indices, const std::vector& column_values, const String& name, double lower_bound, double upper_bound, Type type); - /// delete index-th row + /** + @brief Delete the row at the specified index + + @param index Index of the row to delete + */ void deleteRow(Int index); - /// sets name of the index-th column + + /** + @brief Set the name of a column + + @param index Index of the column to rename + @param name New name for the column + */ void setColumnName(Int index, const String& name); - /// gets name of the index-th column + + /** + @brief Get the name of a column + + @param index Index of the column + @return Name of the column + */ String getColumnName(Int index); - /// sets name of the index-th row + + /** + @brief Get the name of a row + + @param index Index of the row + @return Name of the row + */ String getRowName(Int index); - /// gets index of the row with name + + /** + @brief Find the index of a row by its name + + @param name Name of the row to find + @return Index of the row with the given name + */ Int getRowIndex(const String& name); - /// gets index of the column with name + + /** + @brief Find the index of a column by its name + + @param name Name of the column to find + @return Index of the column with the given name + */ Int getColumnIndex(const String& name); - /// gets column's upper bound + + /** + @brief Get the upper bound of a column + + @param index Index of the column + @return Upper bound value of the column + */ double getColumnUpperBound(Int index); - /// gets column's lower bound + + /** + @brief Get the lower bound of a column + + @param index Index of the column + @return Lower bound value of the column + */ double getColumnLowerBound(Int index); - /// gets row's upper bound + + /** + @brief Get the upper bound of a row + + @param index 
Index of the row + @return Upper bound value of the row + */ double getRowUpperBound(Int index); - /// gets row's lower bound + + /** + @brief Get the lower bound of a row + + @param index Index of the row + @return Lower bound value of the row + */ double getRowLowerBound(Int index); - /// sets name of the index-th row + + /** + @brief Set the name of a row + + @param index Index of the row to rename + @param name New name for the row + */ void setRowName(Int index, const String& name); /** @@ -217,9 +372,20 @@ namespace OpenMS */ VariableType getColumnType(Int index); - /// set objective value for column with index + /** + @brief Set the objective coefficient for a column/variable + + @param index Index of the column/variable + @param obj_value Coefficient value in the objective function + */ void setObjective(Int index, double obj_value); - /// get objective value for column with index + + /** + @brief Get the objective coefficient for a column/variable + + @param index Index of the column/variable + @return Coefficient value in the objective function + */ double getObjective(Int index); /** @@ -228,14 +394,43 @@ namespace OpenMS @param sense 1- minimize, 2- maximize */ void setObjectiveSense(Sense sense); + /** + @brief Get the current objective direction + + @return Current optimization direction (MIN or MAX) + */ Sense getObjectiveSense(); - /// get number of columns + /** + @brief Get the number of columns/variables in the LP problem + + @return Number of columns in the LP matrix + */ Int getNumberOfColumns(); - /// get number of rows + + /** + @brief Get the number of rows/constraints in the LP problem + + @return Number of rows in the LP matrix + */ Int getNumberOfRows(); + /** + @brief Set the value of a matrix element at the specified position + + @param row_index Index of the row + @param column_index Index of the column + @param value Value to set at the specified position + */ void setElement(Int row_index, Int column_index, double value); + + /** + 
@brief Get the value of a matrix element at the specified position + + @param row_index Index of the row + @param column_index Index of the column + @return Value at the specified position + */ double getElement(Int row_index, Int column_index); // problem reading/writing @@ -275,24 +470,53 @@ namespace OpenMS SolverStatus getStatus(); // solution access + /** + @brief Get the objective function value of the solution + + @return Value of the objective function at the optimal solution + */ double getObjectiveValue(); + + /** + @brief Get the value of a variable in the solution + + @param index Index of the column/variable + @return Value of the variable in the optimal solution + */ double getColumnValue(Int index); + /** + @brief Get the number of non-zero entries in a specific row + + @param idx Index of the row + @return Number of non-zero coefficients in the row + */ Int getNumberOfNonZeroEntriesInRow(Int idx); + + /** + @brief Get the indices of non-zero entries in a specific row + + @param idx Index of the row + @param indexes Vector to store the column indices of non-zero entries + */ void getMatrixRow(Int idx, std::vector& indexes); - /// get currently active solver + /** + @brief Get the currently active solver backend + + @return Currently active solver (GLPK or COIN-OR) + */ SOLVER getSolver() const; protected: #ifdef OPENMS_HAS_COINOR - CoinModel * model_ = nullptr; - std::vector solution_; + CoinModel * model_ = nullptr; ///< COIN-OR model object for the LP problem + std::vector solution_; ///< Solution vector when using COIN-OR #else - glp_prob * lp_problem_ = nullptr; + glp_prob * lp_problem_ = nullptr; ///< GLPK problem object for the LP problem #endif - SOLVER solver_; + SOLVER solver_; ///< Currently active solver backend }; // class diff --git a/src/openms/include/OpenMS/DATASTRUCTURES/MassExplainer.h b/src/openms/include/OpenMS/DATASTRUCTURES/MassExplainer.h index aeba31c3154..1632afd0b51 100644 --- 
a/src/openms/include/OpenMS/DATASTRUCTURES/MassExplainer.h +++ b/src/openms/include/OpenMS/DATASTRUCTURES/MassExplainer.h @@ -20,9 +20,19 @@ namespace OpenMS class Compomer; /** - @brief computes empirical formulas for given mass differences using a set of allowed elements - - + @brief Computes empirical formulas for given mass differences using a set of allowed elements + + MassExplainer is used to explain observed mass differences between features by + determining the most likely combination of adducts that could cause such differences. + + The class works by: + 1. Taking a set of allowed adducts (elements, molecules, or ions) + 2. Computing all possible combinations of these adducts that could explain observed mass differences + 3. Providing a query interface to search for explanations for specific mass differences + + This is particularly useful in mass spectrometry data analysis for identifying + related features that represent the same analyte but with different adducts or charge states. 
+ @ingroup Datastructures */ class OPENMS_DLLAPI MassExplainer @@ -30,30 +40,73 @@ namespace OpenMS public: - typedef Adduct::AdductsType AdductsType; //vector + /// Type definition for a vector of Adduct objects + typedef Adduct::AdductsType AdductsType; + + /// Type definition for an iterator over Compomer objects typedef std::vector::const_iterator CompomerIterator; ///@name Constructors and destructor //@{ - /// Default constructor + /** + @brief Default constructor + + Initializes with default parameters: + - No adducts + - Charge range from -2 to +4 + - Maximum charge span of 4 + - Log probability threshold of -5.0 + - Maximum number of neutral adducts of 2 + */ MassExplainer(); - /// Constructor + /** + @brief Constructor with custom adduct base + + @param adduct_base Set of allowed adducts to use for mass difference explanations + */ MassExplainer(AdductsType adduct_base); - /// Constructor + /** + @brief Constructor with custom charge parameters + + @param q_min Minimum charge state to consider + @param q_max Maximum charge state to consider + @param max_span Maximum allowed charge span between related features + @param thresh_logp Minimum log probability threshold for accepting explanations + */ MassExplainer(Int q_min, Int q_max, Int max_span, double thresh_logp); - /// Constructor + /** + @brief Constructor with all custom parameters + + @param adduct_base Set of allowed adducts to use for mass difference explanations + @param q_min Minimum charge state to consider + @param q_max Maximum charge state to consider + @param max_span Maximum allowed charge span between related features + @param thresh_logp Minimum log probability threshold for accepting explanations + @param max_neutrals Maximum number of neutral adducts allowed in an explanation + */ MassExplainer(AdductsType adduct_base, Int q_min, Int q_max, Int max_span, double thresh_logp, Size max_neutrals); private: - /// check consistency of input - /// @param init_thresh_p set default threshold 
(set to "false" to keep current value) + /** + @brief Check consistency of input parameters and initialize internal data structures + + This method validates the input parameters and sets default values where needed. + + @param init_thresh_p Whether to initialize the probability threshold with default value + (set to "false" to keep current value) + */ void init_(bool init_thresh_p); public: - /// Assignment operator + /** + @brief Assignment operator + + @param rhs Source object to assign from + @return Reference to this object + */ MassExplainer& operator=(const MassExplainer& rhs); /// Destructor @@ -61,30 +114,61 @@ namespace OpenMS //@} - /// fill map with possible mass-differences along with their explanation + /** + @brief Compute all possible mass differences and their explanations + + This method generates all possible combinations of adducts from the adduct base + and stores them internally for later querying. This must be called after + changing any parameters and before performing queries. + */ void compute(); //@name Accessors //@{ - /// Sets the set of possible adducts + /** + @brief Set the base set of allowed adducts + + @param adduct_base Vector of adducts to use for explanations + */ void setAdductBase(AdductsType adduct_base); - /// Returns the set of adducts + + /** + @brief Get the current set of allowed adducts + + @return Vector of adducts currently used for explanations + */ AdductsType getAdductBase() const; - /// return a compomer by its Id (useful after a query() ). + /** + @brief Get a specific compomer by its ID + + This is typically used after a query() to retrieve detailed information + about a specific explanation. 
+ + @param id ID of the compomer to retrieve + @return Reference to the requested compomer + */ const Compomer& getCompomerById(Size id) const; //@} - /// search the mass database for explanations - /// @param net_charge net charge of compomer seeked - /// @param mass_to_explain mass in Da that needs explanation - /// @param mass_delta allowed deviation from exact mass - /// @param thresh_log_p minimal log probability required - /// @param firstExplanation begin range with candidates according to net_charge and mass - /// @param lastExplanation end range + /** + @brief Search for explanations of a given mass difference + + This method searches the precomputed explanations for those that match + the given mass difference within the specified tolerance and have the + required net charge. + + @param net_charge Net charge of the compomer being sought + @param mass_to_explain Mass difference in Da that needs explanation + @param mass_delta Allowed deviation from exact mass (tolerance) + @param thresh_log_p Minimum log probability required for explanations + @param firstExplanation Output iterator to the beginning of matching explanations + @param lastExplanation Output iterator to the end of matching explanations + @return Number of explanations found, or negative value if no explanations found + */ SignedSize query(const Int net_charge, const float mass_to_explain, const float mass_delta, @@ -93,25 +177,43 @@ namespace OpenMS std::vector::const_iterator& lastExplanation) const; protected: - ///check if the generated compomer is valid judged by its probability, charges etc + /** + @brief Check if a generated compomer is valid based on its probability, charges, etc. 
+ + @param cmp The compomer to validate + @return True if the compomer is valid, false otherwise + */ bool compomerValid_(const Compomer& cmp) const; - /// create a proper adduct from formula and charge and probability + /** + @brief Create a proper adduct from formula, charge, and probability + + @param formula Chemical formula of the adduct + @param charge Charge of the adduct + @param p Probability of the adduct + @return Adduct object with the specified properties + */ Adduct createAdduct_(const String& formula, const Int charge, const double p) const; - /// store possible explanations (as formula) for a certain ChargeDifference and MassDifference + /// Vector storing all possible explanations for mass differences std::vector explanations_; - /// all allowed adducts, whose combination explains the mass difference + + /// Set of allowed adducts that can be combined to explain mass differences AdductsType adduct_base_; - /// minimal expected charge + + /// Minimum charge state to consider in explanations Int q_min_; - /// maximal expected charge + + /// Maximum charge state to consider in explanations Int q_max_; - /// maximal span (in terms of charge) for co-features, e.g. 
a cluster with q={3,6} has span=4 + + /// Maximum allowed charge span between related features (e.g., a cluster with q={3,6} has span=4) Int max_span_; - /// minimum required probability of a compound (all other compounds are discarded) + + /// Minimum required probability threshold for accepting explanations double thresh_p_; - /// Maximum number of neutral(q=0) adducts + + /// Maximum number of neutral (q=0) adducts allowed in an explanation Size max_neutrals_; }; diff --git a/src/openms/include/OpenMS/DATASTRUCTURES/ParamValue.h b/src/openms/include/OpenMS/DATASTRUCTURES/ParamValue.h index 6e60aec6ca3..301ef016dbe 100644 --- a/src/openms/include/OpenMS/DATASTRUCTURES/ParamValue.h +++ b/src/openms/include/OpenMS/DATASTRUCTURES/ParamValue.h @@ -349,7 +349,7 @@ namespace OpenMS /// Space to store the data union { - ptrdiff_t ssize_; + std::ptrdiff_t ssize_; double dou_; std::string* str_; std::vector* str_list_; diff --git a/src/openms/include/OpenMS/FEATUREFINDER/FFIDAlgoExternalIDHandler.h b/src/openms/include/OpenMS/FEATUREFINDER/FFIDAlgoExternalIDHandler.h new file mode 100644 index 00000000000..e969a3e3aad --- /dev/null +++ b/src/openms/include/OpenMS/FEATUREFINDER/FFIDAlgoExternalIDHandler.h @@ -0,0 +1,172 @@ +// Copyright (c) 2002-present, OpenMS Inc. 
-- EKU Tuebingen, ETH Zurich, and FU Berlin +// SPDX-License-Identifier: BSD-3-Clause +// +// -------------------------------------------------------------------------- +// $Maintainer: Timo Sachsenberg $ +// $Authors: Hendrik Weisser $ +// -------------------------------------------------------------------------- + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace OpenMS +{ +namespace Internal +{ + /** + * @brief Class for handling external peptide identifications in feature finding + * + * This class encapsulates all functionality related to external peptide IDs in the + * feature finding process, including storage, RT transformation, and feature annotation. + */ + class OPENMS_DLLAPI FFIDAlgoExternalIDHandler + { + public: + /// RTMap for external data structure storage + typedef std::multimap ExternalRTMap; + + /// Charge to External RTMap mapping + typedef std::map ExternalChargeMap; + + /// Sequence to External Charge Map mapping + typedef std::map ExternalPeptideMap; + + /// Default constructor + FFIDAlgoExternalIDHandler(); + + /// Reset the handler's state + void reset(); + + /// Add an external peptide to the handler's map + void addExternalPeptide(PeptideIdentification& peptide); + + /// Process external peptide IDs + void processExternalPeptides(std::vector& peptides_ext); + + /// Align internal and external IDs to estimate RT shifts and return RT uncertainty + double alignInternalAndExternalIDs( + const std::vector& peptides_internal, + const std::vector& peptides_external, + double rt_quantile); + + /// Transform RT from internal to external scale + double transformRT(double rt) const; + + /// Check if we have RT transformation data + bool hasRTTransformation() const; + + /// Get the RT transformation + const TransformationDescription& getRTTransformation() const; + + /// Classify features using SVM + void classifyFeaturesWithSVM(FeatureMap& 
features, const Param& param); + + /// Filter classified features + void filterClassifiedFeatures(FeatureMap& features, double quality_cutoff); + + /// Calculate FDR for classified features + void calculateFDR(FeatureMap& features); + + /// Get SVM probabilities for internal features + const std::map >& getSVMProbsInternal() const; + + private: + /// Add external peptide to charge map (merged version for compatibility) + void addExternalPeptideToMap_(PeptideIdentification& peptide, + std::map, + std::multimap>>>& peptide_map); + + /// Fill an external RTMap from our data for a specific peptide and charge + bool fillExternalRTMap_(const AASequence& sequence, Int charge, + std::multimap& rt_map); + + /// Check and set feature class based on external data + void annotateFeatureWithExternalIDs_(Feature& feature); + + /// Initialize SVM parameters + void initSVMParameters_(const Param& param); + + /// Finalize assay features + void finalizeAssayFeatures_(Feature& best_feature, double best_quality, double quality_cutoff); + + /// Get random sample for SVM training + void getRandomSample_(std::map& training_labels); + + /// Check observation counts for SVM + void checkNumObservations_(Size n_pos, Size n_neg, const String& note = "") const; + + /// Get unbiased sample for SVM training + void getUnbiasedSample_(const std::multimap >& valid_obs, + std::map& training_labels); + + /// Add dummy peptide identification from external data + void addDummyPeptideID_(Feature& feature, const PeptideIdentification* ext_id); + + /// Handle external feature probability + void handleExternalFeature_(Feature& feature, double prob_positive, double quality_cutoff); + + /// Adjust FDR calculation for external features + void adjustFDRForExternalFeatures_(std::vector& fdr_probs, + std::vector& fdr_qvalues, + Size n_internal_features); + + /// External peptide storage + ExternalPeptideMap external_peptide_map_; + + /// RT transformation description + TransformationDescription 
rt_transformation_; + + /// Number of external peptides + Size n_external_peptides_; + + /// Number of external features + Size n_external_features_; + + /// SVM probabilities for external features + std::multiset svm_probs_external_; + + /// SVM probabilities for internal features + std::map > svm_probs_internal_; + + /// SVM number of parts for cross-validation + Size svm_n_parts_; + + /// SVM number of samples for training + Size svm_n_samples_; + + /// SVM minimum probability threshold + double svm_min_prob_; + + /// SVM quality cutoff + double svm_quality_cutoff; + + /// SVM predictor names + std::vector svm_predictor_names_; + + /// SVM cross-validation output file + String svm_xval_out_; + + /// Debug level + Int debug_level_; + + /// Number of internal features + Size n_internal_features_; + }; + +} // namespace Internal +} // namespace OpenMS diff --git a/src/openms/include/OpenMS/FEATUREFINDER/FeatureFinderIdentificationAlgorithm.h b/src/openms/include/OpenMS/FEATUREFINDER/FeatureFinderIdentificationAlgorithm.h index 32d4b45ef57..de710bffc68 100644 --- a/src/openms/include/OpenMS/FEATUREFINDER/FeatureFinderIdentificationAlgorithm.h +++ b/src/openms/include/OpenMS/FEATUREFINDER/FeatureFinderIdentificationAlgorithm.h @@ -6,8 +6,7 @@ // $Authors: Hendrik Weisser $ // -------------------------------------------------------------------------- -#ifndef OPENMS_TRANSFORMATIONS_FEATUREFINDER_FEATUREFINDERIDENTIFICATIONALGORITHM_H -#define OPENMS_TRANSFORMATIONS_FEATUREFINDER_FEATUREFINDERIDENTIFICATIONALGORITHM_H +#pragma once #include #include @@ -16,23 +15,19 @@ #include #include #include +#include #include #include #include -namespace OpenMS -{ - class IsotopeDistribution; - - - +namespace OpenMS { class OPENMS_DLLAPI FeatureFinderIdentificationAlgorithm : public DefaultParamHandler { public: /// default constructor - FeatureFinderIdentificationAlgorithm(); + FeatureFinderIdentificationAlgorithm(); /// Main method for actual FeatureFinder /// External IDs (@p 
peptides_ext, @p proteins_ext) may be empty, @@ -207,9 +202,9 @@ class OPENMS_DLLAPI FeatureFinderIdentificationAlgorithm : Size n_internal_features_; ///< internal feature counter (for FDR calculation) Size n_external_features_; ///< external feature counter (for FDR calculation) /// TransformationDescription trafo_; // RT transformation (to range 0-1) - TransformationDescription trafo_external_; ///< transform. to external RT scale std::map isotope_probs_; ///< isotope probabilities of transitions MRMFeatureFinderScoring feat_finder_; ///< OpenSWATH feature finder + Internal::FFIDAlgoExternalIDHandler external_id_handler_; ///< Handler for external peptide IDs ProgressLogger prog_log_; @@ -249,22 +244,8 @@ class OPENMS_DLLAPI FeatureFinderIdentificationAlgorithm : PeptideMap& peptide_map, bool external = false); - void checkNumObservations_(Size n_pos, Size n_neg, const String& note = "") const; - - void getUnbiasedSample_(const std::multimap >& valid_obs, - std::map& training_labels); - - void getRandomSample_(std::map& training_labels) const; - - void classifyFeatures_(FeatureMap& features); - - void filterFeaturesFinalizeAssay_(Feature& best_feature, double best_quality, - const double quality_cutoff); - void filterFeatures_(FeatureMap& features, bool classified); - void calculateFDR_(FeatureMap& features); - // seeds for untargeted extraction Size addSeeds_(std::vector& peptides, const FeatureMap& seeds); @@ -313,9 +294,6 @@ class OPENMS_DLLAPI FeatureFinderIdentificationAlgorithm : return chunks; } -}; - +}; // class FeatureFinderIdentificationAlgorithm } // namespace OpenMS - -#endif - + \ No newline at end of file diff --git a/src/openms/include/OpenMS/FEATUREFINDER/sources.cmake b/src/openms/include/OpenMS/FEATUREFINDER/sources.cmake index d50777d64be..96ac355aa37 100644 --- a/src/openms/include/OpenMS/FEATUREFINDER/sources.cmake +++ b/src/openms/include/OpenMS/FEATUREFINDER/sources.cmake @@ -21,6 +21,7 @@ FeatureFinderIdentificationAlgorithm.h FeatureFinderAlgorithmMetaboIdent.h
FeatureFinderMultiplexAlgorithm.h FeatureFindingMetabo.h +FFIDAlgoExternalIDHandler.h Fitter1D.h GaussFitter1D.h GaussModel.h diff --git a/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h b/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h index 9d4f98665ce..8d9fb50c544 100644 --- a/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h +++ b/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h @@ -11,7 +11,7 @@ #include #include -#include // StringList + #include #include #include @@ -210,30 +210,43 @@ namespace OpenMS typedef std::basic_string XercesString; - // Converts from a narrow-character string to a wide-character string. + /// Converts from a narrow-character string to a wide-character string. inline static unique_xerces_ptr fromNative_(const char* str) { return unique_xerces_ptr(xercesc::XMLString::transcode(str)); } - // Converts from a narrow-character string to a wide-character string. + /// Converts from a narrow-character string to a wide-character string. inline static unique_xerces_ptr fromNative_(const String& str) { return fromNative_(str.c_str()); } - // Converts from a wide-character string to a narrow-character string. + /// Converts from a wide-character string to a narrow-character string. inline static String toNative_(const XMLCh* str) - { - return String(unique_xerces_ptr(xercesc::XMLString::transcode(str)).get()); + { + String r; + XMLSize_t l = strLength(str); + if(isASCII(str, l)) + { + appendASCII(str,l,r); + } + else + { + r = (unique_xerces_ptr(xercesc::XMLString::transcode(str)).get()); + } + return r; } - // Converts from a wide-character string to a narrow-character string. + /// Converts from a wide-character string to a narrow-character string. 
inline static String toNative_(const unique_xerces_ptr& str) { return toNative_(str.get()); } +protected: + /// Compresses eight 8x16bit Chars in XMLCh* to 8x8bit Chars by cutting upper byte + static void compress64_ (const XMLCh * input_it, char* output_it); public: /// Constructor @@ -242,6 +255,9 @@ namespace OpenMS /// Destructor ~StringManager(); + /// Calculates the length of an XMLCh* string using SIMDe + static XMLSize_t strLength(const XMLCh* input_ptr); + /// Transcode the supplied C string to a xerces string inline static XercesString convert(const char * str) { @@ -283,7 +299,11 @@ namespace OpenMS { return toNative_(str); } + /// Checks if supplied chars in XMLCh* can be encoded with ASCII (i.e. the upper byte of each char is 0) + static bool isASCII(const XMLCh * chars, const XMLSize_t length); + + /** * @brief Transcodes the supplied XMLCh* and appends it to the OpenMS String * diff --git a/src/openms/include/OpenMS/FORMAT/MSPFile.h b/src/openms/include/OpenMS/FORMAT/MSPFile.h index 6831e9f17d5..523a372accc 100644 --- a/src/openms/include/OpenMS/FORMAT/MSPFile.h +++ b/src/openms/include/OpenMS/FORMAT/MSPFile.h @@ -11,11 +11,13 @@ #include #include #include +#include #include namespace OpenMS { + class AnnotatedMSRun; /** @brief File adapter for MSP files (NIST spectra library) @@ -58,12 +60,24 @@ namespace OpenMS */ void load(const String & filename, std::vector & ids, PeakMap & exp); + /** + @brief Loads a map from a MSPFile file. + + @param filename the filename of the experiment + @param annot_exp annotated experiment with spectra and ids + + @throw FileNotFound is thrown if the file could not be found + @throw ParseError is thrown if the given file could not be parsed + @throw ElementNotFound is thrown if an annotated modification cannot be found in ModificationsDB (PSI-MOD definitions) + */ + void load(const String & filename, AnnotatedMSRun & annot_exp); + /** @brief Stores a map in a MSPFile file.
@throw UnableToCreateFile is thrown if the given file could not be created */ - void store(const String & filename, const PeakMap & exp) const; + void store(const String & filename, const AnnotatedMSRun & exp) const; protected: diff --git a/src/openms/include/OpenMS/FORMAT/MzTab.h b/src/openms/include/OpenMS/FORMAT/MzTab.h index 9096cbe2ee5..6931e40783d 100644 --- a/src/openms/include/OpenMS/FORMAT/MzTab.h +++ b/src/openms/include/OpenMS/FORMAT/MzTab.h @@ -18,8 +18,10 @@ #include +#ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wnon-virtual-dtor" +#endif namespace OpenMS { @@ -874,5 +876,6 @@ namespace OpenMS }; } // namespace OpenMS - -#pragma clang diagnostic pop +#ifdef __clang__ + #pragma clang diagnostic pop +#endif diff --git a/src/openms/include/OpenMS/KERNEL/ChromatogramPeak.h b/src/openms/include/OpenMS/KERNEL/ChromatogramPeak.h index 0e014182594..8a1748180df 100644 --- a/src/openms/include/OpenMS/KERNEL/ChromatogramPeak.h +++ b/src/openms/include/OpenMS/KERNEL/ChromatogramPeak.h @@ -141,13 +141,7 @@ namespace OpenMS } /// Equality operator - inline bool operator==(const ChromatogramPeak & rhs) const - { -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wfloat-equal" - return intensity_ == rhs.intensity_ && position_ == rhs.position_; -#pragma clang diagnostic pop - } + inline bool operator==(const ChromatogramPeak& rhs) const = default; /// Equality operator inline bool operator!=(const ChromatogramPeak & rhs) const diff --git a/src/openms/include/OpenMS/KERNEL/ChromatogramRangeManager.h b/src/openms/include/OpenMS/KERNEL/ChromatogramRangeManager.h new file mode 100644 index 00000000000..8c9c19e261b --- /dev/null +++ b/src/openms/include/OpenMS/KERNEL/ChromatogramRangeManager.h @@ -0,0 +1,38 @@ +// Copyright (c) 2002-present, OpenMS Inc. 
-- EKU Tuebingen, ETH Zurich, and FU Berlin +// SPDX-License-Identifier: BSD-3-Clause +// +// -------------------------------------------------------------------------- +// $Maintainer: Timo Sachsenberg $ +// $Authors: Administrator $ +// -------------------------------------------------------------------------- + +#pragma once + +#include + +namespace OpenMS +{ + /** + @brief Range manager for chromatograms + + This class manages retention time, m/z, and intensity ranges for multiple chromatograms. + It extends the basic RangeManager to provide specialized functionality for chromatogram data. + + The ChromatogramRangeManager is used in conjunction with the SpectrumRangeManager in MSExperiment + to provide separate range tracking for chromatograms and spectra. This separation allows for + more efficient and targeted range operations on specific data types. + + @see RangeManager + @see SpectrumRangeManager + @see MSExperiment + @ingroup Kernel + */ + class OPENMS_DLLAPI ChromatogramRangeManager : public RangeManager + { + public: + /// Base type + using BaseType = RangeManager; + + }; + +} // namespace OpenMS \ No newline at end of file diff --git a/src/openms/include/OpenMS/KERNEL/DimMapper.h b/src/openms/include/OpenMS/KERNEL/DimMapper.h index c288260e130..ee25985aecb 100644 --- a/src/openms/include/OpenMS/KERNEL/DimMapper.h +++ b/src/openms/include/OpenMS/KERNEL/DimMapper.h @@ -150,6 +150,16 @@ namespace OpenMS + /** + @brief Dimension implementation for retention time values. + + This class implements the DimBase interface for the retention time dimension. + It provides methods to access RT values from various data structures and + convert between RT values and generic dimension values. + + @see DimBase + @ingroup Kernel + */ class OPENMS_DLLAPI DimRT final : public DimBase { public: @@ -260,6 +270,16 @@ namespace OpenMS } }; + /** + @brief Dimension implementation for m/z values. 
+ + This class implements the DimBase interface for the mass-to-charge ratio dimension. + It provides methods to access m/z values from various data structures and + convert between m/z values and generic dimension values. + + @see DimBase + @ingroup Kernel + */ class OPENMS_DLLAPI DimMZ final : public DimBase { public: @@ -371,6 +391,16 @@ namespace OpenMS } }; + /** + @brief Dimension implementation for intensity values. + + This class implements the DimBase interface for the intensity dimension. + It provides methods to access intensity values from various data structures and + convert between intensity values and generic dimension values. + + @see DimBase + @ingroup Kernel + */ class OPENMS_DLLAPI DimINT final : public DimBase { public: @@ -488,6 +518,19 @@ namespace OpenMS } }; + /** + @brief Dimension implementation for ion mobility values. + + This class implements the DimBase interface for the ion mobility dimension. + It provides methods to access ion mobility values from various data structures and + convert between ion mobility values and generic dimension values. + + Ion mobility dimensions support different units such as FAIMS compensation voltage, + linear ion mobility spectrometry, and trapped ion mobility spectrometry. 
+ + @see DimBase + @ingroup Kernel + */ class OPENMS_DLLAPI DimIM final : public DimBase { public: diff --git a/src/openms/include/OpenMS/KERNEL/MSExperiment.h b/src/openms/include/OpenMS/KERNEL/MSExperiment.h index 8cb6986c3a3..fad5150e00c 100644 --- a/src/openms/include/OpenMS/KERNEL/MSExperiment.h +++ b/src/openms/include/OpenMS/KERNEL/MSExperiment.h @@ -10,8 +10,10 @@ #include #include +#include #include #include +#include #include #include @@ -43,8 +45,7 @@ namespace OpenMS @ingroup Kernel */ - class OPENMS_DLLAPI MSExperiment final : public RangeManagerContainer, - public ExperimentalSettings + class OPENMS_DLLAPI MSExperiment final : public ExperimentalSettings { public: @@ -61,10 +62,14 @@ namespace OpenMS typedef PeakType::CoordinateType CoordinateType; /// Intensity type of peaks typedef PeakType::IntensityType IntensityType; - /// RangeManager type + /// Combined RangeManager type to store the overall range of all spectra and chromatograms (for backward compatibility) typedef RangeManager RangeManagerType; - /// RangeManager type - typedef RangeManagerContainer RangeManagerContainerType; + + /// Spectrum range manager type for tracking ranges with MS level separation + typedef SpectrumRangeManager SpectrumRangeManagerType; + + /// Chromatogram range manager type for tracking chromatogram-specific ranges + typedef ChromatogramRangeManager ChromatogramRangeManagerType; /// Spectrum Type typedef MSSpectrum SpectrumType; /// Chromatogram type @@ -1067,16 +1072,56 @@ std::vector extractXICs( @note The range values (min, max, etc.) are not updated automatically. Call updateRanges() to update the values! */ ///@{ - // Docu in base class - void updateRanges() override; - + /// Delegate methods for backward compatibility + + /** + * @brief Clear all ranges in all range managers + * + * This clears the ranges in the combined range manager, the spectrum range manager, + * and the chromatogram range manager. 
+ */ + void clearRanges() + { + combined_ranges_.clearRanges(); + spectrum_ranges_.clearRanges(); + chromatogram_ranges_.clearRanges(); + } + + /// Get the minimum RT value from the combined ranges (includes both chromatogram and spectra ranges) + double getMinRT() const { return combined_ranges_.getMinRT(); } + + /// Get the maximum RT value from the combined ranges (includes both chromatogram and spectra ranges) + double getMaxRT() const { return combined_ranges_.getMaxRT(); } + + /// Get the minimum m/z value from the combined ranges (includes both chromatogram and spectra ranges) + double getMinMZ() const { return combined_ranges_.getMinMZ(); } + + /// Get the maximum m/z value from the combined ranges (includes both chromatogram and spectra ranges) + double getMaxMZ() const { return combined_ranges_.getMaxMZ(); } + + /// Get the minimum intensity value from the combined ranges (includes both chromatogram and spectra ranges) + double getMinIntensity() const { return combined_ranges_.getMinIntensity(); } + + /// Get the maximum intensity value from the combined ranges (includes both chromatogram and spectra ranges) + double getMaxIntensity() const { return combined_ranges_.getMaxIntensity(); } + + /// Get the minimum mobility value from the combined ranges (includes both chromatogram and spectra ranges) + double getMinMobility() const { return combined_ranges_.getMinMobility(); } + + /// Get the maximum mobility value from the combined ranges (includes both chromatogram and spectra ranges) + double getMaxMobility() const { return combined_ranges_.getMaxMobility(); } + /** - @brief Updates the m/z, intensity, and retention time ranges of all spectra with a certain ms level + @brief Updates the m/z, intensity, mobility, and retention time ranges of all spectra and chromatograms - - @param ms_level MS level to consider for m/z range, RT range and intensity range (All MS levels if negative) + This method updates all three range managers: + - The spectrum range 
manager (for spectra ranges with MS level separation) + - The chromatogram range manager (for chromatogram ranges) + - The combined range manager (for overall ranges across both spectra and chromatograms) + + Call this method after modifying spectra or chromatograms to ensure that all range information is up-to-date. */ - void updateRanges(Int ms_level); + void updateRanges(); /// returns the total number of peaks (spectra and chromatograms included) UInt64 getSize() const; @@ -1275,9 +1320,6 @@ std::vector extractXICs( /// returns true if any MS spectra of trthe specified level contain at least one peak with intensity of 0.0 bool hasZeroIntensities(size_t ms_level) const; - /// do any of the spectra have a PeptideID? - bool hasPeptideIdentifications() const; - /// Are all MSSpectra in this experiment part of an IM Frame? I.e. they all have the same RT, but different drift times bool isIMFrame() const; @@ -1286,8 +1328,49 @@ std::vector extractXICs( std::vector chromatograms_; /// spectra std::vector spectra_; + /// Spectrum range manager for tracking m/z, intensity, RT, and ion mobility ranges of spectra with MS level separation + SpectrumRangeManagerType spectrum_ranges_; + + /// Chromatogram range manager for tracking RT, intensity, and m/z ranges of chromatograms + ChromatogramRangeManagerType chromatogram_ranges_; + + /// Combined range manager that provides overall ranges across both spectra and chromatograms (maintained for backward compatibility) + RangeManagerType combined_ranges_; -private: + public: + /** + * @brief Returns a const reference to the spectrum range manager + * + * The spectrum range manager provides access to m/z, intensity, retention time, and ion mobility + * ranges for spectra, with separate tracking for different MS levels. 
+ * + * @return Const reference to the spectrum range manager + * @see SpectrumRangeManager + */ + const SpectrumRangeManagerType& spectrumRanges() const { return spectrum_ranges_; } + + /** + * @brief Returns a const reference to the chromatogram range manager + * + * The chromatogram range manager provides access to retention time, m/z, and intensity + * ranges for chromatograms. + * + * @return Const reference to the chromatogram range manager + * @see ChromatogramRangeManager + */ + const ChromatogramRangeManagerType& chromatogramRanges() const { return chromatogram_ranges_; } + + /** + * @brief Returns a const reference to the combined range manager + * + * The combined range manager provides access to the overall ranges across both spectra and chromatograms. + * This is maintained for backward compatibility with code that expects a single range manager. + * + * @return Const reference to the combined range manager + */ + const RangeManagerType& combinedRanges() const { return combined_ranges_; } + + private: /// Helper class to add either general data points in set2DData or use mass traces from meta values template @@ -1348,6 +1431,7 @@ std::vector extractXICs( } }; + /* @brief Append a spectrum to current MSExperiment diff --git a/src/openms/include/OpenMS/KERNEL/MSSpectrum.h b/src/openms/include/OpenMS/KERNEL/MSSpectrum.h index a664a87f7f3..47ab7b531fa 100644 --- a/src/openms/include/OpenMS/KERNEL/MSSpectrum.h +++ b/src/openms/include/OpenMS/KERNEL/MSSpectrum.h @@ -64,6 +64,16 @@ namespace OpenMS } }; + /** + * @brief Container for organizing and managing multiple chunks in a spectrum. + * + * This structure is used to track multiple chunks (segments) within a spectrum. + * Each chunk represents a portion of the spectrum that may or may not be sorted. + * This information is used to optimize sorting operations on spectra, particularly + * when only parts of the spectrum need to be sorted or have been modified. 
+ * + * @see Chunk + */ struct Chunks { public: Chunks(const MSSpectrum& s) : spec_(s) {} diff --git a/src/openms/include/OpenMS/KERNEL/MobilityPeak1D.h b/src/openms/include/OpenMS/KERNEL/MobilityPeak1D.h index 3f1c655d57b..407ac6a4784 100644 --- a/src/openms/include/OpenMS/KERNEL/MobilityPeak1D.h +++ b/src/openms/include/OpenMS/KERNEL/MobilityPeak1D.h @@ -131,13 +131,7 @@ namespace OpenMS ///@} /// Equality operator - bool operator==(const MobilityPeak1D& rhs) const - { -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wfloat-equal" - return intensity_ == rhs.intensity_ && position_ == rhs.position_; -#pragma clang diagnostic pop - } + bool operator==(const MobilityPeak1D& rhs) const = default; /// Equality operator bool operator!=(const MobilityPeak1D& rhs) const diff --git a/src/openms/include/OpenMS/KERNEL/MobilityPeak2D.h b/src/openms/include/OpenMS/KERNEL/MobilityPeak2D.h index 482ce4b47be..e0687d91598 100644 --- a/src/openms/include/OpenMS/KERNEL/MobilityPeak2D.h +++ b/src/openms/include/OpenMS/KERNEL/MobilityPeak2D.h @@ -193,10 +193,7 @@ namespace OpenMS /// Equality operator bool operator==(const MobilityPeak2D & rhs) const { -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wfloat-equal" - return intensity_ == rhs.intensity_ && position_ == rhs.position_; -#pragma clang diagnostic pop + return std::tie(intensity_, position_) == std::tie(rhs.intensity_, rhs.position_); } /// Equality operator diff --git a/src/openms/include/OpenMS/KERNEL/Peak1D.h b/src/openms/include/OpenMS/KERNEL/Peak1D.h index 5bcc1c6660a..52c269ee55e 100644 --- a/src/openms/include/OpenMS/KERNEL/Peak1D.h +++ b/src/openms/include/OpenMS/KERNEL/Peak1D.h @@ -128,13 +128,7 @@ namespace OpenMS ///@} /// Equality operator - bool operator==(const Peak1D & rhs) const - { -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wfloat-equal" - return intensity_ == rhs.intensity_ && position_ == rhs.position_; -#pragma clang diagnostic pop - } + bool 
operator==(const Peak1D& rhs) const = default; /// Equality operator bool operator!=(const Peak1D & rhs) const diff --git a/src/openms/include/OpenMS/KERNEL/Peak2D.h b/src/openms/include/OpenMS/KERNEL/Peak2D.h index b901250431d..135cd99f1a6 100644 --- a/src/openms/include/OpenMS/KERNEL/Peak2D.h +++ b/src/openms/include/OpenMS/KERNEL/Peak2D.h @@ -195,13 +195,7 @@ namespace OpenMS ///@} /// Equality operator - bool operator==(const Peak2D & rhs) const - { -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wfloat-equal" - return intensity_ == rhs.intensity_ && position_ == rhs.position_; -#pragma clang diagnostic pop - } + bool operator==(const Peak2D& rhs) const = default; /// Equality operator bool operator!=(const Peak2D & rhs) const diff --git a/src/openms/include/OpenMS/KERNEL/SpectrumRangeManager.h b/src/openms/include/OpenMS/KERNEL/SpectrumRangeManager.h new file mode 100644 index 00000000000..dd8e64c8af8 --- /dev/null +++ b/src/openms/include/OpenMS/KERNEL/SpectrumRangeManager.h @@ -0,0 +1,156 @@ +// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin +// SPDX-License-Identifier: BSD-3-Clause +// +// -------------------------------------------------------------------------- +// $Maintainer: Timo Sachsenberg $ +// $Authors: Administrator $ +// -------------------------------------------------------------------------- + +#pragma once + +#include +#include +#include +#include + +namespace OpenMS +{ + class MSSpectrum; // Forward declaration for MSSpectrum + /** + @brief Advanced range manager for MS spectra with separate ranges for each MS level + + This class extends the basic RangeManager to provide separate range tracking for different MS levels + (MS1, MS2, etc.). It manages four types of ranges: + - m/z (mass-to-charge ratio) + - intensity + - retention time (RT) + - ion mobility + + A global range is tracked for all MS levels, and additional ranges are maintained for each specific MS level. 
+ This allows for efficient querying of ranges for specific MS levels, which is useful for visualization, + filtering, and processing operations that need to work with specific MS levels. + + The class inherits from RangeManager and adds MS level-specific functionality. The base RangeManager + functionality is used for the global ranges, while a map of MS levels to RangeManagers is used for + the MS level-specific ranges. + + @see RangeManager + @see MSSpectrum + @see ChromatogramRangeManager + @see MSExperiment + @ingroup Kernel + */ + class OPENMS_DLLAPI SpectrumRangeManager : public RangeManager + { + public: + /// Base type + using BaseType = RangeManager; + + /// Default constructor + SpectrumRangeManager() = default; + + /// Copy constructor + SpectrumRangeManager(const SpectrumRangeManager& source) = default; + + /// Move constructor + SpectrumRangeManager(SpectrumRangeManager&& source) = default; + + /// Assignment operator + SpectrumRangeManager& operator=(const SpectrumRangeManager& source) = default; + + /// Move assignment operator + SpectrumRangeManager& operator=(SpectrumRangeManager&& source) = default; + + /// Destructor + ~SpectrumRangeManager() = default; + + /** + @brief Clears all ranges (global and MS level-specific) + */ + void clearRanges() + { + BaseType::clearRanges(); + ms_level_ranges_.clear(); + } + + /** + @brief Extends the ranges with the ranges of another range manager + + @param other The other range manager to extend from + @param ms_level The MS level for which to extend the ranges (0 for global ranges) + */ + void extend(const BaseType& other, UInt ms_level = 0) + { + ms_level == 0 ? 
BaseType::extend(other) : ms_level_ranges_[ms_level].extend(other); + } + + /** + @brief Gets the ranges for a specific MS level + + @param ms_level The MS level for which to retrieve the ranges (only the MS level-specific map is searched; the extend*() methods of this class store ms_level 0 in the base-class ranges, not in that map) + @return The ranges for the specified MS level + @throw Exception::InvalidValue if no ranges exist for the specified MS level + */ + const BaseType& byMSLevel(UInt ms_level = 0) const + { + if (auto it = ms_level_ranges_.find(ms_level); it != ms_level_ranges_.end()) + { + return it->second; + } + throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "No ranges for this MS level", String(ms_level)); + } + + /** + @brief Gets all MS levels for which specific ranges exist + + @return The set of MS levels (the keys of the MS level-specific map) + */ + std::set getMSLevels() const + { + std::set ms_levels; + for (const auto& [level, _] : ms_level_ranges_) + { + ms_levels.insert(level); + } + return ms_levels; + } + + /** + @brief Extends the RT range with an MS level parameter + + @param rt The RT value to extend with + @param ms_level The MS level for which to extend the RT range (0 for global range; a non-zero level extends only that level's range, creating its entry if needed, and leaves the global range untouched) + */ + void extendRT(double rt, UInt ms_level = 0) + { + ms_level == 0 ? BaseType::extendRT(rt) : ms_level_ranges_[ms_level].extendRT(rt); + } + + /** + @brief Extends the m/z range with an MS level parameter + + @param mz The m/z value to extend with + @param ms_level The MS level for which to extend the m/z range (0 for global range; a non-zero level extends only that level's range, creating its entry if needed, and leaves the global range untouched) + */ + void extendMZ(double mz, UInt ms_level = 0) + { + ms_level == 0 ? BaseType::extendMZ(mz) : ms_level_ranges_[ms_level].extendMZ(mz); + } + + /** + @brief Extends the ranges with the ranges of a spectrum using an MS level parameter + + @param spectrum The spectrum whose ranges to extend from + @param ms_level The MS level for which to extend the ranges (0 for global ranges; a non-zero level extends only that level's ranges, creating its entry if needed, and leaves the global ranges untouched) + */ + void extendUnsafe(const MSSpectrum& spectrum, UInt ms_level = 0) + { + ms_level == 0 ? 
BaseType::extendUnsafe(spectrum.getRange()) : ms_level_ranges_[ms_level].extendUnsafe(spectrum.getRange()); + } + + protected: + /// MS level-specific ranges + std::map ms_level_ranges_; + }; + +} // namespace OpenMS \ No newline at end of file diff --git a/src/openms/include/OpenMS/KERNEL/sources.cmake b/src/openms/include/OpenMS/KERNEL/sources.cmake index 0cfa31e9c54..1a60c9a8607 100644 --- a/src/openms/include/OpenMS/KERNEL/sources.cmake +++ b/src/openms/include/OpenMS/KERNEL/sources.cmake @@ -33,6 +33,8 @@ PeakIndex.h RangeManager.h RangeUtils.h RichPeak2D.h +SpectrumRangeManager.h +ChromatogramRangeManager.h StandardTypes.h SpectrumHelper.h ) diff --git a/src/openms/include/OpenMS/METADATA/AnnotatedMSRun.h b/src/openms/include/OpenMS/METADATA/AnnotatedMSRun.h new file mode 100644 index 00000000000..59c38e47c0d --- /dev/null +++ b/src/openms/include/OpenMS/METADATA/AnnotatedMSRun.h @@ -0,0 +1,320 @@ +// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin +// SPDX-License-Identifier: BSD-3-Clause +// +// -------------------------------------------------------------------------- +// $Maintainer: Timo Sachsenberg$ +// $Authors: David Voigt, Timo Sachsenberg $ +// ------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include +#include +#include + +#include + +namespace OpenMS +{ + class PeptideIdentification; + + class MSSpectrum; + + /** + * @brief Class for storing MS run data with peptide and protein identifications + * + * This class stores an MSExperiment (containing spectra) along with peptide and protein + * identifications. Each spectrum in the MSExperiment is associated with a single + * PeptideIdentification object. + * + * The class provides methods to access and modify these identifications, as well as + * iterators to traverse the spectra and their associated identifications together. 
+ */ + class OPENMS_DLLAPI AnnotatedMSRun + { + public: + using SpectrumIdRef = std::pair; + using ConstSpectrumIdRef = std::pair; + using SpectrumType = MSExperiment::SpectrumType; + using ChromatogramType = MSExperiment::ChromatogramType; + + + /// Default constructor + AnnotatedMSRun() = default; + + /** + * @brief Constructs the run from an MSExperiment that is moved in (avoids a deep copy); the peptide and protein identification vectors start out empty + * @param experiment The MSExperiment to move into this object + */ + explicit AnnotatedMSRun(MSExperiment&& experiment) : data(std::move(experiment)) + {}; + + /// Move constructor + AnnotatedMSRun(AnnotatedMSRun&&) = default; + + /// Copy constructor, followed by copy assignment and move assignment (all defaulted) + AnnotatedMSRun(const AnnotatedMSRun&) = default; + AnnotatedMSRun& operator=(const AnnotatedMSRun&) = default; + AnnotatedMSRun& operator=(AnnotatedMSRun&&) = default; + + /// Destructor + ~AnnotatedMSRun() = default; + + /** + * @brief Get the protein identifications + * @return A reference to the vector of protein identifications + */ + std::vector& getProteinIdentifications() + { + return protein_ids_; + } + + /** + * @brief Get the protein identifications (const version) + * @return A const reference to the vector of protein identifications + */ + const std::vector& getProteinIdentifications() const + { + return protein_ids_; + } + + /** + * @brief Get all peptide identifications for all spectra + * @return A reference to the vector of peptide identifications + */ + std::vector& getPeptideIdentifications(); + + /** + * @brief Get all peptide identifications for all spectra (const version) + * @return A const reference to the vector of peptide identifications + */ + const std::vector& getPeptideIdentifications() const; + + /** + * @brief Set all peptide identifications for all spectra (overload taking an rvalue reference) + * @param ids Vector of peptide identifications + */ + void setPeptideIdentifications(std::vector&& ids); + + /** + * @brief Set all peptide identifications for all spectra (overload taking a const reference) + * @param ids Vector of peptide identifications + */ + void setPeptideIdentifications(const 
std::vector& ids); + + /** + * @brief Get the MSExperiment + * @return A reference to the MSExperiment + */ + MSExperiment& getMSExperiment(); + + /** + * @brief Get the MSExperiment (const version) + * @return A const reference to the MSExperiment + */ + const MSExperiment& getMSExperiment() const; + + /** + * @brief Set the MSExperiment + * @param experiment The MSExperiment to set + */ + void setMSExperiment(MSExperiment&& experiment); + + /** + * @brief Set the MSExperiment + * @param experiment The MSExperiment to set + */ + void setMSExperiment(const MSExperiment& experiment); + + /** + * @brief Get a const iterator to the beginning of the data + * @return A const iterator to the beginning + */ + inline auto cbegin() const + { + checkPeptideIdSize_(OPENMS_PRETTY_FUNCTION); + return PairIterator(data.getSpectra().cbegin(), peptide_ids_.cbegin()); + } + + /** + * @brief Get an iterator to the beginning of the data + * @return An iterator to the beginning + */ + inline auto begin() + { + checkPeptideIdSize_(OPENMS_PRETTY_FUNCTION); + return PairIterator(data.getSpectra().begin(), peptide_ids_.begin()); + } + + /** + * @brief Get a const iterator to the beginning of the data + * @return A const iterator to the beginning + */ + inline auto begin() const + { + checkPeptideIdSize_(OPENMS_PRETTY_FUNCTION); + return PairIterator(data.getSpectra().cbegin(), peptide_ids_.cbegin()); + } + + /** + * @brief Get an iterator to the end of the data + * @return An iterator to the end + */ + inline auto end() + { + return PairIterator(data.getSpectra().end(), peptide_ids_.end()); + } + + /** + * @brief Get a const iterator to the end of the data + * @return A const iterator to the end + */ + inline auto end() const + { + return PairIterator(data.getSpectra().end(), peptide_ids_.end()); + } + + /** + * @brief Get a const iterator to the end of the data + * @return A const iterator to the end + */ + inline auto cend() const + { + return PairIterator(data.getSpectra().cend(), 
peptide_ids_.cend()); + } + + /** + * @brief Access a spectrum and its associated peptide identification + * @param idx The index of the spectrum + * @return A pair of references to the spectrum and its peptide identification + */ + inline SpectrumIdRef operator[](size_t idx) + { + if (idx >= peptide_ids_.size()) + { + throw Exception::IndexOverflow(__FILE__, __LINE__, + OPENMS_PRETTY_FUNCTION, + idx, peptide_ids_.size()); + } + if (idx >= data.getSpectra().size()) + { + throw Exception::IndexOverflow(__FILE__, __LINE__, + OPENMS_PRETTY_FUNCTION, + idx, data.getSpectra().size()); + } + return {data.getSpectra()[idx], peptide_ids_[idx]}; + } + + /** + * @brief Access a spectrum and its associated peptide identification (const version) + * @param idx The index of the spectrum + * @return A pair of const references to the spectrum and its peptide identification + */ + inline ConstSpectrumIdRef operator[](size_t idx) const + { + if (idx >= peptide_ids_.size()) + { + throw Exception::IndexOverflow(__FILE__, __LINE__, + OPENMS_PRETTY_FUNCTION, + idx, peptide_ids_.size()); + } + if (idx >= data.getSpectra().size()) + { + throw Exception::IndexOverflow(__FILE__, __LINE__, + OPENMS_PRETTY_FUNCTION, + idx, data.getSpectra().size()); + } + return {data.getSpectra()[idx], peptide_ids_[idx]}; + } + + /** + * @brief Iterator for pairs of spectra and peptide identifications + * + * This iterator allows traversing the spectra and their associated peptide + * identifications together. 
+ */ + template + struct PairIterator + { + using iterator_category = std::forward_iterator_tag; + using difference_type = std::ptrdiff_t; + + /** + * @brief Constructor + * @param ptr1 Iterator to the spectra + * @param ptr2 Iterator to the peptide identifications + */ + PairIterator(T1 ptr1, T2 ptr2) : m_ptr1(ptr1), m_ptr2(ptr2) + {} + + /** + * @brief Pre-increment operator + * @return Reference to this iterator after incrementing + */ + PairIterator& operator++() + { + ++m_ptr1; + ++m_ptr2; + return *this; + } + + /** + * @brief Post-increment operator + * @return Copy of this iterator before incrementing + */ + PairIterator operator++(int) + { + auto tmp(*this); + ++(*this); + return tmp; + } + + /** + * @brief Dereference operator + * @return A pair of references to the current spectrum and peptide identification + */ + auto operator*() + { + return std::make_pair(std::ref(*m_ptr1), std::ref(*m_ptr2)); + } + + /** + * @brief Equality operator + * @param a First iterator + * @param b Second iterator + * @return True if the iterators are equal + */ + inline friend bool operator==(const PairIterator& a, const PairIterator& b) + { + return a.m_ptr1 == b.m_ptr1 && a.m_ptr2 == b.m_ptr2; + } + + /** + * @brief Inequality operator + * @param a First iterator + * @param b Second iterator + * @return True if the iterators are not equal + */ + inline friend bool operator!=(const PairIterator& a, const PairIterator& b) + { + return !(a == b); + } + + private: + T1 m_ptr1; + T2 m_ptr2; + }; + + typedef AnnotatedMSRun::PairIterator::iterator, std::vector::iterator> Iterator; + typedef AnnotatedMSRun::PairIterator::const_iterator, std::vector::const_iterator> ConstIterator; + + private: + + // Helper to enforce invariant + void checkPeptideIdSize_(const char* function_name) const; + + std::vector peptide_ids_; + std::vector protein_ids_; + MSExperiment data; + }; +} diff --git a/src/openms/include/OpenMS/METADATA/ExperimentalSettings.h 
b/src/openms/include/OpenMS/METADATA/ExperimentalSettings.h index 19a6cf64acd..b9dc867a6b4 100644 --- a/src/openms/include/OpenMS/METADATA/ExperimentalSettings.h +++ b/src/openms/include/OpenMS/METADATA/ExperimentalSettings.h @@ -100,13 +100,6 @@ namespace OpenMS /// sets the free-text comment void setComment(const String & comment); - /// returns a const reference to the protein ProteinIdentification vector - const std::vector & getProteinIdentifications() const; - /// returns a mutable reference to the protein ProteinIdentification vector - std::vector & getProteinIdentifications(); - /// sets the protein ProteinIdentification vector - void setProteinIdentifications(const std::vector & protein_identifications); - /// returns fraction identifier const String & getFractionIdentifier() const; /// sets the fraction identifier @@ -120,7 +113,6 @@ namespace OpenMS HPLC hplc_; DateTime datetime_; String comment_; - std::vector protein_identifications_; String fraction_identifier_; }; diff --git a/src/openms/include/OpenMS/METADATA/ProteinHit.h b/src/openms/include/OpenMS/METADATA/ProteinHit.h index 379f4d52784..1b8f26ff6ea 100644 --- a/src/openms/include/OpenMS/METADATA/ProteinHit.h +++ b/src/openms/include/OpenMS/METADATA/ProteinHit.h @@ -63,20 +63,12 @@ namespace OpenMS /// Greater predicate for scores of hits class OPENMS_DLLAPI ScoreMore { -public: - template - bool operator()(const Arg & a, const Arg & b) + public: + template + bool operator()(const Arg& a, const Arg& b) const { -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wfloat-equal" - if (a.getScore() != b.getScore()) -#pragma clang diagnostic pop - { - return a.getScore() > b.getScore(); - } - return a.getAccession() > b.getAccession(); + return std::make_tuple(a.getScore(), a.getAccession()) > std::make_tuple(b.getScore(), b.getAccession()); } - }; /// Lesser predicate for scores of hits @@ -84,16 +76,9 @@ namespace OpenMS { public: template - bool operator()(const Arg & a, const Arg & 
b) + bool operator()(const Arg & a, const Arg & b) const { -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wfloat-equal" - if (a.getScore() != b.getScore()) -#pragma clang diagnostic pop - { - return a.getScore() < b.getScore(); - } - return a.getAccession() < b.getAccession(); + return std::make_tuple(a.getScore(), a.getAccession()) < std::make_tuple(b.getScore(), b.getAccession()); } }; diff --git a/src/openms/include/OpenMS/METADATA/SpectrumSettings.h b/src/openms/include/OpenMS/METADATA/SpectrumSettings.h index 37c79c137db..5594cbd9170 100644 --- a/src/openms/include/OpenMS/METADATA/SpectrumSettings.h +++ b/src/openms/include/OpenMS/METADATA/SpectrumSettings.h @@ -123,13 +123,6 @@ namespace OpenMS /// sets the products void setProducts(const std::vector & products); - /// returns a const reference to the PeptideIdentification vector - const std::vector & getPeptideIdentifications() const; - /// returns a mutable reference to the PeptideIdentification vector - std::vector & getPeptideIdentifications(); - /// sets the PeptideIdentification vector - void setPeptideIdentifications(const std::vector & identifications); - /// sets the description of the applied processing void setDataProcessing(const std::vector< DataProcessingPtr > & data_processing); @@ -149,7 +142,6 @@ namespace OpenMS AcquisitionInfo acquisition_info_; std::vector precursors_; std::vector products_; - std::vector identification_; std::vector< DataProcessingPtr > data_processing_; }; diff --git a/src/openms/include/OpenMS/METADATA/sources.cmake b/src/openms/include/OpenMS/METADATA/sources.cmake index fa826e9e53f..754ca52abf6 100644 --- a/src/openms/include/OpenMS/METADATA/sources.cmake +++ b/src/openms/include/OpenMS/METADATA/sources.cmake @@ -5,6 +5,7 @@ set(directory include/OpenMS/METADATA) set(sources_list_h AbsoluteQuantitationStandards.h Acquisition.h +AnnotatedMSRun.h AcquisitionInfo.h CVTerm.h CVTermList.h diff --git a/src/openms/include/OpenMS/PROCESSING/ID/IDFilter.h 
b/src/openms/include/OpenMS/PROCESSING/ID/IDFilter.h index af9c1b1ff83..573fd44a37a 100644 --- a/src/openms/include/OpenMS/PROCESSING/ID/IDFilter.h +++ b/src/openms/include/OpenMS/PROCESSING/ID/IDFilter.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -30,6 +31,14 @@ namespace OpenMS { + template + concept IsPeptideOrProteinIdentification = + std::is_same_v || std::is_same_v; + + template + concept IsFeatureOrConsensusMap = + std::is_same_v || std::is_same_v; + /** @brief Collection of functions for filtering peptide and protein identifications. @@ -590,7 +599,7 @@ namespace OpenMS removeMatchingItemsUnroll(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred); } - template + template static void removeMatchingPeptideIdentifications(MapType& prot_and_pep_ids, Predicate& pred) { for (auto& feat : prot_and_pep_ids) @@ -776,7 +785,7 @@ namespace OpenMS ///@{ /// Removes peptide or protein identifications that have no hits in them - template + template static void removeEmptyIdentifications(std::vector& ids) { struct HasNoHits empty_filter; @@ -962,6 +971,20 @@ namespace OpenMS } } + /** + @brief Filters peptide or protein identifications according to the given proteins (positive). + + Hits with a matching protein accession in @p accessions are kept. + + @note The ranks of the hits may be invalidated. + */ + template + static void keepHitsMatchingProteins(IdentificationType& id, const std::set& accessions) + { + struct HasMatchingAccession acc_filter(accessions); + keepMatchingItems(id.getHits(), acc_filter); + } + /** @brief Filters peptide or protein identifications according to the given proteins (positive). 
@@ -972,11 +995,7 @@ namespace OpenMS template static void keepHitsMatchingProteins(std::vector& ids, const std::set& accessions) { - struct HasMatchingAccession acc_filter(accessions); - for (auto& id_it : ids) - { - keepMatchingItems(id_it.getHits(), acc_filter); - } + for (auto& id_it : ids) keepHitsMatchingProteins(id_it, accessions); } ///@} @@ -1095,46 +1114,58 @@ namespace OpenMS ///@} - /// @name Filter functions for MS/MS experiments + /// @name Filter functions for AnnotatedMSRun ///@{ - /// Filters an MS/MS experiment according to score thresholds - static void filterHitsByScore(PeakMap& experiment, double peptide_threshold_score, double protein_threshold_score) + /// Filters AnnotatedMSRun according to score thresholds + static void filterHitsByScore(AnnotatedMSRun& annotated_data, + double peptide_threshold_score, + double protein_threshold_score) { // filter protein hits: - filterHitsByScore(experiment.getProteinIdentifications(), protein_threshold_score); - // don't remove empty protein IDs - they contain search metadata and may + filterHitsByScore(annotated_data.getProteinIdentifications(), + protein_threshold_score); + // don't remove empty protein IDs - they contain search meta data and may // be referenced by peptide IDs (via run ID) // filter peptide hits: - for (PeakMap::Iterator exp_it = experiment.begin(); exp_it != experiment.end(); ++exp_it) + for (PeptideIdentification& peptide_id : annotated_data.getPeptideIdentifications()) { - filterHitsByScore(exp_it->getPeptideIdentifications(), peptide_threshold_score); - removeEmptyIdentifications(exp_it->getPeptideIdentifications()); - // TODO super-duper inefficient. - updateProteinReferences(exp_it->getPeptideIdentifications(), experiment.getProteinIdentifications()); + filterHitsByScore(peptide_id, peptide_threshold_score); } - // @TODO: remove proteins that aren't referenced by peptides any more? 
+ updateProteinReferences(annotated_data.getPeptideIdentifications(), annotated_data.getProteinIdentifications()); } - /// Filters an MS/MS experiment by keeping the N best peptide hits for every spectrum - static void keepNBestHits(PeakMap& experiment, Size n) + /// Filters AnnotatedMSRun by keeping the N best peptide hits for every spectrum + static void keepNBestHits(AnnotatedMSRun& annotated_data, Size n) { // don't filter the protein hits by "N best" here - filter the peptides // and update the protein hits! std::vector all_peptides; // IDs from all spectra - // filter peptide hits: - for (PeakMap::Iterator exp_it = experiment.begin(); exp_it != experiment.end(); ++exp_it) + for (PeptideIdentification& peptide_id : annotated_data.getPeptideIdentifications()) { - std::vector& peptides = exp_it->getPeptideIdentifications(); - keepNBestHits(peptides, n); - removeEmptyIdentifications(peptides); - updateProteinReferences(peptides, experiment.getProteinIdentifications()); - all_peptides.insert(all_peptides.end(), peptides.begin(), peptides.end()); + // Create a temporary vector with a single PeptideIdentification + std::vector temp_vec = {peptide_id}; + keepNBestHits(temp_vec, n); + // Copy back the filtered hits + if (!temp_vec.empty()) + { + peptide_id = temp_vec[0]; + } + else + { + peptide_id.getHits().clear(); + } + + // Since we're working with individual PeptideIdentifications, we don't need to remove empty ones + // but we still need to update protein references + temp_vec = {peptide_id}; + updateProteinReferences(temp_vec, annotated_data.getProteinIdentifications()); + all_peptides.push_back(peptide_id); } // update protein hits: - removeUnreferencedProteins(experiment.getProteinIdentifications(), all_peptides); + removeUnreferencedProteins(annotated_data.getProteinIdentifications(), all_peptides); } /// Filter identifications by "N best" PeptideIdentification objects (better PeptideIdentification means better [best] PeptideHit than other). 
@@ -1300,11 +1331,13 @@ namespace OpenMS } } - /// Filters an MS/MS experiment according to the given proteins - static void keepHitsMatchingProteins(PeakMap& experiment, const std::vector& proteins) + /// Filters an AnnotatedMSRun according to the given proteins: protein hits and the peptide hits of MS2 spectra are restricted to the identifiers of @p proteins. + static void keepHitsMatchingProteins( + AnnotatedMSRun& experiment, + const std::vector& proteins) { std::set accessions; - for (std::vector::const_iterator it = proteins.begin(); it != proteins.end(); ++it) + for (auto it = proteins.begin(); it != proteins.end(); ++it) { accessions.insert(it->identifier); } @@ -1313,14 +1346,15 @@ namespace OpenMS keepHitsMatchingProteins(experiment.getProteinIdentifications(), accessions); // filter peptide hits: - for (PeakMap::Iterator exp_it = experiment.begin(); exp_it != experiment.end(); ++exp_it) + // iterate over (spectrum, peptide identification) reference pairs + for (auto [spectrum, peptide_id] : experiment) { - if (exp_it->getMSLevel() == 2) + if (spectrum.getMSLevel() == 2) { - keepHitsMatchingProteins(exp_it->getPeptideIdentifications(), accessions); - removeEmptyIdentifications(exp_it->getPeptideIdentifications()); + keepHitsMatchingProteins(peptide_id, accessions); } } + // emptied identifications are kept on purpose: AnnotatedMSRun pairs spectra and peptide identifications by index } ///@} diff --git a/src/openms/source/ANALYSIS/ID/IDMapper.cpp b/src/openms/source/ANALYSIS/ID/IDMapper.cpp index 0809463cc58..7a7c074d4dc 100644 --- a/src/openms/source/ANALYSIS/ID/IDMapper.cpp +++ b/src/openms/source/ANALYSIS/ID/IDMapper.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -74,18 +75,19 @@ namespace OpenMS ignore_charge_ = param_.getValue("ignore_charge") == "true"; } - void IDMapper::annotate(PeakMap& map, const vector& peptide_ids, const vector& protein_ids, const bool clear_ids, const bool map_ms1) + void IDMapper::annotate(AnnotatedMSRun& map, + const vector& peptide_ids, + const vector& protein_ids, + const bool clear_ids, + const bool map_ms1) { checkHits_(peptide_ids); SpectrumLookup lookup; if (clear_ids) { // start with 
empty IDs - for (PeakMap::iterator it = map.begin(); it != map.end(); ++it) - { - it->setPeptideIdentifications({}); - } - map.setProteinIdentifications({}); + map.getPeptideIdentifications().assign(map.getMSExperiment().getSpectra().size(), PeptideIdentification()); // one empty ID per spectrum, so the pairing also holds if we return early below + map.getProteinIdentifications().clear(); } if (peptide_ids.empty()) return; @@ -93,33 +95,41 @@ namespace OpenMS // append protein identifications map.getProteinIdentifications().insert(map.getProteinIdentifications().end(), protein_ids.begin(), protein_ids.end()); - lookup.readSpectra(map); + // AnnotatedMSRun will have one PeptideIdentification per spectrum (including ones without hits) + map.getPeptideIdentifications().resize(map.getMSExperiment().getSpectra().size()); + + // set up the lookup table for the spectra + lookup.readSpectra(map.getMSExperiment()); // remember which peptides were mapped (for stats later) unordered_set peptides_mapped; + // store mapping of identification RT to index (ignore empty hits) multimap identifications_precursors; for (Size i = 0; i < peptide_ids.size(); ++i) { - if (!peptide_ids[i].empty()) - { // mapping is done by either native id or by comparing peptide_id RT with experiment RT - if (!peptide_ids[i].metaValueExists(Constants::UserParam::SPECTRUM_REFERENCE)) - { // use RT for mapping + if (peptide_ids[i].empty()) continue; + // mapping is done by either native id or by comparing peptide_id RT with experiment RT + if (!peptide_ids[i].metaValueExists(Constants::UserParam::SPECTRUM_REFERENCE)) + { // use RT for mapping + identifications_precursors.insert(make_pair(peptide_ids[i].getRT(), i)); + } + else + { // use native id for mapping + DataValue native_id = peptide_ids[i].getMetaValue(Constants::UserParam::SPECTRUM_REFERENCE); + try + { // spectrum can be retrieved + Size spectrum_idx = lookup.findByNativeID(native_id); + // Since we now have only one PeptideIdentification per spectrum, we need to merge the hits + PeptideIdentification& existing_id = map.getPeptideIdentifications()[spectrum_idx]; + 
existing_id.getHits().insert(existing_id.getHits().end(), + peptide_ids[i].getHits().begin(), + peptide_ids[i].getHits().end()); + peptides_mapped.insert(i); + } + catch (const Exception::ElementNotFound& /*e*/) + { // use RT for mapping identifications_precursors.insert(make_pair(peptide_ids[i].getRT(), i)); - } - else - { // use native id for mapping - DataValue native_id = peptide_ids[i].getMetaValue(Constants::UserParam::SPECTRUM_REFERENCE); - try - { // spectrum can be retrieved - Size spectrum_idx = lookup.findByNativeID(native_id); - map[spectrum_idx].getPeptideIdentifications().push_back(peptide_ids[i]); - peptides_mapped.insert(i); - } - catch (const Exception::ElementNotFound& /*e*/) - { // use RT for mapping - identifications_precursors.insert(make_pair(peptide_ids[i].getRT(), i)); - } } } } @@ -128,9 +138,9 @@ namespace OpenMS { // store mapping of scan RT to index multimap experiment_precursors; - for (Size i = 0; i < map.size(); i++) + for (Size i = 0; i < map.getMSExperiment().size(); i++) { - experiment_precursors.insert(make_pair(map[i].getRT(), i)); + experiment_precursors.insert(make_pair(map.getMSExperiment()[i].getRT(), i)); } // note that mappings are sorted by key via multimap (we rely on that down below) @@ -174,7 +184,7 @@ namespace OpenMS bool success = map_ms1; if (!success) { - for (const auto& precursor : map[experiment_iterator->second].getPrecursors()) + for (const auto& precursor : map.getMSExperiment()[experiment_iterator->second].getPrecursors()) { if (isMatch_(0, peptide_ids[identifications_iterator->second].getMZ(), precursor.getMZ())) { @@ -183,9 +193,14 @@ namespace OpenMS } } } + if (success) { - map[experiment_iterator->second].getPeptideIdentifications().push_back(peptide_ids[identifications_iterator->second]); + // Since we have only one PeptideIdentification per spectrum, we need to merge the hits + PeptideIdentification& existing_id = map.getPeptideIdentifications()[experiment_iterator->second]; + 
existing_id.getHits().insert(existing_id.getHits().end(), + peptide_ids[identifications_iterator->second].getHits().begin(), + peptide_ids[identifications_iterator->second].getHits().end()); peptides_mapped.insert(identifications_iterator->second); } ++identifications_iterator; @@ -201,8 +216,7 @@ namespace OpenMS << " Unmapped (empty) peptides: " << peptide_ids.size() - identifications_precursors.size() << endl; } - - void IDMapper::annotate(PeakMap& map, FeatureMap fmap, const bool clear_ids, const bool map_ms1) + void IDMapper::annotate(AnnotatedMSRun& map, const FeatureMap& fmap, const bool clear_ids, const bool map_ms1) { const vector& protein_ids = fmap.getProteinIdentifications(); vector peptide_ids; @@ -1085,3 +1099,4 @@ namespace OpenMS } } // namespace OpenMS + diff --git a/src/openms/source/ANALYSIS/MAPMATCHING/MapAlignmentAlgorithmIdentification.cpp b/src/openms/source/ANALYSIS/MAPMATCHING/MapAlignmentAlgorithmIdentification.cpp index be605ca90b2..14fd6a31283 100644 --- a/src/openms/source/ANALYSIS/MAPMATCHING/MapAlignmentAlgorithmIdentification.cpp +++ b/src/openms/source/ANALYSIS/MAPMATCHING/MapAlignmentAlgorithmIdentification.cpp @@ -10,6 +10,7 @@ #include #include #include +#include using namespace std; @@ -93,8 +94,7 @@ namespace OpenMS bool MapAlignmentAlgorithmIdentification::getRetentionTimes_( const vector& peptides, SeqToList& rt_data) { - for (vector::const_iterator pep_it = peptides.begin(); - pep_it != peptides.end(); ++pep_it) + for (auto pep_it = peptides.cbegin(); pep_it != peptides.cend(); ++pep_it) { if (!pep_it->getHits().empty()) { @@ -178,14 +178,9 @@ namespace OpenMS // lists of peptide hits in "maps" will be sorted bool MapAlignmentAlgorithmIdentification::getRetentionTimes_( - const PeakMap& experiment, SeqToList& rt_data) + const AnnotatedMSRun& experiment, SeqToList& rt_data) { - for (PeakMap::ConstIterator exp_it = experiment.begin(); - exp_it != experiment.end(); ++exp_it) - { - 
getRetentionTimes_(exp_it->getPeptideIdentifications(), rt_data); - } - // duplicate annotations should not be possible -> no need to remove them + getRetentionTimes_(experiment.getPeptideIdentifications(), rt_data); return false; } diff --git a/src/openms/source/APPLICATIONS/ToolHandler.cpp b/src/openms/source/APPLICATIONS/ToolHandler.cpp index 417411d56b0..b602efa5baf 100644 --- a/src/openms/source/APPLICATIONS/ToolHandler.cpp +++ b/src/openms/source/APPLICATIONS/ToolHandler.cpp @@ -91,7 +91,6 @@ namespace OpenMS tools_map["IDFileConverter"] = Internal::ToolDescription("IDFileConverter", cat_file_converter); tools_map["IDFilter"] = Internal::ToolDescription("IDFilter", cat_file_filter_extract_merge); tools_map["IDMapper"] = Internal::ToolDescription("IDMapper", cat_ID_proc); - tools_map["IDMassAccuracy"] = Internal::ToolDescription("IDMassAccuracy", cat_ID_proc); tools_map["IDMerger"] = Internal::ToolDescription("IDMerger", cat_file_filter_extract_merge); tools_map["IDPosteriorErrorProbability"] = Internal::ToolDescription("IDPosteriorErrorProbability", cat_ID_proc); tools_map["IDRipper"] = Internal::ToolDescription("IDRipper", cat_file_filter_extract_merge); @@ -174,8 +173,6 @@ namespace OpenMS tools_map["SequenceCoverageCalculator"] = Internal::ToolDescription("SequenceCoverageCalculator", cat_ID_proc); tools_map["SimpleSearchEngine"] = Internal::ToolDescription("SimpleSearchEngine", cat_ID_search); tools_map["SiriusExport"] = Internal::ToolDescription("SiriusExport", cat_ID_MTX); - tools_map["SpecLibCreator"] = Internal::ToolDescription("SpecLibCreator", cat_ID_proc); - tools_map["SpecLibSearcher"] = Internal::ToolDescription("SpecLibSearcher", cat_ID_search); tools_map["SpectraFilterNLargest"] = Internal::ToolDescription("SpectraFilterNLargest", cat_signal_proc_smooth_normalize); tools_map["SpectraFilterNormalizer"] = Internal::ToolDescription("SpectraFilterNormalizer", cat_signal_proc_smooth_normalize); tools_map["SpectraFilterThresholdMower"] = 
Internal::ToolDescription("SpectraFilterThresholdMower", cat_signal_proc_smooth_normalize); diff --git a/src/openms/source/FEATUREFINDER/FFIDAlgoExternalIDHandler.cpp b/src/openms/source/FEATUREFINDER/FFIDAlgoExternalIDHandler.cpp new file mode 100644 index 00000000000..bb3158516db --- /dev/null +++ b/src/openms/source/FEATUREFINDER/FFIDAlgoExternalIDHandler.cpp @@ -0,0 +1,678 @@ +// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin +// SPDX-License-Identifier: BSD-3-Clause +// +// -------------------------------------------------------------------------- +// $Maintainer: Timo Sachsenberg $ +// $Authors: Hendrik Weisser $ +// -------------------------------------------------------------------------- + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace OpenMS +{ +namespace Internal +{ + FFIDAlgoExternalIDHandler::FFIDAlgoExternalIDHandler() : + n_external_peptides_(0), + n_external_features_(0), + svm_n_parts_(3), + svm_n_samples_(0), + svm_min_prob_(0.0), + n_internal_features_(0) + { + } + + void FFIDAlgoExternalIDHandler::initSVMParameters_(const Param& param) + { + svm_min_prob_ = param.getValue("svm:min_prob"); + svm_n_parts_ = param.getValue("svm:xval"); + svm_n_samples_ = param.getValue("svm:samples"); + svm_xval_out_ = param.getValue("svm:xval_out").toString(); + svm_quality_cutoff = svm_min_prob_; + svm_predictor_names_ = ListUtils::create(param.getValue("svm:predictors").toString()); + debug_level_ = param.getValue("debug"); + } + + void FFIDAlgoExternalIDHandler::reset() + { + external_peptide_map_.clear(); + rt_transformation_ = TransformationDescription(); + n_external_peptides_ = 0; + n_external_features_ = 0; + svm_probs_external_.clear(); + svm_probs_internal_.clear(); + n_internal_features_ = 0; + } + + void FFIDAlgoExternalIDHandler::addExternalPeptide(PeptideIdentification& peptide) + { + if (peptide.getHits().empty()) + { + return; + } + + 
peptide.sort(); + PeptideHit& hit = peptide.getHits()[0]; + peptide.getHits().resize(1); + + Int charge = hit.getCharge(); + double rt = peptide.getRT(); + double mz = peptide.getMZ(); + + external_peptide_map_[hit.getSequence()][charge].emplace(rt, &peptide); + + OPENMS_LOG_DEBUG_NOFILE << "Adding peptide (external) " << hit.getSequence() + << "; CHG: " << charge << "; RT: " << rt + << "; MZ: " << mz << std::endl; + } + + void FFIDAlgoExternalIDHandler::processExternalPeptides(std::vector& peptides_ext) + { + for (PeptideIdentification& pep : peptides_ext) + { + addExternalPeptide(pep); + pep.setMetaValue("FFId_category", "external"); + } + + n_external_peptides_ = external_peptide_map_.size(); + } + + double FFIDAlgoExternalIDHandler::alignInternalAndExternalIDs( + const std::vector& peptides_internal, + const std::vector& peptides_external, + double rt_quantile) + { + // Reset the handler state + reset(); + + // Align internal and external IDs to estimate RT shifts: + MapAlignmentAlgorithmIdentification aligner; + aligner.setReference(peptides_external); // go from internal to external scale + std::vector> aligner_peptides(1, peptides_internal); + std::vector aligner_trafos; + + OPENMS_LOG_INFO << "Realigning internal and external IDs..."; + aligner.align(aligner_peptides, aligner_trafos); + rt_transformation_ = aligner_trafos[0]; + + std::vector aligned_diffs; + rt_transformation_.getDeviations(aligned_diffs); + + // Calculate RT uncertainty based on quantile + std::sort(aligned_diffs.begin(), aligned_diffs.end()); + Size index = std::clamp(Size(rt_quantile * aligned_diffs.size()), + Size(0), aligned_diffs.size() - 1); + double rt_uncertainty = aligned_diffs[index]; + + try + { + aligner_trafos[0].fitModel("lowess"); + rt_transformation_ = aligner_trafos[0]; + } + catch (Exception::BaseException& e) + { + OPENMS_LOG_ERROR << "Error: Failed to align RTs of internal/external peptides. " + << "RT information will not be considered in the SVM classification. 
" + << "The original error message was:\n" << e.what() << std::endl; + } + + return rt_uncertainty; + } + + double FFIDAlgoExternalIDHandler::transformRT(double rt) const + { + return rt_transformation_.apply(rt); + } + + bool FFIDAlgoExternalIDHandler::hasRTTransformation() const + { + return !rt_transformation_.getDataPoints().empty(); + } + + const TransformationDescription& FFIDAlgoExternalIDHandler::getRTTransformation() const + { + return rt_transformation_; + } + + void FFIDAlgoExternalIDHandler::addExternalPeptideToMap_(PeptideIdentification& peptide, + std::map, + std::multimap>>>& peptide_map) + { + if (peptide.getHits().empty()) return; + + peptide.sort(); + PeptideHit& hit = peptide.getHits()[0]; + peptide.getHits().resize(1); + + Int charge = hit.getCharge(); + double rt = peptide.getRT(); + + // Add to the external map (second in the pair) + peptide_map[hit.getSequence()][charge].second.emplace(rt, &peptide); + } + + bool FFIDAlgoExternalIDHandler::fillExternalRTMap_(const AASequence& sequence, Int charge, + std::multimap& rt_map) + { + auto seq_it = external_peptide_map_.find(sequence); + if (seq_it == external_peptide_map_.end()) return false; + + auto charge_it = seq_it->second.find(charge); + if (charge_it == seq_it->second.end()) return false; + + rt_map.insert(charge_it->second.begin(), charge_it->second.end()); + return true; + } + + void FFIDAlgoExternalIDHandler::annotateFeatureWithExternalIDs_(Feature& feature) + { + feature.setMetaValue("n_total_ids", 0); + feature.setMetaValue("n_matching_ids", -1); + feature.setMetaValue("feature_class", "unknown"); + } + + void FFIDAlgoExternalIDHandler::addDummyPeptideID_(Feature& feature, const PeptideIdentification* ext_id) + { + if (!ext_id) return; + + PeptideIdentification id = *ext_id; + id.clearMetaInfo(); + id.setMetaValue("FFId_category", "implied"); + id.setRT(feature.getRT()); + id.setMZ(feature.getMZ()); + // Only one peptide hit per ID - see function "addPeptideToMap_": + PeptideHit& hit = 
id.getHits()[0]; + hit.clearMetaInfo(); + hit.setScore(0.0); + feature.getPeptideIdentifications().push_back(id); + } + + void FFIDAlgoExternalIDHandler::handleExternalFeature_(Feature& feature, double prob_positive, double quality_cutoff) + { + svm_probs_external_.insert(prob_positive); + + if (prob_positive >= quality_cutoff) + { + feature.setOverallQuality(prob_positive); + ++n_external_features_; + } + } + + void FFIDAlgoExternalIDHandler::adjustFDRForExternalFeatures_(std::vector& fdr_probs, + std::vector& fdr_qvalues, + Size n_internal_features) + { + std::multiset::reverse_iterator ext_it = svm_probs_external_.rbegin(); + Size external_count = 0; + + for (Int i = fdr_probs.size() - 1; i >= 0; --i) + { + double cutoff = fdr_probs[i]; + while ((ext_it != svm_probs_external_.rend()) && (*ext_it >= cutoff)) + { + ++external_count; + ++ext_it; + } + fdr_qvalues[i] = (fdr_qvalues[i] * external_count) / + (external_count + n_internal_features); + } + } + + void FFIDAlgoExternalIDHandler::checkNumObservations_(Size n_pos, Size n_neg, const String& note) const + { + if (n_pos < svm_n_parts_) + { + String msg = "Not enough positive observations for " + + String(svm_n_parts_) + "-fold cross-validation" + note + "."; + throw Exception::MissingInformation(__FILE__, __LINE__, + OPENMS_PRETTY_FUNCTION, msg); + } + if (n_neg < svm_n_parts_) + { + String msg = "Not enough negative observations for " + + String(svm_n_parts_) + "-fold cross-validation" + note + "."; + throw Exception::MissingInformation(__FILE__, __LINE__, + OPENMS_PRETTY_FUNCTION, msg); + } + } +void FFIDAlgoExternalIDHandler::getUnbiasedSample_(const std::multimap >& valid_obs, + std::map& training_labels) + { + // Create an unbiased training sample: + // - same number of pos./neg. observations (approx.), + // - same intensity distribution of pos./neg. observations. + // We use a sliding window over the set of observations, ordered by + // intensity. At each step, we examine the proportion of both pos./neg. 
+ // observations in the window and select the middle element with according + // probability. (We use an even window size, to cover the ideal case where + // the two classes are balanced.) + const Size window_size = 8; + const Size half_win_size = window_size / 2; + if (valid_obs.size() < half_win_size + 1) + { + String msg = "Not enough observations for intensity-bias filtering."; + throw Exception::MissingInformation(__FILE__, __LINE__, + OPENMS_PRETTY_FUNCTION, msg); + } + srand(time(nullptr)); // seed random number generator + Size n_obs[2] = {0, 0}; // counters for neg./pos. observations + Size counts[2] = {0, 0}; // pos./neg. counts in current window + // iterators to begin, middle and past-the-end of sliding window: + std::multimap >::const_iterator begin, middle, end; + begin = middle = end = valid_obs.begin(); + // initialize ("middle" is at beginning of sequence, so no full window): + for (Size i = 0; i <= half_win_size; ++i, ++end) + { + ++counts[end->second.second]; // increase counter for pos./neg. obs. + } + // "i" is the index of one of the two middle values of the sliding window: + // - in the left half of the sequence, "i" is left-middle, + // - in the right half of the sequence, "i" is right-middle. + // The counts are updated as "i" and the sliding window move to the right. + for (Size i = 0; i < valid_obs.size(); ++i, ++middle) + { + // if count for either class is zero, we don't select anything: + if ((counts[0] > 0) && (counts[1] > 0)) + { + // probability thresholds for neg./pos. observations: + double thresholds[2] = {counts[1] / float(counts[0]), + counts[0] / float(counts[1])}; + // check middle values: + double rnd = rand() / double(RAND_MAX); // random num. 
in range 0-1 + if (rnd < thresholds[middle->second.second]) + { + training_labels[middle->second.first] = Int(middle->second.second); + ++n_obs[middle->second.second]; + } + } + // update sliding window and class counts; + // when we reach the middle of the sequence, we keep the window in place + // for one step, to change from "left-middle" to "right-middle": + if (i != valid_obs.size() / 2) + { + // only move "begin" when "middle" has advanced far enough: + if (i > half_win_size) + { + --counts[begin->second.second]; + ++begin; + } + // don't increment "end" beyond the defined range: + if (end != valid_obs.end()) + { + ++counts[end->second.second]; + ++end; + } + } + } + checkNumObservations_(n_obs[1], n_obs[0], " after bias filtering"); + } + + void FFIDAlgoExternalIDHandler::getRandomSample_(std::map& training_labels) + { + // Pick a random subset of size "svm_n_samples_" for training: Shuffle the whole + // sequence, then select the first "svm_n_samples_" elements. + std::vector selection; + selection.reserve(training_labels.size()); + for (auto it = training_labels.begin(); it != training_labels.end(); ++it) + { + selection.push_back(it->first); + } + Math::RandomShuffler shuffler; + shuffler.portable_random_shuffle(selection.begin(), selection.end()); + // However, ensure that at least "svm_n_parts_" pos./neg. observations are + // included (for cross-validation) - there must be enough, otherwise + // "checkNumObservations" would have thrown an error. To this end, move + // "svm_n_parts_" pos. observations to the beginning of sequence, followed by + // "svm_n_parts_" neg. observations (pos. first - see reason below): + Size n_obs[2] = {0, 0}; // counters for neg./pos. 
observations + for (Int label = 1; label >= 0; --label) + { + for (Size i = n_obs[1]; i < selection.size(); ++i) + { + Size obs_index = selection[i]; + if (training_labels[obs_index] == label) + { + std::swap(selection[i], selection[n_obs[label]]); + ++n_obs[label]; + } + if (n_obs[label] == svm_n_parts_) + { + break; + } + } + } + selection.resize(svm_n_samples_); + // copy the selected subset back: + std::map temp; + for (std::vector::iterator it = selection.begin(); it != selection.end(); + ++it) + { + temp[*it] = training_labels[*it]; + } + training_labels.swap(temp); + } + + void FFIDAlgoExternalIDHandler::classifyFeaturesWithSVM(FeatureMap& features, const Param& param) + { + // Initialize SVM parameters in the external ID handler + initSVMParameters_(param); + + if (features.empty()) + { + return; + } + if (features[0].metaValueExists("rt_delta")) // include RT feature + { + if (std::find(svm_predictor_names_.begin(), svm_predictor_names_.end(), "rt_delta") == svm_predictor_names_.end()) + { + svm_predictor_names_.push_back("rt_delta"); + } + } + // values for all features per predictor (this way around to simplify scaling + // of predictors): + SimpleSVM::PredictorMap predictors; + for (const String& pred : svm_predictor_names_) + { + predictors[pred].reserve(features.size()); + for (Feature& feat : features) + { + if (!feat.metaValueExists(pred)) + { + OPENMS_LOG_ERROR << "Meta value '" << pred << "' missing for feature '" + << feat.getUniqueId() << "'" << std::endl; + predictors.erase(pred); + break; + } + predictors[pred].push_back(feat.getMetaValue(pred)); + } + } + + // get labels for SVM: + std::map training_labels; + bool no_selection = param.getValue("svm:no_selection") == "true"; + // mapping (for bias correction): intensity -> (index, positive?) + std::multimap > valid_obs; + Size n_obs[2] = {0, 0}; // counters for neg./pos. 
observations + for (Size feat_index = 0; feat_index < features.size(); ++feat_index) + { + String feature_class = features[feat_index].getMetaValue("feature_class"); + int label = -1; + if (feature_class == "positive") + { + label = 1; + } + else if (feature_class == "negative") + { + label = 0; + } + if (label != -1) + { + ++n_obs[label]; + if (!no_selection) + { + double intensity = features[feat_index].getIntensity(); + valid_obs.insert(std::make_pair(intensity, std::make_pair(feat_index, + bool(label)))); + } + else + { + training_labels[feat_index] = (double)label; + } + } + } + checkNumObservations_(n_obs[1], n_obs[0]); + + if (!no_selection) + { + getUnbiasedSample_(valid_obs, training_labels); + } + if (svm_n_samples_ > 0) // limited number of samples for training + { + if (training_labels.size() < svm_n_samples_) + { + OPENMS_LOG_WARN << "Warning: There are only " << training_labels.size() + << " valid observations for training." << std::endl; + } + else if (training_labels.size() > svm_n_samples_) + { + getRandomSample_(training_labels); + } + } + + SimpleSVM svm; + // set (only) the relevant parameters: + Param svm_params = svm.getParameters(); + Logger::LogStream no_log; // suppress warnings about additional parameters + svm_params.update(param.copy("svm:", true), false, no_log); + svm.setParameters(svm_params); + svm.setup(predictors, training_labels); + if (!svm_xval_out_.empty()) + { + svm.writeXvalResults(svm_xval_out_); + } + if ((debug_level_ > 0) && svm_params.getValue("kernel") == "linear") + { + std::map feature_weights; + svm.getFeatureWeights(feature_weights); + OPENMS_LOG_DEBUG << "SVM feature weights:" << std::endl; + for (std::map::iterator it = feature_weights.begin(); + it != feature_weights.end(); ++it) + { + OPENMS_LOG_DEBUG << "- " << it->first << ": " << it->second << std::endl; + } + } + + std::vector predictions; + svm.predict(predictions); + OPENMS_POSTCONDITION(predictions.size() == features.size(), + "SVM predictions for all 
features expected"); + for (Size i = 0; i < features.size(); ++i) + { + features[i].setMetaValue("predicted_class", predictions[i].outcome); + double prob_positive = predictions[i].probabilities[1]; + features[i].setMetaValue("predicted_probability", prob_positive); + // @TODO: store previous (OpenSWATH) overall quality in a meta value? + features[i].setOverallQuality(prob_positive); + } + } + + void FFIDAlgoExternalIDHandler::finalizeAssayFeatures_(Feature& best_feature, double best_quality, double quality_cutoff) + { + const String& feature_class = best_feature.getMetaValue("feature_class"); + if (feature_class == "positive") // true positive prediction + { + svm_probs_internal_[best_quality].first++; + } + else if ((feature_class == "negative") || // false positive prediction + (feature_class == "ambiguous")) // let's be strict about this + { + svm_probs_internal_[best_quality].second++; + } + else if (feature_class == "unknown") + { + svm_probs_external_.insert(best_quality); + if (best_quality >= quality_cutoff) + { + best_feature.setOverallQuality(best_quality); + ++n_external_features_; + } + } + } + + void FFIDAlgoExternalIDHandler::filterClassifiedFeatures(FeatureMap& features, double quality_cutoff) + { + if (features.empty()) + { + return; + } + + // Remove features with class "negative" or "ambiguous", keep "positive". + // For class "unknown", for every assay (meta value "PeptideRef"), keep + // the feature with highest "predicted_probability" (= overall quality), + // subject to the "svm:min_prob" threshold. + // We mark features for removal by setting their overall quality to zero. 
+ n_internal_features_ = 0; + n_external_features_ = 0; + FeatureMap::Iterator best_it = features.begin(); + double best_quality = 0.0; + String previous_ref; + for (FeatureMap::Iterator it = features.begin(); it != features.end(); ++it) + { + // features from same assay (same "PeptideRef") appear consecutively; + // if this is a new assay, finalize the previous one: + String peptide_ref = it->getMetaValue("PeptideRef"); + // remove region number, if present: + Size pos_slash = peptide_ref.rfind('/'); + Size pos_colon = peptide_ref.find(':', pos_slash + 2); + peptide_ref = peptide_ref.substr(0, pos_colon); + + if (peptide_ref != previous_ref) + { + if (!previous_ref.empty()) + { + finalizeAssayFeatures_(*best_it, best_quality, quality_cutoff); + best_quality = 0.0; + } + previous_ref = peptide_ref; + } + + // update qualities: + if ((it->getOverallQuality() > best_quality) || + // break ties by intensity: + ((it->getOverallQuality() == best_quality) && + (it->getIntensity() > best_it->getIntensity()))) + { + best_it = it; + best_quality = it->getOverallQuality(); + } + if (it->getMetaValue("feature_class") == "positive") + { + n_internal_features_++; + } + else + { + it->setOverallQuality(0.0); // gets overwritten for "best" candidate + } + } + // set of features from the last assay: + finalizeAssayFeatures_(*best_it, best_quality, quality_cutoff); + + features.erase(std::remove_if(features.begin(), features.end(), + [](const Feature& f) { + return f.getOverallQuality() == 0.0; + }), + features.end()); + } + + void FFIDAlgoExternalIDHandler::calculateFDR(FeatureMap& features) + { + if (getSVMProbsInternal().empty()) return; + + // cumulate the true/false positive counts, in decreasing probability order: + Size n_false = 0, n_true = 0; + for (std::map >::reverse_iterator prob_it = + svm_probs_internal_.rbegin(); prob_it != svm_probs_internal_.rend(); + ++prob_it) + { + n_true += prob_it->second.first; + n_false += prob_it->second.second; + prob_it->second.first = 
n_true; + prob_it->second.second = n_false; + } + + // print FDR for features that made the cut-off: + std::map >::iterator prob_it = + svm_probs_internal_.lower_bound(svm_min_prob_); + if (prob_it != svm_probs_internal_.end()) + { + float fdr = float(prob_it->second.second) / (prob_it->second.first + + prob_it->second.second); + OPENMS_LOG_INFO << "Estimated FDR of features detected based on 'external' IDs: " + << fdr * 100.0 << "%" << std::endl; + fdr = (fdr * n_external_features_) / (n_external_features_ + + n_internal_features_); + OPENMS_LOG_INFO << "Estimated FDR of all detected features: " << fdr * 100.0 + << "%" << std::endl; + } + + // calculate q-values: + std::vector qvalues; + qvalues.reserve(svm_probs_internal_.size()); + double min_fdr = 1.0; + for (prob_it = svm_probs_internal_.begin(); + prob_it != svm_probs_internal_.end(); ++prob_it) + { + double fdr = double(prob_it->second.second) / (prob_it->second.first + + prob_it->second.second); + if (fdr < min_fdr) + { + min_fdr = fdr; + } + qvalues.push_back(min_fdr); + } + // record only probabilities where q-value changes: + std::vector fdr_probs, fdr_qvalues; + std::vector::iterator qv_it = qvalues.begin(); + double previous_qvalue = -1.0; + for (prob_it = svm_probs_internal_.begin(); + prob_it != svm_probs_internal_.end(); ++prob_it, ++qv_it) + { + if (*qv_it != previous_qvalue) + { + fdr_probs.push_back(prob_it->first); + fdr_qvalues.push_back(*qv_it); + previous_qvalue = *qv_it; + } + } + features.setMetaValue("FDR_probabilities", fdr_probs); + features.setMetaValue("FDR_qvalues_raw", fdr_qvalues); + + // FDRs are estimated from "internal" features, but apply only to "external" + // ones. "Internal" features are considered "correct" by definition. 
+ // We need to adjust the q-values to take this into account: + adjustFDRForExternalFeatures_(fdr_probs, fdr_qvalues, n_internal_features_); + features.setMetaValue("FDR_qvalues_corrected", fdr_qvalues); + + // @TODO: should we use "1 - qvalue" as overall quality for features? + // assign q-values to features: + for (Feature& feat : features) + { + if (feat.getMetaValue("feature_class") == "positive") + { + feat.setMetaValue("q-value", 0.0); + } + else + { + double prob = feat.getOverallQuality(); + // find the highest FDR prob. that is less-or-equal to the feature prob.: + std::vector::iterator pos = std::upper_bound(fdr_probs.begin(), + fdr_probs.end(), prob); + if (pos != fdr_probs.begin()) + { + --pos; + } + Size dist = std::distance(fdr_probs.begin(), pos); + feat.setMetaValue("q-value", fdr_qvalues[dist]); + } + } + } + + const std::map >& FFIDAlgoExternalIDHandler::getSVMProbsInternal() const + { + return svm_probs_internal_; + } + +} // namespace Internal +} // namespace OpenMS \ No newline at end of file diff --git a/src/openms/source/FEATUREFINDER/FeatureFinderAlgorithmPicked.cpp b/src/openms/source/FEATUREFINDER/FeatureFinderAlgorithmPicked.cpp index e819450dfcf..e6dc0c019ae 100644 --- a/src/openms/source/FEATUREFINDER/FeatureFinderAlgorithmPicked.cpp +++ b/src/openms/source/FEATUREFINDER/FeatureFinderAlgorithmPicked.cpp @@ -242,10 +242,10 @@ namespace OpenMS //new scope to make local variables disappear { startProgress(0, intensity_bins_ * intensity_bins_, "Precalculating intensity scores"); - double rt_start = map_.getMinRT(); - double mz_start = map_.getMinMZ(); - intensity_rt_step_ = (map_.getMaxRT() - rt_start) / (double)intensity_bins_; - intensity_mz_step_ = (map_.getMaxMZ() - mz_start) / (double)intensity_bins_; + double rt_start = map_.spectrumRanges().byMSLevel(1).getMinRT(); + double mz_start = map_.spectrumRanges().byMSLevel(1).getMinMZ(); + intensity_rt_step_ = (map_.spectrumRanges().byMSLevel(1).getMaxRT() - rt_start) / 
(double)intensity_bins_; + intensity_mz_step_ = (map_.spectrumRanges().byMSLevel(1).getMaxMZ() - mz_start) / (double)intensity_bins_; intensity_thresholds_.resize(intensity_bins_); for (Size rt = 0; rt < intensity_bins_; ++rt) { @@ -356,7 +356,7 @@ namespace OpenMS //--------------------------------------------------------------------------- //new scope to make local variables disappear { - double max_mass = map_.getMaxMZ() * charge_high; + double max_mass = map_.spectrumRanges().byMSLevel(1).getMaxMZ() * charge_high; Size num_isotopes = std::ceil(max_mass / mass_window_width_) + 1; startProgress(0, num_isotopes, "Precalculating isotope distributions"); @@ -1007,7 +1007,7 @@ namespace OpenMS { //store map of abort reasons for failed seeds FeatureMap abort_map; - abort_map.reserve(abort_reasons_.size()); + abort_map.reserve( abort_reasons_.size()); Size counter = 0; for (std::map::iterator it2 = abort_reasons_.begin(); it2 != abort_reasons_.end(); ++it2, ++counter) { @@ -1829,8 +1829,8 @@ namespace OpenMS double intensity = map_[spectrum][peak].getIntensity(); double rt = map_[spectrum].getRT(); double mz = map_[spectrum][peak].getMZ(); - double rt_min = map_.getMinRT(); - double mz_min = map_.getMinMZ(); + double rt_min = map_.spectrumRanges().byMSLevel(1).getMinRT(); + double mz_min = map_.spectrumRanges().byMSLevel(1).getMinMZ(); UInt rt_bin = std::min(2 * intensity_bins_ - 1, (UInt) std::floor((rt - rt_min) / intensity_rt_step_ * 2.0)); UInt mz_bin = std::min(2 * intensity_bins_ - 1, (UInt) std::floor((mz - mz_min) / intensity_mz_step_ * 2.0)); // determine mz bins diff --git a/src/openms/source/FEATUREFINDER/FeatureFinderIdentificationAlgorithm.cpp b/src/openms/source/FEATUREFINDER/FeatureFinderIdentificationAlgorithm.cpp index a831171019b..b9f1a4e6036 100644 --- a/src/openms/source/FEATUREFINDER/FeatureFinderIdentificationAlgorithm.cpp +++ b/src/openms/source/FEATUREFINDER/FeatureFinderIdentificationAlgorithm.cpp @@ -7,7 +7,9 @@ // 
-------------------------------------------------------------------------- #include +#include #include + #include #include #include @@ -36,9 +38,11 @@ #endif using namespace std; +using namespace OpenMS::Internal; namespace OpenMS { + FeatureFinderIdentificationAlgorithm::FeatureFinderIdentificationAlgorithm() : DefaultParamHandler("FeatureFinderIdentificationAlgorithm") { @@ -426,7 +430,6 @@ namespace OpenMS // to use MS1 Swath scores: feat_finder_.setMS1Map(SimpleOpenMSSpectraFactory::getSpectrumAccessOpenMSPtr(boost::make_shared(ms_data_))); - double rt_uncertainty(0); bool with_external_ids = !peptides_ext.empty(); if (with_external_ids && !seeds.empty()) @@ -438,30 +441,11 @@ namespace OpenMS "Using seeds and external ids is currently not supported."); } + double rt_uncertainty(0); if (with_external_ids) { - // align internal and external IDs to estimate RT shifts: - MapAlignmentAlgorithmIdentification aligner; - aligner.setReference(peptides_ext); // go from internal to external scale - vector > aligner_peptides(1, peptides); - vector aligner_trafos; - - OPENMS_LOG_INFO << "Realigning internal and external IDs..."; - aligner.align(aligner_peptides, aligner_trafos); - trafo_external_ = aligner_trafos[0]; - vector aligned_diffs; - trafo_external_.getDeviations(aligned_diffs); - Size index = std::max(Size(0), Size(rt_quantile_ * static_cast(aligned_diffs.size())) - 1); - rt_uncertainty = aligned_diffs[index]; - try - { - aligner_trafos[0].fitModel("lowess"); - trafo_external_ = aligner_trafos[0]; - } - catch (Exception::BaseException& e) - { - OPENMS_LOG_ERROR << "Error: Failed to align RTs of internal/external peptides. RT information will not be considered in the SVM classification. 
The original error message was:\n" << e.what() << endl; - } + // Use the external ID handler to align internal and external IDs + rt_uncertainty = external_id_handler_.alignInternalAndExternalIDs(peptides, peptides_ext, rt_quantile_); } if (rt_window_ == 0.0) @@ -511,12 +495,17 @@ namespace OpenMS } n_internal_peps_ = peptide_map_.size(); - for (PeptideIdentification& pep : peptides_ext) + + if (with_external_ids) { - addPeptideToMap_(pep, peptide_map_, true); - pep.setMetaValue("FFId_category", "external"); + // Process and add external peptides + for (PeptideIdentification& pep : peptides_ext) + { + addPeptideToMap_(pep, peptide_map_, true); + pep.setMetaValue("FFId_category", "external"); + } + n_external_peps_ = peptide_map_.size() - n_internal_peps_; } - n_external_peps_ = peptide_map_.size() - n_internal_peps_; boost::shared_ptr shared = boost::make_shared(ms_data_); OpenSwath::SpectrumAccessPtr spec_temp = @@ -679,11 +668,11 @@ namespace OpenMS void FeatureFinderIdentificationAlgorithm::postProcess_( FeatureMap & features, bool with_external_ids) - { + { // don't do SVM stuff unless we have external data to apply the model to: if (with_external_ids) { - classifyFeatures_(features); + external_id_handler_.classifyFeaturesWithSVM(features, param_); } // make sure proper unique ids get assigned to all features features.ensureUniqueId(); @@ -694,15 +683,26 @@ namespace OpenMS FileHandler().storeFeatures(candidates_out_, features); } - filterFeatures_(features, with_external_ids); - OPENMS_LOG_INFO << features.size() << " features left after filtering." << endl; + // Use ExternalIDHandler for feature filtering + if (with_external_ids) + { + external_id_handler_.filterClassifiedFeatures(features, external_id_handler_.getSVMProbsInternal().empty() ? 0.0 : double(param_.getValue("svm:min_prob"))); + OPENMS_LOG_INFO << features.size() << " features left after filtering." 
<< endl; + } + else + { + filterFeatures_(features, with_external_ids); + OPENMS_LOG_INFO << features.size() << " features left after filtering." << endl; + } if (features.empty()) return; // elution model fit throws on empty features - if (!svm_probs_internal_.empty()) + // Calculate FDR if we have external IDs + if (with_external_ids) { - calculateFDR_(features); - } + external_id_handler_.calculateFDR(features); + } + //TODO MRMFeatureFinderScoring already does an ElutionModel scoring. It uses EMG fitting. // Would be nice if we could only do the fitting once, since it is one of the bottlenecks. // What is the intention of this post-processing here anyway? Does it filter anything? @@ -1106,24 +1106,6 @@ namespace OpenMS } } - void FeatureFinderIdentificationAlgorithm::checkNumObservations_(Size n_pos, Size n_neg, const String& note) const - { - if (n_pos < svm_n_parts_) - { - String msg = "Not enough positive observations for " + - String(svm_n_parts_) + "-fold cross-validation" + note + "."; - throw Exception::MissingInformation(__FILE__, __LINE__, - OPENMS_PRETTY_FUNCTION, msg); - } - if (n_neg < svm_n_parts_) - { - String msg = "Not enough negative observations for " + - String(svm_n_parts_) + "-fold cross-validation" + note + "."; - throw Exception::MissingInformation(__FILE__, __LINE__, - OPENMS_PRETTY_FUNCTION, msg); - } - } - void FeatureFinderIdentificationAlgorithm::annotateFeaturesFinalizeAssay_( FeatureMap& features, map >& feat_ids, RTMap& rt_internal) @@ -1269,41 +1251,46 @@ namespace OpenMS } else // only external IDs -> no validation possible { + // Set feature class to unknown feat.setMetaValue("n_total_ids", 0); feat.setMetaValue("n_matching_ids", -1); feat.setMetaValue("feature_class", "unknown"); - // add "dummy" peptide identification: - PeptideIdentification id = *(rt_external.begin()->second); - id.clearMetaInfo(); - id.setMetaValue("FFId_category", "implied"); - id.setRT(feat.getRT()); - id.setMZ(feat.getMZ()); - // only one peptide hit 
per ID - see function "addPeptideToMap_": - PeptideHit& hit = id.getHits()[0]; - hit.clearMetaInfo(); - hit.setScore(0.0); - feat.getPeptideIdentifications().push_back(id); + + // Add a dummy peptide identification from external data + if (!rt_external.empty()) + { + PeptideIdentification id = *(rt_external.begin()->second); + id.clearMetaInfo(); + id.setMetaValue("FFId_category", "implied"); + id.setRT(feat.getRT()); + id.setMZ(feat.getMZ()); + // only one peptide hit per ID - see function "addPeptideToMap_": + PeptideHit& hit = id.getHits()[0]; + hit.clearMetaInfo(); + hit.setScore(0.0); + feat.getPeptideIdentifications().push_back(id); + } } // distance from feature to closest peptide ID: - if (!trafo_external_.getDataPoints().empty()) + if (external_id_handler_.hasRTTransformation()) { // use external IDs if available, otherwise RT-transformed internal IDs // (but only compute the transform if necessary, once per assay!): if (rt_external.empty() && (transformed_internal.empty() || - (peptide_ref != previous_ref))) + (peptide_ref != previous_ref))) { transformed_internal.clear(); for (RTMap::const_iterator it = rt_internal.begin(); it != rt_internal.end(); ++it) { - double transformed_rt = trafo_external_.apply(it->first); + double transformed_rt = external_id_handler_.transformRT(it->first); RTMap::value_type pair = make_pair(transformed_rt, it->second); transformed_internal.insert(transformed_internal.end(), pair); } } const RTMap& rt_ref = (rt_external.empty() ? 
transformed_internal : - rt_external); + rt_external); double rt_min = feat.getMetaValue("leftWidth"); double rt_max = feat.getMetaValue("rightWidth"); @@ -1398,7 +1385,7 @@ namespace OpenMS if (!quantify_decoys_) { if (hit.metaValueExists("target_decoy") && hit.getMetaValue("target_decoy") == "decoy") - { + { unassignedIDs_.push_back(peptide); return; } @@ -1417,7 +1404,13 @@ namespace OpenMS Int charge = hit.getCharge(); double rt = peptide.getRT(); double mz = peptide.getMZ(); - if (!external) + + if (external) + { + OPENMS_LOG_DEBUG_NOFILE << "Adding peptide (external) " << hit.getSequence() << "; CHG: " << charge << "; RT: " << rt << "; MZ: " << mz << endl; + peptide_map[hit.getSequence()][charge].second.emplace(rt, &peptide); + } + else { if (peptide.metaValueExists("SeedFeatureID")) { @@ -1429,11 +1422,6 @@ namespace OpenMS } peptide_map[hit.getSequence()][charge].first.emplace(rt, &peptide); } - else - { - OPENMS_LOG_DEBUG_NOFILE << "Adding peptide (external) " << hit.getSequence() << "; CHG: " << charge << "; RT: " << rt << "; MZ: " << mz << endl; - peptide_map[hit.getSequence()][charge].second.emplace(rt, &peptide); - } } void FeatureFinderIdentificationAlgorithm::updateMembers_() @@ -1477,444 +1465,23 @@ namespace OpenMS add_mass_offset_peptides_ = double(param_.getValue("add_mass_offset_peptides")); } - void FeatureFinderIdentificationAlgorithm::getUnbiasedSample_(const multimap >& valid_obs, - map& training_labels) - { - // Create an unbiased training sample: - // - same number of pos./neg. observations (approx.), - // - same intensity distribution of pos./neg. observations. - // We use a sliding window over the set of observations, ordered by - // intensity. At each step, we examine the proportion of both pos./neg. - // observations in the window and select the middle element with according - // probability. (We use an even window size, to cover the ideal case where - // the two classes are balanced.) 
- const Size window_size = 8; - const Size half_win_size = window_size / 2; - if (valid_obs.size() < half_win_size + 1) - { - String msg = "Not enough observations for intensity-bias filtering."; - throw Exception::MissingInformation(__FILE__, __LINE__, - OPENMS_PRETTY_FUNCTION, msg); - } - srand(time(nullptr)); // seed random number generator - Size n_obs[2] = {0, 0}; // counters for neg./pos. observations - Size counts[2] = {0, 0}; // pos./neg. counts in current window - // iterators to begin, middle and past-the-end of sliding window: - multimap >::const_iterator begin, middle, end; - begin = middle = end = valid_obs.begin(); - // initialize ("middle" is at beginning of sequence, so no full window): - for (Size i = 0; i <= half_win_size; ++i, ++end) - { - ++counts[end->second.second]; // increase counter for pos./neg. obs. - } - // "i" is the index of one of the two middle values of the sliding window: - // - in the left half of the sequence, "i" is left-middle, - // - in the right half of the sequence, "i" is right-middle. - // The counts are updated as "i" and the sliding window move to the right. - for (Size i = 0; i < valid_obs.size(); ++i, ++middle) - { - // if count for either class is zero, we don't select anything: - if ((counts[0] > 0) && (counts[1] > 0)) - { - // probability thresholds for neg./pos. observations: - double thresholds[2] = {counts[1] / float(counts[0]), - counts[0] / float(counts[1])}; - // check middle values: - double rnd = rand() / double(RAND_MAX); // random num. 
in range 0-1 - if (rnd < thresholds[middle->second.second]) - { - training_labels[middle->second.first] = Int(middle->second.second); - ++n_obs[middle->second.second]; - } - } - // update sliding window and class counts; - // when we reach the middle of the sequence, we keep the window in place - // for one step, to change from "left-middle" to "right-middle": - if (i != valid_obs.size() / 2) - { - // only move "begin" when "middle" has advanced far enough: - if (i > half_win_size) - { - --counts[begin->second.second]; - ++begin; - } - // don't increment "end" beyond the defined range: - if (end != valid_obs.end()) - { - ++counts[end->second.second]; - ++end; - } - } - } - checkNumObservations_(n_obs[1], n_obs[0], " after bias filtering"); - } - - - void FeatureFinderIdentificationAlgorithm::getRandomSample_(std::map& training_labels) const - { - // @TODO: can this be done with less copying back and forth of data? - // Pick a random subset of size "svm_n_samples_" for training: Shuffle the whole - // sequence, then select the first "svm_n_samples_" elements. - std::vector selection; - selection.reserve(training_labels.size()); - for (auto it = training_labels.begin(); it != training_labels.end(); ++it) - { - selection.push_back(it->first); - } - //TODO check how often this is potentially called and move out the initialization - Math::RandomShuffler shuffler; - shuffler.portable_random_shuffle(selection.begin(), selection.end()); - // However, ensure that at least "svm_n_parts_" pos./neg. observations are - // included (for cross-validation) - there must be enough, otherwise - // "checkNumObservations_" would have thrown an error. To this end, move - // "svm_n_parts_" pos. observations to the beginning of sequence, followed by - // "svm_n_parts_" neg. observations (pos. first - see reason below): - Size n_obs[2] = {0, 0}; // counters for neg./pos. 
observations - for (Int label = 1; label >= 0; --label) - { - for (Size i = n_obs[1]; i < selection.size(); ++i) - { - Size obs_index = selection[i]; - if (training_labels[obs_index] == label) - { - std::swap(selection[i], selection[n_obs[label]]); - ++n_obs[label]; - } - if (n_obs[label] == svm_n_parts_) - { - break; - } - } - } - selection.resize(svm_n_samples_); - // copy the selected subset back: - std::map temp; - for (vector::iterator it = selection.begin(); it != selection.end(); - ++it) - { - temp[*it] = training_labels[*it]; - } - training_labels.swap(temp); - } - - void FeatureFinderIdentificationAlgorithm::classifyFeatures_(FeatureMap& features) - { - if (features.empty()) - { - return; - } - if (features[0].metaValueExists("rt_delta")) // include RT feature - { - if (std::find(svm_predictor_names_.begin(), svm_predictor_names_.end(), "rt_delta") == svm_predictor_names_.end()) - { - svm_predictor_names_.push_back("rt_delta"); - } - } - // values for all features per predictor (this way around to simplify scaling - // of predictors): - SimpleSVM::PredictorMap predictors; - for (const String& pred : svm_predictor_names_) - { - predictors[pred].reserve(features.size()); - for (Feature& feat : features) - { - if (!feat.metaValueExists(pred)) - { - OPENMS_LOG_ERROR << "Meta value '" << pred << "' missing for feature '" - << feat.getUniqueId() << "'" << endl; - predictors.erase(pred); - break; - } - predictors[pred].push_back(feat.getMetaValue(pred)); - } - } - - // get labels for SVM: - std::map training_labels; - bool no_selection = param_.getValue("svm:no_selection") == "true"; - // mapping (for bias correction): intensity -> (index, positive?) - std::multimap > valid_obs; - Size n_obs[2] = {0, 0}; // counters for neg./pos. 
observations - for (Size feat_index = 0; feat_index < features.size(); ++feat_index) - { - String feature_class = features[feat_index].getMetaValue("feature_class"); - int label = -1; - if (feature_class == "positive") - { - label = 1; - } - else if (feature_class == "negative") - { - label = 0; - } - if (label != -1) - { - ++n_obs[label]; - if (!no_selection) - { - double intensity = features[feat_index].getIntensity(); - valid_obs.insert(make_pair(intensity, make_pair(feat_index, - bool(label)))); - } - else - { - training_labels[feat_index] = (double)label; - } - } - } - checkNumObservations_(n_obs[1], n_obs[0]); - - if (!no_selection) - { - getUnbiasedSample_(valid_obs, training_labels); - } - if (svm_n_samples_ > 0) // limited number of samples for training - { - if (training_labels.size() < svm_n_samples_) - { - OPENMS_LOG_WARN << "Warning: There are only " << training_labels.size() - << " valid observations for training." << endl; - } - else if (training_labels.size() > svm_n_samples_) - { - getRandomSample_(training_labels); - } - } - - SimpleSVM svm; - // set (only) the relevant parameters: - Param svm_params = svm.getParameters(); - Logger::LogStream no_log; // suppress warnings about additional parameters - svm_params.update(param_.copy("svm:", true), false, no_log); - svm.setParameters(svm_params); - svm.setup(predictors, training_labels); - if (!svm_xval_out_.empty()) - { - svm.writeXvalResults(svm_xval_out_); - } - if ((debug_level_ > 0) && svm_params.getValue("kernel") == "linear") - { - std::map feature_weights; - svm.getFeatureWeights(feature_weights); - OPENMS_LOG_DEBUG << "SVM feature weights:" << endl; - for (std::map::iterator it = feature_weights.begin(); - it != feature_weights.end(); ++it) - { - OPENMS_LOG_DEBUG << "- " << it->first << ": " << it->second << endl; - } - } - - std::vector predictions; - svm.predict(predictions); - OPENMS_POSTCONDITION(predictions.size() == features.size(), - "SVM predictions for all features expected"); - for 
(Size i = 0; i < features.size(); ++i) - { - features[i].setMetaValue("predicted_class", predictions[i].outcome); - double prob_positive = predictions[i].probabilities[1]; - features[i].setMetaValue("predicted_probability", prob_positive); - // @TODO: store previous (OpenSWATH) overall quality in a meta value? - features[i].setOverallQuality(prob_positive); - } - } - - - void FeatureFinderIdentificationAlgorithm::filterFeaturesFinalizeAssay_(Feature& best_feature, double best_quality, - const double quality_cutoff) - { - const String& feature_class = best_feature.getMetaValue("feature_class"); - if (feature_class == "positive") // true positive prediction - { - svm_probs_internal_[best_quality].first++; - } - else if ((feature_class == "negative") || // false positive prediction - (feature_class == "ambiguous")) // let's be strict about this - { - svm_probs_internal_[best_quality].second++; - } - else if (feature_class == "unknown") - { - svm_probs_external_.insert(best_quality); - if (best_quality >= quality_cutoff) - { - best_feature.setOverallQuality(best_quality); - ++n_external_features_; - } - } - } - - void FeatureFinderIdentificationAlgorithm::filterFeatures_(FeatureMap& features, bool classified) + + void FeatureFinderIdentificationAlgorithm::filterFeatures_(OpenMS::FeatureMap& features, bool classified) { if (features.empty()) { return; } - if (classified) - { - // Remove features with class "negative" or "ambiguous", keep "positive". - // For class "unknown", for every assay (meta value "PeptideRef"), keep - // the feature with highest "predicted_probability" (= overall quality), - // subject to the "svm:min_prob" threshold. - // We mark features for removal by setting their overall quality to zero. 
- n_internal_features_ = n_external_features_ = 0; - FeatureMap::Iterator best_it = features.begin(); - double best_quality = 0.0; - String previous_ref; - for (FeatureMap::Iterator it = features.begin(); it != features.end(); - ++it) - { - // features from same assay (same "PeptideRef") appear consecutively; - // if this is a new assay, finalize the previous one: - String peptide_ref = it->getMetaValue("PeptideRef"); - // remove region number, if present: - Size pos_slash = peptide_ref.rfind('/'); - Size pos_colon = peptide_ref.find(':', pos_slash + 2); - peptide_ref = peptide_ref.substr(0, pos_colon); - - if (peptide_ref != previous_ref) - { - if (!previous_ref.empty()) - { - filterFeaturesFinalizeAssay_(*best_it, best_quality, - svm_quality_cutoff); - best_quality = 0.0; - } - previous_ref = peptide_ref; - } - - // update qualities: - if ((it->getOverallQuality() > best_quality) || - // break ties by intensity: - ((it->getOverallQuality() == best_quality) && - (it->getIntensity() > best_it->getIntensity()))) - { - best_it = it; - best_quality = it->getOverallQuality(); - } - if (it->getMetaValue("feature_class") == "positive") - { - n_internal_features_++; - } - else - { - it->setOverallQuality(0.0); // gets overwritten for "best" candidate - } - } - // set of features from the last assay: - filterFeaturesFinalizeAssay_(*best_it, best_quality, svm_quality_cutoff); - - features.erase(remove_if(features.begin(), features.end(), - feature_filter_quality_), features.end()); - } - else + + // For non-classified features, we still use the original filtering + if (!classified) { // remove features without ID (or pseudo ID from seeds) - features.erase(remove_if(features.begin(), features.end(), + features.erase(std::remove_if(features.begin(), features.end(), feature_filter_peptides_), features.end()); } + // Note: The classified case is now handled by ExternalIDHandler::filterClassifiedFeatures + // in the postProcess_ method } - - void 
FeatureFinderIdentificationAlgorithm::calculateFDR_(FeatureMap& features) - { - // cumulate the true/false positive counts, in decreasing probability order: - Size n_false = 0, n_true = 0; - for (std::map >::reverse_iterator prob_it = - svm_probs_internal_.rbegin(); prob_it != svm_probs_internal_.rend(); - ++prob_it) - { - n_true += prob_it->second.first; - n_false += prob_it->second.second; - prob_it->second.first = n_true; - prob_it->second.second = n_false; - } - - // print FDR for features that made the cut-off: - std::map >::iterator prob_it = - svm_probs_internal_.lower_bound(svm_min_prob_); - if (prob_it != svm_probs_internal_.end()) - { - float fdr = float(prob_it->second.second) / (prob_it->second.first + - prob_it->second.second); - OPENMS_LOG_INFO << "Estimated FDR of features detected based on 'external' IDs: " - << fdr * 100.0 << "%" << endl; - fdr = (fdr * n_external_features_) / (n_external_features_ + - n_internal_features_); - OPENMS_LOG_INFO << "Estimated FDR of all detected features: " << fdr * 100.0 - << "%" << endl; - } - - // calculate q-values: - std::vector qvalues; - qvalues.reserve(svm_probs_internal_.size()); - double min_fdr = 1.0; - for (prob_it = svm_probs_internal_.begin(); - prob_it != svm_probs_internal_.end(); ++prob_it) - { - double fdr = double(prob_it->second.second) / (prob_it->second.first + - prob_it->second.second); - if (fdr < min_fdr) - { - min_fdr = fdr; - } - qvalues.push_back(min_fdr); - } - // record only probabilities where q-value changes: - std::vector fdr_probs, fdr_qvalues; - std::vector::iterator qv_it = qvalues.begin(); - double previous_qvalue = -1.0; - for (prob_it = svm_probs_internal_.begin(); - prob_it != svm_probs_internal_.end(); ++prob_it, ++qv_it) - { - if (*qv_it != previous_qvalue) - { - fdr_probs.push_back(prob_it->first); - fdr_qvalues.push_back(*qv_it); - previous_qvalue = *qv_it; - } - } - features.setMetaValue("FDR_probabilities", fdr_probs); - features.setMetaValue("FDR_qvalues_raw", 
fdr_qvalues); - - // FDRs are estimated from "internal" features, but apply only to "external" - // ones. "Internal" features are considered "correct" by definition. - // We need to adjust the q-values to take this into account: - std::multiset::reverse_iterator ext_it = svm_probs_external_.rbegin(); - Size external_count = 0; - for (Int i = fdr_probs.size() - 1; i >= 0; --i) - { - double cutoff = fdr_probs[i]; - while ((ext_it != svm_probs_external_.rend()) && (*ext_it >= cutoff)) - { - ++external_count; - ++ext_it; - } - fdr_qvalues[i] = (fdr_qvalues[i] * external_count) / - (external_count + n_internal_features_); - } - features.setMetaValue("FDR_qvalues_corrected", fdr_qvalues); - - // @TODO: should we use "1 - qvalue" as overall quality for features? - // assign q-values to features: - for (Feature& feat : features) - { - if (feat.getMetaValue("feature_class") == "positive") - { - feat.setMetaValue("q-value", 0.0); - } - else - { - double prob = feat.getOverallQuality(); - // find the highest FDR prob. 
that is less-or-equal to the feature prob.: - std::vector::iterator pos = upper_bound(fdr_probs.begin(), - fdr_probs.end(), prob); - if (pos != fdr_probs.begin()) - { - --pos; - } - Size dist = distance(fdr_probs.begin(), pos); - feat.setMetaValue("q-value", fdr_qvalues[dist]); - } - } - } } diff --git a/src/openms/source/FEATUREFINDER/MultiplexClustering.cpp b/src/openms/source/FEATUREFINDER/MultiplexClustering.cpp index 7748112e961..f1cb08a8359 100644 --- a/src/openms/source/FEATUREFINDER/MultiplexClustering.cpp +++ b/src/openms/source/FEATUREFINDER/MultiplexClustering.cpp @@ -31,11 +31,11 @@ namespace OpenMS } // ranges of the experiment - double mz_min = exp_profile.getMinMZ(); - double mz_max = exp_profile.getMaxMZ(); - double rt_min = exp_profile.getMinRT(); - double rt_max = exp_profile.getMaxRT(); - + double mz_min = exp_profile.spectrumRanges().getMinMZ(); + double mz_max = exp_profile.spectrumRanges().getMaxMZ(); + double rt_min = exp_profile.spectrumRanges().getMinRT(); + double rt_max = exp_profile.spectrumRanges().getMaxRT(); + // extend the grid by a small absolute margin double mz_margin = 1e-2; double rt_margin = 1e-2; @@ -81,10 +81,10 @@ namespace OpenMS rt_typical_(rt_typical) { // ranges of the experiment - double mz_min = exp.getMinMZ(); - double mz_max = exp.getMaxMZ(); - double rt_min = exp.getMinRT(); - double rt_max = exp.getMaxRT(); + double mz_min = exp.spectrumRanges().byMSLevel(1).getMinMZ(); + double mz_max = exp.spectrumRanges().byMSLevel(1).getMaxMZ(); + double rt_min = exp.spectrumRanges().byMSLevel(1).getMinRT(); + double rt_max = exp.spectrumRanges().byMSLevel(1).getMaxRT(); if (!RangeMZ(0.0, 1.0e12).containsMZ({mz_min, mz_max}) || !RangeRT(-1.0e12, 1.0e12).containsRT({rt_min, rt_max}) ) diff --git a/src/openms/source/FEATUREFINDER/sources.cmake b/src/openms/source/FEATUREFINDER/sources.cmake index 0f7f6c389f5..eee7581e3f1 100644 --- a/src/openms/source/FEATUREFINDER/sources.cmake +++ b/src/openms/source/FEATUREFINDER/sources.cmake 
@@ -20,6 +20,7 @@ FeatureFinderIdentificationAlgorithm.cpp FeatureFinderAlgorithmMetaboIdent.cpp FeatureFinderMultiplexAlgorithm.cpp FeatureFindingMetabo.cpp +FFIDAlgoExternalIDHandler.cpp Fitter1D.cpp GaussFitter1D.cpp GaussModel.cpp diff --git a/src/openms/source/FORMAT/HANDLERS/MzDataHandler.cpp b/src/openms/source/FORMAT/HANDLERS/MzDataHandler.cpp index 9ca2ddd485e..643090db81a 100644 --- a/src/openms/source/FORMAT/HANDLERS/MzDataHandler.cpp +++ b/src/openms/source/FORMAT/HANDLERS/MzDataHandler.cpp @@ -591,10 +591,7 @@ namespace OpenMS::Internal << sm.getName() << "\n"; -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wconversion" - if (!sm.getNumber().empty() || sm.getState() || sm.getMass() || sm.getVolume() || sm.getConcentration() || !sm.isMetaEmpty()) -#pragma clang diagnostic pop + if (! sm.getNumber().empty() || sm.getState() != Sample::SAMPLENULL || sm.getMass() || sm.getVolume() || sm.getConcentration() || ! sm.isMetaEmpty()) { os << "\t\t\t\n"; writeCVS_(os, sm.getNumber(), "1000001", "SampleNumber"); diff --git a/src/openms/source/FORMAT/HANDLERS/MzMLHandler.cpp b/src/openms/source/FORMAT/HANDLERS/MzMLHandler.cpp index f643342819c..79952c0f56c 100644 --- a/src/openms/source/FORMAT/HANDLERS/MzMLHandler.cpp +++ b/src/openms/source/FORMAT/HANDLERS/MzMLHandler.cpp @@ -18,8 +18,10 @@ #include #include + #include + namespace OpenMS::Internal { diff --git a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp index 6af312971b6..e9375935e4b 100644 --- a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp +++ b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -417,43 +418,151 @@ namespace OpenMS::Internal } } - //******************************************************************************************************************* + size_t StringManager::strLength(const XMLCh* input_ptr) { + if (input_ptr == nullptr) { + return 
0; + } + + XMLSize_t processed_chars = 0; + const XMLCh* pos_ptr = input_ptr; + + // Verarbeite einzelne Zeichen, bis der Pointer 16-Byte-aligned ist + uintptr_t ptr_value = reinterpret_cast(pos_ptr); + size_t misalignment = ptr_value & 0xF; // Berechnet Misalignment als (Adresswert) mod 16 + size_t chars_to_align = misalignment ? (16 - misalignment) / sizeof(XMLCh) : 0; + + // Vorverarbeitung einzelner Zeichen bis zum Alignment oder bis zum Ende des Strings + for (size_t i = 0; i < chars_to_align; ++i) { + if (*pos_ptr == 0) { + return i; + } + ++pos_ptr; + } + processed_chars = chars_to_align; + + // Hauptschleife mit SIMD-Operationen + const simde__m128i zero = simde_mm_setzero_si128(); + while (true) { + // SIMD-Operation + simde__m128i bits = simde_mm_load_si128(reinterpret_cast(pos_ptr)); + simde__m128i cmp_zero = simde_mm_cmpeq_epi16(bits, zero); + uint16_t zero_mask = simde_mm_movemask_epi8(cmp_zero); + + if (zero_mask != 0x0000) { + size_t byte_pos_zero = __builtin_ctz(zero_mask); + size_t char_pos_zero = byte_pos_zero / 2; + return processed_chars + char_pos_zero; + } + + // 8 Zeichen (16 Bytes) wurden verarbeitet, keine Null gefunden + pos_ptr += 8; + processed_chars += 8; + } + + // Diese Zeile wird nie erreicht + return processed_chars; + } + + void StringManager::compress64_(const XMLCh* inputIt, char* outputIt) + { + simde__m128i bits = simde_mm_loadu_si128(reinterpret_cast(inputIt)); - StringManager::StringManager() - = default; + // Select every second byte (little-endian lower byte of each UTF-16 character) + const simde__m128i shuffleMask = simde_mm_setr_epi8( + 0, 2, 4, 6, 8, 10, 12, 14, + -1, -1, -1, -1, -1, -1, -1, -1 + ); + + simde__m128i compressed = simde_mm_shuffle_epi8(bits, shuffleMask); + + // Store the lower 64 bits (8 ASCII characters) + simde_mm_storel_epi64(reinterpret_cast(outputIt), compressed); + } - StringManager::~StringManager() - = default; + bool StringManager::isASCII(const XMLCh* chars, const XMLSize_t length) + { + if 
(length == 0) + { + return true; + } + + Size fullBlocks = length / 8; + Size remainder = length % 8; + + const XMLCh* inputPtr = chars; + simde__m128i mask = simde_mm_set1_epi16(0xFF00); + bool bitmask = true; + + // Process blocks of 8 UTF-16 characters using SIMD + for (Size i = 0; i < fullBlocks; ++i) + { + simde__m128i bits = simde_mm_loadu_si128(reinterpret_cast(inputPtr)); + simde__m128i zero = simde_mm_setzero_si128(); + simde__m128i andOp = simde_mm_and_si128(bits, mask); + simde__m128i cmp = simde_mm_cmpeq_epi16(andOp, zero); + + if (simde_mm_movemask_epi8(cmp) != 0xFFFF) + { + return false; + } + + inputPtr += 8; + } - void StringManager::appendASCII(const XMLCh * chars, const XMLSize_t length, String & result) + // Check remaining characters individually + for (Size i = 0; i < remainder && bitmask; ++i) { - // XMLCh are characters in UTF16 (usually stored as 16bit unsigned + if (inputPtr[i] & 0xFF00) + { + return false; + } + } + + return bitmask; + } + + void StringManager::appendASCII(const XMLCh* chars, const XMLSize_t length, String& result) + { + // XMLCh are characters in UTF16 (usually stored as 16-bit unsigned // short but this is not guaranteed). // We know that the Base64 string here can only contain plain ASCII // and all bytes except the least significant one will be zero. Thus // we can convert to char directly (only keeping the least // significant byte). 
- - const XMLCh* it = chars; - const XMLCh* end = it + length; - - size_t curr_size = result.size(); - result.resize(curr_size + length); - std::string::iterator str_it = result.begin(); - std::advance(str_it, curr_size); - while (it!=end) - { - *str_it = (char)*it; - ++str_it; - ++it; + + Size fullBlocks = length / 8; + Size remainder = length % 8; + + const XMLCh* inputPtr = chars; + + Size currentSize = result.size(); + result.resize(currentSize + length); + char* outputPtr = &result[currentSize]; + + // Copy blocks of 8 characters at a time + for (Size i = 0; i < fullBlocks; ++i) + { + compress64_(inputPtr, outputPtr); + inputPtr += 8; + outputPtr += 8; + } + + // Copy any remaining characters individually + for (Size i = 0; i < remainder; ++i) + { + outputPtr[i] = static_cast(inputPtr[i] & 0xFF); } + } - // This is ca. 50 % faster than - // for (size_t i = 0; i < length; i++) - // { - // result[curr_size + i] = (char)chars[i]; - // } + //******************************************************************************************************************* + + StringManager::StringManager() + = default; + + StringManager::~StringManager() + = default; + + - } } // namespace OpenMS // namespace Internal diff --git a/src/openms/source/FORMAT/MSPFile.cpp b/src/openms/source/FORMAT/MSPFile.cpp index 13cd90d37a5..f8275eaa3cf 100644 --- a/src/openms/source/FORMAT/MSPFile.cpp +++ b/src/openms/source/FORMAT/MSPFile.cpp @@ -10,6 +10,7 @@ #include #include +#include #include #include @@ -28,7 +29,7 @@ namespace OpenMS defaults_.setValidStrings("parse_headers", parse_strings); defaults_.setValue("parse_peakinfo", "true", "Flag whether the peak annotation information should be parsed and stored for each peak"); defaults_.setValidStrings("parse_peakinfo", parse_strings); - defaults_.setValue("parse_firstpeakinfo_only", "true", "Flag whether only the first (default for 1:1 correspondence in SpecLibSearcher) or all peak annotation information should be parsed and stored for each 
peak."); + defaults_.setValue("parse_firstpeakinfo_only", "true", "Flag whether only the first or all peak annotation information should be parsed and stored for each peak."); defaults_.setValidStrings("parse_firstpeakinfo_only", parse_strings); defaults_.setValue("instrument", "", "If instrument given, only spectra of these type of instrument (Inst= in header) are parsed"); defaults_.setValidStrings("instrument", {"","it","qtof","toftof"}); @@ -322,6 +323,18 @@ namespace OpenMS } } + void MSPFile::load(const String & filename, AnnotatedMSRun & annot_exp) + { + // use existing load function + vector ids; + MSExperiment exp; + this->load(filename, ids, exp); + + // Convert to the new data structure (one PeptideIdentification per spectrum) + annot_exp.setPeptideIdentifications(std::move(ids)); + annot_exp.getMSExperiment() = std::move(exp); + } + void MSPFile::parseHeader_(const String & header, PeakSpectrum & spec) { // first header from std_protein of NIST spectra DB @@ -343,7 +356,7 @@ namespace OpenMS } //TODO adapt store to write new? 
format - void MSPFile::store(const String & filename, const PeakMap & exp) const + void MSPFile::store(const String & filename, const AnnotatedMSRun & exp) const { if (!FileHandler::hasValidExtension(filename, FileTypes::MSP)) { @@ -358,11 +371,11 @@ namespace OpenMS ofstream out(filename.c_str()); - for (const MSSpectrum& it : exp) + for (auto [spectrum, peptide_id] : exp) { - if (!it.getPeptideIdentifications().empty() && !it.getPeptideIdentifications().begin()->getHits().empty()) + if (!peptide_id.getHits().empty()) { - PeptideHit hit = *it.getPeptideIdentifications().begin()->getHits().begin(); + PeptideHit hit = peptide_id.getHits()[0]; String peptide; for (const Residue& pit : hit.getSequence()) { @@ -419,10 +432,10 @@ namespace OpenMS out << " Mods=0"; } out << " Inst=it\n"; // @improvement write instrument type, protein...and other information - out << "Num peaks: " << it.size() << "\n"; + out << "Num peaks: " << spectrum.size() << "\n"; // normalize to 10,000 - PeakSpectrum rich_spec = it; + PeakSpectrum rich_spec = spectrum; double max_int(0); for (const Peak1D& sit : rich_spec) { diff --git a/src/openms/source/FORMAT/MzQCFile.cpp b/src/openms/source/FORMAT/MzQCFile.cpp index 19c9025455d..8a7f55bad05 100644 --- a/src/openms/source/FORMAT/MzQCFile.cpp +++ b/src/openms/source/FORMAT/MzQCFile.cpp @@ -107,9 +107,9 @@ namespace OpenMS // Number of chromatograms" addMetric("QC:4000135", exp.getChromatograms().size()); // Run time (RT duration) - addMetric("QC:4000053", UInt(exp.getMaxRT() - exp.getMinRT())); + addMetric("QC:4000053", UInt(exp.spectrumRanges().getMaxRT() - exp.spectrumRanges().getMinRT())); // MZ acquisition range - addMetric("QC:4000138", tuple{exp.getMinMZ(), exp.getMaxMZ()}); + addMetric("QC:4000138", tuple{exp.spectrumRanges().getMinMZ(), exp.spectrumRanges().getMaxMZ()}); // TICs if (tic.isRunnable(status)) { diff --git a/src/openms/source/FORMAT/ParamCWLFile.cpp b/src/openms/source/FORMAT/ParamCWLFile.cpp index 1b9d865a368..80dc8473ed0 
100644 --- a/src/openms/source/FORMAT/ParamCWLFile.cpp +++ b/src/openms/source/FORMAT/ParamCWLFile.cpp @@ -12,7 +12,12 @@ #include #include #include + +#if defined(ENABLE_TDL) #include +#else +#include +#endif using json = nlohmann::json; @@ -56,6 +61,7 @@ namespace OpenMS void ParamCWLFile::writeCWLToStream(std::ostream* os_ptr, const Param& param, const ToolInfo& tool_info) const { +#if defined(ENABLE_TDL) std::ostream& os = *os_ptr; os.precision(std::numeric_limits::digits10); @@ -316,5 +322,8 @@ namespace OpenMS "# SPDX-License-Identifier: Apache-2.0\n"; os << convertToCWL(tdl_tool_info) << "\n"; +#else + throw std::runtime_error{"TDL support is not available. Rebuild with -DENABLE_TDL=ON to enable this feature."}; +#endif } } // namespace OpenMS diff --git a/src/openms/source/FORMAT/ParamJSONFile.cpp b/src/openms/source/FORMAT/ParamJSONFile.cpp index 852895a780d..a2573846e01 100644 --- a/src/openms/source/FORMAT/ParamJSONFile.cpp +++ b/src/openms/source/FORMAT/ParamJSONFile.cpp @@ -11,7 +11,11 @@ #include #include #include +#if defined(ENABLE_TDL) #include +#else +#include +#endif using json = nlohmann::json; @@ -174,6 +178,7 @@ namespace OpenMS void ParamJSONFile::writeToStream(std::ostream* os_ptr, const Param& param) const { +#if defined(ENABLE_TDL) std::ostream& os = *os_ptr; // discover the name of the first nesting Level @@ -303,5 +308,8 @@ namespace OpenMS assert(stack.size() == 1); os << jsonDoc.dump(2); +#else + throw std::runtime_error{"TDL support is not available. 
Rebuild with -DENABLE_TDL=ON to enable this feature."}; +#endif } } // namespace OpenMS diff --git a/src/openms/source/IONMOBILITY/IMDataConverter.cpp b/src/openms/source/IONMOBILITY/IMDataConverter.cpp index dcb36573ac1..b85458fc099 100644 --- a/src/openms/source/IONMOBILITY/IMDataConverter.cpp +++ b/src/openms/source/IONMOBILITY/IMDataConverter.cpp @@ -134,7 +134,7 @@ namespace OpenMS std::vector results(number_of_bins); in.updateRanges(); // find the IM range - const auto range_IM = RangeMobility(in); + const auto range_IM = RangeMobility(in.spectrumRanges()); if (range_IM.getSpan() / number_of_bins < bin_extension_abs * 2) { throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, String("Bin size (") + String(range_IM.getSpan() / number_of_bins) + ") is smaller than the overlap.", String(bin_extension_abs*2)); @@ -146,7 +146,6 @@ namespace OpenMS // results for each IM-frame: all spectra per bin, to get merged MSExperiment binned_spectra; - SpectraMerger merger; auto p = merger.getParameters(); const auto ms_levels = in.getMSLevels(); @@ -168,7 +167,6 @@ namespace OpenMS MSExperiment frame_melt = IMDataConverter::reshapeIMFrameToMany(std::move(frame)); for (size_t i = 0; i < bins.size(); ++i) - { binned_spectra.clear(false); // check if spectrum goes into this bin @@ -208,7 +206,7 @@ namespace OpenMS term = &cv.getTerm("MS:1002816"); break; case DriftTimeUnit::VSSC: - term = &cv.getTerm("MS:1003008"); + term = &cv.getTerm("MS:1003008"); break; default: throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Unit cannot be converted into CV term.", toString(unit)); diff --git a/src/openms/source/KERNEL/ChromatogramRangeManager.cpp b/src/openms/source/KERNEL/ChromatogramRangeManager.cpp new file mode 100644 index 00000000000..0e8adaea8e0 --- /dev/null +++ b/src/openms/source/KERNEL/ChromatogramRangeManager.cpp @@ -0,0 +1,14 @@ +// Copyright (c) 2002-present, OpenMS Inc. 
-- EKU Tuebingen, ETH Zurich, and FU Berlin +// SPDX-License-Identifier: BSD-3-Clause +// +// -------------------------------------------------------------------------- +// $Maintainer: Timo Sachsenberg $ +// $Authors: Administrator $ +// -------------------------------------------------------------------------- + +#include + +namespace OpenMS +{ + // Currently empty as the class only contains inline implementations +} \ No newline at end of file diff --git a/src/openms/source/KERNEL/ConversionHelper.cpp b/src/openms/source/KERNEL/ConversionHelper.cpp index f4347006683..e08fe9238b5 100644 --- a/src/openms/source/KERNEL/ConversionHelper.cpp +++ b/src/openms/source/KERNEL/ConversionHelper.cpp @@ -20,7 +20,7 @@ namespace OpenMS // see @todo above output_map.setUniqueId(); - input_map.updateRanges(1); + input_map.updateRanges(); if (n > input_map.getSize()) { n = input_map.getSize(); diff --git a/src/openms/source/KERNEL/MSExperiment.cpp b/src/openms/source/KERNEL/MSExperiment.cpp index a5d364ace9c..9a2161ba747 100644 --- a/src/openms/source/KERNEL/MSExperiment.cpp +++ b/src/openms/source/KERNEL/MSExperiment.cpp @@ -25,8 +25,10 @@ namespace OpenMS { /// Constructor MSExperiment::MSExperiment() : - RangeManagerContainerType(), - ExperimentalSettings() + ExperimentalSettings(), + spectrum_ranges_(), + chromatogram_ranges_(), + combined_ranges_() {} /// Copy constructor @@ -39,14 +41,15 @@ namespace OpenMS { return *this; } - RangeManagerContainerType::operator=(source); ExperimentalSettings::operator=(source); chromatograms_ = source.chromatograms_; spectra_ = source.spectra_; - - //no need to copy the alloc?! - //alloc_ + + // Copy the range managers + spectrum_ranges_ = source.spectrum_ranges_; + chromatogram_ranges_ = source.chromatogram_ranges_; + combined_ranges_ = source.combined_ranges_; return *this; } @@ -214,91 +217,77 @@ namespace OpenMS /** @name Range methods - - @note The range values (min, max, etc.) are not updated automatically. 
Call updateRanges() to update the values! */ - ///@{ - // Docu in base class - void MSExperiment::updateRanges() - { - updateRanges(-1); - } /** - @brief Updates the m/z, intensity, retention time, ion mobility and MS level ranges of all spectra with a certain ms level - - @param ms_level MS level to consider for m/z range, RT range, intensity range and ion mobility (if negative, all MS levels are used) + @brief Updates the m/z, intensity, retention time, ion mobility ranges for all spectra and chromatograms */ - void MSExperiment::updateRanges(Int ms_level) + void MSExperiment::updateRanges() { #ifdef OPENMS_ASSERTIONS - double rt_min = RangeRT::isEmpty() ? 0 : getMinRT(); - double rt_max = RangeRT::isEmpty() ? 0 : getMaxRT(); - double mz_min = RangeMZ::isEmpty() ? 0 : getMinMZ(); - double mz_max = RangeMZ::isEmpty() ? 0 : getMaxMZ(); - double int_min = RangeIntensity::isEmpty() ? 0 : getMinIntensity(); - double int_max = RangeIntensity::isEmpty() ? 0 : getMaxIntensity(); - double im_min = RangeMobility::isEmpty() ? 0 : getMinMobility(); - double im_max = RangeMobility::isEmpty() ? 0 : getMaxMobility(); + double rt_min = combined_ranges_.RangeRT::isEmpty() ? 0 : combined_ranges_.getMinRT(); + double rt_max = combined_ranges_.RangeRT::isEmpty() ? 0 : combined_ranges_.getMaxRT(); + double mz_min = combined_ranges_.RangeMZ::isEmpty() ? 0 : combined_ranges_.getMinMZ(); + double mz_max = combined_ranges_.RangeMZ::isEmpty() ? 0 : combined_ranges_.getMaxMZ(); + double int_min = combined_ranges_.RangeIntensity::isEmpty() ? 0 : combined_ranges_.getMinIntensity(); + double int_max = combined_ranges_.RangeIntensity::isEmpty() ? 0 : combined_ranges_.getMaxIntensity(); + double im_min = combined_ranges_.RangeMobility::isEmpty() ? 0 : combined_ranges_.getMinMobility(); + double im_max = combined_ranges_.RangeMobility::isEmpty() ? 
0 : combined_ranges_.getMaxMobility(); #endif - // reset mz/rt/int range - this->clearRanges(); + // Reset all range managers + clearRanges(); - // empty + // Empty experiment if (spectra_.empty() && chromatograms_.empty()) { return; } - // update + // Update spectrum ranges for (Base::iterator it = spectra_.begin(); it != spectra_.end(); ++it) - { - if (ms_level < Int(0) || Int(it->getMSLevel()) == ms_level) - { - // ranges - this->extendRT(it->getRT()); // RT - // m/z, intensity and ion mobility from spectrum's range - it->updateRanges(); - this->extend(*it); - } - // for MS level = 1 we extend the range for all the MS2 precursors - if (ms_level == 1 && it->getMSLevel() == 2) - { - if (!it->getPrecursors().empty()) - { - this->extendRT(it->getRT()); - this->extendMZ(it->getPrecursors()[0].getMZ()); - } - } + { + // Update ranges for the spectrum itself + it->updateRanges(); + + // Update spectrum range manager with this spectrum's ranges + // Add to both general ranges and MS level-specific ranges + spectrum_ranges_.extendUnsafe(*it); + spectrum_ranges_.extendRT(it->getRT()); // RT is not part of the range of an individual spectrum + + spectrum_ranges_.extendUnsafe(*it, it->getMSLevel()); + spectrum_ranges_.extendRT(it->getRT(), it->getMSLevel()); // RT is not part of the range of an individual spectrum + } - if (this->chromatograms_.empty()) + // Update chromatogram ranges + if (!chromatograms_.empty()) { - return; + for (ChromatogramType& cp : chromatograms_) + { + // Update range of EACH chromatogram + cp.updateRanges(); + + // Add RT and intensity ranges to the chromatogram manager + chromatogram_ranges_.extend(cp.getRange()); + chromatogram_ranges_.extendMZ(cp.getMZ()); // MZ is not part of the range of an individual chromatogram + } } - // update intensity, m/z and RT according to chromatograms as well: - for (ChromatogramType& cp : chromatograms_) - { - // update range of EACH chrom, if we need them individually later - cp.updateRanges(); - - // ranges - 
this->extendMZ(cp.getMZ());// MZ - this->extend(cp);// RT and intensity from chroms's range - } + // Update the combined range manager with both spectrum and chromatogram ranges + combined_ranges_.extendUnsafe(spectrum_ranges_); + combined_ranges_.extendUnsafe(chromatogram_ranges_); #ifdef OPENMS_ASSERTIONS - // check if updateRanges() was necessary and find places where it was not - double im_min_new = RangeMobility::isEmpty() ? 0 : getMinMobility(); - double im_max_new = RangeMobility::isEmpty() ? 0 : getMaxMobility(); - double int_min_new = RangeIntensity::isEmpty() ? 0 : getMinIntensity(); - double int_max_new = RangeIntensity::isEmpty() ? 0 : getMaxIntensity(); - double rt_min_new = RangeRT::isEmpty() ? 0 : getMinRT(); - double rt_max_new = RangeRT::isEmpty() ? 0 : getMaxRT(); - double mz_min_new = RangeMZ::isEmpty() ? 0 : getMinMZ(); - double mz_max_new = RangeMZ::isEmpty() ? 0 : getMaxMZ(); + // check if updateRanges() was necessary to find places where it was not + double im_min_new = combined_ranges_.RangeMobility::isEmpty() ? 0 : combined_ranges_.getMinMobility(); + double im_max_new = combined_ranges_.RangeMobility::isEmpty() ? 0 : combined_ranges_.getMaxMobility(); + double int_min_new = combined_ranges_.RangeIntensity::isEmpty() ? 0 : combined_ranges_.getMinIntensity(); + double int_max_new = combined_ranges_.RangeIntensity::isEmpty() ? 0 : combined_ranges_.getMaxIntensity(); + double rt_min_new = combined_ranges_.RangeRT::isEmpty() ? 0 : combined_ranges_.getMinRT(); + double rt_max_new = combined_ranges_.RangeRT::isEmpty() ? 0 : combined_ranges_.getMaxRT(); + double mz_min_new = combined_ranges_.RangeMZ::isEmpty() ? 0 : combined_ranges_.getMinMZ(); + double mz_max_new = combined_ranges_.RangeMZ::isEmpty() ? 
0 : combined_ranges_.getMaxMZ(); if (im_min_new == im_min && im_max_new == im_max && int_min_new == int_min && int_max_new == int_max @@ -428,7 +417,7 @@ namespace OpenMS void MSExperiment::reset() { spectra_.clear(); //remove data - RangeManagerType::clearRanges(); //reset range manager + clearRanges(); // reset all ranges ExperimentalSettings::operator=(ExperimentalSettings()); //reset meta info } @@ -633,24 +622,22 @@ namespace OpenMS /// Swaps the content of this map with the content of @p from void MSExperiment::swap(MSExperiment & from) { - MSExperiment tmp; - - //swap range information - tmp.RangeManagerType::operator=(*this); - this->RangeManagerType::operator=(from); - from.RangeManagerType::operator=(tmp); + // Swap range managers + std::swap(spectrum_ranges_, from.spectrum_ranges_); + std::swap(chromatogram_ranges_, from.chromatogram_ranges_); + std::swap(combined_ranges_, from.combined_ranges_); - //swap experimental settings + // Swap experimental settings + ExperimentalSettings tmp; tmp.ExperimentalSettings::operator=(*this); this->ExperimentalSettings::operator=(from); from.ExperimentalSettings::operator=(tmp); - // swap chromatograms + // Swap chromatograms std::swap(chromatograms_, from.chromatograms_); - //swap peaks + // Swap spectra spectra_.swap(from.getSpectra()); - } /// sets the spectrum list @@ -834,48 +821,37 @@ namespace OpenMS void MSExperiment::clear(bool clear_meta_data) { spectra_.clear(); + chromatograms_.clear(); if (clear_meta_data) { - clearRanges(); + clearRanges(); // reset all ranges this->ExperimentalSettings::operator=(ExperimentalSettings()); // no "clear" method - chromatograms_.clear(); } } // static bool MSExperiment::containsScanOfLevel(size_t ms_level) const { - //test if no scans with MS-level 1 exist - for (const auto& spec : getSpectra()) - { - if (spec.getMSLevel() == ms_level) - { - return true; - } - } - return false; + // Check if any spectrum with the specified MS level exists + return 
std::any_of(getSpectra().begin(), getSpectra().end(), + [ms_level](const auto& spec) { return spec.getMSLevel() == ms_level; }); } bool MSExperiment::hasZeroIntensities(size_t ms_level) const { - for (const auto& spec : getSpectra()) - { - if (spec.getMSLevel() != ms_level) - { - continue; - } - for (const auto& p : spec) - { - if (p.getIntensity() == 0.0) - { - return true; - } - } - } - return false; + // Check if any spectrum of the specified MS level contains peaks with zero intensity + return std::any_of(getSpectra().begin(), getSpectra().end(), + [ms_level](const auto& spec) { + if (spec.getMSLevel() != ms_level) return false; // Skip spectra that don't match the requested MS level + + // Check if this spectrum has any zero intensity peaks + return std::any_of(spec.begin(), spec.end(), + [](const auto& peak) { return peak.getIntensity() == 0.0; }); + }); } + /* bool MSExperiment::hasPeptideIdentifications() const { for (const auto& spec : getSpectra()) @@ -887,6 +863,7 @@ namespace OpenMS } return false; } + */ bool MSExperiment::isIMFrame() const { diff --git a/src/openms/source/KERNEL/SpectrumRangeManager.cpp b/src/openms/source/KERNEL/SpectrumRangeManager.cpp new file mode 100644 index 00000000000..d940447792b --- /dev/null +++ b/src/openms/source/KERNEL/SpectrumRangeManager.cpp @@ -0,0 +1,14 @@ +// Copyright (c) 2002-present, OpenMS Inc. 
-- EKU Tuebingen, ETH Zurich, and FU Berlin +// SPDX-License-Identifier: BSD-3-Clause +// +// -------------------------------------------------------------------------- +// $Maintainer: Timo Sachsenberg $ +// $Authors: Administrator $ +// -------------------------------------------------------------------------- + +#include + +namespace OpenMS +{ + // Currently empty as the class only contains inline implementations +} \ No newline at end of file diff --git a/src/openms/source/KERNEL/sources.cmake b/src/openms/source/KERNEL/sources.cmake index f693ef7625b..804cbeaf5ee 100644 --- a/src/openms/source/KERNEL/sources.cmake +++ b/src/openms/source/KERNEL/sources.cmake @@ -32,6 +32,8 @@ PeakIndex.cpp RangeManager.cpp RichPeak2D.cpp SpectrumHelper.cpp +SpectrumRangeManager.cpp +ChromatogramRangeManager.cpp ) ### add path to the filenames diff --git a/src/openms/source/METADATA/AnnotatedMSRun.cpp b/src/openms/source/METADATA/AnnotatedMSRun.cpp new file mode 100644 index 00000000000..482c1592b84 --- /dev/null +++ b/src/openms/source/METADATA/AnnotatedMSRun.cpp @@ -0,0 +1,64 @@ +// Copyright (c) 2002-present, OpenMS Inc. 
-- EKU Tuebingen, ETH Zurich, and FU Berlin +// SPDX-License-Identifier: BSD-3-Clause +// +// -------------------------------------------------------------------------- +// $Maintainer: Timo Sachsenberg$ +// $Authors: David Voigt, Timo Sachsenberg $ +// -------------------------------------------------------------------------- +#include + + +namespace OpenMS +{ + std::vector& AnnotatedMSRun::getPeptideIdentifications() + { + return peptide_ids_; + } + + const std::vector& AnnotatedMSRun::getPeptideIdentifications() const + { + return peptide_ids_; + } + + void AnnotatedMSRun::setPeptideIdentifications(const std::vector& ids) + { + peptide_ids_ = ids; + } + + void AnnotatedMSRun::setPeptideIdentifications(std::vector&& ids) + { + peptide_ids_ = std::move(ids); + } + + MSExperiment& AnnotatedMSRun::getMSExperiment() + { + return data; + } + + const MSExperiment& AnnotatedMSRun::getMSExperiment() const + { + return data; + } + + void AnnotatedMSRun::setMSExperiment(MSExperiment&& experiment) + { + data = std::move(experiment); + } + + void AnnotatedMSRun::setMSExperiment(const MSExperiment& experiment) + { + data = experiment; + } + + void AnnotatedMSRun::checkPeptideIdSize_(const char* function_name) const + { + if (data.getSpectra().size() != peptide_ids_.size()) + { + throw Exception::InvalidValue(__FILE__, __LINE__, + function_name, // Use the provided function name + "Internal inconsistency: Number of spectra and peptide identifications do not match.", + String(data.getSpectra().size()) + " vs " + String(peptide_ids_.size())); + } + } +} + diff --git a/src/openms/source/METADATA/AnnotatedMSRun.h b/src/openms/source/METADATA/AnnotatedMSRun.h new file mode 100644 index 00000000000..b61cbbcc43b --- /dev/null +++ b/src/openms/source/METADATA/AnnotatedMSRun.h @@ -0,0 +1,285 @@ +// Copyright (c) 2002-present, OpenMS Inc. 
-- EKU Tuebingen, ETH Zurich, and FU Berlin +// SPDX-License-Identifier: BSD-3-Clause +// +// -------------------------------------------------------------------------- +// $Maintainer: Timo Sachsenberg$ +// $Authors: David Voigt $ +// ------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include +#include +#include + +#include + +namespace OpenMS +{ + class PeptideIdentification; + + class MSSpectrum; + + /** + * @brief Class for storing MS run data with peptide and protein identifications + * + * This class stores an MSExperiment (containing spectra) along with peptide and protein + * identifications. Each spectrum in the MSExperiment is associated with a single + * PeptideIdentification object. + * + * The class provides methods to access and modify these identifications, as well as + * iterators to traverse the spectra and their associated identifications together. + */ + class OPENMS_DLLAPI AnnotatedMSRun + { + public: + typedef std::pair Mapping; + typedef std::pair ConstMapping; + + /// Default constructor + AnnotatedMSRun() = default; + + /** + * @brief Move constructor for efficiently loading a MSExperiment without a deep copy + * @param experiment The MSExperiment to move into this object + */ + explicit AnnotatedMSRun(MSExperiment&& experiment) : data(std::move(experiment)) + {}; + + /// Move constructor + AnnotatedMSRun(AnnotatedMSRun&&) = default; + + /// Destructor + ~AnnotatedMSRun() = default; + + /** + * @brief Get the protein identification + * @return A reference to the protein identification + */ + std::vector& getProteinIdentifications() + { + return protein_ids_; + } + + /** + * @brief Get the protein identification (const version) + * @return A const reference to the protein identification + */ + const std::vector& getProteinIdentifications() const + { + return protein_ids_; + } + + /** + * @brief Get all peptide identifications for all spectra 
+ * @return A reference to the vector of peptide identifications + */ + std::vector& getPeptideIdentifications(); + + /** + * @brief Get all peptide identifications for all spectra (const version) + * @return A const reference to the vector of peptide identifications + */ + const std::vector& getPeptideIdentifications() const; + + /** + * @brief Set all peptide identifications for all spectra + * @param ids Vector of peptide identifications + */ + void setPeptideIdentifications(std::vector&& ids); + + /** + * @brief Set all peptide identifications for all spectra + * @param ids Vector of peptide identifications + */ + void setPeptideIdentifications(const std::vector& ids); + + /** + * @brief Get the MSExperiment + * @return A reference to the MSExperiment + */ + MSExperiment& getMSExperiment(); + + /** + * @brief Get the MSExperiment (const version) + * @return A const reference to the MSExperiment + */ + const MSExperiment& getMSExperiment() const; + + /** + * @brief Set the MSExperiment + * @param experiment The MSExperiment to set + */ + void setMSExperiment(MSExperiment&& experiment); + + /** + * @brief Set the MSExperiment + * @param experiment The MSExperiment to set + */ + void setMSExperiment(const MSExperiment& experiment); + + /** + * @brief Get a const iterator to the beginning of the data + * @return A const iterator to the beginning + */ + inline auto cbegin() const + { + return PairIterator(data.getSpectra().cbegin(), peptide_ids.cbegin()); + } + + /** + * @brief Get an iterator to the beginning of the data + * @return An iterator to the beginning + */ + inline auto begin() + { + return PairIterator(data.getSpectra().begin(), peptide_ids.begin()); + } + + /** + * @brief Get a const iterator to the beginning of the data + * @return A const iterator to the beginning + */ + inline auto begin() const + { + return PairIterator(data.getSpectra().cbegin(), peptide_ids.cbegin()); + } + + /** + * @brief Get an iterator to the end of the data + * @return An 
iterator to the end + */ + inline auto end() + { + return PairIterator(data.getSpectra().end(), peptide_ids.end()); + } + + /** + * @brief Get a const iterator to the end of the data + * @return A const iterator to the end + */ + inline auto end() const + { + return PairIterator(data.getSpectra().end(), peptide_ids.end()); + } + + /** + * @brief Get a const iterator to the end of the data + * @return A const iterator to the end + */ + inline auto cend() const + { + return PairIterator(data.getSpectra().cend(), peptide_ids.cend()); + } + + /** + * @brief Access a spectrum and its associated peptide identification + * @param idx The index of the spectrum + * @return A pair of references to the spectrum and its peptide identification + */ + inline Mapping operator[](size_t idx) + { + return {data.getSpectra()[idx], peptide_ids[idx]}; + } + + /** + * @brief Access a spectrum and its associated peptide identification (const version) + * @param idx The index of the spectrum + * @return A pair of const references to the spectrum and its peptide identification + */ + inline ConstMapping operator[](size_t idx) const + { + return {data.getSpectra()[idx], peptide_ids[idx]}; + } + + /** + * @brief Iterator for pairs of spectra and peptide identifications + * + * This iterator allows traversing the spectra and their associated peptide + * identifications together. 
+ */ + template + struct PairIterator + { + // TODO add check that both vectors are of the same length + using iterator_category = std::forward_iterator_tag; + using difference_type = std::ptrdiff_t; + //using value_type = std::pair; + //using pointer = value_type*; + //using reference = value_type&; + + /** + * @brief Constructor + * @param ptr1 Iterator to the spectra + * @param ptr2 Iterator to the peptide identifications + */ + PairIterator(T1 ptr1, T2 ptr2) : m_ptr1(ptr1), m_ptr2(ptr2) + {} + + /** + * @brief Pre-increment operator + * @return Reference to this iterator after incrementing + */ + PairIterator& operator++() + { + ++m_ptr1; + ++m_ptr2; + return *this; + } + + /** + * @brief Post-increment operator + * @return Copy of this iterator before incrementing + */ + PairIterator operator++(int) + { + auto tmp(*this); + ++(*this); + return tmp; + } + + /** + * @brief Dereference operator + * @return A pair of references to the current spectrum and peptide identification + */ + auto operator*() + { + return std::make_pair(std::ref(*m_ptr1), std::ref(*m_ptr2)); + } + + /** + * @brief Equality operator + * @param a First iterator + * @param b Second iterator + * @return True if the iterators are equal + */ + inline friend bool operator==(const PairIterator& a, const PairIterator& b) + { + return a.m_ptr1 == b.m_ptr1 && a.m_ptr2 == b.m_ptr2; + } + + /** + * @brief Inequality operator + * @param a First iterator + * @param b Second iterator + * @return True if the iterators are not equal + */ + inline friend bool operator!=(const PairIterator& a, const PairIterator& b) + { + return !(a == b); + } + + private: + T1 m_ptr1; + T2 m_ptr2; + }; + + typedef AnnotatedMSRun::PairIterator::iterator, std::vector::iterator> Iterator; + typedef AnnotatedMSRun::PairIterator::const_iterator, std::vector::const_iterator> ConstIterator; + + private: + std::vector peptide_ids; + std::vector protein_ids_; + MSExperiment data; + }; +} \ No newline at end of file diff --git 
a/src/openms/source/METADATA/ExperimentalSettings.cpp b/src/openms/source/METADATA/ExperimentalSettings.cpp index 5a9b634e958..c9ec2d7a1e6 100644 --- a/src/openms/source/METADATA/ExperimentalSettings.cpp +++ b/src/openms/source/METADATA/ExperimentalSettings.cpp @@ -25,7 +25,6 @@ namespace OpenMS instrument_ == rhs.instrument_ && hplc_ == rhs.hplc_ && datetime_ == rhs.datetime_ && - protein_identifications_ == rhs.protein_identifications_ && comment_ == rhs.comment_ && fraction_identifier_ == rhs.fraction_identifier_ && MetaInfoInterface::operator==(rhs) && @@ -129,21 +128,6 @@ namespace OpenMS return os; } - const vector & ExperimentalSettings::getProteinIdentifications() const - { - return protein_identifications_; - } - - vector & ExperimentalSettings::getProteinIdentifications() - { - return protein_identifications_; - } - - void ExperimentalSettings::setProteinIdentifications(const vector & protein_identifications) - { - protein_identifications_ = protein_identifications; - } - const String & ExperimentalSettings::getComment() const { return comment_; diff --git a/src/openms/source/METADATA/SpectrumSettings.cpp b/src/openms/source/METADATA/SpectrumSettings.cpp index d9a5b045359..266f2da360e 100644 --- a/src/openms/source/METADATA/SpectrumSettings.cpp +++ b/src/openms/source/METADATA/SpectrumSettings.cpp @@ -28,7 +28,6 @@ namespace OpenMS acquisition_info_(), precursors_(), products_(), - identification_(), data_processing_() { } @@ -46,7 +45,6 @@ namespace OpenMS source_file_ == rhs.source_file_ && precursors_ == rhs.precursors_ && products_ == rhs.products_ && - identification_ == rhs.identification_ && ( data_processing_.size() == rhs.data_processing_.size() && std::equal(data_processing_.begin(), data_processing_.end(), @@ -80,7 +78,6 @@ namespace OpenMS //source_file_ == rhs.source_file_ && precursors_.insert(precursors_.end(), rhs.precursors_.begin(), rhs.precursors_.end()); products_.insert(products_.end(), rhs.products_.begin(), rhs.products_.end()); - 
identification_.insert(identification_.end(), rhs.identification_.begin(), rhs.identification_.end()); data_processing_.insert(data_processing_.end(), rhs.data_processing_.begin(), rhs.data_processing_.end()); } @@ -186,21 +183,6 @@ namespace OpenMS return os; } - const std::vector & SpectrumSettings::getPeptideIdentifications() const - { - return identification_; - } - - std::vector & SpectrumSettings::getPeptideIdentifications() - { - return identification_; - } - - void SpectrumSettings::setPeptideIdentifications(const std::vector & identification) - { - identification_ = identification; - } - const String & SpectrumSettings::getNativeID() const { return native_id_; @@ -227,4 +209,3 @@ namespace OpenMS } } - diff --git a/src/openms/source/METADATA/sources.cmake b/src/openms/source/METADATA/sources.cmake index bd9d51d3624..dda918b3d89 100644 --- a/src/openms/source/METADATA/sources.cmake +++ b/src/openms/source/METADATA/sources.cmake @@ -5,6 +5,7 @@ set(directory source/METADATA) set(sources_list AbsoluteQuantitationStandards.cpp Acquisition.cpp +AnnotatedMSRun.cpp AcquisitionInfo.cpp CVTerm.cpp CVTermList.cpp diff --git a/src/openms/source/ML/RANSAC/RANSACModelLinear.cpp b/src/openms/source/ML/RANSAC/RANSACModelLinear.cpp index 35e2dbe14ec..0210a0c2bb8 100644 --- a/src/openms/source/ML/RANSAC/RANSACModelLinear.cpp +++ b/src/openms/source/ML/RANSAC/RANSACModelLinear.cpp @@ -43,7 +43,7 @@ namespace OpenMS::Math } LinearRegression lin_reg; - lin_reg.computeRegression(0.95, x.begin(), x.end(), y.begin(), false); + lin_reg.computeRegression(0.95, x.begin(), x.end(), y.begin(), true); return lin_reg.getRSquared(); } diff --git a/src/openms_gui/include/OpenMS/VISUAL/LayerData1DChrom.h b/src/openms_gui/include/OpenMS/VISUAL/LayerData1DChrom.h index 5563a0074b5..bd835637a46 100644 --- a/src/openms_gui/include/OpenMS/VISUAL/LayerData1DChrom.h +++ b/src/openms_gui/include/OpenMS/VISUAL/LayerData1DChrom.h @@ -31,7 +31,7 @@ namespace OpenMS bool hasIndex(Size index) const 
override { - return index < chromatogram_map_->getNrChromatograms(); + return index < chromatogram_map_->getMSExperiment().getNrChromatograms(); } RangeAllType getRangeForArea(const RangeAllType partial_range) const override diff --git a/src/openms_gui/include/OpenMS/VISUAL/LayerData1DPeak.h b/src/openms_gui/include/OpenMS/VISUAL/LayerData1DPeak.h index a90d6fb99a8..2ab11eddbfb 100644 --- a/src/openms_gui/include/OpenMS/VISUAL/LayerData1DPeak.h +++ b/src/openms_gui/include/OpenMS/VISUAL/LayerData1DPeak.h @@ -34,7 +34,7 @@ namespace OpenMS bool hasIndex(Size index) const override { - return index < peak_map_->size(); + return index < peak_map_->getMSExperiment().size(); } RangeAllType getRangeForArea(const RangeAllType partial_range) const override diff --git a/src/openms_gui/include/OpenMS/VISUAL/LayerDataBase.h b/src/openms_gui/include/OpenMS/VISUAL/LayerDataBase.h index 2c13b8a71c6..c82f6367698 100644 --- a/src/openms_gui/include/OpenMS/VISUAL/LayerDataBase.h +++ b/src/openms_gui/include/OpenMS/VISUAL/LayerDataBase.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -120,7 +121,7 @@ namespace OpenMS typedef boost::shared_ptr ConsensusMapSharedPtrType; /// Main data type (experiment) - typedef PeakMap ExperimentType; + typedef AnnotatedMSRun ExperimentType; /// SharedPtr on MSExperiment typedef boost::shared_ptr ExperimentSharedPtrType; diff --git a/src/openms_gui/include/OpenMS/VISUAL/LayerDataChrom.h b/src/openms_gui/include/OpenMS/VISUAL/LayerDataChrom.h index d75e1c2129f..e1ce9cbd08c 100644 --- a/src/openms_gui/include/OpenMS/VISUAL/LayerDataChrom.h +++ b/src/openms_gui/include/OpenMS/VISUAL/LayerDataChrom.h @@ -44,13 +44,13 @@ namespace OpenMS void updateRanges() override { - chromatogram_map_->updateRanges(); + chromatogram_map_->getMSExperiment().updateRanges(); } RangeAllType getRange() const override { RangeAllType r; - r.assign(*chromatogram_map_); + r.assign(chromatogram_map_->getMSExperiment().chromatogramRanges()); 
return r; } @@ -62,7 +62,7 @@ namespace OpenMS const ExperimentType::ChromatogramType& getChromatogram(Size idx) const { - return chromatogram_map_->getChromatogram(idx); + return chromatogram_map_->getMSExperiment().getChromatogram(idx); } diff --git a/src/openms_gui/include/OpenMS/VISUAL/LayerDataPeak.h b/src/openms_gui/include/OpenMS/VISUAL/LayerDataPeak.h index 69003c07dd1..6a09b8951ba 100644 --- a/src/openms_gui/include/OpenMS/VISUAL/LayerDataPeak.h +++ b/src/openms_gui/include/OpenMS/VISUAL/LayerDataPeak.h @@ -53,14 +53,14 @@ namespace OpenMS void updateRanges() override { - peak_map_->updateRanges(); + peak_map_->getMSExperiment().updateRanges(); // on_disc_peaks_->updateRanges(); // note: this is not going to work since its on disk! We currently don't have a good way to access these ranges } RangeAllType getRange() const override { RangeAllType r; - r.assign(*peak_map_); + r.assign(peak_map_->getMSExperiment().spectrumRanges()); return r; } @@ -74,9 +74,9 @@ namespace OpenMS const ExperimentType::SpectrumType& getSpectrum(Size spectrum_idx) const { - if ((*peak_map_)[spectrum_idx].size() > 0) + if (peak_map_->getMSExperiment()[spectrum_idx].size() > 0) { - return (*peak_map_)[spectrum_idx]; + return peak_map_->getMSExperiment()[spectrum_idx]; } if (!on_disc_peaks_->empty()) { @@ -84,7 +84,7 @@ namespace OpenMS local_spec = on_disc_peaks_->getSpectrum(spectrum_idx); return local_spec; } - return (*peak_map_)[spectrum_idx]; + return peak_map_->getMSExperiment()[spectrum_idx]; } /** @@ -137,24 +137,30 @@ namespace OpenMS /// Check whether the current layer should be represented as ion mobility bool isIonMobilityData() const { - return this->getPeakData()->size() > 0 && this->getPeakData()->metaValueExists("is_ion_mobility") && this->getPeakData()->getMetaValue("is_ion_mobility").toBool(); + const MSExperiment& exp = this->getPeakData()->getMSExperiment(); + return exp.size() > 0 + && exp.metaValueExists("is_ion_mobility") + && 
exp.getMetaValue("is_ion_mobility").toBool(); } void labelAsIonMobilityData() const { - peak_map_->setMetaValue("is_ion_mobility", "true"); + peak_map_->getMSExperiment().setMetaValue("is_ion_mobility", "true"); } /// Check whether the current layer contains DIA (SWATH-MS) data bool isDIAData() const { - return this->getPeakData()->size() > 0 && this->getPeakData()->metaValueExists("is_dia_data") && this->getPeakData()->getMetaValue("is_dia_data").toBool(); + const MSExperiment& exp = this->getPeakData()->getMSExperiment(); + return exp.size() > 0 + && exp.metaValueExists("is_dia_data") + && exp.getMetaValue("is_dia_data").toBool(); } /// Label the current layer as DIA (SWATH-MS) data void labelAsDIAData() { - peak_map_->setMetaValue("is_dia_data", "true"); + peak_map_->getMSExperiment().setMetaValue("is_dia_data", "true"); } /** @@ -167,13 +173,16 @@ namespace OpenMS */ bool chromatogram_flag_set() const { - return this->getPeakData()->size() > 0 && this->getPeakData()->metaValueExists("is_chromatogram") && this->getPeakData()->getMetaValue("is_chromatogram").toBool(); + const MSExperiment& exp = this->getPeakData()->getMSExperiment(); + return exp.size() > 0 + && exp.metaValueExists("is_chromatogram") + && exp.getMetaValue("is_chromatogram").toBool(); } /// set the chromatogram flag void set_chromatogram_flag() { - peak_map_->setMetaValue("is_chromatogram", "true"); + peak_map_->getMSExperiment().setMetaValue("is_chromatogram", "true"); } /// remove the chromatogram flag @@ -181,7 +190,7 @@ namespace OpenMS { if (this->chromatogram_flag_set()) { - peak_map_->removeMetaValue("is_chromatogram"); + peak_map_->getMSExperiment().removeMetaValue("is_chromatogram"); } } diff --git a/src/openms_gui/include/OpenMS/VISUAL/MetaDataBrowser.h b/src/openms_gui/include/OpenMS/VISUAL/MetaDataBrowser.h index b52f5884385..a58cf0174b1 100644 --- a/src/openms_gui/include/OpenMS/VISUAL/MetaDataBrowser.h +++ b/src/openms_gui/include/OpenMS/VISUAL/MetaDataBrowser.h @@ -12,7 +12,6 @@ 
#include #include -#include #include #include diff --git a/src/openms_gui/include/OpenMS/VISUAL/Plot2DCanvas.h b/src/openms_gui/include/OpenMS/VISUAL/Plot2DCanvas.h index 0960c6756d6..39b1c61540b 100644 --- a/src/openms_gui/include/OpenMS/VISUAL/Plot2DCanvas.h +++ b/src/openms_gui/include/OpenMS/VISUAL/Plot2DCanvas.h @@ -109,8 +109,8 @@ protected slots: // Docu in base class bool finishAdding_() override; - /// Collects fragment ion scans in the indicated RT/mz area and adds them to the indicated action - bool collectFragmentScansInArea_(const RangeType& range, QAction* a, QMenu* msn_scans, QMenu* msn_meta); + /// Collects fragment ion scans in the indicated RT/mz area and adds them to the menus + bool collectFragmentScansInArea_(const RangeType& range, QMenu* msn_scans, QMenu* msn_meta); /// Draws the coordinates (or coordinate deltas) to the widget's upper left corner void drawCoordinates_(QPainter& painter, const PeakIndex& peak); diff --git a/src/openms_gui/source/VISUAL/APPLICATIONS/TOPPViewBase.cpp b/src/openms_gui/source/VISUAL/APPLICATIONS/TOPPViewBase.cpp index 0fc382ab6da..47c8b1a1e2e 100644 --- a/src/openms_gui/source/VISUAL/APPLICATIONS/TOPPViewBase.cpp +++ b/src/openms_gui/source/VISUAL/APPLICATIONS/TOPPViewBase.cpp @@ -64,6 +64,7 @@ #include #include +#include using namespace std; @@ -658,44 +659,49 @@ namespace OpenMS // with actual spectra including raw data (allowing us to only // populate MS1 spectra with actual data). 
- peak_map_sptr = on_disc_peaks->getMetaData(); + peak_map_sptr.get()->getMSExperiment() = *on_disc_peaks->getMetaData(); for (Size k = 0; k < indexed_mzml_file.getNrSpectra() && !cache_ms1_on_disc; k++) { - if ( peak_map_sptr->getSpectrum(k).getMSLevel() == 1) + if ( peak_map_sptr->getMSExperiment().getSpectrum(k).getMSLevel() == 1) { - peak_map_sptr->getSpectrum(k) = on_disc_peaks->getSpectrum(k); + peak_map_sptr->getMSExperiment().getSpectrum(k) = on_disc_peaks->getSpectrum(k); } } for (Size k = 0; k < indexed_mzml_file.getNrChromatograms() && !cache_ms2_on_disc; k++) { - peak_map_sptr->getChromatogram(k) = on_disc_peaks->getChromatogram(k); + peak_map_sptr->getMSExperiment().getChromatogram(k) = on_disc_peaks->getChromatogram(k); } // Load at least one spectrum into memory (TOPPView assumes that at least one spectrum is in memory) - if (cache_ms1_on_disc && peak_map_sptr->getNrSpectra() > 0) peak_map_sptr->getSpectrum(0) = on_disc_peaks->getSpectrum(0); + if (cache_ms1_on_disc + && peak_map_sptr->getMSExperiment().getNrSpectra() > 0) + { + peak_map_sptr->getMSExperiment().getSpectrum(0) = on_disc_peaks->getSpectrum(0); + } } } // Load all data into memory if e.g. other file type than mzML if (!parsing_success) { - fh.loadExperiment(abs_filename, *peak_map_sptr, {file_type}, ProgressLogger::GUI, true, true); + fh.loadExperiment(abs_filename, peak_map_sptr->getMSExperiment(), {file_type}, ProgressLogger::GUI, true, true); } OPENMS_LOG_INFO << "INFO: done loading all " << std::endl; // a mzML file may contain both, chromatogram and peak data // -> this is handled in PlotCanvas::addPeakLayer FIXME: No it's not! - if (peak_map_sptr->getNrSpectra() > 0 && peak_map_sptr->getNrChromatograms() > 0) + if (peak_map_sptr->getMSExperiment().getNrSpectra() > 0 + && peak_map_sptr->getMSExperiment().getNrChromatograms() > 0) { OPENMS_LOG_WARN << "Your input data contains chromatograms and spectra, falling back to display spectra only." 
<< std::endl; data_type = LayerDataBase::DT_PEAK; } - else if (peak_map_sptr->getNrChromatograms() > 0) + else if (peak_map_sptr->getMSExperiment().getNrChromatograms() > 0) { data_type = LayerDataBase::DT_CHROMATOGRAM; } - else if (peak_map_sptr->getNrSpectra() > 0) + else if (peak_map_sptr->getMSExperiment().getNrSpectra() > 0) { data_type = LayerDataBase::DT_PEAK; } @@ -712,8 +718,9 @@ namespace OpenMS } // sort for m/z and update ranges of newly loaded data - peak_map_sptr->sortSpectra(true); - peak_map_sptr->updateRanges(1); + + peak_map_sptr->getMSExperiment().sortSpectra(true); + peak_map_sptr->getMSExperiment().updateRanges(); // try to add the data if (caption == "") @@ -801,7 +808,7 @@ namespace OpenMS (data_type == LayerDataBase::DT_IDENT)); // only one peak spectrum? disable 2D as default - if (peak_map->size() == 1) { maps_as_2d = false; } + if (peak_map->getMSExperiment().size() == 1) { maps_as_2d = false; } // set the window where (new layer) data could be opened in // get EnhancedTabBarWidget with given id @@ -872,7 +879,7 @@ namespace OpenMS // (ensures we will keep track of this flag from now on). 
if (is_dia_data) { - peak_map->setMetaValue("is_dia_data", "true"); + peak_map->getMSExperiment().setMetaValue("is_dia_data", "true"); } // determine the window to open the data in @@ -1927,10 +1934,10 @@ namespace OpenMS // spectrum is generated in the dialog, so just receive it here PeakSpectrum spectrum = spec_gen_dialog_.getSpectrum(); - PeakMap new_exp; - new_exp.addSpectrum(spectrum); - new_exp.updateRanges(); - ExperimentSharedPtrType new_exp_sptr(new PeakMap(new_exp)); + ExperimentSharedPtrType new_exp_sptr = boost::make_shared(); + new_exp_sptr->getMSExperiment().addSpectrum(spectrum); + new_exp_sptr->getMSExperiment().updateRanges(); + FeatureMapSharedPtrType f_dummy(new FeatureMapType()); ConsensusMapSharedPtrType c_dummy(new ConsensusMapType()); ODExperimentSharedPtrType od_dummy(new OnDiscMSExperiment()); @@ -2008,14 +2015,15 @@ namespace OpenMS { const LayerDataBase& layer = getActiveCanvas()->getCurrentLayer(); - ExperimentSharedPtrType exp(new MSExperiment(IMDataConverter::reshapeIMFrameToMany(spec))); + ExperimentSharedPtrType exp = boost::make_shared(); + exp.get()->getMSExperiment() = std::move(IMDataConverter::reshapeIMFrameToMany(spec)); // hack, but currently not avoidable, because 2D widget does not support IM natively yet... 
// for (auto& spec : exp->getSpectra()) spec.setRT(spec.getDriftTime()); // open new 2D widget Plot2DWidget* w = new Plot2DWidget(getCanvasParameters(2), &ws_); // map to IM + MZ - w->setMapper(DimMapper<2>({IMTypes::fromIMUnit(exp->getSpectra()[0].getDriftTimeUnit()), DIM_UNIT::MZ})); + w->setMapper(DimMapper<2>({IMTypes::fromIMUnit(exp->getMSExperiment().getSpectra()[0].getDriftTimeUnit()), DIM_UNIT::MZ})); // add data if (!w->canvas()->addPeakLayer(exp, PlotCanvas::ODExperimentSharedPtrType(new OnDiscMSExperiment()), layer.filename + " (IM Frame)")) @@ -2038,7 +2046,7 @@ namespace OpenMS } // Add spectra into a MSExperiment, sort and prepare it for display - ExperimentSharedPtrType tmpe(new OpenMS::MSExperiment() ); + ExperimentSharedPtrType tmpe = boost::make_shared(); // Collect all MS2 spectra with the same precursor as the current spectrum // (they are in the same SWATH window) @@ -2063,7 +2071,7 @@ namespace OpenMS // view MSSpectrum t = spec; t.setMSLevel(1); - tmpe->addSpectrum(t); + tmpe->getMSExperiment().addSpectrum(t); } else if (lp->getOnDiscPeakData()->getNrSpectra() > k) { @@ -2072,7 +2080,7 @@ namespace OpenMS // view MSSpectrum t = lp->getOnDiscPeakData()->getSpectrum(k); t.setMSLevel(1); - tmpe->addSpectrum(t); + tmpe->getMSExperiment().addSpectrum(t); } } } @@ -2080,8 +2088,8 @@ namespace OpenMS } caption_add = "(DIA window " + String(lower) + " - " + String(upper) + ")"; - tmpe->sortSpectra(); - tmpe->updateRanges(); + tmpe->getMSExperiment().sortSpectra(); + tmpe->getMSExperiment().updateRanges(); // open new 2D widget Plot2DWidget* w = new Plot2DWidget(getCanvasParameters(2), &ws_); @@ -2147,9 +2155,9 @@ namespace OpenMS { // Determine ion mobility unit (default is milliseconds) String unit = "ms"; - if (exp_sptr->metaValueExists("ion_mobility_unit")) + if (exp_sptr->getMSExperiment().metaValueExists("ion_mobility_unit")) { - unit = exp_sptr->getMetaValue("ion_mobility_unit"); + unit = 
exp_sptr->getMSExperiment().getMetaValue("ion_mobility_unit"); } String label = "Ion Mobility [" + unit + "]"; @@ -2344,7 +2352,7 @@ namespace OpenMS return; } MetaDataBrowser dlg(false, this); - dlg.add(exp); + dlg.add(exp.getMSExperiment()); dlg.exec(); } } @@ -2411,7 +2419,7 @@ namespace OpenMS else if (spec_view != nullptr) { ExperimentSharedPtrType new_exp_sptr(new ExperimentType()); - if (LayerDataBase::DataType current_type; spec_view->getSelectedScan(*new_exp_sptr, current_type)) + if (LayerDataBase::DataType current_type; spec_view->getSelectedScan(new_exp_sptr->getMSExperiment(), current_type)) { ODExperimentSharedPtrType od_dummy(new OnDiscMSExperiment()); FeatureMapSharedPtrType f_dummy(new FeatureMapType()); @@ -2542,15 +2550,15 @@ namespace OpenMS { try { - FileHandler().loadExperiment(layer.filename, *lp->getPeakDataMuteable(), {}, ProgressLogger::NONE, true, true); + FileHandler().loadExperiment(layer.filename, lp->getPeakDataMuteable()->getMSExperiment(), {}, ProgressLogger::NONE, true, true); } catch (Exception::BaseException& e) { QMessageBox::critical(this, "Error", (String("Error while loading file") + layer.filename + "\nError message: " + e.what()).toQString()); - lp->getPeakDataMuteable()->clear(true); + lp->getPeakDataMuteable()->getMSExperiment().clear(true); } - lp->getPeakDataMuteable()->sortSpectra(true); - lp->getPeakDataMuteable()->updateRanges(1); + lp->getPeakDataMuteable()->getMSExperiment().sortSpectra(true); + lp->getPeakDataMuteable()->getMSExperiment().updateRanges(); } else if (auto* lp = dynamic_cast(&layer)) // feature data { @@ -2583,15 +2591,15 @@ namespace OpenMS // TODO CHROM try { - FileHandler().loadExperiment(layer.filename, *lp->getChromatogramData(), {}, ProgressLogger::NONE, true, true); + FileHandler().loadExperiment(layer.filename, lp->getChromatogramData()->getMSExperiment(), {}, ProgressLogger::NONE, true, true); } catch (Exception::BaseException& e) { QMessageBox::critical(this, "Error", (String("Error while 
loading file") + layer.filename + "\nError message: " + e.what()).toQString()); - lp->getChromatogramData()->clear(true); + lp->getChromatogramData()->getMSExperiment().clear(true); } - lp->getChromatogramData()->sortChromatograms(true); - lp->getChromatogramData()->updateRanges(1); + lp->getChromatogramData()->getMSExperiment().sortChromatograms(true); + lp->getChromatogramData()->getMSExperiment().updateRanges(); } // update all layers that need an update diff --git a/src/openms_gui/source/VISUAL/LayerData1DBase.cpp b/src/openms_gui/source/VISUAL/LayerData1DBase.cpp index 06ea549796c..21ec56f768b 100644 --- a/src/openms_gui/source/VISUAL/LayerData1DBase.cpp +++ b/src/openms_gui/source/VISUAL/LayerData1DBase.cpp @@ -32,5 +32,12 @@ namespace OpenMS { annotations_1d_.resize(current_idx_ + 1); } + + // Clear peak colors to force reinitialization for the new spectrum + // Unlike annotations which persist across spectra, peak colors need to be regenerated + // to match the size of the new spectrum, preventing "Peak color array size doesn't + // match number of peaks" errors that occur when switching between spectra with + // different numbers of peaks + peak_colors_1d.clear(); } }// namespace OpenMS diff --git a/src/openms_gui/source/VISUAL/LayerData1DPeak.cpp b/src/openms_gui/source/VISUAL/LayerData1DPeak.cpp index 0f591cc6a12..172356edb04 100644 --- a/src/openms_gui/source/VISUAL/LayerData1DPeak.cpp +++ b/src/openms_gui/source/VISUAL/LayerData1DPeak.cpp @@ -102,14 +102,26 @@ namespace OpenMS void LayerData1DPeak::synchronizePeakAnnotations() { + #ifdef DEBUG_IDENTIFICATION_VIEW + std::cout << "synchronizePeakAnnotations." 
<< std::endl; + #endif + // Return if no valid peak layer attached - if (getPeakData() == nullptr || getPeakData()->empty() || type != LayerDataBase::DT_PEAK) + if (getPeakData() == nullptr + || getPeakData()->getMSExperiment().empty() + || type != LayerDataBase::DT_PEAK) + { + return; + } + + // no ID selected + if (peptide_id_index == -1 || peptide_hit_index == -1) { return; } // get mutable access to the spectrum - MSSpectrum& spectrum = getPeakDataMuteable()->getSpectrum(current_idx_); + MSSpectrum& spectrum = getPeakDataMuteable()->getMSExperiment().getSpectrum(current_idx_); int ms_level = spectrum.getMSLevel(); @@ -117,16 +129,14 @@ namespace OpenMS return; // store user fragment annotations - vector& pep_ids = spectrum.getPeptideIdentifications(); - - // no ID selected - if (peptide_id_index == -1 || peptide_hit_index == -1) - { - return; - } - + vector& pep_ids = getPeakDataMuteable()->getPeptideIdentifications(); + vector& prot_ids = getPeakDataMuteable()->getProteinIdentifications(); + if (!pep_ids.empty()) { + #ifdef DEBUG_IDENTIFICATION_VIEW + std::cout << "PeptideIdentifications found in the current spectrum." << std::endl; + #endif vector& hits = pep_ids[peptide_id_index].getHits(); if (!hits.empty()) @@ -141,8 +151,9 @@ namespace OpenMS hits.push_back(hit); } } - else // PeptideIdentifications are empty, create new PepIDs and PeptideHits to store the PeakAnnotations - { + else + { + std::cout << "No PeptideIdentifications found in the current spectrum." 
<< std::endl; // copy user annotations to fragment annotation vector const Annotations1DContainer& las = getAnnotations(current_idx_); @@ -166,7 +177,6 @@ namespace OpenMS pep_id.setIdentifier("Unknown"); // create a dummy ProteinIdentification for all ID-less PeakAnnotations - vector& prot_ids = getPeakDataMuteable()->getProteinIdentifications(); if (prot_ids.empty() || prot_ids.back().getIdentifier() != String("Unknown")) { ProteinIdentification prot_id; @@ -193,7 +203,7 @@ namespace OpenMS void LayerData1DPeak::removePeakAnnotationsFromPeptideHit(const std::vector& selected_annotations) { // Return if no valid peak layer attached - if (getPeakData() == nullptr || getPeakData()->empty() || type != LayerDataBase::DT_PEAK) + if (getPeakData() == nullptr || getPeakData()->getMSExperiment().empty() || type != LayerDataBase::DT_PEAK) { return; } @@ -205,7 +215,7 @@ namespace OpenMS } // get mutable access to the spectrum - MSSpectrum& spectrum = getPeakDataMuteable()->getSpectrum(current_idx_); + MSSpectrum& spectrum = getPeakDataMuteable()->getMSExperiment().getSpectrum(current_idx_); int ms_level = spectrum.getMSLevel(); // wrong MS level @@ -218,17 +228,9 @@ namespace OpenMS // that this function returns prematurely is unlikely, // since we are deleting existing annotations, // that have to be somewhere, but better make sure - vector& pep_ids = spectrum.getPeptideIdentifications(); - if (pep_ids.empty()) - { - return; - } - vector& hits = pep_ids[peptide_id_index].getHits(); - if (hits.empty()) - { - return; - } - PeptideHit& hit = hits[peptide_hit_index]; + PeptideIdentification& pep_ids = getPeakDataMuteable()->getPeptideIdentifications()[peptide_id_index]; + + PeptideHit& hit = pep_ids.getHits()[peptide_hit_index]; vector fas = hit.getPeakAnnotations(); if (fas.empty()) { diff --git a/src/openms_gui/source/VISUAL/LayerDataBase.cpp b/src/openms_gui/source/VISUAL/LayerDataBase.cpp index 3aef61c4703..8e7cbdfd2e1 100644 --- 
a/src/openms_gui/source/VISUAL/LayerDataBase.cpp +++ b/src/openms_gui/source/VISUAL/LayerDataBase.cpp @@ -53,6 +53,41 @@ namespace OpenMS return n; } + /* + void LayerDataBase::updateCache_() + { + if (peak_map_->getMSExperiment().getNrSpectra() > current_spectrum_idx_ && !(*peak_map_)[current_spectrum_idx_].first.empty()) + { + cached_spectrum_ = (*peak_map_)[current_spectrum_idx_].first; + } + else if (on_disc_peaks->getNrSpectra() > current_spectrum_idx_) + { + cached_spectrum_ = on_disc_peaks->getSpectrum(current_spectrum_idx_); + } + } + + + /// add annotation from an OSW sqlite file. + + + /// get annotation (e.g. to build a hierachical ID View) + /// Not const, because we might have incomplete data, which needs to be loaded from sql source + + LayerDataBase::OSWDataSharedPtrType& LayerDataBase::getChromatogramAnnotation() + { + return chrom_annotation_; + } + + const LayerDataBase::OSWDataSharedPtrType& LayerDataBase::getChromatogramAnnotation() const + { + return chrom_annotation_; + } + + void LayerDataBase::setChromatogramAnnotation(OSWData&& data) + { + chrom_annotation_ = OSWDataSharedPtrType(new OSWData(std::move(data))); + } +*/ bool LayerDataBase::annotate(const vector& identifications, const vector& protein_identifications) { @@ -82,7 +117,6 @@ namespace OpenMS return false; } - float LayerDataBase::getMinIntensity() const { return getRange().getMinIntensity(); @@ -231,7 +265,7 @@ namespace OpenMS OSWData data; oswf.readMinimal(data); // allow data to map from transition.id (=native.id) to a chromatogram index in MSExperiment - data.buildNativeIDResolver(*lp->getChromatogramData().get()); + data.buildNativeIDResolver(lp->getChromatogramData().get()->getMSExperiment()); lp->setChromatogramAnnotation(std::move(data)); return true; } diff --git a/src/openms_gui/source/VISUAL/LayerDataChrom.cpp b/src/openms_gui/source/VISUAL/LayerDataChrom.cpp index 14afb93726f..a4ea3bad0aa 100644 --- a/src/openms_gui/source/VISUAL/LayerDataChrom.cpp +++ 
b/src/openms_gui/source/VISUAL/LayerDataChrom.cpp @@ -18,7 +18,9 @@ #include #include #include +#include +#include using namespace std; @@ -41,14 +43,14 @@ namespace OpenMS std::unique_ptr LayerDataChrom::storeVisibleData(const RangeAllType& visible_range, const DataFilters& layer_filters) const { auto ret = make_unique(); - ret->storeVisibleExperiment(*chromatogram_map_.get(), visible_range, layer_filters); + ret->storeVisibleExperiment(chromatogram_map_.get()->getMSExperiment(), visible_range, layer_filters); return ret; } std::unique_ptr LayerDataChrom::storeFullData() const { auto ret = make_unique(); - ret->storeFullExperiment(*chromatogram_map_.get()); + ret->storeFullExperiment(chromatogram_map_.get()->getMSExperiment()); return ret; } @@ -133,15 +135,17 @@ namespace OpenMS // projection for m/z auto ptr_mz = make_unique(); - MSExperiment exp_mz; - exp_mz.addSpectrum(std::move(projection_mz)); - ptr_mz->setPeakData(ExperimentSharedPtrType(new ExperimentType(exp_mz))); + + ExperimentSharedPtrType exp_mz = boost::make_shared(); + exp_mz->getMSExperiment().addSpectrum(std::move(projection_mz)); + ptr_mz->setPeakData(exp_mz); // projection for RT auto ptr_rt = make_unique(); - MSExperiment exp_rt; - exp_mz.addChromatogram(std::move(projection_rt)); - ptr_rt->setChromData(ExperimentSharedPtrType(new ExperimentType(exp_rt))); + + exp_mz->getMSExperiment().addChromatogram(std::move(projection_rt)); + + ptr_rt->setChromData(boost::make_shared()); auto assign_axis = [&](auto unit, auto& layer) { switch (unit) @@ -165,7 +169,7 @@ namespace OpenMS PeakIndex LayerDataChrom::findHighestDataPoint(const RangeAllType& area) const { - const PeakMap& exp = *getChromatogramData(); + const PeakMap& exp = getChromatogramData().get()->getMSExperiment(); int count {-1}; for (const auto& chrom : exp.getChromatograms()) { @@ -231,6 +235,6 @@ namespace OpenMS std::unique_ptr LayerDataChrom::getStats() const { - return make_unique(*chromatogram_map_); + return 
make_unique(chromatogram_map_->getMSExperiment()); } } // namespace OpenMS diff --git a/src/openms_gui/source/VISUAL/LayerDataPeak.cpp b/src/openms_gui/source/VISUAL/LayerDataPeak.cpp index f571a1a2629..8ee478b9013 100644 --- a/src/openms_gui/source/VISUAL/LayerDataPeak.cpp +++ b/src/openms_gui/source/VISUAL/LayerDataPeak.cpp @@ -47,14 +47,14 @@ namespace OpenMS std::unique_ptr LayerDataPeak::storeVisibleData(const RangeAllType& visible_range, const DataFilters& layer_filters) const { auto ret = make_unique(); - ret->storeVisibleExperiment(*peak_map_.get(), visible_range, layer_filters); + ret->storeVisibleExperiment(peak_map_->getMSExperiment(), visible_range, layer_filters); return ret; } std::unique_ptr LayerDataPeak::storeFullData() const { auto ret = make_unique(); - ret->storeFullExperiment(*peak_map_.get()); + ret->storeFullExperiment(peak_map_->getMSExperiment()); return ret; } @@ -80,10 +80,12 @@ namespace OpenMS Mobilogram projection_im; MSChromatogram projection_rt; - for (auto i = getPeakData()->areaBeginConst(area); i != getPeakData()->areaEndConst(); ++i) + const auto& exp = getPeakData()->getMSExperiment(); + auto lvls = exp.getMSLevels(); // use for smallest MS level in the data (IM frames may have all level 1, or all level 2) + for (auto i = exp.areaBeginConst(area, lvls[0]); i != exp.areaEndConst(); ++i) { PeakIndex pi = i.getPeakIndex(); - if (filters.passes((*getPeakData())[pi.spectrum], pi.peak)) + if (filters.passes(exp[pi.spectrum], pi.peak)) { // summary stats ++peak_count; @@ -104,25 +106,42 @@ namespace OpenMS } } - // write to spectra/chrom projection_mz.resize(mzint.size() + 2); - projection_mz[0].setMZ(area.getMinMZ()); - projection_mz[0].setIntensity(0.0); - projection_mz.back().setMZ(area.getMaxMZ()); - projection_mz.back().setIntensity(0.0); + // write to spectra/chrom + try + { // may throw if m/z is not in area + projection_mz[0].setMZ(area.getMinMZ()); + projection_mz[0].setIntensity(0.0); + 
projection_mz.back().setMZ(area.getMaxMZ()); + projection_mz.back().setIntensity(0.0); + } + catch (...) { } + projection_im.resize(mobility.size() + 2); - projection_im[0].setMobility(area.getMinMobility()); - projection_im[0].setIntensity(0.0); - projection_im.back().setMobility(area.getMaxMobility()); - projection_im.back().setIntensity(0.0); - + try + { // may throw if IM is not in area + projection_im[0].setMobility(area.getMinMobility()); + projection_im[0].setIntensity(0.0); + projection_im.back().setMobility(area.getMaxMobility()); + projection_im.back().setIntensity(0.0); + } + catch (...) + { + } projection_rt.resize(rt.size() + 2); - projection_rt[0].setRT(area.getMinRT()); - projection_rt[0].setIntensity(0.0); - projection_rt.back().setRT(area.getMaxRT()); - projection_rt.back().setIntensity(0.0); + try + { // may throw if RT is not in area + projection_rt[0].setRT(area.getMinRT()); + projection_rt[0].setIntensity(0.0); + projection_rt.back().setRT(area.getMaxRT()); + projection_rt.back().setIntensity(0.0); + } + catch (...) + { + } + Size i = 1; auto intit = mzint.begin(); @@ -209,11 +228,11 @@ namespace OpenMS auto max_int = numeric_limits::lowest(); PeakIndex max_pi; - const auto map = *getPeakData(); + const auto& map = getPeakData()->getMSExperiment(); // for IM data, use whatever is there. For RT/mz data, use MSlevel 1 const UInt MS_LEVEL = (! map.empty() && map.isIMFrame()) ? 
map[0].getMSLevel() : 1; - for (ExperimentType::ConstAreaIterator i = map.areaBeginConst(area, MS_LEVEL); i != map.areaEndConst(); ++i) + for (auto i = map.areaBeginConst(area, MS_LEVEL); i != map.areaEndConst(); ++i) { PeakIndex pi = i.getPeakIndex(); if (i->getIntensity() > max_int && filters.passes((map)[pi.spectrum], pi.peak)) @@ -261,7 +280,7 @@ namespace OpenMS std::unique_ptr LayerDataPeak::getStats() const { - return make_unique(*peak_map_); + return make_unique(peak_map_->getMSExperiment()); } bool LayerDataPeak::annotate(const vector& identifications, const vector& protein_identifications) diff --git a/src/openms_gui/source/VISUAL/MetaDataBrowser.cpp b/src/openms_gui/source/VISUAL/MetaDataBrowser.cpp index 25a055653f0..28f8af5f7b3 100644 --- a/src/openms_gui/source/VISUAL/MetaDataBrowser.cpp +++ b/src/openms_gui/source/VISUAL/MetaDataBrowser.cpp @@ -261,9 +261,6 @@ namespace OpenMS //check for Sample visualize_(meta.getSample(), item); - //check for ProteinIdentification - visualizeAll_(meta.getProteinIdentifications(), item); - //check for Instrument visualize_(meta.getInstrument(), item); @@ -846,9 +843,6 @@ namespace OpenMS //check for AcquisitionInfo visualize_(meta.getAcquisitionInfo(), item); - //check for PeptideIdentification - visualizeAll_(meta.getPeptideIdentifications(), item); - connectVisualizer_(visualizer); } diff --git a/src/openms_gui/source/VISUAL/Painter2DBase.cpp b/src/openms_gui/source/VISUAL/Painter2DBase.cpp index edd22a950c6..55f9ed18130 100644 --- a/src/openms_gui/source/VISUAL/Painter2DBase.cpp +++ b/src/openms_gui/source/VISUAL/Painter2DBase.cpp @@ -123,7 +123,7 @@ namespace OpenMS void Painter2DPeak::paint(QPainter* painter, Plot2DCanvas* canvas, int layer_index) { // renaming some values for readability - const auto& peak_map = *layer_->getPeakData(); + const auto& peak_map = layer_->getPeakData()->getMSExperiment(); // skip empty peak maps if (peak_map.empty()) @@ -241,7 +241,7 @@ namespace OpenMS QVector 
coloredPoints((int)layer_->gradient.precalculatedSize()); const double snap_factor = canvas->snap_factors_[layer_index]; - const auto& map = *layer_->getPeakData(); + const auto& map = layer_->getPeakData()->getMSExperiment();; const auto& area = canvas->visible_area_.getAreaUnit(); const auto end_area = map.areaEndConst(); // for IM data, use whatever is there. For RT/mz data, use MSlevel 1 @@ -343,7 +343,7 @@ namespace OpenMS // set painter to black (we operate directly on the pixels for all colored data) painter.setPen(Qt::black); const double snap_factor = canvas->snap_factors_[layer_index]; - const auto& map = *layer_->getPeakData(); + const auto& map = layer_->getPeakData()->getMSExperiment(); const auto& area = canvas->visible_area_.getAreaUnit(); // for IM data, use whatever is there. For RT/mz data, use MSlevel 1 @@ -450,7 +450,7 @@ namespace OpenMS void Painter2DPeak::paintPrecursorPeaks_(QPainter& painter, Plot2DCanvas* canvas) { - const auto& peak_map = *layer_->getPeakData(); + const auto& peak_map = layer_->getPeakData()->getMSExperiment(); QPen p; p.setColor(Qt::black); @@ -509,7 +509,7 @@ namespace OpenMS void Painter2DChrom::paint(QPainter* painter, Plot2DCanvas* canvas, int /*layer_index*/) { - const PeakMap& exp = *layer_->getChromatogramData(); + const PeakMap& exp = layer_->getChromatogramData()->getMSExperiment(); // TODO CHROM implement layer filters // paint chromatogram rt start and end as line diff --git a/src/openms_gui/source/VISUAL/Plot1DCanvas.cpp b/src/openms_gui/source/VISUAL/Plot1DCanvas.cpp index df20435c536..ad44d7e1dcc 100644 --- a/src/openms_gui/source/VISUAL/Plot1DCanvas.cpp +++ b/src/openms_gui/source/VISUAL/Plot1DCanvas.cpp @@ -37,6 +37,7 @@ #include #include +#include using namespace std; @@ -49,12 +50,13 @@ namespace OpenMS Plot1DCanvas::ExperimentSharedPtrType prepareChromatogram(Size index, const Plot1DCanvas::ExperimentSharedPtrType& exp_sptr, const Plot1DCanvas::ODExperimentSharedPtrType& ondisc_sptr) { // create a 
managed pointer fill it with a spectrum containing the chromatographic data - LayerDataBase::ExperimentSharedPtrType chrom_exp_sptr(new LayerDataBase::ExperimentType()); - chrom_exp_sptr->setMetaValue("is_chromatogram", "true"); //this is a hack to store that we have chromatogram data + auto chrom_exp_sptr = boost::make_shared(); + + chrom_exp_sptr->getMSExperiment().setMetaValue("is_chromatogram", "true"); //this is a hack to store that we have chromatogram data LayerDataBase::ExperimentType::SpectrumType spectrum; // retrieve chromatogram (either from in-memory or on-disc representation) - MSChromatogram current_chrom = exp_sptr->getChromatograms()[index]; + MSChromatogram current_chrom = exp_sptr->getMSExperiment().getChromatograms()[index]; if (current_chrom.empty()) { current_chrom = ondisc_sptr->getChromatogram(index); @@ -76,12 +78,12 @@ namespace OpenMS { spectrum.emplace_back(-1, 0); } - chrom_exp_sptr->addSpectrum(spectrum); + chrom_exp_sptr->getMSExperiment().addSpectrum(std::move(spectrum)); // store peptide_sequence if available if (current_chrom.getPrecursor().metaValueExists("peptide_sequence")) { - chrom_exp_sptr->setMetaValue("peptide_sequence", current_chrom.getPrecursor().getMetaValue("peptide_sequence")); + chrom_exp_sptr->getMSExperiment().setMetaValue("peptide_sequence", current_chrom.getPrecursor().getMetaValue("peptide_sequence")); } return chrom_exp_sptr; @@ -1048,7 +1050,7 @@ namespace OpenMS auto* peak_layer = dynamic_cast(&getCurrentLayer()); if (peak_layer) { - if (peak_layer->getPeakData()->containsScanOfLevel(1)) + if (peak_layer->getPeakData()->getMSExperiment().containsScanOfLevel(1)) { context_menu->addAction("Switch to 2D view", [&]() { emit showCurrentPeaksAs2D(); @@ -1069,7 +1071,7 @@ namespace OpenMS { auto l = dynamic_cast(&getCurrentLayer()); context_menu->addAction("Switch to DIA-MS view", [&]() { - emit showCurrentPeaksAsDIA(l->getCurrentSpectrum().getPrecursors()[0], *l->getPeakData().get()); + emit 
showCurrentPeaksAsDIA(l->getCurrentSpectrum().getPrecursors()[0], l->getPeakData()->getMSExperiment()); }); } } diff --git a/src/openms_gui/source/VISUAL/Plot2DCanvas.cpp b/src/openms_gui/source/VISUAL/Plot2DCanvas.cpp index 0b78b3c649a..55402e0c3ab 100644 --- a/src/openms_gui/source/VISUAL/Plot2DCanvas.cpp +++ b/src/openms_gui/source/VISUAL/Plot2DCanvas.cpp @@ -337,13 +337,14 @@ namespace OpenMS auto local_max = -numeric_limits::max(); if (auto* lp = dynamic_cast(&getLayer(i))) { - for (ExperimentType::ConstAreaIterator it = lp->getPeakData()->areaBeginConst(visible_area_.getAreaUnit().getMinRT(), visible_area_.getAreaUnit().getMaxRT(), + const MSExperiment& peak_data = lp->getPeakData()->getMSExperiment(); + for (auto it = peak_data.areaBeginConst(visible_area_.getAreaUnit().getMinRT(), visible_area_.getAreaUnit().getMaxRT(), visible_area_.getAreaUnit().getMinMZ(), visible_area_.getAreaUnit().getMaxMZ()); - it != lp->getPeakData()->areaEndConst(); + it != peak_data.areaEndConst(); ++it) { PeakIndex pi = it.getPeakIndex(); - if (it->getIntensity() > local_max && getLayer(i).filters.passes((*lp->getPeakData())[pi.spectrum], pi.peak)) + if (it->getIntensity() > local_max && getLayer(i).filters.passes(peak_data[pi.spectrum], pi.peak)) { local_max = it->getIntensity(); } @@ -720,7 +721,7 @@ namespace OpenMS else if (auto* lp = dynamic_cast(&getCurrentLayer())) { //meta info - const ExperimentType::SpectrumType & s = selected_peak_.getSpectrum(*lp->getPeakData()); + const ExperimentType::SpectrumType & s = selected_peak_.getSpectrum(lp->getPeakData()->getMSExperiment()); for (Size m = 0; m < s.getFloatDataArrays().size(); ++m) { if (selected_peak_.peak < s.getFloatDataArrays()[m].size()) @@ -884,132 +885,112 @@ namespace OpenMS settings_menu->addAction("Show/hide projections"); settings_menu->addAction("Show/hide MS/MS precursors"); - //add surrounding survey scans - //find nearest survey scan - SignedSize size = lp->getPeakData()->size(); - Int current = 
lp->getPeakData()->RTBegin(e_units.getMinRT()) - lp->getPeakData()->begin(); - if (current == size) // if the user clicked right of the last MS1 scan - { - current = std::max(SignedSize{0}, size - 1); // we want the rightmost valid scan index - } + auto& exp = lp->getPeakData()->getMSExperiment(); - SignedSize i = 0; - while (current + i < size || current - i >= 0) + // in a IM-frame (IM vs. m/z), the RT is empty in `e_units`, and showing neighbouring RT scans is not possible (this layer only has this IM frame) + // --> skip entries for RT neighbours. + if (!e_units.RangeRT::isEmpty()) { - if (current + i < size && (*lp->getPeakData())[current + i].getMSLevel() == 1) + // add surrounding survey scans + // find nearest survey scan + SignedSize size = exp.size(); + Int current = exp.RTBegin(e_units.getMinRT()) - exp.begin(); + if (current == size) // if the user clicked right of the last MS1 scan { - current += i; - break; + current = std::max(SignedSize {0}, size - 1); // we want the rightmost valid scan index } - if (current - i >= 0 && (*lp->getPeakData())[current - i].getMSLevel() == 1) + + SignedSize i = 0; + while (current + i < size || current - i >= 0) { - current -= i; - break; + if (current + i < size && exp[current + i].getMSLevel() == 1) + { + current += i; + break; + } + if (current - i >= 0 && exp[current - i].getMSLevel() == 1) + { + current -= i; + break; + } + ++i; } - ++i; - } - // search for four scans in both directions - vector indices; - indices.push_back(current); - i = 1; - while (current - i >= 0 && indices.size() < 5) - { - if ((*lp->getPeakData())[current - i].getMSLevel() == 1) + // search for four scans in both directions + vector indices; + indices.push_back(current); + i = 1; + while (current - i >= 0 && indices.size() < 5) { - indices.push_back(current - i); + if (exp[current - i].getMSLevel() == 1) { indices.push_back(current - i); } + ++i; } - ++i; - } - i = 1; - while (current + i < size && indices.size() < 9) - { - if 
((*lp->getPeakData())[current + i].getMSLevel() == 1) + i = 1; + while (current + i < size && indices.size() < 9) { - indices.push_back(current + i); + if (exp[current + i].getMSLevel() == 1) { indices.push_back(current + i); } + ++i; } - ++i; - } - sort(indices.rbegin(), indices.rend()); - QMenu* ms1_scans = context_menu->addMenu("Survey scan in 1D"); - QMenu* ms1_meta = context_menu->addMenu("Survey scan meta data"); - context_menu->addSeparator(); - for (i = 0; i < (Int)indices.size(); ++i) - { - if (indices[i] == current) + sort(indices.rbegin(), indices.rend()); + QMenu* ms1_scans = context_menu->addMenu("Survey scan in 1D"); + QMenu* ms1_meta = context_menu->addMenu("Survey scan meta data"); + context_menu->addSeparator(); + + for (auto idx : indices) { - ms1_scans->addSeparator(); + if (idx == current) { ms1_scans->addSeparator(); } + ms1_scans->addAction(QString("RT: ") + QString::number(exp[idx].getRT()), + [=]() { emit showSpectrumAsNew1D(idx); }); + if (idx == current) { ms1_scans->addSeparator(); } + + if (idx == current) { ms1_meta->addSeparator(); } + ms1_meta->addAction(QString("RT: ") + QString::number(exp[idx].getRT()), + [=]() { showMetaData(true, idx); }); + if (idx == current) { ms1_meta->addSeparator(); } } - a = ms1_scans->addAction(QString("RT: ") + QString::number((*lp->getPeakData())[indices[i]].getRT())); - a->setData(indices[i]); - if (indices[i] == current) + // add surrounding fragment scans + // - We first attempt to look at the position where the user clicked + // - Next we look within the +/- 5 scans around that position + // - Next we look within the whole visible area + QMenu* msn_scans = new QMenu("fragment scan in 1D"); + QMenu* msn_meta = new QMenu("fragment scan meta data"); + bool item_added = collectFragmentScansInArea_(check_area, msn_scans, msn_meta); + if (!item_added) { - ms1_scans->addSeparator(); + // Now simply go for the 5 closest points in RT and check whether there + // are any scans. 
+ // NOTE: that if we go for the visible area, we run the + // risk of iterating through *all* the scans. + check_area.RangeMZ::extend((RangeMZ)visible_area_.getAreaUnit()); + const auto& exp = lp->getPeakData()->getMSExperiment(); + const auto& specs = exp.getSpectra(); + check_area.RangeRT::operator=(RangeRT(specs[indices.back()].getRT(), specs[indices.front()].getRT())); + item_added = collectFragmentScansInArea_(check_area, msn_scans, msn_meta); + + if (! item_added) + { // OK, now lets search the whole visible area (may be large!) + item_added = collectFragmentScansInArea_(visible_area_.getAreaUnit(), msn_scans, msn_meta); + } } - if (indices[i] == current) - { - ms1_meta->addSeparator(); - } - a = ms1_meta->addAction(QString("RT: ") + QString::number((*lp->getPeakData())[indices[i]].getRT())); - a->setData(indices[i]); - if (indices[i] == current) + if (item_added) { - ms1_meta->addSeparator(); + context_menu->addMenu(msn_scans); + context_menu->addMenu(msn_meta); + context_menu->addSeparator(); } - } - - // add surrounding fragment scans - // - We first attempt to look at the position where the user clicked - // - Next we look within the +/- 5 scans around that position - // - Next we look within the whole visible area - QMenu* msn_scans = new QMenu("fragment scan in 1D"); - QMenu* msn_meta = new QMenu("fragment scan meta data"); - bool item_added = collectFragmentScansInArea_(check_area, a, msn_scans, msn_meta); - if (!item_added) - { - // Now simply go for the 5 closest points in RT and check whether there - // are any scans. - // NOTE: that if we go for the visible area, we run the - // risk of iterating through *all* the scans. 
- check_area.RangeMZ::extend((RangeMZ)visible_area_.getAreaUnit()); - const auto& specs = lp->getPeakData()->getSpectra(); - check_area.RangeRT::operator=(RangeRT(specs[indices.back()].getRT(), specs[indices.front()].getRT())); - item_added = collectFragmentScansInArea_(check_area, a, msn_scans, msn_meta); - if (!item_added) - { // OK, now lets search the whole visible area (may be large!) - item_added = collectFragmentScansInArea_(visible_area_.getAreaUnit(), a, msn_scans, msn_meta); + auto it_closest_MS = lp->getPeakData()->getMSExperiment().getClosestSpectrumInRT(e_units.getMinRT()); + if (it_closest_MS->containsIMData()) + { + context_menu->addAction( + ("Switch to ion mobility view (MSLevel: " + String(it_closest_MS->getMSLevel()) + ";RT: " + String(it_closest_MS->getRT(), false) + ")") + .c_str(), + [=]() { emit showCurrentPeaksAsIonMobility(*it_closest_MS); }); } - } - if (item_added) - { - context_menu->addMenu(msn_scans); - context_menu->addMenu(msn_meta); - context_menu->addSeparator(); - } - - auto it_closest_MS = lp->getPeakData()->getClosestSpectrumInRT(e_units.getMinRT()); - if (it_closest_MS->containsIMData()) - { - context_menu->addAction(("Switch to ion mobility view (MSLevel: " + String(it_closest_MS->getMSLevel()) + ";RT: " + String(it_closest_MS->getRT(), false) + ")").c_str(), - [&]() {emit showCurrentPeaksAsIonMobility(*it_closest_MS); }); - } - + } // end of hasRT finishContextMenu_(context_menu, settings_menu); - - // evaluate menu - if ((result = context_menu->exec(mapToGlobal(e->pos())))) - { - if (result->parent() == ms1_scans || result->parent() == msn_scans) - { - emit showSpectrumAsNew1D(result->data().toInt()); - } - else if (result->parent() == ms1_meta || result->parent() == msn_meta) - { - showMetaData(true, result->data().toInt()); - } - } + context_menu->exec(mapToGlobal(e->pos())); } //-------------------FEATURES---------------------------------- else if (auto* lf = dynamic_cast(&layer)) @@ -1097,7 +1078,7 @@ namespace OpenMS 
settings_menu->addAction("Show/hide projections"); settings_menu->addAction("Show/hide MS/MS precursors"); - const PeakMap& exp = *lc->getChromatogramData(); + const PeakMap& exp = lc->getChromatogramData()->getMSExperiment(); constexpr int CHROMATOGRAM_SHOW_MZ_RANGE = 10; auto search_area = e_units; @@ -1548,12 +1529,14 @@ namespace OpenMS resetZoom(true); } - bool Plot2DCanvas::collectFragmentScansInArea_(const RangeType& range, QAction* a, QMenu* msn_scans, QMenu* msn_meta) + bool Plot2DCanvas::collectFragmentScansInArea_(const RangeType& range, QMenu* msn_scans, QMenu* msn_meta) { auto& layer = dynamic_cast(getCurrentLayer()); bool item_added = false; - const auto last_RT = layer.getPeakData()->RTEnd(range.getMaxRT()); - for (ExperimentType::ConstIterator it = layer.getPeakData()->RTBegin(range.getMinRT()); + const MSExperiment& peak_data = layer.getPeakData()->getMSExperiment(); + const auto last_RT = peak_data.RTEnd(range.getMaxRT()); + + for (auto it = peak_data.RTBegin(range.getMinRT()); it != last_RT; ++it) { if (it->getPrecursors().empty()) continue; @@ -1561,10 +1544,11 @@ namespace OpenMS double mz = it->getPrecursors()[0].getMZ(); if (it->getMSLevel() > 1 && range.containsMZ(mz)) { - a = msn_scans->addAction(QString("RT: ") + QString::number(it->getRT()) + " mz: " + QString::number(mz)); - a->setData((int)(it - layer.getPeakData()->begin())); - a = msn_meta->addAction(QString("RT: ") + QString::number(it->getRT()) + " mz: " + QString::number(mz)); - a->setData((int)(it - layer.getPeakData()->begin())); + msn_scans->addAction(QString("RT: ") + QString::number(it->getRT()) + " mz: " + QString::number(mz), + [=]() { emit showSpectrumAsNew1D(it - peak_data.begin()); }); + msn_meta->addAction(QString("RT: ") + QString::number(it->getRT()) + " mz: " + QString::number(mz), + [=]() { showMetaData(true, it - peak_data.begin()); }); + item_added = true; } } diff --git a/src/openms_gui/source/VISUAL/Plot3DCanvas.cpp b/src/openms_gui/source/VISUAL/Plot3DCanvas.cpp 
index a7b27659098..6ebd50440fc 100644 --- a/src/openms_gui/source/VISUAL/Plot3DCanvas.cpp +++ b/src/openms_gui/source/VISUAL/Plot3DCanvas.cpp @@ -89,8 +89,8 @@ namespace OpenMS // Abort if no data points are contained auto& layer = dynamic_cast(getCurrentLayer()); - - if (layer.getPeakData()->empty()) + const MSExperiment& peak_data = layer.getPeakData()->getMSExperiment(); + if (peak_data.empty()) { popIncompleteLayer_("Cannot add a dataset that contains no survey scans. Aborting!"); return false; diff --git a/src/openms_gui/source/VISUAL/Plot3DOpenGLCanvas.cpp b/src/openms_gui/source/VISUAL/Plot3DOpenGLCanvas.cpp index a3c12986151..9e798588371 100644 --- a/src/openms_gui/source/VISUAL/Plot3DOpenGLCanvas.cpp +++ b/src/openms_gui/source/VISUAL/Plot3DOpenGLCanvas.cpp @@ -526,8 +526,9 @@ namespace OpenMS } const auto area = canvas_3d_.visible_area_.getAreaUnit(); - auto begin_it = layer.getPeakData()->areaBeginConst(area.getMinRT(), area.getMaxRT(), area.getMinMZ(), area.getMaxMZ()); - auto end_it = layer.getPeakData()->areaEndConst(); + const MSExperiment& peak_data = layer.getPeakData()->getMSExperiment(); + auto begin_it = peak_data.areaBeginConst(area.getMinRT(), area.getMaxRT(), area.getMinMZ(), area.getMaxMZ()); + auto end_it = peak_data.areaEndConst(); // count peaks in area int count = std::distance(begin_it, end_it); @@ -555,7 +556,8 @@ namespace OpenMS } PeakIndex pi = it.getPeakIndex(); - if (layer.filters.passes((*layer.getPeakData())[pi.spectrum], pi.peak)) + const MSExperiment& peak_data = layer.getPeakData()->getMSExperiment(); + if (layer.filters.passes(peak_data[pi.spectrum], pi.peak)) { glBegin(GL_POINTS); double intensity = 0; @@ -615,8 +617,9 @@ namespace OpenMS glLineWidth(layer.param.getValue("dot:line_width")); const auto area = canvas_3d_.visible_area_.getAreaUnit(); - auto begin_it = layer.getPeakData()->areaBeginConst(area.getMinRT(), area.getMaxRT(), area.getMinMZ(), area.getMaxMZ()); - auto end_it = layer.getPeakData()->areaEndConst(); + 
const MSExperiment& peak_data = layer.getPeakData()->getMSExperiment(); + auto begin_it = peak_data.areaBeginConst(area.getMinRT(), area.getMaxRT(), area.getMinMZ(), area.getMaxMZ()); + auto end_it = peak_data.areaEndConst(); // count peaks in area int count = std::distance(begin_it, end_it); @@ -643,7 +646,8 @@ namespace OpenMS } PeakIndex pi = it.getPeakIndex(); - if (layer.filters.passes((*layer.getPeakData())[pi.spectrum], pi.peak)) + const MSExperiment& peak_data = layer.getPeakData()->getMSExperiment(); + if (layer.filters.passes(peak_data[pi.spectrum], pi.peak)) { glBegin(GL_LINES); double intensity = 0; @@ -1087,8 +1091,9 @@ namespace OpenMS for (Size i = 0; i < canvas_3d_.getLayerCount(); i++) { const auto& layer = dynamic_cast(canvas_3d_.getLayer(i)); - auto rt_begin_it = layer.getPeakData()->RTBegin(area.getMinRT()); - auto rt_end_it = layer.getPeakData()->RTEnd(area.getMaxRT()); + const MSExperiment& peak_data = layer.getPeakData()->getMSExperiment(); + auto rt_begin_it = peak_data.RTBegin(area.getMinRT()); + auto rt_end_it = peak_data.RTEnd(area.getMaxRT()); for (auto spec_it = rt_begin_it; spec_it != rt_end_it; ++spec_it) { diff --git a/src/openms_gui/source/VISUAL/PlotCanvas.cpp b/src/openms_gui/source/VISUAL/PlotCanvas.cpp index 2c06f2ad2b0..8e640dd7adc 100644 --- a/src/openms_gui/source/VISUAL/PlotCanvas.cpp +++ b/src/openms_gui/source/VISUAL/PlotCanvas.cpp @@ -424,7 +424,7 @@ namespace OpenMS const String& caption, const bool use_noise_cutoff) { - if (map->getSpectra().empty()) + if (map->getMSExperiment().getSpectra().empty()) { auto msg = "Your input data contains no spectra. 
Not adding layer."; OPENMS_LOG_WARN << msg << std::endl; @@ -446,14 +446,14 @@ namespace OpenMS // calculate noise if (use_noise_cutoff) { - auto cutoff = estimateNoiseFromRandomScans(*map, 1, 10, 5); // 5% of low intensity data is considered noise + auto cutoff = estimateNoiseFromRandomScans(map->getMSExperiment(), 1, 10, 5); // 5% of low intensity data is considered noise DataFilters filters; filters.add(DataFilters::DataFilter(DataFilters::INTENSITY, DataFilters::GREATER_EQUAL, cutoff)); initFilters(filters); } else // no mower, hide zeros if wanted { - if (map->hasZeroIntensities(1)) + if (map->getMSExperiment().hasZeroIntensities(1)) { DataFilters filters; filters.add(DataFilters::DataFilter(DataFilters::INTENSITY, DataFilters::GREATER_EQUAL, 0.001)); @@ -467,7 +467,7 @@ namespace OpenMS bool PlotCanvas::addChromLayer(const ExperimentSharedPtrType& map, ODExperimentSharedPtrType od_map, const String& filename, const String& caption) { - if (map->getChromatograms().empty()) + if (map->getMSExperiment().getChromatograms().empty()) { auto msg = "Your input data contains no chromatograms. 
Not adding layer."; OPENMS_LOG_WARN << msg << std::endl; @@ -747,11 +747,11 @@ namespace OpenMS { if (auto lp = dynamic_cast(&layer)) { - dlg.add(*lp->getPeakDataMuteable()); + dlg.add(lp->getPeakDataMuteable()->getMSExperiment()); // Exception for Plot1DCanvas, here we add the meta data of the one spectrum if (auto lp1 = dynamic_cast(&layer)) { - dlg.add((*lp1->getPeakDataMuteable())[lp1->getCurrentIndex()]); + dlg.add(lp1->getPeakDataMuteable()->getMSExperiment()[lp1->getCurrentIndex()]); } } if (auto lp = dynamic_cast(&layer)) @@ -775,7 +775,7 @@ namespace OpenMS { if (auto lp = dynamic_cast(&layer)) { - dlg.add((*lp->getPeakDataMuteable())[index]); + dlg.add(lp->getPeakDataMuteable()->getMSExperiment()[index]); } else if (auto lp = dynamic_cast(&layer)) { diff --git a/src/openms_gui/source/VISUAL/SpectraIDViewTab.cpp b/src/openms_gui/source/VISUAL/SpectraIDViewTab.cpp index 6abade58875..4421d531df0 100644 --- a/src/openms_gui/source/VISUAL/SpectraIDViewTab.cpp +++ b/src/openms_gui/source/VISUAL/SpectraIDViewTab.cpp @@ -33,6 +33,8 @@ #include #include +//#define DEBUG_SPECTRA_ID_VIEW 1 + using namespace std; ///@improvement write the visibility-status of the columns in toppview.ini and read at start @@ -159,35 +161,34 @@ namespace OpenMS if (is_first_time_loading_ && layer_) { - for (const auto& spec : *layer_->getPeakData()) + auto& annotated_peak_data = *layer_->getPeakData(); + + if (annotated_peak_data.getPeptideIdentifications().empty()) { - if (!spec.getPeptideIdentifications().empty()) + return; + } + + for (const auto& [spec, pepid] : annotated_peak_data) + { + const vector& pep_hits = pepid.getHits(); + //add id_accession as the key of the map and push the peptideID to the vector value- + for (const auto & pep_hit : pep_hits) { - const vector& peptide_ids = spec.getPeptideIdentifications(); + const vector& evidences = pep_hit.getPeptideEvidences(); - for (const auto& pepid : peptide_ids) + for (const auto & evidence : evidences) { - const vector& 
pep_hits = pepid.getHits(); - //add id_accession as the key of the map and push the peptideID to the vector value- - for (const auto & pep_hit : pep_hits) - { - const vector& evidences = pep_hit.getPeptideEvidences(); - - for (const auto & evidence : evidences) - { - const String& id_accession = evidence.getProteinAccession(); - protein_to_peptide_id_map[id_accession].push_back(&pepid); - } - } + const String& id_accession = evidence.getProteinAccession(); + protein_to_peptide_id_map[id_accession].push_back(&pepid); } - } + } } // set is_first_time_loading to false so that the map gets created only the first time! is_first_time_loading_ = false; } } - //extract required part of accession and open browser + // extract required part of accession and open browser QString SpectraIDViewTab::extractNumFromAccession_(const QString& full_accession) { // anchored (^...$) regex for matching accession @@ -407,7 +408,8 @@ namespace OpenMS } int current_spectrum_index = table_widget_->item(row, Clmn::SPEC_INDEX)->data(Qt::DisplayRole).toInt(); - const auto& exp = *layer_->getPeakData(); + const auto& annotated_exp = *layer_->getPeakData(); + const auto& exp = annotated_exp.getMSExperiment(); const auto& spec2 = exp[current_spectrum_index]; // @@ -416,16 +418,16 @@ namespace OpenMS // show precursor spectrum (usually MS1) if (column == Clmn::PRECURSOR_MZ) { - const auto prec_it = exp.getPrecursorSpectrum(exp.begin() + current_spectrum_index); - - if (prec_it != exp.end() && !spec2.getPrecursors().empty()) + const auto prec_it = exp.getPrecursorSpectrum(exp.getSpectra().begin() + current_spectrum_index); + + if (prec_it != exp.getSpectra().end() && !spec2.getPrecursors().empty()) { double precursor_mz = spec2.getPrecursors()[0].getMZ(); // determine start and stop of isolation window double isolation_window_lower_mz = precursor_mz - spec2.getPrecursors()[0].getIsolationWindowLowerOffset(); double isolation_window_upper_mz = precursor_mz + 
spec2.getPrecursors()[0].getIsolationWindowUpperOffset(); - emit spectrumSelected(std::distance(exp.begin(), prec_it), -1, -1);// no identification or hit selected (-1) + emit spectrumSelected(std::distance(exp.getSpectra().begin(), prec_it), -1, -1); // no identification or hit selected (-1) // zoom into precursor area emit requestVisibleArea1D(isolation_window_lower_mz - 50.0, isolation_window_upper_mz + 50.0); } @@ -453,12 +455,12 @@ namespace OpenMS auto item_pepid = table_widget_->item(row, Clmn::ID_NR); if (item_pepid)// might be null for MS1 spectra { - int current_identification_index = item_pepid->data(Qt::DisplayRole).toInt(); + // int current_identification_index = item_pepid->data(Qt::DisplayRole).toInt(); int current_peptide_hit_index = table_widget_->item(row, Clmn::PEPHIT_NR)->data(Qt::DisplayRole).toInt(); - const vector& peptide_ids = spec2.getPeptideIdentifications(); - const vector& pep_hits = peptide_ids[current_identification_index].getHits(); - const PeptideHit& hit = pep_hits[current_peptide_hit_index]; + const PeptideIdentification& peptide_id = annotated_exp.getPeptideIdentifications()[current_spectrum_index]; + const vector& phits = peptide_id.getHits(); + const PeptideHit& hit = phits[current_peptide_hit_index]; // initialize window, when the table is requested for the first time // afterwards the size will stay at the manually resized window size @@ -523,7 +525,7 @@ namespace OpenMS // want the list of unidentified MS2 spectra (obtained by unchecking the 'just hits' button). 
auto* ptr_peak = dynamic_cast(layer); bool no_data = (ptr_peak == nullptr - || (ptr_peak && ptr_peak->getPeakData()->empty())); + || (ptr_peak && ptr_peak->getPeakData()->getMSExperiment().empty())); return !no_data; } @@ -588,17 +590,12 @@ namespace OpenMS // only when checked, otherwise only highlights { int row = selected_spec_row_idx; - int spectrum_index = table_widget_->item(row, Clmn::SPEC_INDEX)->data(Qt::DisplayRole).toInt(); + //int spectrum_index = table_widget_->item(row, Clmn::SPEC_INDEX)->data(Qt::DisplayRole).toInt(); int num_id = table_widget_->item(row, Clmn::ID_NR)->data(Qt::DisplayRole).toInt(); - int num_ph = table_widget_->item(row, Clmn::PEPHIT_NR)->data(Qt::DisplayRole).toInt(); - const auto& spec = layer_->getPeakData()->operator[](spectrum_index); - const vector& pep_id = spec.getPeptideIdentifications(); - - if(!spec.getPeptideIdentifications().empty()) - { - const vector& hits = pep_id[num_id].getHits(); - if (!hits.empty()) accs = hits[num_ph].extractProteinAccessionsSet(); - } + int num_ph = table_widget_->item(row, Clmn::PEPHIT_NR)->data(Qt::DisplayRole).toInt(); + const PeptideIdentification& pep_id = layer_->getPeakData()->getPeptideIdentifications()[num_id]; + const vector& hits = pep_id.getHits(); + if (!hits.empty()) accs = hits[num_ph].extractProteinAccessionsSet(); } // create header labels (setting header labels must occur after fill) @@ -668,6 +665,9 @@ namespace OpenMS void SpectraIDViewTab::updateEntries_() { + #ifdef DEBUG_SPECTRA_ID_VIEW + cout << "Updating entries in SpectraIDViewTab" << endl; + #endif // no valid peak layer attached if (!hasData(layer_)) @@ -699,24 +699,20 @@ namespace OpenMS { std::vector> all_hits; - for (const auto& spec : layer_->getPeakData()->getSpectra()) + for (auto [spectrum, peptide_id] : *layer_->getPeakData()) { - UInt ms_level = spec.getMSLevel(); - const vector& peptide_ids = spec.getPeptideIdentifications(); + UInt ms_level = spectrum.getMSLevel(); - if (ms_level != 2 || 
peptide_ids.empty()) // skip non ms2 spectra and spectra with no identification + if (ms_level != 2) // skip non ms2 spectra and spectra with no identification { continue; } - for (const auto& pep_id : peptide_ids) + const vector& phits = peptide_id.getHits(); + all_hits.insert(all_hits.end(), phits.begin(), phits.end()); + if (!has_peak_annotations && !phits.empty() && !phits[0].getPeakAnnotations().empty()) { - const vector& phits = pep_id.getHits(); - all_hits.insert(all_hits.end(), phits.begin(), phits.end()); - if (!has_peak_annotations && !phits[0].getPeakAnnotations().empty()) - { - has_peak_annotations = true; - } + has_peak_annotations = true; } } @@ -748,25 +744,25 @@ namespace OpenMS // generate flat list int selected_row(-1); // index i is needed, so iterate the old way... - for (Size i = 0; i < layer_->getPeakData()->size(); ++i) + for (Size i = 0; i < layer_->getPeakData()->getMSExperiment().size(); ++i) { - const MSSpectrum& spectrum = (*layer_->getPeakData())[i]; + auto [spectrum, peptide_id] = (*layer_->getPeakData())[i]; const UInt ms_level = spectrum.getMSLevel(); - const vector& pi = spectrum.getPeptideIdentifications(); - const Size id_count = pi.size(); const vector & precursors = spectrum.getPrecursors(); + const Size id_count = peptide_id.getHits().size(); // allow only MS2 OR MS1 with peptideIDs (from Mass Fingerprinting) - if (ms_level != 2 && id_count == 0) + if (ms_level != 2) { continue; } // skip - if (hide_no_identification_->isChecked() && id_count == 0) + if (hide_no_identification_->isChecked() && id_count == 0) { continue; } + // set row background color QColor bg_color = (id_count == 0 ? 
Qt::white : QColor::fromRgb(127,255,148)); @@ -780,107 +776,113 @@ namespace OpenMS } else { - for (Size pi_idx = 0; pi_idx != id_count; ++pi_idx) + // get peptide identifications of current spectrum + #ifdef DEBUG_SPECTRA_ID_VIEW + cout << "Peptide hits: " << peptide_id.getHits().size() << endl; + #endif + + for (Size ph_idx = 0; ph_idx != peptide_id.getHits().size(); ++ph_idx) { - for (Size ph_idx = 0; ph_idx != pi[pi_idx].getHits().size(); ++ph_idx) - { - const PeptideHit& ph = pi[pi_idx].getHits()[ph_idx]; + #ifdef DEBUG_SPECTRA_ID_VIEW + cout << "Peptide hit index: " << ph_idx << endl; + cout << "Peptide hit: " << peptide_id.getHits()[ph_idx].getSequence().toString() << endl; + #endif + const PeptideHit& ph = peptide_id.getHits()[ph_idx]; - // add new row at the end of the table - table_widget_->insertRow(table_widget_->rowCount()); + // add new row at the end of the table + table_widget_->insertRow(table_widget_->rowCount()); - fillRow_(spectrum, i, bg_color); + fillRow_(spectrum, i, bg_color); - table_widget_->setAtBottomRow(ph.getScore(), Clmn::SCORE, bg_color); - table_widget_->setAtBottomRow((int)ph.getRank(), Clmn::RANK, bg_color); - table_widget_->setAtBottomRow(ph.getCharge(), Clmn::CHARGE, bg_color); + table_widget_->setAtBottomRow(ph.getScore(), Clmn::SCORE, bg_color); + table_widget_->setAtBottomRow((int)ph.getRank(), Clmn::RANK, bg_color); + table_widget_->setAtBottomRow(ph.getCharge(), Clmn::CHARGE, bg_color); - // sequence - String seq = ph.getSequence().toString(); - if (seq.empty()) - { - seq = ph.getMetaValue("label"); - } - table_widget_->setAtBottomRow(seq.toQString(), Clmn::SEQUENCE, bg_color); + // sequence + String seq = ph.getSequence().toString(); + if (seq.empty()) + { + seq = ph.getMetaValue("label"); + } + table_widget_->setAtBottomRow(seq.toQString(), Clmn::SEQUENCE, bg_color); - // accession - set protein_accessions = ph.extractProteinAccessionsSet(); - String accessions = ListUtils::concatenate(vector(protein_accessions.begin(), 
protein_accessions.end()), ", "); - table_widget_->setAtBottomRow(accessions.toQString(), Clmn::ACCESSIONS, bg_color); - table_widget_->setAtBottomRow((int)(pi_idx), Clmn::ID_NR, bg_color); - table_widget_->setAtBottomRow((int)(ph_idx), Clmn::PEPHIT_NR, bg_color); + // accession + set protein_accessions = ph.extractProteinAccessionsSet(); + String accessions = ListUtils::concatenate(vector(protein_accessions.begin(), protein_accessions.end()), ", "); + table_widget_->setAtBottomRow(accessions.toQString(), Clmn::ACCESSIONS, bg_color); + table_widget_->setAtBottomRow((int) i, Clmn::ID_NR, bg_color); // spectrum index + table_widget_->setAtBottomRow((int)(ph_idx), Clmn::PEPHIT_NR, bg_color); - bool selected(false); - if (ph.metaValueExists("selected")) + bool selected(false); + if (ph.metaValueExists("selected")) + { + selected = ph.getMetaValue("selected").toString() == "true"; + } + table_widget_->setAtBottomRow(selected, Clmn::CURATED, bg_color); + + // additional precursor infos, e.g. ppm error + if (!precursors.empty()) + { + const Precursor& first_precursor = precursors.front(); + double ppm_error(0); + // Protein:RNA cross-link, Protein-Protein cross-link, or other data with a precomputed precursor error + if (ph.metaValueExists(Constants::UserParam::PRECURSOR_ERROR_PPM_USERPARAM)) + { + ppm_error = fabs((double)ph.getMetaValue(Constants::UserParam::PRECURSOR_ERROR_PPM_USERPARAM)); + } + else if (ph.metaValueExists("OMS:precursor_mz_error_ppm")) // for legacy reasons added in OpenMS 2.5 { - selected = ph.getMetaValue("selected").toString() == "true"; + ppm_error = fabs((double)ph.getMetaValue("OMS:precursor_mz_error_ppm")); } - table_widget_->setAtBottomRow(selected, Clmn::CURATED, bg_color); + else if (!ph.getSequence().empty()) // works for normal linear fragments with the correct modifications included in the AASequence + { + double exp_precursor = first_precursor.getMZ(); + int charge = first_precursor.getCharge(); + double theo_precursor= 
ph.getSequence().getMZ(charge); + ppm_error = fabs((exp_precursor - theo_precursor) / exp_precursor / 1e-6); + } + table_widget_->setAtBottomRow(ppm_error, Clmn::PREC_PPM, bg_color); + } - // additional precursor infos, e.g. ppm error - if (!precursors.empty()) + // add additional meta value columns + if (create_rows_for_commmon_metavalue_->isChecked()) + { + Int current_col = Clmn::PEAK_ANNOTATIONS; + // add peak annotation column (part of meta-value assessment above) + if (has_peak_annotations) { - const Precursor& first_precursor = precursors.front(); - double ppm_error(0); - // Protein:RNA cross-link, Protein-Protein cross-link, or other data with a precomputed precursor error - if (ph.metaValueExists(Constants::UserParam::PRECURSOR_ERROR_PPM_USERPARAM)) - { - ppm_error = fabs((double)ph.getMetaValue(Constants::UserParam::PRECURSOR_ERROR_PPM_USERPARAM)); - } - else if (ph.metaValueExists("OMS:precursor_mz_error_ppm")) // for legacy reasons added in OpenMS 2.5 + // set hidden data for export to TSV + QString annotation; + for (const PeptideHit::PeakAnnotation& pa : ph.getPeakAnnotations()) { - ppm_error = fabs((double)ph.getMetaValue("OMS:precursor_mz_error_ppm")); + annotation += String(pa.mz).toQString() + "|" + + String(pa.intensity).toQString() + "|" + + String(pa.charge).toQString() + "|" + + pa.annotation.toQString() + ";"; } - else if (!ph.getSequence().empty()) // works for normal linear fragments with the correct modifications included in the AASequence - { - double exp_precursor = first_precursor.getMZ(); - int charge = first_precursor.getCharge(); - double theo_precursor= ph.getSequence().getMZ(charge); - ppm_error = fabs((exp_precursor - theo_precursor) / exp_precursor / 1e-6); - } - table_widget_->setAtBottomRow(ppm_error, Clmn::PREC_PPM, bg_color); + QTableWidgetItem* item = table_widget_->setAtBottomRow("show", current_col, bg_color, Qt::blue); + item->setData(Qt::UserRole, annotation); + ++current_col; } - - // add additional meta value columns - 
if (create_rows_for_commmon_metavalue_->isChecked()) + for (const auto& ck : common_keys) { - Int current_col = Clmn::PEAK_ANNOTATIONS; - // add peak annotation column (part of meta-value assessment above) - if (has_peak_annotations) + const DataValue& dv = ph.getMetaValue(ck); + if (dv.valueType() == DataValue::DOUBLE_VALUE) { - // set hidden data for export to TSV - QString annotation; - for (const PeptideHit::PeakAnnotation& pa : ph.getPeakAnnotations()) - { - annotation += String(pa.mz).toQString() + "|" + - String(pa.intensity).toQString() + "|" + - String(pa.charge).toQString() + "|" + - pa.annotation.toQString() + ";"; - } - QTableWidgetItem* item = table_widget_->setAtBottomRow("show", current_col, bg_color, Qt::blue); - item->setData(Qt::UserRole, annotation); - ++current_col; + table_widget_->setAtBottomRow(double(dv), current_col, bg_color); } - for (const auto& ck : common_keys) + else { - const DataValue& dv = ph.getMetaValue(ck); - if (dv.valueType() == DataValue::DOUBLE_VALUE) - { - table_widget_->setAtBottomRow(double(dv), current_col, bg_color); - } - else - { - table_widget_->setAtBottomRow(dv.toQString(), current_col, bg_color); - } - - ++current_col; + table_widget_->setAtBottomRow(dv.toQString(), current_col, bg_color); } + + ++current_col; } } } } - if ((int)i == restore_spec_index) + if ((int)i == restore_spec_index) /* compare against the loop index so that only the previously selected spectrum restores the selection */ { // get model index of selected spectrum, // as table_widget_->rowCount() returns rows starting from 1, selected row is 1 less than the returned row @@ -937,7 +939,7 @@ namespace OpenMS void SpectraIDViewTab::saveIDs_() { // no valid peak layer attached - if (layer_ == nullptr || layer_->getPeakData()->empty() || layer_->type != LayerDataBase::DT_PEAK) + if (layer_ == nullptr || layer_->getPeakData()->getMSExperiment().empty() || layer_->type != LayerDataBase::DT_PEAK) { return; } @@ -945,7 +947,7 @@ namespace OpenMS // synchronize PeptideHits with the annotations in the spectrum dynamic_cast(layer_)->synchronizePeakAnnotations(); -
vector prot_id = (*layer_->getPeakData()).getProteinIdentifications(); + vector prot_id = layer_->getPeakData()->getProteinIdentifications(); vector all_pep_ids; // collect PeptideIdentifications from each spectrum, while making sure each spectrum is only considered once @@ -963,9 +965,8 @@ namespace OpenMS } added_spectra.insert(spectrum_index); - // collect all PeptideIdentifications from this spectrum - const vector& pep_id = (*layer_->getPeakData())[spectrum_index].getPeptideIdentifications(); - copy(pep_id.begin(), pep_id.end(), back_inserter(all_pep_ids)); + const PeptideIdentification& pep_id = (*layer_->getPeakData())[spectrum_index].second; + all_pep_ids.push_back(pep_id); } QString filename = GUIHelpers::getSaveFilename(this, "Save file", "", FileTypeList({FileTypes::IDXML, FileTypes::MZIDENTML}), true, FileTypes::IDXML); @@ -987,17 +988,17 @@ namespace OpenMS // extract position of the correct Spectrum, PeptideIdentification and PeptideHit from the table int row = item->row(); String selected = item->checkState() == Qt::Checked ? 
"true" : "false"; - int spectrum_index = table_widget_->item(row, Clmn::SPEC_INDEX)->data(Qt::DisplayRole).toInt(); + // int spectrum_index = table_widget_->item(row, Clmn::SPEC_INDEX)->data(Qt::DisplayRole).toInt(); int num_id = table_widget_->item(row, Clmn::ID_NR)->data(Qt::DisplayRole).toInt(); int num_ph = table_widget_->item(row, Clmn::PEPHIT_NR)->data(Qt::DisplayRole).toInt(); // maintain sortability of our checkbox column TableView::updateCheckBoxItem(item); - vector& pep_id = (*layer_->getPeakDataMuteable())[spectrum_index].getPeptideIdentifications(); + PeptideIdentification& pep_id = (*layer_->getPeakDataMuteable())[num_id].second; // update "selected" value in the correct PeptideHits - vector& hits = pep_id[num_id].getHits(); + vector& hits = pep_id.getHits(); // XL-MS specific case, both PeptideHits belong to the same cross-link if (hits[0].metaValueExists("xl_chain")) { @@ -1015,8 +1016,23 @@ namespace OpenMS void SpectraIDViewTab::fillRow_(const MSSpectrum& spectrum, const int spec_index, const QColor& background_color) { + // fill spectrum information in columns const vector& precursors = spectrum.getPrecursors(); + #ifdef DEBUG_SPECTRA_ID_VIEW + cout << "Filling row in SpectraIDViewTab" << endl; + cout << spectrum.getMSLevel() << endl + << "RT: " << spectrum.getRT() << endl + << "Scan mode: " << spectrum.getInstrumentSettings().getScanMode() << endl + << "Zoom scan: " << spectrum.getInstrumentSettings().getZoomScan() << endl + << "Spectrum index: " << spec_index << endl + << "Precursor MZ: " << (precursors.empty() ? 0 : precursors.front().getMZ()) << endl + << "Precursor charge: " << (precursors.empty() ? 0 : precursors.front().getCharge()) << endl + << "Precursor intensity: " << (precursors.empty() ? 
0 : precursors.front().getIntensity()) << endl + << endl; + #endif + + table_widget_->setAtBottomRow(QString::number(spectrum.getMSLevel()), Clmn::MS_LEVEL, background_color); table_widget_->setAtBottomRow(spec_index, Clmn::SPEC_INDEX, background_color); table_widget_->setAtBottomRow(spectrum.getRT(), Clmn::RT, background_color); diff --git a/src/openms_gui/source/VISUAL/SpectraTreeTab.cpp b/src/openms_gui/source/VISUAL/SpectraTreeTab.cpp index 8d9cd47d440..831a6eba57c 100644 --- a/src/openms_gui/source/VISUAL/SpectraTreeTab.cpp +++ b/src/openms_gui/source/VISUAL/SpectraTreeTab.cpp @@ -334,16 +334,16 @@ namespace OpenMS std::vector parent_stack; parent_stack.push_back(nullptr); bool fail = false; - last_peakmap_ = &(*cl.getPeakData()); + last_peakmap_ = &(cl.getPeakData()->getMSExperiment()); spectra_treewidget_->setHeaders(ClmnPeak::HEADER_NAMES); - for (Size i = 0; i < cl.getPeakData()->size(); ++i) + for (Size i = 0; i < cl.getPeakData()->getMSExperiment().size(); ++i) { - const MSSpectrum& current_spec = (*cl.getPeakData())[i]; + const MSSpectrum& current_spec = cl.getPeakData()->getMSExperiment()[i]; if (i > 0) { - const MSSpectrum& prev_spec = (*cl.getPeakData())[i-1]; + const MSSpectrum& prev_spec = cl.getPeakData()->getMSExperiment()[i-1]; // current MS level = previous MS level + 1 (e.g. 
current: MS2, previous: MS1) if (current_spec.getMSLevel() == prev_spec.getMSLevel() + 1) { @@ -412,9 +412,9 @@ namespace OpenMS spectra_treewidget_->clear(); toplevel_items.clear(); selected_item = nullptr; - for (Size i = 0; i < cl.getPeakData()->size(); ++i) + for (Size i = 0; i < cl.getPeakData()->getMSExperiment().size(); ++i) { - const MSSpectrum& current_spec = (*cl.getPeakData())[i]; + const MSSpectrum& current_spec = cl.getPeakData()->getMSExperiment()[i]; toplevel_item = new QTreeWidgetItem(); populatePeakDataRow_(toplevel_item, i, current_spec); @@ -438,9 +438,9 @@ namespace OpenMS spectra_treewidget_->setCurrentItem(selected_item); spectra_treewidget_->scrollToItem(selected_item); } - if (cl.getPeakData()->size() > 1) + if (cl.getPeakData()->getMSExperiment().size() > 1) { - more_than_one_spectrum = false; + more_than_one_spectrum = false; // why is this false if > 1??????? } } // Branch if the current layer is a chromatogram (either indicated by its @@ -450,13 +450,13 @@ namespace OpenMS const auto cl = *lp; LayerDataBase::ConstExperimentSharedPtrType exp = cl.getChromatogramData(); - if (last_peakmap_ == exp.get()) + if (last_peakmap_ == &exp->getMSExperiment()) { // underlying data did not change (which is ALWAYS the chromatograms, never peakdata!) // --> Do not update (could be many 10k entries for sqMass data and the lag would be unbearable ...) return; } - last_peakmap_ = exp.get(); + last_peakmap_ = &exp->getMSExperiment(); spectra_treewidget_->clear(); // New data: // We need to redraw the whole Widget because the we have changed all the layers. @@ -464,22 +464,23 @@ namespace OpenMS // whether multiple ones are selected. 
bool multiple_select = false; int this_selected_item = -1; - if (!cl.getChromatogramData()->empty()) + const MSExperiment& chrom_data = cl.getChromatogramData()->getMSExperiment(); + if (!chrom_data.empty()) { - if (cl.getChromatogramData()->metaValueExists("multiple_select")) + if (chrom_data.metaValueExists("multiple_select")) { - multiple_select = cl.getChromatogramData()->getMetaValue("multiple_select").toBool(); + multiple_select = chrom_data.getMetaValue("multiple_select").toBool(); } - if (cl.getChromatogramData()->metaValueExists("selected_chromatogram")) + if (chrom_data.metaValueExists("selected_chromatogram")) { - this_selected_item = (int)cl.getChromatogramData()->getMetaValue("selected_chromatogram"); + this_selected_item = (int)chrom_data.getMetaValue("selected_chromatogram"); } } // create a header list spectra_treewidget_->setHeaders(ClmnChrom::HEADER_NAMES); - if (exp->getChromatograms().size() > 1) + if (exp->getMSExperiment().getChromatograms().size() > 1) { more_than_one_spectrum = false; } @@ -491,9 +492,9 @@ namespace OpenMS std::map, Precursor::MZLess>& map_precursor_to_chrom_idx = map_precursor_to_chrom_idx_cache_[(size_t)(exp.get())]; if (!was_cached) { // create cache: collect all precursor that fall into the mz rt window - for (auto it = exp->getChromatograms().cbegin(); it != exp->getChromatograms().cend(); ++it) + for (auto it = exp->getMSExperiment().getChromatograms().cbegin(); it != exp->getMSExperiment().getChromatograms().cend(); ++it) { - map_precursor_to_chrom_idx[it->getPrecursor()].push_back(it - exp->getChromatograms().begin()); + map_precursor_to_chrom_idx[it->getPrecursor()].push_back(it - exp->getMSExperiment().getChromatograms().begin()); } } @@ -525,7 +526,7 @@ namespace OpenMS // Show single chromatogram: iterate over all chromatograms corresponding to the current precursor and add action for the single chromatogram for (const Size chrom_idx : indx) { - const MSChromatogram& current_chromatogram = 
exp->getChromatograms()[chrom_idx]; + const MSChromatogram& current_chromatogram = exp->getMSExperiment().getChromatograms()[chrom_idx]; // Children chromatogram entry QTreeWidgetItem* sub_item = new QTreeWidgetItem(toplevel_item); diff --git a/src/openms_gui/source/VISUAL/TVIdentificationViewController.cpp b/src/openms_gui/source/VISUAL/TVIdentificationViewController.cpp index 45744d01f15..d4d7638f769 100644 --- a/src/openms_gui/source/VISUAL/TVIdentificationViewController.cpp +++ b/src/openms_gui/source/VISUAL/TVIdentificationViewController.cpp @@ -28,8 +28,9 @@ #include #include - #include +#include + #include #include @@ -54,6 +55,9 @@ namespace OpenMS void TVIdentificationViewController::showSpectrumAsNew1D(int spectrum_index, int peptide_id_index, int peptide_hit_index) { + #ifdef DEBUG_IDENTIFICATION_VIEW + cout << "TVIdentificationViewController::showSpectrumAsNew1D() called" << endl; + #endif // basic behavior 1 auto& layer = tv_->getActiveCanvas()->getCurrentLayer(); @@ -97,44 +101,51 @@ namespace OpenMS // get peptide identification auto layer_1d_peak = dynamic_cast(&w->canvas()->getCurrentLayer()); - const vector& pis = layer_1d_peak->getCurrentSpectrum().getPeptideIdentifications(); + const auto& pids = layer_1d_peak->getPeakData()->getPeptideIdentifications(); + if (peptide_id_index >= static_cast(pids.size())) + { + OPENMS_LOG_FATAL_ERROR << "PeptideIdentification index out of bounds! Aborting!" << endl; + return; + } + const PeptideIdentification& pi = pids[peptide_id_index]; - if (!pis.empty()) + switch (layer_1d_peak->getCurrentSpectrum().getMSLevel()) { - switch (layer_1d_peak->getCurrentSpectrum().getMSLevel()) - { - // mass fingerprint annotation of name etc. - case 1: - { - addPeakAnnotations_(pis); - break; - } + // mass fingerprint annotation of name etc. 
+ case 1: + { + addPeakAnnotations_(std::vector(1, pi)); + break; + } - // annotation with stored fragments or synthesized theoretical spectrum - case 2: + // annotation with stored fragments or synthesized theoretical spectrum + case 2: + { + // check if index in bounds and hits are present + if (peptide_hit_index < static_cast(pi.getHits().size())) { - // check if index in bounds and hits are present - if (peptide_id_index < static_cast(pis.size()) - && peptide_hit_index < static_cast(pis[peptide_id_index].getHits().size())) + #ifdef DEBUG_IDENTIFICATION_VIEW + cout << "Creating annotations for PeptideIdentification index: " << peptide_id_index << endl; + cout << "PeptideHit index: " << peptide_hit_index << endl; + cout << "PeptideHit: " << pi.getHits()[peptide_hit_index].getSequence().toString() << endl; + #endif + // get hit + PeptideHit ph = pi.getHits()[peptide_hit_index]; + if (ph.getPeakAnnotations().empty()) { - // get hit - PeptideHit ph = pis[peptide_id_index].getHits()[peptide_hit_index]; - if (ph.getPeakAnnotations().empty()) - { - // if no fragment annotations are stored, create a theoretical spectrum - addTheoreticalSpectrumLayer_(ph); - } - else - { - // otherwise, use stored fragment annotations - addPeakAnnotationsFromID_(ph); - } + // if no fragment annotations are stored, create a theoretical spectrum + addTheoreticalSpectrumLayer_(ph); + } + else + { + // otherwise, use stored fragment annotations + addPeakAnnotationsFromID_(ph); } - break; } - default: - OPENMS_LOG_WARN << "Annotation of MS level > 2 not supported.!" << endl; + break; } + default: + OPENMS_LOG_WARN << "Annotation of MS level > 2 not supported.!" << endl; } // TODO Why would this need to trigger an update in e.g. the Tab Views?? 
@@ -148,6 +159,10 @@ namespace OpenMS void TVIdentificationViewController::addPeakAnnotations_(const vector& ph) { + #ifdef DEBUG_IDENTIFICATION_VIEW + cout << "TVIdentificationViewController::addPeakAnnotations() called" << endl; + #endif + // called anew for every click on a spectrum auto getCurrentLayer = [&]() -> LayerData1DPeak& { return dynamic_cast(tv_->getActive1DWidget()->canvas()->getCurrentLayer()); }; @@ -282,6 +297,10 @@ namespace OpenMS int peptide_id_index, int peptide_hit_index) { + #ifdef DEBUG_IDENTIFICATION_VIEW + cout << "TVIdentificationViewController::activate1DSpectrum() called" << endl; + #endif + Plot1DWidget* widget_1D = tv_->getActive1DWidget(); // if no active 1D widget is present @@ -297,35 +316,43 @@ namespace OpenMS auto current_layer = [&]() -> LayerData1DPeak& { return dynamic_cast(tv_->getActive1DWidget()->canvas()->getCurrentLayer()); }; widget_1D->canvas()->activateSpectrum(spectrum_index); - current_layer().peptide_id_index = peptide_id_index; + current_layer().peptide_id_index = peptide_id_index; // should always be 0 current_layer().peptide_hit_index = peptide_hit_index; if (current_layer().type == LayerDataBase::DT_PEAK) { UInt ms_level = current_layer().getCurrentSpectrum().getMSLevel(); - const vector& pis = current_layer().getCurrentSpectrum().getPeptideIdentifications(); + const PeptideIdentification& pid = current_layer().getPeakData()->getPeptideIdentifications()[spectrum_index]; + #ifdef DEBUG_IDENTIFICATION_VIEW + cout << "PeptideIdentification index: " << peptide_id_index << endl; + cout << "PeptideHit index: " << peptide_hit_index << endl; + cout << "PeptideHit: " << pid.getHits()[peptide_hit_index].getSequence().toString() << endl; + cout << "MS level: " << ms_level << endl; + cout << "Spectrum index: " << spectrum_index << endl; + #endif + switch (ms_level) { case 1: // mass fingerprint annotation of name etc and precursor labels { - addPeakAnnotations_(pis); + addPeakAnnotations_(std::vector(1, pid)); vector 
precursors; // collect all MS2 spectra precursor till next MS1 spectrum is encountered - for (Size i = spectrum_index + 1; i < current_layer().getPeakData()->size(); ++i) + for (Size i = spectrum_index + 1; i < current_layer().getPeakData()->getMSExperiment().size(); ++i) { - if ((*current_layer().getPeakData())[i].getMSLevel() == 1) + if (current_layer().getPeakData()->getMSExperiment()[i].getMSLevel() == 1) { break; } // skip MS2 without precursor - if ((*current_layer().getPeakData())[i].getPrecursors().empty()) + if (current_layer().getPeakData()->getMSExperiment()[i].getPrecursors().empty()) { continue; } // there should be only one precursor per MS2 spectrum. - vector pcs = (*current_layer().getPeakData())[i].getPrecursors(); + vector pcs = current_layer().getPeakData()->getMSExperiment()[i].getPrecursors(); copy(pcs.begin(), pcs.end(), back_inserter(precursors)); } addPrecursorLabels1D_(precursors); @@ -333,134 +360,136 @@ namespace OpenMS } case 2: // annotation with stored fragments or synthesized theoretical spectrum { - // check if index in bounds and hits are present - if (peptide_id_index < static_cast(pis.size()) && peptide_hit_index < static_cast(pis[peptide_id_index].getHits().size())) - { - // get selected hit - PeptideHit ph = pis[peptide_id_index].getHits()[peptide_hit_index]; + // get selected hit + PeptideHit ph = pid.getHits()[peptide_hit_index]; - if (ph.getPeakAnnotations().empty()) - { - // if no fragment annotations are stored, create a theoretical spectrum - addTheoreticalSpectrumLayer_(ph); - - // synchronize PeptideHits with the annotations in the spectrum - current_layer().synchronizePeakAnnotations(); - // remove labels and theoretical spectrum (will be recreated using PH annotations) - removeGraphicalPeakAnnotations_(spectrum_index); - removeTheoreticalSpectrumLayer_(); + if (ph.getPeakAnnotations().empty()) + { + // if no fragment annotations are stored, create a theoretical spectrum + addTheoreticalSpectrumLayer_(ph); + + // 
synchronize PeptideHits with the annotations in the spectrum + current_layer().synchronizePeakAnnotations(); + // remove labels and theoretical spectrum (will be recreated using PH annotations) + removeGraphicalPeakAnnotations_(spectrum_index); + removeTheoreticalSpectrumLayer_(); + + // return if no active 1D widget is present + if (widget_1D == nullptr) + { + return; + } + // update current PeptideHit with the synchronized one + widget_1D->canvas()->activateSpectrum(spectrum_index); + const PeptideIdentification & pi2 = current_layer().getPeakData()->getPeptideIdentifications()[spectrum_index]; + ph = pi2.getHits()[peptide_hit_index]; - // return if no active 1D widget is present - if (widget_1D == nullptr) - { - return; - } - // update current PeptideHit with the synchronized one - widget_1D->canvas()->activateSpectrum(spectrum_index); - const vector& pis2 = current_layer().getCurrentSpectrum().getPeptideIdentifications(); - ph = pis2[peptide_id_index].getHits()[peptide_hit_index]; + } + // use stored fragment annotations + #ifdef DEBUG_IDENTIFICATION_VIEW + cout << "Creating annotations for PeptideIdentification index: " << peptide_id_index << endl; + cout << "PeptideHit index: " << peptide_hit_index << endl; + cout << "PeptideHit: " << ph.getSequence().toString() << endl; + #endif + addPeakAnnotationsFromID_(ph); + + if (ph.metaValueExists(Constants::UserParam::OPENPEPXL_XL_TYPE)) // if this meta value exists, this should be an XL-MS annotation + { + String box_text; + String vert_bar = "|"; + if (ph.getMetaValue(Constants::UserParam::OPENPEPXL_XL_TYPE) == "loop-link") + { + String hor_bar = "_"; + String seq_alpha = ph.getSequence().toUnmodifiedString(); + int xl_pos_alpha = String(ph.getMetaValue(Constants::UserParam::OPENPEPXL_XL_POS1)).toInt(); + int xl_pos_beta = String(ph.getMetaValue(Constants::UserParam::OPENPEPXL_XL_POS2)).toInt() - xl_pos_alpha - 1; + + String alpha_cov; + String beta_cov; + extractCoverageStrings(ph.getPeakAnnotations(), alpha_cov, 
beta_cov, seq_alpha.size(), 0); + + // String formatting + box_text += alpha_cov + "
" + seq_alpha + "
" + String(xl_pos_alpha, ' ') + vert_bar + n_times(xl_pos_beta, hor_bar) + vert_bar; + // cut out line: "
" + String(xl_pos_alpha, ' ') + vert_bar + String(xl_pos_beta, ' ') + vert_bar + } - // use stored fragment annotations - addPeakAnnotationsFromID_(ph); - - if (ph.metaValueExists(Constants::UserParam::OPENPEPXL_XL_TYPE)) // if this meta value exists, this should be an XL-MS annotation + else if (ph.getMetaValue(Constants::UserParam::OPENPEPXL_XL_TYPE) == "cross-link") { - String box_text; - String vert_bar = "|"; - - if (ph.getMetaValue(Constants::UserParam::OPENPEPXL_XL_TYPE) == "loop-link") - { - String hor_bar = "_"; - String seq_alpha = ph.getSequence().toUnmodifiedString(); - int xl_pos_alpha = String(ph.getMetaValue(Constants::UserParam::OPENPEPXL_XL_POS1)).toInt(); - int xl_pos_beta = String(ph.getMetaValue(Constants::UserParam::OPENPEPXL_XL_POS2)).toInt() - xl_pos_alpha - 1; - - String alpha_cov; - String beta_cov; - extractCoverageStrings(ph.getPeakAnnotations(), alpha_cov, beta_cov, seq_alpha.size(), 0); - - // String formatting - box_text += alpha_cov + "
" + seq_alpha + "
" + String(xl_pos_alpha, ' ') + vert_bar + n_times(xl_pos_beta, hor_bar) + vert_bar; - // cut out line: "
" + String(xl_pos_alpha, ' ') + vert_bar + String(xl_pos_beta, ' ') + vert_bar + - } - else if (ph.getMetaValue(Constants::UserParam::OPENPEPXL_XL_TYPE) == "cross-link") - { - String seq_alpha = ph.getSequence().toUnmodifiedString(); - String seq_beta = AASequence::fromString(ph.getMetaValue(Constants::UserParam::OPENPEPXL_BETA_SEQUENCE)).toUnmodifiedString(); - int xl_pos_alpha = String(ph.getMetaValue(Constants::UserParam::OPENPEPXL_XL_POS1)).toInt(); - int xl_pos_beta = String(ph.getMetaValue(Constants::UserParam::OPENPEPXL_XL_POS2)).toInt(); - - // String formatting - Size prefix_length = max(xl_pos_alpha, xl_pos_beta); - //Size suffix_length = max(seq_alpha.size() - xl_pos_alpha, seq_beta.size() - xl_pos_beta); - Size alpha_space = prefix_length - xl_pos_alpha; - Size beta_space = prefix_length - xl_pos_beta; - - String alpha_cov; - String beta_cov; - extractCoverageStrings(ph.getPeakAnnotations(), alpha_cov, beta_cov, seq_alpha.size(), seq_beta.size()); - - box_text += String(alpha_space, ' ') + alpha_cov + "
" + String(alpha_space, ' ') + seq_alpha + "
" + String(prefix_length, ' ') + vert_bar + "
" + String(beta_space, ' ') + seq_beta + "
" + String(beta_space, ' ') + beta_cov; - // color: - } - else // if (ph.getMetaValue(Constants::UserParam::OPENPEPXL_XL_TYPE) == "mono-link") - { - String seq_alpha = ph.getSequence().toUnmodifiedString(); - int xl_pos_alpha = String(ph.getMetaValue(Constants::UserParam::OPENPEPXL_XL_POS1)).toInt(); - Size prefix_length = xl_pos_alpha; + String seq_alpha = ph.getSequence().toUnmodifiedString(); + String seq_beta = AASequence::fromString(ph.getMetaValue(Constants::UserParam::OPENPEPXL_BETA_SEQUENCE)).toUnmodifiedString(); + int xl_pos_alpha = String(ph.getMetaValue(Constants::UserParam::OPENPEPXL_XL_POS1)).toInt(); + int xl_pos_beta = String(ph.getMetaValue(Constants::UserParam::OPENPEPXL_XL_POS2)).toInt(); + + // String formatting + Size prefix_length = max(xl_pos_alpha, xl_pos_beta); + //Size suffix_length = max(seq_alpha.size() - xl_pos_alpha, seq_beta.size() - xl_pos_beta); + Size alpha_space = prefix_length - xl_pos_alpha; + Size beta_space = prefix_length - xl_pos_beta; + + String alpha_cov; + String beta_cov; + extractCoverageStrings(ph.getPeakAnnotations(), alpha_cov, beta_cov, seq_alpha.size(), seq_beta.size()); + + box_text += String(alpha_space, ' ') + alpha_cov + "
" + String(alpha_space, ' ') + seq_alpha + "
" + String(prefix_length, ' ') + vert_bar + "
" + String(beta_space, ' ') + seq_beta + "
" + String(beta_space, ' ') + beta_cov; + // color: + } + else // if (ph.getMetaValue(Constants::UserParam::OPENPEPXL_XL_TYPE) == "mono-link") + { + String seq_alpha = ph.getSequence().toUnmodifiedString(); + int xl_pos_alpha = String(ph.getMetaValue(Constants::UserParam::OPENPEPXL_XL_POS1)).toInt(); + Size prefix_length = xl_pos_alpha; - String alpha_cov; - String beta_cov; - extractCoverageStrings(ph.getPeakAnnotations(), alpha_cov, beta_cov, seq_alpha.size(), 0); + String alpha_cov; + String beta_cov; + extractCoverageStrings(ph.getPeakAnnotations(), alpha_cov, beta_cov, seq_alpha.size(), 0); - box_text += alpha_cov + "
" + seq_alpha + "
" + String(prefix_length, ' ') + vert_bar; + box_text += alpha_cov + "
" + seq_alpha + "
" + String(prefix_length, ' ') + vert_bar; - } - box_text = R"(
)" + box_text + "
"; - widget_1D->canvas()->setTextBox(box_text.toQString()); } - else if (ph.getPeakAnnotations().empty()) // only write the sequence + box_text = R"(
)" + box_text + "
"; + widget_1D->canvas()->setTextBox(box_text.toQString()); + } + else if (ph.getPeakAnnotations().empty()) // only write the sequence + { + String seq = ph.getSequence().toString(); + if (seq.empty()) + { + seq = ph.getMetaValue("label"); // e.g. for RNA sequences + } + widget_1D->canvas()->setTextBox(seq.toQString()); + } + else if (widget_1D->canvas()->isIonLadderVisible()) + { + if (!ph.getSequence().empty()) // generate sequence diagram for a peptide { - String seq = ph.getSequence().toString(); - if (seq.empty()) - { - seq = ph.getMetaValue("label"); // e.g. for RNA sequences - } - widget_1D->canvas()->setTextBox(seq.toQString()); + // @TODO: read ion list from the input file (meta value) + static vector top_ions = ListUtils::create("a,b,c"); + static vector bottom_ions = ListUtils::create("x,y,z"); + String diagram = generateSequenceDiagram_( + ph.getSequence(), + ph.getPeakAnnotations(), + top_ions, + bottom_ions); + widget_1D->canvas()->setTextBox(diagram.toQString()); } - else if (widget_1D->canvas()->isIonLadderVisible()) + else if (ph.metaValueExists("label")) // generate sequence diagram for RNA { - if (!ph.getSequence().empty()) // generate sequence diagram for a peptide + try { // @TODO: read ion list from the input file (meta value) - static vector top_ions = ListUtils::create("a,b,c"); - static vector bottom_ions = ListUtils::create("x,y,z"); - String diagram = generateSequenceDiagram_( - ph.getSequence(), - ph.getPeakAnnotations(), - top_ions, - bottom_ions); + NASequence na_seq = NASequence::fromString(ph.getMetaValue("label")); + static vector top_ions = ListUtils::create("a-B,a,b,c,d"); + static vector bottom_ions = ListUtils::create("w,x,y,z"); + String diagram = generateSequenceDiagram_(na_seq, ph.getPeakAnnotations(), + top_ions, bottom_ions); widget_1D->canvas()->setTextBox(diagram.toQString()); } - else if (ph.metaValueExists("label")) // generate sequence diagram for RNA + catch (Exception::ParseError&) // label doesn't contain have a 
valid seq. { - try - { - // @TODO: read ion list from the input file (meta value) - NASequence na_seq = NASequence::fromString(ph.getMetaValue("label")); - static vector top_ions = ListUtils::create("a-B,a,b,c,d"); - static vector bottom_ions = ListUtils::create("w,x,y,z"); - String diagram = generateSequenceDiagram_(na_seq, ph.getPeakAnnotations(), - top_ions, bottom_ions); - widget_1D->canvas()->setTextBox(diagram.toQString()); - } - catch (Exception::ParseError&) // label doesn't contain have a valid seq. - { - } } } } + break; } default: @@ -906,6 +935,9 @@ namespace OpenMS void TVIdentificationViewController::addTheoreticalSpectrumLayer_(const PeptideHit& ph) { + #ifdef DEBUG_IDENTIFICATION_VIEW + cout << "Adding theoretical spectrum layer" << endl; + #endif PlotCanvas* current_canvas = tv_->getActive1DWidget()->canvas(); auto& current_layer = dynamic_cast(current_canvas->getCurrentLayer()); const SpectrumType& current_spectrum = current_layer.getCurrentSpectrum(); @@ -946,9 +978,8 @@ namespace OpenMS spec_id_view_->ignore_update = true; RAIICleanup cleanup([&]() { spec_id_view_->ignore_update = false; }); - PeakMap new_exp; - new_exp.addSpectrum(theo_spectrum); - ExperimentSharedPtrType new_exp_sptr(new PeakMap(new_exp)); + ExperimentSharedPtrType new_exp_sptr = boost::make_shared(); + new_exp_sptr->getMSExperiment().addSpectrum(theo_spectrum); LayerDataBase::ODExperimentSharedPtrType od_dummy(new OnDiscMSExperiment()); String layer_caption = aa_sequence.toString() + " (identification view)"; current_canvas->addPeakLayer(new_exp_sptr, od_dummy, layer_caption); @@ -1061,12 +1092,13 @@ namespace OpenMS void TVIdentificationViewController::removeGraphicalPeakAnnotations_(int spectrum_index) { + #ifdef DEBUG_IDENTIFICATION_VIEW + cout << "Removing graphical peak annotations." 
<< endl; + #endif + auto* widget_1D = tv_->getActive1DWidget(); auto& current_layer = widget_1D->canvas()->getCurrentLayer(); - #ifdef DEBUG_IDENTIFICATION_VIEW - cout << "Removing peak annotations." << endl; - #endif // remove all graphical peak annotations as these will be recreated from the stored peak annotations Annotations1DContainer& las = current_layer.getAnnotations(spectrum_index); auto new_end = remove_if(las.begin(), las.end(), @@ -1084,6 +1116,10 @@ namespace OpenMS void TVIdentificationViewController::deactivate1DSpectrum(int spectrum_index) { + #ifdef DEBUG_IDENTIFICATION_VIEW + cout << "Deactivating 1D spectrum with index: " << spectrum_index << endl; + #endif + // Retrieve active 1D widget Plot1DWidget* widget_1D = tv_->getActive1DWidget(); @@ -1096,11 +1132,11 @@ namespace OpenMS // Return if no valid peak layer attached auto* current_layer_ptr = dynamic_cast(¤t_layer); - if (!current_layer_ptr || current_layer_ptr->getPeakData()->empty()) + if (!current_layer_ptr || current_layer_ptr->getPeakData()->getMSExperiment().empty()) { return; } - MSSpectrum& spectrum = (*current_layer_ptr->getPeakDataMuteable())[spectrum_index]; + MSSpectrum& spectrum = (*current_layer_ptr->getPeakDataMuteable()).getMSExperiment()[spectrum_index]; int ms_level = spectrum.getMSLevel(); if (ms_level == 2) { @@ -1121,6 +1157,10 @@ namespace OpenMS void TVIdentificationViewController::addPeakAnnotationsFromID_(const PeptideHit& hit) { + #ifdef DEBUG_IDENTIFICATION_VIEW + cout << "Adding peak annotations from ID" << endl; + #endif + // get annotations and sequence const vector& annotations = hit.getPeakAnnotations(); @@ -1245,6 +1285,10 @@ namespace OpenMS void TVIdentificationViewController::removeTheoreticalSpectrumLayer_() { + #ifdef DEBUG_IDENTIFICATION_VIEW + cout << "Removing theoretical spectrum layer" << endl; + #endif + auto* spectrum_widget_1D = tv_->getActive1DWidget(); if (spectrum_widget_1D) { @@ -1270,6 +1314,10 @@ namespace OpenMS // override void 
TVIdentificationViewController::activateBehavior() { + #ifdef DEBUG_IDENTIFICATION_VIEW + cout << "Activating identification view" << endl; + #endif + Plot1DWidget* w = tv_->getActive1DWidget(); if (w == nullptr) { @@ -1283,16 +1331,24 @@ namespace OpenMS // find first MS2 spectrum with peptide identification and set current spectrum to it if (current_spectrum.getMSLevel() == 1) // no fragment spectrum { - for (Size i = 0; i < current_layer.getPeakData()->size(); ++i) + for (Size i = 0; i < current_layer.getPeakData()->getMSExperiment().size(); ++i) { - UInt ms_level = (*current_layer.getPeakData())[i].getMSLevel(); - const vector peptide_ids = (*current_layer.getPeakData())[i].getPeptideIdentifications(); - Size peptide_ids_count = peptide_ids.size(); + UInt ms_level = current_layer.getPeakData()->getMSExperiment()[i].getMSLevel(); + + if (ms_level != 2) continue; - if (ms_level != 2 || peptide_ids_count == 0) // skip non ms2 spectra and spectra with no identification + const vector& peptide_ids = current_layer.getPeakData()->getPeptideIdentifications(); + if (i >= peptide_ids.size()) + { + OPENMS_LOG_FATAL_ERROR << "Peptide identification index out of bounds!" 
<< endl; + } + const PeptideIdentification& peptide_id = peptide_ids[i]; + + if (peptide_id.getHits().empty()) // skip spectra with no identification { continue; } + OPENMS_LOG_DEBUG << "During activation, found first MS2 spectrum with peptide identification: " << i << endl; current_layer.setCurrentIndex(i); break; } @@ -1302,6 +1358,10 @@ namespace OpenMS // override void TVIdentificationViewController::deactivateBehavior() { + #ifdef DEBUG_IDENTIFICATION_VIEW + cout << "Deactivating identification view" << endl; + #endif + Plot1DWidget* widget_1D = tv_->getActive1DWidget(); // return if no active 1D widget is present diff --git a/src/openms_gui/source/VISUAL/TVSpectraViewController.cpp b/src/openms_gui/source/VISUAL/TVSpectraViewController.cpp index 361535b0c2e..86e7d720e34 100644 --- a/src/openms_gui/source/VISUAL/TVSpectraViewController.cpp +++ b/src/openms_gui/source/VISUAL/TVSpectraViewController.cpp @@ -74,9 +74,9 @@ namespace OpenMS { // get caption (either chromatogram idx or peptide sequence, if available) String basename_suffix; - if (chrom_exp_sptr->metaValueExists("peptide_sequence")) + if (chrom_exp_sptr->getMSExperiment().metaValueExists("peptide_sequence")) { - basename_suffix = String(chrom_exp_sptr->getMetaValue("peptide_sequence")); + basename_suffix = String(chrom_exp_sptr->getMSExperiment().getMetaValue("peptide_sequence")); } ((basename_suffix += "[") += index) += "]"; diff --git a/src/pyOpenMS/README_WRAPPING_NEW_CLASSES b/src/pyOpenMS/README_WRAPPING_NEW_CLASSES index b000416feca..d7b11c48dda 100644 --- a/src/pyOpenMS/README_WRAPPING_NEW_CLASSES +++ b/src/pyOpenMS/README_WRAPPING_NEW_CLASSES @@ -32,8 +32,8 @@ cdef extern from "" namespace "OpenMS": # wrap-inherits: # DefaultParamHandler - ClassName() nogil except + - ClassName(ClassName) nogil except + + ClassName() except + nogil + ClassName(ClassName) except + nogil - make sure to use "ClassName:" instead of "ClassName(DefaultParamHandler)" to diff --git a/src/pyOpenMS/addons/MzMLFile.pyx 
b/src/pyOpenMS/addons/MzMLFile.pyx index e70617ca9f9..7c0cbc193e4 100644 --- a/src/pyOpenMS/addons/MzMLFile.pyx +++ b/src/pyOpenMS/addons/MzMLFile.pyx @@ -13,10 +13,10 @@ else: raise Exception('can not handle type of %s' % (args,)) - # void transform(const String&, IMSDataConsumer[Peak1D, ChromatogramPeak] *) nogil except + # wrap-ignore - # void transform(const String&, IMSDataConsumer[Peak1D, ChromatogramPeak] *, bool skip_full_count, bool skip_first_pass) nogil except + - # void transform(const String&, IMSDataConsumer[Peak1D, ChromatogramPeak] *, MSExperiment& e) nogil except + # wrap-ignore - # void transform(const String&, IMSDataConsumer[Peak1D, ChromatogramPeak] *, MSExperiment& e, bool skip_full_count, bool skip_first_pass) nogil except + # wrap-ignore + # void transform(const String&, IMSDataConsumer[Peak1D, ChromatogramPeak] *) except + nogil # wrap-ignore + # void transform(const String&, IMSDataConsumer[Peak1D, ChromatogramPeak] *, bool skip_full_count, bool skip_first_pass) except + nogil + # void transform(const String&, IMSDataConsumer[Peak1D, ChromatogramPeak] *, MSExperiment& e) except + nogil # wrap-ignore + # void transform(const String&, IMSDataConsumer[Peak1D, ChromatogramPeak] *, MSExperiment& e, bool skip_full_count, bool skip_first_pass) except + nogil # wrap-ignore def _transform_4(self, path, transformer, MSExperiment exp, bool skip_full_count, bool skip_first_pass): assert isinstance(exp, MSExperiment), 'arg exp wrong type' diff --git a/src/pyOpenMS/addons/SequestOutfile.pyx b/src/pyOpenMS/addons/SequestOutfile.pyx index f29bd14b75a..5c391ec0e95 100644 --- a/src/pyOpenMS/addons/SequestOutfile.pyx +++ b/src/pyOpenMS/addons/SequestOutfile.pyx @@ -7,7 +7,7 @@ from libcpp.map cimport map as libcpp_map # libcpp_map[ String, Size ] &ac_position_map, # libcpp_vector[ String ] &sequences, # libcpp_vector[ libcpp_pair[ String, Size ] ] &found, - # libcpp_map[ String, Size ] &not_found) nogil except + + # libcpp_map[ String, Size ] &not_found) except + 
nogil assert isinstance(database_filename, String), 'arg database_filename wrong type' assert isinstance(sequences, list) and all(isinstance(i, bytes) for i in sequences), 'arg sequences wrong type' diff --git a/src/pyOpenMS/pxds/AnnotatedMSRun.pxd b/src/pyOpenMS/pxds/AnnotatedMSRun.pxd new file mode 100644 index 00000000000..caed02e6463 --- /dev/null +++ b/src/pyOpenMS/pxds/AnnotatedMSRun.pxd @@ -0,0 +1,50 @@ +from libcpp.vector cimport vector as libcpp_vector +from libcpp.pair cimport pair as libcpp_pair +from libcpp cimport bool +from Types cimport * +from MSExperiment cimport * +from PeptideIdentification cimport * +from ProteinIdentification cimport * +from MSSpectrum cimport * + +cdef extern from "" namespace "OpenMS": + + cdef cppclass AnnotatedMSRun: + # wrap-doc: + # Class for storing MS run data with peptide and protein identifications + # + # This class stores an MSExperiment (containing spectra) along with peptide and protein + # identifications. Each spectrum in the MSExperiment is associated with a single + # PeptideIdentification object. Objects are typically not created manually but generated + # by the IDMapper class. + # + # + # Usage: + # + # .. 
code-block:: python + # + # run = AnnotatedMSRun() + # exp = MSExperiment() + # MzMLFile().load(path_to_file, exp) + # run.setMSExperiment(exp) + # run.setPeptideIdentifications(my_peptide_ids) + + AnnotatedMSRun() except + nogil + AnnotatedMSRun(MSExperiment) except + nogil + AnnotatedMSRun(AnnotatedMSRun) except + nogil + + # Protein identification methods + libcpp_vector[ProteinIdentification] getProteinIdentifications() except + nogil + void setProteinIdentifications(libcpp_vector[ProteinIdentification]& ids) except + nogil + + # Peptide identification methods + libcpp_vector[PeptideIdentification] getPeptideIdentifications() except + nogil + void setPeptideIdentifications(libcpp_vector[PeptideIdentification]& ids) except + nogil + + # MSExperiment methods + MSExperiment getMSExperiment() except + nogil + void setMSExperiment(MSExperiment& experiment) except + nogil + + # Access methods + libcpp_pair[MSSpectrum, PeptideIdentification] operator[](size_t idx) except + nogil # wrap-ignore + diff --git a/src/pyOpenMS/pxds/ChromatogramRangeManager.pxd b/src/pyOpenMS/pxds/ChromatogramRangeManager.pxd new file mode 100644 index 00000000000..3b2374d5f3a --- /dev/null +++ b/src/pyOpenMS/pxds/ChromatogramRangeManager.pxd @@ -0,0 +1,29 @@ +from Types cimport * +from RangeManager cimport * + +cdef extern from "" namespace "OpenMS": + + cdef cppclass ChromatogramRangeManager: + # wrap-doc: + # Range manager for chromatograms + # + # This class manages retention time, m/z, and intensity ranges for multiple chromatograms. + # It extends the basic RangeManager to provide specialized functionality for chromatogram data. 
+ # + # The template parameters for the base RangeManager are ordered differently than in SpectrumRangeManager: + # - RangeRT (retention time) is the first parameter, as it's the primary dimension for chromatograms + # - RangeIntensity is the second parameter + # - RangeMZ is the third parameter + + ChromatogramRangeManager() except + nogil + ChromatogramRangeManager(ChromatogramRangeManager &) except + nogil + + void clearRanges() except + nogil + + # Range accessors + double getMinRT() except + nogil + double getMaxRT() except + nogil + double getMinMZ() except + nogil + double getMaxMZ() except + nogil + double getMinIntensity() except + nogil + double getMaxIntensity() except + nogil \ No newline at end of file diff --git a/src/pyOpenMS/pxds/ExperimentalSettings.pxd b/src/pyOpenMS/pxds/ExperimentalSettings.pxd index c7f9ce0911b..b012fd40f85 100644 --- a/src/pyOpenMS/pxds/ExperimentalSettings.pxd +++ b/src/pyOpenMS/pxds/ExperimentalSettings.pxd @@ -58,12 +58,6 @@ cdef extern from "" namespace "OpenMS": String getComment() except + nogil # wrap-doc:Returns the free-text comment void setComment(String comment) except + nogil # wrap-doc:Sets the free-text comment - - - libcpp_vector[ProteinIdentification] getProteinIdentifications() except + nogil # wrap-doc:Returns a reference to the protein ProteinIdentification vector - - void setProteinIdentifications(libcpp_vector[ProteinIdentification] protein_identifications) except + nogil # wrap-doc:Sets the protein ProteinIdentification vector - String getFractionIdentifier() except + nogil # wrap-doc:Returns fraction identifier diff --git a/src/pyOpenMS/pxds/IDFilter.pxd b/src/pyOpenMS/pxds/IDFilter.pxd index 4f1c506d442..c3fc0cd9def 100644 --- a/src/pyOpenMS/pxds/IDFilter.pxd +++ b/src/pyOpenMS/pxds/IDFilter.pxd @@ -11,6 +11,7 @@ from FASTAFile cimport * from ProteaseDigestion cimport * from MSExperiment cimport * +from AnnotatedMSRun cimport * from MSSpectrum cimport * from Peak1D cimport * from ChromatogramPeak 
cimport * @@ -77,8 +78,6 @@ cdef extern from "" namespace "OpenMS": # :param ignore_mods: Boolean operator default to false in case of any modifications in sequences during extraction # :return: Sequences - void updateHitRanks(libcpp_vector[ProteinIdentification]& identifications) except + nogil # wrap-doc:Updates the hit ranks on all peptide or protein IDs - void removeUnreferencedProteins(libcpp_vector[ProteinIdentification]& proteins, libcpp_vector[PeptideIdentification]& peptides) except + nogil # wrap-doc:Removes protein hits from the protein IDs in a 'cmap' that are not referenced by a peptide in the features or if requested in the unassigned peptide list void updateProteinReferences(libcpp_vector[PeptideIdentification]& peptides, libcpp_vector[ProteinIdentification]& proteins, bool remove_peptides_without_reference) except + nogil # wrap-doc:Removes references to missing proteins. Only PeptideEvidence entries that reference protein hits in 'proteins' are kept in the peptide hits @@ -177,15 +176,15 @@ cdef extern from "" namespace "OpenMS": void removeDuplicatePeptideHits(libcpp_vector[PeptideIdentification]& peptides) except + nogil # wrap-doc:Removes duplicate peptide hits from each peptide identification, keeping only unique hits (per ID) - void filterHitsByScore(MSExperiment& experiment, double peptide_threshold_score, double protein_threshold_score) except + nogil # wrap-doc:Filters an MS/MS experiment according to score thresholds + void filterHitsByScore(AnnotatedMSRun& experiment, double peptide_threshold_score, double protein_threshold_score) except + nogil # wrap-doc:Filters an MS/MS experiment according to score thresholds - void keepNBestHits(MSExperiment& experiment, Size n) except + nogil # wrap-doc:Filters an MS/MS experiment by keeping the N best peptide hits for every spectrum + void keepNBestHits(AnnotatedMSRun& experiment, Size n) except + nogil # wrap-doc:Filters an MS/MS experiment by keeping the N best peptide hits for every spectrum 
void keepBestPerPeptide(libcpp_vector[PeptideIdentification]& peptides, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum) except + nogil # wrap-doc:Filters PeptideHits from PeptideIdentification by keeping only the best peptide hits for every peptide sequence void keepBestPerPeptidePerRun(libcpp_vector[ProteinIdentification]& prot_ids, libcpp_vector[PeptideIdentification]& peptides, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum) except + nogil # wrap-doc:Filters PeptideHits from PeptideIdentification by keeping only the best peptide hits for every peptide sequence on a per run basis - void keepHitsMatchingProteins(MSExperiment& experiment, libcpp_vector[FASTAEntry]& proteins) except + nogil + void keepHitsMatchingProteins(AnnotatedMSRun& experiment, libcpp_vector[FASTAEntry]& proteins) except + nogil cdef extern from "" namespace "OpenMS::IDFilter": diff --git a/src/pyOpenMS/pxds/IDMapper.pxd b/src/pyOpenMS/pxds/IDMapper.pxd index a9084dbd70b..676a34273e9 100644 --- a/src/pyOpenMS/pxds/IDMapper.pxd +++ b/src/pyOpenMS/pxds/IDMapper.pxd @@ -8,6 +8,7 @@ from ProteinIdentification cimport * from PeptideIdentification cimport * from MSExperiment cimport * +from AnnotatedMSRun cimport * from Peak1D cimport * from ChromatogramPeak cimport * @@ -20,11 +21,11 @@ cdef extern from "" namespace "OpenMS": IDMapper() except + nogil # wrap-doc:Annotates an MSExperiment, FeatureMap or ConsensusMap with peptide identifications IDMapper(IDMapper &) except + nogil - void annotate(MSExperiment & map_, - libcpp_vector[PeptideIdentification] & ids, - libcpp_vector[ProteinIdentification] & protein_ids, - bool clear_ids, - bool mapMS1) except + nogil + void annotate(AnnotatedMSRun & map_, + libcpp_vector[PeptideIdentification] & ids, + libcpp_vector[ProteinIdentification] & protein_ids, + bool clear_ids, + bool mapMS1) except + nogil # wrap-doc: # Mapping method for peak maps\n # @@ -33,7 +34,7 @@ cdef extern from "" namespace "OpenMS": # Note that a 
PeptideIdentication is added to ALL spectra which are within the allowed RT and MZ boundaries # # - # :param map: MSExperiment to receive the identifications + # :param map: AnnotatedMSRun to receive the identifications # :param peptide_ids: PeptideIdentification for the MSExperiment # :param protein_ids: ProteinIdentification for the MSExperiment # :param clear_ids: Reset peptide and protein identifications of each scan before annotating @@ -41,10 +42,10 @@ cdef extern from "" namespace "OpenMS": # :raises: # Exception: MissingInformation is thrown if entries of 'peptide_ids' do not contain 'MZ' and 'RT' information - void annotate(MSExperiment & map_, - FeatureMap & fmap, - bool clear_ids, - bool mapMS1) except + nogil + void annotate(AnnotatedMSRun & map_, + FeatureMap & fmap, + bool clear_ids, + bool mapMS1) except + nogil # wrap-doc: # Mapping method for peak maps\n # @@ -55,7 +56,7 @@ cdef extern from "" namespace "OpenMS": # RT and m/z are taken from the peptides, or (if missing) from the feature itself # # - # :param map: MSExperiment to receive the identifications + # :param map: AnnotatedMSRun to receive the identifications # :param fmap: FeatureMap with PeptideIdentifications for the MSExperiment # :param clear_ids: Reset peptide and protein identifications of each scan before annotating # :param map_ms1: Attach Ids to MS1 spectra using RT mapping only (without precursor, without m/z) diff --git a/src/pyOpenMS/pxds/MSExperiment.pxd b/src/pyOpenMS/pxds/MSExperiment.pxd index bbdddcf0f6f..e4bac5dca9d 100644 --- a/src/pyOpenMS/pxds/MSExperiment.pxd +++ b/src/pyOpenMS/pxds/MSExperiment.pxd @@ -10,15 +10,16 @@ from ExperimentalSettings cimport * from DateTime cimport * from RangeManager cimport * from Matrix cimport * +from SpectrumRangeManager cimport * +from ChromatogramRangeManager cimport * # this class has addons, see the ./addons folder cdef extern from "" namespace "OpenMS": - cdef cppclass MSExperiment(ExperimentalSettings, RangeManagerRtMzInt): + 
cdef cppclass MSExperiment(ExperimentalSettings): # wrap-inherits: # ExperimentalSettings - # RangeManagerRtMzInt # # wrap-doc: # In-Memory representation of a mass spectrometry experiment. @@ -112,3 +113,7 @@ cdef extern from "" namespace "OpenMS": int getPrecursorSpectrum(int zero_based_index) except + nogil # wrap-doc:Returns the index of the precursor spectrum for spectrum at index @p zero_based_index + # Range manager accessors + SpectrumRangeManager spectrumRanges() except + nogil # wrap-doc:Returns a reference to the spectrum range manager + ChromatogramRangeManager chromatogramRanges() except + nogil # wrap-doc:Returns a reference to the chromatogram range manager + RangeManagerRtMzIntMob combinedRanges() except + nogil # wrap-doc:Returns a reference to the combined range manager (for backward compatibility) diff --git a/src/pyOpenMS/pxds/MSPFile.pxd b/src/pyOpenMS/pxds/MSPFile.pxd index 809c4d7c711..7f4af27de40 100644 --- a/src/pyOpenMS/pxds/MSPFile.pxd +++ b/src/pyOpenMS/pxds/MSPFile.pxd @@ -2,6 +2,7 @@ from libcpp.vector cimport vector as libcpp_vector from String cimport * from Peak1D cimport * from MSExperiment cimport * +from AnnotatedMSRun cimport * cdef extern from "" namespace "OpenMS": @@ -10,7 +11,7 @@ cdef extern from "" namespace "OpenMS": MSPFile() except + nogil # wrap-doc:File adapter for MSP files (NIST spectra library) MSPFile(MSPFile &) except + nogil - void store(String filename, MSExperiment & exp) except + nogil # wrap-doc:Stores a map in a MSPFile file + void store(String filename, AnnotatedMSRun & exp) except + nogil # wrap-doc:Stores a map in a MSPFile file void load(String filename, libcpp_vector[PeptideIdentification] & ids, MSExperiment & exp) except + nogil # wrap-doc: # Loads a map from a MSPFile file diff --git a/src/pyOpenMS/pxds/RangeManager.pxd b/src/pyOpenMS/pxds/RangeManager.pxd index 7d1abff0038..5728fd06696 100644 --- a/src/pyOpenMS/pxds/RangeManager.pxd +++ b/src/pyOpenMS/pxds/RangeManager.pxd @@ -118,3 +118,17 @@ cdef 
extern from "" namespace "OpenMS": void clearRanges() except + nogil # wrap-doc:Resets all range dimensions as empty + cdef cppclass RangeManagerRtMzIntMob "OpenMS::RangeManager": + # no-pxd-import + RangeManagerRtMzIntMob() except + nogil + RangeManagerRtMzIntMob(RangeManagerRtMzIntMob &) except + nogil + + double getMinRT() except + nogil # wrap-doc:Returns the minimum RT + double getMaxRT() except + nogil # wrap-doc:Returns the maximum RT + double getMinMZ() except + nogil # wrap-doc:Returns the minimum m/z + double getMaxMZ() except + nogil # wrap-doc:Returns the maximum m/z + double getMinIntensity() except + nogil # wrap-doc:Returns the minimum intensity + double getMaxIntensity() except + nogil # wrap-doc:Returns the maximum intensity + double getMinMobility() except + nogil # wrap-doc:Returns the minimum mobility + double getMaxMobility() except + nogil # wrap-doc:Returns the maximum mobility + void clearRanges() except + nogil # wrap-doc:Resets all range dimensions as empty diff --git a/src/pyOpenMS/pxds/SpectrumRangeManager.pxd b/src/pyOpenMS/pxds/SpectrumRangeManager.pxd new file mode 100644 index 00000000000..e1d814ea7ea --- /dev/null +++ b/src/pyOpenMS/pxds/SpectrumRangeManager.pxd @@ -0,0 +1,40 @@ +from Types cimport * +from RangeManager cimport * +from MSSpectrum cimport * +from libcpp.set cimport set as libcpp_set + +cdef extern from "" namespace "OpenMS": + + cdef cppclass SpectrumRangeManager: + # wrap-doc: + # Advanced range manager for MS spectra with separate ranges for each MS level + # + # This class extends the basic RangeManager to provide separate range tracking for different MS levels + # (MS1, MS2, etc.). It manages four types of ranges: + # - m/z (mass-to-charge ratio) + # - intensity + # - retention time (RT) + # - ion mobility + # + # A global range is tracked for all MS levels, and additional ranges are maintained for each specific MS level. 
+ # This allows for efficient querying of ranges for specific MS levels, which is useful for visualization, + # filtering, and processing operations that need to work with specific MS levels. + + SpectrumRangeManager() except + nogil + SpectrumRangeManager(SpectrumRangeManager &) except + nogil + + void clearRanges() except + nogil + libcpp_set[UInt] getMSLevels() except + nogil + void extendRT(double rt, UInt ms_level) except + nogil + void extendMZ(double mz, UInt ms_level) except + nogil + void extendUnsafe(const MSSpectrum& spectrum, UInt ms_level) except + nogil + + # Range accessors + double getMinRT() except + nogil + double getMaxRT() except + nogil + double getMinMZ() except + nogil + double getMaxMZ() except + nogil + double getMinIntensity() except + nogil + double getMaxIntensity() except + nogil + double getMinMobility() except + nogil + double getMaxMobility() except + nogil \ No newline at end of file diff --git a/src/pyOpenMS/pxds/SpectrumSettings.pxd b/src/pyOpenMS/pxds/SpectrumSettings.pxd index d0382f8382f..5aadb9bd2d3 100644 --- a/src/pyOpenMS/pxds/SpectrumSettings.pxd +++ b/src/pyOpenMS/pxds/SpectrumSettings.pxd @@ -3,7 +3,6 @@ from String cimport * from Peak1D cimport * from InstrumentSettings cimport * from SourceFile cimport * -from PeptideIdentification cimport * from Precursor cimport * from DataProcessing cimport * from Product cimport * @@ -42,9 +41,6 @@ cdef extern from "" namespace "OpenMS": libcpp_vector[Product] getProducts() except + nogil # wrap-doc:Returns a const reference to the products void setProducts(libcpp_vector[Product]) except + nogil # wrap-doc:Sets the products - libcpp_vector[PeptideIdentification] getPeptideIdentifications() except + nogil # wrap-doc:Returns a const reference to the PeptideIdentification vector - void setPeptideIdentifications(libcpp_vector[PeptideIdentification]) except + nogil # wrap-doc:Sets the PeptideIdentification vector - libcpp_vector[ shared_ptr[DataProcessing] ] getDataProcessing() except + 
nogil void setDataProcessing(libcpp_vector[ shared_ptr[DataProcessing] ]) except + nogil diff --git a/src/pyOpenMS/tests/unittests/test000.py b/src/pyOpenMS/tests/unittests/test000.py index e048517728f..87c73afd365 100644 --- a/src/pyOpenMS/tests/unittests/test000.py +++ b/src/pyOpenMS/tests/unittests/test000.py @@ -39,14 +39,14 @@ def _testMetaInfoInterface(what): #void getKeys(libcpp_vector[String] & keys) #void getKeys(libcpp_vector[unsigned int] & keys) - #DataValue getMetaValue(unsigned int) nogil except + - #DataValue getMetaValue(String) nogil except + - #void setMetaValue(unsigned int, DataValue) nogil except + - #void setMetaValue(String, DataValue) nogil except + - #bool metaValueExists(String) nogil except + - #bool metaValueExists(unsigned int) nogil except + - #void removeMetaValue(String) nogil except + - #void removeMetaValue(unsigned int) nogil except + + #DataValue getMetaValue(unsigned int) except + nogil + #DataValue getMetaValue(String) except + nogil + #void setMetaValue(unsigned int, DataValue) except + nogil + #void setMetaValue(String, DataValue) except + nogil + #bool metaValueExists(String) except + nogil + #bool metaValueExists(unsigned int) except + nogil + #void removeMetaValue(String) except + nogil + #void removeMetaValue(unsigned int) except + nogil what.setMetaValue("key", 42) what.setMetaValue("key2", 42) @@ -5369,9 +5369,9 @@ def testElementDB(): # not yet implemented # - # const Map[ String, Element * ] getNames() nogil except + - # const Map[ String, Element * ] getSymbols() nogil except + - # const Map[unsigned int, Element * ] getAtomicNumbers() nogil except + + # const Map[ String, Element * ] getNames() except + nogil + # const Map[ String, Element * ] getSymbols() except + nogil + # const Map[unsigned int, Element * ] getAtomicNumbers() except + nogil @report @@ -5475,11 +5475,11 @@ def testModificationsDB(): def testRNaseDB(): """ @tests: RNaseDB - const DigestionEnzymeRNA* getEnzyme(const String& name) nogil except + - 
const DigestionEnzymeRNA* getEnzymeByRegEx(const String& cleavage_regex) nogil except + - void getAllNames(libcpp_vector[ String ]& all_names) nogil except + - bool hasEnzyme(const String& name) nogil except + - bool hasRegEx(const String& cleavage_regex) nogil except + + const DigestionEnzymeRNA* getEnzyme(const String& name) except + nogil + const DigestionEnzymeRNA* getEnzymeByRegEx(const String& cleavage_regex) except + nogil + void getAllNames(libcpp_vector[ String ]& all_names) except + nogil + bool hasEnzyme(const String& name) except + nogil + bool hasRegEx(const String& cleavage_regex) except + nogil """ db = pyopenms.RNaseDB() names = [] diff --git a/src/tests/class_tests/openms/executables.cmake b/src/tests/class_tests/openms/executables.cmake index 656625cea4c..8ffc99475d2 100644 --- a/src/tests/class_tests/openms/executables.cmake +++ b/src/tests/class_tests/openms/executables.cmake @@ -78,6 +78,7 @@ set(datastructures_executables_list set(metadata_executables_list AcquisitionInfo_test Acquisition_test + AnnotatedMSRun_test CVTermList_test CVTermListInterface_test CVTerm_test @@ -251,6 +252,7 @@ set(format_executables_list UnimodXMLFile_test XMassFile_test XMLFile_test + XMLHandler_test XMLValidator_test XQuestResultXMLFile_test XTandemInfile_test diff --git a/src/tests/class_tests/openms/source/AnnotatedMSRun_test.cpp b/src/tests/class_tests/openms/source/AnnotatedMSRun_test.cpp new file mode 100644 index 00000000000..737c62f8b97 --- /dev/null +++ b/src/tests/class_tests/openms/source/AnnotatedMSRun_test.cpp @@ -0,0 +1,343 @@ +// Copyright (c) 2002-present, OpenMS Inc. 
-- EKU Tuebingen, ETH Zurich, and FU Berlin +// SPDX-License-Identifier: BSD-3-Clause +// +// -------------------------------------------------------------------------- +// $Maintainer: Timo Sachsenberg$ +// $Authors: David Voigt $ +// -------------------------------------------------------------------------- + +#include +#include +#include +#include + +START_TEST(AnnotatedMSRun, "$Id$") + +using namespace OpenMS; + +// Default constructor +AnnotatedMSRun* ptr = nullptr; +AnnotatedMSRun* nullPointer = nullptr; + +START_SECTION((AnnotatedMSRun())) + ptr = new AnnotatedMSRun(); + TEST_NOT_EQUAL(ptr, nullPointer) +END_SECTION + +START_SECTION((~AnnotatedMSRun())) + delete ptr; +END_SECTION + +START_SECTION((explicit AnnotatedMSRun(MSExperiment&& experiment))) + MSExperiment exp; + MSSpectrum spec; + spec.setRT(42.0); + spec.setMSLevel(2); + exp.addSpectrum(spec); + + AnnotatedMSRun annotated_data(std::move(exp)); + TEST_EQUAL(annotated_data.getMSExperiment().size(), 1) + TEST_REAL_SIMILAR(annotated_data.getMSExperiment()[0].getRT(), 42.0) +END_SECTION + +START_SECTION((ProteinIdentification& getProteinIdentifications())) + AnnotatedMSRun annotated_data; + + auto& prot_id = annotated_data.getProteinIdentifications(); + prot_id.resize(1); + prot_id[0].setIdentifier("Test"); + TEST_EQUAL(annotated_data.getProteinIdentifications()[0].getIdentifier(), "Test") +END_SECTION + +START_SECTION((const ProteinIdentification& getProteinIdentifications() const)) + AnnotatedMSRun annotated_data; + auto& prot_id = annotated_data.getProteinIdentifications(); + prot_id.resize(1); + prot_id[0].setIdentifier("Test"); + + const AnnotatedMSRun& const_data = annotated_data; + TEST_EQUAL(const_data.getProteinIdentifications()[0].getIdentifier(), "Test") +END_SECTION + +START_SECTION((PeptideIdentification& getPeptideIdentification(size_t index))) + AnnotatedMSRun annotated_data; + MSExperiment exp; + MSSpectrum spec1, spec2; + exp.addSpectrum(spec1); + exp.addSpectrum(spec2); + 
annotated_data.getMSExperiment() = std::move(exp); + + // Resize peptide identifications to match spectra + annotated_data.getPeptideIdentifications().resize(2); + + // Add data to the first peptide identification + PeptideHit hit; + hit.setSequence(AASequence::fromString("PEPTIDE")); + annotated_data.getPeptideIdentifications()[0].insertHit(hit); + + TEST_EQUAL(annotated_data.getPeptideIdentifications()[0].getHits().size(), 1) + TEST_EQUAL(annotated_data.getPeptideIdentifications()[0].getHits()[0].getSequence().toString(), "PEPTIDE") +END_SECTION + +START_SECTION((const PeptideIdentification& getPeptideIdentification(size_t index) const)) + AnnotatedMSRun annotated_data; + MSExperiment exp; + MSSpectrum spec1, spec2; + exp.addSpectrum(spec1); + exp.addSpectrum(spec2); + annotated_data.getMSExperiment() = std::move(exp); + + // Resize peptide identifications to match spectra + annotated_data.getPeptideIdentifications().resize(2); + + // Add data to the first peptide identification + PeptideHit hit; + hit.setSequence(AASequence::fromString("PEPTIDE")); + annotated_data.getPeptideIdentifications()[0].insertHit(hit); + + const AnnotatedMSRun& const_data = annotated_data; + TEST_EQUAL(const_data.getPeptideIdentifications()[0].getHits().size(), 1) + TEST_EQUAL(const_data.getPeptideIdentifications()[0].getHits()[0].getSequence().toString(), "PEPTIDE") +END_SECTION + + +START_SECTION((std::vector& getPeptideIdentifications())) + AnnotatedMSRun annotated_data; + MSExperiment exp; + MSSpectrum spec1, spec2; + exp.addSpectrum(spec1); + exp.addSpectrum(spec2); + annotated_data.getMSExperiment() = std::move(exp); + + // Resize peptide identifications to match spectra + annotated_data.getPeptideIdentifications().resize(2); + + // Add data to the peptide identifications + PeptideHit hit1, hit2; + hit1.setSequence(AASequence("PEPTIDER")); + hit2.setSequence(AASequence("PEPTIDAR")); + annotated_data.getPeptideIdentifications()[0].insertHit(hit1); + 
annotated_data.getPeptideIdentifications()[1].insertHit(hit2); + + TEST_EQUAL(annotated_data.getPeptideIdentifications().size(), 2) + TEST_EQUAL(annotated_data.getPeptideIdentifications()[0].getHits().size(), 1) + TEST_EQUAL(annotated_data.getPeptideIdentifications()[1].getHits().size(), 1) + TEST_EQUAL(annotated_data.getPeptideIdentifications()[0].getHits()[0].getSequence().toString(), "PEPTIDER") + TEST_EQUAL(annotated_data.getPeptideIdentifications()[1].getHits()[0].getSequence().toString(), "PEPTIDAR") +END_SECTION + +START_SECTION((const std::vector& getPeptideIdentifications() const)) + AnnotatedMSRun annotated_data; + MSExperiment exp; + MSSpectrum spec1, spec2; + exp.addSpectrum(spec1); + exp.addSpectrum(spec2); + annotated_data.getMSExperiment() = std::move(exp); + + // Resize peptide identifications to match spectra + annotated_data.getPeptideIdentifications().resize(2); + + // Add data to the peptide identifications + PeptideHit hit1, hit2; + hit1.setSequence(AASequence::fromString("PEPTIDER")); + hit2.setSequence(AASequence::fromString("PEPTIDAR")); + annotated_data.getPeptideIdentifications()[0].insertHit(hit1); + annotated_data.getPeptideIdentifications()[1].insertHit(hit2); + + const AnnotatedMSRun& const_data = annotated_data; + TEST_EQUAL(const_data.getPeptideIdentifications().size(), 2) + TEST_EQUAL(const_data.getPeptideIdentifications()[0].getHits().size(), 1) + TEST_EQUAL(const_data.getPeptideIdentifications()[1].getHits().size(), 1) + TEST_EQUAL(const_data.getPeptideIdentifications()[0].getHits()[0].getSequence().toString(), "PEPTIDER") + TEST_EQUAL(const_data.getPeptideIdentifications()[1].getHits()[0].getSequence().toString(), "PEPTIDAR") +END_SECTION + + +START_SECTION((void setPeptideIdentification(PeptideIdentification&& id, size_t index))) + AnnotatedMSRun annotated_data; + MSExperiment exp; + MSSpectrum spec1, spec2; + exp.addSpectrum(spec1); + exp.addSpectrum(spec2); + annotated_data.getMSExperiment() = std::move(exp); + + // Resize 
peptide identifications to match spectra + annotated_data.getPeptideIdentifications().resize(2); + + // Create a peptide identification + PeptideIdentification pep_id; + PeptideHit hit; + hit.setSequence(AASequence::fromString("PEPTIDE")); + pep_id.insertHit(hit); + + // Set the peptide identification + annotated_data.getPeptideIdentifications()[0] = pep_id; + + TEST_EQUAL(annotated_data.getPeptideIdentifications()[0].getHits().size(), 1) + TEST_EQUAL(annotated_data.getPeptideIdentifications()[0].getHits()[0].getSequence().toString(), "PEPTIDE") +END_SECTION + + +START_SECTION((void setPeptideIdentifications(std::vector&& ids))) + AnnotatedMSRun annotated_data; + MSExperiment exp; + MSSpectrum spec1, spec2; + exp.addSpectrum(spec1); + exp.addSpectrum(spec2); + annotated_data.getMSExperiment() = std::move(exp); + + // Create a vector of peptide identifications + std::vector pep_ids; + PeptideIdentification pep_id1, pep_id2; + PeptideHit hit1, hit2; + hit1.setSequence(AASequence::fromString("PEPTIDER")); + hit2.setSequence(AASequence::fromString("PEPTIDAR")); + pep_id1.insertHit(hit1); + pep_id2.insertHit(hit2); + pep_ids.push_back(pep_id1); + pep_ids.push_back(pep_id2); + + // Set all peptide identifications + annotated_data.setPeptideIdentifications(std::move(pep_ids)); + + TEST_EQUAL(annotated_data.getPeptideIdentifications().size(), 2) + TEST_EQUAL(annotated_data.getPeptideIdentifications()[0].getHits().size(), 1) + TEST_EQUAL(annotated_data.getPeptideIdentifications()[1].getHits().size(), 1) + TEST_EQUAL(annotated_data.getPeptideIdentifications()[0].getHits()[0].getSequence().toString(), "PEPTIDER") + TEST_EQUAL(annotated_data.getPeptideIdentifications()[1].getHits()[0].getSequence().toString(), "PEPTIDAR") +END_SECTION + + +START_SECTION((void clearAllPeptideIdentifications())) + AnnotatedMSRun annotated_data; + MSExperiment exp; + MSSpectrum spec1, spec2; + exp.addSpectrum(spec1); + exp.addSpectrum(spec2); + annotated_data.getMSExperiment() = std::move(exp); + 
+ // Resize peptide identifications to match spectra + annotated_data.getPeptideIdentifications().resize(2); + + // Add data to the peptide identifications + PeptideHit hit1, hit2; + hit1.setSequence(AASequence::fromString("PEPTIDER")); + hit2.setSequence(AASequence::fromString("PEPTIDAR")); + annotated_data.getPeptideIdentifications()[0].insertHit(hit1); + annotated_data.getPeptideIdentifications()[1].insertHit(hit2); + + // Clear all peptide identifications + annotated_data.getPeptideIdentifications().clear(); + + TEST_EQUAL(annotated_data.getPeptideIdentifications().size(), 0) +END_SECTION + +START_SECTION((MSExperiment& getMSExperiment())) + AnnotatedMSRun annotated_data; + MSExperiment exp; + MSSpectrum spec; + spec.setRT(42.0); + spec.setMSLevel(2); + exp.addSpectrum(spec); + + annotated_data.getMSExperiment() = std::move(exp); + TEST_EQUAL(annotated_data.getMSExperiment().size(), 1) + TEST_REAL_SIMILAR(annotated_data.getMSExperiment()[0].getRT(), 42.0) +END_SECTION + +START_SECTION((const MSExperiment& getMSExperiment() const)) + AnnotatedMSRun annotated_data; + MSExperiment exp; + MSSpectrum spec; + spec.setRT(42.0); + spec.setMSLevel(2); + exp.addSpectrum(spec); + + annotated_data.getMSExperiment() = std::move(exp); + + const AnnotatedMSRun& const_data = annotated_data; + TEST_EQUAL(const_data.getMSExperiment().size(), 1) + TEST_REAL_SIMILAR(const_data.getMSExperiment()[0].getRT(), 42.0) +END_SECTION + +START_SECTION((Iterator functionality)) + AnnotatedMSRun annotated_data; + MSExperiment exp; + MSSpectrum spec1, spec2; + spec1.setRT(10.0); + spec2.setRT(20.0); + exp.addSpectrum(spec1); + exp.addSpectrum(spec2); + annotated_data.getMSExperiment() = std::move(exp); + + // Resize peptide identifications to match spectra + annotated_data.getPeptideIdentifications().resize(2); + + // Add data to the peptide identifications + PeptideHit hit1, hit2; + hit1.setSequence(AASequence::fromString("PEPTIDER")); + hit2.setSequence(AASequence::fromString("PEPTIDAR")); + 
annotated_data.getPeptideIdentifications()[0].insertHit(hit1); + annotated_data.getPeptideIdentifications()[1].insertHit(hit2); + + // Test iterator functionality + size_t count = 0; + for (auto [spectrum, peptide_id] : annotated_data) + { + if (count == 0) + { + TEST_REAL_SIMILAR(spectrum.getRT(), 10.0) + TEST_EQUAL(peptide_id.getHits().size(), 1) + TEST_EQUAL(peptide_id.getHits()[0].getSequence().toString(), "PEPTIDER") + } + else if (count == 1) + { + TEST_REAL_SIMILAR(spectrum.getRT(), 20.0) + TEST_EQUAL(peptide_id.getHits().size(), 1) + TEST_EQUAL(peptide_id.getHits()[0].getSequence().toString(), "PEPTIDAR") + } + count++; + } + TEST_EQUAL(count, 2) +END_SECTION + +START_SECTION((Operator[] functionality)) + AnnotatedMSRun annotated_data; + MSExperiment exp; + MSSpectrum spec1, spec2; + spec1.setRT(10.0); + spec2.setRT(20.0); + exp.addSpectrum(spec1); + exp.addSpectrum(spec2); + annotated_data.getMSExperiment() = std::move(exp); + + // Resize peptide identifications to match spectra + annotated_data.getPeptideIdentifications().resize(2); + + // Add data to the peptide identifications + PeptideHit hit1, hit2; + hit1.setSequence(AASequence::fromString("PEPTIDER")); + hit2.setSequence(AASequence::fromString("PEPTIDAR")); + annotated_data.getPeptideIdentifications()[0].insertHit(hit1); + annotated_data.getPeptideIdentifications()[1].insertHit(hit2); + + // Test operator[] functionality + auto [spectrum, peptide_id] = annotated_data[0]; + TEST_REAL_SIMILAR(spectrum.getRT(), 10.0) + TEST_EQUAL(peptide_id.getHits().size(), 1) + TEST_EQUAL(peptide_id.getHits()[0].getSequence().toString(), "PEPTIDER") + + auto [spectrum2, peptide_id2] = annotated_data[1]; + TEST_REAL_SIMILAR(spectrum2.getRT(), 20.0) + TEST_EQUAL(peptide_id2.getHits().size(), 1) + TEST_EQUAL(peptide_id2.getHits()[0].getSequence().toString(), "PEPTIDAR") + + // Test const operator[] functionality + const AnnotatedMSRun& const_data = annotated_data; + auto [const_spectrum, const_peptide_id] = 
const_data[0]; + TEST_REAL_SIMILAR(const_spectrum.getRT(), 10.0) + TEST_EQUAL(const_peptide_id.getHits().size(), 1) + TEST_EQUAL(const_peptide_id.getHits()[0].getSequence().toString(), "PEPTIDER") +END_SECTION + +END_TEST \ No newline at end of file diff --git a/src/tests/class_tests/openms/source/ExperimentalSettings_test.cpp b/src/tests/class_tests/openms/source/ExperimentalSettings_test.cpp index 1f1beb6b945..28bed07629f 100644 --- a/src/tests/class_tests/openms/source/ExperimentalSettings_test.cpp +++ b/src/tests/class_tests/openms/source/ExperimentalSettings_test.cpp @@ -197,7 +197,7 @@ START_SECTION((ExperimentalSettings(const ExperimentalSettings& source))) tmp.getSample().setName("bla2"); tmp.getSourceFiles().resize(1); tmp.getContacts().resize(1); - tmp.getProteinIdentifications().push_back(id); + tmp.setMetaValue("label",String("label")); ExperimentalSettings tmp2(tmp); @@ -209,7 +209,7 @@ START_SECTION((ExperimentalSettings(const ExperimentalSettings& source))) TEST_EQUAL(tmp2.getSample().getName(),"bla2"); TEST_EQUAL(tmp2.getSourceFiles().size(),1); TEST_EQUAL(tmp2.getContacts().size(),1); - TEST_EQUAL(id == tmp2.getProteinIdentifications()[0], true); + TEST_EQUAL((String)(tmp2.getMetaValue("label")), "label"); END_SECTION @@ -231,7 +231,7 @@ START_SECTION((ExperimentalSettings& operator= (const ExperimentalSettings& sour tmp.getSample().setName("bla2"); tmp.getSourceFiles().resize(1); tmp.getContacts().resize(1); - tmp.getProteinIdentifications().push_back(id); + tmp.setMetaValue("label",String("label")); ExperimentalSettings tmp2; @@ -244,8 +244,6 @@ START_SECTION((ExperimentalSettings& operator= (const ExperimentalSettings& sour TEST_EQUAL(tmp2.getSample().getName(),"bla2"); TEST_EQUAL(tmp2.getSourceFiles().size(),1); TEST_EQUAL(tmp2.getContacts().size(),1); - TEST_EQUAL(tmp2.getProteinIdentifications().size(), 1); - TEST_EQUAL(id == tmp2.getProteinIdentifications()[0], true); TEST_EQUAL((String)(tmp2.getMetaValue("label")), "label"); tmp2 = 
ExperimentalSettings(); @@ -257,7 +255,6 @@ START_SECTION((ExperimentalSettings& operator= (const ExperimentalSettings& sour TEST_EQUAL(tmp2.getSample().getName(),""); TEST_EQUAL(tmp2.getSourceFiles().size(),0); TEST_EQUAL(tmp2.getContacts().size(),0); - TEST_EQUAL(tmp2.getProteinIdentifications().size(), 0); TEST_EQUAL(tmp2.getMetaValue("label").isEmpty(), true); END_SECTION @@ -292,10 +289,6 @@ START_SECTION((bool operator== (const ExperimentalSettings& rhs) const)) edit.getContacts().resize(1); TEST_EQUAL(edit==empty,false); - edit = empty; - edit.getProteinIdentifications().push_back(id); - TEST_EQUAL(edit==empty, false); - edit = empty; edit.setComment("bla"); TEST_EQUAL(edit==empty, false); @@ -352,63 +345,11 @@ START_SECTION((bool operator!= (const ExperimentalSettings& rhs) const)) edit.getContacts().resize(1); TEST_EQUAL(edit!=empty,true); - edit = empty; - edit.getProteinIdentifications().push_back(id); - TEST_FALSE(edit == empty); - edit = empty; edit.setMetaValue("label",String("label")); TEST_EQUAL(edit!=empty,true); END_SECTION -START_SECTION((const std::vector& getProteinIdentifications() const)) - ExperimentalSettings settings; - ProteinIdentification id; - ProteinHit protein_hit; - float protein_significance_threshold = 63.2f; - - id.setDateTime(DateTime::now()); - id.setSignificanceThreshold(protein_significance_threshold); - id.insertHit(protein_hit); - - settings.getProteinIdentifications().push_back(id); - const ProteinIdentification& test_id = settings.getProteinIdentifications()[0]; - TEST_TRUE(id == test_id) -END_SECTION - -START_SECTION((std::vector& getProteinIdentifications())) - ExperimentalSettings settings; - ProteinIdentification id; - ProteinHit protein_hit; - float protein_significance_threshold = 63.2f; - - id.setDateTime(DateTime::now()); - id.setSignificanceThreshold(protein_significance_threshold); - id.insertHit(protein_hit); - - settings.getProteinIdentifications().push_back(id); - ProteinIdentification& test_id = 
settings.getProteinIdentifications()[0]; - TEST_TRUE(id == test_id) -END_SECTION - -START_SECTION((void setProteinIdentifications(const std::vector& protein_identifications))) - ExperimentalSettings settings; - ProteinIdentification id; - ProteinHit protein_hit; - float protein_significance_threshold = 63.2f; - vector ids; - - id.setDateTime(DateTime::now()); - id.setSignificanceThreshold(protein_significance_threshold); - id.insertHit(protein_hit); - ids.push_back(id); - id.setSignificanceThreshold(21.f); - ids.push_back(id); - settings.setProteinIdentifications(ids); - TEST_EQUAL(ids == settings.getProteinIdentifications(), true) -END_SECTION - - ///////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////// END_TEST diff --git a/src/tests/class_tests/openms/source/FeatureFinderAlgorithmPicked_test.cpp b/src/tests/class_tests/openms/source/FeatureFinderAlgorithmPicked_test.cpp index b1346e22f29..b2d9daeb37f 100644 --- a/src/tests/class_tests/openms/source/FeatureFinderAlgorithmPicked_test.cpp +++ b/src/tests/class_tests/openms/source/FeatureFinderAlgorithmPicked_test.cpp @@ -47,7 +47,7 @@ START_SECTION((virtual void run())) MzMLFile mzml_file; mzml_file.getOptions().addMSLevel(1); mzml_file.load(OPENMS_GET_TEST_DATA_PATH("FeatureFinderAlgorithmPicked.mzML"),input); - input.updateRanges(1); + input.updateRanges(); FeatureMap output; //parameters diff --git a/src/tests/class_tests/openms/source/FeatureFinderMultiplexAlgorithm_test.cpp b/src/tests/class_tests/openms/source/FeatureFinderMultiplexAlgorithm_test.cpp index f16275b061a..d8ca80dd92e 100644 --- a/src/tests/class_tests/openms/source/FeatureFinderMultiplexAlgorithm_test.cpp +++ b/src/tests/class_tests/openms/source/FeatureFinderMultiplexAlgorithm_test.cpp @@ -44,7 +44,7 @@ START_SECTION((virtual void run())) mzml_file.getOptions().addMSLevel(1); mzml_file.load(OPENMS_GET_TEST_DATA_PATH("FeatureFinderMultiplex_1_input.mzML"), exp); - 
exp.updateRanges(1); + exp.updateRanges(); Param param; ParamXMLFile paramFile; diff --git a/src/tests/class_tests/openms/source/IDFilter_test.cpp b/src/tests/class_tests/openms/source/IDFilter_test.cpp index 68bb3bbf649..dcea12a01dc 100644 --- a/src/tests/class_tests/openms/source/IDFilter_test.cpp +++ b/src/tests/class_tests/openms/source/IDFilter_test.cpp @@ -875,76 +875,6 @@ START_SECTION((static void removeDuplicatePeptideHits(vector static void filterHitsByScore(MSExperiment& experiment, double peptide_threshold_score, double protein_threshold_score))) -{ - PeakMap experiment; - vector ids(1, global_peptides[0]); - - ids[0].sort(); - - for (Size i = 0; i < 5; ++i) - { - experiment.addSpectrum(MSSpectrum()); - } - experiment[3].setMSLevel(2); - experiment[3].setPeptideIdentifications(ids); - - IDFilter::filterHitsByScore(experiment, 31.8621, 0); - PeptideIdentification& identification = experiment[3].getPeptideIdentifications()[0]; - TEST_EQUAL(identification.getScoreType(), "Mascot"); - - vector& peptide_hits = identification.getHits(); - TEST_EQUAL(peptide_hits.size(), 5); - TEST_EQUAL(peptide_hits[0].getSequence().toString(), - "FINFGVNVEVLSRFQTK"); - TEST_REAL_SIMILAR(peptide_hits[0].getScore(), 40); - TEST_EQUAL(peptide_hits[1].getSequence().toString(), - "MSLLSNMISIVKVGYNAR"); - TEST_REAL_SIMILAR(peptide_hits[1].getScore(), 40); - TEST_EQUAL(peptide_hits[2].getSequence().toString(), - "THPYGHAIVAGIERYPSK"); - TEST_REAL_SIMILAR(peptide_hits[2].getScore(), 39); - TEST_EQUAL(peptide_hits[3].getSequence().toString(), - "LHASGITVTEIPVTATNFK"); - TEST_REAL_SIMILAR(peptide_hits[3].getScore(), 34.85); - TEST_EQUAL(peptide_hits[4].getSequence().toString(), - "MRSLGYVAVISAVATDTDK"); - TEST_REAL_SIMILAR(peptide_hits[4].getScore(), 33.85); -} -END_SECTION - -START_SECTION((template static void keepNBestHits(MSExperiment& experiment, Size n))) -{ - PeakMap experiment; - vector ids(1, global_peptides[0]); - - ids[0].sort(); - - for (Size i = 0; i < 5; ++i) - { - 
experiment.addSpectrum(MSSpectrum()); - } - experiment[3].setMSLevel(2); - experiment[3].setPeptideIdentifications(ids); - - IDFilter::keepNBestHits(experiment, 3); - PeptideIdentification& identification = experiment[3].getPeptideIdentifications()[0]; - TEST_EQUAL(identification.getScoreType(), "Mascot"); - - vector& peptide_hits = identification.getHits(); - TEST_EQUAL(peptide_hits.size(), 3); - TEST_EQUAL(peptide_hits[0].getSequence().toString(), - "FINFGVNVEVLSRFQTK"); - TEST_REAL_SIMILAR(peptide_hits[0].getScore(), 40); - TEST_EQUAL(peptide_hits[1].getSequence().toString(), - "MSLLSNMISIVKVGYNAR"); - TEST_REAL_SIMILAR(peptide_hits[1].getScore(), 40); - TEST_EQUAL(peptide_hits[2].getSequence().toString(), - "THPYGHAIVAGIERYPSK"); - TEST_REAL_SIMILAR(peptide_hits[2].getScore(), 39); -} -END_SECTION - START_SECTION((static void keepNBestSpectra(std::vector& peptides, Size n))) { vector proteins; @@ -972,40 +902,6 @@ START_SECTION((static void keepNBestSpectra(std::vector& } END_SECTION -START_SECTION((template static void keepHitsMatchingProteins(MSExperiment& experiment, const vector& proteins))) -{ - PeakMap experiment; - vector proteins; - vector peptides = global_peptides; - - proteins.push_back(FASTAFile::FASTAEntry("Q824A5", "first desription", - "LHASGITVTEIPVTATNFK")); - proteins.push_back(FASTAFile::FASTAEntry("Q872T5", "second description", - "THPYGHAIVAGIERYPSK")); - - for (Size i = 0; i < 5; ++i) - { - experiment.addSpectrum(MSSpectrum()); - } - experiment[3].setMSLevel(2); - experiment[3].setPeptideIdentifications(peptides); - - IDFilter::keepHitsMatchingProteins(experiment, proteins); - TEST_EQUAL(experiment[3].getPeptideIdentifications()[0].getScoreType(), - "Mascot"); - - vector& peptide_hits = - experiment[3].getPeptideIdentifications()[0].getHits(); - TEST_EQUAL(peptide_hits.size(), 2); - TEST_EQUAL(peptide_hits[0].getSequence().toString(), - "LHASGITVTEIPVTATNFK"); - TEST_REAL_SIMILAR(peptide_hits[0].getScore(), 34.85); - 
TEST_EQUAL(peptide_hits[1].getSequence().toString(), - "MRSLGYVAVISAVATDTDK"); - TEST_REAL_SIMILAR(peptide_hits[1].getScore(), 33.85); -} -END_SECTION - ///////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////// diff --git a/src/tests/class_tests/openms/source/IDMapper_test.cpp b/src/tests/class_tests/openms/source/IDMapper_test.cpp index 2c87c1af49c..baf43441eb6 100644 --- a/src/tests/class_tests/openms/source/IDMapper_test.cpp +++ b/src/tests/class_tests/openms/source/IDMapper_test.cpp @@ -17,6 +17,7 @@ #include #include #include +#include /////////////////////////// @@ -80,13 +81,14 @@ START_SECTION((IDMapper& operator = (const IDMapper& rhs))) TEST_EQUAL(m2.getParameters(), p); END_SECTION -START_SECTION((template void annotate(MSExperiment& map, FeatureMap fmap, const bool clear_ids = false, const bool mapMS1 = false))) +/* +START_SECTION((void annotate(AnnotatedMSRun& map, FeatureMap fmap, const bool clear_ids = false, const bool mapMS1 = false))) // create id FeatureMap fm; Feature f; f.setMZ(900.0); f.setRT(9.0); - std::vector< PeptideIdentification > pids; + std::vector pids; PeptideIdentification pid; pid.setIdentifier("myID"); pid.setHits(std::vector(4)); @@ -96,11 +98,12 @@ START_SECTION((template void annotate(MSExperiment pids.push_back(pid); // with MZ&RT from PID f.setPeptideIdentifications(pids); fm.push_back(f); - std::vector< ProteinIdentification > prids(2); + std::vector prids(2); fm.setProteinIdentifications(prids); // create experiment - PeakMap experiment; + AnnotatedMSRun annotated_experiment; + MSExperiment& experiment = annotated_experiment.getMSExperiment(); MSSpectrum spectrum; Precursor precursor; precursor.setMZ(0); @@ -125,36 +128,39 @@ START_SECTION((template void annotate(MSExperiment p.setValue("ignore_charge", "true"); mapper.setParameters(p); - - mapper.annotate(experiment, fm, true, true); + mapper.annotate(annotated_experiment, fm, true, true); //test - 
TEST_EQUAL(experiment.getProteinIdentifications().size(), 2) + TEST_EQUAL(annotated_experiment.getProteinIdentifications().size(), 2) //scan 1 - TEST_EQUAL(experiment[0].getPeptideIdentifications().size(), 2) + TEST_EQUAL(annotated_experiment.getPeptideIdentifications()[0].getHits().size(), 2) //scan 2 - TEST_EQUAL(experiment[1].getPeptideIdentifications().size(), 2) - ABORT_IF(experiment[1].getPeptideIdentifications().size() != 2) - TEST_EQUAL(experiment[1].getPeptideIdentifications()[0].getHits().size(), 4) - TEST_EQUAL(experiment[1].getPeptideIdentifications()[0].getMZ(), 900.0) - TEST_EQUAL(experiment[1].getPeptideIdentifications()[1].getHits().size(), 4) - TEST_EQUAL(experiment[1].getPeptideIdentifications()[1].getMZ(), 800.0) + TEST_EQUAL(annotated_experiment.getPeptideIdentifications()[1].size(), 2) + ABORT_IF(annotated_experiment.getPeptideIdentifications(1).size() != 2) + TEST_EQUAL(annotated_experiment.getPeptideIdentifications(1)[0].getHits().size(), 4) + TEST_EQUAL(annotated_experiment.getPeptideIdentifications(1)[0].getMZ(), 900.0) + TEST_EQUAL(annotated_experiment.getPeptideIdentifications(1)[1].getHits().size(), 4) + TEST_EQUAL(annotated_experiment.getPeptideIdentifications(1)[1].getMZ(), 800.0) //scan 3 - TEST_EQUAL(experiment[2].getPeptideIdentifications().size(), 0) + TEST_EQUAL(annotated_experiment.getPeptideIdentifications(2).size(), 0) - mapper.annotate(experiment, fm, true, false); // no MS1 mapping. MZ threshold never fulfilled + std::cout << annotated_experiment.getProteinIdentifications().size() << std::endl; + std::cout << fm.getProteinIdentifications().size() << std::endl; + mapper.annotate(annotated_experiment, fm, true, false); // no MS1 mapping. 
MZ threshold never fulfilled + std::cout << annotated_experiment.getProteinIdentifications().size() << std::endl; //test - TEST_EQUAL(experiment.getProteinIdentifications().size(), 2) + TEST_EQUAL(annotated_experiment.getProteinIdentifications().size(), 2) //scan 1 - TEST_EQUAL(experiment[0].getPeptideIdentifications().size(), 0) + TEST_EQUAL(annotated_experiment.getPeptideIdentifications(0).size(), 0) //scan 2 - TEST_EQUAL(experiment[1].getPeptideIdentifications().size(), 0) + TEST_EQUAL(annotated_experiment.getPeptideIdentifications(1).size(), 0) //scan 3 - TEST_EQUAL(experiment[2].getPeptideIdentifications().size(), 0) + TEST_EQUAL(annotated_experiment.getPeptideIdentifications(2).size(), 0) END_SECTION +*/ -START_SECTION((template void annotate(MSExperiment& map, const std::vector& peptide_ids, const std::vector& protein_ids, const bool clear_ids = false, const bool mapMS1 = false))) +START_SECTION((void annotate(AnnotatedMSRun& map, const std::vector& peptide_ids, const std::vector& protein_ids, const bool clear_ids = false, const bool mapMS1 = false))) // load id vector identifications; vector protein_identifications; @@ -182,7 +188,8 @@ START_SECTION((template void annotate(MSExperiment // TEST RT MAPPING // create experiment - PeakMap experiment; + AnnotatedMSRun annotated_experiment; + MSExperiment & experiment = annotated_experiment.getMSExperiment(); MSSpectrum spectrum; Precursor precursor; precursor.setMZ(0); @@ -206,31 +213,30 @@ START_SECTION((template void annotate(MSExperiment p.setValue("mz_measure","Da"); p.setValue("ignore_charge", "true"); mapper.setParameters(p); - - mapper.annotate(experiment, identifications, protein_identifications); + + mapper.annotate(annotated_experiment, identifications, protein_identifications); //test - TEST_EQUAL(experiment.getProteinIdentifications().size(), 1) - TEST_EQUAL(experiment.getProteinIdentifications()[0].getHits().size(),2) - 
TEST_EQUAL(experiment.getProteinIdentifications()[0].getHits()[0].getAccession(),"ABCDE") - TEST_EQUAL(experiment.getProteinIdentifications()[0].getHits()[1].getAccession(),"FGHIJ") + TEST_EQUAL(annotated_experiment.getProteinIdentifications().size(), 1) + TEST_EQUAL(annotated_experiment.getProteinIdentifications()[0].getHits().size(),2) + TEST_EQUAL(annotated_experiment.getProteinIdentifications()[0].getHits()[0].getAccession(),"ABCDE") + TEST_EQUAL(annotated_experiment.getProteinIdentifications()[0].getHits()[1].getAccession(),"FGHIJ") //scan 1 - TEST_EQUAL(experiment[0].getPeptideIdentifications().size(), 1) - TEST_EQUAL(experiment[0].getPeptideIdentifications()[0].getHits().size(), 2) - TEST_EQUAL(experiment[0].getPeptideIdentifications()[0].getHits()[0].getSequence(), AASequence::fromString("LHASGITVTEIPVTATNFK")) - TEST_EQUAL(experiment[0].getPeptideIdentifications()[0].getHits()[1].getSequence(), AASequence::fromString("MRSLGYVAVISAVATDTDK")) + TEST_EQUAL(annotated_experiment.getPeptideIdentifications()[0].getHits().size(), 2) + TEST_EQUAL(annotated_experiment.getPeptideIdentifications()[0].getHits()[0].getSequence(), AASequence::fromString("LHASGITVTEIPVTATNFK")) + TEST_EQUAL(annotated_experiment.getPeptideIdentifications()[0].getHits()[1].getSequence(), AASequence::fromString("MRSLGYVAVISAVATDTDK")) //scan 2 - TEST_EQUAL(experiment[1].getPeptideIdentifications().size(), 0) + TEST_EQUAL(annotated_experiment.getPeptideIdentifications()[1].getHits().size(), 0) //scan 3 - TEST_EQUAL(experiment[2].getPeptideIdentifications().size(), 1) - TEST_EQUAL(experiment[2].getPeptideIdentifications()[0].getHits().size(), 1) - TEST_EQUAL(experiment[2].getPeptideIdentifications()[0].getHits()[0].getSequence(), AASequence::fromString("HSKLSAK")) + TEST_EQUAL(annotated_experiment.getPeptideIdentifications()[2].getHits().size(), 1) + TEST_EQUAL(annotated_experiment.getPeptideIdentifications()[2].getHits()[0].getSequence(), AASequence::fromString("HSKLSAK")) 
//----------------------------------------------------------------------------------- // TEST NATIVE_ID MAPPING // create experiment - PeakMap experiment2; + AnnotatedMSRun annotated_experiment2; + MSExperiment& experiment2 = annotated_experiment2.getMSExperiment(); MSSpectrum spectrum2; Precursor precursor2; precursor2.setMZ(0); @@ -257,24 +263,22 @@ START_SECTION((template void annotate(MSExperiment p2.setValue("ignore_charge", "true"); mapper2.setParameters(p2); - mapper2.annotate(experiment2, identifications2, protein_identifications2); + mapper2.annotate(annotated_experiment2, identifications2, protein_identifications2); //test - TEST_EQUAL(experiment2.getProteinIdentifications().size(), 1) - TEST_EQUAL(experiment2.getProteinIdentifications()[0].getHits().size(),2) - TEST_EQUAL(experiment2.getProteinIdentifications()[0].getHits()[0].getAccession(),"ABCDE") - TEST_EQUAL(experiment2.getProteinIdentifications()[0].getHits()[1].getAccession(),"FGHIJ") + TEST_EQUAL(annotated_experiment2.getProteinIdentifications().size(), 1) + TEST_EQUAL(annotated_experiment2.getProteinIdentifications()[0].getHits().size(),2) + TEST_EQUAL(annotated_experiment2.getProteinIdentifications()[0].getHits()[0].getAccession(),"ABCDE") + TEST_EQUAL(annotated_experiment2.getProteinIdentifications()[0].getHits()[1].getAccession(),"FGHIJ") //scan 1 - TEST_EQUAL(experiment2[0].getPeptideIdentifications().size(), 1) - TEST_EQUAL(experiment2[0].getPeptideIdentifications()[0].getHits().size(), 2) - TEST_EQUAL(experiment2[0].getPeptideIdentifications()[0].getHits()[0].getSequence(), AASequence::fromString("LHASGITVTEIPVTATNFK")) - TEST_EQUAL(experiment2[0].getPeptideIdentifications()[0].getHits()[1].getSequence(), AASequence::fromString("MRSLGYVAVISAVATDTDK")) + TEST_EQUAL(annotated_experiment2.getPeptideIdentifications()[0].getHits().size(), 2) + TEST_EQUAL(annotated_experiment2.getPeptideIdentifications()[0].getHits()[0].getSequence(), AASequence::fromString("LHASGITVTEIPVTATNFK")) + 
TEST_EQUAL(annotated_experiment2.getPeptideIdentifications()[0].getHits()[1].getSequence(), AASequence::fromString("MRSLGYVAVISAVATDTDK")) //scan 2 - TEST_EQUAL(experiment2[1].getPeptideIdentifications().size(), 0) + TEST_EQUAL(annotated_experiment2.getPeptideIdentifications()[1].getHits().size(), 0) //scan 3 - TEST_EQUAL(experiment2[2].getPeptideIdentifications().size(), 1) - TEST_EQUAL(experiment2[2].getPeptideIdentifications()[0].getHits().size(), 1) - TEST_EQUAL(experiment2[2].getPeptideIdentifications()[0].getHits()[0].getSequence(), AASequence::fromString("HSKLSAK")) + TEST_EQUAL(annotated_experiment2.getPeptideIdentifications()[2].getHits().size(), 1) + TEST_EQUAL(annotated_experiment2.getPeptideIdentifications()[2].getHits()[0].getSequence(), AASequence::fromString("HSKLSAK")) END_SECTION diff --git a/src/tests/class_tests/openms/source/MSExperiment_test.cpp b/src/tests/class_tests/openms/source/MSExperiment_test.cpp index c4320ddfb09..a0349d13c01 100644 --- a/src/tests/class_tests/openms/source/MSExperiment_test.cpp +++ b/src/tests/class_tests/openms/source/MSExperiment_test.cpp @@ -421,7 +421,7 @@ END_SECTION START_SECTION((const MSExperiment::RangeManagerType& MSExperiment::getRange() const)) { PeakMap tmp; - TEST_EQUAL(tmp.getRange().hasRange() == HasRangeType::NONE, true) + TEST_EQUAL(tmp.combinedRanges().hasRange() == HasRangeType::NONE, true) } END_SECTION @@ -487,12 +487,12 @@ START_SECTION((virtual void updateRanges())) TEST_REAL_SIMILAR(tmp.getMinRT(),30.0) TEST_REAL_SIMILAR(tmp.getMaxRT(),50.0) - TEST_REAL_SIMILAR(tmp.getRange().getMinMZ(), 5.0) - TEST_REAL_SIMILAR(tmp.getRange().getMaxMZ(), 10.0) - TEST_REAL_SIMILAR(tmp.getRange().getMinRT(), 30.0) - TEST_REAL_SIMILAR(tmp.getRange().getMaxRT(), 50.0) - TEST_REAL_SIMILAR(tmp.getRange().getMinMobility(), 66) - TEST_REAL_SIMILAR(tmp.getRange().getMaxMobility(), 199) + TEST_REAL_SIMILAR(tmp.combinedRanges().getMinMZ(), 5.0) + TEST_REAL_SIMILAR(tmp.combinedRanges().getMaxMZ(), 10.0) + 
TEST_REAL_SIMILAR(tmp.combinedRanges().getMinRT(), 30.0) + TEST_REAL_SIMILAR(tmp.combinedRanges().getMaxRT(), 50.0) + TEST_REAL_SIMILAR(tmp.combinedRanges().getMinMobility(), 66) + TEST_REAL_SIMILAR(tmp.combinedRanges().getMaxMobility(), 199) TEST_EQUAL(tmp.getMSLevels().size(),2) TEST_EQUAL(tmp.getMSLevels()[0],1) @@ -500,27 +500,24 @@ START_SECTION((virtual void updateRanges())) TEST_EQUAL(tmp.getSize(),4) - //Update for MS level 1 - // Store initial MS levels std::vector initial_ms_levels = tmp.getMSLevels(); - tmp.updateRanges(1); - tmp.updateRanges(1); // Call twice to verify consistent behavior + // MS1 for (int l = 0; l < 2; ++l) { - TEST_REAL_SIMILAR(tmp.getMinMZ(),5.0) - TEST_REAL_SIMILAR(tmp.getMaxMZ(),7.0) - TEST_REAL_SIMILAR(tmp.getMinIntensity(), -7.0) - TEST_REAL_SIMILAR(tmp.getMaxIntensity(), -5.0) - TEST_REAL_SIMILAR(tmp.getMinRT(),30.0) - TEST_REAL_SIMILAR(tmp.getMaxRT(),40.0) - TEST_REAL_SIMILAR(tmp.getRange().getMinMobility(), 99) - TEST_REAL_SIMILAR(tmp.getRange().getMaxMobility(), 99) + TEST_REAL_SIMILAR(tmp.spectrumRanges().byMSLevel(1).getMinMZ(),5.0) + TEST_REAL_SIMILAR(tmp.spectrumRanges().byMSLevel(1).getMaxMZ(),7.0) + TEST_REAL_SIMILAR(tmp.spectrumRanges().byMSLevel(1).getMinIntensity(), -7.0) + TEST_REAL_SIMILAR(tmp.spectrumRanges().byMSLevel(1).getMaxIntensity(), -5.0) + TEST_REAL_SIMILAR(tmp.spectrumRanges().byMSLevel(1).getMinRT(),30.0) + TEST_REAL_SIMILAR(tmp.spectrumRanges().byMSLevel(1).getMaxRT(),40.0) + TEST_REAL_SIMILAR(tmp.spectrumRanges().byMSLevel(1).getMinMobility(), 99) + TEST_REAL_SIMILAR(tmp.spectrumRanges().byMSLevel(1).getMaxMobility(), 99) + // Verify MS levels remain unchanged TEST_EQUAL(tmp.getMSLevels() == initial_ms_levels, true) - TEST_EQUAL(tmp.getSize(),4) - tmp.updateRanges(1); + TEST_EQUAL(tmp.getSize(),4) } // test with only one peak @@ -534,26 +531,27 @@ START_SECTION((virtual void updateRanges())) s2.push_back(p2); s2.setDriftTime(99); tmp2.addSpectrum(s2); - tmp2.updateRanges(); - 
TEST_REAL_SIMILAR(tmp2.getMinMZ(),5.0) - TEST_REAL_SIMILAR(tmp2.getMaxMZ(),5.0) - TEST_REAL_SIMILAR(tmp2.getMinIntensity(), -5.0) - TEST_REAL_SIMILAR(tmp2.getMaxIntensity(), -5.0) - TEST_REAL_SIMILAR(tmp2.getMinRT(),30.0) - TEST_REAL_SIMILAR(tmp2.getMaxRT(),30.0) - TEST_REAL_SIMILAR(tmp.getRange().getMinMobility(), 99) - TEST_REAL_SIMILAR(tmp.getRange().getMaxMobility(), 99) - tmp2.updateRanges(1); + // check the overall ranges TEST_REAL_SIMILAR(tmp2.getMinMZ(),5.0) TEST_REAL_SIMILAR(tmp2.getMaxMZ(),5.0) TEST_REAL_SIMILAR(tmp2.getMinIntensity(), -5.0) TEST_REAL_SIMILAR(tmp2.getMaxIntensity(), -5.0) TEST_REAL_SIMILAR(tmp2.getMinRT(),30.0) TEST_REAL_SIMILAR(tmp2.getMaxRT(),30.0) - TEST_REAL_SIMILAR(tmp.getRange().getMinMobility(), 99) - TEST_REAL_SIMILAR(tmp.getRange().getMaxMobility(), 99) + TEST_REAL_SIMILAR(tmp2.getMinMobility(), 99) + TEST_REAL_SIMILAR(tmp2.getMaxMobility(), 99) + + // check the spectra specific ranges + TEST_REAL_SIMILAR(tmp2.spectrumRanges().getMinMZ(),5.0) + TEST_REAL_SIMILAR(tmp2.spectrumRanges().getMaxMZ(),5.0) + TEST_REAL_SIMILAR(tmp2.spectrumRanges().getMinIntensity(), -5.0) + TEST_REAL_SIMILAR(tmp2.spectrumRanges().getMaxIntensity(), -5.0) + TEST_REAL_SIMILAR(tmp2.spectrumRanges().getMinRT(),30.0) + TEST_REAL_SIMILAR(tmp2.spectrumRanges().getMaxRT(),30.0) + TEST_REAL_SIMILAR(tmp2.spectrumRanges().getMinMobility(), 99) + TEST_REAL_SIMILAR(tmp2.spectrumRanges().getMaxMobility(), 99) // test ranges with a chromatogram MSChromatogram chrom1, chrom2; @@ -583,12 +581,23 @@ START_SECTION((virtual void updateRanges())) tmp2.setChromatograms(chroms); tmp2.updateRanges(); + + // test the overall ranges TEST_REAL_SIMILAR(tmp2.getMinMZ(), 5.0) TEST_REAL_SIMILAR(tmp2.getMaxMZ(), 100.0) TEST_REAL_SIMILAR(tmp2.getMinIntensity(), -5.0) TEST_REAL_SIMILAR(tmp2.getMaxIntensity(), 10.4) TEST_REAL_SIMILAR(tmp2.getMinRT(), 0.1) - TEST_REAL_SIMILAR(tmp2.getMaxRT(), 30.0) + TEST_REAL_SIMILAR(tmp2.getMaxRT(), 30.0) // overall range still 30 + + // test the 
chromatogram ranges + TEST_REAL_SIMILAR(tmp2.chromatogramRanges().getMinMZ(), 80.0) + TEST_REAL_SIMILAR(tmp2.chromatogramRanges().getMaxMZ(), 100.0) + TEST_REAL_SIMILAR(tmp2.chromatogramRanges().getMinIntensity(), 10.0) + TEST_REAL_SIMILAR(tmp2.chromatogramRanges().getMaxIntensity(), 10.4) + TEST_REAL_SIMILAR(tmp2.chromatogramRanges().getMinRT(), 0.1) + TEST_REAL_SIMILAR(tmp2.chromatogramRanges().getMaxRT(), 0.3) // chromatogram range 0.1-0.3 + } END_SECTION @@ -1348,7 +1357,7 @@ START_SECTION((void swap(MSExperiment &from))) TEST_EQUAL(exp1.getComment(),"") TEST_EQUAL(exp1.size(),0) - TEST_EQUAL(exp1.getRange().hasRange() == HasRangeType::NONE, true) + TEST_EQUAL(exp1.combinedRanges().hasRange() == HasRangeType::NONE, true) TEST_EQUAL(exp1.getMSLevels().size(),0) TEST_EQUAL(exp1.getSize(),0); @@ -2623,8 +2632,331 @@ START_SECTION((template std::vector ids; - PeakMap exp; - msp_file.load(OPENMS_GET_TEST_DATA_PATH("MSPFile_test.msp"), ids, exp); - for (Size i = 0; i != ids.size(); ++i) - { - exp[i].getPeptideIdentifications().push_back(ids[i]); - } + AnnotatedMSRun annot_exp; + msp_file.load(OPENMS_GET_TEST_DATA_PATH("MSPFile_test.msp"), annot_exp); String filename; - NEW_TMP_FILE(filename) - msp_file.store(filename, exp); + NEW_TMP_FILE(filename); + msp_file.store(filename, annot_exp); - exp.clear(true); - ids.clear(); + PeakMap exp; + vector ids; msp_file.load(filename, ids, exp); TEST_EQUAL(ids.size(), 7) TEST_EQUAL(exp.size(), 7) diff --git a/src/tests/class_tests/openms/source/MzMLFile_test.cpp b/src/tests/class_tests/openms/source/MzMLFile_test.cpp index ce5e71e1c85..6f8ceaaad96 100644 --- a/src/tests/class_tests/openms/source/MzMLFile_test.cpp +++ b/src/tests/class_tests/openms/source/MzMLFile_test.cpp @@ -1028,10 +1028,12 @@ START_SECTION((template void store(const String& filename, co empty[0].getAcquisitionInfo().resize(1); std::string tmp_filename; - NEW_TMP_FILE(tmp_filename); + NEW_TMP_FILE(tmp_filename); + file.store(tmp_filename,empty); 
file.load(tmp_filename,exp); - TEST_EQUAL(exp==empty,true) + + TEST_EQUAL(exp == empty,true) //NOTE: If it does not work, use this code to find out where the difference is // TEST_EQUAL(exp.size()==empty.size(),true) diff --git a/src/tests/class_tests/openms/source/SpectrumSettings_test.cpp b/src/tests/class_tests/openms/source/SpectrumSettings_test.cpp index c6927e243c2..4322c5fbe75 100644 --- a/src/tests/class_tests/openms/source/SpectrumSettings_test.cpp +++ b/src/tests/class_tests/openms/source/SpectrumSettings_test.cpp @@ -177,33 +177,6 @@ START_SECTION((void setComment(const String& comment))) TEST_EQUAL(tmp.getComment(), "bla"); END_SECTION -START_SECTION((const std::vector& getPeptideIdentifications() const)) - SpectrumSettings tmp; - vector vec(tmp.getPeptideIdentifications()); - TEST_EQUAL(vec.size(),0); -END_SECTION - -START_SECTION((void setPeptideIdentifications(const std::vector& identifications))) - SpectrumSettings tmp; - vector vec; - - tmp.setPeptideIdentifications(vec); - TEST_EQUAL(tmp.getPeptideIdentifications().size(),0); - - PeptideIdentification dbs; - vec.push_back(dbs); - tmp.setPeptideIdentifications(vec); - TEST_EQUAL(tmp.getPeptideIdentifications().size(),1); -END_SECTION - -START_SECTION((std::vector& getPeptideIdentifications())) - SpectrumSettings tmp; - vector vec; - - tmp.getPeptideIdentifications().resize(1); - TEST_EQUAL(tmp.getPeptideIdentifications().size(),1); -END_SECTION - START_SECTION((SpectrumSettings& operator= (const SpectrumSettings& source))) SpectrumSettings tmp; tmp.setMetaValue("bla","bluff"); @@ -211,7 +184,6 @@ START_SECTION((SpectrumSettings& operator= (const SpectrumSettings& source))) tmp.getInstrumentSettings().getScanWindows().resize(1); tmp.getPrecursors().resize(1); tmp.getProducts().resize(1); - tmp.getPeptideIdentifications().resize(1); tmp.setType(SpectrumSettings::CENTROID); tmp.setComment("bla"); tmp.setNativeID("nid"); @@ -220,7 +192,6 @@ START_SECTION((SpectrumSettings& operator= (const 
SpectrumSettings& source))) SpectrumSettings tmp2(tmp); TEST_EQUAL(tmp2.getComment(), "bla"); TEST_EQUAL(tmp2.getType(), SpectrumSettings::CENTROID); - TEST_EQUAL(tmp2.getPeptideIdentifications().size(), 1); TEST_EQUAL(tmp2.getPrecursors().size(),1); TEST_EQUAL(tmp2.getProducts().size(),1); TEST_EQUAL(tmp2.getInstrumentSettings()==InstrumentSettings(), false); @@ -239,7 +210,6 @@ START_SECTION((SpectrumSettings(const SpectrumSettings& source))) tmp.getProducts().resize(1); tmp.setType(SpectrumSettings::CENTROID); tmp.setComment("bla"); - tmp.getPeptideIdentifications().resize(1); tmp.setNativeID("nid"); tmp.getDataProcessing().resize(1); tmp.setMetaValue("bla","bluff"); @@ -253,7 +223,6 @@ START_SECTION((SpectrumSettings(const SpectrumSettings& source))) TEST_EQUAL(tmp2.getInstrumentSettings()==InstrumentSettings(), false); TEST_EQUAL(tmp2.getAcquisitionInfo().empty(), true); TEST_EQUAL(tmp2.getAcquisitionInfo()==AcquisitionInfo(), false); - TEST_EQUAL(tmp2.getPeptideIdentifications().size(), 1); TEST_STRING_EQUAL(tmp2.getNativeID(),"nid"); TEST_EQUAL(tmp2.getDataProcessing().size(),1); TEST_STRING_EQUAL(tmp2.getMetaValue("bla"),"bluff"); @@ -266,7 +235,6 @@ START_SECTION((SpectrumSettings(const SpectrumSettings& source))) TEST_EQUAL(tmp2.getProducts().size(),0); TEST_EQUAL(tmp2.getInstrumentSettings()==InstrumentSettings(), true); TEST_EQUAL(tmp2.getAcquisitionInfo().empty(), true); - TEST_EQUAL(tmp2.getPeptideIdentifications().size(), 0); TEST_STRING_EQUAL(tmp2.getNativeID(),""); TEST_EQUAL(tmp2.getDataProcessing().size(),0); TEST_EQUAL(tmp2.metaValueExists("bla"),false); @@ -309,10 +277,6 @@ START_SECTION((bool operator== (const SpectrumSettings& rhs) const)) edit.getProducts().resize(1); TEST_EQUAL(edit==empty, false); - edit = empty; - edit.getPeptideIdentifications().resize(1); - TEST_EQUAL(edit==empty, false); - edit = empty; DataProcessingPtr dp = boost::shared_ptr(new DataProcessing); edit.getDataProcessing().push_back(dp); @@ -360,10 +324,6 @@ 
START_SECTION((bool operator!= (const SpectrumSettings& rhs) const)) edit.getProducts().resize(1); TEST_FALSE(edit == empty); - edit = empty; - edit.getPeptideIdentifications().resize(1); - TEST_FALSE(edit == empty); - edit = empty; DataProcessingPtr dp = boost::shared_ptr(new DataProcessing); edit.getDataProcessing().push_back(dp); @@ -411,15 +371,6 @@ START_SECTION((void unify(const SpectrumSettings &rhs))) appended_product.setMZ(2.0); appended.getProducts().push_back(appended_product); - // Identifications - PeptideIdentification org_ident; - org_ident.setIdentifier("org_ident"); - org.getPeptideIdentifications().push_back(org_ident); - - PeptideIdentification appended_ident; - appended_ident.setIdentifier("appended_ident"); - appended.getPeptideIdentifications().push_back(appended_ident); - // DataProcessings DataProcessingPtr org_processing = boost::shared_ptr(new DataProcessing); Software org_software; @@ -459,16 +410,6 @@ START_SECTION((void unify(const SpectrumSettings &rhs))) TEST_EQUAL(org.getProducts()[0].getMZ(), 1.0) TEST_EQUAL(org.getProducts()[1].getMZ(), 2.0) - // Identifications - TEST_EQUAL(org.getPeptideIdentifications().size(), 2) - ABORT_IF(org.getPeptideIdentifications().size()!=2) - - TEST_EQUAL(org.getPeptideIdentifications()[0].getIdentifier(), "org_ident") - TEST_EQUAL(org.getPeptideIdentifications()[1].getIdentifier(), "appended_ident") - - // Identifications - TEST_EQUAL(org.getDataProcessing().size(), 2) - ABORT_IF(org.getDataProcessing().size()!=2) TEST_EQUAL(org.getDataProcessing()[0]->getSoftware().getName(), "org_software") TEST_EQUAL(org.getDataProcessing()[1]->getSoftware().getName(), "appended_software") diff --git a/src/tests/class_tests/openms/source/XMLHandler_test.cpp b/src/tests/class_tests/openms/source/XMLHandler_test.cpp new file mode 100644 index 00000000000..408a272bba4 --- /dev/null +++ b/src/tests/class_tests/openms/source/XMLHandler_test.cpp @@ -0,0 +1,159 @@ + +#include +#include +#include +#include + +#include + 
+class StringManager_test : public OpenMS::Internal::StringManager +{ +public: + StringManager_test() = default; + ~StringManager_test() = default; + + static void compress64(const XMLCh* input_it, char* output_it) + { + StringManager::compress64_(input_it, output_it); + } +}; + +using namespace OpenMS::Internal; + + + + + +START_TEST(StringManager, "$Id$") + + +const XMLCh russianHello[] = { + 0x041F, 0x0440, 0x0438, 0x0432, 0x0435, 0x0442, 0x043C, + 0x0438, 0x0440,0x0000 // "Привет мир" (Hello World in Russian) +}; +XMLSize_t r_length = xercesc::XMLString::stringLen(russianHello); + +const XMLCh ascii[] = { + 0x0048,0x0065,0x006C,0x006C,0x006F,0x002C,0x0057,0x006F, + 0x0072,0x006C,0x0064,0x0021, 0x0000}; +XMLSize_t a_length = xercesc::XMLString::stringLen(ascii); + +const XMLCh mixed[] = { + 0x0048, 0x0065,0x0432, 0x0435, 0x0442, 0x043C, 0x006F, + 0x0072,0x006C,0x0064, 0x0021, 0x0000 }; +XMLSize_t m_length = xercesc::XMLString::stringLen(mixed); + +const XMLCh empty[] = {0}; +XMLSize_t e_length = xercesc::XMLString::stringLen(empty); + +const XMLCh upperBoundary [] = {0x00FF,0x00FF,0x0000}; +XMLSize_t u_length = xercesc::XMLString::stringLen(upperBoundary); + +bool isAscii = false; + +START_SECTION(isASCII(const XMLCh * chars, const XMLSize_t length)) + isAscii = StringManager::isASCII(ascii,a_length); + TEST_TRUE(isAscii) + + isAscii = StringManager::isASCII(russianHello,r_length); + TEST_FALSE(isAscii) + + isAscii = StringManager::isASCII(mixed,m_length); + TEST_FALSE(isAscii) + + isAscii = StringManager::isASCII(empty,e_length); + TEST_TRUE(isAscii) + + isAscii = StringManager::isASCII(upperBoundary,u_length); + TEST_TRUE(isAscii) +END_SECTION + +const XMLCh eight_block_negative[] = {0x0148,0x0165,0x016C,0x016C,0x016F,0x012C,0x0157,0x016F}; + +const XMLCh eight_block[] = {0x0048,0x0065,0x006C,0x006C,0x006F,0x002C,0x0057,0x006F}; + +const XMLCh eight_block_mixed[] ={0x0042,0x0045,0x004C,0x0041,0x0142,0x0145,0x014C,0x0141}; + +const XMLCh eight_block_kadabra[] = 
{ + 0x004B, // K + 0x0041, // A + 0x0044, // D + 0x0041, // A + 0x0042, // B + 0x0052, // R + 0x0041, // A + 0x0021 // ! +}; + +START_SECTION(compress64 (const XMLCh* input_it, char* output_it)) + std::string o1_str(8,'\0'); + StringManager_test::compress64(eight_block,o1_str.data()); + std::string res1_str = "Hello,Wo"; + TEST_STRING_EQUAL(o1_str,res1_str); + + + std::string o2_str(8,'\0'); + StringManager_test::compress64(eight_block_negative,o2_str.data()); + std::string res2_str = res1_str; + TEST_STRING_EQUAL(o2_str, res2_str); + + + std::string o3_str(8,'\0'); + StringManager_test::compress64(eight_block_mixed,o3_str.data()); + std::string res3_str = {0x42,0x45,0x4C,0x41,0x42,0x45,0x4C,0x41}; + TEST_STRING_EQUAL(o3_str, res3_str); + + std::string o4_str(12,'\0'); + o4_str [0] ='A'; + o4_str [1] ='B'; + o4_str [2] ='R'; + o4_str [3] ='A'; + + StringManager_test::compress64(eight_block_kadabra,((o4_str.data())+4)); + std::string res4_str = "ABRAKADABRA!"; + TEST_STRING_EQUAL(o4_str, res4_str); + +END_SECTION + +//Tests Number of Chars not Dividable by 8 +OpenMS::String o5_str; +std::string res5_str = "Hello,World!"; + +//Checks how the Function handles Data thats already stored in Output string +OpenMS::String o6_str = "Gruess Gott und "; +std::string res6_str = "Gruess Gott und Hello,World!"; + +OpenMS::String o7_str; +std::string res7_str = ""; + + +START_SECTION(appendASCII(const XMLCh * chars, const XMLSize_t length, String & result)) + + StringManager::appendASCII(ascii,a_length,o5_str); + TEST_STRING_EQUAL(o5_str, res5_str); + + StringManager::appendASCII(ascii,a_length,o6_str); + TEST_STRING_EQUAL(o6_str, res6_str); + + StringManager::appendASCII(empty,e_length,o7_str); + TEST_STRING_EQUAL(o7_str, res7_str); + + +END_SECTION +XMLCh* nullPointer = nullptr; +START_SECTION(strLength(const XMLCh* input_ptr)) + int o_length = StringManager::strLength(ascii); + TEST_EQUAL(o_length, a_length); + o_length = StringManager::strLength(empty); + TEST_EQUAL(o_length, 
e_length); + o_length = StringManager::strLength(upperBoundary); + TEST_EQUAL(o_length, u_length); + o_length = StringManager::strLength(nullPointer); + TEST_EQUAL(o_length, 0); +END_SECTION + +END_TEST + + + + diff --git a/src/tests/topp/CMakeLists.txt b/src/tests/topp/CMakeLists.txt index 5b43d64b811..ad5bd0bd641 100644 --- a/src/tests/topp/CMakeLists.txt +++ b/src/tests/topp/CMakeLists.txt @@ -820,12 +820,6 @@ add_test("TOPP_IDRTCalibration_2" ${TOPP_BIN_PATH}/IDRTCalibration -test -in ${D add_test("TOPP_IDRTCalibration_2_out1" ${DIFF} -in1 IDRTCalibration_2_output.tmp.idXML -in2 ${DATA_DIR_TOPP}/IDRTCalibration_2_output.idXML ) set_tests_properties("TOPP_IDRTCalibration_2_out1" PROPERTIES DEPENDS "TOPP_IDRTCalibration_2") - -#------------------------------------------------------------------------------ -# IDMassAccuracy tests -add_test("TOPP_IDMassAccuracy_1" ${TOPP_BIN_PATH}/IDMassAccuracy -test -in ${DATA_DIR_TOPP}/THIRDPARTY/spectra.mzML -id_in ${DATA_DIR_TOPP}/THIRDPARTY/MSGFPlusAdapter_1_out.idXML -number_of_bins 10 -out_fragment IDMassAccuracy_1_out_fragment.tsv -out_fragment_fit IDMassAccuracy_1_out_fragment_fit.tsv -out_precursor IDMassAccuracy_1_out_precursor.tsv -out_precursor_fit IDMassAccuracy_1_out_precursor_fit.tsv) -# Currently just testing if the tool runs - #------------------------------------------------------------------------------ # IsobaricAnalyzer tests add_test("TOPP_IsobaricAnalyzer_1" ${TOPP_BIN_PATH}/IsobaricAnalyzer -test -in ${DATA_DIR_TOPP}/IsobaricAnalyzer_input_1.mzML -ini ${DATA_DIR_TOPP}/IsobaricAnalyzer.ini -out IsobaricAnalyzer_output_1.tmp.consensusXML) @@ -1281,11 +1275,6 @@ add_test("TOPP_MSstatsConverter_3" ${TOPP_BIN_PATH}/MSstatsConverter -test -in $ add_test("TOPP_MSstatsConverter_3_out1" ${DIFF} -in1 MSstatsConverter_3_out.tmp.csv -in2 ${DATA_DIR_TOPP}/MSstatsConverter_3_out.csv ) set_tests_properties("TOPP_MSstatsConverter_3_out1" PROPERTIES DEPENDS "TOPP_MSstatsConverter_3") -### SpecLibSearcher tests 
-add_test("TOPP_SpecLibSearcher_1" ${TOPP_BIN_PATH}/SpecLibSearcher -test -ini ${DATA_DIR_TOPP}/SpecLibSearcher_1_parameters.ini -in ${DATA_DIR_TOPP}/SpecLibSearcher_1.mzML -lib ${DATA_DIR_TOPP}/SpecLibSearcher_1.MSP -out SpecLibSearcher_1.tmp.idXML) -add_test("TOPP_SpecLibSearcher_1_out1" ${DIFF} -in1 SpecLibSearcher_1.tmp.idXML -in2 ${DATA_DIR_TOPP}/SpecLibSearcher_1.idXML -whitelist "?xml-stylesheet" "IdentificationRun date" "db=") -set_tests_properties("TOPP_SpecLibSearcher_1_out1" PROPERTIES DEPENDS "TOPP_SpecLibSearcher_1") - if(NOT DISABLE_OPENSWATH) #------------------------------------------------------------------------------ # MRM / SWATH tests (from OpenSWATH) diff --git a/src/tests/topp/FileFilter_47_output.mzML b/src/tests/topp/FileFilter_47_output.mzML index 70ba63aab61..30585e3d65e 100644 --- a/src/tests/topp/FileFilter_47_output.mzML +++ b/src/tests/topp/FileFilter_47_output.mzML @@ -113,7 +113,7 @@ - + @@ -126,9 +126,9 @@ - + - + @@ -141,7 +141,7 @@ - + @@ -152,7 +152,7 @@ - + @@ -168,16 +168,16 @@ - + - + - + - + @@ -401,49 +401,17 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + - 14572 - 18005 - 21346 - 24944 - 28880 - - - 32785 + 14587 + 18020 + 21361 + 24959 + 28895 -34237 +32746 0 - + \ No newline at end of file diff --git a/src/tests/topp/FileFilter_48_output.mzML b/src/tests/topp/FileFilter_48_output.mzML index aacdb4e202e..94eff3d51f9 100644 --- a/src/tests/topp/FileFilter_48_output.mzML +++ b/src/tests/topp/FileFilter_48_output.mzML @@ -113,7 +113,7 @@ - + @@ -126,9 +126,9 @@ - + - + @@ -141,7 +141,7 @@ - + @@ -152,7 +152,7 @@ - + @@ -168,16 +168,16 @@ - + - + - + - + @@ -233,45 +233,13 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + - 14572 - - - 18042 + 14587 -19494 +18003 0 - + \ No newline at end of file diff --git a/src/tests/topp/FileInfo_19_output.txt b/src/tests/topp/FileInfo_19_output.txt index e8af63fbdcd..b9b1723d9a0 100644 --- a/src/tests/topp/FileInfo_19_output.txt +++ 
b/src/tests/topp/FileInfo_19_output.txt @@ -10,12 +10,35 @@ MS levels: 1, 2 Total number of peaks: 2681 Number of spectra: 2 -Ranges: +Combined Ranges (spectra + chromatograms): retention time: 18.25 .. 18.39 sec (0.0 min) mass-to-charge: 0.00 .. 1590.88 ion mobility: -65.00 .. -65.00 intensity: 0.00 .. 31878566.00 +Spectrum Ranges: + retention time: 18.25 .. 18.39 sec (0.0 min) + mass-to-charge: 110.07 .. 1590.88 + ion mobility: -65.00 .. -65.00 + intensity: 0.00 .. 5057372.00 + +MS Level 1 Ranges: + retention time: 18.25 .. 18.25 sec (0.0 min) + mass-to-charge: 371.27 .. 1590.88 + ion mobility: -65.00 .. -65.00 + intensity: 0.00 .. 5057372.00 + +MS Level 2 Ranges: + retention time: 18.39 .. 18.39 sec (0.0 min) + mass-to-charge: 110.07 .. 871.38 + ion mobility: -65.00 .. -65.00 + intensity: 1030.52 .. 15416.40 + +Chromatogram Ranges: + retention time: 18.25 .. 18.39 sec (0.0 min) + mass-to-charge: 0.00 .. 0.00 + intensity: 117547.13 .. 31878566.00 + Number of spectra per MS level: level 1: 1 level 2: 1 diff --git a/src/tests/topp/FileInfo_1_output.txt b/src/tests/topp/FileInfo_1_output.txt index 3407bd96077..f74048ce136 100644 --- a/src/tests/topp/FileInfo_1_output.txt +++ b/src/tests/topp/FileInfo_1_output.txt @@ -10,11 +10,29 @@ MS levels: 2 Total number of peaks: 57 Number of spectra: 1 -Ranges: +Combined Ranges (spectra + chromatograms): retention time: -1.00 .. -1.00 sec (0.0 min) mass-to-charge: 261.30 .. 783.50 ion mobility: .. intensity: 3672.00 .. 272411.00 + +Spectrum Ranges: + retention time: -1.00 .. -1.00 sec (0.0 min) + mass-to-charge: 261.30 .. 783.50 + ion mobility: .. + intensity: 3672.00 .. 272411.00 + +MS Level 2 Ranges: + retention time: -1.00 .. -1.00 sec (0.0 min) + mass-to-charge: 261.30 .. 783.50 + ion mobility: .. + intensity: 3672.00 .. 272411.00 + +Chromatogram Ranges: + retention time: .. sec ( min) + mass-to-charge: .. + intensity: .. 
+ Number of spectra per MS level: level 2: 1 diff --git a/src/tests/topp/FileInfo_2_output.txt b/src/tests/topp/FileInfo_2_output.txt index 8dc1cd22730..76bc8af57ea 100644 --- a/src/tests/topp/FileInfo_2_output.txt +++ b/src/tests/topp/FileInfo_2_output.txt @@ -10,11 +10,29 @@ MS levels: 1 Total number of peaks: 8 Number of spectra: 8 -Ranges: +Combined Ranges (spectra + chromatograms): retention time: 0.00 .. 6.00 sec (0.1 min) mass-to-charge: 500.00 .. 1100.00 ion mobility: .. intensity: 50.00 .. 400.00 + +Spectrum Ranges: + retention time: 0.00 .. 6.00 sec (0.1 min) + mass-to-charge: 500.00 .. 1100.00 + ion mobility: .. + intensity: 50.00 .. 400.00 + +MS Level 1 Ranges: + retention time: 0.00 .. 6.00 sec (0.1 min) + mass-to-charge: 500.00 .. 1100.00 + ion mobility: .. + intensity: 50.00 .. 400.00 + +Chromatogram Ranges: + retention time: .. sec ( min) + mass-to-charge: .. + intensity: .. + Number of spectra per MS level: level 1: 8 diff --git a/src/tests/topp/FileInfo_4_output.txt b/src/tests/topp/FileInfo_4_output.txt index 8defbec1941..631a82f142f 100644 --- a/src/tests/topp/FileInfo_4_output.txt +++ b/src/tests/topp/FileInfo_4_output.txt @@ -11,12 +11,35 @@ MS levels: 1, 2 Total number of peaks: 6864 Number of spectra: 20 -Ranges: +Combined Ranges (spectra + chromatograms): retention time: 0.26 .. 37.64 sec (0.6 min) mass-to-charge: 207.51 .. 1496.08 ion mobility: .. intensity: 12.15 .. 25903.89 +Spectrum Ranges: + retention time: 0.26 .. 37.64 sec (0.6 min) + mass-to-charge: 207.51 .. 1496.08 + ion mobility: .. + intensity: 12.15 .. 25903.89 + +MS Level 1 Ranges: + retention time: 0.26 .. 32.24 sec (0.5 min) + mass-to-charge: 402.48 .. 1496.08 + ion mobility: .. + intensity: 12.15 .. 25903.89 + +MS Level 2 Ranges: + retention time: 21.95 .. 37.64 sec (0.3 min) + mass-to-charge: 207.51 .. 1035.83 + ion mobility: .. + intensity: 13.39 .. 3633.01 + +Chromatogram Ranges: + retention time: .. sec ( min) + mass-to-charge: .. + intensity: .. 
+ Number of spectra per MS level: level 1: 14 level 2: 6 diff --git a/src/tests/topp/FileInfo_5_output.txt b/src/tests/topp/FileInfo_5_output.txt index c8b0d5cd9e8..a238096d66c 100644 --- a/src/tests/topp/FileInfo_5_output.txt +++ b/src/tests/topp/FileInfo_5_output.txt @@ -12,12 +12,35 @@ MS levels: 1, 2 Total number of peaks: 3149 Number of spectra: 10 -Ranges: +Combined Ranges (spectra + chromatograms): retention time: 20.87 .. 39.97 sec (0.3 min) mass-to-charge: 500.07 .. 1497.28 ion mobility: .. intensity: 16.30 .. 38920.12 +Spectrum Ranges: + retention time: 20.87 .. 39.97 sec (0.3 min) + mass-to-charge: 500.07 .. 1497.28 + ion mobility: .. + intensity: 16.30 .. 38920.12 + +MS Level 1 Ranges: + retention time: 20.87 .. 39.97 sec (0.3 min) + mass-to-charge: 500.07 .. 1497.28 + ion mobility: .. + intensity: 16.30 .. 38920.12 + +MS Level 2 Ranges: + retention time: 25.40 .. 39.11 sec (0.2 min) + mass-to-charge: 500.10 .. 850.08 + ion mobility: .. + intensity: 23.68 .. 729.68 + +Chromatogram Ranges: + retention time: .. sec ( min) + mass-to-charge: .. + intensity: .. + Number of spectra per MS level: level 1: 4 level 2: 6 diff --git a/src/tests/topp/FileInfo_6_output.txt b/src/tests/topp/FileInfo_6_output.txt index 0b7af16dba8..33e3f40118c 100644 --- a/src/tests/topp/FileInfo_6_output.txt +++ b/src/tests/topp/FileInfo_6_output.txt @@ -11,12 +11,29 @@ MS levels: 1 Total number of peaks: 9 Number of spectra: 2 -Ranges: +Combined Ranges (spectra + chromatograms): retention time: 474.56 .. 475.32 sec (0.0 min) mass-to-charge: 937.28 .. 941.20 ion mobility: .. intensity: 1639.00 .. 18025.00 +Spectrum Ranges: + retention time: 474.56 .. 475.32 sec (0.0 min) + mass-to-charge: 937.28 .. 941.20 + ion mobility: .. + intensity: 1639.00 .. 18025.00 + +MS Level 1 Ranges: + retention time: 474.56 .. 475.32 sec (0.0 min) + mass-to-charge: 937.28 .. 941.20 + ion mobility: .. + intensity: 1639.00 .. 18025.00 + +Chromatogram Ranges: + retention time: .. 
sec ( min) + mass-to-charge: .. + intensity: .. + Number of spectra per MS level: level 1: 2 diff --git a/src/tests/topp/FileInfo_9_output.txt b/src/tests/topp/FileInfo_9_output.txt index e1bd5e96ad0..a2c707860ef 100644 --- a/src/tests/topp/FileInfo_9_output.txt +++ b/src/tests/topp/FileInfo_9_output.txt @@ -12,12 +12,35 @@ MS levels: 1, 2 Total number of peaks: 40 Number of spectra: 4 -Ranges: +Combined Ranges (spectra + chromatograms): retention time: 5.10 .. 5.40 sec (0.0 min) mass-to-charge: 0.00 .. 18.00 ion mobility: .. intensity: 1.00 .. 20.00 +Spectrum Ranges: + retention time: 5.10 .. 5.40 sec (0.0 min) + mass-to-charge: 0.00 .. 18.00 + ion mobility: .. + intensity: 1.00 .. 20.00 + +MS Level 1 Ranges: + retention time: 5.10 .. 5.40 sec (0.0 min) + mass-to-charge: 0.00 .. 14.00 + ion mobility: .. + intensity: 1.00 .. 15.00 + +MS Level 2 Ranges: + retention time: 5.20 .. 5.20 sec (0.0 min) + mass-to-charge: 0.00 .. 18.00 + ion mobility: .. + intensity: 2.00 .. 20.00 + +Chromatogram Ranges: + retention time: .. sec ( min) + mass-to-charge: .. + intensity: .. 
+ Number of spectra per MS level: level 1: 3 level 2: 1 diff --git a/src/tests/topp/QCCalculator_2_output.mzQC b/src/tests/topp/QCCalculator_2_output.mzQC index 03429e90413..130ae90c4d9 100644 --- a/src/tests/topp/QCCalculator_2_output.mzQC +++ b/src/tests/topp/QCCalculator_2_output.mzQC @@ -1,6 +1,6 @@ { "mzQC": { - "creationDate": "2021-07-13T18:11:15", + "creationDate": "2025-05-16T14:22:42", "version": "1.0.0", "contactName": "name", "contactAddress": "address", @@ -11,7 +11,7 @@ "label": "label", "inputFiles": [ { - "location": "/home/axel/dev/OpenMS/src/tests/topp/QCCalculator_input.mzML", + "location": "/home/sachsenb/Development/OpenMS/src/tests/topp/QCCalculator_input.mzML", "name": "QCCalculator_input.mzML", "fileFormat": { "accession": "MS:10000584", @@ -40,7 +40,7 @@ { "accession": "MS:1009001", "name": "QCCalculator", - "version": "2.6.0", + "version": "3.5.0", "uri": "https://www.openms.de" } ] @@ -71,7 +71,7 @@ "name": "MZ acquisition range", "value": [ 0, - 678 + 18 ] }, { diff --git a/src/tests/topp/SpecLibSearcher_1.MSP b/src/tests/topp/SpecLibSearcher_1.MSP deleted file mode 100644 index d582e1fd7aa..00000000000 --- a/src/tests/topp/SpecLibSearcher_1.MSP +++ /dev/null @@ -1,15 +0,0 @@ -Name: AADDKEACFAVEGPK/2 -MW: 1608.745 -Comment: Spec=Consensus Pep=N-Semitryp_irreg/miss_good Fullname=C.AADDKEACFAVEGPK.L/2 Mods=1/7,C,Carbamidomethyl Parent=804.373 Inst=it Mz_diff=0.357 Mz_exact=804.3727 Mz_av=804.885 Protein="sp|P02769|ALBU_BOVIN Serum albumin precursor (Allergen Bos d 6) (BSA) - Bos taurus (Bovine)." 
Pseq=527 Organism="Protein" Se=1^I43:ex=0.0167/0.01974,dc=-0.756/0.4551,do=19.77/1.497,bs=0.0006,b2=0.0007,bd=-0.255 Sample=1/bsa_cam_different_voltages,43,1 Nreps=43/43 Missing=0.0642/0.0420 Parent_med=804.69/0.08 Max2med_orig=215.8/114.0 Dotfull=0.903/0.029 Dot_cons=0.948/0.034 Unassign_all=0.083 Unassigned=0.000 Dotbest=0.96 Flags=0,0,0 Naa=15 DUScorr=10/3.8/2.9 Dottheory=0.95 Pfin=1.3e+004 Probcorr=0.0067 Tfratio=2e+005 Pfract=0 -Num peaks: 10 -240.2 2 "b3-18/0.10 20/36 0.4" -359.2 2 "? 39/43 0.7" -430.3 5 "y4/0.07 43/43 1.8" -560.4 2 "?i 27/42 0.6" -609.8 3 "y11-17^2/-0.01,y11-18^2/0.49 41/43 1.2" -713.5 4 "? 23/42 0.7" -861.3 5 "b8/-0.03,y8-46/-0.13 43/43 1.5" -978.4 5 "y9/-0.07 43/43 4.9" -1364.4 2 "b13/-0.17 43/43 1.0" -1480.6 3 "?i 19/36 0.5" - diff --git a/src/tests/topp/SpecLibSearcher_1.MzData b/src/tests/topp/SpecLibSearcher_1.MzData deleted file mode 100644 index 243c7e952ed..00000000000 --- a/src/tests/topp/SpecLibSearcher_1.MzData +++ /dev/null @@ -1,57 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - MzNwQ5qZs0NmJtdDmhkMRDNzGEQAYDJEM1NXRJqZdETNjKpEMxO5RA== - - - AAAAQAAAAEAAAKBAAAAAQAAAQEAAAIBAAACgQAAAoEAAAABAAABAQA== - - - - diff --git a/src/tests/topp/SpecLibSearcher_1.idXML b/src/tests/topp/SpecLibSearcher_1.idXML deleted file mode 100644 index ecea41a65df..00000000000 --- a/src/tests/topp/SpecLibSearcher_1.idXML +++ /dev/null @@ -1,20 +0,0 @@ - - - - - - - - - - - - - - - - - - - - diff --git a/src/tests/topp/SpecLibSearcher_1.mzML b/src/tests/topp/SpecLibSearcher_1.mzML deleted file mode 100644 index 0120911dbd9..00000000000 --- a/src/tests/topp/SpecLibSearcher_1.mzML +++ /dev/null @@ -1,147 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
AAAAYGYGbkAAAABAM3N2QAAAAMDM5HpAAAAAQDODgUAAAABgZg6DQAAAAAAATIZAAAAAYGbqikAAAABAM5OOQAAAAKCZUZVAAAAAYGYil0A= - - - - - - AAAAQAAAAEAAAKBAAAAAQAAAQEAAAIBAAACgQAAAoEAAAABAAABAQA== - - - - - - - - - 6840 - - -8886 -0 - diff --git a/src/tests/topp/SpecLibSearcher_1_parameters.ini b/src/tests/topp/SpecLibSearcher_1_parameters.ini deleted file mode 100644 index c28aa1b0933..00000000000 --- a/src/tests/topp/SpecLibSearcher_1_parameters.ini +++ /dev/null @@ -1,49 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/topp/AssayGeneratorMetabo.cpp b/src/topp/AssayGeneratorMetabo.cpp index 3bab1a0d923..b8ba8740f8d 100644 --- a/src/topp/AssayGeneratorMetabo.cpp +++ b/src/topp/AssayGeneratorMetabo.cpp @@ -213,11 +213,11 @@ class TOPPAssayGeneratorMetabo : { // load mzML PeakMap spectra; - FileHandler().loadExperiment(in[file_counter], spectra, {FileTypes::MZML}); + FileHandler().loadExperiment(in[file_counter], spectra, {FileTypes::MZML}, log_type_); // load featurexml FeatureMap feature_map; - FileHandler().loadFeatures(id[file_counter], feature_map, {FileTypes::FEATUREXML}); + FileHandler().loadFeatures(id[file_counter], feature_map, {FileTypes::FEATUREXML}, log_type_); // check if featureXML corresponds to mzML StringList featurexml_primary_path; @@ -435,7 +435,7 @@ class TOPPAssayGeneratorMetabo : // validate OpenMS::TransitionTSVFile::validateTargetedExperiment(t_exp); // write traML - FileHandler().storeTransitions(out, t_exp, {FileTypes::TRAML}); + FileHandler().storeTransitions(out, t_exp, {FileTypes::TRAML}, log_type_); } else if (extension == "pqp") { diff --git a/src/topp/FileConverter.cpp b/src/topp/FileConverter.cpp index d76eb9b411a..1e97221983a 100644 --- a/src/topp/FileConverter.cpp +++ b/src/topp/FileConverter.cpp @@ -247,7 +247,7 @@ class TOPPFileConverter : if (in_type == FileTypes::CONSENSUSXML) { - FileHandler().loadConsensusFeatures(in, cm, {FileTypes::CONSENSUSXML}); + 
FileHandler().loadConsensusFeatures(in, cm, {FileTypes::CONSENSUSXML}, log_type_); cm.sortByPosition(); if ((out_type != FileTypes::FEATUREXML) && (out_type != FileTypes::CONSENSUSXML) && @@ -306,7 +306,7 @@ class TOPPFileConverter : } else if (in_type == FileTypes::EDTA) { - FileHandler().loadConsensusFeatures(in, cm, {FileTypes::EDTA}); + FileHandler().loadConsensusFeatures(in, cm, {FileTypes::EDTA}, log_type_); cm.sortByPosition(); if ((out_type != FileTypes::FEATUREXML) && (out_type != FileTypes::CONSENSUSXML)) @@ -478,7 +478,7 @@ class TOPPFileConverter : } ChromatogramTools().convertSpectraToChromatograms(exp, true, convert_to_chromatograms); - mzmlFile.storeExperiment(out, exp, {FileTypes::MZML}); + mzmlFile.storeExperiment(out, exp, {FileTypes::MZML}, log_type_); } else if (out_type == FileTypes::MZDATA) { @@ -492,7 +492,7 @@ class TOPPFileConverter : addDataProcessing_(exp, getProcessingInfo_(DataProcessing:: CONVERSION_MZDATA)); ChromatogramTools().convertChromatogramsToSpectra(exp); - FileHandler().storeExperiment(out, exp, {FileTypes::MZDATA}); + FileHandler().storeExperiment(out, exp, {FileTypes::MZDATA}, log_type_); } else if (out_type == FileTypes::MZXML) { @@ -565,7 +565,7 @@ class TOPPFileConverter : } else if (in_type == FileTypes::OMS) { - FileHandler().loadFeatures(in, fm, {FileTypes::OMS}); + FileHandler().loadFeatures(in, fm, {FileTypes::OMS}, log_type_); IdentificationDataConverter::exportFeatureIDs(fm); } else // not loaded as feature map or consensus map @@ -594,7 +594,7 @@ class TOPPFileConverter : addDataProcessing_(fm, getProcessingInfo_(DataProcessing:: FORMAT_CONVERSION)); - FileHandler().storeFeatures(out, fm, {FileTypes::FEATUREXML}); + FileHandler().storeFeatures(out, fm, {FileTypes::FEATUREXML}, log_type_); } else if (out_type == FileTypes::CONSENSUSXML) { @@ -626,7 +626,7 @@ class TOPPFileConverter : addDataProcessing_(cm, getProcessingInfo_(DataProcessing:: FORMAT_CONVERSION)); - FileHandler().storeConsensusFeatures(out, cm, 
{FileTypes::CONSENSUSXML}); + FileHandler().storeConsensusFeatures(out, cm, {FileTypes::CONSENSUSXML}, log_type_); } else if (out_type == FileTypes::EDTA) { @@ -642,11 +642,11 @@ class TOPPFileConverter : } if (!fm.empty()) { - FileHandler().storeFeatures(out, fm, {FileTypes::EDTA}); + FileHandler().storeFeatures(out, fm, {FileTypes::EDTA}, log_type_); } else if (!cm.empty()) { - FileHandler().storeConsensusFeatures(out, cm, {FileTypes::EDTA}); + FileHandler().storeConsensusFeatures(out, cm, {FileTypes::EDTA}, log_type_); } } else if (out_type == FileTypes::CACHEDMZML) @@ -679,19 +679,19 @@ class TOPPFileConverter : } else if (out_type == FileTypes::SQMASS) { - FileHandler().storeExperiment(out, exp, {FileTypes::SQMASS}); + FileHandler().storeExperiment(out, exp, {FileTypes::SQMASS}, log_type_); } else if (out_type == FileTypes::OMS) { if (in_type == FileTypes::FEATUREXML) { IdentificationDataConverter::importFeatureIDs(fm); - FileHandler().storeFeatures(out, fm, {FileTypes::OMS}); + FileHandler().storeFeatures(out, fm, {FileTypes::OMS}, log_type_); } else if (in_type == FileTypes::CONSENSUSXML) { IdentificationDataConverter::importConsensusIDs(cm); - FileHandler().storeConsensusFeatures(out, cm, {FileTypes::OMS}); + FileHandler().storeConsensusFeatures(out, cm, {FileTypes::OMS}, log_type_); } else { diff --git a/src/topp/FileFilter.cpp b/src/topp/FileFilter.cpp index 2ecea6feaa4..f26df5fe16b 100644 --- a/src/topp/FileFilter.cpp +++ b/src/topp/FileFilter.cpp @@ -946,7 +946,7 @@ class TOPPFileFilter : bool is_blacklist = getStringOption_("spectra:blackorwhitelist:blacklist") == "true" ? 
true : false; PeakMap lib_file; - FileHandler().loadExperiment(lib_file_name, lib_file, {FileTypes::MZML}); + FileHandler().loadExperiment(lib_file_name, lib_file, {FileTypes::MZML}, log_type_); int ret = filterByBlackOrWhiteList(is_blacklist, exp, lib_file, tol_rt, tol_mz, tol_sim, is_ppm); if (ret != EXECUTION_OK) diff --git a/src/topp/FileInfo.cpp b/src/topp/FileInfo.cpp index e7a3e3e7e8e..6b3610ffc99 100644 --- a/src/topp/FileInfo.cpp +++ b/src/topp/FileInfo.cpp @@ -8,7 +8,6 @@ #include #include - #include #include @@ -33,11 +32,13 @@ #include #include #include +#include #include #include #include + #include #include #include @@ -165,6 +166,10 @@ class TOPPFileInfo : public TOPPBase registerFlag_("i", "Check whether a given mzML file contains valid indices (conforming to the indexedmzML standard)"); } + // Forward declare the specialized version for MSExperiment to avoid compiler errors + // template <> + // void writeRangesHumanReadable_(const MSExperiment& map, ostream &os); + template void writeRangesHumanReadable_(const Map& map, ostream &os) { @@ -208,7 +213,196 @@ class TOPPFileInfo : public TOPPBase else { os << " intensity: " << String::number(map.getMinIntensity(), 2) << " .. " << String::number(map.getMaxIntensity(), 2) << "\n\n"; - } + } + } + + void writeRangesHumanReadable_(const MSExperiment& exp, ostream &os) + { + // 1. Display Combined Ranges (same format as before for backward compatibility) + os << "Combined Ranges (spectra + chromatograms):" << '\n'; + // Use the combinedRanges() accessor + if (exp.combinedRanges().RangeRT::isEmpty()) + { + os << " retention time: .. sec ( min)\n"; + } + else + { + os << " retention time: " << String::number(exp.combinedRanges().getMinRT(), 2) << " .. 
" + << String::number(exp.combinedRanges().getMaxRT(), 2) << " sec (" + << String::number((exp.combinedRanges().getMaxRT() - exp.combinedRanges().getMinRT()) / 60, 1) << " min)\n"; + } + + // Display m/z range + if (exp.combinedRanges().RangeMZ::isEmpty()) + { + os << " mass-to-charge: .. \n"; + } + else + { + os << " mass-to-charge: " << String::number(exp.combinedRanges().getMinMZ(), 2) << " .. " + << String::number(exp.combinedRanges().getMaxMZ(), 2) << '\n'; + } + + // Display mobility range if present + if (exp.combinedRanges().RangeMobility::isEmpty()) + { + os << " ion mobility: .. \n"; + } + else + { + os << " ion mobility: " << String::number(exp.combinedRanges().getMinMobility(), 2) << " .. " + << String::number(exp.combinedRanges().getMaxMobility(), 2) << '\n'; + } + + // Display intensity range + if (exp.combinedRanges().RangeIntensity::isEmpty()) + { + os << " intensity: .. \n\n"; + } + else + { + os << " intensity: " << String::number(exp.combinedRanges().getMinIntensity(), 2) << " .. " + << String::number(exp.combinedRanges().getMaxIntensity(), 2) << "\n\n"; + } + + // 2. Display Spectrum Ranges (overall) + os << "Spectrum Ranges:" << '\n'; + // Use the spectrumRanges() accessor with MS level 0 for overall ranges + const auto& spec_ranges = exp.spectrumRanges(); + + if (spec_ranges.RangeRT::isEmpty()) + { + os << " retention time: .. sec ( min)\n"; + } + else + { + os << " retention time: " << String::number(spec_ranges.getMinRT(), 2) << " .. " + << String::number(spec_ranges.getMaxRT(), 2) << " sec (" + << String::number((spec_ranges.getMaxRT() - spec_ranges.getMinRT()) / 60, 1) << " min)\n"; + } + + // Display m/z range + if (spec_ranges.RangeMZ::isEmpty()) + { + os << " mass-to-charge: .. \n"; + } + else + { + os << " mass-to-charge: " << String::number(spec_ranges.getMinMZ(), 2) << " .. 
" + << String::number(spec_ranges.getMaxMZ(), 2) << '\n'; + } + + // Display mobility range if present + if (spec_ranges.RangeMobility::isEmpty()) + { + os << " ion mobility: .. \n"; + } + else + { + os << " ion mobility: " << String::number(spec_ranges.getMinMobility(), 2) << " .. " + << String::number(spec_ranges.getMaxMobility(), 2) << '\n'; + } + + // Display intensity range + if (spec_ranges.RangeIntensity::isEmpty()) + { + os << " intensity: .. \n\n"; + } + else + { + os << " intensity: " << String::number(spec_ranges.getMinIntensity(), 2) << " .. " + << String::number(spec_ranges.getMaxIntensity(), 2) << "\n\n"; + } + + // 3. Display Spectrum Ranges per MS Level + std::set ms_levels = exp.spectrumRanges().getMSLevels(); + for (UInt ms_level : ms_levels) + { + os << "MS Level " << ms_level << " Ranges:" << '\n'; + const auto& level_ranges = exp.spectrumRanges().byMSLevel(ms_level); + + // Output RT range for this MS level + if (level_ranges.RangeRT::isEmpty()) + { + os << " retention time: .. sec ( min)\n"; + } + else + { + os << " retention time: " << String::number(level_ranges.getMinRT(), 2) << " .. " + << String::number(level_ranges.getMaxRT(), 2) << " sec (" + << String::number((level_ranges.getMaxRT() - level_ranges.getMinRT()) / 60, 1) << " min)\n"; + } + + // Display m/z range for this MS level + if (level_ranges.RangeMZ::isEmpty()) + { + os << " mass-to-charge: .. \n"; + } + else + { + os << " mass-to-charge: " << String::number(level_ranges.getMinMZ(), 2) << " .. " + << String::number(level_ranges.getMaxMZ(), 2) << '\n'; + } + + // Display mobility range for this MS level if present + if (level_ranges.RangeMobility::isEmpty()) + { + os << " ion mobility: .. \n"; + } + else + { + os << " ion mobility: " << String::number(level_ranges.getMinMobility(), 2) << " .. " + << String::number(level_ranges.getMaxMobility(), 2) << '\n'; + } + + // Display intensity range for this MS level + if (level_ranges.RangeIntensity::isEmpty()) + { + os << " intensity: .. 
\n\n"; + } + else + { + os << " intensity: " << String::number(level_ranges.getMinIntensity(), 2) << " .. " + << String::number(level_ranges.getMaxIntensity(), 2) << "\n\n"; + } + } + + // 4. Display Chromatogram Ranges + os << "Chromatogram Ranges:" << '\n'; + const auto& chrom_ranges = exp.chromatogramRanges(); + + if (chrom_ranges.RangeRT::isEmpty()) + { + os << " retention time: .. sec ( min)\n"; + } + else + { + os << " retention time: " << String::number(chrom_ranges.getMinRT(), 2) << " .. " + << String::number(chrom_ranges.getMaxRT(), 2) << " sec (" + << String::number((chrom_ranges.getMaxRT() - chrom_ranges.getMinRT()) / 60, 1) << " min)\n"; + } + + // Display m/z range for chromatograms + if (chrom_ranges.RangeMZ::isEmpty()) + { + os << " mass-to-charge: .. \n"; + } + else + { + os << " mass-to-charge: " << String::number(chrom_ranges.getMinMZ(), 2) << " .. " + << String::number(chrom_ranges.getMaxMZ(), 2) << '\n'; + } + + // Display intensity range for chromatograms + if (chrom_ranges.RangeIntensity::isEmpty()) + { + os << " intensity: .. \n\n"; + } + else + { + os << " intensity: " << String::number(chrom_ranges.getMinIntensity(), 2) << " .. " + << String::number(chrom_ranges.getMaxIntensity(), 2) << "\n\n"; + } } template @@ -258,6 +452,174 @@ class TOPPFileInfo : public TOPPBase << "general: ranges: intensity: max" << '\t' << "" << '\n'; } } + + + void writeRangesMachineReadable_(const MSExperiment& exp, ostream &os) + { + // 1. 
Combined Ranges + if (!exp.combinedRanges().RangeRT::isEmpty()) + { + os << "general: combined ranges: retention time: min" << '\t' << String::number(exp.combinedRanges().getMinRT(), 2) << '\n' + << "general: combined ranges: retention time: max" << '\t' << String::number(exp.combinedRanges().getMaxRT(), 2) << '\n'; + } + else + { + os << "general: combined ranges: retention time: min" << '\t' << "" << '\n' + << "general: combined ranges: retention time: max" << '\t' << "" << '\n'; + } + + if (!exp.combinedRanges().RangeMZ::isEmpty()) + { + os << "general: combined ranges: mass-to-charge: min" << '\t' << String::number(exp.combinedRanges().getMinMZ(), 2) << '\n' + << "general: combined ranges: mass-to-charge: max" << '\t' << String::number(exp.combinedRanges().getMaxMZ(), 2) << '\n'; + } + else + { + os << "general: combined ranges: mass-to-charge: min" << '\t' << "" << '\n' + << "general: combined ranges: mass-to-charge: max" << '\t' << "" << '\n'; + } + + if (!exp.combinedRanges().RangeMobility::isEmpty()) + { + os << "general: combined ranges: ion-mobility: min" << '\t' << String::number(exp.combinedRanges().getMinMobility(), 2) << '\n' + << "general: combined ranges: ion-mobility: max" << '\t' << String::number(exp.combinedRanges().getMaxMobility(), 2) << '\n'; + } + + if (!exp.combinedRanges().RangeIntensity::isEmpty()) + { + os << "general: combined ranges: intensity: min" << '\t' << String::number(exp.combinedRanges().getMinIntensity(), 2) << '\n' + << "general: combined ranges: intensity: max" << '\t' << String::number(exp.combinedRanges().getMaxIntensity(), 2) << '\n'; + } + else + { + os << "general: combined ranges: intensity: min" << '\t' << "" << '\n' + << "general: combined ranges: intensity: max" << '\t' << "" << '\n'; + } + + // 2. 
Spectrum Ranges (overall) + const auto& spec_ranges = exp.spectrumRanges(); + if (!spec_ranges.RangeRT::isEmpty()) + { + os << "general: spectrum ranges: retention time: min" << '\t' << String::number(spec_ranges.getMinRT(), 2) << '\n' + << "general: spectrum ranges: retention time: max" << '\t' << String::number(spec_ranges.getMaxRT(), 2) << '\n'; + } + else + { + os << "general: spectrum ranges: retention time: min" << '\t' << "" << '\n' + << "general: spectrum ranges: retention time: max" << '\t' << "" << '\n'; + } + + // Similar code for m/z, mobility, intensity for spectrum ranges + if (!spec_ranges.RangeMZ::isEmpty()) + { + os << "general: spectrum ranges: mass-to-charge: min" << '\t' << String::number(spec_ranges.getMinMZ(), 2) << '\n' + << "general: spectrum ranges: mass-to-charge: max" << '\t' << String::number(spec_ranges.getMaxMZ(), 2) << '\n'; + } + else + { + os << "general: spectrum ranges: mass-to-charge: min" << '\t' << "" << '\n' + << "general: spectrum ranges: mass-to-charge: max" << '\t' << "" << '\n'; + } + + if (!spec_ranges.RangeMobility::isEmpty()) + { + os << "general: spectrum ranges: ion-mobility: min" << '\t' << String::number(spec_ranges.getMinMobility(), 2) << '\n' + << "general: spectrum ranges: ion-mobility: max" << '\t' << String::number(spec_ranges.getMaxMobility(), 2) << '\n'; + } + + if (!spec_ranges.RangeIntensity::isEmpty()) + { + os << "general: spectrum ranges: intensity: min" << '\t' << String::number(spec_ranges.getMinIntensity(), 2) << '\n' + << "general: spectrum ranges: intensity: max" << '\t' << String::number(spec_ranges.getMaxIntensity(), 2) << '\n'; + } + else + { + os << "general: spectrum ranges: intensity: min" << '\t' << "" << '\n' + << "general: spectrum ranges: intensity: max" << '\t' << "" << '\n'; + } + + // 3. 
MS Level-specific Ranges + std::set ms_levels = exp.spectrumRanges().getMSLevels(); + for (UInt ms_level : ms_levels) + { + const auto& level_ranges = exp.spectrumRanges().byMSLevel(ms_level); + if (!level_ranges.RangeRT::isEmpty()) + { + os << "general: MS" << ms_level << " ranges: retention time: min" << '\t' << String::number(level_ranges.getMinRT(), 2) << '\n' + << "general: MS" << ms_level << " ranges: retention time: max" << '\t' << String::number(level_ranges.getMaxRT(), 2) << '\n'; + } + else + { + os << "general: MS" << ms_level << " ranges: retention time: min" << '\t' << "" << '\n' + << "general: MS" << ms_level << " ranges: retention time: max" << '\t' << "" << '\n'; + } + + // Similar code for other dimensions + if (!level_ranges.RangeMZ::isEmpty()) + { + os << "general: MS" << ms_level << " ranges: mass-to-charge: min" << '\t' << String::number(level_ranges.getMinMZ(), 2) << '\n' + << "general: MS" << ms_level << " ranges: mass-to-charge: max" << '\t' << String::number(level_ranges.getMaxMZ(), 2) << '\n'; + } + else + { + os << "general: MS" << ms_level << " ranges: mass-to-charge: min" << '\t' << "" << '\n' + << "general: MS" << ms_level << " ranges: mass-to-charge: max" << '\t' << "" << '\n'; + } + + if (!level_ranges.RangeMobility::isEmpty()) + { + os << "general: MS" << ms_level << " ranges: ion-mobility: min" << '\t' << String::number(level_ranges.getMinMobility(), 2) << '\n' + << "general: MS" << ms_level << " ranges: ion-mobility: max" << '\t' << String::number(level_ranges.getMaxMobility(), 2) << '\n'; + } + + if (!level_ranges.RangeIntensity::isEmpty()) + { + os << "general: MS" << ms_level << " ranges: intensity: min" << '\t' << String::number(level_ranges.getMinIntensity(), 2) << '\n' + << "general: MS" << ms_level << " ranges: intensity: max" << '\t' << String::number(level_ranges.getMaxIntensity(), 2) << '\n'; + } + else + { + os << "general: MS" << ms_level << " ranges: intensity: min" << '\t' << "" << '\n' + << "general: MS" << ms_level 
<< " ranges: intensity: max" << '\t' << "" << '\n'; + } + } + + // 4. Chromatogram Ranges + const auto& chrom_ranges = exp.chromatogramRanges(); + if (!chrom_ranges.RangeRT::isEmpty()) + { + os << "general: chromatogram ranges: retention time: min" << '\t' << String::number(chrom_ranges.getMinRT(), 2) << '\n' + << "general: chromatogram ranges: retention time: max" << '\t' << String::number(chrom_ranges.getMaxRT(), 2) << '\n'; + } + else + { + os << "general: chromatogram ranges: retention time: min" << '\t' << "" << '\n' + << "general: chromatogram ranges: retention time: max" << '\t' << "" << '\n'; + } + + // Similar code for m/z and intensity for chromatogram ranges + if (!chrom_ranges.RangeMZ::isEmpty()) + { + os << "general: chromatogram ranges: mass-to-charge: min" << '\t' << String::number(chrom_ranges.getMinMZ(), 2) << '\n' + << "general: chromatogram ranges: mass-to-charge: max" << '\t' << String::number(chrom_ranges.getMaxMZ(), 2) << '\n'; + } + else + { + os << "general: chromatogram ranges: mass-to-charge: min" << '\t' << "" << '\n' + << "general: chromatogram ranges: mass-to-charge: max" << '\t' << "" << '\n'; + } + + if (!chrom_ranges.RangeIntensity::isEmpty()) + { + os << "general: chromatogram ranges: intensity: min" << '\t' << String::number(chrom_ranges.getMinIntensity(), 2) << '\n' + << "general: chromatogram ranges: intensity: max" << '\t' << String::number(chrom_ranges.getMaxIntensity(), 2) << '\n'; + } + else + { + os << "general: chromatogram ranges: intensity: min" << '\t' << "" << '\n' + << "general: chromatogram ranges: intensity: max" << '\t' << "" << '\n'; + } + } template void writeSummaryStatisticsMachineReadable_(const Math::SummaryStatistics &stats, ostream &os, String title) @@ -1722,7 +2084,7 @@ class TOPPFileInfo : public TOPPBase else //peaks { //copy intensities of MS-level 1 peaks - exp.updateRanges(1); + exp.updateRanges(); Size size = exp.getSize(); vector intensities; intensities.reserve(size); diff --git 
a/src/topp/FileMerger.cpp b/src/topp/FileMerger.cpp index aa5c425f646..ea180264a72 100644 --- a/src/topp/FileMerger.cpp +++ b/src/topp/FileMerger.cpp @@ -121,7 +121,7 @@ class TOPPFileMerger : TransformationDescription trafo; if (first_file) // no transformation necessary { - rt_offset_ = map.getMaxRT() + rt_gap_; + rt_offset_ = map.getMaxRT() + rt_gap_; // overall range for all spectra trafo.fitModel("identity"); } else // subsequent file -> apply transformation diff --git a/src/topp/HighResPrecursorMassCorrector.cpp b/src/topp/HighResPrecursorMassCorrector.cpp index 05178803474..217bd5ae9b4 100644 --- a/src/topp/HighResPrecursorMassCorrector.cpp +++ b/src/topp/HighResPrecursorMassCorrector.cpp @@ -135,7 +135,7 @@ class TOPPHiResPrecursorMassCorrector : const bool highest_intensity_peak_ppm = getStringOption_("highest_intensity_peak:mz_tolerance_unit") == "ppm" ? true : false; PeakMap exp; - FileHandler().loadExperiment(in_mzml, exp, {FileTypes::MZML}); + FileHandler().loadExperiment(in_mzml, exp, {FileTypes::MZML}, log_type_); cout << setprecision(12); @@ -170,7 +170,7 @@ class TOPPHiResPrecursorMassCorrector : if (!in_feature.empty()) { FeatureMap features; - FileHandler().loadFeatures(in_feature, features); + FileHandler().loadFeatures(in_feature, features, {}, log_type_); corrected_to_nearest_feature = PrecursorCorrection::correctToNearestFeature(features, exp, rt_tolerance, mz_tolerance, mz_unit_ppm, believe_charge, keep_original, assign_all_matching, max_trace, debug_level_); corrected_precursors.insert(corrected_to_nearest_feature.begin(), corrected_to_nearest_feature.end()); } diff --git a/src/topp/IDFileConverter.cpp b/src/topp/IDFileConverter.cpp index 3e40a9b7266..af9af9bf76d 100644 --- a/src/topp/IDFileConverter.cpp +++ b/src/topp/IDFileConverter.cpp @@ -160,7 +160,7 @@ class TOPPIDFileConverter : bool ret = true; PeakMap expmap; SpectrumLookup lookup; - FileHandler().loadExperiment(filename, expmap); + FileHandler().loadExperiment(filename, expmap, 
{}, log_type_); lookup.readSpectra(expmap.getSpectra()); #pragma omp parallel for diff --git a/src/topp/IDMapper.cpp b/src/topp/IDMapper.cpp index 400904d97e7..ac332103444 100644 --- a/src/topp/IDMapper.cpp +++ b/src/topp/IDMapper.cpp @@ -193,7 +193,7 @@ class TOPPIDMapper : public TOPPBase PeakMap exp; if (!spectra.empty()) { - FileHandler().loadExperiment(spectra, exp, {FileTypes::MZML}); + FileHandler().loadExperiment(spectra, exp, {FileTypes::MZML}, log_type_); } bool measure_from_subelements = getFlag_("consensus:use_subelements"); @@ -224,7 +224,7 @@ class TOPPIDMapper : public TOPPBase if (!spectra.empty()) { - FileHandler().loadExperiment(spectra, exp, {FileTypes::MZML}); + FileHandler().loadExperiment(spectra, exp, {FileTypes::MZML}, log_type_); } mapper.annotate(map, peptide_ids, protein_ids, (getStringOption_("feature:use_centroid_rt") == "true"), (getStringOption_("feature:use_centroid_mz") == "true"), exp); diff --git a/src/topp/IDMassAccuracy.cpp b/src/topp/IDMassAccuracy.cpp deleted file mode 100644 index e60e0a2cb5e..00000000000 --- a/src/topp/IDMassAccuracy.cpp +++ /dev/null @@ -1,478 +0,0 @@ -// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin -// SPDX-License-Identifier: BSD-3-Clause -// -// -------------------------------------------------------------------------- -// $Maintainer: Timo Sachsenberg $ -// $Authors: Andreas Bertsch $ -// -------------------------------------------------------------------------- - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include - -using namespace OpenMS; -using namespace std; -using namespace Math; - -//------------------------------------------------------------- -//Doxygen docu -//------------------------------------------------------------- - -/** -@page TOPP_IDMassAccuracy IDMassAccuracy - -@brief Calculates a distribution of the mass error from given mass spectra and IDs. 
- -@note Currently mzIdentML (mzid) is not directly supported as an input/output format of this tool. Convert mzid files to/from idXML using @ref TOPP_IDFileConverter if necessary. - -The command line parameters of this tool are: -@verbinclude TOPP_IDMassAccuracy.cli -INI file documentation of this tool: -@htmlinclude TOPP_IDMassAccuracy.html - -Given a number of peak maps and for each of the maps an idXML file which contains -peptide identifications the theoretical masses of the identifications and the peaks -of the spectra are compared. This can be done for precursor information stored in -the spectra as well as for fragment information. - -The result is a distribution of errors of experimental vs. theoretical masses. -Having such distributions given -the search parameters of the sequence database search can be adjusted to speed-up -the identification process and to get a higher performance. -*/ - -// We do not want this class to show up in the docu: -/// @cond TOPPCLASSES - -// simple struct which can hold the -// measured and expected masses -struct MassDifference -{ - double exp_mz = 0.0; - Int charge = 0; - double theo_mz = 0.0; - double intensity = 0.0; -}; - -class TOPPIDMassAccuracy : - public TOPPBase -{ -public: - TOPPIDMassAccuracy() : - TOPPBase("IDMassAccuracy", "Calculates a distribution of the mass error from given mass spectra and IDs.") - { - - } - -protected: - void registerOptionsAndFlags_() override - { - registerInputFileList_("in", "", StringList(), "Input mzML file list, containing the spectra."); - setValidFormats_("in", ListUtils::create("mzML")); - registerInputFileList_("id_in", "", StringList(), "Input idXML file list, containing the identifications."); - setValidFormats_("id_in", ListUtils::create("idXML")); - - registerOutputFile_("out_precursor", "", "", "Output file which contains the deviations from the precursors", false, false); - setValidFormats_("out_precursor", ListUtils::create("tsv")); - registerFlag_("precursor_error_ppm", 
"If this flag is used, the precursor mass tolerances are estimated in ppm instead of Da."); - - registerOutputFile_("out_fragment", "", "", "Output file which contains the fragment ion m/z deviations", false, false); - setValidFormats_("out_fragment", ListUtils::create("tsv")); - registerFlag_("fragment_error_ppm", "If this flag is used, the fragment mass tolerances are estimated in ppm instead of Da."); - - registerDoubleOption_("fragment_mass_tolerance", "", 0.5, "Maximal fragment mass tolerance which is allowed for MS/MS spectra, used for the calculation of matching ions.", false, false); - - registerIntOption_("number_of_bins", "<#bins>", 100, "Number of bins that should be used to calculate the histograms for the fitting.", false, true); - setMinInt_("number_of_bins", 10); - - registerOutputFile_("out_precursor_fit", "", "", "Gaussian fit to the histogram of mass deviations from the precursors.", false, true); - setValidFormats_("out_precursor_fit", ListUtils::create("tsv")); - - registerOutputFile_("out_fragment_fit", "", "", "Gaussian fit to the histogram of mass deviations from the fragments.", false, true); - setValidFormats_("out_fragment_fit", ListUtils::create("tsv")); - } - - double getMassDifference(double theo_mz, double exp_mz, bool use_ppm) - { - double error(exp_mz - theo_mz); - if (use_ppm) - { - error = error / theo_mz * (double)1e6; - } - return error; - } - - ExitCodes main_(int, const char **) override - { - //------------------------------------------------------------- - // parsing parameters - //------------------------------------------------------------- - - StringList id_in(getStringList_("id_in")); - StringList in_raw(getStringList_("in")); - Size number_of_bins((UInt)getIntOption_("number_of_bins")); - bool precursor_error_ppm(getFlag_("precursor_error_ppm")); - bool fragment_error_ppm(getFlag_("fragment_error_ppm")); - - if (in_raw.size() != id_in.size()) - { - writeLogError_("Number of spectrum files and identification files 
differs..."); - return ILLEGAL_PARAMETERS; - } - - //------------------------------------------------------------- - // reading input - //------------------------------------------------------------- - - vector > pep_ids; - vector > prot_ids; - pep_ids.resize(id_in.size()); - prot_ids.resize(id_in.size()); - - FileHandler idxmlfile; - for (Size i = 0; i != id_in.size(); ++i) - { - idxmlfile.loadIdentifications(id_in[i], prot_ids[i], pep_ids[i], {FileTypes::IDXML}); - } - - // read mzML files - vector maps_raw; - maps_raw.resize(in_raw.size()); - - FileHandler mzml_file; - for (Size i = 0; i != in_raw.size(); ++i) - { - mzml_file.loadExperiment(in_raw[i], maps_raw[i], {FileTypes::MZML}); - } - - //------------------------------------------------------------- - // calculations - //------------------------------------------------------------- - - // mapping ids - IDMapper mapper; - for (Size i = 0; i != maps_raw.size(); ++i) - { - mapper.annotate(maps_raw[i], pep_ids[i], prot_ids[i]); - } - - // normalize the spectra - Normalizer normalizer; - for (vector::iterator it1 = maps_raw.begin(); it1 != maps_raw.end(); ++it1) - { - for (PeakMap::Iterator it2 = it1->begin(); it2 != it1->end(); ++it2) - { - normalizer.filterSpectrum(*it2); - } - } - - // generate precursor statistics - vector precursor_diffs; - if (!getStringOption_("out_precursor").empty() || !getStringOption_("out_precursor_fit").empty()) - { - for (Size i = 0; i != maps_raw.size(); ++i) - { - for (Size j = 0; j != maps_raw[i].size(); ++j) - { - if (maps_raw[i][j].getPeptideIdentifications().empty()) - { - continue; - } - for (vector::const_iterator it = maps_raw[i][j].getPeptideIdentifications().begin(); it != maps_raw[i][j].getPeptideIdentifications().end(); ++it) - { - if (!it->getHits().empty()) - { - PeptideHit hit = *it->getHits().begin(); - MassDifference md; - Int charge = hit.getCharge(); - if (charge == 0) - { - charge = 1; - } - md.exp_mz = it->getMZ(); - md.theo_mz = 
hit.getSequence().getMonoWeight(Residue::Full, charge); - md.charge = charge; - precursor_diffs.push_back(md); - } - } - } - } - } - - // generate fragment ions statistics - vector fragment_diffs; - TheoreticalSpectrumGenerator tsg; - SpectrumAlignment sa; - double fragment_mass_tolerance(getDoubleOption_("fragment_mass_tolerance")); - Param sa_param(sa.getParameters()); - sa_param.setValue("tolerance", fragment_mass_tolerance); - sa.setParameters(sa_param); - - if (!getStringOption_("out_fragment").empty() || !getStringOption_("out_fragment_fit").empty()) - { - for (Size i = 0; i != maps_raw.size(); ++i) - { - for (Size j = 0; j != maps_raw[i].size(); ++j) - { - if (maps_raw[i][j].getPeptideIdentifications().empty()) - { - continue; - } - for (vector::const_iterator it = maps_raw[i][j].getPeptideIdentifications().begin(); it != maps_raw[i][j].getPeptideIdentifications().end(); ++it) - { - if (!it->getHits().empty()) - { - PeptideHit hit = *it->getHits().begin(); - - PeakSpectrum theo_spec; - tsg.getSpectrum(theo_spec, hit.getSequence(), 1, 1); - - vector > pairs; - sa.getSpectrumAlignment(pairs, theo_spec, maps_raw[i][j]); - //cerr << hit.getSequence() << " " << hit.getSequence().getSuffix(1).getFormula() << " " << hit.getSequence().getSuffix(1).getFormula().getMonoWeight() << endl; - for (vector >::const_iterator pit = pairs.begin(); pit != pairs.end(); ++pit) - { - MassDifference md; - md.exp_mz = maps_raw[i][j][pit->second].getMZ(); - md.theo_mz = theo_spec[pit->first].getMZ(); - //cerr.precision(15); - //cerr << md.exp_mz << " " << md.theo_mz << " " << md.exp_mz - md.theo_mz << endl; - md.intensity = maps_raw[i][j][pit->second].getIntensity(); - md.charge = hit.getCharge(); - fragment_diffs.push_back(md); - } - } - } - } - } - } - - //------------------------------------------------------------- - // writing output - //------------------------------------------------------------- - - String precursor_out_file(getStringOption_("out_precursor")); - if 
(!precursor_out_file.empty() || !getStringOption_("out_precursor_fit").empty()) - { - vector errors; - - double min_diff(numeric_limits::max()), max_diff(numeric_limits::min()); - for (Size i = 0; i != precursor_diffs.size(); ++i) - { - double diff = getMassDifference(precursor_diffs[i].theo_mz, precursor_diffs[i].exp_mz, precursor_error_ppm); - errors.push_back(diff); - - if (diff > max_diff) - { - max_diff = diff; - } - if (diff < min_diff) - { - min_diff = diff; - } - } - if (!precursor_out_file.empty()) - { - ofstream precursor_out(precursor_out_file.c_str()); - for (Size i = 0; i != errors.size(); ++i) - { - precursor_out << errors[i] << "\n"; - } - precursor_out.close(); - } - - // fill histogram with the collected values - double bin_size = (max_diff - min_diff) / (double)number_of_bins; - Histogram hist(min_diff, max_diff, bin_size); - for (Size i = 0; i != errors.size(); ++i) - { - hist.inc(errors[i], 1.0); - } - - writeDebug_("min_diff=" + String(min_diff) + ", max_diff=" + String(max_diff) + ", number_of_bins=" + String(number_of_bins), 1); - - // transform the histogram into a vector > for the fitting - vector > values; - for (Size i = 0; i != hist.size(); ++i) - { - DPosition<2> p; - p.setX((double)i / (double)number_of_bins * (max_diff - min_diff) + min_diff); - p.setY(hist[i]); - values.push_back(p); - } - - double mean = Math::mean(errors.begin(), errors.end()); - double abs_dev = Math::absdev(errors.begin(), errors.end(), mean); - double sdv = Math::sd(errors.begin(), errors.end(), mean); - sort(errors.begin(), errors.end()); - double median = errors[(Size)(errors.size() / 2.0)]; - - writeDebug_("Precursor mean error: " + String(mean), 1); - writeDebug_("Precursor abs. dev.: " + String(abs_dev), 1); - writeDebug_("Precursor std. 
dev.: " + String(sdv), 1); - writeDebug_("Precursor median error: " + String(median), 1); - - - // calculate histogram for gauss fitting - GaussFitter gf; - GaussFitter::GaussFitResult init_param (hist.maxValue(), median, sdv/500.0); - gf.setInitialParameters(init_param); - - try - { - gf.fit(values); - - // write fit data - String fit_out_file(getStringOption_("out_precursor_fit")); - if (!fit_out_file.empty()) - { - ofstream fit_out(fit_out_file.c_str()); - if (precursor_error_ppm) - { - fit_out << "error in ppm"; - } - else - { - fit_out << "error in Da"; - } - fit_out << "\tfrequency\n"; - - for (vector >::const_iterator it = values.begin(); it != values.end(); ++it) - { - fit_out << it->getX() << "\t" << it->getY() << "\n"; - } - fit_out.close(); - } - - } - catch (Exception::UnableToFit&) - { - writeLogWarn_("Unable to fit a Gaussian distribution to the precursor mass errors"); - } - } - - String fragment_out_file(getStringOption_("out_fragment")); - if (!fragment_out_file.empty() || !getStringOption_("out_fragment_fit").empty()) - { - vector errors; - double min_diff(numeric_limits::max()), max_diff(numeric_limits::min()); - for (Size i = 0; i != fragment_diffs.size(); ++i) - { - double diff = getMassDifference(fragment_diffs[i].theo_mz, fragment_diffs[i].exp_mz, fragment_error_ppm); - errors.push_back(diff); - - if (diff > max_diff) - { - max_diff = diff; - } - if (diff < min_diff) - { - min_diff = diff; - } - } - if (!fragment_out_file.empty()) - { - ofstream fragment_out(fragment_out_file.c_str()); - for (Size i = 0; i != errors.size(); ++i) - { - fragment_out << errors[i] << "\n"; - } - fragment_out.close(); - } - // fill histogram with the collected values - // here we use the intensities to scale the error - // low intensity peaks are likely to be random matches - double bin_size = (max_diff - min_diff) / (double)number_of_bins; - Histogram hist(min_diff, max_diff, bin_size); - for (Size i = 0; i != fragment_diffs.size(); ++i) - { - double diff = 
getMassDifference(fragment_diffs[i].theo_mz, fragment_diffs[i].exp_mz, fragment_error_ppm); - hist.inc(diff, fragment_diffs[i].intensity); - } - - writeDebug_("min_diff=" + String(min_diff) + ", max_diff=" + String(max_diff) + ", number_of_bins=" + String(number_of_bins), 1); - - // transform the histogram into a vector > for the fitting - vector > values; - for (Size i = 0; i != hist.size(); ++i) - { - DPosition<2> p; - p.setX((double)i / (double)number_of_bins * (max_diff - min_diff) + min_diff); - p.setY(hist[i]); - values.push_back(p); - } - - double mean = Math::mean(errors.begin(), errors.end()); - double abs_dev = Math::absdev(errors.begin(), errors.end(), mean); - double sdv = Math::sd(errors.begin(), errors.end(), mean); - sort(errors.begin(), errors.end()); - double median = errors[(Size)(errors.size() / 2.0)]; - - writeDebug_("Fragment mean error: " + String(mean), 1); - writeDebug_("Fragment abs. dev.: " + String(abs_dev), 1); - writeDebug_("Fragment std. dev.: " + String(sdv), 1); - writeDebug_("Fragment median error: " + String(median), 1); - - // calculate histogram for gauss fitting - GaussFitter gf; - GaussFitter::GaussFitResult init_param (hist.maxValue(), median, sdv / 100.0); - gf.setInitialParameters(init_param); - - try - { - gf.fit(values); - - // write fit data - String fit_out_file(getStringOption_("out_fragment_fit")); - if (!fit_out_file.empty()) - { - ofstream fit_out(fit_out_file.c_str()); - if (precursor_error_ppm) - { - fit_out << "error in ppm"; - } - else - { - fit_out << "error in Da"; - } - fit_out << "\tfrequency\n"; - - for (vector >::const_iterator it = values.begin(); it != values.end(); ++it) - { - fit_out << it->getX() << "\t" << it->getY() << "\n"; - } - fit_out.close(); - } - } - catch (Exception::UnableToFit&) - { - writeLogWarn_("Unable to fit a Gaussian distribution to the fragment mass errors"); - } - } - - return EXECUTION_OK; - } - -}; - - -int main(int argc, const char ** argv) -{ - TOPPIDMassAccuracy tool; - return 
tool.main(argc, argv); -} - -/// @endcond diff --git a/src/topp/IDSplitter.cpp b/src/topp/IDSplitter.cpp index 1b1d643a297..ccccfba51c7 100644 --- a/src/topp/IDSplitter.cpp +++ b/src/topp/IDSplitter.cpp @@ -12,6 +12,7 @@ #include #include #include +#include using namespace OpenMS; using namespace std; @@ -77,9 +78,9 @@ class TOPPIDSplitter : void registerOptionsAndFlags_() override { registerInputFile_("in", "", "", "Input file (data annotated with identifications)"); - setValidFormats_("in", ListUtils::create("mzML,featureXML,consensusXML")); + setValidFormats_("in", ListUtils::create("featureXML,consensusXML")); registerOutputFile_("out", "", "", "Output file (data without identifications). Either 'out' or 'id_out' are required. They can be used together.", false); - setValidFormats_("out", ListUtils::create("mzML,featureXML,consensusXML")); + setValidFormats_("out", ListUtils::create("featureXML,consensusXML")); registerOutputFile_("id_out", "", "", "Output file (identifications). Either 'out' or 'id_out' are required. They can be used together.", false); setValidFormats_("id_out", ListUtils::create("idXML")); } @@ -101,28 +102,7 @@ class TOPPIDSplitter : FileTypes::Type in_type = FileHandler::getType(in); - if (in_type == FileTypes::MZML) - { - PeakMap experiment; - FileHandler().loadExperiment(in, experiment, {FileTypes::MZML}); - // what about unassigned peptide IDs? 
- for (PeakMap::Iterator exp_it = experiment.begin(); - exp_it != experiment.end(); ++exp_it) - { - peptides.insert(peptides.end(), - exp_it->getPeptideIdentifications().begin(), - exp_it->getPeptideIdentifications().end()); - exp_it->getPeptideIdentifications().clear(); - } - experiment.getProteinIdentifications().swap(proteins); - if (!out.empty()) - { - addDataProcessing_(experiment, - getProcessingInfo_(DataProcessing::FILTERING)); - FileHandler().storeExperiment(out, experiment, {FileTypes::MZML}); - } - } - else if (in_type == FileTypes::FEATUREXML) + if (in_type == FileTypes::FEATUREXML) { FeatureMap features; FileHandler().loadFeatures(in, features, {FileTypes::FEATUREXML}); @@ -168,8 +148,8 @@ class TOPPIDSplitter : if (!id_out.empty()) { // IDMapper can match a peptide ID to several overlapping features, - // resulting in duplicates; this shouldn't be the case for peak data - if (in_type != FileTypes::MZML) removeDuplicates_(peptides); + // resulting in duplicates + removeDuplicates_(peptides); FileHandler().storeIdentifications(id_out, proteins, peptides, {FileTypes::IDXML}); } diff --git a/src/topp/ImageCreator.cpp b/src/topp/ImageCreator.cpp index 7e52f07289d..a96644f1153 100644 --- a/src/topp/ImageCreator.cpp +++ b/src/topp/ImageCreator.cpp @@ -269,11 +269,12 @@ class TOPPImageCreator : exp.getSpectra().erase(remove_if(exp.begin(), exp.end(), predicate), exp.end()); } - exp.updateRanges(1); + exp.updateRanges(); Size rows = getIntOption_("height"), cols = getIntOption_("width"); if (rows == 0) rows = exp.size(); - if (cols == 0) cols = UInt(ceil(exp.getMaxMZ() - exp.getMinMZ())); + if (cols == 0) cols = UInt(ceil( + exp.spectrumRanges().byMSLevel(1).getMaxMZ() - exp.spectrumRanges().byMSLevel(1).getMinMZ())); //---------------------------------------------------------------- //Do the actual resampling @@ -284,9 +285,9 @@ class TOPPImageCreator : if (!getFlag_("transpose")) { // scans run bottom-up: - bilip.setMapping_0(0, exp.getMaxRT(), rows - 1, 
exp.getMinRT()); + bilip.setMapping_0(0, exp.spectrumRanges().byMSLevel(1).getMaxRT(), rows - 1, exp.spectrumRanges().byMSLevel(1).getMinRT()); // peaks run left-right: - bilip.setMapping_1(0, exp.getMinMZ(), cols - 1, exp.getMaxMZ()); + bilip.setMapping_1(0, exp.spectrumRanges().byMSLevel(1).getMinMZ(), cols - 1, exp.spectrumRanges().byMSLevel(1).getMaxMZ()); for (PeakMap::Iterator spec_iter = exp.begin(); spec_iter != exp.end(); ++spec_iter) diff --git a/src/topp/IonMobilityBinning.cpp b/src/topp/IonMobilityBinning.cpp index 28e3843933c..9ee4ced3654 100644 --- a/src/topp/IonMobilityBinning.cpp +++ b/src/topp/IonMobilityBinning.cpp @@ -75,7 +75,7 @@ class TOPPIonMobilityBinning : MZ_UNITS mz_binning_width_unit = getStringOption_("SpectraMerging:mz_binning_width_unit") == "Da" ? MZ_UNITS::DA : MZ_UNITS::PPM; PeakMap experiment; - FileHandler().loadExperiment(input_file, experiment, {FileTypes::MZML}); + FileHandler().loadExperiment(input_file, experiment, {FileTypes::MZML}, log_type_); auto [mzML_bins, im_ranges] = IMDataConverter::splitExperimentByIonMobility(std::move(experiment), bins, bin_extension_abs, mz_binning_width, mz_binning_width_unit); diff --git a/src/topp/MRMMapper.cpp b/src/topp/MRMMapper.cpp index 6ce07dbf612..e24d191024b 100644 --- a/src/topp/MRMMapper.cpp +++ b/src/topp/MRMMapper.cpp @@ -127,8 +127,8 @@ class TOPPMRMMapper OpenMS::PeakMap chromatogram_map; OpenMS::PeakMap output; - FileHandler().loadTransitions(tr_file, targeted_exp, {FileTypes::TRAML}); - FileHandler().loadExperiment(in, chromatogram_map, {FileTypes::MZML}); + FileHandler().loadTransitions(tr_file, targeted_exp, {FileTypes::TRAML}, log_type_); + FileHandler().loadExperiment(in, chromatogram_map, {FileTypes::MZML}, log_type_); Param param = getParam_().copy("algorithm:", true); @@ -146,7 +146,7 @@ class TOPPMRMMapper } output.setChromatograms(chromatograms); - FileHandler().storeExperiment(out, output, {FileTypes::MZML}); + FileHandler().storeExperiment(out, output, 
{FileTypes::MZML}, log_type_); return EXECUTION_OK; } diff --git a/src/topp/MapAlignerIdentification.cpp b/src/topp/MapAlignerIdentification.cpp index fd2a63eb9c2..ee7e3e6f96f 100644 --- a/src/topp/MapAlignerIdentification.cpp +++ b/src/topp/MapAlignerIdentification.cpp @@ -210,24 +210,17 @@ class TOPPMapAlignerIdentification : FileTypes::Type filetype = FileHandler::getType(reference_file); switch (filetype) { - case FileTypes::MZML: - { - PeakMap experiment; - FileHandler().loadExperiment(reference_file, experiment, {FileTypes::MZML}); - algorithm.setReference(experiment); - } - break; case FileTypes::FEATUREXML: { FeatureMap features; - FileHandler().loadFeatures(reference_file, features); + FileHandler().loadFeatures(reference_file, features, {}, log_type_); algorithm.setReference(features); } break; case FileTypes::CONSENSUSXML: { ConsensusMap consensus; - FileHandler().loadConsensusFeatures(reference_file, consensus); + FileHandler().loadConsensusFeatures(reference_file, consensus, {}, log_type_); algorithm.setReference(consensus); } break; @@ -235,7 +228,7 @@ class TOPPMapAlignerIdentification : { vector proteins; vector peptides; - FileHandler().loadIdentifications(reference_file, proteins, peptides); + FileHandler().loadIdentifications(reference_file, proteins, peptides, {}, log_type_); algorithm.setReference(peptides); } break; @@ -447,7 +440,7 @@ class TOPPMapAlignerIdentification : for (Size i = 0; i < input_files.size(); ++i) { progresslogger.setProgress(i); - idxml_file.loadIdentifications(input_files[i], protein_ids[i], peptide_ids[i], {FileTypes::IDXML}); + idxml_file.loadIdentifications(input_files[i], protein_ids[i], peptide_ids[i], {FileTypes::IDXML}, log_type_); } progresslogger.endProgress(); @@ -462,7 +455,7 @@ class TOPPMapAlignerIdentification : for (Size i = 0; i < output_files.size(); ++i) { progresslogger.setProgress(i); - idxml_file.storeIdentifications(output_files[i], protein_ids[i], peptide_ids[i], {FileTypes::IDXML}); + 
idxml_file.storeIdentifications(output_files[i], protein_ids[i], peptide_ids[i], {FileTypes::IDXML}, log_type_); } progresslogger.endProgress(); } diff --git a/src/topp/MapAlignerPoseClustering.cpp b/src/topp/MapAlignerPoseClustering.cpp index 4ee5c8483f7..dfb00003cad 100644 --- a/src/topp/MapAlignerPoseClustering.cpp +++ b/src/topp/MapAlignerPoseClustering.cpp @@ -160,8 +160,9 @@ class TOPPMapAlignerPoseClustering : else if (in_type == FileTypes::MZML) // this is expensive! { PeakMap exp; - FileHandler().loadExperiment(in_files[i], exp, {FileTypes::MZML}); - exp.updateRanges(1); + + FileHandler().loadExperiment(in_files[i], exp, {FileTypes::MZML}, log_type_); + exp.updateRanges(); s = exp.getSize(); } if (s > max_count) @@ -186,13 +187,13 @@ class TOPPMapAlignerPoseClustering : FileHandler f_fxml_tmp; // for the reference, we never need CH or subordinates f_fxml_tmp.getFeatOptions().setLoadConvexHull(false); f_fxml_tmp.getFeatOptions().setLoadSubordinates(false); - f_fxml_tmp.loadFeatures(file, map_ref, {FileTypes::FEATUREXML}); + f_fxml_tmp.loadFeatures(file, map_ref, {FileTypes::FEATUREXML}, log_type_); algorithm.setReference(map_ref); } else if (in_type == FileTypes::MZML) { PeakMap map_ref; - FileHandler().loadExperiment(file, map_ref); + FileHandler().loadExperiment(file, map_ref, {}, log_type_); algorithm.setReference(map_ref); } @@ -239,13 +240,13 @@ class TOPPMapAlignerPoseClustering : MapAlignmentTransformer::transformRetentionTimes(map, trafo); // annotate output with data processing info addDataProcessing_(map, getProcessingInfo_(DataProcessing::ALIGNMENT)); - f_fxml_tmp.storeFeatures(out_files[i], map, {FileTypes::FEATUREXML}); + f_fxml_tmp.storeFeatures(out_files[i], map, {FileTypes::FEATUREXML}, log_type_); } } else if (in_type == FileTypes::MZML) { PeakMap map; - FileHandler().loadExperiment(in_files[i], map, {FileTypes::MZML}); + FileHandler().loadExperiment(in_files[i], map, {FileTypes::MZML}, log_type_); if (i == static_cast(reference_index)) { 
trafo.fitModel("identity"); @@ -259,7 +260,7 @@ class TOPPMapAlignerPoseClustering : MapAlignmentTransformer::transformRetentionTimes(map, trafo); // annotate output with data processing info addDataProcessing_(map, getProcessingInfo_(DataProcessing::ALIGNMENT)); - FileHandler().storeExperiment(out_files[i], map, {FileTypes::MZML}); + FileHandler().storeExperiment(out_files[i], map, {FileTypes::MZML}, log_type_); } } diff --git a/src/topp/MapRTTransformer.cpp b/src/topp/MapRTTransformer.cpp index 49401678980..c767fdb4126 100644 --- a/src/topp/MapRTTransformer.cpp +++ b/src/topp/MapRTTransformer.cpp @@ -166,35 +166,35 @@ class TOPPMapRTTransformer : if (in_type == FileTypes::MZML) { PeakMap map; - FileHandler().loadExperiment(in, map, {FileTypes::MZML}); + FileHandler().loadExperiment(in, map, {FileTypes::MZML}, log_type_); applyTransformation_( trafo, map); - FileHandler().storeExperiment(out, map, {FileTypes::MZML}); + FileHandler().storeExperiment(out, map, {FileTypes::MZML}, log_type_); } else if (in_type == FileTypes::FEATUREXML) { FeatureMap map; - FileHandler().loadFeatures(in, map, {FileTypes::FEATUREXML}); + FileHandler().loadFeatures(in, map, {FileTypes::FEATUREXML}, log_type_); applyTransformation_( trafo, map); - FileHandler().storeFeatures(out, map, {FileTypes::FEATUREXML}); + FileHandler().storeFeatures(out, map, {FileTypes::FEATUREXML}, log_type_); } else if (in_type == FileTypes::CONSENSUSXML) { ConsensusMap map; - FileHandler().loadConsensusFeatures(in, map, {FileTypes::CONSENSUSXML}); + FileHandler().loadConsensusFeatures(in, map, {FileTypes::CONSENSUSXML}, log_type_); applyTransformation_( trafo, map); - FileHandler().storeConsensusFeatures(out, map, {FileTypes::CONSENSUSXML}); + FileHandler().storeConsensusFeatures(out, map, {FileTypes::CONSENSUSXML}, log_type_); } else if (in_type == FileTypes::IDXML) { vector proteins; vector peptides; - FileHandler().loadIdentifications(in, proteins, peptides, {FileTypes::IDXML}); + 
FileHandler().loadIdentifications(in, proteins, peptides, {FileTypes::IDXML}, log_type_); bool store_original_rt = getFlag_("store_original_rt"); MapAlignmentTransformer::transformRetentionTimes(peptides, trafo, store_original_rt); // no "data processing" section in idXML - FileHandler().storeIdentifications(out, proteins, peptides, {FileTypes::IDXML}); + FileHandler().storeIdentifications(out, proteins, peptides, {FileTypes::IDXML}, log_type_); } } diff --git a/src/topp/MetaboliteSpectralMatcher.cpp b/src/topp/MetaboliteSpectralMatcher.cpp index 5e59e7b209f..2cbee2a5741 100644 --- a/src/topp/MetaboliteSpectralMatcher.cpp +++ b/src/topp/MetaboliteSpectralMatcher.cpp @@ -119,7 +119,7 @@ class TOPPMetaboliteSpectralMatcher : mz_file.getOptions().setMSLevels(ms_level); PeakMap ms_peakmap; - mz_file.loadExperiment(in, ms_peakmap, {FileTypes::MZML}); + mz_file.loadExperiment(in, ms_peakmap, {FileTypes::MZML}, log_type_); if (ms_peakmap.empty()) { @@ -141,7 +141,7 @@ class TOPPMetaboliteSpectralMatcher : // load database //------------------------------------------------------------- PeakMap spec_db; - FileHandler().loadExperiment(spec_db_filename, spec_db, {FileTypes::MSP, FileTypes::MZML, FileTypes::MGF}); + FileHandler().loadExperiment(spec_db_filename, spec_db, {FileTypes::MSP, FileTypes::MZML, FileTypes::MGF}, log_type_); if (spec_db.empty()) { diff --git a/src/topp/MultiplexResolver.cpp b/src/topp/MultiplexResolver.cpp index 288c649ee8d..5cf2a692ac1 100644 --- a/src/topp/MultiplexResolver.cpp +++ b/src/topp/MultiplexResolver.cpp @@ -572,14 +572,14 @@ class TOPPMultiplexResolver : * load consensus map */ ConsensusMap map_in; - FileHandler().loadConsensusFeatures(in_, map_in, {FileTypes::CONSENSUSXML}); + FileHandler().loadConsensusFeatures(in_, map_in, {FileTypes::CONSENSUSXML}, log_type_); /** * load (optional) blacklist */ if (!(in_blacklist_.empty())) { - FileHandler().loadExperiment(in_blacklist_, exp_blacklist_, {FileTypes::MZML}); + 
FileHandler().loadExperiment(in_blacklist_, exp_blacklist_, {FileTypes::MZML}, log_type_); } /** @@ -603,10 +603,10 @@ class TOPPMultiplexResolver : /** * store consensus maps */ - FileHandler().storeConsensusFeatures(out_, map_out, {FileTypes::CONSENSUSXML}); + FileHandler().storeConsensusFeatures(out_, map_out, {FileTypes::CONSENSUSXML}, log_type_); if (!out_conflicts_.empty()) { - FileHandler().storeConsensusFeatures(out_conflicts_, map_conflicts, {FileTypes::CONSENSUSXML}); + FileHandler().storeConsensusFeatures(out_conflicts_, map_conflicts, {FileTypes::CONSENSUSXML}, log_type_); } return EXECUTION_OK; diff --git a/src/topp/MzMLSplitter.cpp b/src/topp/MzMLSplitter.cpp index 4b5f5ebd1a1..3d25e78788a 100644 --- a/src/topp/MzMLSplitter.cpp +++ b/src/topp/MzMLSplitter.cpp @@ -112,7 +112,7 @@ class TOPPMzMLSplitter : public TOPPBase writeLogInfo_("Splitting file into " + String(parts) + " parts..."); PeakMap experiment; - FileHandler().loadExperiment(in, experiment, {FileTypes::MZML}); + FileHandler().loadExperiment(in, experiment, {FileTypes::MZML}, log_type_); vector spectra; vector chromatograms; @@ -171,7 +171,7 @@ class TOPPMzMLSplitter : public TOPPBase chrom_start += n_chrom; writeLogInfo_("Part " + String(counter) + ": " + String(n_spec) + " spectra, " + String(n_chrom) + " chromatograms"); - FileHandler().storeExperiment(out_name.str(), part, {FileTypes::MZML}); + FileHandler().storeExperiment(out_name.str(), part, {FileTypes::MZML}, log_type_); } return EXECUTION_OK; diff --git a/src/topp/OpenSwathChromatogramExtractor.cpp b/src/topp/OpenSwathChromatogramExtractor.cpp index 75731461ff1..6ccee83190e 100644 --- a/src/topp/OpenSwathChromatogramExtractor.cpp +++ b/src/topp/OpenSwathChromatogramExtractor.cpp @@ -221,7 +221,7 @@ class TOPPOpenSwathChromatogramExtractor // Find the transitions to extract and extract them MapType tmp_out; OpenMS::TargetedExperiment transition_exp_used; - FileHandler().loadExperiment(file_list[i], *exp, {FileTypes::MZML}); + 
FileHandler().loadExperiment(file_list[i], *exp, {FileTypes::MZML}, log_type_); if (exp->empty()) { continue; // if empty, go on diff --git a/src/topp/OpenSwathMzMLFileCacher.cpp b/src/topp/OpenSwathMzMLFileCacher.cpp index 32c18390229..31a5c414d6f 100644 --- a/src/topp/OpenSwathMzMLFileCacher.cpp +++ b/src/topp/OpenSwathMzMLFileCacher.cpp @@ -153,7 +153,7 @@ class TOPPOpenSwathMzMLFileCacher MapType exp; SqMassFile sqfile; sqfile.load(in, exp); - FileHandler().storeExperiment(out, exp, {FileTypes::MZML}); + FileHandler().storeExperiment(out, exp, {FileTypes::MZML}, log_type_); return EXECUTION_OK; } else if (in_type == FileTypes::MZML && out_type == FileTypes::SQMASS && process_lowmemory) @@ -186,7 +186,7 @@ class TOPPOpenSwathMzMLFileCacher sqfile.setConfig(config); MapType exp; - FileHandler().loadExperiment(in, exp, {FileTypes::MZML}); + FileHandler().loadExperiment(in, exp, {FileTypes::MZML}, log_type_); sqfile.store(out, exp); return EXECUTION_OK; } diff --git a/src/topp/ProteomicsLFQ.cpp b/src/topp/ProteomicsLFQ.cpp index 6273b2a9948..948a5ccf84d 100644 --- a/src/topp/ProteomicsLFQ.cpp +++ b/src/topp/ProteomicsLFQ.cpp @@ -362,7 +362,7 @@ class ProteomicsLFQ : // load raw file PeakMap ms_raw; - FileHandler().loadExperiment(mz_file, ms_raw, {FileTypes::MZML}); + FileHandler().loadExperiment(mz_file, ms_raw, {FileTypes::MZML}, log_type_); ms_raw.clearMetaDataArrays(); ms_raw.updateRanges(); @@ -819,7 +819,7 @@ class ProteomicsLFQ : { const String& mz_file_abs_path = File::absolutePath(mz_file); - FileHandler().loadIdentifications(id_file_abs_path, protein_ids, peptide_ids, {FileTypes::IDXML}); + FileHandler().loadIdentifications(id_file_abs_path, protein_ids, peptide_ids, {FileTypes::IDXML}, log_type_); ExitCodes e = checkSingleRunPerID_(protein_ids, id_file_abs_path); if (e != EXECUTION_OK) return e; @@ -1092,7 +1092,7 @@ class ProteomicsLFQ : calculateSeeds_(ms_centroided, seeds, median_fwhm); if (debug_level_ > 666) { - 
FileHandler().storeFeatures("debug_seeds_fraction_" + String(ms_files.first) + "_" + String(fraction_group) + ".featureXML", seeds, {FileTypes::FEATUREXML}); + FileHandler().storeFeatures("debug_seeds_fraction_" + String(ms_files.first) + "_" + String(fraction_group) + ".featureXML", seeds, {FileTypes::FEATUREXML}, log_type_); } } @@ -1309,12 +1309,12 @@ class ProteomicsLFQ : if (debug_level_ > 666) { - FileHandler().storeFeatures("debug_fraction_" + String(ms_files.first) + "_" + String(fraction_group) + ".featureXML", feature_maps.back(), {FileTypes::FEATUREXML}); + FileHandler().storeFeatures("debug_fraction_" + String(ms_files.first) + "_" + String(fraction_group) + ".featureXML", feature_maps.back(), {FileTypes::FEATUREXML}, log_type_); } if (debug_level_ > 10000) { - FileHandler().storeExperiment("debug_fraction_" + String(ms_files.first) + "_" + String(fraction_group) + "_chroms.mzML", ffi.getChromatograms(), {FileTypes::MZML}); + FileHandler().storeExperiment("debug_fraction_" + String(ms_files.first) + "_" + String(fraction_group) + "_chroms.mzML", ffi.getChromatograms(), {FileTypes::MZML}, log_type_); } ++fraction_group; @@ -1393,7 +1393,7 @@ class ProteomicsLFQ : if (debug_level_ >= 666) { - FileHandler().storeConsensusFeatures("debug_fraction_" + String(ms_files.first) + ".consensusXML", consensus_fraction, {FileTypes::CONSENSUSXML}); + FileHandler().storeConsensusFeatures("debug_fraction_" + String(ms_files.first) + ".consensusXML", consensus_fraction, {FileTypes::CONSENSUSXML}, log_type_); writeDebug_("to produce a consensus map with: " + String(consensus_fraction.getColumnHeaders().size()) + " columns.", 1); } @@ -1784,7 +1784,7 @@ class ProteomicsLFQ : if (debug_level_ >= 666) { - FileHandler().storeConsensusFeatures("debug_after_normalization.consensusXML", consensus, {FileTypes::CONSENSUSXML}); + FileHandler().storeConsensusFeatures("debug_after_normalization.consensusXML", consensus, {FileTypes::CONSENSUSXML}, log_type_); } } else if 
(getStringOption_("quantification_method") == "spectral_counting") @@ -1939,7 +1939,7 @@ class ProteomicsLFQ : { // Note: idXML and consensusXML doesn't support writing quantification at protein groups // (they are nevertheless stored and passed to mzTab for proper export) - FileHandler().storeConsensusFeatures(getStringOption_("out_cxml"), consensus, {FileTypes::CONSENSUSXML}); + FileHandler().storeConsensusFeatures(getStringOption_("out_cxml"), consensus, {FileTypes::CONSENSUSXML}, log_type_); } // Fill MzTab with meta data and quants annotated in identification data structure diff --git a/src/topp/QCCalculator.cpp b/src/topp/QCCalculator.cpp index a789c4065ad..0fa0d40a5f8 100644 --- a/src/topp/QCCalculator.cpp +++ b/src/topp/QCCalculator.cpp @@ -132,7 +132,7 @@ class TOPPQCCalculator : // prepare input cout << "Reading mzML file..." << endl; MSExperiment exp; - FileHandler().loadExperiment(inputfile_name, exp, {FileTypes::MZML}); + FileHandler().loadExperiment(inputfile_name, exp, {FileTypes::MZML}, log_type_); exp.sortSpectra(); exp.updateRanges(); @@ -140,7 +140,7 @@ class TOPPQCCalculator : if (!inputfile_feature.empty()) { cout << "Reading featureXML file..." << endl; - FileHandler().loadFeatures(inputfile_feature, feature_map, {FileTypes::FEATUREXML}); + FileHandler().loadFeatures(inputfile_feature, feature_map, {FileTypes::FEATUREXML}, log_type_); feature_map.updateRanges(); feature_map.sortByRT(); } @@ -149,7 +149,7 @@ class TOPPQCCalculator : if (!inputfile_consensus.empty()) { cout << "Reading consensusXML file..." << endl; - FileHandler().loadConsensusFeatures(inputfile_consensus, consensus_map, {FileTypes::CONSENSUSXML}); + FileHandler().loadConsensusFeatures(inputfile_consensus, consensus_map, {FileTypes::CONSENSUSXML}, log_type_); } vector prot_ids; @@ -157,7 +157,7 @@ class TOPPQCCalculator : if (!inputfile_id.empty()) { cout << "Reading idXML file..." 
<< endl; - FileHandler().loadIdentifications(inputfile_id, prot_ids, pep_ids, {FileTypes::IDXML}); + FileHandler().loadIdentifications(inputfile_id, prot_ids, pep_ids, {FileTypes::IDXML}, log_type_); } // collect QC data and store according to output file extension diff --git a/src/topp/QualityControl.cpp b/src/topp/QualityControl.cpp index 119f73ab7a6..a80f855aa6d 100644 --- a/src/topp/QualityControl.cpp +++ b/src/topp/QualityControl.cpp @@ -275,7 +275,7 @@ class TOPPQualityControl : public TOPPBase //------------------------------------------------------------- if (i < in_raw.size()) { // we either have 'n' or 1 mzML ... use the correct one in each iteration - FileHandler().loadExperiment(in_raw[i], exp, {FileTypes::MZML}); + FileHandler().loadExperiment(in_raw[i], exp, {FileTypes::MZML}, log_type_); spec_map.calculateMap(exp); } @@ -283,7 +283,7 @@ class TOPPQualityControl : public TOPPBase FeatureMap fmap_local; if (!in_postFDR.empty()) { - FileHandler().loadFeatures(in_postFDR[i], fmap_local, {FileTypes::FEATUREXML}); + FileHandler().loadFeatures(in_postFDR[i], fmap_local, {FileTypes::FEATUREXML}, log_type_); fmap = &fmap_local; } else @@ -387,7 +387,7 @@ class TOPPQualityControl : public TOPPBase StringList out_feat = getStringList_("out_feat"); if (!out_feat.empty()) { - FileHandler().storeFeatures(out_feat[i], *fmap, {FileTypes::FEATUREXML}); + FileHandler().storeFeatures(out_feat[i], *fmap, {FileTypes::FEATUREXML}, log_type_); } //------------------------------------------------------------- // Annotate calculated meta values from FeatureMap to given ConsensusMap @@ -460,7 +460,7 @@ class TOPPQualityControl : public TOPPBase String out_cm = getStringOption_("out_cm"); if (!out_cm.empty()) { - FileHandler().storeConsensusFeatures(out_cm, cmap, {FileTypes::CONSENSUSXML}); + FileHandler().storeConsensusFeatures(out_cm, cmap, {FileTypes::CONSENSUSXML}, log_type_); } String out = getStringOption_("out"); diff --git a/src/topp/RNPxlXICFilter.cpp 
b/src/topp/RNPxlXICFilter.cpp index 1de44b1a0e2..ed5204a44ed 100644 --- a/src/topp/RNPxlXICFilter.cpp +++ b/src/topp/RNPxlXICFilter.cpp @@ -199,10 +199,10 @@ class TOPPRNPxlXICFilter : // load experiments PeakMap exp_control; - FileHandler().loadExperiment(control_mzml, exp_control, {FileTypes::MZML}); + FileHandler().loadExperiment(control_mzml, exp_control, {FileTypes::MZML}, log_type_); PeakMap exp_treatment; - FileHandler().loadExperiment(treatment_mzml, exp_treatment, {FileTypes::MZML}); + FileHandler().loadExperiment(treatment_mzml, exp_treatment, {FileTypes::MZML}, log_type_); // extract precursor mz and rts vector pc_mzs; @@ -261,7 +261,7 @@ class TOPPRNPxlXICFilter : } } - FileHandler().storeExperiment(out_mzml, exp_out, {FileTypes::MZML}); + FileHandler().storeExperiment(out_mzml, exp_out, {FileTypes::MZML}, log_type_); return EXECUTION_OK; } diff --git a/src/topp/Resampler.cpp b/src/topp/Resampler.cpp index 208304a803a..b6d04d026f9 100644 --- a/src/topp/Resampler.cpp +++ b/src/topp/Resampler.cpp @@ -116,15 +116,15 @@ class TOPPResampler : lin_resampler.raster(exp[i]); } } - else if(!exp.RangeRT::isEmpty()) + else if(!exp.spectrumRanges().RangeRT::isEmpty()) { // start with even position - auto start_pos = floor(exp.getMinRT()); + auto start_pos = floor(exp.spectrumRanges().getMinRT()); // resample every scan for (Size i = 0; i < exp.size(); ++i) { - lin_resampler.raster_align(exp[i], start_pos, exp.getMaxRT()); + lin_resampler.raster_align(exp[i], start_pos, exp.spectrumRanges().getMaxRT()); } } diff --git a/src/topp/SeedListGenerator.cpp b/src/topp/SeedListGenerator.cpp index 2d0be2271aa..c999644c88e 100644 --- a/src/topp/SeedListGenerator.cpp +++ b/src/topp/SeedListGenerator.cpp @@ -129,7 +129,7 @@ namespace OpenMS if (in_type == FileTypes::CONSENSUSXML) { ConsensusMap consensus; - FileHandler().loadConsensusFeatures(in, consensus, {FileTypes::CONSENSUSXML}); + FileHandler().loadConsensusFeatures(in, consensus, {FileTypes::CONSENSUSXML}, log_type_); 
num_maps = consensus.getColumnHeaders().size(); ConsensusMap::ColumnHeaders ch = consensus.getColumnHeaders(); size_t map_count = 0; @@ -157,21 +157,21 @@ namespace OpenMS else if (in_type == FileTypes::MZML) { PeakMap experiment; - FileHandler().loadExperiment(in, experiment, {FileTypes::MZML}); + FileHandler().loadExperiment(in, experiment, {FileTypes::MZML}, log_type_); seed_gen.generateSeedList(experiment, seed_lists[0]); } else if (in_type == FileTypes::IDXML) { vector proteins; vector peptides; - FileHandler().loadIdentifications(in, proteins, peptides, {FileTypes::IDXML}); + FileHandler().loadIdentifications(in, proteins, peptides, {FileTypes::IDXML}, log_type_); seed_gen.generateSeedList(peptides, seed_lists[0], getFlag_("use_peptide_mass")); } else if (in_type == FileTypes::FEATUREXML) { FeatureMap features; - FileHandler().loadFeatures(in, features, {FileTypes::FEATUREXML}); + FileHandler().loadFeatures(in, features, {FileTypes::FEATUREXML}, log_type_); seed_gen.generateSeedList( features.getUnassignedPeptideIdentifications(), seed_lists[0]); } @@ -187,7 +187,7 @@ namespace OpenMS addDataProcessing_(features, getProcessingInfo_( DataProcessing::DATA_PROCESSING)); OPENMS_LOG_INFO << "Writing " << features.size() << " seeds to " << out[num_maps] << endl; - FileHandler().storeFeatures(out[num_maps], features, {FileTypes::FEATUREXML}); + FileHandler().storeFeatures(out[num_maps], features, {FileTypes::FEATUREXML}, log_type_); } return EXECUTION_OK; diff --git a/src/topp/SpecLibCreator.cpp b/src/topp/SpecLibCreator.cpp deleted file mode 100644 index 358851d2ecb..00000000000 --- a/src/topp/SpecLibCreator.cpp +++ /dev/null @@ -1,258 +0,0 @@ -// Copyright (c) 2002-present, OpenMS Inc. 
-- EKU Tuebingen, ETH Zurich, and FU Berlin -// SPDX-License-Identifier: BSD-3-Clause -// -// -------------------------------------------------------------------------- -// $Maintainer: Timo Sachsenberg $ -// $Authors: $ -// -------------------------------------------------------------------------- - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -using namespace OpenMS; -using namespace std; - -//------------------------------------------------------------- -//Doxygen docu -//------------------------------------------------------------- - -/** -@page TOPP_SpecLibCreator SpecLibCreator - -@brief creates with given data a .MSP format spectral library. - -Information file should have the following information: peptide, retention time, measured weight, charge state. -Extra information is allowed. - -@experimental This Utility is not well tested and some features might not work as expected. - -The command line parameters of this tool are: -@verbinclude TOPP_SpecLibCreator.cli -INI file documentation of this tool: -@htmlinclude TOPP_SpecLibCreator.html -*/ - -// We do not want this class to show up in the docu: -/// @cond TOPPCLASSES - -class TOPPSpecLibCreator : - public TOPPBase -{ -public: - TOPPSpecLibCreator() : - TOPPBase("SpecLibCreator", "Creates an MSP formatted spectral library.") - { - } - -protected: - void registerOptionsAndFlags_() override - { - registerInputFile_("info", "", "", "Holds id, peptide, retention time etc."); - setValidFormats_("info", ListUtils::create("csv")); - - registerStringOption_("itemseperator", "", ",", " Separator between items. e.g. ,", false); - registerStringOption_("itemenclosed", "", "false", "'true' or 'false' if true every item is enclosed e.g. 
'$peptide$,$run$...", false); - setValidStrings_("itemenclosed", ListUtils::create("true,false")); - - registerInputFile_("spec", "", "", "spectra"); - setValidFormats_("spec", ListUtils::create("mzData,mzXML")); - - registerOutputFile_("out", "", "", "output MSP formatted spectra library"); - setValidFormats_("out", ListUtils::create("msp")); - } - - ExitCodes main_(int, const char**) override - { - //------------------------------------------------------------- - // parameter handling - //------------------------------------------------------------- - - String info = getStringOption_("info"); - String itemseperator = getStringOption_("itemseperator"); - String out = getStringOption_("out"); - bool itemenclosed; - if (getStringOption_("itemenclosed") == "true") - { - itemenclosed = true; - } - else - { - itemenclosed = false; - } - - String spec = getStringOption_("spec"); - if (info == String::EMPTY) - { - throw Exception::RequiredParameterNotGiven(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "info"); - } - if (spec == String::EMPTY) - { - throw Exception::RequiredParameterNotGiven(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "spec"); - } - - - //------------------------------------------------------------- - // loading input - //------------------------------------------------------------- - Int retention_time = -1; - Int peptide = -1; - Int measured_weight = -1; - //UInt first_scan; - UInt charge_state(0), Experimental_id(0); //,found_by, track, comment, vaccination_peptid,epitope, confident, hlaallele; - const char* sepi = itemseperator.c_str(); - char sepo = *sepi; - CsvFile csv_file(info, sepo, itemenclosed); - vector list; - - list.resize(csv_file.rowCount()); - - for (UInt i = 0; i < csv_file.rowCount(); ++i) - { - csv_file.getRow(i, list[i]); - } - for (UInt i = 0; i < list[0].size(); ++i) - { - - if (list[0][i].toLower().removeWhitespaces().compare("retentiontime") == 0) - { - retention_time = i; - } - else if (list[0][i].toLower().hasSubstring("_id")) - { 
- Experimental_id = i; - } - else if (list[0][i].toLower() == "last scan") - { - // last_scan = i; - } - else if (list[0][i].toLower() == "modification") - { - // modification = i; - } - else if (list[0][i].toLower().removeWhitespaces().compare("chargestate") == 0 || list[0][i].toLower().removeWhitespaces().hasSubstring("charge")) - { - charge_state = i; - } - else if (list[0][i].toLower().trim().compare("peptide") == 0) - { - peptide = i; - } - else if (list[0][i].toLower().removeWhitespaces().hasSubstring("measuredweight") || list[0][i].removeWhitespaces().compare("measuredweight[M+nH]n+") == 0) - { - measured_weight = i; - } - } - if (retention_time == -1) - { - throw Exception::RequiredParameterNotGiven(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "unclear which parameter is retention time"); - } - if (peptide == -1) - { - throw Exception::RequiredParameterNotGiven(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "unclear which parameter is peptide"); - } - if (measured_weight == -1) - { - throw Exception::RequiredParameterNotGiven(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "unclear which parameter is measured weight"); - } - FileHandler fh; - FileTypes::Type in_type = fh.getType(spec); - PeakMap msexperiment; - - if (in_type == FileTypes::UNKNOWN) - { - writeLogWarn_("Warning: Could not determine input file type!"); - } - else if (in_type == FileTypes::MZDATA || in_type == FileTypes::MZXML) - { - FileHandler().loadExperiment(spec, msexperiment, {FileTypes::MZDATA, FileTypes::MZXML}); - } - if (msexperiment.getMinRT() == 0) - { - throw Exception::RequiredParameterNotGiven(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "EMPTY??"); - } - PeakMap library; - - //------------------------------------------------------------- - // creating library - //------------------------------------------------------------- - UInt found_counter = 0; - - for (UInt i = 1; i < list.size(); ++i) - { - bool no_peptide = true; - double rt = (60 * (list[i][retention_time].toFloat())); // from 
minutes to seconds - double mz = list[i][measured_weight].toFloat(); - for (PeakMap::Iterator it = msexperiment.begin(); it < msexperiment.end(); ++it) - { - if ((abs(rt - it->getRT()) < 5) && (abs(mz - it->getPrecursors()[0].getMZ()) < 0.1)) - { - //if ( ceil(rt) == ceil(it->getRT()) || ceil(rt) == floor(it->getRT()) || floor(rt) == ceil(it->getRT()) || floor(rt) == floor(it->getRT())) - ++found_counter; - no_peptide = false; - cout << "Found Peptide " << list[i][peptide] << " with id: " << list[i][Experimental_id] << "\n"; - cout << "rt: " << it->getRT() << " and mz: " << it->getPrecursors()[0].getMZ() << "\n"; - - MSSpectrum speci; - speci.setRT(it->getRT()); - speci.setMSLevel(2); - speci.setPrecursors(it->getPrecursors()); - for (UInt j = 0; j < it->size(); ++j) - { - - Peak1D richy; - richy.setIntensity(it->operator[](j).getIntensity()); - richy.setPosition(it->operator[](j).getPosition()); - richy.setMZ(it->operator[](j).getMZ()); - richy.setPos(it->operator[](j).getPos()); //ALIAS for setMZ??? 
- - speci.push_back(richy); - } - PeptideHit hit; // = *it->getPeptideIdentifications().begin()->getHits().begin(); - AASequence aa = AASequence::fromString(list[i][peptide]); - hit.setSequence(aa); - hit.setCharge(list[i][charge_state].toInt()); - vector hits; - hits.push_back(hit); - vector pepi; - PeptideIdentification pep; - pep.setHits(hits); - pepi.push_back(pep); - speci.setPeptideIdentifications(pepi); - //it->getPeptideIdentifications().begin()->setHits(hits); - library.addSpectrum(speci); - } - } - if (no_peptide) - { - cout << "Peptide: " << list[i][peptide] << " not found\n"; - } - } - cout << "Found " << found_counter << " peptides\n"; - - //------------------------------------------------------------- - // writing output - //------------------------------------------------------------- - in_type = fh.getType(out); - FileHandler().storeExperiment(out, library, {FileTypes::MZDATA, FileTypes::MZXML, FileTypes::MSP}); - return EXECUTION_OK; - } - -}; - - - - -int main(int argc, const char** argv) -{ - TOPPSpecLibCreator tool; - return tool.main(argc, argv); -} - -/// @endcond diff --git a/src/topp/SpecLibSearcher.cpp b/src/topp/SpecLibSearcher.cpp deleted file mode 100644 index a01a3762cd3..00000000000 --- a/src/topp/SpecLibSearcher.cpp +++ /dev/null @@ -1,605 +0,0 @@ -// Copyright (c) 2002-present, OpenMS Inc. 
-- EKU Tuebingen, ETH Zurich, and FU Berlin -// SPDX-License-Identifier: BSD-3-Clause -// -// -------------------------------------------------------------------------- -// $Maintainer: Timo Sachsenberg $ -// $Authors: David Wojnar, Timo Sachsenberg $ -// -------------------------------------------------------------------------- - -#include - -#include -#include -#include -#include -#include -#include -// TODO add ID support to Handler -#include -#include -#include -#include - -#include -#include -#include -#include -using namespace OpenMS; -using namespace std; - -//------------------------------------------------------------- -//Doxygen docu -//------------------------------------------------------------- - -/** -@page TOPP_SpecLibSearcher SpecLibSearcher - -@brief Identifies peptide MS/MS spectra by spectral matching with a searchable spectral library. - -
- - - - - - - - -
pot. predecessor tools - → SpecLibSearcher → pot. successor tools -
@ref TOPP_SpecLibCreator @ref TOPP_IDFilter or @n any protein/peptide processing tool
-
- -@experimental This TOPP-tool is not well tested and not all features might be properly implemented and tested. - -@note Currently mzIdentML (mzid) is not directly supported as an input/output format of this tool. Convert mzid files to/from idXML using @ref TOPP_IDFileConverter if necessary. - -The command line parameters of this tool are: -@verbinclude TOPP_SpecLibSearcher.cli -INI file documentation of this tool: -@htmlinclude TOPP_SpecLibSearcher.html -*/ - -// We do not want this class to show up in the docu: -/// @cond TOPPCLASSES - -class TOPPSpecLibSearcher : - public TOPPBase -{ -public: - TOPPSpecLibSearcher() : - TOPPBase("SpecLibSearcher", "Identifies peptide MS/MS spectra by spectral matching with a searchable spectral library.") - { - } - -protected: - void registerOptionsAndFlags_() override - { - registerInputFileList_("in", "", ListUtils::create(""), "Input files"); - setValidFormats_("in", ListUtils::create("mzML")); - registerInputFile_("lib", "", "", "searchable spectral library (MSP format)"); - setValidFormats_("lib", ListUtils::create("msp")); - registerOutputFileList_("out", "", ListUtils::create(""), "Output files. 
Have to be as many as input files"); - setValidFormats_("out", ListUtils::create("idXML")); - - registerTOPPSubsection_("precursor", "Precursor (Parent Ion) Options"); - registerDoubleOption_("precursor:mass_tolerance", "", 10.0, "Width of precursor mass tolerance window", false); - - StringList precursor_mass_tolerance_unit_valid_strings; - precursor_mass_tolerance_unit_valid_strings.push_back("ppm"); - precursor_mass_tolerance_unit_valid_strings.push_back("Da"); - - registerStringOption_("precursor:mass_tolerance_unit", "", "ppm", "Unit of precursor mass tolerance.", false, false); - setValidStrings_("precursor:mass_tolerance_unit", precursor_mass_tolerance_unit_valid_strings); - - registerIntOption_("precursor:min_charge", "", 2, "Minimum precursor charge to be considered.", false, true); - registerIntOption_("precursor:max_charge", "", 5, "Maximum precursor charge to be considered.", false, true); - - // consider one before annotated monoisotopic peak and the annotated one - IntList isotopes = {0, 1}; - registerIntList_("precursor:isotopes", "", isotopes, "Corrects for mono-isotopic peak misassignments. (E.g.: 1 = prec. 
may be misassigned to first isotopic peak)", false, false); - - registerTOPPSubsection_("fragment", "Fragments (Product Ion) Options"); - registerDoubleOption_("fragment:mass_tolerance", "", 10.0, "Fragment mass tolerance", false); - -// StringList fragment_mass_tolerance_unit_valid_strings; -// fragment_mass_tolerance_unit_valid_strings.push_back("ppm"); -// fragment_mass_tolerance_unit_valid_strings.push_back("Da"); - -// registerStringOption_("fragment:mass_tolerance_unit", "", "ppm", "Unit of fragment m", false, false); -// setValidStrings_("fragment:mass_tolerance_unit", fragment_mass_tolerance_unit_valid_strings); - - registerStringOption_("compare_function", "", "ZhangSimilarityScore", "function for similarity comparison", false); - setValidStrings_("compare_function", {"ZhangSimilarityScore", "SpectraSTSimilarityScore"}); - - registerTOPPSubsection_("report", "Reporting Options"); - registerIntOption_("report:top_hits", "", 10, "Maximum number of top scoring hits per spectrum that are reported.", false, true); - - addEmptyLine_(); - - registerTOPPSubsection_("filter", "Filtering options. Most are especially useful when the query spectra are raw."); - registerDoubleOption_("filter:remove_peaks_below_threshold", "", 2.01, "All peaks of a query spectrum with intensities below will be zeroed.", false); - registerIntOption_("filter:min_peaks", "", 5, "required minimum number of peaks for a query spectrum", false); - registerIntOption_("filter:max_peaks", "", 150, "Use only the top of peaks.", false); - registerIntOption_("filter:cut_peaks_below", "", 1000, "Remove all peaks which are lower than 1/ of the highest peaks. 
Default equals all peaks which are lower than 0.001 of the maximum intensity peak", false); - - registerTOPPSubsection_("modifications", "Modifications Options"); - vector all_mods; - ModificationsDB::getInstance()->getAllSearchModifications(all_mods); - registerStringList_("modifications:fixed", "", ListUtils::create(""), "Fixed modifications, specified using UniMod (www.unimod.org) terms, e.g. 'Carbamidomethyl (C)'", false); - setValidStrings_("modifications:fixed", all_mods); - registerStringList_("modifications:variable", "", ListUtils::create(""), "Variable modifications, specified using UniMod (www.unimod.org) terms, e.g. 'Oxidation (M)'", false); - setValidStrings_("modifications:variable", all_mods); - registerIntOption_("modifications:variable_max_per_peptide", "", 2, "Maximum number of residues carrying a variable modification per candidate peptide", false, false); - - addEmptyLine_(); - } - - using MapLibraryPrecursorToLibrarySpectrum = multimap; - - MapLibraryPrecursorToLibrarySpectrum annotateIdentificationsToSpectra_(const vector& ids, - const PeakMap& library, - StringList variable_modifications, - StringList fixed_modifications, - double remove_peaks_below_threshold) - { - MapLibraryPrecursorToLibrarySpectrum annotated_lib; - - ModificationsDB* mdb = ModificationsDB::getInstance(); - - - // iterate over library spectra and add associated annotations - PeakMap::const_iterator library_it = library.begin(); - vector::const_iterator id_it = ids.begin(); - for (; library_it < library.end(); ++library_it, ++id_it) - { - const MSSpectrum& lib_spec = *library_it; - const double& precursor_MZ = lib_spec.getPrecursors()[0].getMZ(); - - const PeptideIdentification& id = *id_it; - const AASequence& aaseq = id.getHits()[0].getSequence(); - - PeakSpectrum lib_entry; - bool variable_modifications_ok(true), fixed_modifications_ok(true); - - // check if each amino acid listed as modified in fixed modifications are modified - if (!fixed_modifications.empty()) - { - 
for (Size j = 0; j < aaseq.size(); ++j) - { - const Residue& mod = aaseq.getResidue(j); - for (Size k = 0; k < fixed_modifications.size(); ++k) - { - if (mod.getOneLetterCode()[0] == mdb->getModification(fixed_modifications[k])->getOrigin() && fixed_modifications[k] != mod.getModificationName()) - { - fixed_modifications_ok = false; - break; - } - } - } - } - - // check if each amino acid listed in variable modifications is either unmodified or modified with the corresponding modification - // Note: this code currently does not allow for multiple variable modifications with same origin - if (aaseq.isModified() && (!variable_modifications.empty())) - { - for (Size j = 0; j < aaseq.size(); ++j) - { - if (!aaseq[j].isModified()) { continue; } - - const Residue& mod = aaseq.getResidue(j); - for (Size k = 0; k < variable_modifications.size(); ++k) - { - if (mod.getOneLetterCode()[0] == mdb->getModification(variable_modifications[k])->getOrigin() && variable_modifications[k] != mod.getModificationName()) - { - variable_modifications_ok = false; - break; - } - } - } - } - - // TODO: check entries that don't adhere to this rule - if (!variable_modifications_ok || !fixed_modifications_ok) { continue; } - - // copy peptide identification over to spectrum meta data - lib_entry.getPeptideIdentifications().push_back(id); - lib_entry.setPrecursors(lib_spec.getPrecursors()); - - // empty array would segfault - if (id.getHits().empty() || id.getHits()[0].getPeakAnnotations().empty()) - { - throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Expected StringDataArray of type MSPeakInfo"); - } - - const vector& pa = id.getHits()[0].getPeakAnnotations(); - // library entry transformation - for (UInt l = 0; l < lib_spec.size(); ++l) - { - Peak1D peak; - if (lib_spec[l].getIntensity() > remove_peaks_below_threshold) - { - // this is the "MSPPeakInfo" array, see MSPFile which creates a single StringDataArray - const String& sa = pa[l].annotation; - - // TODO: 
check why this scaling is done for ? peaks (dubious peaks?) - if (sa[0] == '?') - { - peak.setIntensity(sqrt(0.2 * lib_spec[l].getIntensity())); - } - else - { - peak.setIntensity(sqrt(lib_spec[l].getIntensity())); - } - - peak.setMZ(lib_spec[l].getMZ()); - lib_entry.push_back(peak); - } - } - annotated_lib.insert(make_pair(precursor_MZ, lib_entry)); - } - return annotated_lib; - } - - ExitCodes main_(int, const char**) override - { - //------------------------------------------------------------- - // parameter handling - //------------------------------------------------------------- - StringList in_spec = getStringList_("in"); - StringList out = getStringList_("out"); - String in_lib = getStringOption_("lib"); - String compare_function = getStringOption_("compare_function"); - - float precursor_mass_tolerance = getDoubleOption_("precursor:mass_tolerance"); - bool precursor_mass_tolerance_unit_ppm = getStringOption_("precursor:mass_tolerance_unit") == "ppm" ? true : false; - - int pc_min_charge = getIntOption_("precursor:min_charge"); - int pc_max_charge = getIntOption_("precursor:max_charge"); - - // consider one before annotated monoisotopic peak and the annotated one - IntList isotopes = getIntList_("precursor:isotopes"); - -// float fragment_mass_tolerance = getDoubleOption_("fragment:mass_tolerance"); -// bool fragment_mass_tolerance_unit_ppm = getStringOption_("fragment:mass_tolerance_unit") == "ppm" ? 
true : false; - - int top_hits = getIntOption_("report:top_hits"); - - float remove_peaks_below_threshold = getDoubleOption_("filter:remove_peaks_below_threshold"); - UInt min_peaks = getIntOption_("filter:min_peaks"); - UInt max_peaks = getIntOption_("filter:max_peaks"); - Int cut_peaks_below = getIntOption_("filter:cut_peaks_below"); - - StringList fixed_modifications = getStringList_("modifications:fixed"); - StringList variable_modifications = getStringList_("modifications:variable"); - - if (top_hits < -1) - { - writeLogError_("top_hits (should be >= -1 )"); - return ILLEGAL_PARAMETERS; - } - - // ------------------------------------------------------------- - // loading input - // ------------------------------------------------------------- - if (out.size() != in_spec.size()) - { - writeLogError_("out (should be as many as input files)"); - return ILLEGAL_PARAMETERS; - } - - time_t prog_time = time(nullptr); - MSPFile spectral_library; - PeakMap query, library; - - time_t start_build_time = time(nullptr); - // ------------------------------------------------------------- - // building map for faster search - // ------------------------------------------------------------- - - // library containing already identified peptide spectra - vector ids; - spectral_library.load(in_lib, ids, library); - - /* - // Output bin histogram - BinnedSpectrum bin_frequency(0.01, 1, PeakSpectrum()); - for (auto const & s : library) - { - BinnedSpectrum b(0.01, 1, s); - // e.g.: bin_frequency.getBins() += b.getBins(); // sum up itensities - // e.g.: bin_frequency.getBins() += b.getBins().coeffs().cwiseMin(1.0f); // count occupied bins (by truncating intensities >= 1 to 1) - } - - for (BinnedSpectrum::SparseVectorIteratorType it(bin_frequency.getBins()); it; ++it) - { - // output m/z of bin start and average bin intensity - cout << it.index() * bin_frequency.getBinSize() << "\t" << static_cast(it.value()/library.size()) << "\n"; - cout << static_cast(it.value()) << "\n"; - cout 
<< static_cast(library.size()) << "\n"; - } - cout << endl; - */ - - MapLibraryPrecursorToLibrarySpectrum mslib = annotateIdentificationsToSpectra_(ids, library, variable_modifications, fixed_modifications, remove_peaks_below_threshold); - - time_t end_build_time = time(nullptr); - OPENMS_LOG_INFO << "Time needed for preprocessing data: " << (end_build_time - start_build_time) << "\n"; - - //compare function - std::unique_ptr comparator; - if (compare_function == "SpectraSTSimilarityScore") - { - comparator.reset(new SpectraSTSimilarityScore()); - } - else if (compare_function == "ZhangSimilarityScore") - { - comparator.reset(new ZhangSimilarityScore()); - } - else - { - writeLogError_("Unknown compare function"); - return ILLEGAL_PARAMETERS; - } - - //------------------------------------------------------------- - // calculations - //------------------------------------------------------------- - double score; - StringList::iterator in, out_file; - for (in = in_spec.begin(), out_file = out.begin(); in < in_spec.end(); ++in, ++out_file) - { - time_t start_time = time(nullptr); - FileHandler().loadExperiment(*in, query, {FileTypes::MZML}, log_type_); - - // results - vector peptide_ids; - vector protein_ids; - ProteinIdentification prot_id; - - //Parameters of identification - prot_id.setIdentifier("test"); - prot_id.setSearchEngineVersion("SpecLibSearcher"); - prot_id.setDateTime(DateTime::now()); - prot_id.setScoreType(compare_function); - - ProteinIdentification::SearchParameters search_parameters; - search_parameters.db = getStringOption_("lib"); - search_parameters.charges = String(getIntOption_("precursor:min_charge")) + ":" + String(getIntOption_("precursor:max_charge")); - - ProteinIdentification::PeakMassType mass_type = ProteinIdentification::MONOISOTOPIC; - search_parameters.mass_type = mass_type; - search_parameters.fixed_modifications = getStringList_("modifications:fixed"); - search_parameters.variable_modifications = 
getStringList_("modifications:variable"); - // search_parameters.missed_cleavages = getIntOption_("peptide:missed_cleavages"); - search_parameters.precursor_mass_tolerance = getDoubleOption_("precursor:mass_tolerance"); - search_parameters.precursor_mass_tolerance_ppm = getStringOption_("precursor:mass_tolerance_unit") == "ppm" ? true : false; -// search_parameters.fragment_mass_tolerance = getDoubleOption_("fragment:mass_tolerance"); -// search_parameters.fragment_mass_tolerance_ppm = getStringOption_("fragment:mass_tolerance_unit") == "ppm" ? true : false; - -//TODO: report an Enzyme? - - prot_id.setSearchParameters(search_parameters); - - - /***********SEARCH**********/ - for (UInt j = 0; j < query.size(); ++j) - { - //Set identifier for each identifications - PeptideIdentification pid; - pid.setIdentifier("test"); - pid.setScoreType(compare_function); - ProteinHit pr_hit; - pr_hit.setAccession(j); - prot_id.insertHit(pr_hit); - - // proper MS2? - if (query[j].empty() || query[j].getMSLevel() != 2) - { - continue; - } - - if (query[j].getPrecursors().empty()) - { - writeLogWarn_("Warning MS2 spectrum without precursor information"); - continue; - } - - // filter query spectrum - double max_intensity = std::max_element(query[j].begin(), query[j].end(), - [](const Peak1D& l, const Peak1D& r) - { - return (l.getIntensity() < r.getIntensity()); - })->getIntensity(); - - double min_high_intensity = max_intensity / cut_peaks_below; - - PeakSpectrum filtered_query; - for (UInt k = 0; k < query[j].size(); ++k) - { - if (query[j][k].getIntensity() >= remove_peaks_below_threshold - && query[j][k].getIntensity() >= min_high_intensity) - { - Peak1D peak; - peak.setIntensity(sqrt(query[j][k].getIntensity())); - peak.setMZ(query[j][k].getMZ()); - filtered_query.push_back(peak); - } - } - - // retain only top N peaks - if (filtered_query.size() > max_peaks) - { - filtered_query.sortByIntensity(true); - filtered_query.resize(max_peaks); - filtered_query.sortByPosition(); - } - 
- if (filtered_query.size() < min_peaks) - { - continue; - } - - const double& query_rt = query[j].getRT(); - const int& query_charge = query[j].getPrecursors()[0].getCharge(); - const double query_mz = query[j].getPrecursors()[0].getMZ(); - - if (query_charge > 0 && (query_charge < pc_min_charge || query_charge > pc_max_charge)) - { - continue; - } - - for (auto const & iso : isotopes) - { - // isotopic misassignment corrected query - const double ic_query_mz = query_mz - iso * Constants::C13C12_MASSDIFF_U; - - // if tolerance unit is ppm convert to m/z - const double precursor_mass_tolerance_mz = precursor_mass_tolerance_unit_ppm ? ic_query_mz * precursor_mass_tolerance * 1e-6 : precursor_mass_tolerance; - - // skip matching of isotopic misassignments if charge not annotated - if (iso != 0 && query_charge == 0) - { - continue; - } - - // skip matching of isotopic misassignments if search windows around isotopic peaks would overlap (resulting in more than one report of the same hit) - const double isotopic_peak_distance_mz = Constants::C13C12_MASSDIFF_U / query_charge; - if (iso != 0 && precursor_mass_tolerance_mz >= 0.5 * isotopic_peak_distance_mz) - { - continue; - } - - /* TODO: remove old code for charge estimation? 
- bool charge_one = false; - Int percent = (Int) Math::round((query[j].size() / 100.0) * 3.0); - Int margin = (Int) Math::round((query[j].size() / 100.0) * 1.0); - for (vector::iterator peak = query[j].end() - 1; percent >= 0; --peak, --percent) - { - if (peak->getMZ() < query_MZ) - { - break; - } - } - if (percent > margin) - { - charge_one = true; - } - */ - - - // determine MS2 precursors that match to the current peptide mass - MapLibraryPrecursorToLibrarySpectrum::const_iterator low_it, up_it; - - low_it = mslib.lower_bound(ic_query_mz - 0.5 * precursor_mass_tolerance_mz); - up_it = mslib.upper_bound(ic_query_mz + 0.5 * precursor_mass_tolerance_mz); - - // no matching precursor in data - if (low_it == up_it) - { - continue; - } - - for (; low_it != up_it; ++low_it) - { - const PeakSpectrum& lib_spec = low_it->second;; - PeptideHit hit = lib_spec.getPeptideIdentifications()[0].getHits()[0]; - const int& lib_charge = hit.getCharge(); - - // check if charge state between library and experimental spectrum match - if (query_charge > 0 && lib_charge != query_charge) - { - continue; - } - - // Special treatment for SpectraST score as it computes a score based on the whole library - if (compare_function == "SpectraSTSimilarityScore") - { - auto& sp = dynamic_cast(*comparator); - BinnedSpectrum quer_bin_spec = sp.transform(filtered_query); - BinnedSpectrum lib_bin_spec = sp.transform(lib_spec); - score = sp(filtered_query, lib_spec); //(*sp)(quer_bin,librar_bin); - double dot_bias = sp.dot_bias(quer_bin_spec, lib_bin_spec, score); - hit.setMetaValue("DOTBIAS", dot_bias); - } - else - { - score = (*comparator)(filtered_query, lib_spec); - } - - DataValue RT(lib_spec.getRT()); - DataValue MZ(lib_spec.getPrecursors()[0].getMZ()); - hit.setMetaValue("lib:RT", RT); - hit.setMetaValue("lib:MZ", MZ); - hit.setMetaValue(Constants::UserParam::ISOTOPE_ERROR, iso); - hit.setScore(score); - PeptideEvidence pe; - pe.setProteinAccession(pr_hit.getAccession()); - 
hit.addPeptideEvidence(pe); - pid.insertHit(hit); - } - } - - pid.setHigherScoreBetter(true); - pid.sort(); - - if (compare_function == "SpectraSTSimilarityScore") - { - if (!pid.empty() && !pid.getHits().empty()) - { - vector final_hits; - final_hits.resize(pid.getHits().size()); - auto& sp = dynamic_cast(*comparator); - Size runner_up = 1; - for (; runner_up < pid.getHits().size(); ++runner_up) - { - if (pid.getHits()[0].getSequence().toUnmodifiedString() != pid.getHits()[runner_up].getSequence().toUnmodifiedString() - || runner_up > 5) - { - break; - } - } - double delta_D = sp.delta_D(pid.getHits()[0].getScore(), pid.getHits()[runner_up].getScore()); - for (Size s = 0; s < pid.getHits().size(); ++s) - { - final_hits[s] = pid.getHits()[s]; - final_hits[s].setMetaValue("delta D", delta_D); - final_hits[s].setMetaValue("dot product", pid.getHits()[s].getScore()); - final_hits[s].setScore(sp.compute_F(pid.getHits()[s].getScore(), delta_D, pid.getHits()[s].getMetaValue("DOTBIAS"))); - } - pid.setHits(final_hits); - pid.sort(); - pid.setMZ(query[j].getPrecursors()[0].getMZ()); - pid.setRT(query_rt); - } - } - - if (top_hits != -1 && (UInt)top_hits < pid.getHits().size()) - { - pid.getHits().resize(top_hits); - } - peptide_ids.push_back(pid); - } - protein_ids.push_back(prot_id); - - //------------------------------------------------------------- - // writing output - //------------------------------------------------------------- - FileHandler().storeIdentifications(*out_file, protein_ids, peptide_ids, {FileTypes::IDXML}); - time_t end_time = time(nullptr); - OPENMS_LOG_INFO << "Search time: " << difftime(end_time, start_time) << " seconds for " << *in << "\n"; - } - time_t end_time = time(nullptr); - OPENMS_LOG_INFO << "Total time: " << difftime(end_time, prog_time) << " seconds\n"; - return EXECUTION_OK; - } - -}; - -int main(int argc, const char** argv) -{ - TOPPSpecLibSearcher tool; - return tool.main(argc, argv); -} - -/// @endcond diff --git 
a/src/topp/TextExporter.cpp b/src/topp/TextExporter.cpp index 3d96cecfc66..72689e2e3de 100644 --- a/src/topp/TextExporter.cpp +++ b/src/topp/TextExporter.cpp @@ -666,7 +666,7 @@ namespace OpenMS //------------------------------------------------------------- FeatureMap feature_map; - FileHandler().loadFeatures(in, feature_map, {FileTypes::FEATUREXML}); + FileHandler().loadFeatures(in, feature_map, {FileTypes::FEATUREXML}, log_type_); // extract common id and hit meta values StringList peptide_id_meta_keys; @@ -828,7 +828,7 @@ namespace OpenMS ConsensusMap consensus_map; - FileHandler().loadConsensusFeatures(in, consensus_map, {FileTypes::CONSENSUSXML}); + FileHandler().loadConsensusFeatures(in, consensus_map, {FileTypes::CONSENSUSXML}, log_type_); // for optional export of ConsensusFeature meta values, collect all possible meta value keys std::set meta_value_keys; @@ -1335,7 +1335,7 @@ namespace OpenMS { vector prot_ids; vector pep_ids; - FileHandler().loadIdentifications(in, prot_ids, pep_ids, {FileTypes::IDXML}); + FileHandler().loadIdentifications(in, prot_ids, pep_ids, {FileTypes::IDXML}, log_type_); StringList peptide_id_meta_keys; StringList peptide_hit_meta_keys; StringList protein_hit_meta_keys; @@ -1436,7 +1436,7 @@ namespace OpenMS else if (in_type == FileTypes::MZML) { PeakMap exp; - FileHandler().loadExperiment(in, exp, {FileTypes::MZML}, ProgressLogger::NONE, false, false); + FileHandler().loadExperiment(in, exp, {FileTypes::MZML}, log_type_, false, false); if (exp.getSpectra().empty() && exp.getChromatograms().empty()) { diff --git a/src/topp/executables.cmake b/src/topp/executables.cmake index 776339e253e..4f16bf65fc4 100644 --- a/src/topp/executables.cmake +++ b/src/topp/executables.cmake @@ -49,7 +49,6 @@ IDExtractor IDFileConverter IDFilter IDMapper -IDMassAccuracy IDMerger IDPosteriorErrorProbability IDRipper @@ -123,8 +122,6 @@ SemanticValidator SequenceCoverageCalculator SimpleSearchEngine SiriusExport -SpecLibCreator -SpecLibSearcher 
SpectraFilterNLargest SpectraFilterNormalizer SpectraFilterThresholdMower diff --git a/tools/PythonExtensionChecker.py b/tools/PythonExtensionChecker.py index 6781aa533f2..57ea98b5a21 100755 --- a/tools/PythonExtensionChecker.py +++ b/tools/PythonExtensionChecker.py @@ -196,7 +196,7 @@ def handle_member_definition(mdef, pxd_class, cnt): tres.setMessage("Renamed constructor") else: tres.setPassed(False) - tres.setMessage(" -- TODO missing constructor in PXD: %s nogil except +" % mdef.format_definition_for_cython()) + tres.setMessage(" -- TODO missing constructor in PXD: %s except + nogil " % mdef.format_definition_for_cython()) elif (mdef.name.find("operator") != -1 or mdef.name.find("begin") != -1 or @@ -206,7 +206,7 @@ def handle_member_definition(mdef, pxd_class, cnt): tres.setMessage("Cannot wrap method with iterator/operator %s" % mdef.name) else: tres.setPassed(False) - tres.setMessage(" -- TODO missing function in PXD: %s nogil except +" % mdef.format_definition_for_cython()) + tres.setMessage(" -- TODO missing function in PXD: %s except + nogil " % mdef.format_definition_for_cython()) else: # It is neither public function/enum/variable tres.setPassed(True) @@ -486,10 +486,10 @@ def get_pxd_from_class(self, dfile, internal_file_name, xml_output_path): # assignment operator, cannot be overriden in Python continue if mdef.definition.find("static") != -1: - methods += " # TODO: static # %s nogil except +\n" % declaration - static_methods += " %s nogil except + # wrap-attach:%s\n" % (declaration, preferred_classname) + methods += " # TODO: static # %s except + nogil \n" % declaration + static_methods += " %s except + nogil # wrap-attach:%s\n" % (declaration, preferred_classname) continue - methods += " %s nogil except +\n" % declaration + methods += " %s except + nogil \n" % declaration # Build up the whole file res = DoxygenCppFunction.generate_imports(imports_needed) # add default cimport @@ -498,11 +498,11 @@ def get_pxd_from_class(self, dfile, 
internal_file_name, xml_output_path): # We need to create a default ctor in any case, however we do not need # to *wrap* the copy constructor even though we need to have one for Cython if True: # not default_ctor: - res += " %s() nogil except +\n" % comp_name.split("::")[-1] + res += " %s() except + nogil \n" % comp_name.split("::")[-1] if not copy_ctor: - res += " %s(%s) nogil except + #wrap-ignore\n" % (comp_name.split("::")[-1], comp_name.split("::")[-1]) + res += " %s(%s) except + nogil #wrap-ignore\n" % (comp_name.split("::")[-1], comp_name.split("::")[-1]) else: - res += " %s(%s) nogil except +\n" % (comp_name.split("::")[-1], comp_name.split("::")[-1]) + res += " %s(%s) except + nogil \n" % (comp_name.split("::")[-1], comp_name.split("::")[-1]) res += methods res += enum res += "\n" @@ -645,8 +645,8 @@ def format_definition_for_cython(self, replace_nogil=True): # Add nogil if replace_nogil: - cpp_def = cpp_def.replace(";", "nogil except +") - cpp_def = cpp_def.replace("const;", "nogil except +") + cpp_def = cpp_def.replace(";", "except + nogil ") + cpp_def = cpp_def.replace("const;", "except + nogil ") else: cpp_def = cpp_def.replace("const;", "") cpp_def = cpp_def.replace(";", "") diff --git a/tools/ci/cibuild.cmake b/tools/ci/cibuild.cmake index 461347d0168..3d6d52d5b33 100644 --- a/tools/ci/cibuild.cmake +++ b/tools/ci/cibuild.cmake @@ -89,7 +89,7 @@ set(VARS_TO_LOAD "ENABLE_TOPP_TESTING" "ENABLE_PIPELINE_TESTING" "ENABLE_DOCS" - "ENABLE_CWL" + "ENABLE_CWL_GENERATION" "ENABLE_TUTORIALS" "ENABLE_UPDATE_CHECK" "MT_ENABLE_OPENMP" @@ -107,6 +107,7 @@ set(VARS_TO_LOAD "Python_FIND_STRATEGY" "WITH_GUI" "WITH_THERMORAWFILEPARSER_TEST" + "COMPILE_PXDS" ) message("tools/ci/cibuild.cmake: Loading the following vars from ENV if available: ${VARS_TO_LOAD}") @@ -131,6 +132,7 @@ SEARCH_ENGINES_DIRECTORY=$ENV{SEARCH_ENGINES_DIRECTORY} ENABLE_TUTORIALS=Off ENABLE_GCC_WERROR=Off PYOPENMS=$ENV{PYOPENMS} +COMPILE_PXDS=$ENV{COMPILE_PXDS} MT_ENABLE_OPENMP=$ENV{OPENMP} 
PYTHON_EXECUTABLE:FILEPATH=$ENV{PYTHON_EXE} PY_NUM_THREADS=4 @@ -179,12 +181,16 @@ endif() if("$ENV{ENABLE_STYLE_TESTING}" STREQUAL "OFF") if("$ENV{PYOPENMS}" STREQUAL "ON") ctest_build(BUILD "${CTEST_BINARY_DIRECTORY}" TARGET "pyopenms" NUMBER_ERRORS _build_errors) - # Generate and valdiate the CWL files if "ENABLE_CWL" is set + # Generate and validate the CWL files if "ENABLE_CWL_GENERATION" is set else() ctest_build(BUILD "${CTEST_BINARY_DIRECTORY}" NUMBER_ERRORS _build_errors) endif() - if("$ENV{ENABLE_CWL}" STREQUAL "ON") - ctest_build(BUILD "${CTEST_BINARY_DIRECTORY}" TARGET "generate_cwl_files" NUMBER_ERRORS _build_errors) + # Only build compile_pxds if PYOPENMS is not ON (since it's already a subtarget of pyopenms) + if("$ENV{COMPILE_PXDS}" STREQUAL "ON" AND "$ENV{PYOPENMS}" STREQUAL "OFF") + ctest_build(BUILD "${CTEST_BINARY_DIRECTORY}" TARGET "compile_pxds" NUMBER_ERRORS _build_errors) + endif() + if("$ENV{ENABLE_CWL_GENERATION}" STREQUAL "ON") + ctest_build(BUILD "${CTEST_BINARY_DIRECTORY}" TARGET "generate_cwl_files" NUMBER_ERRORS _build_errors) endif() else() set(_build_errors 0) diff --git a/tools/update_version_numbers.sh b/tools/update_version_numbers.sh new file mode 100755 index 00000000000..89a90eb3c4d --- /dev/null +++ b/tools/update_version_numbers.sh @@ -0,0 +1,101 @@ +#!/usr/bin/env bash + +################################################################################ +set -eu +set -o pipefail + +################################################################################ +usage() { + cat <&2 "ERROR: please provide three numbers (use --help for more info)" + exit 1 +fi + +################################################################################ +package_version_major=$1 +package_version_minor=$2 +package_version_patch=$3 +package_version="$1.$2.$3" + +echo "Setting version $package_version" + +################################################################################ +# update main cmakelist +sed -i -e 
"s#.*set(OPENMS_PACKAGE_VERSION_MAJOR.*#set(OPENMS_PACKAGE_VERSION_MAJOR \"$package_version_major\")#" CMakeLists.txt +sed -i -e "s#.*set(OPENMS_PACKAGE_VERSION_MINOR.*#set(OPENMS_PACKAGE_VERSION_MINOR \"$package_version_minor\")#" CMakeLists.txt +sed -i -e "s#.*set(OPENMS_PACKAGE_VERSION_PATCH.*#set(OPENMS_PACKAGE_VERSION_PATCH \"$package_version_patch\")#" CMakeLists.txt + +# update version info test +sed -i -e "s#detail.version_major =.*#detail.version_major = $package_version_major;#" ./src/tests/class_tests/openms/source/VersionInfo_test.cpp +sed -i -e "s#detail.version_minor =.*#detail.version_minor = $package_version_minor;#" ./src/tests/class_tests/openms/source/VersionInfo_test.cpp +sed -i -e "s#detail.version_patch =.*#detail.version_patch = $package_version_patch;#" ./src/tests/class_tests/openms/source/VersionInfo_test.cpp + +# update vcpkg.json +sed -i -e "s/\"version-string\": \".*\"/\"version-string\": \"$package_version\"/" vcpkg.json + +# update test write ini out: +sed -i -e "s#