Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
73731f9
added .janno column Species
nevrome Aug 24, 2025
9fcbbf1
turned Collection_ID into a list column
nevrome Aug 24, 2025
7ae46fc
added a Custodian_Institution column
nevrome Aug 24, 2025
f2c2fec
added .janno columns for cultural and archaeological group attribution
nevrome Aug 27, 2025
ff70a71
added the new Chromosomal_Anomalies .janno column
nevrome Aug 27, 2025
014e154
soft-retiring of the Capture_Type ReferenceGenome
nevrome Aug 27, 2025
45ab988
turned Damage into a list-column
nevrome Aug 27, 2025
1823987
mechanism to apply adjustments to .janno columns Endogenous and Damag…
nevrome Aug 27, 2025
6858403
added the new submitted_md5 column to the .ssf implementation and als…
nevrome Aug 28, 2025
566ef0b
replaced Source_Tissue with Source_Material + Source_Material_Note
nevrome Aug 28, 2025
6afce07
note fields should not be list columns
nevrome Aug 28, 2025
7f9b8b2
stylish-haskell
nevrome Aug 28, 2025
acc47e6
reworking the tests after the preceding changes
nevrome Aug 28, 2025
ca6174e
moved the Species column after the mandatory ones
nevrome Aug 28, 2025
f3576be
testing two new possible error states
nevrome Aug 28, 2025
5f31750
update of golden test data
nevrome Aug 28, 2025
6c6400c
stylish-haskell
nevrome Aug 28, 2025
4c232d1
first, entirely untested algorithm to automatically position _Note co…
nevrome Sep 8, 2025
7411850
testing and tweaking of makeHeaderWithAdditionalColumns + removing no…
nevrome Sep 9, 2025
4581e6d
brought back some note columns for the init template
nevrome Sep 9, 2025
35f2eca
stylish-haskell
nevrome Sep 9, 2025
e329520
adjusted regular tests
nevrome Sep 9, 2025
7707e3e
adjustments to golden test test data
nevrome Sep 9, 2025
12a8f32
added a little test for the new makeHeaderWithAdditionalColumns
nevrome Sep 9, 2025
1820432
implemented code layout changes suggested by @stschiff
nevrome Sep 13, 2025
cb2b59a
changelog entry
nevrome Sep 13, 2025
ca59cd9
Merge pull request #358 from poseidon-framework/jannoColOutSort
nevrome Sep 13, 2025
b7b840e
merge conflict
nevrome Oct 27, 2025
667edfb
solving merge conflict
nevrome Oct 27, 2025
d20720c
stylish haskell
nevrome Oct 27, 2025
0d4a557
update of changelog, to bring it in sync with the schema repo dev PR
nevrome Dec 1, 2025
b276cdb
added the new column Alternative_IDs_Context
nevrome Dec 8, 2025
b864005
ran and updated tests, found a bug in the implementation
nevrome Dec 8, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
- V X.X.X.X:
- Changes to .janno columns according to Poseidon v3.0.0:
- Replaced column `Source_Tissue` with column `Source_Material`.
- New column `Species`.
- New column `Custodian_Institution`.
- New columns `Cultural_Era` + `Cultural_Era_URL` and `Archaeological_Culture` + `Archaeological_Culture_URL`.
- New column `Chromosomal_Anomalies`.
- New column `Alternative_IDs_Context` linked to `Alternative_IDs`.
- Made column `Collection_ID` a list column.
- Soft-retired the option `ReferenceGenome` in the column `Capture_Type`.
- Added rescaling feature for the columns `Endogenous` and `Damage` for packages below Poseidon v3.0.0.. (TODO: Rethink the order in which that is done)
- Made column `Damage` a list column.
- Changed the handling of `_Note` columns. Previously they were explicitly specified and part of the `JannoRow` record type. Now they are just treated as arbitrary additional columns that get algorithmically sorted in when writing .janno files (e.g. in `forge`). See `makeHeaderWithAdditionalColumns`.
- Changes to .ssf columns according to Poseidon v3.0.0:
- New column `submitted_md5`.
- V 1.6.8.0:
- Added a mechanism to check for the presence and completeness of usually optional .janno and .ssf columns. It is exclusively used in `validate`, where a user can set one or multiple of these additional mandatory columns with `-j,--mandatoryJannoColumn` and `-s,--mandatorySSFColumn`.
- Fixed the golden tests for `validate`. They had become ineffective, because `validate` does not generate stdout any more.
Expand Down
54 changes: 28 additions & 26 deletions src/Poseidon/CLI/Rectify.hs
Original file line number Diff line number Diff line change
Expand Up @@ -4,32 +4,34 @@ module Poseidon.CLI.Rectify (
runRectify, RectifyOptions (..), PackageVersionUpdate (..), ChecksumsToRectify (..)
) where

import Poseidon.Contributor (ContributorSpec (..))
import Poseidon.EntityTypes (HasNameAndVersion (..),
PacNameAndVersion (..),
renderNameWithVersion)
import Poseidon.GenotypeData (GenotypeDataSpec (..),
GenotypeFileSpec (..))
import Poseidon.Janno (writeJannoFileWithoutEmptyCols)
import Poseidon.Package (PackageReadOptions (..),
PoseidonPackage (..),
defaultPackageReadOptions,
readPoseidonPackageCollection,
writePoseidonPackage)
import Poseidon.Utils (PoseidonIO, getChecksum, logDebug,
logInfo, logWarning)
import Poseidon.Version (VersionComponent (..),
updateThreeComponentVersion)
import Poseidon.Contributor (ContributorSpec (..))
import Poseidon.EntityTypes (HasNameAndVersion (..),
PacNameAndVersion (..),
renderNameWithVersion)
import Poseidon.GenotypeData (GenotypeDataSpec (..),
GenotypeFileSpec (..))
import Poseidon.Janno (writeJannoFileWithoutEmptyCols)
import Poseidon.Package (PackageReadOptions (..),
PoseidonPackage (..),
defaultPackageReadOptions,
readPoseidonPackageCollection,
writePoseidonPackage)
import Poseidon.PoseidonVersion (PoseidonVersion (..))
import Poseidon.Utils (PoseidonIO, getChecksum, logDebug,
logInfo, logWarning)
import Poseidon.Version (VersionComponent (..),
updateThreeComponentVersion)

import Control.DeepSeq ((<$!!>))
import Control.Monad (when)
import Control.Monad.IO.Class (MonadIO, liftIO)
import Data.List (nub)
import Data.Maybe (fromJust)
import Data.Time (UTCTime (..), getCurrentTime)
import Data.Version (Version (..), makeVersion, showVersion)
import System.Directory (doesFileExist, removeFile)
import System.FilePath ((</>))
import Control.DeepSeq ((<$!!>))
import Control.Monad (when)
import Control.Monad.IO.Class (MonadIO, liftIO)
import Data.List (nub)
import Data.Maybe (fromJust)
import Data.Time (UTCTime (..), getCurrentTime)
import Data.Version (Version (..), makeVersion,
showVersion)
import System.Directory (doesFileExist, removeFile)
import System.FilePath ((</>))

data RectifyOptions = RectifyOptions
{ _rectifyBaseDirs :: [FilePath]
Expand Down Expand Up @@ -96,7 +98,7 @@ updatePoseidonVersion :: Maybe Version -> PoseidonPackage -> PoseidonIO Poseidon
updatePoseidonVersion Nothing pac = return pac
updatePoseidonVersion (Just ver) pac = do
logDebug "Updating Poseidon version"
return pac { posPacPoseidonVersion = ver }
return pac { posPacPoseidonVersion = PoseidonVersion ver }

addContributors :: Maybe [ContributorSpec] -> PoseidonPackage -> PoseidonIO PoseidonPackage
addContributors Nothing pac = return pac
Expand Down
133 changes: 106 additions & 27 deletions src/Poseidon/ColumnTypesJanno.hs
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,18 @@ instance Csv.FromField GeneticSex where parseField = parseTypeCSV "Genetic_Sex"
newtype GroupName = GroupName T.Text deriving (Eq, Ord)
$(makeInstances ''GroupName "Group_Name")

-- | A datatype for the Species .janno column
newtype JannoSpecies = JannoSpecies T.Text deriving (Eq, Ord)
$(makeInstances ''JannoSpecies "Species")

-- | A datatype for the Alternative_IDs .janno column
newtype JannoAlternativeID = JannoAlternativeID T.Text deriving (Eq)
$(makeInstances ''JannoAlternativeID "Alternative_IDs")

-- | A datatype for the Alternative_IDs_Context .janno column
newtype JannoAlternativeIDContext = JannoAlternativeIDContext T.Text deriving (Eq)
$(makeInstances ''JannoAlternativeIDContext "Alternative_IDs_Context")

-- | A datatype for the Relation_To .janno column
newtype JannoRelationTo = JannoRelationTo T.Text deriving (Eq)
$(makeInstances ''JannoRelationTo "Relation_To")
Expand Down Expand Up @@ -94,14 +102,56 @@ instance Csv.FromField JannoRelationDegree where parseField = parseTypeCSV "Rela
newtype JannoRelationType = JannoRelationType T.Text deriving (Eq)
$(makeInstances ''JannoRelationType "Relation_Type")

-- | A datatype for the Relation_Note .janno column
newtype JannoRelationNote = JannoRelationNote T.Text deriving (Eq)
$(makeInstances ''JannoRelationNote "Relation_Note")

-- | A datatype for the Collection_ID .janno column
newtype JannoCollectionID = JannoCollectionID T.Text deriving (Eq)
$(makeInstances ''JannoCollectionID "Collection_ID")

-- | A datatype for the Custodian_Institution .janno column
newtype JannoCustodianInstitution = JannoCustodianInstitution T.Text deriving (Eq)
$(makeInstances ''JannoCustodianInstitution "Custodian_Institution")

-- | A datatype for the Cultural_Era .janno column
newtype JannoCulturalEra = JannoCulturalEra T.Text deriving (Eq)
$(makeInstances ''JannoCulturalEra "Cultural_Era")

-- | A datatype for the Cultural_Era_URL .janno column
newtype JannoCulturalEraURL = JannoCulturalEraURL T.Text deriving (Eq, Ord, Generic)

instance Makeable JannoCulturalEraURL where
make x
| isURIReference (T.unpack x) = pure $ JannoCulturalEraURL x
| otherwise = fail $ "Cultural_Era_URL " ++ show x ++ " is not a well structured URI."
instance Suspicious JannoCulturalEraURL where
inspect (JannoCulturalEraURL x)
| T.isInfixOf "n2t.net/ark" x = Nothing
| T.isInfixOf "chronontology.dainst.org/period" x = Nothing
| otherwise = Just ["Archaeological_Culture_URL " ++ show x ++ " probably not a valid PeriodO \
\or ChronOntology permalink."]
instance Show JannoCulturalEraURL where show (JannoCulturalEraURL x) = T.unpack x
instance Csv.ToField JannoCulturalEraURL where toField (JannoCulturalEraURL x) = Csv.toField x
instance Csv.FromField JannoCulturalEraURL where parseField = parseTypeCSV "Cultural_Era_URL"

-- | A datatype for the Archaeological_Culture .janno column
newtype JannoArchaeologicalCulture = JannoArchaeologicalCulture T.Text deriving (Eq)
$(makeInstances ''JannoArchaeologicalCulture "Archaeological_Culture")

-- | A datatype for the Archaeological_Culture_URL .janno column
newtype JannoArchaeologicalCultureURL = JannoArchaeologicalCultureURL T.Text deriving (Eq, Ord, Generic)

instance Makeable JannoArchaeologicalCultureURL where
make x
| isURIReference (T.unpack x) = pure $ JannoArchaeologicalCultureURL x
| otherwise = fail $ "Archaeological_Culture_URL " ++ show x ++ " is not a well structured URI."
instance Suspicious JannoArchaeologicalCultureURL where
inspect (JannoArchaeologicalCultureURL x)
| T.isInfixOf "n2t.net/ark" x = Nothing
| T.isInfixOf "chronontology.dainst.org/period" x = Nothing
| otherwise = Just ["Archaeological_Culture_URL " ++ show x ++ " probably not a valid PeriodO \
\or ChronOntology permalink."]
instance Show JannoArchaeologicalCultureURL where show (JannoArchaeologicalCultureURL x) = T.unpack x
instance Csv.ToField JannoArchaeologicalCultureURL where toField (JannoArchaeologicalCultureURL x) = Csv.toField x
instance Csv.FromField JannoArchaeologicalCultureURL where parseField = parseTypeCSV "Archaeological_Culture_URL"

-- | A datatype for the Country .janno column
newtype JannoCountry = JannoCountry T.Text deriving (Eq, Ord)
$(makeInstances ''JannoCountry "Country")
Expand Down Expand Up @@ -278,9 +328,9 @@ instance Show JannoDateBCADStop where show (JannoDateBCADStop x) = show
instance Csv.ToField JannoDateBCADStop where toField (JannoDateBCADStop x) = Csv.toField x
instance Csv.FromField JannoDateBCADStop where parseField = parseTypeCSV "Date_BC_AD_Stop"

-- | A datatype for the Date_Note .janno column
newtype JannoDateNote = JannoDateNote T.Text deriving (Eq, Ord)
$(makeInstances ''JannoDateNote "Date_Note")
-- | A datatype for the Chromosomal_Anomalies .janno column
newtype JannoChromosomalAnomalies = JannoChromosomalAnomalies T.Text deriving (Eq)
$(makeInstances ''JannoChromosomalAnomalies "Chromosomal_Anomalies")

-- | A datatype for the MT_Haplogroup .janno column
newtype JannoMTHaplogroup = JannoMTHaplogroup T.Text deriving (Eq, Ord)
Expand All @@ -290,9 +340,40 @@ $(makeInstances ''JannoMTHaplogroup "MT_Haplogroup")
newtype JannoYHaplogroup = JannoYHaplogroup T.Text deriving (Eq, Ord)
$(makeInstances ''JannoYHaplogroup "Y_Haplogroup")

-- | A datatype for the Source_Tissue .janno column
newtype JannoSourceTissue = JannoSourceTissue T.Text deriving (Eq)
$(makeInstances ''JannoSourceTissue "Source_Tissue")
-- | A datatype for the Source_Material .janno column
data JannoSourceMaterial =
MaterialPetrous
| MaterialBone
| MaterialTooth
| MaterialHair
| MaterialSoft
| MaterialSediment
| MaterialOther
deriving (Eq, Ord, Generic, Enum, Bounded)

instance Makeable JannoSourceMaterial where
make x
| x == "petrous" = pure MaterialPetrous
| x == "bone" = pure MaterialBone
| x == "tooth" = pure MaterialTooth
| x == "hair" = pure MaterialHair
| x == "soft" = pure MaterialSoft
| x == "sediment" = pure MaterialSediment
| x == "other" = pure MaterialOther
| otherwise = fail $ "Source_Material is set to " ++ show x ++ ". " ++
"That is not in the allowed set [petrous, bone, tooth, hair, \
\soft, sediment, other]."
instance Suspicious JannoSourceMaterial where inspect _ = Nothing
instance Show JannoSourceMaterial where
show MaterialPetrous = "petrous"
show MaterialBone = "bone"
show MaterialTooth = "tooth"
show MaterialHair = "hair"
show MaterialSoft = "soft"
show MaterialSediment = "sediment"
show MaterialOther = "other"
instance Csv.ToField JannoSourceMaterial where toField x = Csv.toField $ show x
instance Csv.FromField JannoSourceMaterial where parseField = parseTypeCSV "Source_Material"

-- | A datatype for the Nr_Libraries .janno column
newtype JannoNrLibraries = JannoNrLibraries Int deriving (Eq, Ord, Generic)
Expand Down Expand Up @@ -324,7 +405,7 @@ data JannoCaptureType =
| ArborAncestralPlus
| TwistAncientDNA
| OtherCapture
| ReferenceGenome
| LegacyReferenceGenome -- was removed in Poseidon v3.0.0, kept here for compatibility
deriving (Eq, Ord, Generic, Enum, Bounded)

instance Makeable JannoCaptureType where
Expand All @@ -336,21 +417,23 @@ instance Makeable JannoCaptureType where
| x == "ArborAncestralPlus" = pure ArborAncestralPlus
| x == "TwistAncientDNA" = pure TwistAncientDNA
| x == "OtherCapture" = pure OtherCapture
| x == "ReferenceGenome" = pure ReferenceGenome
| x == "ReferenceGenome" = pure LegacyReferenceGenome
| otherwise = fail $ "Capture_Type is set to " ++ show x ++ ". " ++
"That is not in the allowed set [Shotgun, 1240K, ArborComplete, \
\ArborPrimePlus, ArborAncestralPlus, TwistAncientDNA, OtherCapture, \
\ReferenceGenome]."
instance Suspicious JannoCaptureType where inspect _ = Nothing
\ArborPrimePlus, ArborAncestralPlus, TwistAncientDNA, OtherCapture]."
instance Suspicious JannoCaptureType where
inspect LegacyReferenceGenome = Just ["Capture_Type is set to ReferenceGenome, which is not a \
\capture setup. This option was retired in Poseidon v3.0.0."]
inspect _ = Nothing
instance Show JannoCaptureType where
show Shotgun = "Shotgun"
show A1240K = "1240K"
show ArborComplete = "ArborComplete"
show ArborPrimePlus = "ArborPrimePlus"
show ArborAncestralPlus = "ArborAncestralPlus"
show TwistAncientDNA = "TwistAncientDNA"
show OtherCapture = "OtherCapture"
show ReferenceGenome = "ReferenceGenome"
show Shotgun = "Shotgun"
show A1240K = "1240K"
show ArborComplete = "ArborComplete"
show ArborPrimePlus = "ArborPrimePlus"
show ArborAncestralPlus = "ArborAncestralPlus"
show TwistAncientDNA = "TwistAncientDNA"
show OtherCapture = "OtherCapture"
show LegacyReferenceGenome = "ReferenceGenome"
instance Csv.ToField JannoCaptureType where toField x = Csv.toField $ show x
instance Csv.FromField JannoCaptureType where parseField = parseTypeCSV "Capture_Type"

Expand Down Expand Up @@ -515,10 +598,6 @@ $(makeInstances ''JannoContaminationErr "Contamination_Err")
newtype JannoContaminationMeas = JannoContaminationMeas T.Text deriving (Eq)
$(makeInstances ''JannoContaminationMeas "Contamination_Meas")

-- | A datatype for the Contamination_Note .janno column
newtype JannoContaminationNote = JannoContaminationNote T.Text deriving (Eq)
$(makeInstances ''JannoContaminationNote "Contamination_Note")

-- | A datatype for the Genetic_Source_Accession_IDs .janno column
newtype JannoGeneticSourceAccessionID = JannoGeneticSourceAccessionID AccessionID
deriving (Eq, Ord, Generic)
Expand Down
12 changes: 12 additions & 0 deletions src/Poseidon/ColumnTypesSSF.hs
Original file line number Diff line number Diff line change
Expand Up @@ -276,3 +276,15 @@ instance Show SSFSubmittedFTPURI where show (SSFSubmittedFTPURI x) = T.unpack x
instance Csv.ToField SSFSubmittedFTPURI where toField x = Csv.toField $ show x
instance Csv.FromField SSFSubmittedFTPURI where parseField = parseTypeCSV "submitted_ftp"

-- | A datatype for the submitted_md5 .ssf column
newtype SSFSubmittedMD5 = SSFSubmittedMD5 T.Text deriving (Eq, Ord, Generic)

instance Makeable SSFSubmittedMD5 where
make x
| isMD5Hash x = pure $ SSFSubmittedMD5 x
| otherwise = fail $ "submitted_md5 " ++ show x ++
" does not contain a well-structured MD5 hash"
instance Suspicious SSFSubmittedMD5 where inspect _ = Nothing
instance Show SSFSubmittedMD5 where show (SSFSubmittedMD5 x) = T.unpack x
instance Csv.ToField SSFSubmittedMD5 where toField x = Csv.toField $ show x
instance Csv.FromField SSFSubmittedMD5 where parseField = parseTypeCSV "submitted_md5"
14 changes: 14 additions & 0 deletions src/Poseidon/ColumnTypesUtils.hs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@

module Poseidon.ColumnTypesUtils where

import qualified Control.Monad.Except as E
import qualified Control.Monad.Writer as W
import Data.ByteString as S
import qualified Data.ByteString.Char8 as Bchs
import Data.Char (chr, ord)
Expand Down Expand Up @@ -86,6 +88,18 @@ encodingOptions = Csv.defaultEncodeOptions {
, Csv.encQuoting = Csv.QuoteMinimal
}

-- | A data type for row-wise cross-column consistency checks in either .janno or .ssf
type RowLog = E.ExceptT String (W.Writer [String])
-- first string: error in case of failure
-- string list: warnings

getCellLength :: Maybe (ListColumn a) -> Int
getCellLength = maybe 0 (Prelude.length . getListColumn)

allEqual :: Eq a => [a] -> Bool
allEqual [] = True
allEqual x = Prelude.length (L.nub x) == 1

-- | A datatype to collect additional, unpecified .csv/.tsv file columns (a hashmap in cassava/Data.Csv)
newtype CsvNamedRecord = CsvNamedRecord Csv.NamedRecord deriving (Show, Eq, G.Generic)

Expand Down
Loading