Skip to content

Commit b43f0bb

Browse files
authored
Fix reading of lammpsdump ATOM card. (#3360)
Fixes #3358 Supersedes #2333 ## Changes made in this PR - Allow coordinates in different conventions to be read by LAMMPSDumpReader - Unhardcode LAMMPSDumpReader to expect a set number of columns - S->R transform with distances.transform_StoR is no longer applied by default, only if the coordinates are in the scaled (xs,ys,zs) or scaled_unwrapped (xsu, ysu, zsu) convention
1 parent ddb1d95 commit b43f0bb

File tree

9 files changed

+220
-24
lines changed

9 files changed

+220
-24
lines changed

package/CHANGELOG

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,9 @@ Fixes
126126
* new `Results` class now can be pickled/unpickled. (PR #3309)
127127

128128
Enhancements
129+
* LAMMPSDumpReader can now read coordinates in all the different LAMMPS
130+
coordinate conventions. Both LAMMPSDumpReader and LAMMPSDumpParser are no
131+
longer hardcoded to a set column layout (Issue #3358, PR #3360).
129132
* Add a ``sort`` keyword to AtomGroup.select_atoms for ordered selections
130133
(Issue #3364, PR #3368)
131134
* Adds AtomGroup.asunique, ResidueGroup.asunique, SegmentGroup.asunique

package/MDAnalysis/coordinates/LAMMPS.py

Lines changed: 71 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,9 @@
119119
.. autoclass:: DATAWriter
120120
:members:
121121
:inherited-members:
122+
.. autoclass:: DumpReader
123+
:members:
124+
:inherited-members:
122125
123126
"""
124127
import os
@@ -449,19 +452,46 @@ def write(self, selection, frame=None):
449452
class DumpReader(base.ReaderBase):
450453
"""Reads the default `LAMMPS dump format`_
451454
452-
Expects trajectories produced by the default 'atom' style dump.
455+
Supports coordinates in the LAMMPS "unscaled" (x,y,z), "scaled" (xs,ys,zs),
456+
"unwrapped" (xu,yu,zu) and "scaled_unwrapped" (xsu,ysu,zsu) coordinate
457+
conventions (see https://docs.lammps.org/dump.html for more details).
458+
If `lammps_coordinate_convention='auto'` (default),
459+
one will be guessed. Guessing checks whether the coordinates fit each convention in the order "unscaled",
460+
"scaled", "unwrapped", "scaled_unwrapped" and whichever set of coordinates
461+
is detected first will be used. If coordinates are given in the scaled
462+
coordinate convention (xs,ys,zs) or scaled unwrapped coordinate convention
463+
(xsu,ysu,zsu) they will automatically be converted from their
464+
scaled/fractional representation to their real values.
453465
454-
Will automatically convert positions from their scaled/fractional
455-
representation to their real values.
456466
467+
.. versionchanged:: 2.0.0
468+
Now parses coordinates in multiple lammps conventions (x,xs,xu,xsu)
457469
.. versionadded:: 0.19.0
458470
"""
459471
format = 'LAMMPSDUMP'
460-
461-
def __init__(self, filename, **kwargs):
472+
_conventions = ["auto", "unscaled", "scaled", "unwrapped",
473+
"scaled_unwrapped"]
474+
_coordtype_column_names = {
475+
"unscaled": ["x", "y", "z"],
476+
"scaled": ["xs", "ys", "zs"],
477+
"unwrapped": ["xu", "yu", "zu"],
478+
"scaled_unwrapped": ["xsu", "ysu", "zsu"]
479+
}
480+
481+
def __init__(self, filename, lammps_coordinate_convention="auto",
482+
**kwargs):
462483
super(DumpReader, self).__init__(filename, **kwargs)
463484

464485
root, ext = os.path.splitext(self.filename)
486+
if lammps_coordinate_convention in self._conventions:
487+
self.lammps_coordinate_convention = lammps_coordinate_convention
488+
else:
489+
option_string = "'" + "', '".join(self._conventions) + "'"
490+
raise ValueError("lammps_coordinate_convention="
491+
f"'{lammps_coordinate_convention}'"
492+
" is not a valid option. "
493+
f"Please choose one of {option_string}")
494+
465495
self._cache = {}
466496

467497
self._reopen()
@@ -518,15 +548,15 @@ def _read_next_timestep(self):
518548
if ts.frame >= len(self):
519549
raise EOFError
520550

521-
f.readline() # ITEM TIMESTEP
551+
f.readline() # ITEM TIMESTEP
522552
step_num = int(f.readline())
523553
ts.data['step'] = step_num
524554

525-
f.readline() # ITEM NUMBER OF ATOMS
555+
f.readline() # ITEM NUMBER OF ATOMS
526556
n_atoms = int(f.readline())
527557
if n_atoms != self.n_atoms:
528558
raise ValueError("Number of atoms in trajectory changed "
529-
"this is not suported in MDAnalysis")
559+
"this is not supported in MDAnalysis")
530560

531561
triclinic = len(f.readline().split()) == 9 # ITEM BOX BOUNDS
532562
if triclinic:
@@ -552,16 +582,42 @@ def _read_next_timestep(self):
552582

553583
indices = np.zeros(self.n_atoms, dtype=int)
554584

555-
f.readline() # ITEM ATOMS etc
556-
for i in range(self.n_atoms):
557-
idx, _, xs, ys, zs = f.readline().split()
585+
atomline = f.readline() # ITEM ATOMS etc
586+
attrs = atomline.split()[2:] # attributes on coordinate line
587+
attr_to_col_ix = {x: i for i, x in enumerate(attrs)}
588+
convention_to_col_ix = {}
589+
for cv_name, cv_col_names in self._coordtype_column_names.items():
590+
try:
591+
convention_to_col_ix[cv_name] = [attr_to_col_ix[x] for x in cv_col_names]
592+
except KeyError:
593+
pass
594+
# this should only trigger on first read of "ATOM" card, after which it
595+
# is fixed to the guessed value. Auto proceeds unscaled -> scaled
596+
# -> unwrapped -> scaled_unwrapped
597+
if self.lammps_coordinate_convention == "auto":
598+
try:
599+
# this will automatically select in order of priority
600+
# unscaled, scaled, unwrapped, scaled_unwrapped
601+
self.lammps_coordinate_convention = list(convention_to_col_ix)[0]
602+
except IndexError:
603+
raise ValueError("No coordinate information detected")
604+
elif not self.lammps_coordinate_convention in convention_to_col_ix:
605+
raise ValueError(f"No coordinates following convention {self.lammps_coordinate_convention} found in timestep")
606+
607+
coord_cols = convention_to_col_ix[self.lammps_coordinate_convention]
558608

559-
indices[i] = idx
560-
ts.positions[i] = xs, ys, zs
609+
ids = "id" in attr_to_col_ix
610+
for i in range(self.n_atoms):
611+
fields = f.readline().split()
612+
if ids:
613+
indices[i] = fields[attr_to_col_ix["id"]]
614+
ts.positions[i] = [fields[dim] for dim in coord_cols]
561615

562616
order = np.argsort(indices)
563617
ts.positions = ts.positions[order]
564-
# by default coordinates are given in scaled format, undo that
565-
ts.positions = distances.transform_StoR(ts.positions, ts.dimensions)
618+
if (self.lammps_coordinate_convention.startswith("scaled")):
619+
# if coordinates are given in scaled format, undo that
620+
ts.positions = distances.transform_StoR(ts.positions,
621+
ts.dimensions)
566622

567623
return ts

package/MDAnalysis/topology/LAMMPSParser.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -584,10 +584,12 @@ def _parse_box(self, header):
584584

585585

586586
class LammpsDumpParser(TopologyReaderBase):
587-
"""Parses Lammps ascii dump files in 'atom' format
587+
"""Parses Lammps ascii dump files in 'atom' format.
588588
589-
Only reads atom ids. Sets all masses to 1.0.
589+
Sets all masses to 1.0.
590590
591+
592+
.. versionchanged:: 2.0.0
591593
.. versionadded:: 0.19.0
592594
"""
593595
format = 'LAMMPSDUMP'
@@ -608,12 +610,15 @@ def parse(self, **kwargs):
608610
indices = np.zeros(natoms, dtype=int)
609611
types = np.zeros(natoms, dtype=object)
610612

611-
fin.readline() # ITEM ATOMS
613+
atomline = fin.readline() # ITEM ATOMS
614+
attrs = atomline.split()[2:] # attributes on coordinate line
615+
col_ids = {attr: i for i, attr in enumerate(attrs)} # column ids
616+
612617
for i in range(natoms):
613-
idx, atype, _, _, _ = fin.readline().split()
618+
fields = fin.readline().split()
614619

615-
indices[i] = idx
616-
types[i] = atype
620+
indices[i] = fields[col_ids["id"]]
621+
types[i] = fields[col_ids["type"]]
617622

618623
order = np.argsort(indices)
619624
indices = indices[order]

testsuite/MDAnalysisTests/coordinates/test_lammps.py

Lines changed: 121 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@
3636
RefLAMMPSData, RefLAMMPSDataMini, RefLAMMPSDataDCD,
3737
)
3838
from MDAnalysisTests.datafiles import (
39-
LAMMPScnt, LAMMPShyd, LAMMPSdata, LAMMPSdata_mini, LAMMPSDUMP
39+
LAMMPScnt, LAMMPShyd, LAMMPSdata, LAMMPSdata_mini, LAMMPSDUMP,
40+
LAMMPSDUMP_allcoords, LAMMPSDUMP_nocoords
4041
)
4142

4243

@@ -119,7 +120,7 @@ def test_Writer_dimensions(self, LAMMPSDATAWriter):
119120
assert_almost_equal(u_ref.dimensions, u_new.dimensions,
120121
err_msg="attributes different after writing",
121122
decimal=6)
122-
123+
123124
def test_Writer_atoms_types(self, LAMMPSDATAWriter):
124125
u_ref, u_new = LAMMPSDATAWriter
125126
assert_equal(u_ref.atoms.types, u_new.atoms.types,
@@ -434,7 +435,8 @@ def u(self, tmpdir, request):
434435
with gzip.GzipFile(f, 'wb') as fout:
435436
fout.write(data)
436437

437-
yield mda.Universe(f, format='LAMMPSDUMP')
438+
yield mda.Universe(f, format='LAMMPSDUMP',
439+
lammps_coordinate_convention="auto")
438440

439441
@pytest.fixture()
440442
def reference_positions(self):
@@ -507,3 +509,119 @@ def test_atom_reordering(self, u, reference_positions):
507509
reference_positions['atom13_pos']):
508510
assert_almost_equal(atom1.position, atom1_pos, decimal=5)
509511
assert_almost_equal(atom13.position, atom13_pos, decimal=5)
512+
513+
514+
@pytest.mark.parametrize("convention",
515+
["unscaled", "unwrapped", "scaled_unwrapped"])
516+
def test_open_absent_convention_fails(convention):
517+
with pytest.raises(ValueError, match="No coordinates following"):
518+
mda.Universe(LAMMPSDUMP, format='LAMMPSDUMP',
519+
lammps_coordinate_convention=convention)
520+
521+
522+
def test_open_incorrect_convention_fails():
523+
with pytest.raises(ValueError,
524+
match="is not a valid option"):
525+
mda.Universe(LAMMPSDUMP, format='LAMMPSDUMP',
526+
lammps_coordinate_convention="42")
527+
528+
529+
@pytest.mark.parametrize("convention,result",
530+
[("auto", "unscaled"), ("unscaled", "unscaled"),
531+
("scaled", "scaled"), ("unwrapped", "unwrapped"),
532+
("scaled_unwrapped", "scaled_unwrapped")])
533+
def test_open_all_convention(convention, result):
534+
u = mda.Universe(LAMMPSDUMP_allcoords, format='LAMMPSDUMP',
535+
lammps_coordinate_convention=convention)
536+
assert(u.trajectory.lammps_coordinate_convention == result)
537+
538+
539+
def test_no_coordinate_info():
540+
with pytest.raises(ValueError, match="No coordinate information detected"):
541+
u = mda.Universe(LAMMPSDUMP_nocoords, format='LAMMPSDUMP',
542+
lammps_coordinate_convention="auto")
543+
544+
545+
class TestCoordinateMatches(object):
546+
@pytest.fixture()
547+
def universes(self):
548+
coordinate_conventions = ["auto", "unscaled", "scaled", "unwrapped",
549+
"scaled_unwrapped"]
550+
universes = {i: mda.Universe(LAMMPSDUMP_allcoords, format='LAMMPSDUMP',
551+
lammps_coordinate_convention=i)
552+
for i in coordinate_conventions}
553+
return universes
554+
555+
@pytest.fixture()
556+
def reference_unscaled_positions(self):
557+
# copied from trajectory file
558+
# atom 340 is the first one in the trajectory so we use that
559+
atom340_pos1_unscaled = [4.48355, 0.331422, 1.59231]
560+
atom340_pos2_unscaled = [4.41947, 35.4403, 2.25115]
561+
atom340_pos3_unscaled = [4.48989, 0.360633, 2.63623]
562+
return np.asarray([atom340_pos1_unscaled, atom340_pos2_unscaled,
563+
atom340_pos3_unscaled])
564+
565+
def test_unscaled_reference(self, universes, reference_unscaled_positions):
566+
atom_340 = universes["unscaled"].atoms[339]
567+
for i, ts_u in enumerate(universes["unscaled"].trajectory[0:3]):
568+
assert_almost_equal(atom_340.position,
569+
reference_unscaled_positions[i, :], decimal=5)
570+
571+
def test_scaled_reference(self, universes, reference_unscaled_positions):
572+
# NOTE use of unscaled positions here due to S->R transform
573+
atom_340 = universes["scaled"].atoms[339]
574+
for i, ts_u in enumerate(universes["scaled"].trajectory[0:3]):
575+
assert_almost_equal(atom_340.position,
576+
reference_unscaled_positions[i, :], decimal=1)
577+
# NOTE this seems a bit inaccurate?
578+
579+
@pytest.fixture()
580+
def reference_unwrapped_positions(self):
581+
# copied from trajectory file
582+
# atom 340 is the first one in the trajectory so we use that
583+
atom340_pos1_unwrapped = [4.48355, 35.8378, 1.59231]
584+
atom340_pos2_unwrapped = [4.41947, 35.4403, 2.25115]
585+
atom340_pos3_unwrapped = [4.48989, 35.867, 2.63623]
586+
return np.asarray([atom340_pos1_unwrapped, atom340_pos2_unwrapped,
587+
atom340_pos3_unwrapped])
588+
589+
def test_unwrapped_scaled_reference(self, universes,
590+
reference_unwrapped_positions):
591+
atom_340 = universes["unwrapped"].atoms[339]
592+
for i, ts_u in enumerate(universes["unwrapped"].trajectory[0:3]):
593+
assert_almost_equal(atom_340.position,
594+
reference_unwrapped_positions[i, :], decimal=5)
595+
596+
def test_unwrapped_scaled_reference(self, universes,
597+
reference_unwrapped_positions):
598+
# NOTE use of unscaled positions here due to S->R transform
599+
atom_340 = universes["scaled_unwrapped"].atoms[339]
600+
for i, ts_u in enumerate(
601+
universes["scaled_unwrapped"].trajectory[0:3]):
602+
assert_almost_equal(atom_340.position,
603+
reference_unwrapped_positions[i, :], decimal=1)
604+
# NOTE this seems a bit inaccurate?
605+
606+
def test_scaled_unscaled_match(self, universes):
607+
assert(len(universes["unscaled"].trajectory)
608+
== len(universes["scaled"].trajectory))
609+
for ts_u, ts_s in zip(universes["unscaled"].trajectory,
610+
universes["scaled"].trajectory):
611+
assert_almost_equal(ts_u.positions, ts_s.positions, decimal=1)
612+
# NOTE this seems a bit inaccurate?
613+
614+
def test_unwrapped_scaled_unwrapped_match(self, universes):
615+
assert(len(universes["unwrapped"].trajectory) ==
616+
len(universes["scaled_unwrapped"].trajectory))
617+
for ts_u, ts_s in zip(universes["unwrapped"].trajectory,
618+
universes["scaled_unwrapped"].trajectory):
619+
assert_almost_equal(ts_u.positions, ts_s.positions, decimal=1)
620+
# NOTE this seems a bit inaccurate?
621+
622+
def test_auto_is_unscaled_match(self, universes):
623+
assert(len(universes["auto"].trajectory) ==
624+
len(universes["unscaled"].trajectory))
625+
for ts_a, ts_s in zip(universes["auto"].trajectory,
626+
universes["unscaled"].trajectory):
627+
assert_almost_equal(ts_a.positions, ts_s.positions, decimal=5)
Binary file not shown.
Binary file not shown.
1003 Bytes
Binary file not shown.

testsuite/MDAnalysisTests/datafiles.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,9 @@
137137
"LAMMPShyd", "LAMMPShyd2",
138138
"LAMMPSdata_deletedatoms", # with deleted atoms
139139
"LAMMPSDUMP",
140+
"LAMMPSDUMP_long", # lammpsdump file with a few zeros sprinkled in the first column first frame
141+
"LAMMPSDUMP_allcoords", # lammpsdump file with all coordinate conventions (x,xs,xu,xsu) present, from LAMMPS rdf example
142+
"LAMMPSDUMP_nocoords", # lammpsdump file with no coordinates
140143
"unordered_res", # pdb file with resids non sequential
141144
"GMS_ASYMOPT", # GAMESS C1 optimization
142145
"GMS_SYMOPT", # GAMESS D4h optimization
@@ -484,6 +487,10 @@
484487
LAMMPShyd2 = resource_filename(__name__, "data/lammps/hydrogen-class1.data2")
485488
LAMMPSdata_deletedatoms = resource_filename(__name__, 'data/lammps/deletedatoms.data')
486489
LAMMPSDUMP = resource_filename(__name__, "data/lammps/wat.lammpstrj.bz2")
490+
LAMMPSDUMP_long = resource_filename(__name__, "data/lammps/wat.lammpstrj_long.bz2")
491+
LAMMPSDUMP_allcoords = resource_filename(__name__, "data/lammps/spce_all_coords.lammpstrj.bz2")
492+
LAMMPSDUMP_nocoords = resource_filename(__name__, "data/lammps/spce_no_coords.lammpstrj.bz2")
493+
487494

488495
unordered_res = resource_filename(__name__, "data/unordered_res.pdb")
489496

testsuite/MDAnalysisTests/topology/test_lammpsdata.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
LAMMPShyd2,
3636
LAMMPSdata_deletedatoms,
3737
LAMMPSDUMP,
38+
LAMMPSDUMP_long,
3839
)
3940

4041

@@ -268,3 +269,9 @@ def test_id_ordering(self):
268269
u = mda.Universe(self.ref_filename, format='LAMMPSDUMP')
269270
# the 4th in file has id==13, but should have been sorted
270271
assert u.atoms[3].id == 4
272+
273+
# this tests that topology can still be constructed if non-standard or uneven
274+
# column present.
275+
class TestDumpParserLong(TestDumpParser):
276+
277+
ref_filename = LAMMPSDUMP_long

0 commit comments

Comments
 (0)