diff --git a/eex/README.md b/eex/README.md index 041b035..6384945 100644 --- a/eex/README.md +++ b/eex/README.md @@ -22,19 +22,19 @@ Information for each individual atom is stored in the file layer in store.?? This dictionary is used for atom properties which are *not* unique (i.e. several atoms will have the same values). Has following structure: -``` +``` atom_metadata = { - 'atom_property1' : - { - 'uvals' : { - uid : value, - } - 'inv_uvals' : { - value: uid - }, - 'atom_property2' : ... - - } + 'atom_property1': { + 'uvals': { + 'uid': value, + }, + + 'inv_uvals': { + value: uid + }, + 'atom_property2': ... + }, +} ``` where `atom_property1`, `atom_property2` come from atom_metadata.py @@ -56,11 +56,18 @@ Lists the number of values stored for each metadata item #### dl._terms dict +``` +dl._terms = { + order: { + uid : ['keyword', const, const] + } +} +``` + #### dl._term_count dict ### Nonbonded Terms - # File Layer diff --git a/eex/__init__.py b/eex/__init__.py index ad3a6c9..0aa113b 100644 --- a/eex/__init__.py +++ b/eex/__init__.py @@ -10,6 +10,7 @@ from . import translators from . import utility from . import form_converters +from . import metadata from .units import ureg # Initializes the eex_init metadata diff --git a/eex/datalayer.py b/eex/datalayer.py index dadc17d..825678b 100644 --- a/eex/datalayer.py +++ b/eex/datalayer.py @@ -1987,3 +1987,4 @@ def list_nb_parameters(self, utype=utype) return return_parameters + diff --git a/eex/metadata/__init__.py b/eex/metadata/__init__.py index be7032e..3204cf6 100644 --- a/eex/metadata/__init__.py +++ b/eex/metadata/__init__.py @@ -19,3 +19,6 @@ # Bring in validation function from .validator import validate_term_dict, validate_functional_form_dict, validate_units + +# Bring in compatibility functions +from .compatibility_tools import check_term_compatibility diff --git a/eex/metadata/compatibility_tools.py b/eex/metadata/compatibility_tools.py new file mode 100644 index 0000000..b4cc384 --- /dev/null +++ b/eex/metadata/compatibility_tools.py @@ -0,0 +1,125 @@ +""" + +Functions for performing compatibility checks and conversions for MD programs. + +""" + +import eex +import numpy as np + +def check_term_compatibility(dl, program_term_data): + """ + Checks that term parameters in datalayer are compatible with the output program. Performs conversion if necessary + and possible. + + Parameters + ------------- + dl : eex datalayer + The EEX datalayer object containing information for the system being translated + + program_metadata: dict + Term metadata for program being converted to + + + Return + --------------- + dl : eex datalayer + EEX datalayer which is compatible with program metadata + """ + + # Check that keywords are compatible. This loops through the term data of the program to convert to + for order, term_data in program_term_data.items(): + + # Get allowed keywords for conversion (this is what is in the metadata for a program) + allowed_keywords = list(term_data.keys()) + allowed_canonical_keywords = [] + + # Get canonical forms for what is "allowed" + for allowed_keyword in allowed_keywords: + # Need utility function for this instead of this mess. TODO + term_md = eex.metadata.get_term_metadata(order, "forms", allowed_keyword) + allowed_canonical_keywords.append(term_md["canonical_form"]) + + # Get stored keywords and uids + stored_terms = dl.list_term_parameters(order) + uids = list(stored_terms.keys()) + + stored_keywords = [] + [stored_keywords.append(stored_terms[uids[x]][0]) for x in uids] + + stored_canonical_keywords = [] + + # Get canonical forms for what is stored. + for stored_keyword in stored_keywords: + term_md = eex.metadata.get_term_metadata(order, "forms", stored_keyword) + stored_canonical_keywords.append(term_md["canonical_form"]) + + # Get difference between what is stored in datalayer and what is allowed. Diff will give you what is in + # the datalayer. "Canonical" forms and those that are stored are both compared here. + diff_canonical = np.setdiff1d(stored_canonical_keywords, allowed_canonical_keywords) + diff = np.setdiff1d(stored_keywords, allowed_keywords) + + # If diff is found between canonical, not compatible. Otherwise try for conversion. + if diff_canonical.size > 0: + raise TypeError("Functional forms, %s, found in datalayer are not compatible with required forms %s" % ( + diff_canonical, allowed_keywords)) + elif diff.size == 0: + # Then no conversion is necessary + pass + else: + # The conversion here is complicated/confusing and can likely be improved to be much clearer with some utility functions. + + for uid, term in list(stored_terms.items()): + # Need a utility function for this. TODO + term_parameters = eex.metadata.get_term_metadata(order, "forms", term[0])["parameters"] + convert_parameters = {k: v for k,v in zip(term_parameters, term[1:])} + + # Call conversion function. TODO - this conversion may not work (allowed_keywords[0]) + print("Converting", term[0], "to", allowed_keywords[0]) + new_form = eex.form_converters.convert_form(order, convert_parameters, term[0], allowed_keywords[0]) + + # Remove old form + + # Get indices for terms to remove + all_terms = dl.get_terms(order) + remove_terms = all_terms[all_terms["term_index"] == uid] + atom_columns = [x for x in remove_terms.columns if "atom" in x] + + dl.remove_terms(order, index=remove_terms.index, propagate=False) + + # Remove this term parameter from datalayer. TODO - problem here for multiple removals!! + dl.remove_term_parameter(order, uid) + + # Add new form - Two things must be done here: + # First, the converted functional form must be added using dl.add_term_parameter + # Then, terms themselves must be re-added using dl.add term + + # Build dataframe + # 1. Get atoms involed in term from remvoe terms - 'base_atoms' + # 2. Build df with atoms and term_index + + base_atoms = remove_terms[atom_columns] + new_form_keys = new_form.keys() + + # Loop through new terms to add to dl + for val in range(len(list(new_form.values())[0])): + to_add = {} + + for key in new_form_keys: + to_add[key] = new_form[key][val] + + uid = dl.add_term_parameter(order, allowed_keywords[0], to_add) + base_atoms['term_index'] = uid + + dl.add_terms(order, base_atoms) + + # Return modified dl + + return dl + +def check_atom_metadata(): + """ + Check that all necessary atom metadata is present. + :return: + """ + diff --git a/eex/tests/scratch/test_output b/eex/tests/scratch/test_output index ba779e6..49b8fae 100644 --- a/eex/tests/scratch/test_output +++ b/eex/tests/scratch/test_output @@ -39,8 +39,6 @@ Dihedral Coeffs Masses -1 15.0452 -2 14.02658 Atoms @@ -51,19 +49,10 @@ Dihedral Coeffs Bonds -1 1 1 2 -2 2 2 3 -3 1 3 4 Angles -1 1 1 2 3 -2 1 2 3 4 Dihedrals -1 1 1 2 3 4 -2 2 1 2 3 4 -3 3 1 2 3 4 -4 4 1 2 3 4 diff --git a/eex/tests/test_compatibility_tools.py b/eex/tests/test_compatibility_tools.py new file mode 100644 index 0000000..7df11f7 --- /dev/null +++ b/eex/tests/test_compatibility_tools.py @@ -0,0 +1,195 @@ +""" + +Tests datalayer compatbility functions + +""" +import eex +import pytest + +from eex.translators.amber.amber_ff import term_data as amber_term_data + +""" What tests do we need? + +1. Passes when everything is compatible +2. Fails when not compatible + - Bonds + - Angles + - Dihedrals +3. Performs conversion when applicable + - Dihedrals + +------------ +NB Tests (different format that n body terms) +* Metadata format must be finalized. + +1. Fails with incompatible NB type +2. Returns correct - pair or single + + +""" + +def test_compatibility_no_changes(butane_dl): + + # Initialize datalayer + dl = butane_dl() + + # Grab details from datalayer + term_3_type, term_3 = dl.get_term_parameter(order=3, uid=0) + term_2_type, term_2 = dl.get_term_parameter(order=2, uid=0) + + # Run compatibility check function + eex.metadata.check_term_compatibility(dl, amber_term_data) + + # Grab new details from datalayer + term_3_new_type, term_3_new = dl.get_term_parameter(order=3, uid=0) + term_2_new_type, term_2_new = dl.get_term_parameter(order=2, uid=0) + + # Assert that keywords are the same. + assert term_2_type == term_2_new_type + assert term_3_type == term_3_new_type + + # Assert that parameters are unchanged + eex.testing.dict_compare(term_3_new, term_3) + eex.testing.dict_compare(term_2_new, term_2) + +def test_compatibility_term_2(butane_dl): + + dl = butane_dl(ff=False) + + # Add bond type which is incompatible with amber + dl.add_term_parameter(2, "fene", {'K': 300.9, 'R0': 1.540, 'epsilon': 0, 'sigma': 0}, uid=0) + + # Run compatibility check + with pytest.raises(TypeError): + eex.metadata.check_term_compatibility(dl, amber_term_data) + + +def test_compatibility_term_3(butane_dl): + + dl = butane_dl(ff=False) + + # Add angle type which is incompatible with amber + dl.add_term_parameter(3, "cosine", {'K': 62.100}, uid=0) + + # Run compatibility check + with pytest.raises(TypeError): + eex.metadata.check_term_compatibility(dl, amber_term_data) + +def test_compatibility_term_4(butane_dl): + + dl = butane_dl(ff=False) + + # Add term parameter which is incompatible with amber + dl.add_term_parameter(4, "quadratic", {'K': 1, 'phi0': 0}, uid=0) + + # Run compatibility check + with pytest.raises(TypeError): + eex.metadata.check_term_compatibility(dl, amber_term_data) + + +def test_compatibility_conversion(butane_dl): + + dl = butane_dl() + + # Add term parameter which must be converted to be compatible with amber (opls) + dl.add_term_parameter(4, "opls", {'K_1': 1.41103414, 'K_2': -0.27101489, + 'K_3': 3.14502869, 'K_4': 0}, uid=0, utype={'K_1': 'kcal * mol ** -1', + 'K_2': 'kcal * mol ** -1', + 'K_3': 'kcal * mol ** -1', + 'K_4': 'kcal * mol ** -1'}) + + # Run compatibility check + eex.metadata.check_term_compatibility(dl, amber_term_data) + + term_4_new = dl.list_term_parameters(4) + + for parameter in term_4_new.values(): + assert parameter[0] == list(amber_term_data[4].keys())[0] + + +""" +def test_amber_compatibility_NB_number(butane_dl): + + dl = butane_dl(nb=False) + + oname = eex_find_files.get_scratch_directory("dl_compatibility.prmtop") + + dl.add_nb_parameter(atom_type=1, atom_type2=2, nb_name="LJ", + nb_model="epsilon/sigma", nb_parameters={'sigma': 3.75, 'epsilon': 0.1947460018}, + utype={'sigma': 'angstrom', 'epsilon': 'kcal * mol ** -1'}) + + with pytest.raises(ValueError): + eex.translators.amber.write_amber_file(dl, oname) + +def test_amber_compatibility_NB_type(butane_dl): + + dl = butane_dl(nb=False) + + dl.add_nb_parameter(atom_type=1, nb_name="Buckingham", nb_model=None, nb_parameters=[1.0, 1.0, 1.0]) + + oname = eex_find_files.get_scratch_directory("dl_compatibility.prmtop") + + with pytest.raises(KeyError): + eex.translators.amber.write_amber_file(dl, oname) + +def test_amber_compatibility_check_mixing_rule(butane_dl): + + # Get butane topology + dl = butane_dl(nb=True) + + oname = eex_find_files.get_scratch_directory("dl_compatibility.prmtop") + + # Check to make sure that mixing rule will be applied by compatibility check + eex.translators.amber.write_amber_file(dl, oname) + + # Make sure we're getting pair parameters from DL + nb_pairs = dl.list_nb_parameters(nb_name="LJ", nb_model="AB", itype="pair") + + assert (len(nb_pairs.keys()) == 3) + +def test_amber_compatibility_functional_form(butane_dl): + + dl = butane_dl(ff=False) + + oname = eex_find_files.get_scratch_directory("dl_compatibility.prmtop") + + # Add incompatible functional form + with pytest.raises(TypeError): + dl.add_term_parameter(2, "fene", {"K": 1, "R0": 1, "epsilon": 1, "sigma": 1,}) + eex.translators.amber.write_amber_file(dl, oname) + +def test_amber_compatibility_scaling(butane_dl): + + dl = butane_dl(scale=False) + + oname = eex_find_files.get_scratch_directory("dl_compatibility.prmtop") + + # Set with noncompatible scale13 + scaling_factors = { + "coul": { + "scale12": 0.0, + "scale13": 0.50, + "scale14": 0.75, + }, + + "vdw": { + "scale12": 0.0, + "scale13": 0.0, + "scale14": 0.75, + } + } + + dl.set_nb_scaling_factors(scaling_factors) + + with pytest.raises(ValueError): + eex.translators.amber.write_amber_file(dl, oname) + +def test_amber_compatibility_no_scaling(butane_dl): + + dl = butane_dl(scale=False) + + oname = eex_find_files.get_scratch_directory("dl_compatibility.prmtop") + + with pytest.raises(ValueError): + eex.translators.amber.write_amber_file(dl, oname) +""" diff --git a/eex/translators/README.md b/eex/translators/README.md new file mode 100644 index 0000000..002f98f --- /dev/null +++ b/eex/translators/README.md @@ -0,0 +1,85 @@ +# Steps for writing a plugin for EEX + +This guide is currently *under construction*, but when completed, should explain EEX's reader/writer strategy +and contain instructions for writing reader/writer plugins. + +## 1. Build forcefield files + +**filename**: `program_ff.py` + +**examples**: +- [amber_ff.py](./amber/amber_ff.py) +- [lammps_ff.py](./lammps/lammps_ff.py) + +where `program` refers to the software pacakge for which you are developing a reader/writer plugin. + +This file contains dictionaries for two body (bonds), three body (angles), four body (dihedrals), and nonbonded terms which are valid for the software package. +Bonded terms (i.e. bonds, angles, and dihedrals) should be a dictionary named using the convention `_n_body_functional_forms`. + +Each the keys of each dictionary are keywords (names) for functional forms which are valid in the program. The keywords for each +term dictionary (`_two_body_functional_forms`, etc), should match (excluding constant coefficients) the keyword used in EEX's +internal representation. Valid term forms can be found in `metadata`. + +For bonded terms, each key should have the following sub-keys: + - form: The overall mathematical expression of the term + - parameters: The ordered name of the terms as they will be stored with their expected unit contexts + - units: A dictionary that contains the unit context for each parameter + - description: A short word description of the two-body term style + +For example, the entry for two body (bonded) terms in Amber is shown here: + +``` +_two_body_functional_forms = { + "harmonic": { + "form": "K*(r-R0) ** 2", + "parameters": ["K", "R0"], + "units": { + "K": "kcal * mol ** -1 angstrom ** -2", + "R0": "angstrom" + }, + "description": "This is an amber harmonic bond" + }, +} +``` + +The bottom of the file builds the dictionary `term_data`, where the keys are the order of the term. This metadata will be used first to perform compatibility checks, +to see if a particular translation can be performed, then in the reader and writer plugins. + +In the example above (the amber force field metadata), the units are always the same for a given functional +form. However, in some instances (such as LAMMPS), it may be necessary to specify the general units and build the units used based on keywords. + +For example, the LAMMPS entry for a harmonic bond does not contain specific units. + +``` + "harmonic": { + "form": "K*(r-R0) ** 2", + "parameters": ["K", "R0"], + "units": { + "K": "[energy] * [length] ** -2", + "R0": "[length]" + }, + "description": "This is a harmonic bond" + }, +``` + +There is an additional level where the preferred style is set for nonbonded terms. + +``` +_nonbond_functional_forms = { + "LJ": { + "AB": { + "form": "A/(r ** 12.0) - B/(r ** 6.0)", + "parameters": ["A", "B"], + "units": { + "A": "kcal * mol ** -1 * angstrom ** 12", + "B": "kcal * mol ** -1 * angstrom ** 6", + }, + }, + }, +} +``` + +TODO - program metadata validator + +## 2. Build list of required metadata + diff --git a/eex/translators/amber/amber_ff.py b/eex/translators/amber/amber_ff.py new file mode 100644 index 0000000..b04ade9 --- /dev/null +++ b/eex/translators/amber/amber_ff.py @@ -0,0 +1,73 @@ +""" +Metadata the amber forcefield. + +Each style follows conventions outlined for EEX internal metadata. + +Bonded terms (typically bonds, angles, and dihedrals) should be a dictionary named using the convention `_n_body_functional_forms` + +Each dictionary has the has the following keys: + - form: The overall mathematical expression of the term + - parameters: The ordered name of the terms as they will be stored with their expected unit contexts + - units: A dictionary that contains the unit context for each parameter + - description: A short word description of the two-body term style + +Nonbonded terms are similar, except that an extra key sub-key (again, following the convention set by the internal EEX metadata. + +""" +_two_body_functional_forms = { + "harmonic": { + "form": "K*(r-R0) ** 2", + "parameters": ["K", "R0"], + "units": { + "K": "kcal * mol ** -1 angstrom ** -2", + "R0": "angstrom" + }, + "description": "This is a harmonic bond" + }, +} + +_three_body_functional_forms = { + "harmonic": { + "form": "K*(theta-theta0)**2", + "parameters": ["K", "theta0"], + "units": { + "K": "kcal radian**-2", + "theta0": "degree" + }, + "description": "This is a harmonic angle" + }, +} + +_four_body_functional_forms = { + "charmmfsw": { + "form": "K*(1 + cos(n*phi-d))", + "parameters": ["K", "n", "d"], + "units": { + "K": "kcal * mol ** -1", + "n": "phase", + "d": "radians", + }, + "description": "This is a charmm dihedral" + }, + +} + +_nonbond_functional_forms = { + "LJ": { + "AB": { + "form": "A/(r ** 12.0) - B/(r ** 6.0)", + "parameters": ["A", "B"], + "units": { + "A": "kcal * mol ** -1 * angstrom ** 12", + "B": "kcal * mol ** -1 * angstrom ** 6", + }, + }, + } +} + + +term_data = {} +term_data[2] = _two_body_functional_forms +term_data[3] = _three_body_functional_forms +term_data[4] = _four_body_functional_forms + diff --git a/eex/translators/amber/amber_required.py b/eex/translators/amber/amber_required.py new file mode 100644 index 0000000..62ba464 --- /dev/null +++ b/eex/translators/amber/amber_required.py @@ -0,0 +1,19 @@ +""" + +Required sections + +file name and format is WIP. + +Model after eex/metadata/atom_metadata.py (?) + +""" + +atom_property_names = { + "ATOM_NAME": "atom_name", + "CHARGE": "charge", + "MASS": "mass", + "ATOM_TYPE_INDEX": "atom_type", + "ATOMIC_NUMBER": "atomic_number", + # "AMBER_ATOM_TYPE": "atom_type_name", + # "RADII" : "implicit_solvent_radius", +} \ No newline at end of file diff --git a/eex/translators/lammps/lammps_ff.py b/eex/translators/lammps/lammps_ff.py index ba0d35c..1747b29 100644 --- a/eex/translators/lammps/lammps_ff.py +++ b/eex/translators/lammps/lammps_ff.py @@ -17,7 +17,7 @@ } } -bond_styles = { +_two_body_functional_forms = { # "none": { # "form": "NYI", # "parameters": "NYI", @@ -145,7 +145,7 @@ }, } -angle_styles = { +_three_body_functional_forms = { # "none": { # "form": "NYI", # "parameters": "NYI", @@ -288,7 +288,7 @@ # }, } -dihedral_styles = { +_four_body_functional_forms = { # "none": { # "form": "NYI", # "parameters": "NYI", @@ -415,6 +415,7 @@ # }, } +# TODO: Improper styles NYI in internal EEX improper_styles = { # "class2": { # "form": "NYI", @@ -504,6 +505,7 @@ } term_data = {} -term_data[2] = bond_styles -term_data[3] = angle_styles -term_data[4] = dihedral_styles +term_data[2] = _two_body_functional_forms +term_data[3] = _three_body_functional_forms +term_data[4] = _four_body_functional_forms +