diff --git a/packtools/sps/validation/article_contribs.py b/packtools/sps/validation/article_contribs.py index cfbaefaa1..3c129714b 100644 --- a/packtools/sps/validation/article_contribs.py +++ b/packtools/sps/validation/article_contribs.py @@ -30,6 +30,7 @@ def info(self): def _get_default_params(self): return { # Error levels + "contrib_type_error_level": "ERROR", "contrib_role_error_level": "ERROR", "orcid_format_error_level": "ERROR", "orcid_is_registered_error_level": "ERROR", @@ -39,8 +40,117 @@ def _get_default_params(self): "contrib_error_level": "ERROR", # ORCID validation function - "is_orcid_registered": _callable_extern_validate_default + "is_orcid_registered": _callable_extern_validate_default, + + # Contrib type validation + "contrib_type_list": ["author", "compiler"], } + + def validate_contrib_type(self): + """ + Validates presence and value of @contrib-type attribute. + + SciELO Rules: + - @contrib-type is mandatory + - Valid values: 'author', 'compiler' + - 'author' is mandatory for all documents except reviewer reports + + References: + - SPS documentation: : e + """ + error_level = self.data.get("contrib_type_error_level", "ERROR") + contrib_type = self.contrib.get("contrib_type") + parent_article_type = self.data.get("parent_article_type") + valid_values = self.data.get("contrib_type_list", ["author", "compiler"]) + + # 1. Verifica presença do atributo + if not contrib_type: + valid_values_str = ", ".join(valid_values) + advice = f'{self.info} Add @contrib-type attribute to . Valid values: {valid_values_str}' + advice_text = ( + '{info} Add @contrib-type attribute to . Valid values: {values}' + ) + advice_params = { + "info": self.info, + "values": ", ".join(valid_values), + } + + yield build_response( + title="@contrib-type attribute", + parent=self.contrib, + item="contrib", + sub_item="@contrib-type", + validation_type="exist", + is_valid=False, + expected="@contrib-type attribute", + obtained=None, + advice=advice, + data=self.contrib, + error_level=error_level, + advice_text=advice_text, + advice_params=advice_params, + ) + return + + # 2. Valida valor do atributo + is_valid_value = contrib_type in valid_values + + if not is_valid_value: + valid_values_str = " or ".join(valid_values) + advice = f'{self.info} @contrib-type="{contrib_type}" is invalid. Use: {valid_values_str}' + advice_text = ( + '{info} @contrib-type="{obtained}" is invalid. Use: {expected}' + ) + advice_params = { + "info": self.info, + "obtained": contrib_type, + "expected": " or ".join(valid_values), + } + + yield build_response( + title="@contrib-type value", + parent=self.contrib, + item="contrib", + sub_item="@contrib-type", + validation_type="value", + is_valid=False, + expected=valid_values, + obtained=contrib_type, + advice=advice, + data=self.contrib, + error_level=error_level, + advice_text=advice_text, + advice_params=advice_params, + ) + + # 3. Valida que 'author' é mandatório (exceto para reviewer report) + if parent_article_type != "reviewer-report": + is_author = contrib_type == "author" + if not is_author: + advice = f'{self.info} @contrib-type must be "author" for this document type (except reviewer reports)' + advice_text = ( + '{info} @contrib-type must be "author" for this document type (except reviewer reports)' + ) + advice_params = { + "info": self.info, + } + + yield build_response( + title="@contrib-type mandatory value", + parent=self.contrib, + item="contrib", + sub_item="@contrib-type", + validation_type="value", + is_valid=False, + expected="author", + obtained=contrib_type, + advice=advice, + data=self.contrib, + error_level=error_level, + advice_text=advice_text, + advice_params=advice_params, + ) + def validate_role(self): try: roles = self.contrib["contrib_role"] @@ -49,6 +159,14 @@ def validate_role(self): parent_id = self.data.get("parent_id") parent_article_type = self.data.get("parent_article_type") + advice = f"{self.info} Mark the contrib role. Consult SPS documentation for detailed instructions" + advice_text = ( + "{info} Mark the contrib role. Consult SPS documentation for detailed instructions" + ) + advice_params = { + "info": self.info, + } + yield build_response( title=f"contributor role", parent=self.contrib, @@ -58,9 +176,11 @@ def validate_role(self): is_valid=False, expected=f" in ", obtained=None, - advice=f"{self.info} Mark the contrib role. Consult SPS documentation for detailed instructions", + advice=advice, data=self.contrib, error_level=self.data.get("contrib_role_error_level"), + advice_text=advice_text, + advice_params=advice_params, ) else: for role in roles: @@ -72,6 +192,12 @@ def validate_orcid_format(self): """ Validates format of contributor ORCID identifiers. + SciELO Rules: + - ORCID is mandatory + - Format: XXXX-XXXX-XXXX-XXXX (alphanumeric) + - DO NOT use URLs (https://orcid.org/...) + - Use only the alphanumeric identifier + Returns ------- generator @@ -91,12 +217,55 @@ def validate_orcid_format(self): ) _orcid = self.contrib.get("contrib_ids", {}).get("orcid") or "" + + # NOVA VERIFICAÇÃO: Detecta URLs + if _orcid and ("http://" in _orcid or "https://" in _orcid or "orcid.org" in _orcid): + advice = f'{self.info} Do not use URLs. Extract only the alphanumeric identifier from {_orcid}' + advice_text = ( + "{info} Do not use URLs. Extract only the alphanumeric identifier from {orcid}" + ) + advice_params = { + "info": self.info, + "orcid": _orcid, + } + + yield build_response( + title="ORCID format - URL detected", + parent=self.contrib, + item="contrib-id", + sub_item='@contrib-id-type="orcid"', + validation_type="format", + is_valid=False, + expected="alphanumeric ORCID (XXXX-XXXX-XXXX-XXXX)", + obtained=_orcid, + advice=advice, + data=self.contrib, + error_level=error_level, + advice_text=advice_text, + advice_params=advice_params, + ) + return + + # Validação de formato is_valid = bool(_orcid and re.match(_default_orcid, _orcid)) expected_value = _orcid if is_valid else "valid ORCID" + if _orcid: advice = f'Fix ORCID format {_orcid}' + advice_text = ( + 'Fix ORCID format {orcid}' + ) + advice_params = { + "orcid": _orcid, + } else: - advice = f'{self.info} Add ORCID in ' + advice = f'{self.info} Add ORCID in ' + advice_text = ( + '{info} Add ORCID in ' + ) + advice_params = { + "info": self.info, + } yield build_response( title="ORCID format", @@ -110,6 +279,8 @@ def validate_orcid_format(self): advice=advice or "(validate_orcid_format)", data=self.contrib, error_level=error_level, + advice_text=advice_text, + advice_params=advice_params, ) def validate_orcid_is_registered(self): @@ -142,6 +313,16 @@ def validate_orcid_is_registered(self): return result = is_orcid_registered(orcid, self.contrib_name) + + advice = f'{self.info} Unable to automatically check the {orcid}. Check it manually' + advice_text = ( + '{info} Unable to automatically check the {orcid}. Check it manually' + ) + advice_params = { + "info": self.info, + "orcid": orcid, + } + yield build_response( title="Registered ORCID", parent=self.contrib, @@ -151,9 +332,11 @@ def validate_orcid_is_registered(self): is_valid=result["status"] == "registered", expected="registered", obtained=result["status"], - advice=f'{self.info} Unable to automatically check the {orcid}. Check it manually', + advice=advice, data=result, error_level=error_level, + advice_text=advice_text, + advice_params=advice_params, ) def validate_affiliations(self): @@ -169,24 +352,44 @@ def validate_affiliations(self): """ error_level = self.data["affiliations_error_level"] affs = [item["id"] for item in self.contrib.get("affs") or []] + + advice = f'{self.info} Add in ' + advice_text = ( + '{info} Add in ' + ) + advice_params = { + "info": self.info, + } + yield build_response( title="affiliation", parent=self.contrib, item="contrib", sub_item="xref", validation_type="exist", - is_valid=not affs, + is_valid=bool(affs), # CORRIGIDO: válido quando TEM afiliações expected="affiliation", - obtained=affs, - advice=f'{self.info} Add in ', + obtained=affs or None, + advice=advice, data=self.contrib, error_level=error_level, + advice_text=advice_text, + advice_params=advice_params, ) def validate_name(self): """Validates presence of contributor name elements.""" error_level = self.data["name_error_level"] item = self.contrib.get("contrib_name") + + advice = f"{self.info} Mark contributor name with in " + advice_text = ( + "{info} Mark contributor name with in " + ) + advice_params = { + "info": self.info, + } + yield build_response( title="contributor name", parent=self.contrib, @@ -196,15 +399,26 @@ def validate_name(self): is_valid=bool(item), expected="contributor name", obtained=item, - advice=f"{self.info} Mark contributor name with in ", + advice=advice, data=self.contrib, error_level=error_level, + advice_text=advice_text, + advice_params=advice_params, ) def validate_collab(self): """Validates presence of collaboration information.""" error_level = self.data["collab_error_level"] item = self.contrib.get("collab") + + advice = f"{self.info} Mark institutional contributor with in " + advice_text = ( + "{info} Mark institutional contributor with in " + ) + advice_params = { + "info": self.info, + } + yield build_response( title="collab", parent=self.contrib, @@ -214,9 +428,11 @@ def validate_collab(self): is_valid=bool(item), expected="collab", obtained=None, - advice=f"{self.info} Mark institutional contributor with in ", + advice=advice, data=self.contrib, error_level=error_level, + advice_text=advice_text, + advice_params=advice_params, ) def validate_contrib(self): @@ -227,14 +443,27 @@ def validate_contrib(self): error_level = self.data["contrib_error_level"] value = None expected = [] + if self.contrib.get("original_article_type") == "reviewer-report": expected = ["name", "anonymous"] value = self.contrib.get("contrib_name") or self.contrib.get("anonymous") advice = f"{self.info} Mark contributor with and anonymous contributor with in " + advice_text = ( + "{info} Mark contributor with and anonymous contributor with in " + ) + advice_params = { + "info": self.info, + } else: expected = ["name", "collab"] value = self.contrib.get("contrib_name") or self.contrib.get("collab") advice = f"{self.info} Mark contributor with and institutional contributor with in " + advice_text = ( + "{info} Mark contributor with and institutional contributor with in " + ) + advice_params = { + "info": self.info, + } yield build_response( title="contributor", @@ -245,13 +474,16 @@ def validate_contrib(self): is_valid=bool(value), expected=expected, obtained=value, - advice=advice or '(validate_contrib)', + advice=advice, data=self.contrib, error_level=error_level, + advice_text=advice_text, + advice_params=advice_params, ) def validate(self): """Runs all validation checks on contributor metadata.""" + yield from self.validate_contrib_type() yield from self.validate_contrib() yield from self.validate_role() yield from self.validate_orcid_format() @@ -269,7 +501,9 @@ def __init__(self, xmltree, params): def _get_default_params(self): # Include all params from ContribValidation plus its own return { - "orcid_is_unique_error_level": "ERROR" + "orcid_is_unique_error_level": "ERROR", + "credit_consistency_error_level": "ERROR", + "subarticle_collab_id_error_level": "ERROR", } def validate_orcid_is_unique(self): @@ -288,6 +522,11 @@ def validate_orcid_is_unique(self): questions = "; ".join(questions) advice = f"ORCID must be unique. {questions}" + advice_text = ("ORCID must be unique. {questions}") + advice_params = { + "questions": questions, + } + yield build_response( title="Unique ORCID", parent=parent, @@ -297,15 +536,25 @@ def validate_orcid_is_unique(self): is_valid=not bool(repeated_orcid), expected="Unique ORCID", obtained=repeated_orcid, - advice=advice or '(validate_orcid_is_unique)', + advice=advice, data=repeated_orcid, error_level=error_level, + advice_text=advice_text, + advice_params=advice_params, ) def validate(self): # A validação da unicidade do ORCID é feita uma única vez por artigo yield from self.validate_orcid_is_unique() + # Nova validação: Consistência CRediT + credit_validator = DocumentCreditConsistencyValidation(self.xmltree, self.params) + yield from credit_validator.validate_credit_consistency() + + # Nova validação: IDs únicos em sub-articles + subarticle_validator = SubArticleCollabIDValidation(self.xmltree, self.params) + yield from subarticle_validator.validate() + validator = TextContribsValidation(self.xmltree.find("."), self.params) yield from validator.validate() @@ -323,6 +572,10 @@ def validate(self): validator = CollabListValidation(self.node, self.params) yield from validator.validate() + # Nova validação: Grupos completos + collab_validator = CollabGroupValidation(self.node, self.params) + yield from collab_validator.validate_collab_members_completeness() + for node in self.text_contribs.sub_articles: validator = TextContribsValidation(node, self.params) yield from validator.validate() @@ -360,9 +613,21 @@ def validate(self): advice = "" if expected_type == "collab-list": advice = f'Add person authors, members of {self.text_contribs.collab}, with ... in ' + advice_text = ( + 'Add person authors, members of {collab}, with ... in ' + ) + advice_params = { + "collab": self.text_contribs.collab, + } else: - type = contrib_group_data["type"] - advice = f'Remove content-type="{type}" from ' + type_value = contrib_group_data["type"] + advice = f'Remove content-type="{type_value}" from ' + advice_text = ( + 'Remove content-type="{type}" from ' + ) + advice_params = { + "type": type_value, + } yield build_response( title=f"{title} contributor group type", @@ -373,12 +638,414 @@ def validate(self): is_valid=valid, expected=expected_type, obtained=contrib_group_data["type"], - advice=advice or f'(validate {contrib_group_data})', + advice=advice, data=contrib_group_data, error_level=self.params["collab_list_error_level"], + advice_text=advice_text, + advice_params=advice_params, ) +class CollabGroupValidation: + """ + Validates complete structure and requirements for collaboration groups. + + SciELO Rules: + - Members in collab-list must have: + 1. Full name (described in PDF) + 2. Complete affiliation (described in PDF) + 3. ORCID (described in PDF) + - Without this identification, authors cannot assign DOI to their curriculum + """ + + def __init__(self, node, params): + self.params = self._get_default_params() + self.params.update(params or {}) + self.node = node + self.text_contribs = TextContribs(node) + + def _get_default_params(self): + return { + "collab_member_name_error_level": "ERROR", + "collab_member_aff_error_level": "ERROR", + "collab_member_orcid_error_level": "ERROR", + } + + def validate_collab_members_completeness(self): + """ + Validates that all members of a collaboration group have complete information. + """ + # Encontra contrib-group com content-type="collab-list" + collab_list_groups = [ + cg for cg in self.text_contribs.contrib_groups + if cg.data.get("type") == "collab-list" + ] + + if not collab_list_groups: + return + + for contrib_group in collab_list_groups: + for contrib_data in contrib_group.data.get("contribs", []): + # Valida nome + if not contrib_data.get("contrib_name"): + advice = "All members of collaboration group must have name in " + advice_text = ( + "All members of collaboration group must have name in " + ) + advice_params = {} + + yield build_response( + title="collab member name", + parent=contrib_data, + item="contrib", + sub_item="name", + validation_type="exist", + is_valid=False, + expected="author name in collab-list", + obtained=None, + advice=advice, + data=contrib_data, + error_level=self.params["collab_member_name_error_level"], + advice_text=advice_text, + advice_params=advice_params, + ) + + # Valida afiliação + affs = contrib_data.get("affs") or [] + + # Para collab-list, afiliação pode ser indicada via + contrib_xref = contrib_data.get("contrib_xref") or [] + has_aff_xref = any( + xref.get("ref_type") == "aff" or xref.get("ref-type") == "aff" + for xref in contrib_xref + ) + + # Tem afiliação se: affs populado OU xref para aff existe + has_affiliation = bool(affs) or has_aff_xref + + if not has_affiliation: + advice = "All members of collaboration group must have complete affiliation (described in PDF)" + advice_text = ( + "All members of collaboration group must have complete affiliation (described in PDF)" + ) + advice_params = {} + + yield build_response( + title="collab member affiliation", + parent=contrib_data, + item="contrib", + sub_item="xref", + validation_type="exist", + is_valid=False, + expected="affiliation for collab member", + obtained=None, + advice=advice, + data=contrib_data, + error_level=self.params["collab_member_aff_error_level"], + advice_text=advice_text, + advice_params=advice_params, + ) + + # Valida ORCID (mais rigoroso para membros de grupo) + orcid = contrib_data.get("contrib_ids", {}).get("orcid") + if not orcid: + advice = ( + "All members of collaboration group MUST have ORCID (described in PDF). " + "Without ORCID identification, authors cannot assign DOI as their work in curriculum databases" + ) + advice_text = ( + "All members of collaboration group MUST have ORCID (described in PDF). " + "Without ORCID identification, authors cannot assign DOI as their work in curriculum databases" + ) + advice_params = {} + + yield build_response( + title="collab member ORCID", + parent=contrib_data, + item="contrib-id", + sub_item='@contrib-id-type="orcid"', + validation_type="exist", + is_valid=False, + expected="ORCID for collab member", + obtained=None, + advice=advice, + data=contrib_data, + error_level=self.params["collab_member_orcid_error_level"], + advice_text=advice_text, + advice_params=advice_params, + ) + + +class DocumentCreditConsistencyValidation: + """ + Validates that CRediT taxonomy is used consistently across the document. + + SciELO Rule: + - If using CRediT, use it for ALL contributors + - Do not mix CRediT with other taxonomies + - "All or nothing" principle + """ + + def __init__(self, xmltree, params): + self.xmltree = xmltree + self.xml_contribs = XMLContribs(self.xmltree) + self.params = self._get_default_params() + self.params.update(params or {}) + + def _get_default_params(self): + return { + "credit_consistency_error_level": "ERROR", + } + + def validate_credit_consistency(self): + """ + Validates "all or nothing" rule for CRediT taxonomy. + """ + # Coleta estatísticas de uso de CRediT + total_contribs = 0 + contribs_with_credit = 0 + contribs_without_credit = 0 + mixed_contribs = [] # Contribs que misturam CRediT e não-CRediT + + for contrib in self.xml_contribs.all_contribs: + if contrib.get("anonymous"): + continue + + roles = contrib.get("contrib_role", []) + if not roles: + continue + + total_contribs += 1 + has_credit = False + has_non_credit = False + + for role in roles: + if role.get("content-type"): + has_credit = True + else: + has_non_credit = True + + # Detecta mistura no mesmo contrib + if has_credit and has_non_credit: + # Use fallback para evitar None em contribuidores institucionais + name = contrib.get("contrib_full_name") or contrib.get("collab") or "" + mixed_contribs.append(name) + + if has_credit: + contribs_with_credit += 1 + else: + contribs_without_credit += 1 + + # Valida consistência + if total_contribs == 0: + return + + parent = self.xml_contribs.text_contribs.attribs_parent_prefixed + + # Caso 1: Mistura no mesmo contrib (erro grave) + if mixed_contribs: + advice = ( + "Do not mix CRediT taxonomy with other taxonomies in the same contributor. " + "All roles for a contributor must use the same taxonomy." + ) + advice_text = ( + "Do not mix CRediT taxonomy with other taxonomies in the same contributor. " + "All roles for a contributor must use the same taxonomy." + ) + advice_params = {} + + mixed_contribs_str = ', '.join(str(c) for c in mixed_contribs if c) + + yield build_response( + title="CRediT taxonomy consistency - mixed roles", + parent=parent, + item="role", + sub_item="@content-type", + validation_type="consistency", + is_valid=False, + expected="consistent taxonomy (all CRediT or all non-CRediT)", + obtained=f"mixed taxonomy in contributors: {mixed_contribs_str}", + advice=advice, + data={"mixed_contribs": mixed_contribs}, + error_level=self.params["credit_consistency_error_level"], + advice_text=advice_text, + advice_params=advice_params, + ) + + # Caso 2: Alguns usam CRediT, outros não (erro de consistência) + if 0 < contribs_with_credit < total_contribs: + advice = ( + "CRediT taxonomy must be used consistently: either ALL contributors use CRediT " + "or NONE use it. Do not mix taxonomies in the document. " + "SciELO Rule: 'tudo ou nada' (all or nothing)." + ) + advice_text = ( + "CRediT taxonomy must be used consistently: either ALL contributors use CRediT " + "or NONE use it. Do not mix taxonomies in the document. " + "SciELO Rule: 'tudo ou nada' (all or nothing)." + ) + advice_params = {} + + yield build_response( + title="CRediT taxonomy consistency - document level", + parent=parent, + item="role", + sub_item="@content-type", + validation_type="consistency", + is_valid=False, + expected="consistent taxonomy across all contributors", + obtained=( + f"{contribs_with_credit} contributors with CRediT, " + f"{contribs_without_credit} without CRediT" + ), + advice=advice, + data={ + "total_contribs": total_contribs, + "with_credit": contribs_with_credit, + "without_credit": contribs_without_credit, + }, + error_level=self.params["credit_consistency_error_level"], + advice_text=advice_text, + advice_params=advice_params, + ) + + +class SubArticleCollabIDValidation: + """ + Validates that collaboration IDs are unique between article and sub-articles. + + SciELO Rule: + - If article uses id="collab", sub-article must use id="collab1" + - If article uses rid="collab", sub-article must use rid="collab1" + - Prevents ID collisions between translations and original + """ + + def __init__(self, xmltree, params): + self.xmltree = xmltree + self.params = self._get_default_params() + self.params.update(params or {}) + + def _get_default_params(self): + return { + "subarticle_collab_id_error_level": "ERROR", + } + + def collect_collab_ids(self, node, context="article"): + """Coleta todos os IDs de colaboração em um nó.""" + ids = {"id": set(), "rid": set()} + + # Para article principal, excluir contrib dentro de sub-article + # Para sub-article, buscar normalmente + if context == "article": + # Busca contrib[@id] que NÃO estão dentro de sub-article + xpath_id = ".//contrib[@id][not(ancestor::sub-article)]" + xpath_rid = ".//contrib[@rid][not(ancestor::sub-article)]" + else: + # Para sub-articles, buscar normalmente + xpath_id = ".//contrib[@id]" + xpath_rid = ".//contrib[@rid]" + + for contrib in node.xpath(xpath_id): + collab_id = contrib.get("id") + if collab_id: + ids["id"].add((collab_id, context)) + + for contrib in node.xpath(xpath_rid): + collab_rid = contrib.get("rid") + if collab_rid: + ids["rid"].add((collab_rid, context)) + + return ids + + def validate(self): + """Valida unicidade de IDs entre article e sub-articles.""" + # Coleta IDs do article principal + article_node = self.xmltree.find(".//article") + if article_node is None: + article_node = self.xmltree + + article_ids = self.collect_collab_ids(article_node, "article") + + # Para cada sub-article + for sub_article in self.xmltree.findall(".//sub-article"): + sub_article_type = sub_article.get("article-type", "") + sub_article_id = sub_article.get("id", "unknown") + + sub_ids = self.collect_collab_ids(sub_article, f"sub-article({sub_article_id})") + + parent = {"parent": f"sub-article", "parent_id": sub_article_id, "parent_article_type": sub_article_type, "parent_lang": None} + + # Verifica colisões de @id + article_id_values = {id_val for id_val, _ in article_ids["id"]} + sub_id_values = {id_val for id_val, _ in sub_ids["id"]} + collisions_id = article_id_values & sub_id_values + + if collisions_id: + advice = ( + f"Sub-article {sub_article_id} uses same @id as main article: {list(collisions_id)}. " + f"If article uses id='collab', sub-article should use id='collab1'" + ) + advice_text = ( + "Sub-article {sub_id} uses same @id as main article: {collisions}. " + "If article uses id='collab', sub-article should use id='collab1'" + ) + advice_params = { + "sub_id": sub_article_id, + "collisions": ", ".join(list(collisions_id)), + } + + yield build_response( + title="collaboration @id uniqueness in sub-article", + parent=parent, + item="contrib", + sub_item="@id", + validation_type="uniqueness", + is_valid=False, + expected="unique @id values between article and sub-article", + obtained=f"collision: {list(collisions_id)}", + advice=advice, + data={"collisions": list(collisions_id)}, + error_level=self.params["subarticle_collab_id_error_level"], + advice_text=advice_text, + advice_params=advice_params, + ) + + # Verifica colisões de @rid + article_rid_values = {rid_val for rid_val, _ in article_ids["rid"]} + sub_rid_values = {rid_val for rid_val, _ in sub_ids["rid"]} + collisions_rid = article_rid_values & sub_rid_values + + if collisions_rid: + advice = ( + f"Sub-article {sub_article_id} uses same @rid as main article: {list(collisions_rid)}. " + f"If article uses rid='collab', sub-article should use rid='collab1'" + ) + advice_text = ( + "Sub-article {sub_id} uses same @rid as main article: {collisions}. " + "If article uses rid='collab', sub-article should use rid='collab1'" + ) + advice_params = { + "sub_id": sub_article_id, + "collisions": ", ".join(list(collisions_rid)), + } + + yield build_response( + title="collaboration @rid uniqueness in sub-article", + parent=parent, + item="contrib", + sub_item="@rid", + validation_type="uniqueness", + is_valid=False, + expected="unique @rid values between article and sub-article", + obtained=f"collision: {list(collisions_rid)}", + advice=advice, + data={"collisions": list(collisions_rid)}, + error_level=self.params["subarticle_collab_id_error_level"], + advice_text=advice_text, + advice_params=advice_params, + ) + + class ContribRoleValidation: """Validates contributor information in scientific article XML.""" @@ -419,28 +1086,26 @@ def _get_default_params(self): # CRediT taxonomy terms and their URIs "credit_taxonomy_terms_and_urls": [ - {"term": "Conceptualization", "uri": "http://credit.niso.org/contributor-roles/conceptualization/"}, - {"term": "Data curation", "uri": "http://credit.niso.org/contributor-roles/data-curation/"}, - {"term": "Formal analysis", "uri": "http://credit.niso.org/contributor-roles/formal-analysis/"}, - {"term": "Funding acquisition", "uri": "http://credit.niso.org/contributor-roles/funding-acquisition/"}, - {"term": "Investigation", "uri": "http://credit.niso.org/contributor-roles/investigation/"}, - {"term": "Methodology", "uri": "http://credit.niso.org/contributor-roles/methodology/"}, - {"term": "Project administration", "uri": "http://credit.niso.org/contributor-roles/project-administration/"}, - {"term": "Resources", "uri": "http://credit.niso.org/contributor-roles/resources/"}, - {"term": "Software", "uri": "http://credit.niso.org/contributor-roles/software/"}, - {"term": "Supervision", "uri": "http://credit.niso.org/contributor-roles/supervision/"}, - {"term": "Validation", "uri": "http://credit.niso.org/contributor-roles/validation/"}, - {"term": "Visualization", "uri": "http://credit.niso.org/contributor-roles/visualization/"}, - {"term": "Writing – original draft", "uri": "http://credit.niso.org/contributor-roles/writing-original-draft/"}, - {"term": "Writing – review & editing", "uri": "http://credit.niso.org/contributor-roles/writing-review-editing/"} + {"term": "Conceptualization", "uri": "https://credit.niso.org/contributor-roles/conceptualization/"}, + {"term": "Data curation", "uri": "https://credit.niso.org/contributor-roles/data-curation/"}, + {"term": "Formal analysis", "uri": "https://credit.niso.org/contributor-roles/formal-analysis/"}, + {"term": "Funding acquisition", "uri": "https://credit.niso.org/contributor-roles/funding-acquisition/"}, + {"term": "Investigation", "uri": "https://credit.niso.org/contributor-roles/investigation/"}, + {"term": "Methodology", "uri": "https://credit.niso.org/contributor-roles/methodology/"}, + {"term": "Project administration", "uri": "https://credit.niso.org/contributor-roles/project-administration/"}, + {"term": "Resources", "uri": "https://credit.niso.org/contributor-roles/resources/"}, + {"term": "Software", "uri": "https://credit.niso.org/contributor-roles/software/"}, + {"term": "Supervision", "uri": "https://credit.niso.org/contributor-roles/supervision/"}, + {"term": "Validation", "uri": "https://credit.niso.org/contributor-roles/validation/"}, + {"term": "Visualization", "uri": "https://credit.niso.org/contributor-roles/visualization/"}, + {"term": "Writing – original draft", "uri": "https://credit.niso.org/contributor-roles/writing-original-draft/"}, + {"term": "Writing – review & editing", "uri": "https://credit.niso.org/contributor-roles/writing-review-editing/"} ], - # List of valid contributor role types + # List of valid contributor role types (CORRIGIDO) "contrib_role_specific_use_list": [ - "author", "editor", - "reviewer", - "translator" + "reviewer" ] } @@ -474,18 +1139,31 @@ def validate_credit(self): valid_uri = uri and expected_uri == uri valid_term = text and expected_term and (expected_term.upper() == text.upper()) + advice = "" + advice_text = None + advice_params = {} + if not valid_uri: if expected_uri and uri: advice = f'{self.info} replace by ' + advice_text = ('{info} replace by ') + advice_params = {"info": self.info, "uri": uri, "expected_uri": expected_uri} elif expected_uri: advice = f'{self.info} replace {text} by {text}' + advice_text = ('{info} replace {text} by {text}') + advice_params = {"info": self.info, "text": text, "expected_uri": expected_uri} elif uri: expected_uris = list(credit_taxonomy_by_uri.keys()) - advice = f'{self.info} check if {text} has corresponding CRediT URI: {expected_uris}' + advice = f'{self.info} check if {text} has corresponding CRediT URI: {expected_uris}' + advice_text = ('{info} check if {text} has corresponding CRediT URI') + advice_params = {"info": self.info, "uri": uri, "text": text} elif text: expected_uris = list(credit_taxonomy_by_uri.keys()) - advice = f'{self.info} check if {text} has corresponding CRediT URI: {expected_uris}' + advice = f'{self.info} check if {text} has corresponding CRediT URI: {expected_uris}' + advice_text = ('{info} check if {text} has corresponding CRediT URI') + advice_params = {"info": self.info, "text": text} + yield build_response( title="CRediT taxonomy URI", parent=self.contrib, @@ -498,6 +1176,8 @@ def validate_credit(self): advice=advice, data=self.contrib, error_level=uri_error_level, + advice_text=advice_text, + advice_params=advice_params, ) if not valid_term: @@ -507,14 +1187,22 @@ def validate_credit(self): content_type = '' if expected_term and text: advice = f'{self.info} replace {text} by {expected_term}' + advice_text = ('{info} replace {text} by {expected_term}') + advice_params = {"info": self.info, "content_type": content_type, "text": text, "expected_term": expected_term} elif expected_term: advice = f'{self.info} replace by {expected_term}' + advice_text = ('{info} replace by {expected_term}') + advice_params = {"info": self.info, "content_type": content_type, "expected_term": expected_term} elif text: expected_terms = self.params["credit_taxonomy_by_terms"] - advice = f'{self.info} check if {text} has corresponding CRediT term: {expected_terms}' + advice = f'{self.info} check if {text} has corresponding CRediT term: {expected_terms}' + advice_text = ('{info} check if {text} has corresponding CRediT term') + advice_params = {"info": self.info, "content_type": content_type, "text": text} else: expected_terms = self.params["credit_taxonomy_by_terms"] - advice = f'{self.info} check if {text} has corresponding CRediT term: {expected_terms}' + advice = f'{self.info} check if {text} has corresponding CRediT term: {expected_terms}' + advice_text = ('{info} check if {text} has corresponding CRediT term') + advice_params = {"info": self.info, "content_type": content_type, "text": text} yield build_response( title="CRediT taxonomy term", @@ -528,27 +1216,74 @@ def validate_credit(self): advice=advice, data=self.contrib, error_level=term_error_level, + advice_text=advice_text, + advice_params=advice_params, ) def validate_role_specific_use(self): + """ + Validates @specific-use attribute in . + + SciELO Rule: + - For reviewer reports: @specific-use is MANDATORY + - Valid values: "reviewer", "editor" + """ expected = self.params["contrib_role_specific_use_list"] error_level = self.params["contrib_role_specific_use_error_level"] specific_use = self.contrib_role.get("specific-use") + parent_article_type = self.contrib.get("parent_article_type") + + # Determina se specific-use é obrigatório + is_reviewer_report = parent_article_type == "reviewer-report" + + # VALIDAÇÃO 1: Existência (obrigatório para reviewer-report) + if not specific_use: + if is_reviewer_report: + # Para reviewer-report, ausência é ERRO + advice = f'{self.info} add for reviewer report' + advice_text = '{info} add with {expected}' + advice_params = {"info": self.info, "expected": " or ".join(expected)} + + yield build_response( + title="contributor role type (reviewer report)", + parent=self.contrib, + item="role", + sub_item="specific-use", + validation_type="exist", + is_valid=False, + expected="specific-use attribute", + obtained=None, + advice=advice, + data=self.contrib, + error_level=error_level, + advice_text=advice_text, + advice_params=advice_params, + ) + # Para outros tipos, specific-use é opcional - não gera erro + return + + # VALIDAÇÃO 2: Valor (se presente, deve ser válido) valid = specific_use in expected - if specific_use: - advice = f'{self.info} replace {specific_use} in with {expected}' - else: - advice = f'{self.info} add contributor role type with {expected}' - yield build_response( - title="contributor role", - parent=self.contrib, - item="role", - sub_item="specific-use", - validation_type="value in list", - is_valid=valid, - expected=expected, - obtained=specific_use, - advice=advice or f'(validate_role_specific_use {self.contrib_role})', - data=self.contrib, - error_level=error_level, - ) + + if not valid: + expected_str = " or ".join(expected) + advice = f'{self.info} replace with {expected_str}' + advice_text = '{info} replace with {expected}' + advice_params = {"info": self.info, "specific_use": specific_use, "expected": expected_str} + + yield build_response( + title="contributor role type value", + parent=self.contrib, + item="role", + sub_item="specific-use", + validation_type="value in list", + is_valid=False, + expected=expected, + obtained=specific_use, + advice=advice, + data=self.contrib, + error_level=error_level, + advice_text=advice_text, + advice_params=advice_params, + ) + diff --git a/packtools/sps/validation_rules/article_contribs_rules.json b/packtools/sps/validation_rules/article_contribs_rules.json index 80f574b9e..9e3a50d38 100644 --- a/packtools/sps/validation_rules/article_contribs_rules.json +++ b/packtools/sps/validation_rules/article_contribs_rules.json @@ -4,18 +4,28 @@ "credit_taxonomy_term_error_level": "CRITICAL", "orcid_format_error_level": "CRITICAL", "orcid_is_registered_error_level": "ERROR", + "orcid_is_unique_error_level": "CRITICAL", "collab_list_error_level": "CRITICAL", "name_error_level": "CRITICAL", "collab_error_level": "CRITICAL", "contrib_error_level": "CRITICAL", + "contrib_type_error_level": "CRITICAL", "affiliations_error_level": "CRITICAL", - "orcid_is_unique_error_level": "CRITICAL", - "contrib_type_list": [ - "author" - ], "contrib_role_error_level": "CRITICAL", - "contrib_role_specific_use_list": [], "contrib_role_specific_use_error_level": "CRITICAL", + "collab_member_name_error_level": "CRITICAL", + "collab_member_aff_error_level": "CRITICAL", + "collab_member_orcid_error_level": "CRITICAL", + "credit_consistency_error_level": "ERROR", + "subarticle_collab_id_error_level": "ERROR", + "contrib_type_list": [ + "author", + "compiler" + ], + "contrib_role_specific_use_list": [ + "reviewer", + "editor" + ], "credit_taxonomy_terms_and_urls": [ { "term": "Conceptualization", @@ -70,9 +80,9 @@ "uri": "https://credit.niso.org/contributor-roles/writing-original-draft/" }, { - "term": "Writing – review e editing", + "term": "Writing – review & editing", "uri": "https://credit.niso.org/contributor-roles/writing-review-editing/" } ] } -} \ No newline at end of file +} diff --git a/tests/sps/validation/test_article_contribs.py b/tests/sps/validation/test_article_contribs.py index f4c2a53c5..247f2f682 100644 --- a/tests/sps/validation/test_article_contribs.py +++ b/tests/sps/validation/test_article_contribs.py @@ -5,9 +5,12 @@ from packtools.sps.models.article_contribs import XMLContribs from packtools.sps.validation.article_contribs import ( + CollabGroupValidation, CollabListValidation, ContribRoleValidation, ContribValidation, + DocumentCreditConsistencyValidation, + SubArticleCollabIDValidation, XMLContribsValidation, ) @@ -28,8 +31,8 @@ def setUp(self): Smith John - - Writing – original draft + + Writing – original draft 0000-0002-1234-5678 @@ -52,18 +55,50 @@ def setUp(self): "contrib_type": "author", "contrib_ids": {"orcid": "0000-0002-1234-5678"}, "contrib_role": [{ - "specific-use": "author", + "content-type": "https://credit.niso.org/contributor-roles/writing-original-draft/", "text": "Writing – original draft" }], "affs": [{"id": "aff1"}] } self.validator = ContribValidation(self.contrib_data, {}) + def test_validate_contrib_type_success(self): + """Test validate_contrib_type with valid contrib-type""" + results = list(self.validator.validate_contrib_type()) + errors = [r for r in results if r['response'] != 'OK'] + self.assertEqual(len(errors), 0) + + def test_validate_contrib_type_missing(self): + """Test validate_contrib_type with missing contrib-type""" + contrib_data = self.contrib_data.copy() + del contrib_data["contrib_type"] + validator = ContribValidation(contrib_data, {}) + + results = list(validator.validate_contrib_type()) + errors = [r for r in results if r['response'] != 'OK'] + + self.assertEqual(len(errors), 1) + self.assertEqual(errors[0]['title'], '@contrib-type attribute') + + def test_validate_contrib_type_invalid(self): + """Test validate_contrib_type with invalid contrib-type""" + contrib_data = self.contrib_data.copy() + contrib_data["contrib_type"] = "invalid" + validator = ContribValidation(contrib_data, {}) + + results = list(validator.validate_contrib_type()) + errors = [r for r in results if r['response'] != 'OK'] + + # Deve retornar 2 erros: valor inválido + não é "author" (mandatório) + self.assertEqual(len(errors), 2) + self.assertEqual(errors[0]['title'], '@contrib-type value') + self.assertEqual(errors[1]['title'], '@contrib-type mandatory value') + def test_validate_role_success(self): """Test validate_role with valid contributor role""" results = list(self.validator.validate_role()) errors = [r for r in results if r['response'] != 'OK'] - self.assertEqual(len(errors), 2) + self.assertEqual(len(errors), 0) def test_validate_role_missing(self): """Test validate_role with missing role""" @@ -112,6 +147,19 @@ def test_validate_orcid_format_invalid(self): self.assertEqual(responses, expected_responses) self.assertEqual(advices, expected_advices) + def test_validate_orcid_format_url_detected(self): + """Test validate_orcid_format with URL instead of identifier""" + contrib_data = self.contrib_data.copy() + contrib_data["contrib_ids"] = {"orcid": "https://orcid.org/0000-0002-1234-5678"} + validator = ContribValidation(contrib_data, {}) + + results = list(validator.validate_orcid_format()) + errors = [r for r in results if r['response'] != 'OK'] + + self.assertEqual(len(errors), 1) + self.assertEqual(errors[0]['title'], 'ORCID format - URL detected') + self.assertIn('Do not use URLs', errors[0]['advice']) + def test_validate_orcid_is_registered_success(self): """Test validate_orcid_is_registered with registered ORCID""" mock_orcid_validator = Mock(return_value={"status": "registered"}) @@ -139,6 +187,24 @@ def test_validate_orcid_is_registered_not_found(self): self.assertEqual(responses, expected_responses) self.assertEqual(advices, expected_advices) + def test_validate_affiliations_success(self): + """Test validate_affiliations with valid affiliation""" + results = list(self.validator.validate_affiliations()) + errors = [r for r in results if r['response'] != 'OK'] + self.assertEqual(len(errors), 0) + + def test_validate_affiliations_missing(self): + """Test validate_affiliations with missing affiliation""" + contrib_data = self.contrib_data.copy() + contrib_data["affs"] = [] + validator = ContribValidation(contrib_data, {}) + + results = list(validator.validate_affiliations()) + errors = [r for r in results if r['response'] != 'OK'] + + self.assertEqual(len(errors), 1) + self.assertEqual(errors[0]['title'], 'affiliation') + class TestContribRoleValidation(unittest.TestCase): def setUp(self): @@ -147,8 +213,7 @@ def setUp(self): "contrib_type": "author" } self.role_data = { - "specific-use": "author", - "content-type": "http://credit.niso.org/contributor-roles/writing-original-draft/", + "content-type": "https://credit.niso.org/contributor-roles/writing-original-draft/", "text": "Writing – original draft" } self.validator = ContribRoleValidation(self.contrib_data, self.role_data, {}) @@ -175,7 +240,11 @@ def test_validate_credit_invalid_uri(self): def test_validate_role_specific_use_success(self): """Test validate_role_specific_use with valid role""" - results = list(self.validator.validate_role_specific_use()) + role_data = self.role_data.copy() + role_data["specific-use"] = "reviewer" + validator = ContribRoleValidation(self.contrib_data, role_data, {}) + + results = list(validator.validate_role_specific_use()) errors = [r for r in results if r['response'] != 'OK'] self.assertEqual(len(errors), 0) @@ -193,7 +262,8 @@ def test_validate_role_specific_use_invalid(self): advices = [error['advice'] for error in errors] expected_responses = ['ERROR'] - expected_advices = ["""Smith, John : replace invalid-role in with ['author', 'editor', 'reviewer', 'translator']"""] + # Novo formato após correção da lógica de validação + expected_advices = ['Smith, John : replace with editor or reviewer'] self.assertEqual(responses, expected_responses) self.assertEqual(advices, expected_advices) @@ -277,5 +347,340 @@ def test_validate_orcid_is_unique_duplicate(self): self.assertEqual(advices, expected_advices) +class TestCollabGroupValidation(unittest.TestCase): + def setUp(self): + self.sample_xml = """ +
+ + + + + The Research Group + + + + + + Smith + John + + 0000-0002-1234-5678 + + + + + University Example + + + +
+ """ + self.xmltree = etree.fromstring(self.sample_xml.encode('utf-8')) + + def test_validate_collab_members_complete(self): + """Test validate_collab_members_completeness with complete member info""" + validator = CollabGroupValidation(self.xmltree.find("."), {}) + results = list(validator.validate_collab_members_completeness()) + errors = [r for r in results if r['response'] != 'OK'] + + # XML tem: nome, ORCID, e afiliação via + # Após correção do bug de afiliações, não deve haver erros + self.assertEqual(len(errors), 0) + + + def test_validate_collab_members_missing_name(self): + """Test validate_collab_members_completeness with missing name""" + xml_missing_name = """ +
+ + + + + The Research Group + + + + + 0000-0002-1234-5678 + + + + +
+ """ + xmltree = etree.fromstring(xml_missing_name.encode('utf-8')) + validator = CollabGroupValidation(xmltree.find("."), {}) + + results = list(validator.validate_collab_members_completeness()) + errors = [r for r in results if r['response'] != 'OK'] + + # Deve haver erro de nome faltando + name_errors = [e for e in errors if e['title'] == 'collab member name'] + self.assertGreater(len(name_errors), 0) + + def test_validate_collab_members_missing_orcid(self): + """Test validate_collab_members_completeness with missing ORCID""" + xml_missing_orcid = """ +
+ + + + + The Research Group + + + + + + Smith + John + + + + + +
+ """ + xmltree = etree.fromstring(xml_missing_orcid.encode('utf-8')) + validator = CollabGroupValidation(xmltree.find("."), {}) + + results = list(validator.validate_collab_members_completeness()) + errors = [r for r in results if r['response'] != 'OK'] + + # Deve haver erro de ORCID faltando + orcid_errors = [e for e in errors if e['title'] == 'collab member ORCID'] + self.assertGreater(len(orcid_errors), 0) + + +class TestDocumentCreditConsistencyValidation(unittest.TestCase): + def test_validate_credit_consistency_success(self): + """Test validate_credit_consistency with consistent CRediT usage""" + xml_consistent = """ +
+ + + + + + Smith + John + + + Writing – original draft + + + + + Johnson + Mary + + + Validation + + + + + +
+ """ + xmltree = etree.fromstring(xml_consistent.encode('utf-8')) + validator = DocumentCreditConsistencyValidation(xmltree, {}) + + results = list(validator.validate_credit_consistency()) + errors = [r for r in results if r['response'] != 'OK'] + self.assertEqual(len(errors), 0) + + def test_validate_credit_consistency_mixed_document(self): + """Test validate_credit_consistency with mixed CRediT usage across document""" + xml_mixed = """ +
+ + + + + + Smith + John + + + Writing – original draft + + + + + Johnson + Mary + + Writing + + + + +
+ """ + xmltree = etree.fromstring(xml_mixed.encode('utf-8')) + validator = DocumentCreditConsistencyValidation(xmltree, {}) + + results = list(validator.validate_credit_consistency()) + errors = [r for r in results if r['response'] != 'OK'] + + # Deve haver erro de inconsistência + self.assertGreater(len(errors), 0) + consistency_errors = [e for e in errors if 'consistency' in e['title'].lower()] + self.assertGreater(len(consistency_errors), 0) + + def test_validate_credit_consistency_mixed_roles(self): + """Test validate_credit_consistency with mixed CRediT in same contributor""" + xml_mixed_roles = """ +
+ + + + + + Smith + John + + + Writing – original draft + + Methodology + + + + +
+ """ + xmltree = etree.fromstring(xml_mixed_roles.encode('utf-8')) + validator = DocumentCreditConsistencyValidation(xmltree, {}) + + results = list(validator.validate_credit_consistency()) + errors = [r for r in results if r['response'] != 'OK'] + + # Deve haver erro de mistura no mesmo contrib + self.assertGreater(len(errors), 0) + mixed_errors = [e for e in errors if 'mixed roles' in e['title'].lower()] + self.assertGreater(len(mixed_errors), 0) + + +class TestSubArticleCollabIDValidation(unittest.TestCase): + def test_validate_unique_ids_success(self): + """Test validate with unique IDs between article and sub-article""" + xml_unique = """ +
+ + + + + Research Group + + + + + + + + + Grupo de Pesquisa + + + + +
+ """ + xmltree = etree.fromstring(xml_unique.encode('utf-8')) + validator = SubArticleCollabIDValidation(xmltree, {}) + + results = list(validator.validate()) + errors = [r for r in results if r['response'] != 'OK'] + self.assertEqual(len(errors), 0) + + def test_validate_duplicate_id(self): + """Test validate with duplicate @id between article and sub-article""" + xml_duplicate = """ +
+ + + + + Research Group + + + + + + + + + Grupo de Pesquisa + + + + +
+ """ + xmltree = etree.fromstring(xml_duplicate.encode('utf-8')) + validator = SubArticleCollabIDValidation(xmltree, {}) + + results = list(validator.validate()) + errors = [r for r in results if r['response'] != 'OK'] + + # Deve haver erro de ID duplicado + self.assertGreater(len(errors), 0) + id_errors = [e for e in errors if '@id' in e['sub_item']] + self.assertGreater(len(id_errors), 0) + + def test_validate_duplicate_rid(self): + """Test validate with duplicate @rid between article and sub-article""" + xml_duplicate_rid = """ +
+ + + + + Main Group + + + + + + Smith + John + + + + + + + + + + Grupo Principal + + + + + + Smith + John + + + + + +
+ """ + xmltree = etree.fromstring(xml_duplicate_rid.encode('utf-8')) + validator = SubArticleCollabIDValidation(xmltree, {}) + + results = list(validator.validate()) + errors = [r for r in results if r['response'] != 'OK'] + + # Deve haver erro de RID duplicado + self.assertGreater(len(errors), 0) + rid_errors = [e for e in errors if '@rid' in e['sub_item']] + self.assertGreater(len(rid_errors), 0) + + if __name__ == '__main__': - unittest.main() \ No newline at end of file + unittest.main()