Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 17 additions & 9 deletions birdnames/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@
"""

import warnings
import pandas as pd
from pathlib import Path
from typing import Tuple, Union, List, Optional
from typing import List, Optional, Tuple, Union

import numpy as np
from .utils import fuzzy_match, TAXONOMIES, load_taxonomy, normalize_string
import pandas as pd

from .utils import TAXONOMIES, fuzzy_match, load_taxonomy, normalize_string


def _get_column_name(name_type: str, authority: str) -> str:
Expand Down Expand Up @@ -83,10 +84,15 @@ def __init__(
# create pd.Series for mapping from one name type to another
source_taxonomy = load_taxonomy(from_authority, from_year)
same_taxonomy = to_authority == from_authority and to_year == from_year
# if converting to the same type, create identity mapping
if from_col == to_col:
self.lookup = source_taxonomy[[from_col]].copy()
# Create a new column with different name temporarily for the identity mapping
self.lookup["_temp_col"] = self.lookup[from_col]
# if within a taxonomy: simply index=from and values=to
# if converting to scientific name, we don't need to cross taxonomies
if to_col == "scientific_name" or same_taxonomy:
self.lookup = source_taxonomy[[from_col, to_col]]
elif to_col == "scientific_name" or same_taxonomy:
self.lookup = source_taxonomy[[from_col, to_col]].copy()
else:
# dest_cols = [to_col] if to_col != "scientific_name" else []
dest_taxonomy = load_taxonomy(to_authority, to_year).set_index(
Expand All @@ -97,7 +103,7 @@ def __init__(
self.lookup = source_taxonomy.set_index("scientific_name")[
source_cols
].join(dest_taxonomy)
self.lookup = self.lookup.reset_index(drop=False)[[from_col, to_col]]
self.lookup = self.lookup.reset_index(drop=False)[[from_col, to_col]].copy()

# if soft matching, apply normalization to source column
if soft_matching:
Expand All @@ -110,7 +116,9 @@ def __init__(
self.lookup = self.lookup.drop_duplicates(subset=[from_col])

# convert to a pd.Series for fast lookup
self.lookup = self.lookup.set_index(from_col)[to_col]
# Use '_temp_col' if it exists (identity mapping case), otherwise use to_col
value_col = "_temp_col" if "_temp_col" in self.lookup.columns else to_col
self.lookup = self.lookup.set_index(from_col)[value_col]

def _get_most_recent_year(self, authority: str) -> str:
"""Get the most recent year available for an authority.
Expand Down Expand Up @@ -443,7 +451,7 @@ def common(
authorities_with_common_name = set(
TAXONOMIES[TAXONOMIES["common_name"] == True]["authority"].values
)
if not common_name_authority in authorities_with_common_name:
if common_name_authority not in authorities_with_common_name:
raise ValueError(
f"`common_name_authority` must be one of {authorities_with_common_name}. Got {common_name_authority}."
)
Expand Down
48 changes: 43 additions & 5 deletions tests/test_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,18 @@
Comprehensive tests for the Converter class.
"""

import pytest
import pandas as pd
import numpy as np
from pathlib import Path
import sys
from pathlib import Path

import numpy as np
import pandas as pd
import pytest

# Add src to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent))

from birdnames import Converter
import birdnames
from birdnames import Converter


class TestConverter:
Expand Down Expand Up @@ -395,6 +396,34 @@ def test_bbl_cross_authority_conversion(self):
# Just ensure it doesn't crash and returns something reasonable
assert result is None or isinstance(result, str)

def test_identity_conversion(self):
    """Verify that converting a name type to itself yields the input unchanged.

    Exercises scientific-name -> scientific-name (single string and list
    input) and common-name -> common-name, with "avilist" as both the
    source and the destination authority.
    """
    authority = "avilist"

    # scientific name -> scientific name
    sci_to_sci = Converter(
        from_type="scientific_name",
        to_type="scientific_name",
        from_authority=authority,
        to_authority=authority,
    )
    assert sci_to_sci.convert("Struthio camelus") == "Struthio camelus"

    # list input should come back as an equal list
    species = ["Struthio camelus", "Struthio molybdophanes"]
    # pass a copy so the expected value cannot be affected by the call
    assert sci_to_sci.convert(list(species)) == species

    # common name -> common name
    common_to_common = Converter(
        from_type="common_name",
        to_type="common_name",
        from_authority=authority,
        to_authority=authority,
    )
    assert common_to_common.convert("Common Ostrich") == "Common Ostrich"


def test_determine_name_type():
"""Test automatic detection of name type and authority."""
Expand Down Expand Up @@ -461,6 +490,15 @@ def test_scientific():
# test with ebird codes
assert birdnames.scientific(["norcar"]) == ["Cardinalis cardinalis"]

# test with scientific names (issue fix: should return input as-is)
scientific_names = ["Struthio camelus", "Struthio molybdophanes"]
result = birdnames.scientific(scientific_names)
assert result == ["Struthio camelus", "Struthio molybdophanes"]

# test with single scientific name
result = birdnames.scientific("Struthio camelus")
assert result == "Struthio camelus"


def test_common():
"""Test the convenience function for common name conversion."""
Expand Down