Skip to content
This repository was archived by the owner on May 5, 2022. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 8 additions & 9 deletions openaddr/conform.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ def gdal_error_handler(err_class, err_num, err_msg):
'id': 'OA:id'
}

# Matches a float-style zero fraction ('.0', '.00', ...) that directly follows
# a digit and is not followed by another digit, so '12.0' and '12.000A' lose
# the fraction while '324.5' and '324.05' are left untouched.
# Raw string: '\.' is not a valid escape sequence in a plain string literal.
float_pattern = re.compile(r'(?<=[0-9])\.0+(?![0-9])')

var_types = attrib_types.copy()

UNZIPPED_DIRNAME = 'unzipped'
Expand Down Expand Up @@ -997,7 +999,7 @@ def row_fxn_join(sd, row, key, fxn):
"Create new columns by merging arbitrary other columns with a separator"
separator = fxn.get("separator", " ")
try:
fields = [(row[n] or u'').strip() for n in fxn["fields"]]
fields = [float_pattern.sub(u'', (row[n] or u'').strip()) for n in fxn["fields"]]
row[var_types[key]] = separator.join([f for f in fields if f])
except Exception as e:
_L.debug("Failure to merge row %r %s", e, row)
Expand Down Expand Up @@ -1072,13 +1074,12 @@ def row_fxn_format(sd, row, key, fxn):
parts.append(format_str[idx:start])

if field:
# if the value being added ends with '.0', remove it
# certain fields ending with '.0' are normalized by removing that
# if the value being added ends with '.0+', remove it
# certain fields ending with '.0+' are normalized by removing that
# suffix in row_canonicalize_unit_and_number but this isn't
# possible when not-the-last component fields submitted to the format
# function end with '.0'
if field.endswith(".0"):
field = field[:-2]
# function end with '.0+'
field = float_pattern.sub(u'', field)

parts.append(field)
num_fields_added += 1
Expand Down Expand Up @@ -1116,9 +1117,7 @@ def row_fxn_chain(sd, row, key, fxn):
def row_canonicalize_unit_and_number(sd, row):
    """Canonicalize address unit and number.

    Strips surrounding whitespace from the UNIT, NUMBER and STREET values of
    ``row``, treating a falsy value (such as None) as an empty string.
    NUMBER additionally has any float-style zero fraction removed through the
    module-level ``float_pattern`` (e.g. " 324.0 " -> "324",
    "324.00000000A" -> "324A"); values such as "324.5" are left unchanged.

    ``sd`` is not used by this function. ``row`` is updated in place and
    also returned.
    """
    row["UNIT"] = (row["UNIT"] or '').strip()
    # A single regex pass covers '.0' as well as longer runs such as '.000',
    # so no separate endswith(".0") check is needed.
    row["NUMBER"] = float_pattern.sub(u'', (row["NUMBER"] or u'').strip())
    row["STREET"] = (row["STREET"] or '').strip()
    return row

Expand Down
27 changes: 23 additions & 4 deletions openaddr/tests/conform.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,19 +70,26 @@ def test_row_fxn_join(self):
"function": "join",
"fields": ["b1","b2"],
"separator": "-"
},
"unit": {
"function": "join",
"fields": ["c1", "c2"],
"separator": ""
}
} }
d = { "a1": "va1", "b1": "vb1", "b2": "vb2" }
d = { "a1": "val1", "b1": "vb1", "b2": "vb2", "c1": "12.0", "c2": "A"}
e = copy.deepcopy(d)
e.update({ "OA:number": "va1", "OA:street": "vb1-vb2" })
e.update({ "OA:number": "val1", "OA:street": "vb1-vb2", "OA:unit": "12A" })
d = row_fxn_join(c, d, "number", c["conform"]["number"])
d = row_fxn_join(c, d, "street", c["conform"]["street"])
d = row_fxn_join(c, d, "unit", c["conform"]["unit"])
self.assertEqual(e, d)
d = { "a1": "va1", "b1": "vb1", "b2": None}
d = { "a1": "va1", "b1": "vb1", "b2": None, "c1": "12.00000", "c2": None}
e = copy.deepcopy(d)
e.update({ "OA:number": "va1", "OA:street": "vb1" })
e.update({ "OA:number": "va1", "OA:street": "vb1", "OA:unit": "12"})
d = row_fxn_join(c, d, "number", c["conform"]["number"])
d = row_fxn_join(c, d, "street", c["conform"]["street"])
d = row_fxn_join(c, d, "unit", c["conform"]["unit"])
self.assertEqual(e, d)

def test_row_fxn_format(self):
Expand All @@ -107,13 +114,23 @@ def test_row_fxn_format(self):
self.assertEqual(d.get("OA:street", ""), "foo 1B-3 bar")

d = copy.deepcopy(e)
d["a1"] = "12.0000000000"
d["a2"] = None
d["b3"] = None
d = row_fxn_format(c, d, "number", c["conform"]["number"])
d = row_fxn_format(c, d, "street", c["conform"]["street"])
self.assertEqual(d.get("OA:number", ""), "12-56")
self.assertEqual(d.get("OA:street", ""), "foo 1B bar")

d = copy.deepcopy(e)
d["a1"] = "12.0000000000A"
d["a2"] = None
d["b3"] = None
d = row_fxn_format(c, d, "number", c["conform"]["number"])
d = row_fxn_format(c, d, "street", c["conform"]["street"])
self.assertEqual(d.get("OA:number", ""), "12A-56")
self.assertEqual(d.get("OA:street", ""), "foo 1B bar")

def test_row_fxn_chain(self):
c = { "conform": {
"number": {
Expand Down Expand Up @@ -279,6 +296,8 @@ def test_row_canonicalize_unit_and_number(self):
# Tests for integer conversion
for e, a in (("324", " 324.0 "),
("", ""),
("324", "324.0000000"),
("324A", "324.00000000A"),
("3240", "3240"),
("INVALID", "INVALID"),
("324.5", "324.5")):
Expand Down