|
21 | 21 | from lingua_franca.lang.common_data_de import _DE_NUMBERS |
22 | 22 | from lingua_franca.lang.format_de import pronounce_number_de |
23 | 23 | from lingua_franca.time import now_local |
| 24 | +from lingua_franca.parse import normalize_decimals |
24 | 25 |
|
25 | 26 |
|
26 | 27 | de_numbers = { |
@@ -143,20 +144,28 @@ def repl(match): |
143 | 144 | return (duration, text) |
144 | 145 |
|
145 | 146 |
|
146 | | -def extract_number_de(text, short_scale=True, ordinals=False): |
| 147 | +def extract_number_de(text, short_scale=True, ordinals=False, decimal='.'): |
147 | 148 | """ |
148 | | - This function prepares the given text for parsing by making |
149 | | - numbers consistent, getting rid of contractions, etc. |
| 149 | + This function extracts a number from a text string, |
| 150 | + handles pronunciations in long scale and short scale |
| 151 | +
|
| 152 | + https://en.wikipedia.org/wiki/Names_of_large_numbers |
| 153 | +
|
150 | 154 | Args: |
151 | 155 | text (str): the string to extract a number from |
| 156 | + short_scale (bool): use short scale if True, long scale if False |
| 157 | + ordinals (bool): consider ordinal numbers, third=3 instead of 1/3 |
| 158 | + decimal (str): character to use as decimal point. defaults to '.' |
152 | 159 | Returns: |
153 | | - (int) or (float): The value of extracted number |
154 | | -
|
155 | | -
|
156 | | - undefined articles cannot be suppressed in German: |
157 | | - 'ein Pferd' means 'one horse' and 'a horse' |
| 160 | + (int) or (float) or False: The extracted number or False if no number |
| 161 | + was found |
| 162 | + Note: |
| 163 | + will always extract numbers formatted with a decimal dot/full stop, |
| 164 | + such as '3.5', even if 'decimal' is specified. |
158 | 165 |
|
159 | 166 | """ |
| 167 | + if decimal != '.': |
| 168 | + text = normalize_decimals(text, decimal) |
160 | 169 | # TODO: short_scale and ordinals don't do anything here. |
161 | 170 | # The parameters are present in the function signature for API compatibility |
162 | 171 | # reasons. |
@@ -1003,20 +1012,28 @@ def normalize_de(text, remove_articles=True): |
1003 | 1012 | return normalized[1:] # strip the initial space |
1004 | 1013 |
|
1005 | 1014 |
|
1006 | | -def extract_numbers_de(text, short_scale=True, ordinals=False): |
1007 | | - """ |
1008 | | - Takes in a string and extracts a list of numbers. |
1009 | | -
|
1010 | | - Args: |
1011 | | - text (str): the string to extract a number from |
1012 | | - short_scale (bool): Use "short scale" or "long scale" for large |
1013 | | - numbers -- over a million. The default is short scale, which |
1014 | | - is now common in most English speaking countries. |
1015 | | - See https://en.wikipedia.org/wiki/Names_of_large_numbers |
1016 | | - ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3 |
1017 | | - Returns: |
1018 | | - list: list of extracted numbers as floats |
| 1015 | +def extract_numbers_de(text, short_scale=True, ordinals=False, decimal='.'): |
1019 | 1016 | """ |
| 1017 | + This function extracts a list of numbers from a text string, |
| 1018 | + handles pronunciations in long scale and short scale |
| 1019 | +
|
| 1020 | + https://en.wikipedia.org/wiki/Names_of_large_numbers |
| 1021 | +
|
| 1022 | + Args: |
| 1023 | + text (str): the string to extract numbers from |
| 1024 | + short_scale (bool): use short scale if True, long scale if False |
| 1025 | + ordinals (bool): consider ordinal numbers, third=3 instead of 1/3 |
| 1026 | + decimal (str): character to use as decimal point. defaults to '.' |
| 1027 | + Returns: |
| 1028 | + list: list of the numbers extracted from the text, |
| 1029 | + as floats |
| 1030 | + Note: |
| 1031 | + will always extract numbers formatted with a decimal dot/full stop, |
| 1032 | + such as '3.5', even if 'decimal' is specified. |
| 1033 | +
|
| 1034 | + """ |
| 1035 | + if decimal != '.': |
| 1036 | + text = normalize_decimals(text, decimal) |
1020 | 1037 | return extract_numbers_generic(text, pronounce_number_de, extract_number_de, |
1021 | 1038 | short_scale=short_scale, ordinals=ordinals) |
1022 | 1039 |
|
|
0 commit comments