Skip to content

Commit

Permalink
Merge pull request #4915 from ales-erjavec/fixes/owcsvimport-number-sep
Browse files Browse the repository at this point in the history
[FIX] owcsvimport: Handle decimal and thousand separator
  • Loading branch information
ajdapretnar authored Jul 24, 2020
2 parents 945e235 + 29c053c commit 4004f6d
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 1 deletion.
9 changes: 8 additions & 1 deletion Orange/widgets/data/owcsvimport.py
Original file line number Diff line number Diff line change
Expand Up @@ -1235,6 +1235,14 @@ def expand(ranges):
if opts.group_separator != "":
numbers_format_kwds["thousands"] = opts.group_separator

if numbers_format_kwds:
# float_precision="round_trip" cannot handle a non-C-locale decimal or
# thousands separator (https://github.com/pandas-dev/pandas/issues/35365),
# so fall back to 'high' when custom number separators are configured.
numbers_format_kwds["float_precision"] = "high"
else:
numbers_format_kwds["float_precision"] = "round_trip"

with ExitStack() as stack:
if isinstance(path, (str, bytes)):
f = stack.enter_context(_open(path, 'rb'))
Expand All @@ -1253,7 +1261,6 @@ def expand(ranges):
header=header, skiprows=skiprows,
dtype=dtypes, parse_dates=parse_dates, prefix=prefix,
na_values=na_values, keep_default_na=False,
float_precision="round_trip",
**numbers_format_kwds
)
df = guess_types(df, dtypes, columns_ignored)
Expand Down
19 changes: 19 additions & 0 deletions Orange/widgets/data/tests/test_owcsvimport.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# pylint: disable=no-self-use
import unittest
from unittest import mock
from contextlib import ExitStack
Expand Down Expand Up @@ -287,6 +288,24 @@ class dialect(csv.excel):
assert_array_equal(tb.X[:, 1], [0, np.nan, np.nan])
assert_array_equal(tb.X[:, 2], [np.nan, 1, np.nan])

def test_decimal_format(self):
    """Check that load_csv honours custom decimal and group separators.

    The input is ';'-delimited and writes numbers with ',' as the decimal
    mark and '.' as the thousands mark, so '1.000,142' must load as
    1000.142.
    """
    class Dialect(csv.excel):
        delimiter = ";"

    raw = b'3,21;3,37\n4,13;1.000,142'
    # Both columns are declared numeric up front.
    numeric_columns = [(range(0, 2), ColumnType.Numeric)]
    options = owcsvimport.Options(
        encoding="ascii",
        dialect=Dialect(),
        decimal_separator=",",
        group_separator=".",
        columntypes=numeric_columns,
        rowspec=[],
    )

    frame = owcsvimport.load_csv(io.BytesIO(raw), options)

    expected = np.array([[3.21, 3.37], [4.13, 1000.142]])
    assert_array_equal(frame.values, expected)


# Run this module's tests through unittest when the file is executed directly.
if __name__ == "__main__":
unittest.main()

0 comments on commit 4004f6d

Please sign in to comment.