Skip to content

Commit

Permalink
table_from_frame: replace nan with "" for string variable
Browse files Browse the repository at this point in the history
  • Loading branch information
PrimozGodec committed Jan 18, 2022
1 parent c860359 commit 3c317f8
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 2 deletions.
2 changes: 1 addition & 1 deletion Orange/data/pandas_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ def vars_from_df(df, role=None, force_nominal=False):
raise ValueError("String variable must be in metas.")
_role = Role.Meta
var = StringVariable(str(column))
expr = lambda s, _: np.asarray(s, dtype=object)
expr = lambda s, _: np.asarray(s.fillna(""), dtype=object)

cols[_role].append(column)
exprs[_role].append(expr)
Expand Down
21 changes: 20 additions & 1 deletion Orange/data/tests/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,7 @@ def test_table_from_frame_timezones(self):
],
)

def test_table_from_frame_no_datetim(self):
def test_table_from_frame_no_datetime(self):
"""
In case when dtype of column is object and column contains numbers only,
column could be recognized as a TimeVariable since pd.to_datetime can parse
Expand All @@ -402,6 +402,25 @@ def test_table_from_frame_no_datetim(self):
# check that the column became a DiscreteVariable and not a TimeVariable
self.assertIsInstance(table.domain.attributes[0], DiscreteVariable)

def testa_table_from_frame_string(self):
    """
    String-like columns must be converted to StringVariable metas, and
    missing values (nan / pd.NA) must be replaced with the empty string,
    which is the unknown value for a string variable in an Orange table.
    """
    from Orange.data.pandas_compat import table_from_frame

    # s1 is cast to object dtype (missing value stays np.nan);
    # s2 is cast to pandas "string" dtype (missing value becomes pd.NA)
    rows = [["a", "b"], ["c", "d"], ["e", "f"], [np.nan, np.nan]]
    frame = pd.DataFrame(rows, columns=["s1", "s2"])
    frame = frame.astype({"s1": "object", "s2": "string"})

    table = table_from_frame(frame)

    # X is expected to be empty (4 rows, 0 columns)
    np.testing.assert_array_equal(np.empty((4, 0)), table.X)

    # the last row held the missing values and must now be empty strings
    expected_metas = np.array([["a", "b"], ["c", "d"], ["e", "f"], ["", ""]])
    np.testing.assert_array_equal(expected_metas, table.metas)

    metas_are_strings = all(
        isinstance(v, StringVariable) for v in table.domain.metas
    )
    self.assertTrue(metas_are_strings)

def test_time_variable_compatible(self):
from Orange.data.pandas_compat import table_from_frame

Expand Down

0 comments on commit 3c317f8

Please sign in to comment.