Skip to content

Commit

Permalink
Table.from_table: Obey is_sparse when returning subarrays
Browse files Browse the repository at this point in the history
When we returned subarrays, the flag `is_sparse` wasn't considered; we simply returned the subarray in its original format. Also, make sure subarrays aren't flattened to 1d; flattening is required only for single columns.
  • Loading branch information
nikicc committed Jun 2, 2017
1 parent b2ec2f8 commit 120f92e
Showing 1 changed file with 34 additions and 22 deletions.
56 changes: 34 additions & 22 deletions Orange/data/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,27 @@ def from_table(cls, domain, source, row_indices=...):

def get_columns(row_indices, src_cols, n_rows, dtype=np.float64,
is_sparse=False):
def match_type(x, force_1d=False):
    """Assure that matrix and column are both dense or sparse.

    The requested density is read from the enclosing ``is_sparse`` flag.

    Args:
        x (np.ndarray, scipy.sparse): data
        force_1d (bool): If set, treat the data as a single column: a
            densified result is flattened to 1d and a sparsified result
            is stored as one ``(n, 1)`` column. Ignored when ``x``
            already has the requested density.

    Returns:
        array of correct density.
    """
    if is_sparse == sp.issparse(x):
        # Already in the requested format; return it untouched.
        return x
    if is_sparse:
        # np.float64 replaces the np.float alias (removed in NumPy 1.24).
        x = np.asarray(x, dtype=np.float64)
        if force_1d or x.ndim < 2:
            # A single column is stored as an (n, 1) sparse matrix.
            x = x.reshape(-1, 1)
        # 2-d subarrays keep their shape instead of being collapsed into
        # one long column.
        return sp.csc_matrix(x)
    x = x.toarray()
    if force_1d:
        x = np.ravel(x)
    return x

match_type_1d = lambda x: match_type(x, force_1d=True)

if not len(src_cols):
if is_sparse:
Expand All @@ -278,33 +299,24 @@ def get_columns(row_indices, src_cols, n_rows, dtype=np.float64,
n_src_attrs = len(source.domain.attributes)
if all(isinstance(x, Integral) and 0 <= x < n_src_attrs
for x in src_cols):
return _subarray(source.X, row_indices, src_cols)
return match_type(_subarray(source.X, row_indices, src_cols))
if all(isinstance(x, Integral) and x < 0 for x in src_cols):
arr = _subarray(source.metas, row_indices,
[-1 - x for x in src_cols])
arr = match_type(_subarray(source.metas, row_indices,
[-1 - x for x in src_cols]))
if arr.dtype != dtype:
return arr.astype(dtype)
return arr
if all(isinstance(x, Integral) and x >= n_src_attrs
for x in src_cols):
return _subarray(source._Y, row_indices,
[x - n_src_attrs for x in src_cols])
return match_type(_subarray(
source._Y, row_indices,
[x - n_src_attrs for x in src_cols]))

if is_sparse:
a = sp.dok_matrix((n_rows, len(src_cols)), dtype=dtype)
else:
a = np.empty((n_rows, len(src_cols)), dtype=dtype)

def match_type(x):
    """Assure that matrix and column are both dense or sparse.

    The requested density is read from the enclosing ``is_sparse`` flag.
    Data that already has that density is returned unchanged; otherwise
    dense data becomes an ``(n, 1)`` sparse column and sparse data
    becomes a flat 1d dense array.
    """
    if is_sparse == sp.issparse(x):
        return x
    if is_sparse:
        # np.float64 replaces the np.float alias (removed in NumPy 1.24).
        x = np.asarray(x)
        return sp.csc_matrix(x.reshape(-1, 1).astype(np.float64))
    return np.ravel(x.toarray())

shared_cache = _conversion_cache
for i, col in enumerate(src_cols):
if col is None:
Expand All @@ -316,22 +328,22 @@ def match_type(x):
col.compute_shared(source)
shared = shared_cache[id(col.compute_shared), id(source)]
if row_indices is not ...:
a[:, i] = match_type(
a[:, i] = match_type_1d(
col(source, shared_data=shared)[row_indices])
else:
a[:, i] = match_type(
a[:, i] = match_type_1d(
col(source, shared_data=shared))
else:
if row_indices is not ...:
a[:, i] = match_type(col(source)[row_indices])
a[:, i] = match_type_1d(col(source)[row_indices])
else:
a[:, i] = match_type(col(source))
a[:, i] = match_type_1d(col(source))
elif col < 0:
a[:, i] = match_type(source.metas[row_indices, -1 - col])
a[:, i] = match_type_1d(source.metas[row_indices, -1 - col])
elif col < n_src_attrs:
a[:, i] = match_type(source.X[row_indices, col])
a[:, i] = match_type_1d(source.X[row_indices, col])
else:
a[:, i] = match_type(
a[:, i] = match_type_1d(
source._Y[row_indices, col - n_src_attrs])

if is_sparse:
Expand Down

0 comments on commit 120f92e

Please sign in to comment.