Skip to content

Commit

Permalink
Merge pull request #1424 from lanzagar/timevar
Browse files (browse the repository at this point in the history)
[ENH] SqlTable: Automatically recognize date/time fields
  • Loading branch information
kernc authored Jul 8, 2016
2 parents ff08bcd + baee17a commit b77c021
Show file tree
Hide file tree
Showing 4 changed files with 83 additions and 8 deletions.
19 changes: 16 additions & 3 deletions Orange/data/sql/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
psycopg2.pool = Orange.misc.import_late_warning("psycopg2.pool")

from .. import domain, variable, table, instance, filter,\
DiscreteVariable, ContinuousVariable, StringVariable
DiscreteVariable, ContinuousVariable, StringVariable, TimeVariable
from Orange.data.sql import filter as sql_filter


Expand Down Expand Up @@ -104,8 +104,12 @@ def get_domain(self, type_hints=None, guess_values=False):

def add_to_sql(var, field_name):
if var.is_continuous:
var.to_sql = ToSql("({})::double precision".format(
self.quote_identifier(field_name)))
if isinstance(var, TimeVariable):
var.to_sql = ToSql("extract(epoch from {})".format(
self.quote_identifier(field_name)))
else:
var.to_sql = ToSql("({})::double precision".format(
self.quote_identifier(field_name)))
elif var.is_discrete:
var.to_sql = ToSql("({})::text".format(
self.quote_identifier(field_name)))
Expand Down Expand Up @@ -137,10 +141,19 @@ def get_variable(self, field_name, type_code, inspect_values=False):
# PostgreSQL type OIDs (pg_type.oid) used to pick the variable type.
INT_TYPES = (20, 21, 23) # bigint (int8), smallint (int2), int (int4)
CHAR_TYPES = (25, 1042, 1043,) # text, char, varchar
BOOLEAN_TYPES = (16,) # bool
# timestamp (1114) and timestamptz (1184) appear in both DATE_TYPES and
# TIME_TYPES because they carry a date part as well as a time part.
DATE_TYPES = (1082, 1114, 1184, ) # date, timestamp, timestamptz
# time, timestamp, timestamptz, timetz
TIME_TYPES = (1083, 1114, 1184, 1266,)

if type_code in FLOATISH_TYPES:
return ContinuousVariable(field_name)

if type_code in TIME_TYPES + DATE_TYPES:
tv = TimeVariable(field_name)
tv.have_date |= type_code in DATE_TYPES
tv.have_time |= type_code in TIME_TYPES
return tv

if type_code in INT_TYPES: # bigint, int, smallint
if inspect_values:
values = self.get_distinct_values(field_name)
Expand Down
9 changes: 9 additions & 0 deletions Orange/data/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -986,3 +986,12 @@ def parse(self, datestr):
try: return dt.timestamp()
except OverflowError:
return -(self.UNIX_EPOCH - dt).total_seconds()

def to_val(self, s):
    """
    Convert a value, given as an instance of an arbitrary type, to a float.

    Strings are handed to ``self.parse``; every other input is delegated
    unchanged to the parent class's ``to_val``.

    :param s: the value to convert
    :return: ``self.parse(s)`` when ``s`` is a ``str``, otherwise the result
        of the inherited ``to_val(s)``
    """
    if isinstance(s, str):
        return self.parse(s)
    # Non-string input (e.g. an already numeric value) is left to the
    # inherited conversion.
    return super().to_val(s)
59 changes: 55 additions & 4 deletions Orange/tests/sql/test_sql_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@
# pylint: disable=missing-docstring

import unittest
import unittest.mock

import numpy as np
from numpy.testing import assert_almost_equal

from Orange.data import filter, ContinuousVariable, DiscreteVariable, \
StringVariable, Table, Domain
StringVariable, TimeVariable, Table, Domain
from Orange.data.sql.table import SqlTable
from Orange.preprocess.discretize import EqualWidth
from Orange.statistics.basic_stats import BasicStats, DomainBasicStats
Expand Down Expand Up @@ -367,16 +368,66 @@ def test_meta_varchar(self):
sql_table = SqlTable(conn, table_name, inspect_values=True)
self.assertFirstMetaIsInstance(sql_table, StringVariable)

def test_date(self):
def test_time_date(self):
table = np.array(['2014-04-12', '2014-04-13', '2014-04-14',
'2014-04-15', '2014-04-16']).reshape(-1, 1)
conn, table_name = self.create_sql_table(table, ['date'])

sql_table = SqlTable(conn, table_name, inspect_values=False)
self.assertFirstMetaIsInstance(sql_table, StringVariable)
self.assertFirstAttrIsInstance(sql_table, TimeVariable)

sql_table = SqlTable(conn, table_name, inspect_values=True)
self.assertFirstMetaIsInstance(sql_table, StringVariable)
self.assertFirstAttrIsInstance(sql_table, TimeVariable)

def test_time_time(self):
    """Check that a ``time`` column yields a TimeVariable.

    The check is repeated with ``inspect_values`` switched off and on.
    NOTE(review): ``create_sql_table`` presumably takes the column SQL
    types as its second argument -- confirm against the helper.
    """
    table = np.array(['17:39:51', '11:51:48.46', '05:20:21.492149',
                      '21:47:06', '04:47:35.8']).reshape(-1, 1)
    conn, table_name = self.create_sql_table(table, ['time'])

    for inspect_values in (False, True):
        sql_table = SqlTable(conn, table_name,
                             inspect_values=inspect_values)
        self.assertFirstAttrIsInstance(sql_table, TimeVariable)

def test_time_timetz(self):
    """Check that a ``timetz`` column yields a TimeVariable.

    The sample values mix several UTC-offset spellings and one value
    without any offset. The check is repeated with ``inspect_values``
    switched off and on.
    """
    table = np.array(['17:39:51+0200', '11:51:48.46+01', '05:20:21.4921',
                      '21:47:06-0600', '04:47:35.8+0330']).reshape(-1, 1)
    conn, table_name = self.create_sql_table(table, ['timetz'])

    for inspect_values in (False, True):
        sql_table = SqlTable(conn, table_name,
                             inspect_values=inspect_values)
        self.assertFirstAttrIsInstance(sql_table, TimeVariable)

def test_time_timestamp(self):
    """Check that a ``timestamp`` column yields a TimeVariable.

    The check is repeated with ``inspect_values`` switched off and on.
    """
    table = np.array(['2014-07-15 17:39:51.348149',
                      '2008-10-05 11:51:48.468149',
                      '2008-11-03 05:20:21.492149',
                      '2015-01-02 21:47:06.228149',
                      '2016-04-16 04:47:35.892149']).reshape(-1, 1)
    conn, table_name = self.create_sql_table(table, ['timestamp'])

    for inspect_values in (False, True):
        sql_table = SqlTable(conn, table_name,
                             inspect_values=inspect_values)
        self.assertFirstAttrIsInstance(sql_table, TimeVariable)

def test_time_timestamptz(self):
    """Check that a ``timestamptz`` column yields a TimeVariable.

    The sample values use several UTC-offset spellings. The check is
    repeated with ``inspect_values`` switched off and on.
    """
    table = np.array(['2014-07-15 17:39:51.348149+0200',
                      '2008-10-05 11:51:48.468149+02',
                      '2008-11-03 05:20:21.492149+01',
                      '2015-01-02 21:47:06.228149+0100',
                      '2016-04-16 04:47:35.892149+0330']).reshape(-1, 1)
    conn, table_name = self.create_sql_table(table, ['timestamptz'])

    for inspect_values in (False, True):
        sql_table = SqlTable(conn, table_name,
                             inspect_values=inspect_values)
        self.assertFirstAttrIsInstance(sql_table, TimeVariable)

def test_double_precision(self):
table = np.arange(25).reshape((-1, 1))
Expand Down
4 changes: 3 additions & 1 deletion Orange/tests/test_variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,12 +312,14 @@ class TestTimeVariable(VariableTest):
('1969-12-31 23:59:59', -1, '1969-12-31 23:59:59'),
('1900-01-01', -2208988800, '1900-01-01'),
('nan', np.nan, '?'),
('1444651991.81', 1444651991.81, '2015-10-12 12:13:11.810000'),
(1444651991.81, 1444651991.81, '2015-10-12 12:13:11.810000'),
]

def test_parse_repr(self):
for datestr, timestamp, outstr in self.TESTS:
var = TimeVariable('time')
ts = var.parse(datestr)
ts = var.to_val(datestr) # calls parse for strings
if not np.isnan(ts):
self.assertEqual(ts, timestamp, msg=datestr)
self.assertEqual(var.repr_val(ts), outstr, msg=datestr)
Expand Down

0 comments on commit b77c021

Please sign in to comment.