-
Notifications
You must be signed in to change notification settings - Fork 104
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Patsy pickling #67
base: master
Are you sure you want to change the base?
Patsy pickling #67
Changes from all commits
34bc09b
3bc08d6
d5ed045
d431764
71e4a64
ddbf6be
3164cd9
c5a8e75
62b1ce4
c870d4d
858a623
4b74a05
12c7471
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
from __future__ import print_function | ||
|
||
import six | ||
from six.moves import cPickle as pickle | ||
|
||
import os | ||
import shutil | ||
|
||
from patsy import EvalFactor | ||
|
||
PICKE_TESTCASES_ROOTDIR = os.path.join(os.path.dirname(__file__), '..', 'pickle_testcases') | ||
|
||
pickling_testcases = { | ||
"evalfactor_simple": EvalFactor("a+b"), | ||
} | ||
|
||
def test_pickling_same_version_roundtrips(): | ||
for obj in six.itervalues(pickling_testcases): | ||
yield (check_pickling_same_version_roundtrips, obj) | ||
|
||
def check_pickling_same_version_roundtrips(obj): | ||
assert obj == pickle.loads(pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)) | ||
|
||
def test_pickling_old_versions_still_work(): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd call this and the next function |
||
for (dirpath, dirnames, filenames) in os.walk(PICKE_TESTCASES_ROOTDIR): | ||
for fname in filenames: | ||
if os.path.splitext(fname)[1] == '.pickle': | ||
yield check_pickling_old_versions_still_work, os.path.join(dirpath, fname) | ||
|
||
def check_pickling_old_versions_still_work(pickle_filename): | ||
testcase_name = os.path.splitext(os.path.basename(pickle_filename))[0] | ||
with open(pickle_filename, 'rb') as f: | ||
# When adding features to a class, it will happen that there is no | ||
# way to make an instance of that version version of that class | ||
# equal to any instance of a previous version. How do we handle | ||
# that? | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. typo: And I think the answer here is probably that when we unpickle an old object into a new patsy, if the new patsy has some new features, then we'll have to somehow initialize them for the old object (like fill in zeros or empty strings or whatever), because we need some way to represent it in memory. So we can put in a test case here that uses those default field values, and also add a new object that actually uses the new fields in a non-trivial way. |
||
# Maybe adding a minimum version requirement to each test? | ||
assert pickling_testcases[testcase_name] == pickle.load(f) | ||
|
||
def test_unpickling_future_gives_sensible_error_msg(): | ||
# TODO How would we go about testing this? | ||
pass | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I guess just test that we get a |
||
|
||
def create_pickles(version): | ||
# TODO Add options to overwrite pickles directory, with force=True | ||
# during development. | ||
# TODO Add safety check that said force=True option will still give an | ||
# error when trying to remove pickles for a released version, by | ||
# comparing the version argument here with patsy.__version__. | ||
pickle_testcases_dir = os.path.join(PICKE_TESTCASES_ROOTDIR, version) | ||
if os.path.exists(pickle_testcases_dir): | ||
raise OSError("{} already exists. Aborting.".format(pickle_testcases_dir)) | ||
pickle_testcases_tempdir = pickle_testcases_dir + "_inprogress" | ||
os.mkdir(pickle_testcases_tempdir) | ||
|
||
try: | ||
for name, obj in six.iteritems(pickling_testcases): | ||
with open(os.path.join(pickle_testcases_tempdir, "{}.pickle".format(name)), "wb") as f: | ||
pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL) | ||
except Exception: | ||
print("Exception during creation of pickles for {}. " \ | ||
"Removing partially created directory.".format(version)) | ||
shutil.rmtree(pickle_testcases_tempdir) | ||
raise | ||
finally: | ||
os.rename(pickle_testcases_tempdir, pickle_testcases_dir) | ||
print("Successfully created pickle testcases for new version {}.".format(version)) | ||
|
||
if __name__ == "__main__": | ||
import argparse | ||
|
||
arg_parser = argparse.ArgumentParser(description="Create and save pickle testcases for a new version of patsy.") | ||
arg_parser.add_argument("version", help="The version of patsy for which to save a new set of pickle testcases.") | ||
args = arg_parser.parse_args() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It might simplify the logic here if you know that our policy right now is that patsy version numbers are always of the form |
||
|
||
create_pickles(args.version) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,6 +24,7 @@ | |
] | ||
|
||
import sys | ||
from nose.tools import assert_raises | ||
import numpy as np | ||
import six | ||
from six.moves import cStringIO as StringIO | ||
|
@@ -698,7 +699,6 @@ def no_pickling(*args, **kwargs): | |
|
||
def assert_no_pickling(obj): | ||
import pickle | ||
from nose.tools import assert_raises | ||
assert_raises(NotImplementedError, pickle.dumps, obj) | ||
|
||
# Use like: | ||
|
@@ -720,3 +720,23 @@ def test_safe_string_eq(): | |
assert safe_string_eq(unicode("foo"), "foo") | ||
|
||
assert not safe_string_eq(np.empty((2, 2)), "foo") | ||
|
||
def check_pickle_version(version, required_version, name=""): | ||
if version > required_version: | ||
error_msg = "This version of patsy is too old to load this pickle" | ||
elif version < required_version: | ||
error_msg = "This pickle is too old and not supported by this version of patsy anymore" | ||
else: | ||
return | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Presumably we'll eventually want to support multiple pickle versions at the same time -- but I guess we can always refactor this when the time comes :-) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Specifically I think our goal will be to never stop supporting old pickles, so the "too old" error should never be needed... we'll see what happens though :-) |
||
|
||
if name: | ||
error_msg += " (for object {})".format(name) | ||
error_msg += "." | ||
|
||
# TODO Use a better exception than ValueError. | ||
raise ValueError(error_msg) | ||
|
||
def test_check_pickle_version(): | ||
assert_raises(ValueError, check_pickle_version, 0, 1) | ||
assert_raises(ValueError, check_pickle_version, 1, 0) | ||
check_pickle_version(0, 0) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
s/PICKE/PICKLE/, I assume?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also, hmm, so it looks like you're storing the pickle tests outside the source tree. Right now we go to some effort to ship the test suite, so that we can test patsy-as-installed... and I think the current CI stuff tries hard to run the test suite without access to the source tree, to make sure that this works. So this is definitely a change. Maybe it's a good change if the pickle test suite is large... but we'll need to do some more work in that case to make it really work. OTOH, maybe the pickle test suite is small, in which case we might as well ship it, because testing is important.
Probably the simplest thing to do is to move it into a
patsy/test_data
directory inside the package for now, and then we can always move it back out again if we decide that we have too many megabytes of test pickles. (This will probably require some annoying change to setup.py as well to include the test data -- example.)