Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace JSON serialization with cloudpickle and make reference semantics explicit. #153

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,21 @@
# Changelog

## 0.5.0

- replaced JSON serialization with cloudpickle. This allows extracting a much
wider range of objects from the notebook subprocess.
- Reference semantics have changed.
- Old behavior of `tb.get(name)` and `tb[name]`:
- a reference would be returned for non-JSON-serializable objects.
- a value would be returned for JSON-serializable objects.
- Old behavior of `tb.ref(name)` was identical to `tb.get(name)`.
- However, now almost all objects are serializable and as a result, under
the old semantics, a reference would almost never be returned. Therefore,
when a reference is desired, we now require explicitly requesting a
reference. The new behavior of `tb.get(name)` and `tb[name]` is to always
return the deserialized object and to never return a reference. The new
behavior of `tb.ref(name)` is to always return a reference.

## 0.4.2

- Documentation and CoC updates to improve developer access (Thank you PyLadies Vancouver!)
Expand Down
2 changes: 1 addition & 1 deletion docs/usage/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ my_list = ['list', 'from', 'notebook']

Reference objects to functions can be called with,

- explicit JSON serializable values (like `dict`, `list`, `int`, `float`, `str`, `bool`, etc)
- explicit cloudpickle-serializable values (almost all Python objects)
- other reference objects

```{code-block} python
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
nbformat>=5.0.4
nbclient>=0.4.0
cloudpickle >= 2.0.0
66 changes: 27 additions & 39 deletions testbook/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from nbclient import NotebookClient
from nbclient.exceptions import CellExecutionError
from nbformat.v4 import new_code_cell
import cloudpickle

from .exceptions import (
TestbookCellTagNotFoundError,
Expand Down Expand Up @@ -38,17 +39,13 @@ def ref(self, name: str) -> Union[TestbookObjectReference, Any]:

# Check if exists
self.inject(name, pop=True)
try:
self.inject(f"import json; json.dumps({name})", pop=True)
return self.value(name)
except Exception:
return TestbookObjectReference(self, name)
return TestbookObjectReference(self, name)

def get(self, item):
return self.ref(item)
return self.value(item)

def __getitem__(self, item):
return self.ref(item)
return self.value(item)

@staticmethod
def _construct_call_code(
Expand Down Expand Up @@ -248,11 +245,7 @@ def inject(

def value(self, code: str) -> Any:
"""
Execute given code in the kernel and return JSON serializeable result.

If the result is not JSON serializeable, it raises `TestbookAttributeError`.
This error object will also contain an attribute called `save_varname` which
can be used to create a reference object with :meth:`ref`.
Execute given code in the kernel and returns the serializeable result.

Parameters
----------
Expand All @@ -276,35 +269,30 @@ def value(self, code: str) -> Any:
raise TestbookExecuteResultNotFoundError(
'code provided does not produce execute_result'
)

save_varname = random_varname()

inject_code = f"""
import json
from IPython import get_ipython
from IPython.display import JSON

{save_varname} = get_ipython().last_execution_result.result

json.dumps({save_varname})
JSON({{"value" : {save_varname}}})
"""

try:
outputs = self.inject(inject_code, pop=True).outputs

if outputs[0].output_type == "error":
# will receive error when `allow_errors` is set to True
raise TestbookRuntimeError(
outputs[0].evalue, outputs[0].traceback, outputs[0].ename
)

return outputs[0].data['application/json']['value']

except TestbookRuntimeError:
e = TestbookSerializeError('could not JSON serialize output')
e.save_varname = save_varname
raise e
import tempfile
with tempfile.NamedTemporaryFile() as tmp:
try:
inject_code = f"""
import cloudpickle
{save_varname} = get_ipython().last_execution_result.result
with open('{tmp.name}', 'wb') as f:
cloudpickle.dump({save_varname}, f)
"""
outputs = self.inject(inject_code, pop=True).outputs
if len(outputs) > 0 and outputs[0].output_type == "error":
# will receive error when `allow_errors` is set to True
raise TestbookRuntimeError(
outputs[0].evalue, outputs[0].traceback, outputs[0].ename
)
with open(tmp.name, 'rb') as f:
return cloudpickle.load(f)
except TestbookRuntimeError:
e = TestbookSerializeError('could not serialize output')
e.save_varname = save_varname
raise e

@contextmanager
def patch(self, target, **kwargs):
Expand Down
2 changes: 1 addition & 1 deletion testbook/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ class TestbookCellTagNotFoundError(TestbookError):


class TestbookSerializeError(TestbookError):
"""Raised when output cannot be JSON serialized"""
"""Raised when output cannot be serialized"""

pass

Expand Down
45 changes: 15 additions & 30 deletions testbook/tests/test_reference.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,28 +12,28 @@ def notebook():

def test_create_reference(notebook):
a = notebook.ref("a")
assert repr(a) == "[1, 2, 3]"
assert repr(a) == "'[1, 2, 3]'"


def test_create_reference_getitem(notebook):
a = notebook["a"]
assert repr(a) == "[1, 2, 3]"
def test_create_reference_resolve(notebook):
a = notebook.ref("a")
assert a.resolve() == [1, 2, 3]


def test_create_reference_get(notebook):
def test_notebook_get_value(notebook):
a = notebook.get("a")
assert repr(a) == "[1, 2, 3]"
assert a == [1, 2, 3]


def test_eq_in_notebook(notebook):
a = notebook.ref("a")
a.append(4)
assert a == [1, 2, 3, 4]
assert a.resolve() == [1, 2, 3, 4]


def test_eq_in_notebook_ref(notebook):
a, b = notebook.ref("a"), notebook.ref("b")
assert a == b
assert a.resolve()[:3] == b


def test_function_call(notebook):
Expand All @@ -43,28 +43,13 @@ def test_function_call(notebook):

def test_function_call_with_ref_object(notebook):
double, a = notebook.ref("double"), notebook.ref("a")

assert double(a) == [2, 4, 6]
# a.append(4) above applied to the referenced "a" and therefore is
# reflected here
assert double(a) == [2, 4, 6, 8]


def test_reference(notebook):
def test_nontrivial_pickling(notebook):
Foo = notebook.ref("Foo")

# Check that when a non-serializeable object is returned, it returns
# a reference to that object instead
f = Foo('bar')

assert repr(f) == "\"<Foo value='bar'>\""

# Valid attribute access
assert f.say_hello()

# Invalid attribute access
with pytest.raises(TestbookAttributeError):
f.does_not_exist

assert f.say_hello() == 'Hello bar!'

# non JSON-serializeable output
with pytest.raises(TestbookSerializeError):
f.resolve()
f = Foo("bar")
assert repr(f) == "<Foo value='bar'>"
assert(f.say_hello() == "Hello bar!")