Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RequestURL and ResponseURL #42

Merged
merged 11 commits into from
Jun 9, 2022
47 changes: 43 additions & 4 deletions tests/test_page_inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
import aiohttp.web_response
import pytest
import requests

import parsel

from web_poet import RequestUrl, ResponseUrl
from web_poet._base import _Url
from web_poet.page_inputs import (
HttpRequest,
HttpResponse,
Expand Down Expand Up @@ -72,7 +74,7 @@ def test_http_defaults(cls, body_cls):
http_body = body_cls(b"content")

obj = cls("url", body=http_body)
assert obj.url == "url"
assert str(obj.url) == "url"
kmike marked this conversation as resolved.
Show resolved Hide resolved
assert obj.body == b"content"
assert not obj.headers
assert obj.headers.get("user-agent") is None
Expand Down Expand Up @@ -164,7 +166,8 @@ def test_http_headers_init_dict(cls, headers_cls):

def test_http_request_init_minimal():
req = HttpRequest("url")
assert req.url == "url"
assert str(req.url) == "url"
assert isinstance(req.url, RequestUrl)
assert req.method == "GET"
assert isinstance(req.method, str)
assert not req.headers
Expand All @@ -189,12 +192,20 @@ def test_http_request_init_full():
http_body = HttpRequestBody(b"body")
req_2 = HttpRequest("url", method="POST", headers=http_headers, body=http_body)

assert req_1.url == req_2.url
assert str(req_1.url) == str(req_2.url)
assert req_1.method == req_2.method
assert req_1.headers == req_2.headers
assert req_1.body == req_2.body


def test_http_request_init_with_response_url():
resp = HttpResponse("url", b"")
assert isinstance(resp.url, ResponseUrl)
req = HttpRequest(resp.url)
assert isinstance(req.url, RequestUrl)
assert str(req.url) == str(resp.url)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Gallaecio has raised a good point if HttpResponse("url") and HttpRequest("url") are equal. We should add a test for such.

Copy link
Member Author

@kmike kmike Jun 7, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've added a test which shows that they're not equal.

But I'm not sure this behavior is useful. What kind of errors would it prevent?

I wonder if it'd better to leave it "undefined", instead of solidifying this behavior in tests (if that's even a reasonable approach :)

pathlib does some magic when comparing paths, it's not a simple string match. E.g. on Windows path components are lower-cased before comparison.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The test simply solidifies the expectation to users in case they're unsure. As a user, I might expect when initially using it for the first time that the expression response.url == request.url holds True. However, I need to cast it to its string value to bear True.

I also expect such test to conveniently break when we override __eq__ to another behavior. It helps future PRs denote that an equality expectation will change.


def test_http_response_headers_from_bytes_dict():
raw_headers = {
b"Content-Length": [b"316"],
Expand Down Expand Up @@ -434,3 +445,31 @@ def test_browser_html():
assert html.xpath("//p/text()").getall() == ["Hello, ", "world!"]
assert html.css("p::text").getall() == ["Hello, ", "world!"]
assert isinstance(html.selector, parsel.Selector)


def test_url_base_class():
url_str = "http://example.com"
url = _Url(url_str)
assert str(url) == url_str
assert repr(url) == "_Url('http://example.com')"

with pytest.raises(TypeError):
_Url(123)


def test_url_subclass():
url_str = "http://example.com"

class MyUrl(_Url):
pass

class MyUrl2(_Url):
pass

url = MyUrl(url_str)
assert str(url) == url_str
assert url._url == url_str
assert repr(url) == "MyUrl('http://example.com')"

url2 = MyUrl2(url)
assert str(url2) == str(url)
12 changes: 6 additions & 6 deletions tests/test_requests.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def async_mock():
"""Workaround since python 3.7 doesn't ship with asyncmock."""

async def async_test(req):
return HttpResponse(req.url, body=b"")
return HttpResponse(str(req.url), body=b"")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we should be casting this to str as the tests that rely on this mock are expecting ResponseUrl. We'd need to update the downstream tests instead to validate their expectations.

Copy link
Member Author

@kmike kmike Jun 3, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I must admit I failed to make tests working, and applied this hack instead :) Mocks are not my friends.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was thinking it's harmless, because tests should be getting ResponseUrl, as HttpResponse should be converting str to ResponseUrl; without having mocks in mind the change doesn't really change the behavior. But I'm likely missing some interactions with mocks which are in effect here.


mock.MagicMock.__await__ = lambda x: async_test(x).__await__()

Expand All @@ -37,7 +37,7 @@ async def test_perform_request_from_httpclient(async_mock):
response = await client.get(url)

# The async downloader implementation should return the HttpResponse
assert response.url == url
assert str(response.url) == str(url)
assert isinstance(response, HttpResponse)


Expand All @@ -47,15 +47,15 @@ async def test_http_client_single_requests(async_mock):

with mock.patch("web_poet.page_inputs.client.HttpRequest") as mock_request:
response = await client.request("url")
response.url == "url"
str(response.url) == "url"

response = await client.get("url-get", headers={"X-Headers": "123"})
response.url == "url-get"
str(response.url) == "url-get"

response = await client.post(
"url-post", headers={"X-Headers": "123"}, body=b"body value"
)
response.url == "url-post"
str(response.url) == "url-post"

assert mock_request.call_args_list == [
mock.call(
Expand Down Expand Up @@ -162,7 +162,7 @@ async def test_http_client_execute(async_mock):
response = await client.execute(request)

assert isinstance(response, HttpResponse)
assert response.url == "url-1"
assert str(response.url) == "url-1"


@pytest.mark.asyncio
Expand Down
6 changes: 4 additions & 2 deletions web_poet/__init__.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
from .pages import WebPage, ItemPage, ItemWebPage, Injectable
from .requests import request_backend_var
from .page_inputs import (
Meta,
BrowserHtml,
HttpClient,
HttpRequest,
HttpResponse,
HttpRequestHeaders,
HttpResponseHeaders,
HttpRequestBody,
HttpResponseBody,
BrowserHtml,
Meta,
RequestUrl,
ResponseUrl,
)
from .overrides import PageObjectRegistry, consume_modules, OverrideRule

Expand Down
18 changes: 17 additions & 1 deletion web_poet/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"""


from typing import Type, TypeVar, List, Dict
from typing import Type, TypeVar, List, Dict, Union

from multidict import CIMultiDict

Expand Down Expand Up @@ -32,3 +32,19 @@ def from_name_value_pairs(cls: Type[T_headers], arg: List[Dict]) -> T_headers:
<_HttpHeaders('Content-Encoding': 'gzip', 'content-length': '648')>
"""
return cls([(pair["name"], pair["value"]) for pair in arg])


class _Url:
""" Base URL class.
"""
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hey @BurnzZ @Gallaecio! I tried to follow @Gallaecio's suggestion, and created a minimal version of the Url base class. To keep it minimal, I haven't added anything useful to the _Url class; that's the difference with #45

On one hand, it allows us to improve _Url class in future almost without any restrictions.

On the other hand, it kind-of gets in a way now. With this implementation, users need to cast to str almost always: for example, HttpClient backends would always need to do str(request.url).

So, for the users the current implementation is more cumbersome than just using str, or having URL a str subclass, without any benefits. It may be ok.

def __init__(self, url: Union[str, '_Url']):
if not isinstance(url, (str, _Url)):
raise TypeError(f"`url` must be a str or an instance of _Url, "
f"got {url.__class__} instance instead")
self._url = str(url)

def __str__(self) -> str:
return self._url

def __repr__(self) -> str:
return f"{self.__class__.__name__}({self._url!r})"
4 changes: 2 additions & 2 deletions web_poet/mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,8 @@ class ResponseShortcutsMixin(SelectableMixin):

@property
def url(self):
"""Shortcut to HTML Response's URL."""
return self.response.url
"""Shortcut to HTML Response's URL, as a string."""
return str(self.response.url)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is for backwards compatibility, and also for consistency with .base_url and .urljoin properties, where urls are strings.

Overall, I don't like ResponseShortcutsMixin, it seems using HttpResponse methods is better :)


@property
def html(self):
Expand Down
2 changes: 2 additions & 0 deletions web_poet/page_inputs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,7 @@
HttpResponseHeaders,
HttpRequestBody,
HttpResponseBody,
RequestUrl,
ResponseUrl
)
from .browser import BrowserHtml
9 changes: 5 additions & 4 deletions web_poet/page_inputs/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
)
from web_poet.exceptions import RequestBackendError, HttpResponseError
from web_poet.utils import as_list
from web_poet._base import _Url

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -72,12 +73,12 @@ def _handle_status(
return

status = HTTPStatus(response.status)
msg = f"{response.status} {status.name} response for {response.url}"
msg = f"{response.status} {status.name} response for {response.url!s}"
kmike marked this conversation as resolved.
Show resolved Hide resolved
raise HttpResponseError(msg, request=request, response=response)

async def request(
self,
url: str,
url: Union[str, _Url],
*,
method: str = "GET",
headers: Optional[_Headers] = None,
Expand Down Expand Up @@ -115,7 +116,7 @@ async def request(

async def get(
self,
url: str,
url: Union[str, _Url],
*,
headers: Optional[_Headers] = None,
allow_status: List[_Status] = None,
Expand All @@ -132,7 +133,7 @@ async def get(

async def post(
self,
url: str,
url: Union[str, _Url],
*,
headers: Optional[_Headers] = None,
body: Optional[_Body] = None,
Expand Down
16 changes: 13 additions & 3 deletions web_poet/page_inputs/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
http_content_type_encoding
)

from web_poet._base import _HttpHeaders
from web_poet._base import _HttpHeaders, _Url
from web_poet.utils import memoizemethod_noargs
from web_poet.mixins import SelectableMixin

Expand All @@ -18,6 +18,16 @@
_AnyStrDict = Dict[AnyStr, Union[AnyStr, List[AnyStr], Tuple[AnyStr, ...]]]


class ResponseUrl(_Url):
""" URL of the response """
pass


class RequestUrl(_Url):
""" URL of the request """
pass


class HttpRequestBody(bytes):
"""A container for holding the raw HTTP request body in bytes format."""

Expand Down Expand Up @@ -152,7 +162,7 @@ class HttpRequest:
**web-poet** like :class:`~.HttpClient`.
"""

url: str = attrs.field()
url: RequestUrl = attrs.field(converter=RequestUrl)
method: str = attrs.field(default="GET", kw_only=True)
headers: HttpRequestHeaders = attrs.field(
factory=HttpRequestHeaders, converter=HttpRequestHeaders, kw_only=True
Expand Down Expand Up @@ -185,7 +195,7 @@ class HttpResponse(SelectableMixin):
is auto-detected from headers and body content.
"""

url: str = attrs.field()
url: ResponseUrl = attrs.field(converter=ResponseUrl)
body: HttpResponseBody = attrs.field(converter=HttpResponseBody)
status: Optional[int] = attrs.field(default=None, kw_only=True)
headers: HttpResponseHeaders = attrs.field(factory=HttpResponseHeaders,
Expand Down