diff --git a/__init__.py b/__init__.py
new file mode 100644
index 0000000..3efef37
--- /dev/null
+++ b/__init__.py
@@ -0,0 +1,6 @@
+
+from poetree.main import Poetree
+from poetree.corpus import Corpus
+from poetree.author import Author
+from poetree.source import Source
+from poetree.poem import Poem
diff --git a/author.py b/author.py
new file mode 100644
index 0000000..55141cb
--- /dev/null
+++ b/author.py
@@ -0,0 +1,166 @@
+from typing import Union
+import pandas as pd
+from .config import BASE_URL
+from .glob import make_request, metadata, get_content
+from .source import Source
+from .poem import Poem
+
+
+class Author:
+    '''
+    Class corresponding to a particular author.
+    '''
+
+    def __init__(
+        self,
+        lang : Union[None,str] = None,
+        base_url : str = BASE_URL,
+        id_ : Union[None,int] = None,
+        wiki : Union[None,str] = None,
+        viaf : Union[None,str] = None,
+        metadata : Union[dict,None] = None
+    ):
+        '''
+        Store author metadata (if initialized by Poetree instance) or get them
+        from API (if initialized directly). Create empty dict self.content_
+        that will hold lists of Source and Poem instances.
+
+        Arguments:
+            lang (str|None) : ISO code of the corpus, required if initialized directly
+            base_url (str) : API base URL (default: set in config.py)
+            id_ (int|None) : Id(DB) of the author
+            wiki (str|None) : Wiki id of the author
+            viaf (str|None) : Viaf id of the author
+            metadata (dict|None) : Author metadata passed when initialized by Poetree instance
+
+        Raises:
+            ValueError : If neither [metadata] nor [lang] is passed
+                       : If neither [metadata] nor one of [id_, wiki, viaf] is passed
+
+        Returns:
+            None
+        '''
+        self.base_url = base_url
+        self.content_ = dict()
+
+        if metadata is not None:
+            self.metadata_ = metadata
+        elif lang is None:
+            raise ValueError (
+                'Argument [lang] is required when initializing ' +
+                f'{__class__.__name__} instance directly'
+            )
+        elif id_ is not None:
+            self._get_author_metadata(lang, 'id_author', id_)
+        elif wiki is not None:
+            self._get_author_metadata(lang, 'wiki', wiki)
+        elif viaf is not None:
+            self._get_author_metadata(lang, 'viaf', viaf)
+        else:
+            raise ValueError (
+                'One of the arguments [id_,wiki,viaf] is required when initializing ' +
+                f'{__class__.__name__} instance directly'
+            )
+        for k, v in self.metadata_.items(): setattr(self, k, v)
+
+
+    def _get_author_metadata(
+        self,
+        lang : str,
+        id_type : str,
+        id_val : Union[int,str],
+    ):
+        '''
+        Get metadata on author and store them in self.metadata_
+
+        Arguments:
+            lang (str) : ISO code of the corpus
+            id_type (str) : Which identifier to use for retrieving author
+            id_val (int|str) : Identifier value
+
+        Returns:
+            None
+        '''
+        self.metadata_ = make_request(
+            self.base_url,
+            'author',
+            **{'corpus': lang, id_type: id_val}
+        )
+        self.metadata_['corpus'] = lang
+
+
+    def get_sources(self, **kwargs) -> list:
+        '''
+        Get metadata of sources by the author. Create a new Source instance
+        for each source, store it in a list and return it.
+
+        Arguments:
+            None
+
+        Keyword arguments:
+            published_after (int) : Limit to sources published no sooner than a given year
+            published_before (int) : Limit to sources published no later than a given year
+
+        Returns:
+            (list) : List holding instances of Source
+        '''
+        self.content_['sources'] = get_content(
+            self.base_url, 'sources', Source,
+            corpus = self.metadata_['corpus'],
+            id_author = self.metadata_['id_'],
+            **kwargs
+        )
+        return self.content_['sources']
+
+
+    def get_poems(self, **kwargs) -> list:
+        '''
+        Get metadata of poems by the author. Create a new Poem instance
+        for each poem, store it in a list and return it.
+
+        Arguments:
+            None
+
+        Keyword arguments:
+            id_source (int) : Limit to poems from certain source
+
+        Returns:
+            (list) : List holding instances of Poem
+        '''
+        self.content_['poems'] = get_content(
+            self.base_url, 'poems', Poem,
+            corpus = self.metadata_['corpus'],
+            id_author = self.metadata_['id_'],
+            **kwargs
+        )
+        return self.content_['poems']
+
+
+    def metadata(
+        self,
+        target : str = 'self',
+        output : str = 'list',
+        sortby : Union[str,None] = None,
+        reverse : bool = False
+    ) -> Union[list, pd.DataFrame, None]:
+        '''
+        Returns target metadata in the format selected by [output]: a list
+        as received from API, a pd.DataFrame, or a table printed to stdout.
+        The list may be sorted according to any subdict key.
+
+        Params:
+            target (str) : 'self', or 'sources'/'poems' once fetched; default: 'self'
+            output (str) : Output format: 'list': list as retrieved from API,
+                'pandas': pd.DataFrame, 'print': stringified table
+                printed directly; default: 'list'
+            sortby (str|None) : Subdict key according to which sort the list;
+                default: None
+            reverse (bool) : Sort in reversed (descending) order; default False
+
+        Returns:
+            (list|pd.DataFrame|None) : metadata
+        '''
+        if target == 'self':
+            return metadata([self], output, sortby, reverse)
+        else:
+            return metadata(self.content_[target], output, sortby, reverse)
diff --git a/config.py b/config.py
new file mode 100644
index 0000000..cf923f6
--- /dev/null
+++ b/config.py
@@ -0,0 +1,2 @@
+
+BASE_URL = 'http://versologie.cz/poetree/api'
diff --git a/corpus.py b/corpus.py
new file mode 100644
index 0000000..92afa54
--- /dev/null
+++ b/corpus.py
@@ -0,0 +1,142 @@
+from typing import Union
+import pandas as pd
+from .config import BASE_URL
+from .glob import make_request, metadata, get_content
+from .author import Author
+from .source import Source
+
+
+class Corpus:
+    '''
+    Class corresponding to a particular corpus.
+    '''
+
+    def __init__(
+        self,
+        lang : Union[str,None] = None,
+        base_url : str = BASE_URL,
+        metadata : Union[dict,None] = None
+    ):
+        '''
+        Store corpus metadata (if initialized by Poetree instance) or get them
+        from API (if initialized directly). Create empty dict self.content_
+        that will hold lists of Author and Source instances.
+
+        Arguments:
+            lang (str|None) : ISO code of the corpus, required if initialized directly
+            base_url (str) : API base URL (default: set in config.py)
+            metadata (dict|None) : Corpus metadata passed when initialized by Poetree instance
+
+        Raises:
+            ValueError : If neither [metadata] nor [lang] is passed
+
+        Returns:
+            None
+        '''
+        self.base_url = base_url
+        self.content_ = dict()
+        if metadata is not None:
+            self.metadata_ = metadata
+        elif lang is not None:
+            self._get_corpus_metadata(lang)
+        else:
+            raise ValueError (
+                'Argument [lang] is required when initializing ' +
+                f'{__class__.__name__} instance directly'
+            )
+        for k, v in self.metadata_.items(): setattr(self, k, v)
+
+
+    def _get_corpus_metadata(self, lang:str):
+        '''
+        Get metadata on corpus and store them in self.metadata_
+
+        Arguments:
+            lang (str) : Language of the corpus (ISO code)
+
+        Returns:
+            None
+        '''
+        self.metadata_ = make_request(self.base_url, 'corpus', corpus=lang)
+        self.metadata_['corpus'] = lang
+
+
+    def get_authors(self, **kwargs) -> list:
+        '''
+        Get metadata of all available authors. Create a new Author instance for
+        each author, store it in a list and return it.
+
+        Arguments:
+            None
+
+        Keyword arguments:
+            country (str|list) : Limit to authors from certain countries. Either a single
+                value (country="pt"), stringified list (country="pt,br") or list of codes
+            born_after (int) : Limit to authors born no sooner than a given year
+            born_before (int) : Limit to authors born no later than a given year
+            died_after (int) : Limit to authors that died no sooner than a given year
+            died_before (int) : Limit to authors that died no later than a given year
+
+        Returns:
+            (list) : List holding instances of Author
+        '''
+        if 'country' in kwargs and isinstance(kwargs['country'], (list, tuple)):
+            kwargs['country'] = ','.join(kwargs['country'])
+        self.content_['authors'] = get_content(
+            self.base_url, 'authors', Author, corpus=self.metadata_['corpus'], **kwargs
+        )
+        return self.content_['authors']
+
+
+    def get_sources(self, **kwargs) -> list:
+        '''
+        Get metadata of all available sources. Create a new Source instance
+        for each source, store it in a list and return it.
+
+        Arguments:
+            None
+
+        Keyword arguments:
+            id_author (int) : Limit to sources by author with this id(DB)
+            wiki (str) : Limit to sources by author with this wiki id
+            viaf (str) : Limit to sources by author with this viaf id
+            published_after (int) : Limit to sources published no sooner than a given year
+            published_before (int) : Limit to sources published no later than a given year
+
+        Returns:
+            (list) : List holding instances of Source
+        '''
+        self.content_['sources'] = get_content(
+            self.base_url, 'sources', Source, corpus=self.metadata_['corpus'], **kwargs
+        )
+        return self.content_['sources']
+
+
+    def metadata(
+        self,
+        target : str = 'self',
+        output : str = 'list',
+        sortby : Union[str,None] = None,
+        reverse : bool = False
+    ) -> Union[list, pd.DataFrame, None]:
+        '''
+        Returns target metadata in the format selected by [output]: a list
+        as received from API, a pd.DataFrame, or a table printed to stdout.
+        The list may be sorted according to any subdict key.
+
+        Params:
+            target (str) : 'self', or 'authors'/'sources' once fetched; default: 'self'
+            output (str) : Output format: 'list': list as retrieved from API,
+                'pandas': pd.DataFrame, 'print': stringified table
+                printed directly; default: 'list'
+            sortby (str|None) : Subdict key according to which sort the list;
+                default: None
+            reverse (bool) : Sort in reversed (descending) order; default False
+
+        Returns:
+            (list|pd.DataFrame|None) : metadata
+        '''
+        if target == 'self':
+            return metadata([self], output, sortby, reverse)
+        else:
+            return metadata(self.content_[target], output, sortby, reverse)
diff --git a/glob.py b/glob.py
new file mode 100644
index 0000000..ef5b90a
--- /dev/null
+++ b/glob.py
@@ -0,0 +1,110 @@
+import requests
+import json
+from typing import Union, Any
+from tabulate import tabulate
+import pandas as pd
+
+def make_request(
+    base_url : str,
+    endpoint : str,
+    **kwargs
+    ) -> Union[dict,list]:
+    '''
+    Send request to PoeTree API. Returns the decoded JSON response.
+    We catch two types of errors: (1) Server response with a status
+    code other than 200, (2) invalid JSON response.
+
+    Arguments:
+        base_url (str) : API base URL
+        endpoint (str) : API method name
+        **kwargs (dict) : URL parameters
+
+    Raises:
+        Exception : On a status code other than 200 or an invalid JSON response
+
+    Returns:
+        response (dict|list) : response JSON decoded
+    '''
+    if not base_url.endswith('/'):
+        base_url += '/'
+    url = requests.compat.urljoin(base_url, endpoint)
+    response = requests.get(url, params=kwargs)
+    if response.status_code == 200:
+        try:
+            return json.loads(response.text)
+        except ValueError as err:
+            raise Exception('Invalid JSON response') from err
+    else:
+        raise Exception(f'Server responded with status code {response.status_code}: {response.reason}')
+
+
+def get_content(
+    base_url : str,
+    endpoint : str,
+    class_ : Any,
+    **kwargs
+    ) -> list:
+    '''
+    Get metadata on subordinate elements (Poetree->Corpus->Author/Source->Poem).
+
+    Params:
+        base_url (str) : API base URL
+        endpoint (str) : API method name
+        class_ (Any) : Class to instantiate for each item of the response
+        **kwargs (dict) : URL parameters; [corpus] is required for every
+            endpoint other than 'corpora'
+
+    Returns:
+        (list) : List holding instances of subordinate class
+    '''
+    response = make_request(base_url, endpoint, **kwargs)
+    content = list()
+    for r in response:
+        if endpoint != 'corpora':
+            r['corpus'] = kwargs['corpus']
+        content.append(
+            class_(base_url=base_url, metadata=r)
+        )
+    return content
+
+
+def metadata(
+    instances : list,
+    output : str = 'list',
+    sortby : Union[str,None] = None,
+    reverse : bool = False,
+    ) -> Union[list, pd.DataFrame, None]:
+    '''
+    Takes a list of instances (corpora, authors, sources...) and returns
+    their metadata (values stored in self.metadata_). Metadata are returned
+    in the format selected by [output]: a list as received from API,
+    a pd.DataFrame, or a table printed to stdout.
+
+    Arguments:
+        instances (list) : Instances their metadata to be returned
+        output (str) : Output format: 'list': list as retrieved from API,
+            'pandas': pd.DataFrame, 'print': stringified table
+            printed directly; default: 'list'
+        sortby (str|None) : Subdict key according to which sort the list;
+            default: None
+        reverse (bool) : Sort in reversed (descending) order; default False
+
+    Returns:
+        (list|pd.DataFrame|None) : metadata
+    '''
+    if sortby is not None:
+        instances = sorted(
+            instances, key=lambda d: (
+                d.metadata_[sortby] is not None, d.metadata_[sortby]
+            ), reverse=reverse
+        )
+    if output == 'list':
+        return [x.metadata_ for x in instances]
+    if output == 'pandas':
+        return pd.DataFrame([x.metadata_ for x in instances])
+    # Nothing to tabulate for an empty list (instances[0] would raise IndexError)
+    if output == 'print' and instances:
+        header = instances[0].metadata_.keys()
+        # Blank out only missing values so that legitimate 0/False stay visible
+        body = [['' if val is None else val for val in x.metadata_.values()] for x in instances]
+        print(tabulate(body, header, maxcolwidths=[50]*len(header)))
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..1aa3b81
--- /dev/null
+++ b/main.py
@@ -0,0 +1,69 @@
+from typing import Union
+import pandas as pd
+from tabulate import tabulate
+from .config import BASE_URL
+from .glob import make_request, metadata, get_content
+from .corpus import Corpus
+
+
+class Poetree:
+    '''
+    Class corresponding to entire PoeTree collection
+    '''
+
+    def __init__(self, base_url:str=BASE_URL):
+        '''
+        Set API base URL. Create empty dict self.content_ that will
+        hold a list of Corpus instances.
+
+        Params:
+            base_url (str) : API base URL (default: set in config.py)
+
+        Returns:
+            None
+        '''
+        self.base_url = base_url
+        self.content_ = dict()
+
+
+    def get_corpora(self) -> list:
+        '''
+        Get metadata of all available corpora. Create a new Corpus instance
+        for each corpus, store it in a list and return it.
+
+        Params:
+            None
+
+        Returns:
+            (list) : List holding instances of Corpus
+        '''
+
+        self.content_['corpora'] = get_content(self.base_url, 'corpora', Corpus)
+        return self.content_['corpora']
+
+
+    def metadata(
+        self,
+        target : str = 'corpora',
+        output : str = 'list',
+        sortby : Union[str,None] = None,
+        reverse : bool = False
+    ) -> Union[list, pd.DataFrame, None]:
+        '''
+        Returns metadata of selected target in the format selected by [output]:
+        a list as received from API, a pd.DataFrame, or a table printed to
+        stdout. The list may be sorted according to any subdict key.
+
+        Params:
+            target (str) : Metadata of what to return; default: 'corpora'
+            output (str) : Output format: 'list': list as retrieved from API,
+                'pandas': pd.DataFrame, 'print': stringified table
+                printed directly; default: 'list'
+            sortby (str|None) : Subdict key according to which sort the list;
+                default: None
+            reverse (bool) : Sort in reversed (descending) order; default False
+
+        Returns:
+            (list|pd.DataFrame|None) : metadata
+        '''
+        return metadata(self.content_[target], output, sortby, reverse)
diff --git a/poem.py b/poem.py
new file mode 100644
index 0000000..cf7b4da
--- /dev/null
+++ b/poem.py
@@ -0,0 +1,140 @@
+from typing import Union
+import pandas as pd
+from .config import BASE_URL
+from .glob import make_request, metadata
+
+
+class Poem:
+    '''
+    Class corresponding to a particular poem.
+    '''
+
+    def __init__(
+        self,
+        lang : Union[None,str] = None,
+        base_url : str = BASE_URL,
+        id_ : Union[None,int] = None,
+        metadata : Union[dict,None] = None
+    ):
+        '''
+        Store poem metadata (if initialized by Poetree instance) or get them
+        from API (if initialized directly).
+
+        Arguments:
+            lang (str|None) : ISO code of the corpus, required if initialized directly
+            base_url (str) : API base URL (default: set in config.py)
+            id_ (int|None) : Id(DB) of the poem
+            metadata (dict|None) : Poem metadata passed when initialized by Poetree instance
+
+        Raises:
+            ValueError : If neither [metadata] nor [lang] is passed
+                       : If neither [metadata] nor id_ is passed
+
+        Returns:
+            None
+        '''
+        self.base_url = base_url
+        self.content_ = list()
+
+        if metadata is not None:
+            self.metadata_ = metadata
+        elif lang is None:
+            raise ValueError (
+                'Argument [lang] is required when initializing ' +
+                f'{__class__.__name__} instance directly'
+            )
+        elif id_ is None:
+            raise ValueError (
+                'Argument [id_] is required when initializing ' +
+                f'{__class__.__name__} instance directly'
+            )
+        else:
+            self._get_poem_metadata(lang, id_)
+        for k, v in self.metadata_.items(): setattr(self, k, v)
+
+
+    def _get_poem_metadata(self, lang: str, id_:Union[int,str]):
+        '''
+        Get metadata on poem and store them in self.metadata_
+
+        Arguments:
+            lang (str) : ISO code of the corpus
+            id_ (int|str) : Id of the poem
+
+        Returns:
+            None
+        '''
+        self.metadata_ = make_request(
+            self.base_url,
+            'poem',
+            **{'corpus': lang, 'id_poem': id_, 'lines': 0}
+        )
+        self.metadata_['corpus'] = lang
+
+
+    def get_body(self, **kwargs):
+        '''
+        Get body of the poem (if not fetched yet), store it in self.content_
+        and return it
+
+        Arguments:
+            **kwargs (dict) : Additional URL parameters (used on first fetch only)
+
+        Returns:
+            (dict) : Object representing body of the poem
+        '''
+        if not self.content_:
+            response = make_request(
+                self.base_url,
+                'poem',
+                **{'corpus': self.corpus, 'id_poem': self.id_, **kwargs}
+            )
+            self.content_ = response['body']
+        return self.content_
+
+
+    def get_all(self):
+        '''
+        Get body of the poem (if not fetched yet), store it in self.content_
+        and return it together with metadata
+
+        Arguments:
+            None
+
+        Returns:
+            (dict) : Object representing body and metadata of the poem
+        '''
+        if not self.content_:
+            self.get_body()
+
+        return {**self.metadata_, 'body': self.content_}
+
+
+    def metadata(
+        self,
+        target : str = 'self',
+        output : str = 'list',
+        sortby : Union[str,None] = None,
+        reverse : bool = False
+    ) -> Union[list, pd.DataFrame, None]:
+        '''
+        Returns target metadata in the format selected by [output]: a list
+        as received from API, a pd.DataFrame, or a table printed to stdout.
+        The list may be sorted according to any subdict key.
+
+        Params:
+            target (str) : Metadata of what to return; default: 'self'
+            output (str) : Output format: 'list': list as retrieved from API,
+                'pandas': pd.DataFrame, 'print': stringified table
+                printed directly; default: 'list'
+            sortby (str|None) : Subdict key according to which sort the list;
+                default: None
+            reverse (bool) : Sort in reversed (descending) order; default False
+
+        Returns:
+            (list|pd.DataFrame|None) : metadata
+        '''
+        if target == 'self':
+            return metadata([self], output, sortby, reverse)
+        else:
+            return metadata(self.content_[target], output, sortby, reverse)
diff --git a/source.py b/source.py
new file mode 100644
index 0000000..104871b
--- /dev/null
+++ b/source.py
@@ -0,0 +1,133 @@
+from typing import Union
+import pandas as pd
+from tabulate import tabulate
+from .config import BASE_URL
+from .glob import make_request, metadata, get_content
+from .poem import Poem
+
+
+class Source:
+    '''
+    Class corresponding to a particular source.
+    '''
+
+    def __init__(
+        self,
+        lang : Union[None,str] = None,
+        base_url : str = BASE_URL,
+        id_ : Union[None,int] = None,
+        id_poem : Union[None,int] = None,
+        metadata : Union[dict,None] = None,
+    ):
+        '''
+        Store source metadata (if initialized by Poetree instance) or get them
+        from API (if initialized directly). Create empty dict self.content_
+        that will hold a list of Poem instances.
+
+        Arguments:
+            lang (str|None) : ISO code of the corpus, required if initialized directly
+            base_url (str) : API base URL (default: set in config.py)
+            id_ (int|None) : Id(DB) of the source
+            id_poem (int|None) : Id(DB) of a poem whose source is to be found
+            metadata (dict|None) : Source metadata passed when initialized by Poetree instance
+
+        Raises:
+            ValueError : If neither [metadata] nor [lang] is passed
+                       : If neither [metadata] nor one of [id_, id_poem] is passed
+
+        Returns:
+            None
+        '''
+        self.base_url = base_url
+        self.content_ = dict()
+
+        if metadata is not None:
+            self.metadata_ = metadata
+        elif lang is None:
+            raise ValueError (
+                f'Argument [lang] is required when initializing {__class__.__name__} instance directly'
+            )
+        elif id_ is not None:
+            self._get_source_metadata(lang, 'id_source', id_)
+        elif id_poem is not None:
+            self._get_source_metadata(lang, 'id_poem', id_poem)
+        else:
+            raise ValueError (
+                f'One of the arguments [id_,id_poem] is required when initializing {__class__.__name__} instance directly'
+            )
+        for k, v in self.metadata_.items(): setattr(self, k, v)
+
+
+    def _get_source_metadata(
+        self,
+        lang : str,
+        id_type : str,
+        id_val : Union[int,str]
+    ):
+        '''
+        Get metadata on source and store them in self.metadata_
+
+        Arguments:
+            lang (str) : ISO code of the corpus
+            id_type (str) : Which identifier to use for retrieving source
+            id_val (int|str) : Identifier value
+
+        Returns:
+            None
+        '''
+        self.metadata_ = make_request(
+            self.base_url,
+            'source',
+            **{'corpus': lang, id_type: id_val}
+        )
+        self.metadata_['corpus'] = lang
+
+
+    def get_poems(self, **kwargs) -> list:
+        '''
+        Get metadata of poems in the source. Create a new Poem instance
+        for each poem, store it in a list and return it.
+
+        Arguments:
+            None
+
+        Returns:
+            (list) : List holding instances of Poem
+        '''
+        self.content_['poems'] = get_content(
+            self.base_url, 'poems', Poem,
+            corpus = self.metadata_['corpus'],
+            id_source = self.metadata_['id_'],
+            **kwargs
+        )
+        return self.content_['poems']
+
+
+    def metadata(
+        self,
+        target : str = 'self',
+        output : str = 'list',
+        sortby : Union[str,None] = None,
+        reverse : bool = False
+    ) -> Union[list, pd.DataFrame, None]:
+        '''
+        Returns target metadata in the format selected by [output]: a list
+        as received from API, a pd.DataFrame, or a table printed to stdout.
+        The list may be sorted according to any subdict key.
+
+        Params:
+            target (str) : 'self', or 'poems' once fetched; default: 'self'
+            output (str) : Output format: 'list': list as retrieved from API,
+                'pandas': pd.DataFrame, 'print': stringified table
+                printed directly; default: 'list'
+            sortby (str|None) : Subdict key according to which sort the list;
+                default: None
+            reverse (bool) : Sort in reversed (descending) order; default False
+
+        Returns:
+            (list|pd.DataFrame|None) : metadata
+        '''
+        if target == 'self':
+            return metadata([self], output, sortby, reverse)
+        else:
+            return metadata(self.content_[target], output, sortby, reverse)