Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
versotym authored Apr 2, 2024
1 parent 0fb7499 commit 19a6360
Show file tree
Hide file tree
Showing 8 changed files with 759 additions and 0 deletions.
6 changes: 6 additions & 0 deletions __init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@

from poetree.main import Poetree
from poetree.corpus import Corpus
from poetree.author import Author
from poetree.source import Source
from poetree.poem import Poem
166 changes: 166 additions & 0 deletions author.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
from typing import Union
import pandas as pd
from .config import BASE_URL
from .glob import make_request, metadata, get_content
from .source import Source
from .poem import Poem


class Author:
'''
Class corresponding to a particular author.
'''

def __init__(
self,
lang : Union[None,str] = None,
base_url : str = BASE_URL,
id_ : Union[None,int] = None,
wiki : Union[None,str] = None,
viaf : Union[None,str] = None,
metadata : dict = None
):
'''
Store author metadata (if initialized by Poetree instance) or get them
from API (if initialized directly). Create empty dict self.content_
that will hold lists of Source and Poem instances.
Arguments:
lang (str|None) : ISO code of the corpus, required if initialized directly
base_url (str) : API base URL (default: set in config.py)
id_ (int|None) : Id(DB) of the author
wiki (str|None) : Wiki id of the author
viaf (str|None) : Viaf id of the author
metadata (dict|None) : Author metadata passed when initialized by Poetree instance
Raises:
ValueError : If neither [metadata] nor [lang] is passed
: If neither [metadata] nor one of [id_, wiki, viaf] is passed
Returns:
None
'''
self.base_url = base_url
self.content_ = dict()

if metadata is not None:
self.metadata_ = metadata
elif lang is None:
raise ValueError (
'Argument [lang] is required when initializing ' +
f'{__class__.__name__} instance directly'
)
elif id_ is not None:
self._get_author_metadata(lang, 'id_author', id_)
elif wiki is not None:
self._get_author_metadata(lang, 'wiki', wiki)
elif viaf is not None:
self._get_author_metadata(lang, 'viaf', viaf)
else:
raise ValueError (
'One of the arguments [id_,wiki,viaf] is required when initializing ' +
f'{__class__.__name__} instance directly'
)
for k, v in self.metadata_.items(): setattr(self, k, v)


def _get_author_metadata(
self,
lang : str,
id_type : str,
id_val : Union[int,str],
):
'''
Get metadata on author and store them in self.metadata_
Arguments:
lang (str) : ISO code of the corpus
id_type (str) : Which identifier to use for retrueving author
id_val (int|str) : Identifier value
Returns:
None
'''
self.metadata_ = make_request(
self.base_url,
'author',
**{'corpus': lang, id_type: id_val}
)
self.metadata_['corpus'] = lang


def get_sources(self, **kwargs) -> list:
'''
Get metadata of sources by the author. Create a new Source instance
for each source, store it in a list and return it.
Arguments:
None
Keyword arguments:
published_after (int) : Limit to sources published no sooner than a given year
published_before (int) : Limit to sources published no later than a given year
Returns:
(list) : List holding instances of Source
'''
self.content_['sources'] = get_content(
self.base_url, 'sources', Source,
corpus = self.metadata_['corpus'],
id_author = self.metadata_['id_'],
**kwargs
)
return self.content_['sources']


def get_poems(self, **kwargs) -> list:
'''
Get metadata of poems by the author. Create a new Poem instance
for each poem, store it in a list and return it.
Arguments:
None
Keyword arguments:
id_source (int) : Limit to poems from certain source
Returns:
(list) : List holding instances of Poem
'''
self.content_['poems'] = get_content(
self.base_url, 'poems', Poem,
corpus = self.metadata_['corpus'],
id_author = self.metadata_['id_'],
**kwargs
)
return self.content_['poems']


def metadata(
self,
target : str = 'self',
output : str = 'list',
sortby : Union[str,list] = None,
reverse : bool = False
) -> Union[list, pd.DataFrame, None]:
'''
Returns targt metadata either as a formatted table (tabular=True)
or as a list as received from API (tabular=False). The list may be
sorted according to any subdict key.
Params:
target (str) : Metadata of what to return; default: 'corpora'
output (str) : Output format: 'list': list as retrieved from API,
'pandas': pd.DataFrame, 'print': stringified table
printed directly; default: 'list'
sortby (str|None) : Subdict key according to which sort the list;
default: None
reverse (bool) : Sort in reversed (descending) order; default False
Returns:
(list|pd.DataFrame|None) : metadata
'''
if target == 'self':
return metadata([self], output, sortby, reverse)
else:
return metadata(self.content_[target], output, sortby, reverse)
2 changes: 2 additions & 0 deletions config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@

BASE_URL = 'http://versologie.cz/poetree/api'
142 changes: 142 additions & 0 deletions corpus.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
from typing import Union
import pandas as pd
from .config import BASE_URL
from .glob import make_request, metadata, get_content
from .author import Author
from .source import Source


class Corpus:
'''
Class corresponding to a particular corpus.
'''

def __init__(
self,
lang : Union[str,None] = None,
base_url : str = BASE_URL,
metadata : Union[dict,None] = None
):
'''
Store corpus metadata (if initialized by Poetree instance) or get them
from API (if initialized directly). Create empty dict self.content_
that will hold lists of Author and Source instances.
Arguments:
lang (str|None) : ISO code of the corpus, required if initialized directly
base_url (str) : API base URL (default: set in config.py)
metadata (dict|None) : Corpus metadata passed when initialized by Poetree instance
Raises:
ValueError : If neither [metadata] nor [lang] is passed
Returns:
None
'''
self.base_url = base_url
self.content_ = dict()
if metadata is not None:
self.metadata_ = metadata
elif lang is not None:
self._get_corpus_metadata(lang)
else:
raise ValueError (
'Argument [lang] is required when initializing ' +
f'{__class__.__name__} instance directly'
)
for k, v in self.metadata_.items(): setattr(self, k, v)


def _get_corpus_metadata(self, lang:str):
'''
Get metadata on corpus and store them in self.metadata_
Arguments:
lang (str) : Language of the corpus (ISO code)
Returns:
None
'''
self.metadata_ = make_request(self.base_url, 'corpus', corpus=lang)
self.metadata_['corpus'] = lang


def get_authors(self, **kwargs) -> list:
'''
Get metadata of all available authors. Create a new Author instance for
each author, store it in a list and return it.
Arguments:
None
Keyword arguments:
country (str) : Limit to authors from certain countries. Either a single
value (country="pt") or stringified list (country="pt,br")
born_after (int) : Limit to authors born no sooner than a given year
born_before (int) : Limit to authors born no later than a given year
died_after (int) : Limit to authors that died no sooner than a given year
died_before (int) : Limit to authors that died no later than a given year
Returns:
(list) : List holding instances of Author
'''
if 'country' in kwargs and not isinstance(kwargs['country'], list):
kwargs['country'] = ','.join(kwargs['country'])
self.content_['authors'] = get_content(
self.base_url, 'authors', Author, corpus=self.metadata_['corpus'], **kwargs
)
return self.content_['authors']


def get_sources(self, **kwargs) -> list:
'''
Get metadata of all available sources. Create a new Source instance
for each source, store it in a list and return it.
Arguments:
None
Keyword arguments:
id_author (int) : Limit to sources by author with this id(DB)
wiki (str) : Limit to sources by author with this wiki id
viaf (str) : Limit to sources by author with this viaf id
published_after (int) : Limit to sources published no sooner than a given year
published_before (int) : Limit to sources published no later than a given year
Returns:
(list) : List holding instances of Source
'''
self.content_['sources'] = get_content(
self.base_url, 'sources', Source, corpus=self.metadata_['corpus'], **kwargs
)
return self.content_['sources']


def metadata(
self,
target : str = 'self',
output : str = 'list',
sortby : Union[str,list] = None,
reverse : bool = False
) -> Union[list, pd.DataFrame, None]:
'''
Returns targt metadata either as a formatted table (tabular=True)
or as a list as received from API (tabular=False). The list may be
sorted according to any subdict key.
Params:
target (str) : Metadata of what to return; default: 'self'
output (str) : Output format: 'list': list as retrieved from API,
'pandas': pd.DataFrame, 'print': stringified table
printed directly; default: 'list'
sortby (str|None) : Subdict key according to which sort the list;
default: None
reverse (bool) : Sort in reversed (descending) order; default False
Returns:
(list|pd.DataFrame|None) : metadata
'''
if target == 'self':
return metadata([self], output, sortby, reverse)
else:
return metadata(self.content_[target], output, sortby, reverse)
Loading

0 comments on commit 19a6360

Please sign in to comment.