Skip to content

Commit

Permalink
Capturing OA flags for wiley (#143)
Browse files Browse the repository at this point in the history
Co-authored-by: Mugdha Polimera <[email protected]>
  • Loading branch information
mugdhapolimera and Mugdha Polimera authored Nov 7, 2024
1 parent e2548b8 commit 3cba4c6
Show file tree
Hide file tree
Showing 6 changed files with 919 additions and 2 deletions.
25 changes: 25 additions & 0 deletions adsingestp/parsers/wiley.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,9 +136,33 @@ def _parse_title_abstract(self):
self.base_metadata["abstract"] = self._clean_output(a.get_text())

def _parse_copyright(self):
    """Record the copyright statement from the publication metadata.

    Looks up the first ``copyright`` element under ``self.pubmeta_unit``
    and stores its text in ``self.base_metadata["copyright"]``. Nothing is
    written when the metadata unit or the element is absent.
    """
    # Guard against a missing metadata unit (the same check
    # _parse_permissions performs) instead of failing on ``None.find``.
    if not self.pubmeta_unit:
        return

    # Search once and reuse the result rather than walking the tree twice.
    copyright_tag = self.pubmeta_unit.find("copyright")
    if copyright_tag is not None:
        self.base_metadata["copyright"] = copyright_tag.get_text()

def _parse_permissions(self):
    """Record open-access and license details from the publication metadata.

    Populates ``self.base_metadata["openAccess"]`` with up to three keys:

    * ``open`` -- ``True`` when the ``accessType`` attribute of
      ``self.pubmeta_unit`` equals ``"open"``.
    * ``license`` -- the ``type`` attribute of the first ``legalStatement``
      element, passed through ``self._detag`` with the ``"license"`` tag
      set and stripped of surrounding whitespace.
    * ``licenseURL`` -- the ``href`` of the first ``link`` inside that
      element, passed through ``self._detag`` with an empty tag list.

    Every write uses ``setdefault``, so values already present in
    ``base_metadata`` are left untouched. Nothing happens when
    ``self.pubmeta_unit`` is falsy.
    """
    if not self.pubmeta_unit:
        return

    if self.pubmeta_unit.get("accessType", "") == "open":
        self.base_metadata.setdefault("openAccess", {}).setdefault("open", True)

    # Search for the legal statement once; every later step reuses it.
    legal_statement = self.pubmeta_unit.find("legalStatement")
    if legal_statement is None:
        return

    # NOTE(review): a legalStatement without a ``type`` attribute stores an
    # empty license string here -- confirm that is intended.
    license_type = legal_statement.get("type", "")
    open_access = self.base_metadata.setdefault("openAccess", {})
    open_access.setdefault(
        "license",
        self._detag(license_type, self.HTML_TAGSET["license"]).strip(),
    )

    license_link = legal_statement.find("link")
    if license_link is None:
        return

    # A missing or empty href is skipped rather than stored.
    license_url = license_link.get("href", None)
    if license_url:
        open_access.setdefault("licenseURL", self._detag(license_url, []))

def _parse_authors(self):
aff_dict = {}
for a in self.content_meta.find_all("affiliation"):
Expand Down Expand Up @@ -228,6 +252,7 @@ def parse(self, text):
self._parse_edhistory()
self._parse_title_abstract()
self._parse_copyright()
self._parse_permissions()
self._parse_authors()
self._parse_keywords()
self._parse_references()
Expand Down
Loading

0 comments on commit 3cba4c6

Please sign in to comment.