Skip to content

Commit

Permalink
Modified Elsevier parser to use the 'author' abstract as default (#124)
Browse files Browse the repository at this point in the history
* Modified Elsevier parser to use the 'author' abstract as default

* Replaced Elsevier test case with an OA record

* Redacted source content

---------

Co-authored-by: Mugdha Polimera <[email protected]>
Co-authored-by: Mugdha Polimera <[email protected]>
  • Loading branch information
3 people authored Aug 21, 2024
1 parent 4874484 commit 5f376ac
Show file tree
Hide file tree
Showing 4 changed files with 1,233 additions and 11 deletions.
18 changes: 7 additions & 11 deletions adsingestp/parsers/elsevier.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,23 +183,19 @@ def _parse_title_abstract(self):
for abs in abs_all:
if abs.get("class", None) == "author":
abs_text_all = abs.find_all("ce:simple-para")
break
elif abs.find("ce:section-title"):
if abs.find("ce:section-title").get_text().lower() == "abstract":
abs_text_all = abs.find_all("ce:simple-para")
break
elif abs.find("ce:section-title").get_text().lower() == "highlights":
abs_text_all = abs.find_all("p")

abstract = ""
for abs_text in abs_text_all:
abstract = (
abstract
+ " "
+ self._detag(abs_text, self.HTML_TAGSET["abstract"]).strip()
)

if abstract:
self.base_metadata["abstract"] = abstract
break
abstract = ""
for abs_text in abs_text_all:
abstract = (
abstract + " " + self._detag(abs_text, self.HTML_TAGSET["abstract"]).strip()
)

if abstract:
self.base_metadata["abstract"] = self._clean_output(abstract)
Expand Down
Loading

0 comments on commit 5f376ac

Please sign in to comment.