Skip to content

Commit

Permalink
Native language parsing for author names (#125)
Browse files: browse the repository at this point in the history
* Native language parsing for author names

* lint fix

---------

Co-authored-by: Mugdha Polimera <[email protected]>
  • Loading branch information
mugdhapolimera and Mugdha Polimera authored Aug 21, 2024
1 parent 5f376ac commit 8e1796d
Show file tree
Hide file tree
Showing 6 changed files with 9,320 additions and 1 deletion.
2 changes: 1 addition & 1 deletion adsingestp/parsers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,7 @@ def format(self, input_dict, format):
"prefix": i.get("prefix", ""),
"suffix": i.get("suffix", ""),
"pubraw": i.get("nameraw", ""),
# "native_lang": "XXX",
"native_lang": i.get("native_lang", ""),
"collab": i.get("collab", ""),
},
"affiliation": [
Expand Down
8 changes: 8 additions & 0 deletions adsingestp/parsers/jats.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,7 @@ def parse(self, article_metadata):
surname = contrib.find("string-name").find("surname").get_text()
else:
surname = ""

if contrib.find("name") and contrib.find("name").find("given-names"):
given = contrib.find("name").find("given-names").get_text()
elif contrib.find("string-name") and contrib.find("string-name").find(
Expand All @@ -348,6 +349,12 @@ def parse(self, article_metadata):
else:
given = ""

# get native language author name
if contrib.find("name-alternatives"):
native_lang = contrib.find("name-alternatives").get_text().strip()
else:
native_lang = ""

# NOTE: institution-id is actually useful, but
# at the moment, strip it
# contrib = self._decompose(soup=contrib, tag="institution-id")
Expand Down Expand Up @@ -425,6 +432,7 @@ def parse(self, article_metadata):
auth["corresp"] = l_correspondent
auth["surname"] = surname
auth["given"] = given
auth["native_lang"] = native_lang
auth["aff"] = aff_text
auth["affid"] = aff_extids
auth["xaff"] = xref_aff
Expand Down
Loading

0 comments on commit 8e1796d

Please sign in to comment.