Skip to content

Commit

Permalink
Fix for the alternate conference proceedings format (proceedings_series_metadata) in Crossref records
Browse files Browse the repository at this point in the history
 	modified:   adsingestp/parsers/crossref.py
 	new file:   tests/stubdata/input/crossref_gsa_conf.xml
 	new file:   tests/stubdata/output/crossref_gsa_conf.json
 	modified:   tests/test_crossref.py
  • Loading branch information
seasidesparrow committed Nov 7, 2024
1 parent 027876d commit eb54d51
Show file tree
Hide file tree
Showing 4 changed files with 113 additions and 0 deletions.
2 changes: 2 additions & 0 deletions adsingestp/parsers/crossref.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,8 @@ def _parse_conf_event_proceedings(self):
# conferences only, parses event-level and proceedings-level metadata, not conference paper-level metadata
event_meta = self.input_metadata.find("conference").find("event_metadata")
proc_meta = self.input_metadata.find("conference").find("proceedings_metadata")
if not proc_meta:
proc_meta = self.input_metadata.find("conference").find("proceedings_series_metadata")

if event_meta.find("conference_name"):
self.base_metadata["conf_name"] = event_meta.find("conference_name").get_text()
Expand Down
57 changes: 57 additions & 0 deletions tests/stubdata/input/crossref_gsa_conf.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
<?xml version="1.0" encoding="UTF-8"?>
<doi_records>
<doi_record owner="10.1130" timestamp="2016-03-09 07:11:56">
<crossref>
<conference>
<event_metadata>
<conference_name>Northeastern GSA Section Meeting</conference_name>
<conference_date>21-23 March 2016</conference_date>
</event_metadata>
<proceedings_series_metadata>
<series_metadata>
<titles>
<title>Geological Society of America Abstracts with Programs</title>
</titles>
<issn>00167592</issn>
</series_metadata>
<publisher>
<publisher_name>Geological Society of America</publisher_name>
</publisher>
<publication_date>
<year>2016</year>
</publication_date>
</proceedings_series_metadata>
<conference_paper>
<contributors>
<organization sequence="first" contributor_role="author">Ohio University</organization>
<person_name sequence="first" contributor_role="author">
<given_name>Alexander A.</given_name>
<surname>Conti</surname>
</person_name>
<person_name sequence="additional" contributor_role="author">
<given_name>Elizabeth H.</given_name>
<surname>Gierlowski-Kordesch</surname>
</person_name>
</contributors>
<titles>
<title>Delineating Lake Types of the Jurassic East Berlin Formation, Hartford Basin, Newark Supergroup</title>
</titles>
<abstract>
<p>The Mesozoic Hartford Basin, a fault-bounded half-graben in New England, is composed of four sedimentologic units displaying lacustrine, playa, and alluvial conditions separated by three tholeiitic basalt flows. Limited outcrop, however, has restricted analyses across the basin. The Jurassic East Berlin Formation, in particular, crops out only in the southern and northern extents of the basin, exposing the upper 100-118-m of deposits. As a result, a new core analysis across a 600-m-transect of East Berlin rocks has been completed in the central region of the basin, exposing the entire 195-m thickness of the formation for the first time. Cores expose eight 3-m-thick lacustrine mudrock units, the upper six of which are correlative to lake deposits identified in the southern and northern extents of the basin. Additionally, thin chicken-wire evaporites demarcate the lowermost, previously unexposed, lacustrine unit, 7-m beneath a 15-cm-thick tufa horizon. Thin playa deposits and thick sheetflood and Vertisol packages separate these lake sequences over 5-30-m of vertical distance.</p>
<p>To supplement these sedimentologic data, and better understand lake geochemistry of the basin during East Berlin time, new biomarker analyses have been applied to each of the eight lacustrine mudrock units for the first time. Biomarker data are useful for determining the lake-basin type, a paleolake classification system derived by Bohacs, Carroll, and others to describe predictable physical and geochemical evolution within rift basins from fluvial facies to over-filled, balance-filled, and under-filled lacustrine facies; subsequently, balance-filled lacustrine facies grade to a terminal fluvial facies during changes in accommodation space through time. While fluvial facies envelope lake deposits within the Hartford Basin, identifying the lake types within the East Berlin has been problematic because of limited exposures. These new sedimentologic and biomarker analyses, however, suggest balance-filled lacustrine conditions at the base of the East Berlin that grade into under-filled conditions upsection. These new biomarker data finally provide definitive evidence for changing lake types during East Berlin time.</p>
</abstract>
<publication_date>
<year>2016</year>
</publication_date>
<publisher_item>
<item_number item_number_type="article-number">271926</item_number>
</publisher_item>
<doi_data>
<doi>10.1130/abs/2016NE-271926</doi>
<resource>https://gsa.confex.com/gsa/2016NE/webprogram/Paper271926.html</resource>
</doi_data>
</conference_paper>
</conference>
</crossref>
</doi_record>
</doi_records>
53 changes: 53 additions & 0 deletions tests/stubdata/output/crossref_gsa_conf.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
{
"abstract": {
"textEnglish": "The Mesozoic Hartford Basin, a fault-bounded half-graben in New England, is composed of four sedimentologic units displaying lacustrine, playa, and alluvial conditions separated by three tholeiitic basalt flows. Limited outcrop, however, has restricted analyses across the basin. The Jurassic East Berlin Formation, in particular, crops out only in the southern and northern extents of the basin, exposing the upper 100-118-m of deposits. As a result, a new core analysis across a 600-m-transect of East Berlin rocks has been completed in the central region of the basin, exposing the entire 195-m thickness of the formation for the first time. Cores expose eight 3-m-thick lacustrine mudrock units, the upper six of which are correlative to lake deposits identified in the southern and northern extents of the basin. Additionally, thin chicken-wire evaporites demarcate the lowermost, previously unexposed, lacustrine unit, 7-m beneath a 15-cm-thick tufa horizon. Thin playa deposits and thick sheetflood and Vertisol packages separate these lake sequences over 5-30-m of vertical distance. To supplement these sedimentologic data, and better understand lake geochemistry of the basin during East Berlin time, new biomarker analyses have been applied to each of the eight lacustrine mudrock units for the first time. Biomarker data are useful for determining the lake-basin type, a paleolake classification system derived by Bohacs, Carroll, and others to describe predictable physical and geochemical evolution within rift basins from fluvial facies to over-filled, balance-filled, and under-filled lacustrine facies; subsequently, balance-filled lacustrine facies grade to a terminal fluvial facies during changes in accommodation space through time. While fluvial facies envelope lake deposits within the Hartford Basin, identifying the lake types within the East Berlin has been problematic because of limited exposures. These new sedimentologic and biomarker analyses, however, suggest balance-filled lacustrine conditions at the base of the East Berlin that grade into under-filled conditions upsection. These new biomarker data finally provide definitive evidence for changing lake types during East Berlin time."
},
"authors": [
{
"name": {
"given_name": "Alexander A.",
"surname": "Conti"
}
},
{
"name": {
"given_name": "Elizabeth H.",
"surname": "Gierlowski-Kordesch"
}
}
],
"esources": [
{
"location": "https://gsa.confex.com/gsa/2016NE/webprogram/Paper271926.html",
"source": "pub_html"
}
],
"pagination": {
"electronicID": "271926"
},
"persistentIDs": [
{
"DOI": "10.1130/abs/2016NE-271926"
}
],
"pubDate": {
"printDate": "2016-00-00"
},
"publication": {
"confDates": "21-23 March 2016",
"confName": "Northeastern GSA Section Meeting",
"pubYear": "2016",
"publisher": "Geological Society of America"
},
"recordData": {
"createdTime": "",
"loadFormat": "OtherXML",
"loadLocation": "",
"loadType": "fromFile",
"parsedTime": "",
"recordOrigin": ""
},
"title": {
"textEnglish": "Delineating Lake Types of the Jurassic East Berlin Formation, Hartford Basin, Newark Supergroup"
}
}
1 change: 1 addition & 0 deletions tests/test_crossref.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def test_crossref(self):
"crossref_no_contrib_10.4213_im9580e",
"crossref_no_contrib_10.3367_UFNe.2022.11.039660",
"crossref_book_chapter_10.1016_B978-0-12-037311-6.50008-9",
"crossref_gsa_conf",
]
for f in filenames:
test_infile = os.path.join(self.inputdir, f + ".xml")
Expand Down

0 comments on commit eb54d51

Please sign in to comment.