Skip to content

Commit

Permalink
Adjust data model.
Browse files Browse the repository at this point in the history
refs #14
  • Loading branch information
justinlittman committed Sep 4, 2024
1 parent 12cbe57 commit 7dea531
Show file tree
Hide file tree
Showing 20 changed files with 69 additions and 291 deletions.
30 changes: 29 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,32 @@ docker run -d --rm --init --ulimit core=0 -p 8070:8070 lfoppiano/grobid:latest-f
```

## Helpful task
Export the metadata for a collection to CSV: `bin/rake "export_csv[druid:jk956kb4381]"`
Export the metadata for a collection to CSV: `bin/rake "export_csv[druid:jk956kb4381]"`

## Data model for an article
```
{
"title": STRING,
"authors": [
{
"first_name": STRING (REQUIRED - includes middle name, initials, etc.),
"last_name": STRING (REQUIRED),
"affiliations": [
{
"department": STRING,
"organization": STRING (REQUIRED)
}
],
"orcid": STRING (for example, https://orcid.org/0000-0003-1527-0030)
}
],
"abstract": STRING,
"keywords": [
{
"value": STRING
}
],
"related_resource_citation": STRING,
"related_resource_doi": STRING (for example, 10.5860/lrts.48n4.8259),
"published": BOOLEAN,
"collection_druid": STRING (for example, druid:jk956kb4381)
}
```
2 changes: 1 addition & 1 deletion app/components/works/fallback_form_component.html.erb
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<p>Provide a DOI or a citation here or manually complete the form below.</p>
<%= form_with url: new_works_path, builder: ShroomFormBuilder, data: { controller: 'submits-with', action: 'submits-with#showStatus', turbo: false } do |form| %>
<%= form.hidden_field :work_file, value: work_file_id %>
<%= form.hidden_field :preprint, value: preprint %>
<%= form.hidden_field :published, value: published %>
<%= form.bs_label :doi, 'DOI:', class: 'mt-2' %>
<%= form.bs_text_field :doi, class: 'form-control', pattern: 'https://doi.org/10.\d+/.+' %>
<%= form.bs_help_text 'For example: https://doi.org/10.1177/1940161218781254' %>
Expand Down
6 changes: 3 additions & 3 deletions app/components/works/fallback_form_component.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@
module Works
# Provides a fallback to provide a DOI or citation.
class FallbackFormComponent < ViewComponent::Base
def initialize(work_form:, work_file_id:, preprint:)
def initialize(work_form:, work_file_id:, published:)
@work_form = work_form
@work_file_id = work_file_id
@preprint = preprint
@published = published
super()
end

attr_reader :work_form, :work_file_id, :preprint
attr_reader :work_form, :work_file_id, :published

def render?
work_form.title.blank? && work_file_id
Expand Down
2 changes: 1 addition & 1 deletion app/controllers/files_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
class FilesController < ApplicationController
def create
work_file = WorkFile.create!(file_params)
redirect_to new_works_path(work_file:, doi: params[:doi], preprint: params[:preprint])
redirect_to new_works_path(work_file:, doi: params[:doi], published: params[:published])
end

private
Expand Down
17 changes: 8 additions & 9 deletions app/controllers/works_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -77,28 +77,27 @@ def grobid_service
# rubocop:disable Metrics/AbcSize
def build_new_work_form(work_file:)
if params[:citation].present?
grobid_service.from_citation(citation: params[:citation], preprint: preprint?)
grobid_service.from_citation(citation: params[:citation], published: published?)
elsif params[:doi].present?
grobid_service.from_citation(citation: params[:doi], preprint: preprint?)
grobid_service.from_citation(citation: params[:doi], published: published?)
elsif params.key?(:work_file)
grobid_service.from_file(path: work_file.path, preprint: preprint?)
grobid_service.from_file(path: work_file.path, published: published?)
else
WorkForm.new(preprint: preprint?)
WorkForm.new(published: published?)
end
end
# rubocop:enable Metrics/AbcSize

def preprint?
params[:preprint] == 'true'
def published?
params[:published] == 'true'
end

def work_params
# Perhaps these can be introspected from the model?
params.require(:work).permit(
:title, :abstract, :publisher,
:published_year, :published_month, :published_day,
:related_resource_citation, :preprint, :collection_druid,
:doi, :related_resource_doi,
:related_resource_citation, :published, :collection_druid,
:related_resource_doi,
authors_attributes: [
:first_name, :last_name, :orcid, { affiliations_attributes: %i[organization department] }
],
Expand Down
2 changes: 1 addition & 1 deletion app/forms/base_form.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ class BaseForm
include ActiveModel::Model
include ActiveModel::Attributes
include ActiveModel::Validations::Callbacks
include ActiveModel::Serialization
include ActiveModel::Serializers::JSON

def self.model_name
# Remove the "Form" suffix from the class name.
Expand Down
41 changes: 6 additions & 35 deletions app/forms/work_form.rb
Original file line number Diff line number Diff line change
Expand Up @@ -28,35 +28,6 @@ def authors_are_valid

attribute :abstract, :string

# For a preprint, the published date is the date the preprint was published not the actual publication date.
attribute :published_year, :integer
validates :published_year, numericality: { only_integer: true, in: 1900..Date.current.year }, allow_nil: true

attribute :published_month, :integer
validates :published_month, numericality: { only_integer: true, in: 1..12 }, allow_nil: true
validate :published_month_is_valid

def published_month_is_valid
errors.add(:published_month, 'requires a year') if published_year.blank? && published_month.present?
end

attribute :published_day, :integer
validates :published_day, numericality: { only_integer: true, in: 1..31 }, allow_nil: true
validate :published_day_is_valid

def published_day_is_valid
return unless (published_year.blank? || published_month.blank?) && published_day.present?

errors.add(:published_day,
'requires a year and month')
end

# Preprints don't have publishers
attribute :publisher, :string

attribute :doi, :string
validates :doi, format: { with: DoiSupport::REGEX }, allow_blank: true, unless: :preprint?

attribute :keywords, array: true, default: -> { [] }
before_validation do
keywords.compact_blank!
Expand All @@ -66,18 +37,18 @@ def keywords_attributes=(attributes)
self.keywords = attributes.map { |_, keyword| KeywordForm.new(keyword) }
end

# Preprints have a single related resource.
# Published articles have a single related resource.
attribute :related_resource_citation, :string
validates :related_resource_citation, presence: true, if: :preprint?
validates :related_resource_citation, presence: true, if: :published?

attribute :related_resource_doi, :string
validates :related_resource_doi, format: { with: DoiSupport::REGEX }, allow_blank: true, if: :preprint?
validates :related_resource_doi, format: { with: DoiSupport::REGEX }, allow_blank: true, if: :published?

attribute :preprint, :boolean, default: false
attribute :published, :boolean, default: false

attribute :collection_druid, :string

def preprint?
preprint || related_resource_citation.present?
def published?
published || related_resource_citation.present?
end
end
12 changes: 6 additions & 6 deletions app/services/grobid_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,23 +13,23 @@ def self.from_citation(...)
end

# @param [String] path the path to the PDF file
# @param [Boolean] preprint whether the work is a preprint
# @param [Boolean] published whether the work is a published article
# @return [Work] a Work model with metadata extracted from the PDF
# @raise [Error] if there is an error extracting metadata from the PDF
def from_file(path:, preprint: false)
def from_file(path:, published: false)
@tei = fetch_tei_from_file(path:)
@bibtex = fetch_tei_from_file(path:, tei: false) if preprint
@bibtex = fetch_tei_from_file(path:, tei: false) if published
tei_to_work(tei:, bibtex:)
end

# @param [String] citation for the work
# @param [Boolean] preprint whether the work is a preprint
# @param [Boolean] published whether the work is a published article
# @return [Work] a Work model with metadata extracted from the citation
# @raise [Error] if there is an error extracting metadata from the citation
def from_citation(citation:, preprint: false)
def from_citation(citation:, published: false)
tei_fragment = fetch_tei_from_citation(citation:)
@tei = "<TEI xmlns=\"http://www.tei-c.org/ns/1.0\">#{tei_fragment}</TEI>"
@bibtex = fetch_tei_from_citation(citation:, tei: false) if preprint
@bibtex = fetch_tei_from_citation(citation:, tei: false) if published
tei_to_work(tei:, bibtex:)
end

Expand Down
28 changes: 3 additions & 25 deletions app/services/tei_cocina_mapper_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ def self.call(...)
end

# @param [Nokogiri::XML::Document] tei_ng_xml
# @param [String,nil] related_resource_citation citation for the article this is a preprint of
# @param [String,nil] related_resource_citation citation for the published article that this work is a preprint or copy of
def initialize(tei_ng_xml:, related_resource_citation: nil)
@tei_doc = TeiDocument.new(ng_xml: tei_ng_xml)
@related_resource_citation = related_resource_citation
Expand Down Expand Up @@ -40,9 +40,7 @@ def description_params
title: CocinaDescriptionSupport.title(title: tei_doc.title),
contributor: tei_doc.authors.map { |author_attrs| CocinaDescriptionSupport.person_contributor(**author_attrs) },
note: note_params,
event: event_params,
subject: subject_params,
identifier: identifier_params,
relatedResource: related_resource_params
}.compact
end
Expand All @@ -53,34 +51,14 @@ def note_params
[CocinaDescriptionSupport.note(type: 'abstract', value: tei_doc.abstract)]
end

def event_params
return [] if preprint? # If a preprint, these are likely to be for the related resource.

[].tap do |params|
if tei_doc.published_date.present?
params << CocinaDescriptionSupport.event_date(date_type: 'publication',
date_value: tei_doc.published_date)
end
if tei_doc.publisher.present?
params << CocinaDescriptionSupport.event_contributor(contributor_name_value: tei_doc.publisher)
end
end
end

def subject_params
return if tei_doc.keywords.blank?

CocinaDescriptionSupport.subjects(values: tei_doc.keywords)
end

def identifier_params
return if preprint? || tei_doc.doi.blank?

[CocinaDescriptionSupport.doi_identifier(doi: tei_doc.doi)]
end

def related_resource_params
return unless preprint?
return unless published?

[
{
Expand All @@ -91,7 +69,7 @@ def related_resource_params
]
end

def preprint?
def published?
related_resource_citation.present?
end

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,7 @@ def params
title: CocinaDescriptionSupport.title(title: work_form.title),
contributor: contributors_params.presence,
note: note_params.presence,
event: event_params.presence,
subject: subject_params.presence,
identifier: identifier_params,
purl: Sdr::Purl.from_druid(druid:),
relatedResource: related_resource_params
}.compact
Expand Down Expand Up @@ -69,29 +67,10 @@ def note_params
end
end

# rubocop:disable Metrics/AbcSize
def event_params
[].tap do |params|
date_value = EdtfSupport.to_edtf(year: work_form.published_year, month: work_form.published_month,
day: work_form.published_day)
params << CocinaDescriptionSupport.event_date(date_value:, date_type: 'publication') if date_value.present?
if work_form.publisher.present?
params << CocinaDescriptionSupport.event_contributor(contributor_name_value: work_form.publisher)
end
end
end
# rubocop:enable Metrics/AbcSize

def subject_params
CocinaDescriptionSupport.subjects(values: work_form.keywords.map(&:value))
end

def identifier_params
return if work_form.doi.blank?

[CocinaDescriptionSupport.doi_identifier(doi: work_form.doi)]
end

def related_resource_params
resource_params = {}.tap do |params|
if work_form.related_resource_citation.present?
Expand Down
34 changes: 0 additions & 34 deletions app/services/work_cocina_mapper_service/to_work_mapper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,48 +19,18 @@ def call

attr_reader :cocina_object

# rubocop:disable Metrics/AbcSize
def params
{
title: CocinaSupport.title_for(cocina_object:),
authors: WorkCocinaMapperService::ToWork::AuthorsMapper.call(cocina_object:),
abstract: cocina_object.description.note.find { |note| note.type == 'abstract' }&.value,
published_year: published_date&.year,
published_month: EdtfSupport.month_for(edtf: published_date),
published_day: EdtfSupport.day_for(edtf: published_date),
publisher:,
keywords:,
doi:,
related_resource_citation:,
related_resource_doi:,
collection_druid: CocinaSupport.collection_druid_for(cocina_object:)
}
end

# rubocop:disable Metrics/CyclomaticComplexity
# rubocop:disable Metrics/PerceivedComplexity
def published_date
@published_date ||= begin
published_event = cocina_object.description.event.find do |event|
event.type == 'deposit' \
&& event.date.first&.encoding&.code == 'edtf' \
&& event.date.first&.type == 'publication'
end
EdtfSupport.parse_with_precision(date: published_event&.date&.first&.value)
end
end

def publisher
publisher_event = cocina_object.description.event.find do |event|
event.type == 'publication' \
&& event.contributor&.first&.role&.first&.value == 'publisher'
end
publisher_event&.contributor&.first&.name&.first&.value
end
# rubocop:enable Metrics/AbcSize
# rubocop:enable Metrics/CyclomaticComplexity
# rubocop:enable Metrics/PerceivedComplexity

def keywords
cocina_object.description.subject
.select { |subject| subject.type == 'topic' }
Expand All @@ -74,10 +44,6 @@ def related_resource_citation
note.value
end

def doi
doi_for(cocina_object.description)
end

def related_resource_doi
doi_for(related_resource)
end
Expand Down
8 changes: 4 additions & 4 deletions app/views/files/_new.html.erb
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
<%= form_with url: files_path, builder: ShroomFormBuilder, data: { controller: 'submits-with', action: 'submits-with#showStatus' } do |form| %>
<div class="form-check">
<%= form.bs_radio_button :preprint, 'false', required: true %>
<%= form.bs_radio_label :preprint_false, 'This is an article or manuscript (published or unpublished).' %>
<%= form.bs_radio_button :published, 'true', required: true %>
<%= form.bs_radio_label :published_true, 'This is a preprint or copy of an article / manuscript that is forthcoming or has been published elsewhere.' %>
</div>
<div class="form-check">
<%= form.bs_radio_button :preprint, 'true', required: true %>
<%= form.bs_radio_label :preprint_true, 'This is a preprint of an article (forthcoming or published).' %>
<%= form.bs_radio_button :published, 'false', required: true %>
<%= form.bs_radio_label :published_false, 'This is an article / manuscript that will only be published in SDR or has not been submitted and / or accepted for publication.' %>
</div>
<%= form.bs_label :file, 'Upload article:', class: 'mt-2' %>
<%= form.file_field :file, required: true, accept: 'application/pdf', class: 'form-control', direct_upload: true %>
Expand Down
Loading

0 comments on commit 7dea531

Please sign in to comment.