Merge pull request #34 from RatioPBC/add_ndjson
add NDJSON support
lei0zhou authored Aug 28, 2024
2 parents 867c200 + df45325 commit 429fcca
Showing 28 changed files with 392 additions and 155 deletions.
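For context, NDJSON (newline-delimited JSON) carries one JSON object per line, while the CSV path expects a header row followed by comma-separated records. A hypothetical lab-result upload in each format might look like the following (the column names "caseid" and "datecollected" appear in the importer's key map below; "result" and all values are purely illustrative):

caseid,datecollected,result
10001,2024-08-01,positive

{"caseid": "10001", "datecollected": "2024-08-01", "result": "positive"}
{"caseid": "10002", "datecollected": "2024-08-02", "result": "negative"}

Both formats are parsed into the same row maps before import; see the new lib/epiviewpoint/data_file.ex module further down.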
4 changes: 2 additions & 2 deletions lib/epiviewpoint/audit_log/revision.ex
@@ -47,7 +47,7 @@ defmodule EpiViewpoint.AuditLog.Revision do
def delete_contact_investigation_note_action(), do: "delete-contact-investigation-note"
def demote_user_action(), do: "demote-user"
def enable_user_action(), do: "enable-user"
- def import_csv_action(), do: "import-csv"
+ def import_file_action(), do: "import-file-action"
def insert_case_investigation_action(), do: "insert-case-investigation"
def insert_demographics_action(), do: "insert-demographics"
def login_user_action(), do: "login-user"
@@ -95,7 +95,7 @@ defmodule EpiViewpoint.AuditLog.Revision do
def edit_contact_investigation_quarantine_monitoring_event(), do: "edit-contact-investigation-quarantine-monitoring"
def edit_profile_demographics_event(), do: "edit-profile-demographics"
def edit_profile_saved_event(), do: "edit-profile-saved"
- def import_csv_event(), do: "import-csv"
+ def import_file_event(), do: "import-file-event"
def initial_user_creation_event(), do: "initial-user-creation"
def login_user_event(), do: "login-user"
def people_archive_people_event(), do: "people-archive-people"
2 changes: 1 addition & 1 deletion lib/epiviewpoint/cases.ex
@@ -33,7 +33,7 @@ defmodule EpiViewpoint.Cases do
def change_lab_result(%LabResult{} = lab_result, attrs), do: LabResult.changeset(lab_result, attrs)
def count_lab_results(), do: LabResult |> Repo.aggregate(:count)
def create_lab_result!({attrs, audit_meta}), do: %LabResult{} |> change_lab_result(attrs) |> AuditingRepo.insert!(audit_meta)
- def import_lab_results(lab_result_csv_string, originator), do: Import.import_csv(lab_result_csv_string, originator)
+ def import_lab_results(lab_result_data_file_string, originator), do: Import.import_data_file(lab_result_data_file_string, originator)
def list_lab_results(), do: LabResult.Query.all() |> Repo.all()
def preload_initiating_lab_result(case_investigations_or_nil), do: case_investigations_or_nil |> Repo.preload(:initiating_lab_result)
def preload_lab_results(person_or_people_or_nil), do: person_or_people_or_nil |> Repo.preload(lab_results: LabResult.Query.display_order())
38 changes: 23 additions & 15 deletions lib/epiviewpoint/cases/import.ex
@@ -5,28 +5,28 @@ defmodule EpiViewpoint.Cases.Import do
alias EpiViewpoint.Cases.Import
alias EpiViewpoint.Cases.LabResult
alias EpiViewpoint.Cases.Person
- alias EpiViewpoint.Csv
+ alias EpiViewpoint.DataFile
alias EpiViewpoint.DateParser
alias EpiViewpoint.Extra
alias EpiViewpoint.Repo

# Read fields
- @required_lab_result_csv_fields ~w{sampled_on result analyzed_on}
- @optional_lab_result_csv_fields ~w{reported_on tid request_facility_name test_type}
- @required_person_csv_fields ~w{dob first_name last_name}
- @optional_person_csv_fields ~w{external_id diagaddress_street1 diagaddress_city diagaddress_state diagaddress_zip person_tid phonenumber sex_at_birth ethnicity occupation race}
+ @required_lab_result_data_fields ~w{sampled_on result analyzed_on}
+ @optional_lab_result_data_fields ~w{reported_on tid request_facility_name test_type}
+ @required_person_data_fields ~w{dob first_name last_name}
+ @optional_person_data_fields ~w{external_id diagaddress_street1 diagaddress_city diagaddress_state diagaddress_zip person_tid phonenumber sex_at_birth ethnicity occupation race}

# Insert fields
@lab_result_db_fields_to_insert ~w{result sampled_on analyzed_on reported_on request_accession_number request_facility_code request_facility_name test_type tid}
@person_db_fields_to_insert ~w{person_tid dob first_name last_name external_id preferred_language sex_at_birth ethnicity occupation race}
@address_db_fields_to_insert ~w{diagaddress_street1 diagaddress_city diagaddress_state diagaddress_zip}

@fields [
- required: @required_lab_result_csv_fields ++ @required_person_csv_fields,
- optional: @optional_lab_result_csv_fields ++ @optional_person_csv_fields
+ required: @required_lab_result_data_fields ++ @required_person_data_fields,
+ optional: @optional_lab_result_data_fields ++ @optional_person_data_fields
]

- # Mapping from csv column name to internal db column names
+ # Mapping from data column name to internal db column names
@key_map %{
"caseid" => "external_id",
"datecollected" => "sampled_on",
@@ -59,14 +59,14 @@
]
end

- def import_csv(_file, %{admin: false}), do: {:error, "Originator must be an admin"}
+ def import_data_file(_file, %{admin: false}), do: {:error, "Originator must be an admin"}

- def import_csv(file, %Accounts.User{} = originator) do
+ def import_data_file(file, %Accounts.User{} = originator) do
Repo.transaction(fn ->
try do
- Cases.create_imported_file(in_audit_tuple(file, originator, AuditLog.Revision.import_csv_action()))
+ Cases.create_imported_file(in_audit_tuple(file, originator, AuditLog.Revision.import_file_action()))

- with {:ok, rows} <- Csv.read(file.contents, &rename_headers/1, @fields),
+ with {:ok, rows} <- file_router(file),
{:transform_dates, {:ok, rows}} <- {:transform_dates, transform_dates(rows)},
rows = {rows, []} do
case import_rows(rows, originator) do
@@ -84,7 +84,7 @@
|> Enum.map(&Extra.String.add_placeholder_suffix/1)
|> Enum.join(", ")

- Repo.rollback(user_readable: "Missing required columns: #{headers_string}")
+ Repo.rollback(user_readable: "Missing required fields: #{headers_string}")

{:invalid_csv, message} ->
Repo.rollback(user_readable: "Invalid CSV: \n #{message}")
@@ -99,6 +99,14 @@
end)
end

+ defp file_router(file) do
+ case Path.extname(file.file_name) do
+ ".csv" -> DataFile.read(file.contents, :csv, &rename_headers/1, @fields)
+ ".ndjson" -> DataFile.read(file.contents, :ndjson, &rename_headers/1, @fields)
+ _ -> {:error, "Unsupported file type: #{Path.extname(file.file_name)}"}
+ end
+ end
+
def reject_rows_with_blank_key_values(rows, key) do
error_message = "Missing required field: #{key}"

@@ -321,7 +329,7 @@
%AuditLog.Meta{
author_id: originator.id,
reason_action: reason_action,
- reason_event: AuditLog.Revision.import_csv_event()
+ reason_event: AuditLog.Revision.import_file_event()
}
end

@@ -330,7 +338,7 @@
%AuditLog.Meta{
author_id: author.id,
reason_action: reason_action,
- reason_event: AuditLog.Revision.import_csv_event()
+ reason_event: AuditLog.Revision.import_file_event()
}}
end
end
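With these changes the import entry point dispatches on the uploaded file's extension before parsing. A minimal sketch of a call, assuming a plain map with the file_name and contents fields that file_router/1 reads (the real caller passes the upload built by the import controller, and admin_user stands for an %EpiViewpoint.Accounts.User{} with admin: true):

file = %{
  file_name: "labs.ndjson",
  contents: ~s({"datecollected": "2024-08-01", "result": "positive"}\n)
}

EpiViewpoint.Cases.import_lab_results(file, admin_user)
# ".ndjson" routes to DataFile.read(contents, :ndjson, ...), ".csv" to the CSV parser,
# any other extension returns an error, and a non-admin originator gets
# {:error, "Originator must be an admin"}

This sketch omits most of the required columns, so a real run like this would roll back with the "Missing required fields" message shown above.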
57 changes: 0 additions & 57 deletions lib/epiviewpoint/csv.ex

This file was deleted.

95 changes: 95 additions & 0 deletions lib/epiviewpoint/data_file.ex
@@ -0,0 +1,95 @@
defmodule EpiViewpoint.DataFile do
alias Explorer.DataFrame
alias EpiViewpoint.Extra

NimbleCSV.define(EpiViewpoint.DataFile.Parser,
separator: ",",
escape: "\"",
line_separator: "\r\n",
trim_bom: true,
moduledoc: """
A CSV parser that uses comma as separator and double-quotes as escape according to RFC4180,
and trims byte-order marks (BOMs) that may be generated by some systems.
"""
)

def read(string, :csv, header_transformer, headers) when is_binary(string) do
read(string, header_transformer, headers, &parse_csv/1)
end

def read(string, :ndjson, header_transformer, headers) when is_binary(string) do
read(string, header_transformer, headers, &parse_ndjson/1)
end

def read(input, header_transformer, [required: required_headers, optional: optional_headers], parser) do
with {:ok, df} <- parser.(input),
{:ok, provided_headers} <- extract_provided_headers(df, header_transformer),
:ok <- validate_headers(provided_headers, required_headers) do
headers = get_valid_headers(provided_headers, required_headers, optional_headers)

data =
df
|> DataFrame.rename(provided_headers)
|> DataFrame.select(headers)
|> DataFrame.to_rows()
|> Enum.map(&process_row(&1, headers))

{:ok, data}
end
end

defp extract_provided_headers(df, header_transformer) do
provided_headers =
df
|> DataFrame.names()
|> Enum.map(&String.trim/1)
|> header_transformer.()

{:ok, provided_headers}
end

defp validate_headers(provided_headers, required_headers) do
case required_headers -- provided_headers do
[] -> :ok
missing_headers -> {:error, :missing_headers, Enum.sort(missing_headers)}
end
end

defp get_valid_headers(provided_headers, required_headers, optional_headers) do
MapSet.intersection(
MapSet.new(provided_headers),
MapSet.union(MapSet.new(required_headers), MapSet.new(optional_headers))
)
|> MapSet.to_list()
end

defp process_row(row, headers) do
for header <- headers, into: %{} do
{header, row[header] |> Extra.String.trim()}
end
end

defp parse_csv(input) do
input
|> validate_csv()
|> remove_bom()
|> DataFrame.load_csv(infer_schema_length: 0)
rescue
e ->
hint = "make sure there are no spaces between the field separators (commas) and the quotes around field contents"
{:invalid_csv, "#{Exception.message(e)} (#{hint})"}
end

defp parse_ndjson(input) do
DataFrame.load_ndjson(input)
end

defp validate_csv(input) do
EpiViewpoint.DataFile.Parser.parse_string(input, headers: true)
input
end

defp remove_bom(input) do
String.trim_leading(input, "\uFEFF")
end
end
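A rough IEx sketch of the new module (the identity function stands in for the header transformer that import.ex supplies, and the field names and values are invented for illustration):

ndjson = ~s({"sampled_on": "2024-08-01", "result": "positive", "analyzed_on": "2024-08-02"}\n)

EpiViewpoint.DataFile.read(ndjson, :ndjson, & &1,
  required: ~w{sampled_on result analyzed_on},
  optional: ~w{tid}
)
# should return something like
# {:ok, [%{"analyzed_on" => "2024-08-02", "result" => "positive", "sampled_on" => "2024-08-01"}]}

If a required column is absent, validate_headers/2 returns {:error, :missing_headers, [...]}, which import.ex turns into the "Missing required fields" rollback; a malformed CSV surfaces as {:invalid_csv, message}.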
12 changes: 10 additions & 2 deletions lib/epiviewpoint/tempfile.ex
@@ -1,7 +1,15 @@
defmodule EpiViewpoint.Tempfile do
+ def write_csv!(contents, tmp_dir) do
+ write_file!(contents, tmp_dir, "csv")
+ end
+
+ def write_ndjson!(contents, tmp_dir) do
+ write_file!(contents, tmp_dir, "ndjson")
+ end
+
# sobelow_skip ["Traversal.FileModule"]
- def write_csv!(contents) do
- path = System.tmp_dir!() |> Path.join(Ecto.UUID.generate() <> ".csv")
+ defp write_file!(contents, tmp_dir, extension) do
+ path = tmp_dir |> Path.join(Ecto.UUID.generate() <> ".#{extension}")
File.write!(path, contents)
path
end
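Callers now choose the extension and the target directory explicitly instead of always getting a CSV file in System.tmp_dir!(). A quick sketch (the returned path is illustrative; the UUID portion differs on every call):

path = EpiViewpoint.Tempfile.write_ndjson!(~s({"result": "positive"}\n), System.tmp_dir!())
# writes the contents to e.g. "/tmp/9b4d….ndjson" and returns that path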
4 changes: 2 additions & 2 deletions lib/epiviewpoint_web/controllers/import_controller.ex
@@ -26,7 +26,7 @@ defmodule EpiViewpointWeb.ImportController do
{_imported_people, popped_import_info} = import_info |> Map.pop(:imported_people)

conn
- |> Session.set_last_csv_import_info(popped_import_info)
+ |> Session.set_last_file_import_info(popped_import_info)
|> redirect(to: ~p"/import/complete")

{:error, [user_readable: user_readable_message]} ->
@@ -44,6 +44,6 @@
def show(conn, _params) do
conn
|> assign_defaults(@common_assigns)
- |> render("show.html", last_csv_import_info: Session.get_last_csv_import_info(conn))
+ |> render("show.html", last_file_import_info: Session.get_last_file_import_info(conn))
end
end
2 changes: 1 addition & 1 deletion lib/epiviewpoint_web/live/import_live.html.heex
@@ -6,7 +6,7 @@
</div>
<% end %>
<p>
- Choose a CSV file from your computer, click “Upload”, and then wait for the file to upload.
+ Choose a CSV or NDJSON file from your computer, click “Upload”, and then wait for the file to upload.
</p>
<%= form_tag("/import/upload", multipart: true, method: :post) %><input name="file" type="file" /><input
data-role="upload-labs"
10 changes: 5 additions & 5 deletions lib/epiviewpoint_web/session.ex
@@ -2,11 +2,11 @@ defmodule EpiViewpointWeb.Session do
alias EpiViewpoint.Cases.Import.ImportInfo
alias Plug.Conn

- def get_last_csv_import_info(conn),
- do: Conn.get_session(conn, :last_csv_import_info)
+ def get_last_file_import_info(conn),
+ do: Conn.get_session(conn, :last_file_import_info)

- def set_last_csv_import_info(conn, %ImportInfo{} = import_info),
- do: conn |> clear_old_messages |> Conn.put_session(:last_csv_import_info, import_info)
+ def set_last_file_import_info(conn, %ImportInfo{} = import_info),
+ do: conn |> clear_old_messages |> Conn.put_session(:last_file_import_info, import_info)

def get_import_error_message(conn),
do: Conn.get_session(conn, :import_error_message)
@@ -17,7 +17,7 @@
defp clear_old_messages(conn) do
conn
|> Conn.delete_session(:import_error_message)
- |> Conn.delete_session(:last_csv_import_info)
+ |> Conn.delete_session(:last_file_import_info)
end

def ensure_multifactor_auth_secret(conn, if_nil: if_nil_fn) do
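The session helpers are a straight rename, but for completeness, a rough sketch of the round trip in a controller or conn-based test (the conn must already have its session fetched, and import_info is a %Cases.Import.ImportInfo{}):

conn
|> EpiViewpointWeb.Session.set_last_file_import_info(import_info)
|> EpiViewpointWeb.Session.get_last_file_import_info()
# reads the %ImportInfo{} back from the :last_file_import_info session key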
6 changes: 3 additions & 3 deletions lib/epiviewpoint_web/templates/import/show.html.heex
@@ -2,11 +2,11 @@
<h2>Import Labs</h2>
<h3>Success</h3>
<p>
- <%= "Successfully imported #{@last_csv_import_info.imported_person_count} people and #{@last_csv_import_info.imported_lab_result_count} lab results." %>
+ <%= "Successfully imported #{@last_file_import_info.imported_person_count} people and #{@last_file_import_info.imported_lab_result_count} lab results." %>
</p>
- <h3><%= "#{@last_csv_import_info.skipped_row_count} row(s) were skipped" %></h3>
+ <h3><%= "#{@last_file_import_info.skipped_row_count} row(s) were skipped" %></h3>
<div id="failed-rows">
- <%= for x <- @last_csv_import_info.skipped_row_error_messages do %>
+ <%= for x <- @last_file_import_info.skipped_row_error_messages do %>
<div class="import-error-message">
<p><%= x %></p>
</div>
1 change: 1 addition & 0 deletions mix.exs
@@ -51,6 +51,7 @@ defmodule EpiViewpoint.MixProject do
{:ecto_sql, "~> 3.11"},
{:eqrcode, "~> 0.1"},
{:esbuild, "~> 0.8", runtime: Mix.env() == :dev},
+ {:explorer, "~> 0.9.1"},
{:floki, ">= 0.0.0", only: :test},
{:gettext, "~> 0.11"},
{:inflex, "~> 2.1"},