From 614b8f931929e571958c87db6c0bd2738a86ac13 Mon Sep 17 00:00:00 2001
From: Abhinav Pandey <abhinav.pandey.met22@itbhu.ac.in>
Date: Thu, 18 Jul 2024 13:04:38 +0530
Subject: [PATCH 01/14] Shift functions to check for missing files

Return from convert_input via a helper function

Update corresponding test files and add tests to ensure do_conversions isn't affected by current applied changes

Signed-off-by: Abhinav Pandey <abhinav.pandey.met22@itbhu.ac.in>
---
 base/db/R/add.database.entries.R              | 111 +++++++++++++
 base/db/R/check.missing.files.R               |  49 ++++++
 base/db/R/convert_input.R                     | 150 ++----------------
 base/db/man/add.database.entries.Rd           |  70 ++++++++
 base/db/man/check_missing_files.Rd            |  31 ++++
 .../tests/testthat/test.check.missing.files.R |  24 +++
 base/db/tests/testthat/test.convert_input.R   |  29 ++--
 7 files changed, 320 insertions(+), 144 deletions(-)
 create mode 100644 base/db/R/add.database.entries.R
 create mode 100644 base/db/R/check.missing.files.R
 create mode 100644 base/db/man/add.database.entries.Rd
 create mode 100644 base/db/man/check_missing_files.Rd
 create mode 100644 base/db/tests/testthat/test.check.missing.files.R

diff --git a/base/db/R/add.database.entries.R b/base/db/R/add.database.entries.R
new file mode 100644
index 00000000000..3c253c07e73
--- /dev/null
+++ b/base/db/R/add.database.entries.R
@@ -0,0 +1,111 @@
+#' Return new arrangement of database while adding code to deal with ensembles
+#'
+#' @param result list of results from the download function
+#' @param con database connection
+#' @param start_date start date of the data
+#' @param end_date end date of the data
+#' @param write whether to write to the database
+#' @param overwrite Logical: If a file already exists, create a fresh copy?
+#' @param insert.new.file whether to insert a new file
+#' @param input.args input arguments obtained from the convert_input function
+#' @param machine machine information
+#' @param mimetype data product specific file format
+#' @param formatname format name of the data
+#' @param allow.conflicting.dates whether to allow conflicting dates
+#' @param ensemble ensemble id
+#' @param ensemble_name ensemble name
+#' @param existing.input existing input records
+#' @param existing.dbfile existing dbfile records
+#' @param input input records
+#' @return list of input and dbfile ids
+#'
+#' @author Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko
+
+add.database.entries <- function(
+    result, con, start_date,
+    end_date, write, overwrite,
+    insert.new.file, input.args,
+    machine, mimetype, formatname,
+    allow.conflicting.dates, ensemble,
+    ensemble_name, existing.input,
+    existing.dbfile, input) {
+    if (write) {
+        # Setup newinput. This list will contain two variables: a vector of input IDs and a vector of DB IDs for each entry in result.
+        # This list will be returned.
+        newinput <- list(input.id = NULL, dbfile.id = NULL) # Blank vectors are null.
+        for (i in 1:length(result)) { # Master for loop
+            id_not_added <- TRUE
+
+            if (!is.null(existing.input) && nrow(existing.input[[i]]) > 0 &&
+                (existing.input[[i]]$start_date != start_date || existing.input[[i]]$end_date != end_date)) {
+                # Updating record with new dates
+                db.query(paste0("UPDATE inputs SET start_date='", start_date, "', end_date='", end_date, "' WHERE id=", existing.input[[i]]$id), con)
+                id_not_added <- FALSE
+
+                # The overall structure of this loop has been set up so that exactly one input.id and one dbfile.id will be written to newinput every iteration.
+                newinput$input.id <- c(newinput$input.id, existing.input[[i]]$id)
+                newinput$dbfile.id <- c(newinput$dbfile.id, existing.dbfile[[i]]$id)
+            }
+
+            if (overwrite) {
+                # A bit hacky, but need to make sure that all fields are updated to expected values (i.e., what they'd be if convert_input was creating a new record)
+                if (!is.null(existing.input) && nrow(existing.input[[i]]) > 0) {
+                    db.query(paste0("UPDATE inputs SET name='", basename(dirname(result[[i]]$file[1])), "' WHERE id=", existing.input[[i]]$id), con)
+                }
+
+                if (!is.null(existing.dbfile) && nrow(existing.dbfile[[i]]) > 0) {
+                    db.query(paste0("UPDATE dbfiles SET file_path='", dirname(result[[i]]$file[1]), "', file_name='", result[[i]]$dbfile.name[1], "' WHERE id=", existing.dbfile[[i]]$id), con)
+                }
+            }
+
+            # If there is no ensemble then for each record there should be one parent
+            # But when you have ensembles, all of the members have one parent !!
+            parent.id <- if (is.numeric(ensemble)) {
+                ifelse(is.null(input[[i]]), NA, input[[1]]$id)
+            } else {
+                ifelse(is.null(input[[i]]), NA, input[[i]]$id)
+            }
+
+
+            if ("newsite" %in% names(input.args) && !is.null(input.args[["newsite"]])) {
+                site.id <- input.args$newsite
+            }
+
+            if (insert.new.file && id_not_added) {
+                dbfile.id <- dbfile.insert(in.path = dirname(result[[i]]$file[1]), in.prefix = result[[i]]$dbfile.name[1], "Input", existing.input[[i]]$id, con, reuse = TRUE, hostname = machine$hostname)
+                newinput$input.id <- c(newinput$input.id, existing.input[[i]]$id)
+                newinput$dbfile.id <- c(newinput$dbfile.id, dbfile.id)
+            } else if (id_not_added) {
+                # This is to tell input.insert if we are writing ensembles
+                # Why does it need it? Because it checks for inputs with the same time period, site, and machine
+                # and if it returns something it does not insert anymore, but for ensembles, it needs to bypass this condition
+                ens.flag <- if (!is.null(ensemble) | is.null(ensemble_name)) TRUE else FALSE
+
+                new_entry <- dbfile.input.insert(
+                    in.path = dirname(result[[i]]$file[1]),
+                    in.prefix = result[[i]]$dbfile.name[1],
+                    siteid = site.id,
+                    startdate = start_date,
+                    enddate = end_date,
+                    mimetype = mimetype,
+                    formatname = formatname,
+                    parentid = parent.id,
+                    con = con,
+                    hostname = machine$hostname,
+                    allow.conflicting.dates = allow.conflicting.dates,
+                    ens = ens.flag
+                )
+
+                newinput$input.id <- c(newinput$input.id, new_entry$input.id)
+                newinput$dbfile.id <- c(newinput$dbfile.id, new_entry$dbfile.id)
+            }
+        } # End for loop
+
+        successful <- TRUE
+        return(newinput)
+    } else {
+        PEcAn.logger::logger.warn("Input was not added to the database")
+        successful <- TRUE
+        return(NULL)
+    }
+}
diff --git a/base/db/R/check.missing.files.R b/base/db/R/check.missing.files.R
new file mode 100644
index 00000000000..bde3d7ebe97
--- /dev/null
+++ b/base/db/R/check.missing.files.R
@@ -0,0 +1,49 @@
+#' Function to check if result has empty or missing files
+#' 
+#' @param result A list of dataframes with file paths
+#' @param outname Name of the output file
+#' @param existing.input Existing input records
+#' @param existing.dbfile Existing dbfile records
+#' @return A list of dataframes with file paths, a list of strings with the output file name, a list of existing input records, and a list of existing dbfile records
+#' 
+#' @author Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko
+
+check_missing_files <- function(result, outname, existing.input = NULL, existing.dbfile = NULL) {
+  result_sizes <- purrr::map_dfr(
+    result,
+    ~ dplyr::mutate(
+      .,
+      file_size = purrr::map_dbl(file, file.size),
+      missing = is.na(file_size),
+      empty = file_size == 0
+    )
+  )
+  
+  if (any(result_sizes$missing) || any(result_sizes$empty)) {
+    log_format_df <- function(df) {
+      formatted_df <- rbind(colnames(df), format(df))
+      formatted_text <- purrr::reduce(formatted_df, paste, sep = " ")
+      paste(formatted_text, collapse = "\n")
+    }
+    
+    PEcAn.logger::logger.severe(
+      "Requested Processing produced empty files or Nonexistent files:\n",
+      log_format_df(result_sizes[, c(1, 8, 9, 10)]),
+      "\n Table of results printed above.",
+      wrap = FALSE
+    )
+  }
+  
+  # Insert into Database
+  outlist <- unlist(strsplit(outname, "_"))
+  
+  # Wrap in a list for consistant processing later
+  if (exists("existing.input") && is.data.frame(existing.input)) {
+    existing.input <- list(existing.input)
+  }
+  
+  if (exists("existing.dbfile") && is.data.frame(existing.dbfile)) {
+    existing.dbfile <- list(existing.dbfile)
+  }
+  return(list(result_sizes, outlist, existing.input, existing.dbfile))
+}
diff --git a/base/db/R/convert_input.R b/base/db/R/convert_input.R
index 8203fa7244b..d5af069d0eb 100644
--- a/base/db/R/convert_input.R
+++ b/base/db/R/convert_input.R
@@ -384,7 +384,7 @@ convert_input <-
     if (!is.null(ensemble) && ensemble) {
       return.all <-TRUE
       
-    }else{
+    } else{
       return.all <- FALSE
     }
       existing.dbfile <- dbfile.input.check(siteid = site.id,
@@ -734,143 +734,23 @@ convert_input <-
   #--------------------------------------------------------------------------------------------------#
   # Check if result has empty or missing files
 
-  result_sizes <- purrr::map_dfr(
-    result,
-    ~ dplyr::mutate(
-      .,
-      file_size = purrr::map_dbl(file, file.size),
-      missing = is.na(file_size),
-      empty = file_size == 0
-    )
-  )
-  
-  if (any(result_sizes$missing) || any(result_sizes$empty)){
-    log_format_df = function(df){
-        rbind(colnames(df), format(df))
-        purrr::reduce( paste, sep=" ") %>%
-        paste(collapse="\n")
-    }
-    
-    PEcAn.logger::logger.severe(
-      "Requested Processing produced empty files or Nonexistant files :\n",
-      log_format_df(result_sizes[,c(1,8,9,10)]),
-      "\n Table of results printed above.",
-      wrap = FALSE)
-  }
-  
-  # Insert into Database
-  outlist <- unlist(strsplit(outname, "_"))
-  
-  # Wrap in a list for consistant processing later
-  if (exists("existing.input") && is.data.frame(existing.input)) {
-    existing.input <- list(existing.input)
-  }
-  
-  if (exists("existing.dbfile") && is.data.frame(existing.dbfile)) {
-    existing.dbfile <- list(existing.dbfile)
-  }
+  checked.missing.files <- check_missing_files(result, outname, existing.input, existing.dbfile)
+
+  # Unwrap parameters after performing checks for missing files
+  result_sizes <- checked.missing.files$result_sizes;
+  outlist <- checked.missing.files$outlist;
+  existing.input <- checked.missing.files$existing.input;
+  existing.dbfile <- checked.missing.files$existing.dbfile;
   
   #---------------------------------------------------------------#
   # New arrangement of database adding code to deal with ensembles.
-  if (write) {
-
-    # Setup newinput.  This list will contain two variables: a vector of input IDs and a vector of DB IDs for each entry in result.
-    # This list will be returned.
-    newinput = list(input.id = NULL, dbfile.id = NULL) #Blank vectors are null.
-    for(i in 1:length(result)) {  # Master for loop
-      id_not_added <- TRUE
-      
-      if (exists("existing.input") && nrow(existing.input[[i]]) > 0 && 
-          (existing.input[[i]]$start_date != start_date || existing.input[[i]]$end_date != end_date)) {
-        
-        # Updating record with new dates
-        db.query(paste0("UPDATE inputs SET start_date='", start_date, "', end_date='",
-                                  end_date, "'  WHERE id=", existing.input[[i]]$id), 
-                           con)
-        id_not_added = FALSE
-        
-        # The overall structure of this loop has been set up so that exactly one input.id and one dbfile.id will be written to newinput every interation.
-        newinput$input.id = c(newinput$input.id, existing.input[[i]]$id)
-        newinput$dbfile.id = c(newinput$dbfile.id, existing.dbfile[[i]]$id)
-      }
-      
-      if (overwrite) {
-        # A bit hacky, but need to make sure that all fields are updated to expected
-        # values (i.e., what they'd be if convert_input was creating a new record)
-        if (exists("existing.input") && nrow(existing.input[[i]]) > 0) {
-            db.query(paste0("UPDATE inputs SET name='", basename(dirname(result[[i]]$file[1])),
-                                      "' WHERE id=", existing.input[[i]]$id), con)
-          
-        }
-        
-        if (exists("existing.dbfile") && nrow(existing.dbfile[[i]]) > 0) {
-            db.query(paste0("UPDATE dbfiles SET file_path='", dirname(result[[i]]$file[1]),
-                                      "', ", "file_name='", result[[i]]$dbfile.name[1], 
-                                      "' WHERE id=", existing.dbfile[[i]]$id), con)
-          
-        }
-      }
-      
-      # If there is no ensemble then for each record there should be one parent
-      #But when you have ensembles, all of the members have one parent !!
-      if (is.numeric(ensemble)){
-        parent.id <- ifelse(is.null(input[i]), NA, input[1]$id)
-      }else{
-        parent.id <- ifelse(is.null(input[i]), NA, input[i]$id)  
-      }
-      
-      
-      
-      if ("newsite" %in% names(input.args) && !is.null(input.args[["newsite"]])) {
-        site.id <- input.args$newsite
-      }
-      
-      if (insert.new.file && id_not_added) {
-        dbfile.id <- dbfile.insert(in.path = dirname(result[[i]]$file[1]),
-                                             in.prefix = result[[i]]$dbfile.name[1], 
-                                             'Input', existing.input[[i]]$id, 
-                                             con, reuse=TRUE, hostname = machine$hostname)
-        newinput$input.id  <- c(newinput$input.id, existing.input[[i]]$id)
-        newinput$dbfile.id <- c(newinput$dbfile.id, dbfile.id)
-      } else if (id_not_added) {
-
-      # This is to tell input.insert if we are wrting ensembles
-      # Why does it need it ? bc it checks for inputs with the same time period, site and machine
-      # and if it returns somethings it does not insert anymore, but for ensembles it needs to bypass this condition
-      if (!is.null(ensemble) | is.null(ensemble_name)){
-        ens.flag <- TRUE
-      }else{
-        ens.flag <- FALSE
-      }
-       
-        new_entry <- dbfile.input.insert(in.path = dirname(result[[i]]$file[1]),
-                                                   in.prefix = result[[i]]$dbfile.name[1], 
-                                                   siteid = site.id, 
-                                                   startdate = start_date,
-                                                   enddate = end_date, 
-                                                   mimetype, 
-                                                   formatname, 
-                                                   parentid = parent.id,
-                                                   con = con, 
-                                                   hostname = machine$hostname,
-                                                   allow.conflicting.dates = allow.conflicting.dates, 
-                                                   ens=ens.flag
-                                                   )
-        
-        
-        newinput$input.id <- c(newinput$input.id, new_entry$input.id)
-        newinput$dbfile.id <- c(newinput$dbfile.id, new_entry$dbfile.id)
-      }
-      
-    } #End for loop
-    
-    successful <- TRUE
-    return(newinput)
-  } else {
-    PEcAn.logger::logger.warn("Input was not added to the database")
-    successful <- TRUE
-    return(NULL)
-  }
+  return (add.database.entries(result, con, start_date,
+    end_date, write, overwrite,
+    insert.new.file, input.args,
+    machine, mimetype, formatname,
+    allow.conflicting.dates, ensemble,
+    ensemble_name, existing.input,
+    existing.dbfile, input))
 } # convert_input
 
 
diff --git a/base/db/man/add.database.entries.Rd b/base/db/man/add.database.entries.Rd
new file mode 100644
index 00000000000..5de01cd1705
--- /dev/null
+++ b/base/db/man/add.database.entries.Rd
@@ -0,0 +1,70 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/add.database.entries.R
+\name{add.database.entries}
+\alias{add.database.entries}
+\title{Return new arrangement of database while adding code to deal with ensembles}
+\usage{
+add.database.entries(
+  result,
+  con,
+  start_date,
+  end_date,
+  write,
+  overwrite,
+  insert.new.file,
+  input.args,
+  machine,
+  mimetype,
+  formatname,
+  allow.conflicting.dates,
+  ensemble,
+  ensemble_name,
+  existing.input,
+  existing.dbfile,
+  input
+)
+}
+\arguments{
+\item{result}{list of results from the download function}
+
+\item{con}{database connection}
+
+\item{start_date}{start date of the data}
+
+\item{end_date}{end date of the data}
+
+\item{write}{whether to write to the database}
+
+\item{overwrite}{Logical: If a file already exists, create a fresh copy?}
+
+\item{insert.new.file}{whether to insert a new file}
+
+\item{input.args}{input arguments obtained from the convert_input function}
+
+\item{machine}{machine information}
+
+\item{mimetype}{data product specific file format}
+
+\item{formatname}{format name of the data}
+
+\item{allow.conflicting.dates}{whether to allow conflicting dates}
+
+\item{ensemble}{ensemble id}
+
+\item{ensemble_name}{ensemble name}
+
+\item{existing.input}{existing input records}
+
+\item{existing.dbfile}{existing dbfile records}
+
+\item{input}{input records}
+}
+\value{
+list of input and dbfile ids
+}
+\description{
+Return new arrangement of database while adding code to deal with ensembles
+}
+\author{
+Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko
+}
diff --git a/base/db/man/check_missing_files.Rd b/base/db/man/check_missing_files.Rd
new file mode 100644
index 00000000000..8dd541f9380
--- /dev/null
+++ b/base/db/man/check_missing_files.Rd
@@ -0,0 +1,31 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/check.missing.files.R
+\name{check_missing_files}
+\alias{check_missing_files}
+\title{Function to check if result has empty or missing files}
+\usage{
+check_missing_files(
+  result,
+  outname,
+  existing.input = NULL,
+  existing.dbfile = NULL
+)
+}
+\arguments{
+\item{result}{A list of dataframes with file paths}
+
+\item{outname}{Name of the output file}
+
+\item{existing.input}{Existing input records}
+
+\item{existing.dbfile}{Existing dbfile records}
+}
+\value{
+A list of dataframes with file paths, a list of strings with the output file name, a list of existing input records, and a list of existing dbfile records
+}
+\description{
+Function to check if result has empty or missing files
+}
+\author{
+Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko
+}
diff --git a/base/db/tests/testthat/test.check.missing.files.R b/base/db/tests/testthat/test.check.missing.files.R
new file mode 100644
index 00000000000..e779077294a
--- /dev/null
+++ b/base/db/tests/testthat/test.check.missing.files.R
@@ -0,0 +1,24 @@
+test_that("`check_missing_files()` able to return correct missing files", {
+    mocked_res <- mockery::mock(list(c("A", "B")))
+    mockery::stub(check_missing_files, "purrr::map_dfr", data.frame(missing = c(FALSE), empty = c(FALSE)))
+    res <- check_missing_files(
+        result = list(data.frame(file = c("A", "B"))),
+        outname = "test",
+        existing.input = data.frame(),
+        existing.dbfile = data.frame()
+    )
+    
+    # Print the structure of `res` for debugging
+    str(res)
+    
+    # This function returns a list as follows: return(list(result_sizes, outlist, existing.input, existing.dbfile))
+    # Perform checks to compare results from stubbed functions to actual results
+    expect_equal(nrow(res[[1]]), 1)
+    expect_equal(res[[1]]$missing, FALSE)
+    expect_equal(res[[1]]$empty, FALSE)
+    expect_equal(res[[2]], "test")
+    expect_equal(nrow(res[[3]][[1]]), 0)
+    expect_equal(ncol(res[[3]][[1]]), 0)
+    expect_equal(nrow(res[[4]][[1]]), 0)
+    expect_equal(ncol(res[[4]][[1]]), 0)
+})
diff --git a/base/db/tests/testthat/test.convert_input.R b/base/db/tests/testthat/test.convert_input.R
index 29513187c9e..931d8a7f26b 100644
--- a/base/db/tests/testthat/test.convert_input.R
+++ b/base/db/tests/testthat/test.convert_input.R
@@ -1,10 +1,21 @@
 test_that("`convert_input()` able to call the respective download function for a data item with the correct arguments", {
   mocked_res <- mockery::mock(list(c("A", "B")))
 
-  mockery::stub(convert_input, 'dbfile.input.check', data.frame())
-  mockery::stub(convert_input, 'db.query', data.frame(id = 1))
-  mockery::stub(convert_input, 'PEcAn.remote::remote.execute.R', mocked_res)
-  mockery::stub(convert_input, 'purrr::map_dfr', data.frame(missing = c(FALSE), empty = c(FALSE)))
+  mockery::stub(convert_input, "dbfile.input.check", data.frame())
+  mockery::stub(convert_input, "db.query", data.frame(id = 1))
+  mockery::stub(convert_input, "PEcAn.remote::remote.execute.R", mocked_res)
+  mockery::stub(convert_input, "check_missing_files", list(
+    result_sizes = data.frame(
+      file = c("A", "B"),
+      file_size = c(100, 200),
+      missing = c(FALSE, FALSE),
+      empty = c(FALSE, FALSE)
+    ),
+    outlist = "test",
+    existing.input = list(data.frame(file = character(0))),
+    existing.dbfile = list(data.frame(file = character(0)))
+  ))
+  mockery::stub(convert_input, "add.database.entries", list(input.id = 1, dbfile.id = 1))
 
   convert_input(
     input.id = NA,
@@ -14,8 +25,8 @@ test_that("`convert_input()` able to call the respective download function for a
     site.id = 1,
     start_date = "2011-01-01",
     end_date = "2011-12-31",
-    pkg = 'PEcAn.data.atmosphere',
-    fcn = 'download.AmerifluxLBL',
+    pkg = "PEcAn.data.atmosphere",
+    fcn = "download.AmerifluxLBL",
     con = NULL,
     host = data.frame(name = "localhost"),
     browndog = NULL,
@@ -23,10 +34,10 @@ test_that("`convert_input()` able to call the respective download function for a
     lat.in = 40,
     lon.in = -88
   )
-  
+
   args <- mockery::mock_args(mocked_res)
   expect_equal(
-    args[[1]]$script, 
+    args[[1]]$script,
     "PEcAn.data.atmosphere::download.AmerifluxLBL(lat.in=40, lon.in=-88, overwrite=FALSE, outfolder='test/', start_date='2011-01-01', end_date='2011-12-31')"
   )
 })
@@ -36,4 +47,4 @@ test_that("`.get.file.deletion.commands()` able to return correct file deletion
   expect_equal(res$move.to.tmp, "dir.create(c('./tmp'), recursive=TRUE, showWarnings=FALSE); file.rename(from=c('test'), to=c('./tmp/test'))")
   expect_equal(res$delete.tmp, "unlink(c('./tmp'), recursive=TRUE)")
   expect_equal(res$replace.from.tmp, "file.rename(from=c('./tmp/test'), to=c('test'));unlink(c('./tmp'), recursive=TRUE)")
-})
\ No newline at end of file
+})

From 838af61ec8011022c9cf73e3a2f11f75f49f5492 Mon Sep 17 00:00:00 2001
From: Abhinav Pandey <abhinav.pandey.met22@itbhu.ac.in>
Date: Thu, 18 Jul 2024 13:17:33 +0530
Subject: [PATCH 02/14] Update CHANGELOG

Signed-off-by: Abhinav Pandey <abhinav.pandey.met22@itbhu.ac.in>
---
 CHANGELOG.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2280cfc967e..e0c0bcbc731 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,5 @@
 # Change Log
+
 All notable changes are kept in this file. All changes made should be added to the section called
 `Unreleased`. Once a new release is made this file will be updated to create a new `Unreleased`
 section for the next release.
@@ -9,6 +10,8 @@ For more information about this file see also [Keep a Changelog](http://keepacha
 
 ### Added
 
+- Refactor `convert_input` to Perform tasks via helper function. Subtask of [#3307](https://github.com/PecanProject/pecan/issues/3307)
+
 ### Fixed
 
 ### Changed

From f22b962691ce03adf12c3e79907bd52372351b24 Mon Sep 17 00:00:00 2001
From: Abhinav Pandey <abhinav.pandey.met22@itbhu.ac.in>
Date: Thu, 25 Jul 2024 14:51:01 +0530
Subject: [PATCH 03/14] Remove unutilized variables from convert_input

Signed-off-by: Abhinav Pandey <abhinav.pandey.met22@itbhu.ac.in>
---
 base/db/R/add.database.entries.R | 12 +++++++++++-
 base/db/R/check.missing.files.R  | 18 ++++++++----------
 base/db/R/convert_input.R        |  6 ++----
 3 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/base/db/R/add.database.entries.R b/base/db/R/add.database.entries.R
index 3c253c07e73..d3eb994a646 100644
--- a/base/db/R/add.database.entries.R
+++ b/base/db/R/add.database.entries.R
@@ -33,6 +33,7 @@ add.database.entries <- function(
         # Setup newinput. This list will contain two variables: a vector of input IDs and a vector of DB IDs for each entry in result.
         # This list will be returned.
         newinput <- list(input.id = NULL, dbfile.id = NULL) # Blank vectors are null.
+
         for (i in 1:length(result)) { # Master for loop
             id_not_added <- TRUE
 
@@ -72,7 +73,16 @@ add.database.entries <- function(
             }
 
             if (insert.new.file && id_not_added) {
-                dbfile.id <- dbfile.insert(in.path = dirname(result[[i]]$file[1]), in.prefix = result[[i]]$dbfile.name[1], "Input", existing.input[[i]]$id, con, reuse = TRUE, hostname = machine$hostname)
+                dbfile.id <- dbfile.insert(
+                    in.path = dirname(result[[i]]$file[1]),
+                    in.prefix = result[[i]]$dbfile.name[1],
+                    "Input",
+                    existing.input[[i]]$id,
+                    con,
+                    reuse = TRUE,
+                    hostname = machine$hostname
+                )
+
                 newinput$input.id <- c(newinput$input.id, existing.input[[i]]$id)
                 newinput$dbfile.id <- c(newinput$dbfile.id, dbfile.id)
             } else if (id_not_added) {
diff --git a/base/db/R/check.missing.files.R b/base/db/R/check.missing.files.R
index bde3d7ebe97..617878496de 100644
--- a/base/db/R/check.missing.files.R
+++ b/base/db/R/check.missing.files.R
@@ -1,11 +1,11 @@
 #' Function to check if result has empty or missing files
-#' 
+#'
 #' @param result A list of dataframes with file paths
 #' @param outname Name of the output file
 #' @param existing.input Existing input records
 #' @param existing.dbfile Existing dbfile records
 #' @return A list of dataframes with file paths, a list of strings with the output file name, a list of existing input records, and a list of existing dbfile records
-#' 
+#'
 #' @author Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko
 
 check_missing_files <- function(result, outname, existing.input = NULL, existing.dbfile = NULL) {
@@ -18,14 +18,14 @@ check_missing_files <- function(result, outname, existing.input = NULL, existing
       empty = file_size == 0
     )
   )
-  
+
   if (any(result_sizes$missing) || any(result_sizes$empty)) {
     log_format_df <- function(df) {
       formatted_df <- rbind(colnames(df), format(df))
       formatted_text <- purrr::reduce(formatted_df, paste, sep = " ")
       paste(formatted_text, collapse = "\n")
     }
-    
+
     PEcAn.logger::logger.severe(
       "Requested Processing produced empty files or Nonexistent files:\n",
       log_format_df(result_sizes[, c(1, 8, 9, 10)]),
@@ -33,17 +33,15 @@ check_missing_files <- function(result, outname, existing.input = NULL, existing
       wrap = FALSE
     )
   }
-  
-  # Insert into Database
-  outlist <- unlist(strsplit(outname, "_"))
-  
+
+
   # Wrap in a list for consistant processing later
   if (exists("existing.input") && is.data.frame(existing.input)) {
     existing.input <- list(existing.input)
   }
-  
+
   if (exists("existing.dbfile") && is.data.frame(existing.dbfile)) {
     existing.dbfile <- list(existing.dbfile)
   }
-  return(list(result_sizes, outlist, existing.input, existing.dbfile))
+  return(list(existing.input, existing.dbfile))
 }
diff --git a/base/db/R/convert_input.R b/base/db/R/convert_input.R
index d5af069d0eb..265559798be 100644
--- a/base/db/R/convert_input.R
+++ b/base/db/R/convert_input.R
@@ -737,10 +737,8 @@ convert_input <-
   checked.missing.files <- check_missing_files(result, outname, existing.input, existing.dbfile)
 
   # Unwrap parameters after performing checks for missing files
-  result_sizes <- checked.missing.files$result_sizes;
-  outlist <- checked.missing.files$outlist;
-  existing.input <- checked.missing.files$existing.input;
-  existing.dbfile <- checked.missing.files$existing.dbfile;
+  existing.input <- checked.missing.files$existing.input
+  existing.dbfile <- checked.missing.files$existing.dbfile
   
   #---------------------------------------------------------------#
   # New arrangement of database adding code to deal with ensembles.

From d884203d1388b219268daa4e95b95b8134a5e69f Mon Sep 17 00:00:00 2001
From: Abhinav Pandey <abhinav.pandey.met22@itbhu.ac.in>
Date: Thu, 25 Jul 2024 14:58:55 +0530
Subject: [PATCH 04/14] Update logger statements in convert_input

Signed-off-by: Abhinav Pandey <abhinav.pandey.met22@itbhu.ac.in>
---
 base/db/R/convert_input.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/base/db/R/convert_input.R b/base/db/R/convert_input.R
index 265559798be..275b6f54d49 100644
--- a/base/db/R/convert_input.R
+++ b/base/db/R/convert_input.R
@@ -514,7 +514,7 @@ convert_input <-
         # we'll need to update its start/end dates .
       } 
     } else {
-      # No existing record found. Should be good to go.
+      PEcAn.logger::logger.debug("No existing record found. Should be good to go.")
     }
   }
   

From 68d9516a3ccecb7c5c1b31907849b8fc7a3ba34e Mon Sep 17 00:00:00 2001
From: Abhinav Pandey <abhinav.pandey.met22@itbhu.ac.in>
Date: Thu, 25 Jul 2024 15:43:31 +0530
Subject: [PATCH 05/14] Added seperate function to check machine info

Signed-off-by: Abhinav Pandey <abhinav.pandey.met22@itbhu.ac.in>
---
 base/db/R/convert_input.R       | 57 ++-------------------------
 base/db/R/get.machine.info.R    | 68 +++++++++++++++++++++++++++++++++
 base/db/man/get.machine.info.Rd | 26 +++++++++++++
 3 files changed, 98 insertions(+), 53 deletions(-)
 create mode 100644 base/db/R/get.machine.info.R
 create mode 100644 base/db/man/get.machine.info.Rd

diff --git a/base/db/R/convert_input.R b/base/db/R/convert_input.R
index 275b6f54d49..ad83753e299 100644
--- a/base/db/R/convert_input.R
+++ b/base/db/R/convert_input.R
@@ -520,60 +520,11 @@ convert_input <-
   
   #---------------------------------------------------------------------------------------------------------------#
   # Get machine information
+  machine.info <- get.machine.info(host, dbfile.id = input.args$dbfile.id, input.id = input.id)
 
-  machine.host <- ifelse(host$name == "localhost", PEcAn.remote::fqdn(), host$name)
-  machine <- db.query(paste0("SELECT * from machines where hostname = '",
-                             machine.host, "'"), con)
-  
-  if (nrow(machine) == 0) {
-    PEcAn.logger::logger.error("machine not found", host$name)
-    return(NULL)
-  }
-  
-  if (missing(input.id) || is.na(input.id) || is.null(input.id)) {
-    input <- dbfile <- NULL
-  } else {
-    input <- db.query(paste("SELECT * from inputs where id =", input.id), con)
-    if (nrow(input) == 0) {
-      PEcAn.logger::logger.error("input not found", input.id)
-      return(NULL)
-    }
-    
-    if(!is.null(input.args$dbfile.id)){
-      dbfile <-
-        db.query(
-          paste(
-            "SELECT * from dbfiles where id=",input.args$dbfile.id," and container_id =",
-            input.id,
-            " and container_type = 'Input' and machine_id =",
-            machine$id
-          ),
-          con
-        )  
-    }else{
-      dbfile <-
-        db.query(
-          paste(
-            "SELECT * from dbfiles where container_id =",
-            input.id,
-            " and container_type = 'Input' and machine_id =",
-            machine$id
-          ),
-          con
-        )  
-    }
-    
-
-    
-    if (nrow(dbfile) == 0) {
-      PEcAn.logger::logger.error("dbfile not found", input.id)
-      return(NULL)
-    }
-    if (nrow(dbfile) > 1) {
-      PEcAn.logger::logger.warn("multiple dbfile records, using last", dbfile)
-      dbfile <- dbfile[nrow(dbfile), ]
-    }
-  }
+  machine <- machine.info$machine
+  input <- machine.info$input
+  dbfile <- machine.info$dbfile
   
   #--------------------------------------------------------------------------------------------------#
   # Perform Conversion
diff --git a/base/db/R/get.machine.info.R b/base/db/R/get.machine.info.R
new file mode 100644
index 00000000000..6685e1062ef
--- /dev/null
+++ b/base/db/R/get.machine.info.R
@@ -0,0 +1,68 @@
+#' Get machine information from db
+#' @param host host information
+#' @param dbfile.id dbfile id for existing records
+#' @param input.id input id for existing records
+#' @param con database connection
+#' 
+#' @return list of machine, input, and dbfile records
+#' @author Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko
+
+get.machine.info <- function(host, dbfile.id, input.id = NULL, con) {
+    machine.host <- ifelse(host$name == "localhost", PEcAn.remote::fqdn(), host$name)
+    machine <- db.query(paste0(
+        "SELECT * from machines where hostname = '",
+        machine.host, "'"
+    ), con)
+
+    if (nrow(machine) == 0) {
+        PEcAn.logger::logger.error("machine not found", host$name)
+        return(NULL)
+    }
+
+    if (missing(input.id) || is.na(input.id) || is.null(input.id)) {
+        input <- dbfile <- NULL
+    } else {
+        input <- db.query(paste("SELECT * from inputs where id =", input.id), con)
+        if (nrow(input) == 0) {
+            PEcAn.logger::logger.error("input not found", input.id)
+            return(NULL)
+        }
+
+        if (!is.null(input.args$dbfile.id)) {
+            dbfile <-
+                db.query(
+                    paste(
+                        "SELECT * from dbfiles where id=", input.args$dbfile.id, " and container_id =",
+                        input.id,
+                        " and container_type = 'Input' and machine_id =",
+                        machine$id
+                    ),
+                    con
+                )
+        } else {
+            dbfile <-
+                db.query(
+                    paste(
+                        "SELECT * from dbfiles where container_id =",
+                        input.id,
+                        " and container_type = 'Input' and machine_id =",
+                        machine$id
+                    ),
+                    con
+                )
+        }
+
+
+
+        if (nrow(dbfile) == 0) {
+            PEcAn.logger::logger.error("dbfile not found", input.id)
+            return(NULL)
+        }
+        if (nrow(dbfile) > 1) {
+            PEcAn.logger::logger.warn("multiple dbfile records, using last", dbfile)
+            dbfile <- dbfile[nrow(dbfile), ]
+        }
+    }
+
+    return(list(machine = machine, input = input, dbfile = dbfile))
+}
diff --git a/base/db/man/get.machine.info.Rd b/base/db/man/get.machine.info.Rd
new file mode 100644
index 00000000000..8989221ea5b
--- /dev/null
+++ b/base/db/man/get.machine.info.Rd
@@ -0,0 +1,26 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/get.machine.info.R
+\name{get.machine.info}
+\alias{get.machine.info}
+\title{Get machine information from db}
+\usage{
+get.machine.info(host, dbfile.id, input.id = NULL, con)
+}
+\arguments{
+\item{host}{host information}
+
+\item{dbfile.id}{dbfile id for existing records}
+
+\item{input.id}{input id for existing records}
+
+\item{con}{database connection}
+}
+\value{
+list of machine, input, and dbfile records
+}
+\description{
+Get machine information from db
+}
+\author{
+Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko
+}

From 5208b02a1d98c27f24c8e4e9da56424537fa5852 Mon Sep 17 00:00:00 2001
From: Abhinav Pandey <abhinav.pandey.met22@itbhu.ac.in>
Date: Thu, 25 Jul 2024 16:40:41 +0530
Subject: [PATCH 06/14] Update input args to get machine info

Signed-off-by: Abhinav Pandey <abhinav.pandey.met22@itbhu.ac.in>
---
 base/db/R/convert_input.R                         |  2 +-
 base/db/R/get.machine.info.R                      |  2 +-
 base/db/tests/testthat/test.check.missing.files.R | 15 ++++++++-------
 base/db/tests/testthat/test.convert_input.R       |  5 +++++
 4 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/base/db/R/convert_input.R b/base/db/R/convert_input.R
index ad83753e299..ba2d7a3a5f0 100644
--- a/base/db/R/convert_input.R
+++ b/base/db/R/convert_input.R
@@ -520,7 +520,7 @@ convert_input <-
   
   #---------------------------------------------------------------------------------------------------------------#
   # Get machine information
-  machine.info <- get.machine.info(host, dbfile.id = input.args$dbfile.id, input.id = input.id)
+  machine.info <- get.machine.info(host, input.args = input.args, input.id = input.id)
 
   machine <- machine.info$machine
   input <- machine.info$input
diff --git a/base/db/R/get.machine.info.R b/base/db/R/get.machine.info.R
index 6685e1062ef..d23e5416f9e 100644
--- a/base/db/R/get.machine.info.R
+++ b/base/db/R/get.machine.info.R
@@ -7,7 +7,7 @@
 #' @return list of machine, input, and dbfile records
 #' @author Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko
 
-get.machine.info <- function(host, dbfile.id, input.id = NULL, con) {
+get.machine.info <- function(host, input.args, input.id = NULL, con = NULL) {
     machine.host <- ifelse(host$name == "localhost", PEcAn.remote::fqdn(), host$name)
     machine <- db.query(paste0(
         "SELECT * from machines where hostname = '",
diff --git a/base/db/tests/testthat/test.check.missing.files.R b/base/db/tests/testthat/test.check.missing.files.R
index e779077294a..c2de074d5d3 100644
--- a/base/db/tests/testthat/test.check.missing.files.R
+++ b/base/db/tests/testthat/test.check.missing.files.R
@@ -14,11 +14,12 @@ test_that("`check_missing_files()` able to return correct missing files", {
     # This function returns a list as follows: return(list(result_sizes, outlist, existing.input, existing.dbfile))
     # Perform checks to compare results from stubbed functions to actual results
     expect_equal(nrow(res[[1]]), 1)
-    expect_equal(res[[1]]$missing, FALSE)
-    expect_equal(res[[1]]$empty, FALSE)
-    expect_equal(res[[2]], "test")
-    expect_equal(nrow(res[[3]][[1]]), 0)
-    expect_equal(ncol(res[[3]][[1]]), 0)
-    expect_equal(nrow(res[[4]][[1]]), 0)
-    expect_equal(ncol(res[[4]][[1]]), 0)
+    PEcAn.logger::logger.debug(res)
+    # expect_equal(res[[1]]$missing, FALSE)
+    # expect_equal(res[[1]]$empty, FALSE)
+    # expect_equal(res[[2]], "test")
+    # expect_equal(nrow(res[[3]][[1]]), 0)
+    # expect_equal(ncol(res[[3]][[1]]), 0)
+    # expect_equal(nrow(res[[4]][[1]]), 0)
+    # expect_equal(ncol(res[[4]][[1]]), 0)
 })
diff --git a/base/db/tests/testthat/test.convert_input.R b/base/db/tests/testthat/test.convert_input.R
index 931d8a7f26b..cd33523f86c 100644
--- a/base/db/tests/testthat/test.convert_input.R
+++ b/base/db/tests/testthat/test.convert_input.R
@@ -3,6 +3,11 @@ test_that("`convert_input()` able to call the respective download function for a
 
   mockery::stub(convert_input, "dbfile.input.check", data.frame())
   mockery::stub(convert_input, "db.query", data.frame(id = 1))
+  mockery::stub(convert_input, "get.machine.info", list(
+    machine = data.frame(id = 1),
+    input = data.frame(id = 1),
+    dbfile = data.frame(id = 1)
+  ))
   mockery::stub(convert_input, "PEcAn.remote::remote.execute.R", mocked_res)
   mockery::stub(convert_input, "check_missing_files", list(
     result_sizes = data.frame(

From f570646849433f89d8335b25be2539bc3c2ae4bb Mon Sep 17 00:00:00 2001
From: Abhinav Pandey <abhinav.pandey.met22@itbhu.ac.in>
Date: Thu, 25 Jul 2024 16:59:21 +0530
Subject: [PATCH 07/14] Correct roxygen documentations

Signed-off-by: Abhinav Pandey <abhinav.pandey.met22@itbhu.ac.in>
---
 base/db/R/get.machine.info.R                      |  2 +-
 base/db/man/get.machine.info.Rd                   |  4 ++--
 base/db/tests/testthat/test.check.missing.files.R | 15 +++++++--------
 3 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/base/db/R/get.machine.info.R b/base/db/R/get.machine.info.R
index d23e5416f9e..4683cde1573 100644
--- a/base/db/R/get.machine.info.R
+++ b/base/db/R/get.machine.info.R
@@ -1,6 +1,6 @@
 #' Get machine information from db
 #' @param host host information
-#' @param dbfile.id dbfile id for existing records
+#' @param input.args input args.r for existing records
 #' @param input.id input id for existing records
 #' @param con database connection
 #' 
diff --git a/base/db/man/get.machine.info.Rd b/base/db/man/get.machine.info.Rd
index 8989221ea5b..6e57013c4d7 100644
--- a/base/db/man/get.machine.info.Rd
+++ b/base/db/man/get.machine.info.Rd
@@ -4,12 +4,12 @@
 \alias{get.machine.info}
 \title{Get machine information from db}
 \usage{
-get.machine.info(host, dbfile.id, input.id = NULL, con)
+get.machine.info(host, input.args, input.id = NULL, con = NULL)
 }
 \arguments{
 \item{host}{host information}
 
-\item{dbfile.id}{dbfile id for existing records}
+\item{input.args}{input args for existing records}
 
 \item{input.id}{input id for existing records}
 
diff --git a/base/db/tests/testthat/test.check.missing.files.R b/base/db/tests/testthat/test.check.missing.files.R
index c2de074d5d3..e779077294a 100644
--- a/base/db/tests/testthat/test.check.missing.files.R
+++ b/base/db/tests/testthat/test.check.missing.files.R
@@ -14,12 +14,11 @@ test_that("`check_missing_files()` able to return correct missing files", {
     # This function returns a list as follows: return(list(result_sizes, outlist, existing.input, existing.dbfile))
     # Perform checks to compare results from stubbed functions to actual results
     expect_equal(nrow(res[[1]]), 1)
-    PEcAn.logger::logger.debug(res)
-    # expect_equal(res[[1]]$missing, FALSE)
-    # expect_equal(res[[1]]$empty, FALSE)
-    # expect_equal(res[[2]], "test")
-    # expect_equal(nrow(res[[3]][[1]]), 0)
-    # expect_equal(ncol(res[[3]][[1]]), 0)
-    # expect_equal(nrow(res[[4]][[1]]), 0)
-    # expect_equal(ncol(res[[4]][[1]]), 0)
+    expect_equal(res[[1]]$missing, FALSE)
+    expect_equal(res[[1]]$empty, FALSE)
+    expect_equal(res[[2]], "test")
+    expect_equal(nrow(res[[3]][[1]]), 0)
+    expect_equal(ncol(res[[3]][[1]]), 0)
+    expect_equal(nrow(res[[4]][[1]]), 0)
+    expect_equal(ncol(res[[4]][[1]]), 0)
 })

From e479c468f1fcca1c02dec919e4fabca1dcbf792e Mon Sep 17 00:00:00 2001
From: Abhinav Pandey <abhinav.pandey.met22@itbhu.ac.in>
Date: Thu, 25 Jul 2024 17:23:15 +0530
Subject: [PATCH 08/14] Update tests

Signed-off-by: Abhinav Pandey <abhinav.pandey.met22@itbhu.ac.in>
---
 base/db/R/get.machine.info.R                  | 105 +++++++++---------
 .../tests/testthat/test.check.missing.files.R |  23 ++--
 2 files changed, 61 insertions(+), 67 deletions(-)

diff --git a/base/db/R/get.machine.info.R b/base/db/R/get.machine.info.R
index 4683cde1573..c98bee6cf20 100644
--- a/base/db/R/get.machine.info.R
+++ b/base/db/R/get.machine.info.R
@@ -1,6 +1,6 @@
 #' Get machine information from db
 #' @param host host information
-#' @param input.args input args.r for existing records
+#' @param input.args input args for existing records
 #' @param input.id input id for existing records
 #' @param con database connection
 #' 
@@ -8,61 +8,60 @@
 #' @author Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko
 
 get.machine.info <- function(host, input.args, input.id = NULL, con = NULL) {
-    machine.host <- ifelse(host$name == "localhost", PEcAn.remote::fqdn(), host$name)
-    machine <- db.query(paste0(
-        "SELECT * from machines where hostname = '",
-        machine.host, "'"
-    ), con)
-
-    if (nrow(machine) == 0) {
-        PEcAn.logger::logger.error("machine not found", host$name)
-        return(NULL)
+    
+  machine.host <- ifelse(host$name == "localhost", PEcAn.remote::fqdn(), host$name)
+  machine <- db.query(paste0("SELECT * from machines where hostname = '",
+                             machine.host, "'"), con)
+  
+  if (nrow(machine) == 0) {
+    PEcAn.logger::logger.error("machine not found", host$name)
+    return(NULL)
+  }
+  
+  if (missing(input.id) || is.na(input.id) || is.null(input.id)) {
+    input <- dbfile <- NULL
+  } else {
+    input <- db.query(paste("SELECT * from inputs where id =", input.id), con)
+    if (nrow(input) == 0) {
+      PEcAn.logger::logger.error("input not found", input.id)
+      return(NULL)
     }
+    
+    if(!is.null(input.args$dbfile.id)){
+      dbfile <-
+        db.query(
+          paste(
+            "SELECT * from dbfiles where id=",input.args$dbfile.id," and container_id =",
+            input.id,
+            " and container_type = 'Input' and machine_id =",
+            machine$id
+          ),
+          con
+        )  
+    }else{
+      dbfile <-
+        db.query(
+          paste(
+            "SELECT * from dbfiles where container_id =",
+            input.id,
+            " and container_type = 'Input' and machine_id =",
+            machine$id
+          ),
+          con
+        )  
+    }
+    
 
-    if (missing(input.id) || is.na(input.id) || is.null(input.id)) {
-        input <- dbfile <- NULL
-    } else {
-        input <- db.query(paste("SELECT * from inputs where id =", input.id), con)
-        if (nrow(input) == 0) {
-            PEcAn.logger::logger.error("input not found", input.id)
-            return(NULL)
-        }
-
-        if (!is.null(input.args$dbfile.id)) {
-            dbfile <-
-                db.query(
-                    paste(
-                        "SELECT * from dbfiles where id=", input.args$dbfile.id, " and container_id =",
-                        input.id,
-                        " and container_type = 'Input' and machine_id =",
-                        machine$id
-                    ),
-                    con
-                )
-        } else {
-            dbfile <-
-                db.query(
-                    paste(
-                        "SELECT * from dbfiles where container_id =",
-                        input.id,
-                        " and container_type = 'Input' and machine_id =",
-                        machine$id
-                    ),
-                    con
-                )
-        }
-
-
-
-        if (nrow(dbfile) == 0) {
-            PEcAn.logger::logger.error("dbfile not found", input.id)
-            return(NULL)
-        }
-        if (nrow(dbfile) > 1) {
-            PEcAn.logger::logger.warn("multiple dbfile records, using last", dbfile)
-            dbfile <- dbfile[nrow(dbfile), ]
-        }
+    
+    if (nrow(dbfile) == 0) {
+      PEcAn.logger::logger.error("dbfile not found", input.id)
+      return(NULL)
+    }
+    if (nrow(dbfile) > 1) {
+      PEcAn.logger::logger.warn("multiple dbfile records, using last", dbfile)
+      dbfile <- dbfile[nrow(dbfile), ]
     }
+  }
 
     return(list(machine = machine, input = input, dbfile = dbfile))
 }
diff --git a/base/db/tests/testthat/test.check.missing.files.R b/base/db/tests/testthat/test.check.missing.files.R
index e779077294a..c0ad6794f65 100644
--- a/base/db/tests/testthat/test.check.missing.files.R
+++ b/base/db/tests/testthat/test.check.missing.files.R
@@ -1,24 +1,19 @@
 test_that("`check_missing_files()` able to return correct missing files", {
-    mocked_res <- mockery::mock(list(c("A", "B")))
-    mockery::stub(check_missing_files, "purrr::map_dfr", data.frame(missing = c(FALSE), empty = c(FALSE)))
+    # Mock `purrr::map_dfr`
+    mocked_res <- mockery::mock(data.frame(file = c("A", "B"), file_size = c(100, 200), missing = c(FALSE, FALSE), empty = c(FALSE, FALSE)))
+    mockery::stub(check_missing_files, "purrr::map_dfr", mocked_res)
+
     res <- check_missing_files(
         result = list(data.frame(file = c("A", "B"))),
         outname = "test",
         existing.input = data.frame(),
         existing.dbfile = data.frame()
     )
-    
+
     # Print the structure of `res` for debugging
     str(res)
-    
-    # This function returns a list as follows: return(list(result_sizes, outlist, existing.input, existing.dbfile))
-    # Perform checks to compare results from stubbed functions to actual results
-    expect_equal(nrow(res[[1]]), 1)
-    expect_equal(res[[1]]$missing, FALSE)
-    expect_equal(res[[1]]$empty, FALSE)
-    expect_equal(res[[2]], "test")
-    expect_equal(nrow(res[[3]][[1]]), 0)
-    expect_equal(ncol(res[[3]][[1]]), 0)
-    expect_equal(nrow(res[[4]][[1]]), 0)
-    expect_equal(ncol(res[[4]][[1]]), 0)
+
+    expect_equal(length(res), 2)
+    expect_true(is.list(res[[1]]))
+    expect_true(is.list(res[[2]]))
 })

From 63f270f89618abe745c3502587a24b49762e30a8 Mon Sep 17 00:00:00 2001
From: Abhinav Pandey <abhinav.pandey.met22@itbhu.ac.in>
Date: Wed, 14 Aug 2024 23:18:53 +0530
Subject: [PATCH 09/14] Refactor extra variables in `run.meta.anbalysis`

Signed-off-by: Abhinav Pandey <abhinav.pandey.met22@itbhu.ac.in>
---
 modules/meta.analysis/R/run.meta.analysis.R | 34 ++++++++++++---------
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/modules/meta.analysis/R/run.meta.analysis.R b/modules/meta.analysis/R/run.meta.analysis.R
index 65afcdf61bd..4f8841ae46b 100644
--- a/modules/meta.analysis/R/run.meta.analysis.R
+++ b/modules/meta.analysis/R/run.meta.analysis.R
@@ -216,22 +216,26 @@ runModule.run.meta.analysis <- function(settings) {
     PEcAn.logger::logger.info(paste0("Running meta-analysis on all PFTs listed by any Settings object in the list: ", 
                        paste(pft.names, collapse = ", ")))
     
-    iterations <- settings$meta.analysis$iter
-    random     <- settings$meta.analysis$random.effects$on
-    use_ghs    <- settings$meta.analysis$random.effects$use_ghs
-    threshold  <- settings$meta.analysis$threshold
-    dbfiles    <- settings$database$dbfiles
-    database   <- settings$database$bety
-    run.meta.analysis(pfts, iterations, random, threshold, dbfiles, database, use_ghs)
+    run.meta.analysis(
+      pfts,
+      settings$meta.analysis$iter,
+      settings$meta.analysis$random.effects$on,
+      settings$meta.analysis$threshold,
+      settings$database$dbfiles,
+      settings$database$bety,
+      settings$meta.analysis$random.effects$use_ghs
+    )
   } else if (PEcAn.settings::is.Settings(settings)) {
-    pfts       <- settings$pfts
-    iterations <- settings$meta.analysis$iter
-    random     <- settings$meta.analysis$random.effects$on
-    use_ghs    <- settings$meta.analysis$random.effects$use_ghs
-    threshold  <- settings$meta.analysis$threshold
-    dbfiles    <- settings$database$dbfiles
-    database   <- settings$database$bety
-    run.meta.analysis(pfts, iterations, random, threshold, dbfiles, database, use_ghs, update = settings$meta.analysis$update)
+      run.meta.analysis(
+        settings$pfts,
+        settings$meta.analysis$iter,
+        settings$meta.analysis$random.effects$on,
+        settings$meta.analysis$threshold,
+        settings$database$dbfiles,
+        settings$database$bety,
+        settings$meta.analysis$random.effects$use_ghs,
+        update = settings$meta.analysis$update
+      )
   } else {
     stop("runModule.run.meta.analysis only works with Settings or MultiSettings")
   }

From 74003d9582e8ec0a99f303f9b3c8e4f4777298ac Mon Sep 17 00:00:00 2001
From: Abhinav Pandey <abhinav.pandey.met22@itbhu.ac.in>
Date: Wed, 21 Aug 2024 20:24:46 +0530
Subject: [PATCH 10/14] get existing machine info using helper function

Signed-off-by: Abhinav Pandey <abhinav.pandey.met22@itbhu.ac.in>
---
 base/db/R/convert_input.R       | 26 +++++++++---------
 base/db/R/get.machine.info.R    | 48 ++++++++++++++++++++++-----------
 base/db/man/get.machine.host.Rd | 22 +++++++++++++++
 3 files changed, 66 insertions(+), 30 deletions(-)
 create mode 100644 base/db/man/get.machine.host.Rd

diff --git a/base/db/R/convert_input.R b/base/db/R/convert_input.R
index ba2d7a3a5f0..9cc5c8f3c03 100644
--- a/base/db/R/convert_input.R
+++ b/base/db/R/convert_input.R
@@ -176,17 +176,15 @@ convert_input <-
         # Date/time processing for existing input
         existing.input[[i]]$start_date <- lubridate::force_tz(lubridate::as_datetime(existing.input[[i]]$start_date), "UTC")
         existing.input[[i]]$end_date   <- lubridate::force_tz(lubridate::as_datetime(existing.input[[i]]$end_date), "UTC")
-        
+
         ## Obtain machine information
+        machine.host.info <- get.machine.host(host, con = con)
+        machine.host <- machine.host.info$machine.host
+        machine <- machine.host.info$machine
         #Grab machine info of file that exists
         existing.machine <- db.query(paste0("SELECT * from machines where id  = '",
                                                       existing.dbfile[[i]]$machine_id, "'"), con)
         
-        #Grab machine info of host machine
-        machine.host <- ifelse(host$name == "localhost", PEcAn.remote::fqdn(), host$name)
-        machine <- db.query(paste0("SELECT * from machines where hostname = '",
-                                             machine.host, "'"), con)
-        
         
         # If the files aren't on the machine, we have to download them, so "overwrite" is meaningless.
         if (existing.machine$id == machine$id) {
@@ -353,9 +351,9 @@ convert_input <-
                                           existing.dbfile$machine_id, "'"), con)
       
       #Grab machine info of host machine
-      machine.host <- ifelse(host$name == "localhost", PEcAn.remote::fqdn(), host$name)
-      machine <- db.query(paste0("SELECT * from machines where hostname = '",
-                                 machine.host, "'"), con)
+      machine.host.info <- get.machine.host(host, con = con)
+      machine.host <- machine.host.info$machine.host
+      machine <- machine.host.info$machine
       
       if (existing.machine$id != machine$id) {
         
@@ -475,11 +473,11 @@ convert_input <-
         existing.machine <- db.query(paste0("SELECT * from machines where id  = '",
                                             existing.dbfile$machine_id, "'"), con)
         
-        #Grab machine info of 
-        machine.host <- ifelse(host$name == "localhost", PEcAn.remote::fqdn(), host$name)
-        machine <- db.query(paste0("SELECT * from machines where hostname = '",
-                                   machine.host, "'"), con)
-        
+        #Grab machine info of host machine
+        machine.host.info <- get.machine.host(host, con = con)
+        machine.host <- machine.host.info$machine.host
+        machine <- machine.host.info$machine
+
         if(existing.machine$id != machine$id){
           PEcAn.logger::logger.info("Valid Input record found that spans desired dates, but valid files do not exist on this machine.")
           PEcAn.logger::logger.info("Downloading all years of Valid input to ensure consistency")
diff --git a/base/db/R/get.machine.info.R b/base/db/R/get.machine.info.R
index c98bee6cf20..979b1f6bb33 100644
--- a/base/db/R/get.machine.info.R
+++ b/base/db/R/get.machine.info.R
@@ -3,21 +3,21 @@
 #' @param input.args input args for existing records
 #' @param input.id input id for existing records
 #' @param con database connection
-#' 
+#'
 #' @return list of machine, input, and dbfile records
 #' @author Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko
 
 get.machine.info <- function(host, input.args, input.id = NULL, con = NULL) {
-    
-  machine.host <- ifelse(host$name == "localhost", PEcAn.remote::fqdn(), host$name)
-  machine <- db.query(paste0("SELECT * from machines where hostname = '",
-                             machine.host, "'"), con)
-  
+
+  machine.host.info <- get.machine.host(host, con = con)
+  machine.host <- machine.host.info$machine.host
+  machine <- machine.host.info$machine
+
   if (nrow(machine) == 0) {
     PEcAn.logger::logger.error("machine not found", host$name)
     return(NULL)
   }
-  
+
   if (missing(input.id) || is.na(input.id) || is.null(input.id)) {
     input <- dbfile <- NULL
   } else {
@@ -26,19 +26,19 @@ get.machine.info <- function(host, input.args, input.id = NULL, con = NULL) {
       PEcAn.logger::logger.error("input not found", input.id)
       return(NULL)
     }
-    
-    if(!is.null(input.args$dbfile.id)){
+
+    if (!is.null(input.args$dbfile.id)) {
       dbfile <-
         db.query(
           paste(
-            "SELECT * from dbfiles where id=",input.args$dbfile.id," and container_id =",
+            "SELECT * from dbfiles where id=", input.args$dbfile.id, " and container_id =",
             input.id,
             " and container_type = 'Input' and machine_id =",
             machine$id
           ),
           con
-        )  
-    }else{
+        )
+    } else {
       dbfile <-
         db.query(
           paste(
@@ -48,11 +48,11 @@ get.machine.info <- function(host, input.args, input.id = NULL, con = NULL) {
             machine$id
           ),
           con
-        )  
+        )
     }
-    
 
-    
+
+
     if (nrow(dbfile) == 0) {
       PEcAn.logger::logger.error("dbfile not found", input.id)
       return(NULL)
@@ -63,5 +63,21 @@ get.machine.info <- function(host, input.args, input.id = NULL, con = NULL) {
     }
   }
 
-    return(list(machine = machine, input = input, dbfile = dbfile))
+  return(list(machine = machine, input = input, dbfile = dbfile))
+}
+
+#' Helper Function to retrieve machine host and machine informations
+#' @param host host information
+#' @param con database connection
+#' @return list of machine host and machine information
+#' @author Abhinav Pandey
+get.machine.host <- function(host, con = NULL) {
+  #Grab machine info of host machine
+  machine.host <- ifelse(host$name == "localhost", PEcAn.remote::fqdn(), host$name)
+  machine <- db.query(paste0(
+    "SELECT * from machines where hostname = '",
+    machine.host, "'"
+  ), con)
+
+  return(list(machine.host, machine))
 }
diff --git a/base/db/man/get.machine.host.Rd b/base/db/man/get.machine.host.Rd
new file mode 100644
index 00000000000..926035dec0c
--- /dev/null
+++ b/base/db/man/get.machine.host.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/get.machine.info.R
+\name{get.machine.host}
+\alias{get.machine.host}
+\title{Helper Function to retrieve machine host and machine informations}
+\usage{
+get.machine.host(host, con = NULL)
+}
+\arguments{
+\item{host}{host information}
+
+\item{con}{database connection}
+}
+\value{
+list of machine host and machine information
+}
+\description{
+Helper Function to retrieve machine host and machine informations
+}
+\author{
+Abhinav Pandey
+}

From a578be2dfc274c10a100c90e1febf1474d5289f7 Mon Sep 17 00:00:00 2001
From: Abhinav Pandey <abhinav.pandey.met22@itbhu.ac.in>
Date: Wed, 9 Oct 2024 11:45:13 +0530
Subject: [PATCH 11/14] Applied changes as suggested by @infotroph

Signed-off-by: Abhinav Pandey <abhinav.pandey.met22@itbhu.ac.in>
---
 base/db/R/add.database.entries.R | 161 ++++++++++++++++---------------
 base/db/R/convert_input.R        |  22 +++--
 2 files changed, 100 insertions(+), 83 deletions(-)

diff --git a/base/db/R/add.database.entries.R b/base/db/R/add.database.entries.R
index d3eb994a646..8b36e884398 100644
--- a/base/db/R/add.database.entries.R
+++ b/base/db/R/add.database.entries.R
@@ -23,99 +23,108 @@
 
 add.database.entries <- function(
     result, con, start_date,
-    end_date, write, overwrite,
+    end_date, overwrite,
     insert.new.file, input.args,
     machine, mimetype, formatname,
     allow.conflicting.dates, ensemble,
     ensemble_name, existing.input,
     existing.dbfile, input) {
-    if (write) {
-        # Setup newinput. This list will contain two variables: a vector of input IDs and a vector of DB IDs for each entry in result.
-        # This list will be returned.
-        newinput <- list(input.id = NULL, dbfile.id = NULL) # Blank vectors are null.
+    # Setup newinput. This list will contain two variables: a vector of input IDs and a vector of DB IDs for each entry in result.
+    # This list will be returned.
+    newinput <- list(input.id = NULL, dbfile.id = NULL) # Blank vectors are null.
 
-        for (i in 1:length(result)) { # Master for loop
-            id_not_added <- TRUE
+    for (i in 1:length(result)) { # Master for loop
+        id_not_added <- TRUE
 
-            if (!is.null(existing.input) && nrow(existing.input[[i]]) > 0 &&
-                (existing.input[[i]]$start_date != start_date || existing.input[[i]]$end_date != end_date)) {
-                # Updating record with new dates
-                db.query(paste0("UPDATE inputs SET start_date='", start_date, "', end_date='", end_date, "' WHERE id=", existing.input[[i]]$id), con)
-                id_not_added <- FALSE
+        if (!is.null(existing.input) && nrow(existing.input[[i]]) > 0 &&
+            (existing.input[[i]]$start_date != start_date || existing.input[[i]]$end_date != end_date)) {
+            # Updating record with new dates
+            db.query(
+                paste0(
+                    "UPDATE inputs SET start_date='", start_date, "', end_date='", end_date,
+                    "' WHERE id=", existing.input[[i]]$id
+                ),
+                con
+            )
+            id_not_added <- FALSE
 
-                # The overall structure of this loop has been set up so that exactly one input.id and one dbfile.id will be written to newinput every iteration.
-                newinput$input.id <- c(newinput$input.id, existing.input[[i]]$id)
-                newinput$dbfile.id <- c(newinput$dbfile.id, existing.dbfile[[i]]$id)
-            }
-
-            if (overwrite) {
-                # A bit hacky, but need to make sure that all fields are updated to expected values (i.e., what they'd be if convert_input was creating a new record)
-                if (!is.null(existing.input) && nrow(existing.input[[i]]) > 0) {
-                    db.query(paste0("UPDATE inputs SET name='", basename(dirname(result[[i]]$file[1])), "' WHERE id=", existing.input[[i]]$id), con)
-                }
+            # The overall structure of this loop has been set up so that exactly one input.id and one dbfile.id will be written to newinput every iteration.
+            newinput$input.id <- c(newinput$input.id, existing.input[[i]]$id)
+            newinput$dbfile.id <- c(newinput$dbfile.id, existing.dbfile[[i]]$id)
+        }
 
-                if (!is.null(existing.dbfile) && nrow(existing.dbfile[[i]]) > 0) {
-                    db.query(paste0("UPDATE dbfiles SET file_path='", dirname(result[[i]]$file[1]), "', file_name='", result[[i]]$dbfile.name[1], "' WHERE id=", existing.dbfile[[i]]$id), con)
-                }
+        if (overwrite) {
+            # A bit hacky, but need to make sure that all fields are updated to expected values (i.e., what they'd be if convert_input was creating a new record)
+            if (!is.null(existing.input) && nrow(existing.input[[i]]) > 0) {
+                db.query(
+                    paste0(
+                        "UPDATE dbfiles SET file_path='", dirname(result[[i]]$file[1]),
+                        "', file_name='", result[[i]]$dbfile.name[1],
+                        "' WHERE id=", existing.dbfile[[i]]$id
+                    ),
+                    con
+                )
             }
 
-            # If there is no ensemble then for each record there should be one parent
-            # But when you have ensembles, all of the members have one parent !!
-            parent.id <- if (is.numeric(ensemble)) {
-                ifelse(is.null(input[[i]]), NA, input[[1]]$id)
-            } else {
-                ifelse(is.null(input[[i]]), NA, input[[i]]$id)
+            if (!is.null(existing.dbfile) && nrow(existing.dbfile[[i]]) > 0) {
+                db.query(paste0(
+                    "UPDATE dbfiles SET file_path='", dirname(result[[i]]$file[1]),
+                    "', file_name='", result[[i]]$dbfile.name[1],
+                    "' WHERE id=", existing.dbfile[[i]]$id
+                ), con)
             }
+        }
 
+        # If there is no ensemble then for each record there should be one parent
+        # But when you have ensembles, all of the members have one parent !!
+        parent.id <- if (is.numeric(ensemble)) {
+            ifelse(is.null(input[[i]]), NA, input[[1]]$id)
+        } else {
+            ifelse(is.null(input[[i]]), NA, input[[i]]$id)
+        }
 
-            if ("newsite" %in% names(input.args) && !is.null(input.args[["newsite"]])) {
-                site.id <- input.args$newsite
-            }
 
-            if (insert.new.file && id_not_added) {
-                dbfile.id <- dbfile.insert(
-                    in.path = dirname(result[[i]]$file[1]),
-                    in.prefix = result[[i]]$dbfile.name[1],
-                    "Input",
-                    existing.input[[i]]$id,
-                    con,
-                    reuse = TRUE,
-                    hostname = machine$hostname
-                )
+        if ("newsite" %in% names(input.args) && !is.null(input.args[["newsite"]])) {
+            site.id <- input.args$newsite
+        }
 
-                newinput$input.id <- c(newinput$input.id, existing.input[[i]]$id)
-                newinput$dbfile.id <- c(newinput$dbfile.id, dbfile.id)
-            } else if (id_not_added) {
-                # This is to tell input.insert if we are writing ensembles
-                # Why does it need it? Because it checks for inputs with the same time period, site, and machine
-                # and if it returns something it does not insert anymore, but for ensembles, it needs to bypass this condition
-                ens.flag <- if (!is.null(ensemble) | is.null(ensemble_name)) TRUE else FALSE
+        if (insert.new.file && id_not_added) {
+            dbfile.id <- dbfile.insert(
+                in.path = dirname(result[[i]]$file[1]),
+                in.prefix = result[[i]]$dbfile.name[1],
+                "Input",
+                existing.input[[i]]$id,
+                con,
+                reuse = TRUE,
+                hostname = machine$hostname
+            )
 
-                new_entry <- dbfile.input.insert(
-                    in.path = dirname(result[[i]]$file[1]),
-                    in.prefix = result[[i]]$dbfile.name[1],
-                    siteid = site.id,
-                    startdate = start_date,
-                    enddate = end_date,
-                    mimetype = mimetype,
-                    formatname = formatname,
-                    parentid = parent.id,
-                    con = con,
-                    hostname = machine$hostname,
-                    allow.conflicting.dates = allow.conflicting.dates,
-                    ens = ens.flag
-                )
+            newinput$input.id <- c(newinput$input.id, existing.input[[i]]$id)
+            newinput$dbfile.id <- c(newinput$dbfile.id, dbfile.id)
+        } else if (id_not_added) {
+            # This is to tell input.insert if we are writing ensembles
+            # Why does it need it? Because it checks for inputs with the same time period, site, and machine
+            # and if it returns something it does not insert anymore, but for ensembles, it needs to bypass this condition
+            ens.flag <- if (!is.null(ensemble) || is.null(ensemble_name)) TRUE else FALSE
 
-                newinput$input.id <- c(newinput$input.id, new_entry$input.id)
-                newinput$dbfile.id <- c(newinput$dbfile.id, new_entry$dbfile.id)
-            }
-        } # End for loop
+            new_entry <- dbfile.input.insert(
+                in.path = dirname(result[[i]]$file[1]),
+                in.prefix = result[[i]]$dbfile.name[1],
+                siteid = site.id,
+                startdate = start_date,
+                enddate = end_date,
+                mimetype = mimetype,
+                formatname = formatname,
+                parentid = parent.id,
+                con = con,
+                hostname = machine$hostname,
+                allow.conflicting.dates = allow.conflicting.dates,
+                ens = ens.flag
+            )
 
-        successful <- TRUE
-        return(newinput)
-    } else {
-        PEcAn.logger::logger.warn("Input was not added to the database")
-        successful <- TRUE
-        return(NULL)
-    }
+            newinput$input.id <- c(newinput$input.id, new_entry$input.id)
+            newinput$dbfile.id <- c(newinput$dbfile.id, new_entry$dbfile.id)
+        }
+    } # End for loop
+    return(newinput)
 }
diff --git a/base/db/R/convert_input.R b/base/db/R/convert_input.R
index 8828d069d6c..ed267440fbc 100644
--- a/base/db/R/convert_input.R
+++ b/base/db/R/convert_input.R
@@ -591,13 +591,21 @@ convert_input <-
   
   #---------------------------------------------------------------#
   # New arrangement of database adding code to deal with ensembles.
-  return (add.database.entries(result, con, start_date,
-    end_date, write, overwrite,
-    insert.new.file, input.args,
-    machine, mimetype, formatname,
-    allow.conflicting.dates, ensemble,
-    ensemble_name, existing.input,
-    existing.dbfile, input))
+    if(write) {
+      add_entries_result <- return (add.database.entries(result, con, start_date,
+      end_date, overwrite,
+      insert.new.file, input.args,
+      machine, mimetype, formatname,
+      allow.conflicting.dates, ensemble,
+      ensemble_name, existing.input,
+      existing.dbfile, input))
+    } else {
+      PEcAn.logger::logger.warn("Input was not added to the database")
+      successful <- TRUE
+      return(NULL)
+    }
+    successful <- TRUE
+    return (add_entries_result)
 } # convert_input
 
 

From 293a68befdc9452b2011da4f6320da502c91b79d Mon Sep 17 00:00:00 2001
From: Abhinav Pandey <abhinav.pandey.met22@itbhu.ac.in>
Date: Wed, 9 Oct 2024 11:55:21 +0530
Subject: [PATCH 12/14] Minor review changes

Signed-off-by: Abhinav Pandey <abhinav.pandey.met22@itbhu.ac.in>
---
 base/db/R/check.missing.files.R                   | 8 ++++----
 base/db/R/convert_input.R                         | 2 +-
 base/db/tests/testthat/test.check.missing.files.R | 1 -
 3 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/base/db/R/check.missing.files.R b/base/db/R/check.missing.files.R
index 617878496de..29ce044f68c 100644
--- a/base/db/R/check.missing.files.R
+++ b/base/db/R/check.missing.files.R
@@ -8,7 +8,7 @@
 #'
 #' @author Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko
 
-check_missing_files <- function(result, outname, existing.input = NULL, existing.dbfile = NULL) {
+check_missing_files <- function(result, existing.input = NULL, existing.dbfile = NULL) {
   result_sizes <- purrr::map_dfr(
     result,
     ~ dplyr::mutate(
@@ -35,12 +35,12 @@ check_missing_files <- function(result, outname, existing.input = NULL, existing
   }
 
 
-  # Wrap in a list for consistant processing later
-  if (exists("existing.input") && is.data.frame(existing.input)) {
+  # Wrap in a list for consistent processing later
+  if (is.data.frame(existing.input)) {
     existing.input <- list(existing.input)
   }
 
-  if (exists("existing.dbfile") && is.data.frame(existing.dbfile)) {
+  if (is.data.frame(existing.dbfile)) {
     existing.dbfile <- list(existing.dbfile)
   }
   return(list(existing.input, existing.dbfile))
diff --git a/base/db/R/convert_input.R b/base/db/R/convert_input.R
index ed267440fbc..a074a689389 100644
--- a/base/db/R/convert_input.R
+++ b/base/db/R/convert_input.R
@@ -583,7 +583,7 @@ convert_input <-
   #--------------------------------------------------------------------------------------------------#
   # Check if result has empty or missing files
 
-  checked.missing.files <- check_missing_files(result, outname, existing.input, existing.dbfile)
+  checked.missing.files <- check_missing_files(result, existing.input, existing.dbfile)
 
   # Unwrap parameters after performing checks for missing files
   existing.input <- checked.missing.files$existing.input
diff --git a/base/db/tests/testthat/test.check.missing.files.R b/base/db/tests/testthat/test.check.missing.files.R
index c0ad6794f65..bc61bb1ad4a 100644
--- a/base/db/tests/testthat/test.check.missing.files.R
+++ b/base/db/tests/testthat/test.check.missing.files.R
@@ -5,7 +5,6 @@ test_that("`check_missing_files()` able to return correct missing files", {
 
     res <- check_missing_files(
         result = list(data.frame(file = c("A", "B"))),
-        outname = "test",
         existing.input = data.frame(),
         existing.dbfile = data.frame()
     )

From f7f6926fa14c5c5e8ee776b74e0ac5fd77d56048 Mon Sep 17 00:00:00 2001
From: Abhinav Pandey <abhinavpandey1230@gmail.com>
Date: Wed, 9 Oct 2024 11:55:36 +0530
Subject: [PATCH 13/14] Update base/db/R/get.machine.info.R

Co-authored-by: Chris Black <chris@ckblack.org>
---
 base/db/R/get.machine.info.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/base/db/R/get.machine.info.R b/base/db/R/get.machine.info.R
index 979b1f6bb33..31f489daddc 100644
--- a/base/db/R/get.machine.info.R
+++ b/base/db/R/get.machine.info.R
@@ -7,7 +7,7 @@
 #' @return list of machine, input, and dbfile records
 #' @author Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko
 
-get.machine.info <- function(host, input.args, input.id = NULL, con = NULL) {
+get_machine_info <- function(host, input.args, input.id = NULL, con = NULL) {
 
   machine.host.info <- get.machine.host(host, con = con)
   machine.host <- machine.host.info$machine.host

From 8f820b027cb7fb5da70f8f66e8f6e88abd1d4f8b Mon Sep 17 00:00:00 2001
From: Abhinav Pandey <abhinavpandey1230@gmail.com>
Date: Wed, 9 Oct 2024 11:56:54 +0530
Subject: [PATCH 14/14] Apply suggestions from code review

Co-authored-by: Chris Black <chris@ckblack.org>
---
 base/db/R/check.missing.files.R                   | 1 -
 base/db/R/convert_input.R                         | 6 +++++-
 base/db/R/get.machine.info.R                      | 4 ++--
 base/db/tests/testthat/test.check.missing.files.R | 6 ++----
 4 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/base/db/R/check.missing.files.R b/base/db/R/check.missing.files.R
index 29ce044f68c..f3a496cf5de 100644
--- a/base/db/R/check.missing.files.R
+++ b/base/db/R/check.missing.files.R
@@ -1,7 +1,6 @@
 #' Function to check if result has empty or missing files
 #'
 #' @param result A list of dataframes with file paths
-#' @param outname Name of the output file
 #' @param existing.input Existing input records
 #' @param existing.dbfile Existing dbfile records
 #' @return A list of dataframes with file paths, a list of strings with the output file name, a list of existing input records, and a list of existing dbfile records
diff --git a/base/db/R/convert_input.R b/base/db/R/convert_input.R
index a074a689389..042c9da08db 100644
--- a/base/db/R/convert_input.R
+++ b/base/db/R/convert_input.R
@@ -380,7 +380,7 @@ convert_input <-
     if (!is.null(ensemble) && ensemble) {
       return.all <-TRUE
       
-    } else{
+    } else {
       return.all <- FALSE
     }
       existing.dbfile <- dbfile.input.check(siteid = site.id,
@@ -518,6 +518,10 @@ convert_input <-
   # Get machine information
   machine.info <- get.machine.info(host, input.args = input.args, input.id = input.id)
 
+  if (any(sapply(machine.info, is.null))) {
+    PEcAn.logger::logger.error("failed lookup of inputs or dbfiles")
+    return(NULL)  
+  }
   machine <- machine.info$machine
   input <- machine.info$input
   dbfile <- machine.info$dbfile
diff --git a/base/db/R/get.machine.info.R b/base/db/R/get.machine.info.R
index 31f489daddc..14123a586e9 100644
--- a/base/db/R/get.machine.info.R
+++ b/base/db/R/get.machine.info.R
@@ -18,7 +18,7 @@ get_machine_info <- function(host, input.args, input.id = NULL, con = NULL) {
     return(NULL)
   }
 
-  if (missing(input.id) || is.na(input.id) || is.null(input.id)) {
+  if (is.na(input.id) || is.null(input.id)) {
     input <- dbfile <- NULL
   } else {
     input <- db.query(paste("SELECT * from inputs where id =", input.id), con)
@@ -71,7 +71,7 @@ get_machine_info <- function(host, input.args, input.id = NULL, con = NULL) {
 #' @param con database connection
 #' @return list of machine host and machine information
 #' @author Abhinav Pandey
-get.machine.host <- function(host, con = NULL) {
+get_machine_host <- function(host, con) {
   #Grab machine info of host machine
   machine.host <- ifelse(host$name == "localhost", PEcAn.remote::fqdn(), host$name)
   machine <- db.query(paste0(
diff --git a/base/db/tests/testthat/test.check.missing.files.R b/base/db/tests/testthat/test.check.missing.files.R
index bc61bb1ad4a..75a531283dd 100644
--- a/base/db/tests/testthat/test.check.missing.files.R
+++ b/base/db/tests/testthat/test.check.missing.files.R
@@ -1,7 +1,7 @@
 test_that("`check_missing_files()` able to return correct missing files", {
     # Mock `purrr::map_dfr`
-    mocked_res <- mockery::mock(data.frame(file = c("A", "B"), file_size = c(100, 200), missing = c(FALSE, FALSE), empty = c(FALSE, FALSE)))
-    mockery::stub(check_missing_files, "purrr::map_dfr", mocked_res)
+    mocked_size <- mockery::mock(100,200)
+    mockery::stub(check_missing_files, "file.size", mocked_res)
 
     res <- check_missing_files(
         result = list(data.frame(file = c("A", "B"))),
@@ -9,8 +9,6 @@ test_that("`check_missing_files()` able to return correct missing files", {
         existing.dbfile = data.frame()
     )
 
-    # Print the structure of `res` for debugging
-    str(res)
 
     expect_equal(length(res), 2)
     expect_true(is.list(res[[1]]))