Skip to content

Commit

Permalink
Merge pull request #223 from ChrisJohnNOAA/2.25.1-Patches
Browse files Browse the repository at this point in the history
2.25.1 patches
  • Loading branch information
ChrisJohnNOAA authored Oct 31, 2024
2 parents c1718a7 + 16c1bb2 commit 44c825e
Show file tree
Hide file tree
Showing 49 changed files with 291 additions and 95 deletions.
2 changes: 1 addition & 1 deletion DEPLOY_INSTALL.md
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ ERDDAP™ can run on any server that supports Java and Tomcat (and other applica

4. [Install the erddap.war file.](#erddap.war)
On Linux, Mac, and Windows, download [erddap.war](https://github.com/ERDDAP/erddap/releases/download/v2.25/erddap.war) into _tomcat_/webapps .
(version 2.25, 592,429,675 bytes, MD5=0D93F045A3F38018117C0BB5BA419C99, dated 2024-10-17)
(version 2.25, 592,291,920 bytes, MD5=BEEBE386A3514C0FB8898C6EA597F40D, dated 2024-10-31)

The .war file is big because it contains high resolution coastline, boundary, and elevation data needed to create maps.

Expand Down
2 changes: 1 addition & 1 deletion DEPLOY_UPDATE.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
2. If you are upgrading from ERDDAP™ version 2.18 or below, you need to switch to Java 21 (or newer) and the related Tomcat 10. See the regular ERDDAP™ installation instructions for [Java](#java) and [Tomcat](#tomcat). You'll also have to copy your _tomcat_/content/erddap directory from your old Tomcat installation to your new Tomcat installation.

3. Download [erddap.war](https://github.com/ERDDAP/erddap/releases/download/v2.25/erddap.war) into _tomcat_/webapps .
(version 2.25, 592,429,675 bytes, MD5=0D93F045A3F38018117C0BB5BA419C99, dated 2024-10-17)
(version 2.25, 592,291,920 bytes, MD5=BEEBE386A3514C0FB8898C6EA597F40D, dated 2024-10-31)

4. [messages.xml](#messages.xml)
* Common: If you are upgrading from ERDDAP™ version 1.46 (or above) and you just use the standard messages, the new standard messages.xml will be installed automatically (amongst the .class files via erddap.war).
Expand Down
1 change: 1 addition & 0 deletions WEB-INF/classes/com/cohort/util/Calendar2.java
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ public class Calendar2 {
public static final long MILLIS_PER_DAY = SECONDS_PER_DAY * 1000L;

public static final String SECONDS_SINCE_1970 = "seconds since 1970-01-01T00:00:00Z";
public static final String MILLISECONDS_SINCE_1970 = "milliseconds since 1970-01-01T00:00:00Z";

public static final String zulu = "Zulu";
public static final TimeZone zuluTimeZone = TimeZone.getTimeZone(zulu);
Expand Down
137 changes: 105 additions & 32 deletions WEB-INF/classes/gov/noaa/pfel/coastwatch/pointdata/Table.java
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,12 @@
import org.apache.parquet.io.LocalOutputFile;
import org.apache.parquet.io.MessageColumnIO;
import org.apache.parquet.io.RecordReader;
import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
import org.apache.parquet.schema.Type;
import org.apache.parquet.schema.Types.MessageTypeBuilder;
import org.xml.sax.InputSource;
import org.xml.sax.XMLReader;
import ucar.ma2.*;
Expand Down Expand Up @@ -588,7 +591,7 @@ private static interface WithColumnNames {
public static BitSet ncCFcc = null; // null=inactive, new BitSet() = active

/** An arrayList to hold 0 or more PrimitiveArray's with data. */
protected ArrayList<PrimitiveArray> columns = new ArrayList();
protected ArrayList<PrimitiveArray> columns = new ArrayList<>();

/** An arrayList to hold the column names. */
protected StringArray columnNames = new StringArray();
Expand All @@ -605,7 +608,7 @@ private static interface WithColumnNames {
* Although a HashTable is more appropriate for name=value pairs, this uses ArrayList to preserve
* the order of the attributes. This may be null if not in use.
*/
protected ArrayList<Attributes> columnAttributes = new ArrayList();
protected ArrayList<Attributes> columnAttributes = new ArrayList<>();

/** The one known valid url for readIobis. */
public static final String IOBIS_URL = "http://www.iobis.org/OBISWEB/ObisControllerServlet";
Expand Down Expand Up @@ -16089,52 +16092,91 @@ public void readParquet(
}
}

private MessageType getParquetSchemaForTable(String name) {
String schemaProto = "message m {";
private boolean isTimeColumn(int col) {
return "time".equalsIgnoreCase(getColumnName(col))
&& Calendar2.SECONDS_SINCE_1970.equals(columnAttributes.get(col).getString("units"));
}

/**
 * Builds the Parquet MessageType schema describing this table's columns.
 *
 * <p>Mapping: epoch-seconds time columns become INT64 timestamps (millis, UTC-adjusted);
 * BYTE/SHORT/INT/UBYTE/USHORT become INT32; CHAR/STRING become BINARY with a string
 * annotation; LONG/UINT become INT64; FLOAT stays FLOAT; DOUBLE and ULONG become DOUBLE
 * (ULONG because Parquet has no unsigned 64-bit primitive); BOOLEAN stays BOOLEAN.
 * All fields are optional so missing values can be encoded as nulls.
 *
 * @return the Parquet schema (message named "m") for this table
 */
private MessageType getParquetSchemaForTable() {
  MessageTypeBuilder schemaBuilder = org.apache.parquet.schema.Types.buildMessage();
  for (int j = 0; j < nColumns(); j++) {
    String columnName = getColumnName(j);
    if (isTimeColumn(j)) {
      // Time columns are written as epoch-millis timestamps (true => adjusted to UTC).
      schemaBuilder
          .optional(PrimitiveTypeName.INT64)
          .as(LogicalTypeAnnotation.timestampType(true, TimeUnit.MILLIS))
          .named(columnName);
      continue;
    }
    switch (getColumn(j).elementType()) {
      case BYTE:
      case SHORT:
      case INT:
      case UBYTE:
      case USHORT:
        // All integer types that fit in 32 bits are widened to INT32.
        schemaBuilder.optional(PrimitiveTypeName.INT32).named(columnName);
        break;
      case CHAR:
      case STRING:
        schemaBuilder
            .optional(PrimitiveTypeName.BINARY)
            .as(LogicalTypeAnnotation.stringType())
            .named(columnName);
        break;
      case LONG:
      case UINT:
        schemaBuilder.optional(PrimitiveTypeName.INT64).named(columnName);
        break;
      case FLOAT:
        schemaBuilder.optional(PrimitiveTypeName.FLOAT).named(columnName);
        break;
      case DOUBLE:
      case ULONG:
        // ULONG doesn't fit in Parquet's signed INT64, so it is stored as DOUBLE
        // (with possible precision loss for very large values).
        schemaBuilder.optional(PrimitiveTypeName.DOUBLE).named(columnName);
        break;
      case BOOLEAN:
        schemaBuilder.optional(PrimitiveTypeName.BOOLEAN).named(columnName);
        break;
    }
  }
  return schemaBuilder.named("m");
}

/**
 * Copies the name=value pairs from 'attributes' into the Parquet key/value metadata map,
 * prefixing each key with 'prefix' (e.g., a column name + "_", or "" for global attributes).
 * Empty names and empty values are skipped.
 *
 * <p>Special case: for the time column's attributes (prefix "time_"), any attribute whose
 * value is Calendar2.SECONDS_SINCE_1970 is rewritten as Calendar2.MILLISECONDS_SINCE_1970,
 * because the Parquet writer converts time values from seconds to millis.
 * NOTE(review): this matches on the attribute's VALUE, not on the attribute name "units",
 * so any time_ attribute with that exact value would be rewritten — presumably only
 * "units" ever has it; confirm.
 *
 * @param metadata the destination map of Parquet file metadata (modified in place)
 * @param attributes the source attributes
 * @param prefix the prefix prepended to each attribute name
 */
private void addMetadata(Map<String, String> metadata, Attributes attributes, String prefix) {
  String names[] = attributes.getNames();
  for (int ni = 0; ni < names.length; ni++) {
    String tName = names[ni];
    if (!String2.isSomething(tName)) {
      continue;
    }
    PrimitiveArray tValue = attributes.get(tName);
    if (tValue == null || tValue.size() == 0 || tValue.toString().length() == 0) {
      continue; // do nothing
    }
    if ("time_".equalsIgnoreCase(prefix)
        && Calendar2.SECONDS_SINCE_1970.equals(attributes.getString(tName))) {
      metadata.put(prefix + tName, Calendar2.MILLISECONDS_SINCE_1970);
    } else {
      metadata.put(prefix + tName, tValue.toCSVString());
    }
  }
}

/**
Expand All @@ -16143,23 +16185,49 @@ private MessageType getParquetSchemaForTable(String name) {
* @param fullFileName This is just used for error messages.
* @throws Exception if trouble, including observed nItems != expected nItems.
*/
public void writeParquet(String fullFileName) throws Exception {
public void writeParquet(String fullFileName, boolean fullMetadata) throws Exception {
String msg = " Table.writeParquet " + fullFileName;
long time = System.currentTimeMillis();

int randomInt = Math2.random(Integer.MAX_VALUE);

int nameStart = fullFileName.lastIndexOf('/');
if (nameStart == -1) {
nameStart = fullFileName.lastIndexOf('\\');
MessageType schema = getParquetSchemaForTable();

Map<String, String> metadata = new HashMap<>();
if (fullMetadata) {
addMetadata(metadata, globalAttributes, "");
for (int col = 0; col < nColumns(); col++) {
Attributes colAttributes = columnAttributes.get(col);
if (colAttributes == null) {
continue;
}
addMetadata(metadata, colAttributes, getColumnName(col) + "_");
}
}
int nameEnd = fullFileName.lastIndexOf('.');
String name = fullFileName.substring(nameStart + 1, nameEnd);
MessageType schema = getParquetSchemaForTable(name);

String columnNames = "";
String columnUnits = "";
for (int col = 0; col < nColumns(); col++) {
Attributes colAttributes = columnAttributes.get(col);
if (colAttributes == null) {
continue;
}
if (columnNames.length() > 0) {
columnNames += ",";
columnUnits += ",";
}
columnNames += getColumnName(col);
if (isTimeColumn(col)) {
columnUnits += Calendar2.MILLISECONDS_SINCE_1970;
} else {
columnUnits += colAttributes.getString("units");
}
}
metadata.put("column_names", columnNames);
metadata.put("column_units", columnUnits);
try (ParquetWriter<List<PAOne>> writer =
new ParquetWriterBuilder(
schema, new LocalOutputFile(java.nio.file.Path.of(fullFileName + randomInt)))
schema,
new LocalOutputFile(java.nio.file.Path.of(fullFileName + randomInt)),
metadata)
.withCompressionCodec(CompressionCodecName.SNAPPY)
.withRowGroupSize(ParquetWriter.DEFAULT_BLOCK_SIZE)
.withPageSize(ParquetWriter.DEFAULT_PAGE_SIZE)
Expand All @@ -16171,7 +16239,12 @@ schema, new LocalOutputFile(java.nio.file.Path.of(fullFileName + randomInt)))
for (int row = 0; row < nRows(); row++) {
ArrayList<PAOne> record = new ArrayList<>();
for (int j = 0; j < nColumns(); j++) {
record.add(getPAOneData(j, row));
if (isTimeColumn(j)) {
// Convert from seconds since epoch to millis since epoch.
record.add(getPAOneData(j, row).multiply(PAOne.fromInt(1000)));
} else {
record.add(getPAOneData(j, row));
}
}
writer.write(record);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
package gov.noaa.pfel.coastwatch.pointdata.parquet;

import com.cohort.array.PAOne;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.parquet.column.ColumnDescriptor;
import org.apache.parquet.hadoop.api.WriteSupport;
Expand All @@ -15,15 +15,17 @@ public class CustomWriteSupport extends WriteSupport<List<PAOne>> {
MessageType schema;
RecordConsumer recordConsumer;
List<ColumnDescriptor> cols;
private Map<String, String> metadata;

/**
 * Creates a WriteSupport for the given schema that also attaches the supplied
 * key/value metadata to the Parquet file footer at init() time.
 *
 * @param schema the Parquet message schema for the records to be written
 * @param metadata extra key/value pairs to store in the file's footer metadata
 */
CustomWriteSupport(MessageType schema, Map<String, String> metadata) {
  this.schema = schema;
  this.cols = schema.getColumns();
  this.metadata = metadata;
}

/**
 * Initializes the write context, attaching the user-supplied key/value metadata
 * (passed to the constructor) to the Parquet file footer.
 *
 * @param config the Hadoop configuration (unused here)
 * @return the WriteContext with this writer's schema and extra metadata
 */
@Override
public WriteContext init(Configuration config) {
  return new WriteContext(schema, metadata);
}

@Override
Expand Down Expand Up @@ -51,7 +53,7 @@ public void write(List<PAOne> values) {
// val.length() == 0 indicates a NULL value.
if (val != null && !val.isMissingValue()) {
recordConsumer.startField(cols.get(i).getPath()[0], i);
switch (cols.get(i).getType()) {
switch (cols.get(i).getPrimitiveType().getPrimitiveTypeName()) {
case BOOLEAN:
recordConsumer.addBoolean(Boolean.parseBoolean(val.getString()));
break;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import com.cohort.array.PAOne;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.api.WriteSupport;
Expand All @@ -12,9 +13,9 @@ public class ParquetWriterBuilder extends ParquetWriter.Builder<List<PAOne>, Par

private CustomWriteSupport writeSupport;

/**
 * Creates a builder for a ParquetWriter that writes rows of PAOne values using
 * CustomWriteSupport, embedding the given key/value metadata in the file footer.
 *
 * @param schema the Parquet message schema for the output file
 * @param file the destination output file
 * @param metadata extra key/value pairs to store in the file's footer metadata
 */
public ParquetWriterBuilder(MessageType schema, OutputFile file, Map<String, String> metadata) {
  super(file);
  writeSupport = new CustomWriteSupport(schema, metadata);
}

@Override
Expand Down
4 changes: 2 additions & 2 deletions WEB-INF/classes/gov/noaa/pfel/erddap/Erddap.java
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ public class Erddap extends HttpServlet {
".jsonlCSV", ".jsonlKVP", ".nccsv", ".nccsvMetadata"
};
public static final String FILE_TYPES_184[] = {".dataTable", ".jsonlCSV1"};
public static final String FILE_TYPES_225[] = {".parquet"};
public static final String FILE_TYPES_225[] = {".parquet", ".parquetWMeta"};
// General/relative width is determined by what looks good in Chrome.
// But Firefox shows TextArea's as very wide, so leads to these values.
public static final int dpfTFWidth = 56; // data provider form TextField width
Expand Down Expand Up @@ -4752,7 +4752,7 @@ public void doStatus(
+ EDStatic.youAreHere(language, loggedInAs, EDStatic.statusAr[language])
+ "<pre>");
StringBuilder sb = new StringBuilder();
EDStatic.addIntroStatistics(sb);
EDStatic.addIntroStatistics(sb, EDStatic.showLoadErrorsOnStatusPage);

// append number of active threads
String traces = MustBe.allStackTraces(true, true);
Expand Down
4 changes: 2 additions & 2 deletions WEB-INF/classes/gov/noaa/pfel/erddap/LoadDatasets.java
Original file line number Diff line number Diff line change
Expand Up @@ -1215,7 +1215,7 @@ private void emailOrphanDatasetsRemoved(

private void emailUnusualActivity(String threadSummary, String threadList) {
StringBuilder sb = new StringBuilder();
EDStatic.addIntroStatistics(sb);
EDStatic.addIntroStatistics(sb, true /* includeErrors */);

if (threadSummary != null) sb.append(threadSummary + "\n");

Expand Down Expand Up @@ -1280,7 +1280,7 @@ private void emailDailyReport(String threadSummary, String threadList, String re
String stars = String2.makeString('*', 70);
String subject = "Daily Report";
StringBuilder contentSB = new StringBuilder(subject + "\n\n");
EDStatic.addIntroStatistics(contentSB);
EDStatic.addIntroStatistics(contentSB, true /* includeErrors */);

// append number of active threads
if (threadSummary != null) contentSB.append(threadSummary + "\n");
Expand Down
Loading

0 comments on commit 44c825e

Please sign in to comment.