getFileNameLowLevel()

thejoshwolfe · Feb 18, 2024 · c18cfe4 · c18cfe4
1 parent ea39614
commit c18cfe4
Show file tree

Hide file tree

Showing 2 changed files with 103 additions and 54 deletions.
diff --git a/README.md b/README.md
@@ -168,6 +168,28 @@ Last I checked, it is at stage 3. https://github.com/tc39/proposal-temporal
 Once that new API is available and stable, better timezone handling should be possible here somehow.
 Feel free to open a feature request against this library when the time comes.
 
+### getFileNameLowLevel(generalPurposeBitFlag, fileNameBuffer, extraFields, strictFileNames)
+
+If you are setting `decodeStrings` to `false`, then this function can be used to decode the file name yourself.
+This function is effectively used internally by yauzl to populate the `entry.fileName` field when `decodeStrings` is `true`.
+
+WARNING: This method of getting the file name bypasses the security checks in [`validateFileName()`](#validatefilename-filename).
+You should call that function yourself to be sure to guard against malicious file paths.
+
+`generalPurposeBitFlag` can be found on an [`Entry`](#class-entry) or [`LocalFileHeader`](#class-localfileheader).
+Only General Purpose Bit 11 is used, and only when an Info-ZIP Unicode Path Extra Field cannot be found in `extraFields`.
+
+`fileNameBuffer` is a `Buffer` representing the file name field of the entry.
+This is `entry.fileNameRaw` or `localFileHeader.fileName`.
+
+`extraFields` is the parsed extra fields array from `entry.extraFields` or `parseExtraFields()`.
+
+`strictFileNames` is a boolean, the same as the option of the same name in `open()`.
+When `false`, backslash characters (`\`) will be replaced with forward slash characters (`/`).
+
+This function always returns a string, although it may not be a valid file name.
+See `validateFileName()`.
+
 ### validateFileName(fileName)
 
 Returns `null` or a `String` error message depending on the validity of `fileName`.
@@ -427,7 +449,16 @@ These fields are of type `Number`:
  * `externalFileAttributes`
  * `relativeOffsetOfLocalHeader`
 
+These fields are of type `Buffer`, and represent variable-length bytes before being processed:
+ * `fileNameRaw`
+ * `extraFieldRaw`
+ * `fileCommentRaw`
+
 There are additional fields described below: `fileName`, `extraFields`, `comment`.
+These are the `*Raw` fields above after going through some processing, such as UTF-8 decoding.
+See their own sections below.
+(Note the inconsistency in pluralization of "field" vs "fields" in `extraField`, `extraFields`, and `extraFieldRaw`.
+Sorry about that.)
 
 The `new Entry()` constructor is available for clients to call, but it's usually not useful.
 The constructor takes no parameters and does nothing; no fields will exist.
@@ -729,6 +760,8 @@ This library makes no attempt to interpret the Language Encoding Flag.
  * 3.1.0
    * Added `readLocalFileHeader()` and `Class: LocalFileHeader`.
    * Added `openReadStreamLowLevel()`.
+   * Added `getFileNameLowLevel()` and `parseExtraFields()`.
+     Added fields to `Class: Entry`: `fileNameRaw`, `extraFieldRaw`, `fileCommentRaw`.
    * Noted dropped support of node versions before 12 in the `"engines"` field of `package.json`.
  * 3.0.0
    * BREAKING CHANGE: implementations of [RandomAccessReader](#class-randomaccessreader) that implement a `destroy` method must instead implement `_destroy` in accordance with the node standard https://nodejs.org/api/stream.html#writable_destroyerr-callback (note the error and callback parameters). If you continue to override `destroy` instead, some error handling may be subtly broken. Additionally, this is required for async iterators to work correctly in some versions of node. [issue #110](https://github.com/thejoshwolfe/yauzl/issues/110)

diff --git a/index.js b/index.js
@@ -13,6 +13,7 @@ exports.fromFd = fromFd;
 exports.fromBuffer = fromBuffer;
 exports.fromRandomAccessReader = fromRandomAccessReader;
 exports.dosDateTimeToDate = dosDateTimeToDate;
+exports.getFileNameLowLevel = getFileNameLowLevel;
 exports.validateFileName = validateFileName;
 exports.parseExtraFields = parseExtraFields;
 exports.ZipFile = ZipFile;
@@ -113,7 +114,7 @@ function fromRandomAccessReader(reader, totalSize, options, callback) {
     for (var i = bufferSize - eocdrWithoutCommentSize; i >= 0; i -= 1) {
       if (buffer.readUInt32LE(i) !== 0x06054b50) continue;
       // found eocdr
-      var eocdrBuffer = buffer.slice(i);
+      var eocdrBuffer = buffer.subarray(i);
 
       // 0 - End of central directory signature = 0x06054b50
       // 4 - Number of this disk
@@ -136,8 +137,8 @@ function fromRandomAccessReader(reader, totalSize, options, callback) {
       }
       // 22 - Comment
       // the encoding is always cp437.
-      var comment = decodeStrings ? decodeBuffer(eocdrBuffer, 22, eocdrBuffer.length, false)
-                                  : eocdrBuffer.slice(22);
+      var comment = decodeStrings ? decodeBuffer(eocdrBuffer.subarray(22), false)
+                                  : eocdrBuffer.subarray(22);
 
       if (!(entryCount === 0xffff || centralDirectoryOffset === 0xffffffff)) {
         return callback(null, new ZipFile(reader, centralDirectoryOffset, totalSize, entryCount, comment, options.autoClose, options.lazyEntries, decodeStrings, options.validateEntrySizes, options.strictFileNames));
@@ -299,23 +300,32 @@ ZipFile.prototype._readEntry = function() {
       if (err) return emitErrorAndAutoClose(self, err);
       if (self.emittedError) return;
       // 46 - File name
-      var isUtf8 = (entry.generalPurposeBitFlag & 0x800) !== 0;
-      entry.fileName = self.decodeStrings ? decodeBuffer(buffer, 0, entry.fileNameLength, isUtf8)
-                                          : buffer.slice(0, entry.fileNameLength);
-
+      entry.fileNameRaw = buffer.subarray(0, entry.fileNameLength);
       // 46+n - Extra field
       var fileCommentStart = entry.fileNameLength + entry.extraFieldLength;
-      var extraFieldBuffer = buffer.slice(entry.fileNameLength, fileCommentStart);
+      entry.extraFieldRaw = buffer.subarray(entry.fileNameLength, fileCommentStart);
+      // 46+n+m - File comment
+      entry.fileCommentRaw = buffer.subarray(fileCommentStart, fileCommentStart + entry.fileCommentLength);
+
+      // Parse the extra fields, which we need for processing other fields.
       try {
-        entry.extraFields = parseExtraFields(extraFieldBuffer);
+        entry.extraFields = parseExtraFields(entry.extraFieldRaw);
       } catch (err) {
         return emitErrorAndAutoClose(self, err);
       }
 
-      // 46+n+m - File comment
-      entry.fileComment = self.decodeStrings ? decodeBuffer(buffer, fileCommentStart, fileCommentStart + entry.fileCommentLength, isUtf8)
-                                             : buffer.slice(fileCommentStart, fileCommentStart + entry.fileCommentLength);
-      // compatibility hack for https://github.com/thejoshwolfe/yauzl/issues/47
+      // Interpret strings according to bit flags, extra fields, and options.
+      if (self.decodeStrings) {
+        var isUtf8 = (entry.generalPurposeBitFlag & 0x800) !== 0;
+        entry.fileComment = decodeBuffer(entry.fileCommentRaw, isUtf8);
+        entry.fileName = getFileNameLowLevel(entry.generalPurposeBitFlag, entry.fileNameRaw, entry.extraFields, self.strictFileNames);
+        var errorMessage = validateFileName(entry.fileName);
+        if (errorMessage != null) return emitErrorAndAutoClose(self, new Error(errorMessage));
+      } else {
+        entry.fileComment = entry.fileCommentRaw;
+        entry.fileName = entry.fileNameRaw;
+      }
+      // Maintain API compatibility. See https://github.com/thejoshwolfe/yauzl/issues/47
       entry.comment = entry.fileComment;
 
       self.readEntryCursor += buffer.length;
@@ -365,36 +375,6 @@ ZipFile.prototype._readEntry = function() {
         // 24 - Disk Start Number      4 bytes
       }
 
-      // check for Info-ZIP Unicode Path Extra Field (0x7075)
-      // see https://github.com/thejoshwolfe/yauzl/issues/33
-      if (self.decodeStrings) {
-        for (var i = 0; i < entry.extraFields.length; i++) {
-          var extraField = entry.extraFields[i];
-          if (extraField.id === 0x7075) {
-            if (extraField.data.length < 6) {
-              // too short to be meaningful
-              continue;
-            }
-            // Version       1 byte      version of this extra field, currently 1
-            if (extraField.data.readUInt8(0) !== 1) {
-              // > Changes may not be backward compatible so this extra
-              // > field should not be used if the version is not recognized.
-              continue;
-            }
-            // NameCRC32     4 bytes     File Name Field CRC32 Checksum
-            var oldNameCrc32 = extraField.data.readUInt32LE(1);
-            if (crc32.unsigned(buffer.slice(0, entry.fileNameLength)) !== oldNameCrc32) {
-              // > If the CRC check fails, this UTF-8 Path Extra Field should be
-              // > ignored and the File Name field in the header should be used instead.
-              continue;
-            }
-            // UnicodeName   Variable    UTF-8 version of the entry File Name
-            entry.fileName = decodeBuffer(extraField.data, 5, extraField.data.length, true);
-            break;
-          }
-        }
-      }
-
       // validate file size
       if (self.validateEntrySizes && entry.compressionMethod === 0) {
         var expectedCompressedSize = entry.uncompressedSize;
@@ -408,14 +388,6 @@ ZipFile.prototype._readEntry = function() {
         }
       }
 
-      if (self.decodeStrings) {
-        if (!self.strictFileNames) {
-          // allow backslash
-          entry.fileName = entry.fileName.replace(/\\/g, "/");
-        }
-        var errorMessage = validateFileName(entry.fileName, self.validateFileNameOptions);
-        if (errorMessage != null) return emitErrorAndAutoClose(self, new Error(errorMessage));
-      }
       self.emit("entry", entry);
 
       if (!self.lazyEntries) self._readEntry();
@@ -646,6 +618,50 @@ function dosDateTimeToDate(date, time) {
   return new Date(year, month, day, hour, minute, second, millisecond);
 }
 
+function getFileNameLowLevel(generalPurposeBitFlag, fileNameBuffer, extraFields, strictFileNames) {
+  var fileName = null;
+  // Decodes an entry's file name to a string. The result is NOT validated here; see validateFileName().
+  // A usable Info-ZIP Unicode Path Extra Field (0x7075) takes priority over fileNameBuffer.
+  // see https://github.com/thejoshwolfe/yauzl/issues/33
+  for (var i = 0; i < extraFields.length; i++) {
+    var extraField = extraFields[i];
+    if (extraField.id === 0x7075) {
+      if (extraField.data.length < 6) {
+        // too short to be meaningful: 1 version byte + 4 CRC bytes would leave no bytes for the name
+        continue;
+      }
+      // Version       1 byte      version of this extra field, currently 1
+      if (extraField.data.readUInt8(0) !== 1) {
+        // > Changes may not be backward compatible so this extra
+        // > field should not be used if the version is not recognized.
+        continue;
+      }
+      // NameCRC32     4 bytes     File Name Field CRC32 Checksum
+      var oldNameCrc32 = extraField.data.readUInt32LE(1);
+      if (crc32.unsigned(fileNameBuffer) !== oldNameCrc32) {
+        // > If the CRC check fails, this UTF-8 Path Extra Field should be
+        // > ignored and the File Name field in the header should be used instead.
+        continue;
+      }
+      // UnicodeName   Variable    UTF-8 version of the entry File Name (everything after the 5 header bytes)
+      fileName = decodeBuffer(extraField.data.subarray(5), true);
+      break; // the first extra field that passes every check wins; later ones are not examined
+    }
+  }
+  // fileName is still null here when no extra field passed the checks above.
+  if (fileName == null) {
+    // The typical case: decode the raw file name bytes. General Purpose Bit 11 (0x800) selects UTF-8.
+    var isUtf8 = (generalPurposeBitFlag & 0x800) !== 0;
+    fileName = decodeBuffer(fileNameBuffer, isUtf8);
+  }
+  // The backslash replacement below applies to whichever source produced the name.
+  if (!strictFileNames) {
+    // Allow backslash by rewriting every "\" to "/".
+    fileName = fileName.replace(/\\/g, "/");
+  }
+  return fileName;
+}
+
 function validateFileName(fileName) {
   if (fileName.indexOf("\\") !== -1) {
     return "invalid characters in fileName: " + fileName;
@@ -817,12 +833,12 @@ RefUnrefFilter.prototype.unref = function(cb) {
 };
 
 var cp437 = '\u0000☺☻♥♦♣♠•◘○◙♂♀♪♫☼►◄↕‼¶§▬↨↑↓→←∟↔▲▼ !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~⌂ÇüéâäàåçêëèïîìÄÅÉæÆôöòûùÿÖÜ¢£¥₧ƒáíóúñÑªº¿⌐¬½¼¡«»░▒▓│┤╡╢╖╕╣║╗╝╜╛┐└┴┬├─┼╞╟╚╔╩╦╠═╬╧╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀αßΓπΣσµτΦΘΩδ∞φε∩≡±≥≤⌠⌡÷≈°∙·√ⁿ²■ ';
-function decodeBuffer(buffer, start, end, isUtf8) {
+function decodeBuffer(buffer, isUtf8) {
   if (isUtf8) {
-    return buffer.toString("utf8", start, end);
+    return buffer.toString("utf8");
   } else {
     var result = "";
-    for (var i = start; i < end; i++) {
+    for (var i = 0; i < buffer.length; i++) {
       result += cp437[buffer[i]];
     }
     return result;