From 3532083784e8f379248aabc8a177b30180839fa2 Mon Sep 17 00:00:00 2001 From: Josh Wolfe Date: Sat, 2 Nov 2024 21:25:49 -0400 Subject: [PATCH 1/2] support reading extended timestamp information --- README.md | 68 +++++++++++++-------- index.js | 78 +++++++++++++++++++++++-- test/success/unix-epoch.zip | Bin 0 -> 178 bytes test/success/unix-epoch/unix-epoch.txt | 0 test/test.js | 26 +++++++-- 5 files changed, 137 insertions(+), 35 deletions(-) create mode 100644 test/success/unix-epoch.zip create mode 100644 test/success/unix-epoch/unix-epoch.txt diff --git a/README.md b/README.md index 8c99e66..0226b5e 100644 --- a/README.md +++ b/README.md @@ -147,26 +147,14 @@ See `open()` for the meaning of the options and callback. ### dosDateTimeToDate(date, time) -Converts MS-DOS `date` and `time` data into a JavaScript `Date` object. -Each parameter is a `Number` treated as an unsigned 16-bit integer. -Note that this format does not support timezones. -The returned `Date` object will be constructed using the local timezone. +*Deprecated*. Since yauzl 3.2.0, it is highly recommended to call [`entry.getLastModDate()`](#getlastmoddateoptions) +instead of this function due to enhanced support for reading third-party extra fields. +If you ever have a use case for calling this function directly please +[open an issue against yauzl](https://github.com/thejoshwolfe/yauzl/issues/new) +requesting that this function be properly supported again. -In order to interpret the parameters in UTC time instead of local time, you can convert with the following snippet: - -```js -var timestampInterpretedAsLocal = yauzl.dosDateTimeToDate(date, time); // or entry.getLastModDate() -var timestampInterpretedAsUTCInstead = new Date( - timestampInterpretedAsLocal.getTime() - - timestampInterpretedAsLocal.getTimezoneOffset() * 60 * 1000 -); -``` - -Note that there is an ECMAScript proposal to add better timezone support to JavaScript called the `Temporal` API. -Last I checked, it is at stage 3. 
https://github.com/tc39/proposal-temporal - -Once that new API is available and stable, better timezone handling should be possible here somehow. -Feel free to open a feature request against this library when the time comes. +This function only remains exported in order to maintain compatibility with older versions of yauzl. +It will be removed in yauzl 4.0.0 unless someone asks for it to remain supported. ### getFileNameLowLevel(generalPurposeBitFlag, fileNameBuffer, extraFields, strictFileNames) @@ -437,8 +425,8 @@ These fields are of type `Number`: * `versionNeededToExtract` * `generalPurposeBitFlag` * `compressionMethod` - * `lastModFileTime` (MS-DOS format, see [`getLastModDate()`](#getlastmoddate)) - * `lastModFileDate` (MS-DOS format, see [`getLastModDate()`](#getlastmoddate)) + * `lastModFileTime` (MS-DOS format, see [`getLastModDate()`](#getlastmoddateoptions)) + * `lastModFileDate` (MS-DOS format, see [`getLastModDate()`](#getlastmoddateoptions)) * `crc32` * `compressedSize` * `uncompressedSize` @@ -507,14 +495,46 @@ Prior to yauzl version 2.7.0, this field was erroneously documented as `comment` For compatibility with any code that uses the field name `comment`, yauzl creates an alias field named `comment` which is identical to `fileComment`. -#### getLastModDate() +#### getLastModDate([options]) + +Returns the modification time of the file as a JavaScript `Date` object. +The timezone situation is a mess; read on to learn more. + +Due to the zip file specification having lackluster support for specifying timestamps natively, +there are several third-party extensions that add better support. +yauzl supports these encodings: + +1. InfoZIP "universal timestamp" extended field (`0x5455` aka `"UT"`): signed 32-bit seconds since `1970-01-01 00:00:00Z`, which supports the years 1901-2038 (partially inclusive) with 1-second precision. The value is timezone agnostic, i.e. always UTC. +2. 
NTFS extended field (`0x000a`): 64-bit signed 100-nanoseconds since `1601-01-01 00:00:00Z`, which supports the approximate years 20,000BCE-20,000CE with precision rounded to 1-millisecond (due to the JavaScript `Date` type). The value is timezone agnostic, i.e. always UTC. +3. DOS `lastModFileDate` and `lastModFileTime`: supports the years 1980-2108 (inclusive) with 2-second precision. Timezone is interpreted either as the local timezone or UTC depending on the `timezone` option documented below. -Effectively implemented as the following. See [`dosDateTimeToDate()`](#dosdatetimetodatedate-time). +If both the InfoZIP "universal timestamp" and NTFS extended fields are found, yauzl uses one of them, but which one is unspecified. +If neither is found, yauzl falls back to the built-in DOS `lastModFileDate` and `lastModFileTime`. +Every possible bit pattern of every encoding can be represented by a JavaScript `Date` object, +meaning this function cannot fail (barring parameter validation), and will never return an `Invalid Date` object. + +`options` may be omitted or `null`, and has the following defaults: ```js -return dosDateTimeToDate(this.lastModFileDate, this.lastModFileTime); +{ + timezone: "local", // or "UTC" + forceDosFormat: false, +} ``` +Set `forceDosFormat` to `true` (and do not set `timezone`) to enable pre-yauzl 3.2.0 behavior +where the InfoZIP "universal timestamp" and NTFS extended fields are ignored. + +The `timezone` option is only used in the DOS fallback. +If `timezone` is omitted, `null` or `"local"`, the `lastModFileDate` and `lastModFileTime` are interpreted in the system's current timezone (using `new Date(year, ...)`). +If `timezone` is `"UTC"`, the interpretation is in UTC+00:00 (using `new Date(Date.UTC(year, ...))`). + +The JavaScript `Date` object has several inherent limitations surrounding timezones. +There is an ECMAScript proposal to add better timezone support to JavaScript called the `Temporal` API. 
+Last I checked, it was at stage 3. https://github.com/tc39/proposal-temporal +Once that new API is available and stable, better timezone handling should be possible here somehow. +If you notice that the new API has become widely available, please open a feature request against this library to add support for it. + #### isEncrypted() Returns is this entry encrypted with "Traditional Encryption". diff --git a/index.js b/index.js index 043440b..579f715 100644 --- a/index.js +++ b/index.js @@ -589,8 +589,68 @@ ZipFile.prototype.readLocalFileHeader = function(entry, options, callback) { function Entry() { } -Entry.prototype.getLastModDate = function() { - return dosDateTimeToDate(this.lastModFileDate, this.lastModFileTime); +Entry.prototype.getLastModDate = function(options) { + if (options == null) options = {}; + + if (!options.forceDosFormat) { + // Check extended fields. + for (var i = 0; i < this.extraFields.length; i++) { + var extraField = this.extraFields[i]; + if (extraField.id === 0x5455) { + // InfoZIP "universal timestamp" extended field (`0x5455` aka `"UT"`). + // See the InfoZIP source code unix/unix.c:set_extra_field() and zipfile.c:ef_scan_ut_time(). + var data = extraField.data; + // The flags define which of the three fields are present: mtime, atime, ctime. + // We only care about mtime. + // Also, ctime is never included in practice. + // And also, atime is only included in the local file header for some reason + // despite the flags lying about its inclusion in the central header. + var flags = data[0]; + var HAS_MTIME = 1; + if (!(flags & HAS_MTIME)) continue; // This will realistically never happen. + // Although the positions of all of the fields shift around depending on the presence of other fields, + // mtime is always first if present, and that's the only one we care about. 
+ var posixTimestamp = data.readInt32LE(1); + return new Date(posixTimestamp * 1000); + } else if (extraField.id === 0x000a) { + var data = extraField.data; + // 4 bytes reserved + var cursor = 4; + while (cursor < data.length) { + // 2 bytes Tag + var tag = data.readUInt16LE(cursor); + cursor += 2; + // 2 bytes Size + var size = data.readUInt16LE(cursor); + cursor += 2; + if (tag !== 1) { + // Wrong tag. This will realistically never happen. + cursor += size; + continue; + } + // Tag1 is actually the only defined Tag. + if (size < 8) break; // Invalid. Ignore. + // 8 bytes Mtime + var hundredNanoSecondsSince1601 = 4294967296 * data.readInt32LE(cursor + 4) + data.readUInt32LE(cursor) + // Convert from NTFS to POSIX milliseconds. + // The big number below is the milliseconds between year 1601 and year 1970 + // (i.e. the negative POSIX timestamp of 1601-01-01 00:00:00Z) + var millisecondsSince1970 = hundredNanoSecondsSince1601 / 10000 - 11644473600000; + // Note on numeric precision: JavaScript Number objects lose precision above Number.MAX_SAFE_INTEGER, + // and NTFS timestamps are typically much bigger than that limit. + // (MAX_SAFE_INTEGER would represent 1629-07-17T23:58:45.475Z.) + // However, we're losing precision in the conversion from 100nanosecond units to millisecond units anyway, + // and the time at which we also lose 1-millisecond precision is just past the JavaScript Date limit (by design). + // Up through the year 2057, this conversion only drops 4 bits of precision, + // which is well under the 13-14 bits ratio between the milliseconds and 100nanoseconds. + return new Date(millisecondsSince1970); + } + } + } + } + + // Fallback to non-extended encoding. 
+ return dosDateTimeToDate(this.lastModFileDate, this.lastModFileTime, options.timezone); }; Entry.prototype.isEncrypted = function() { return (this.generalPurposeBitFlag & 0x1) !== 0; @@ -602,7 +662,7 @@ Entry.prototype.isCompressed = function() { function LocalFileHeader() { } -function dosDateTimeToDate(date, time) { +function dosDateTimeToDate(date, time, timezone) { var day = date & 0x1f; // 1-31 var month = (date >> 5 & 0xf) - 1; // 1-12, 0-11 var year = (date >> 9 & 0x7f) + 1980; // 0-128, 1980-2108 @@ -612,7 +672,13 @@ function dosDateTimeToDate(date, time) { var minute = time >> 5 & 0x3f; // 0-59 var hour = time >> 11 & 0x1f; // 0-23 - return new Date(year, month, day, hour, minute, second, millisecond); + if (timezone == null || timezone === "local") { + return new Date(year, month, day, hour, minute, second, millisecond); + } else if (timezone === "UTC") { + return new Date(Date.UTC(year, month, day, hour, minute, second, millisecond)); + } else { + throw new Error("unrecognized options.timezone: " + timezone); + } } function getFileNameLowLevel(generalPurposeBitFlag, fileNameBuffer, extraFields, strictFileNames) { @@ -843,9 +909,11 @@ function decodeBuffer(buffer, isUtf8) { } function readUInt64LE(buffer, offset) { - // there is no native function for this, because we can't actually store 64-bit integers precisely. + // There is no native function for this, because we can't actually store 64-bit integers precisely. // after 53 bits, JavaScript's Number type (IEEE 754 double) can't store individual integers anymore. // but since 53 bits is a whole lot more than 32 bits, we do our best anyway. + // As of 2020, Node has added support for BigInt, which obviates this whole function, + // but yauzl hasn't been updated to depend on BigInt (yet?). var lower32 = buffer.readUInt32LE(offset); var upper32 = buffer.readUInt32LE(offset + 4); // we can't use bitshifting here, because JavaScript bitshifting only works on 32-bit integers. 
diff --git a/test/success/unix-epoch.zip b/test/success/unix-epoch.zip new file mode 100644 index 0000000000000000000000000000000000000000..13c55ff5624e2fc2bb77fe9569c97713358543c2 GIT binary patch literal 178 zcmWIWW@h1HfB;1(4WalLWEe{GGAndb3-XgQ^hzp9LPIzim_d>=C#t2FR&X;gvb+HD uSyF&P0p5&Ea?H3)l7JWqB$qUTnCRxRLd?Z5la&pmnh^*?fV2mQ!vFwN9v4ag literal 0 HcmV?d00001 diff --git a/test/success/unix-epoch/unix-epoch.txt b/test/success/unix-epoch/unix-epoch.txt new file mode 100644 index 0000000..e69de29 diff --git a/test/test.js b/test/test.js index bf94560..2b88980 100644 --- a/test/test.js +++ b/test/test.js @@ -9,12 +9,12 @@ var child_process = require("child_process"); var Readable = require("stream").Readable; var Writable = require("stream").Writable; -// this is the date i made the example zip files and their content files, -// so this timestamp will be earlier than all the ones stored in these test zip files -// (and probably all future zip files). -// no timezone awareness, because that's how MS-DOS rolls. +// This is a month before I made the example zip files and their content files, +// so this timestamp will be earlier than all the ones stored in these test zip files unless otherwise noted. var earliestTimestamp = new Date(2014, 7, 18, 0, 0, 0, 0); +if (Date.now() / 1000 >= 2147483648) throw new Error("The year is 2038. The Epochalypse is uppon us. Signed 32-bit POSIX timestamps have collapsed. 
TODO: fix."); + var pend = new Pend(); // 1 thing at a time for better determinism/reproducibility pend.max = 1; @@ -98,8 +98,22 @@ listZipFiles([path.join(__dirname, "success"), path.join(__dirname, "wrong-entry if (fileComment !== "") throw new Error(testId + "expected empty fileComment"); var messagePrefix = testId + fileName + ": "; var timestamp = entry.getLastModDate(); - if (timestamp < earliestTimestamp) throw new Error(messagePrefix + "timestamp too early: " + timestamp); - if (timestamp > new Date()) throw new Error(messagePrefix + "timestamp in the future: " + timestamp); + if (fileName === "unix-epoch.txt") { + if (timestamp.getTime() !== 0) throw new Error(messagePrefix + "expected timestamp to be 0. found: " + timestamp); + var dosTimestamp = entry.getLastModDate({forceDosFormat:true}); + if (dosTimestamp.getTime() === 0) throw new Error(messagePrefix + "DOS timestamp can't encode this time: " + dosTimestamp); + var dosTimestampUTC = entry.getLastModDate({forceDosFormat:true, timezone: "UTC"}); + // This test is only meaningful when the system's local UTC offset is not 0. + if (dosTimestamp - dosTimestampUTC !== dosTimestamp.getTimezoneOffset() * 60 * 1000) { + throw new Error(messagePrefix + + "expected UTC timezone to be different by the current timezone offset. 
" + + "local: " + dosTimestamp + ", UTC: " + dosTimestampUTC + ); + } + } else { + if (timestamp < earliestTimestamp) throw new Error(messagePrefix + "timestamp too early: " + timestamp); + if (timestamp > new Date()) throw new Error(messagePrefix + "timestamp in the future: " + timestamp); + } var fileNameKey = fileName.replace(/\/$/, ""); var expectedContents = expectedArchiveContents[fileNameKey]; From 9040c652ffb8a701122ee090ba1cb99bf13a532f Mon Sep 17 00:00:00 2001 From: Josh Wolfe Date: Sat, 2 Nov 2024 21:40:03 -0400 Subject: [PATCH 2/2] bounds checks --- index.js | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/index.js b/index.js index 579f715..0810cc6 100644 --- a/index.js +++ b/index.js @@ -600,6 +600,7 @@ Entry.prototype.getLastModDate = function(options) { // InfoZIP "universal timestamp" extended field (`0x5455` aka `"UT"`). // See the InfoZIP source code unix/unix.c:set_extra_field() and zipfile.c:ef_scan_ut_time(). var data = extraField.data; + if (data.length < 5) continue; // Too short. // The flags define which of the three fields are present: mtime, atime, ctime. // We only care about mtime. // Also, ctime is never included in practice. @@ -616,7 +617,7 @@ Entry.prototype.getLastModDate = function(options) { var data = extraField.data; // 4 bytes reserved var cursor = 4; - while (cursor < data.length) { + while (cursor + 4 <= data.length) { // 2 bytes Tag var tag = data.readUInt16LE(cursor); cursor += 2; @@ -629,7 +630,7 @@ Entry.prototype.getLastModDate = function(options) { continue; } // Tag1 is actually the only defined Tag. - if (size < 8) break; // Invalid. Ignore. + if (size < 8 || cursor + size > data.length) break; // Invalid. Ignore. // 8 bytes Mtime var hundredNanoSecondsSince1601 = 4294967296 * data.readInt32LE(cursor + 4) + data.readUInt32LE(cursor) // Convert from NTFS to POSIX milliseconds.