FileMiddleware.js 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683
  1. /*
  2. MIT License http://www.opensource.org/licenses/mit-license.php
  3. */
  4. "use strict";
  5. const { constants } = require("buffer");
  6. const { pipeline } = require("stream");
  7. const {
  8. createBrotliCompress,
  9. createBrotliDecompress,
  10. createGzip,
  11. createGunzip,
  12. constants: zConstants
  13. } = require("zlib");
  14. const createHash = require("../util/createHash");
  15. const { dirname, join, mkdirp } = require("../util/fs");
  16. const memoize = require("../util/memoize");
  17. const SerializerMiddleware = require("./SerializerMiddleware");
  18. /** @typedef {typeof import("../util/Hash")} Hash */
  19. /** @typedef {import("../util/fs").IntermediateFileSystem} IntermediateFileSystem */
  20. /** @typedef {import("./types").BufferSerializableType} BufferSerializableType */
  21. /*
  22. Format:
  23. File -> Header Section*
  24. Version -> u32
  25. AmountOfSections -> u32
  26. SectionSize -> i32 (if less than zero represents lazy value)
  27. Header -> Version AmountOfSections SectionSize*
  28. Buffer -> n bytes
  29. Section -> Buffer
  30. */
  31. // "wpc" + 1 in little-endian
  32. const VERSION = 0x01637077;
  33. const WRITE_LIMIT_TOTAL = 0x7fff0000;
  34. const WRITE_LIMIT_CHUNK = 511 * 1024 * 1024;
  /**
   * Hashes the given buffers (in order) and returns the digest as a hex string.
   * @param {Buffer[]} buffers buffers fed into the hash one after another
   * @param {string | Hash} hashFunction hash function to use
   * @returns {string} hex encoded digest
   */
  const hashForName = (buffers, hashFunction) => {
    const hasher = createHash(hashFunction);
    for (const chunk of buffers) {
      hasher.update(chunk);
    }
    const digest = hasher.digest("hex");
    return /** @type {string} */ (digest);
  };
  45. const COMPRESSION_CHUNK_SIZE = 100 * 1024 * 1024;
  46. const DECOMPRESSION_CHUNK_SIZE = 100 * 1024 * 1024;
  /**
   * Writes `value` into `buf` at `offset` as an unsigned 64 bit little-endian
   * integer. The native BigInt writer is used when Buffer provides it; otherwise
   * the value is split into a low and a high 32 bit word.
   * @param {Buffer} buf target buffer
   * @param {number} value value to write
   * @param {number} offset byte offset inside the target buffer
   * @returns {void}
   */
  const writeUInt64LE = (() => {
    if (Buffer.prototype.writeBigUInt64LE) {
      return (buf, value, offset) => {
        buf.writeBigUInt64LE(BigInt(value), offset);
      };
    }
    return (buf, value, offset) => {
      const lowWord = value % 0x100000000;
      const highWord = (value - lowWord) / 0x100000000;
      buf.writeUInt32LE(lowWord, offset);
      buf.writeUInt32LE(highWord, offset + 4);
    };
  })();
  /**
   * Reads an unsigned 64 bit little-endian integer from `buf` at `offset` and
   * returns it as a plain number. The native BigInt reader is used when Buffer
   * provides it; otherwise two 32 bit words are combined manually.
   * @param {Buffer} buf source buffer
   * @param {number} offset byte offset inside the source buffer
   * @returns {number} the decoded value
   */
  const readUInt64LE = (() => {
    if (Buffer.prototype.readBigUInt64LE) {
      return (buf, offset) => Number(buf.readBigUInt64LE(offset));
    }
    return (buf, offset) => {
      const lowWord = buf.readUInt32LE(offset);
      const highWord = buf.readUInt32LE(offset + 4);
      return highWord * 0x100000000 + lowWord;
    };
  })();
  66. /**
  67. * @typedef {object} SerializeResult
  68. * @property {string | false} name
  69. * @property {number} size
  70. * @property {Promise=} backgroundJob
  71. */
  /**
   * Serializes a list of buffers and lazy values into one file and hands the
   * resulting buffers to `writeFile`.
   *
   * Consecutive buffers are grouped into one section. Every lazy value becomes a
   * section of its own that only stores a pointer (size + file name) to a
   * separate file, which is produced by calling this function recursively.
   *
   * @param {FileMiddleware} middleware this
   * @param {BufferSerializableType[] | Promise<BufferSerializableType[]>} data data to be serialized
   * @param {string | boolean} name file base name, `true` derives the name from a hash of the content
   * @param {function(string | false, Buffer[], number): Promise<void>} writeFile writes a file
   * @param {string | Hash} hashFunction hash function to use
   * @returns {Promise<SerializeResult>} resulting file pointer and promise
   */
  const serialize = async (
    middleware,
    data,
    name,
    writeFile,
    hashFunction = "md4"
  ) => {
    /** @type {(Buffer[] | Buffer | SerializeResult | Promise<SerializeResult>)[]} */
    const sections = [];
    /** @type {WeakMap<SerializeResult, function(): any | Promise<any>>} */
    const lazyByResult = new WeakMap();
    /** @type {Buffer[] | undefined} */
    let openGroup = undefined;

    for (const entry of await data) {
      if (typeof entry !== "function") {
        if (!entry) {
          throw new Error("Unexpected falsy value in items array");
        }
        // plain buffers are appended to the currently open group of buffers
        if (openGroup) {
          openGroup.push(entry);
        } else {
          openGroup = [entry];
          sections.push(openGroup);
        }
        continue;
      }

      if (!SerializerMiddleware.isLazy(entry)) {
        throw new Error("Unexpected function");
      }
      if (!SerializerMiddleware.isLazy(entry, middleware)) {
        throw new Error(
          "Unexpected lazy value with non-this target (can't pass through lazy values)"
        );
      }
      // a lazy value always terminates the current group of buffers
      openGroup = undefined;

      const serializedInfo = SerializerMiddleware.getLazySerializedValue(entry);
      if (serializedInfo) {
        if (typeof serializedInfo === "function") {
          throw new Error(
            "Unexpected lazy value with non-this target (can't pass through lazy values)"
          );
        }
        // already serialized earlier: reuse the stored pointer
        sections.push(serializedInfo);
        continue;
      }

      const content = entry();
      if (!content) {
        throw new Error("Unexpected falsy value returned by lazy value function");
      }
      const options = SerializerMiddleware.getLazyOptions(entry);
      const nested = serialize(
        middleware,
        content,
        (options && options.name) || true,
        writeFile,
        hashFunction
      ).then(result => {
        /** @type {any} */ (entry).options.size = result.size;
        lazyByResult.set(result, entry);
        return result;
      });
      sections.push(nested);
    }

    /** @type {Promise<any>[]} */
    const backgroundJobs = [];
    const settledSections = await Promise.all(
      /** @type {Promise<Buffer[] | Buffer | SerializeResult>[]} */ (sections)
    );
    const resolvedData = settledSections.map(section => {
      if (Array.isArray(section) || Buffer.isBuffer(section)) return section;
      backgroundJobs.push(section.backgroundJob);
      // create pointer buffer from size and name
      const nameBytes = Buffer.from(/** @type {string} */ (section.name));
      const pointer = Buffer.allocUnsafe(8 + nameBytes.length);
      writeUInt64LE(pointer, section.size, 0);
      nameBytes.copy(pointer, 8, 0);
      SerializerMiddleware.setLazySerializedValue(
        lazyByResult.get(section),
        pointer
      );
      return pointer;
    });

    /** @type {number[]} */
    const lengths = [];
    /** @type {Buffer[]} */
    const payload = [];
    for (const section of resolvedData) {
      if (Array.isArray(section)) {
        let total = 0;
        for (const part of section) {
          total += part.length;
          payload.push(part);
        }
        // a section size must fit into an i32, larger groups use several entries
        for (; total > 0x7fffffff; total -= 0x7fffffff) {
          lengths.push(0x7fffffff);
        }
        lengths.push(total);
      } else if (section) {
        // negative size marks a pointer to a lazy value
        lengths.push(-section.length);
        payload.push(section);
      } else {
        throw new Error("Unexpected falsy value in resolved data " + section);
      }
    }

    const header = Buffer.allocUnsafe(8 + lengths.length * 4);
    header.writeUInt32LE(VERSION, 0);
    header.writeUInt32LE(lengths.length, 4);
    lengths.forEach((sectionSize, index) => {
      header.writeInt32LE(sectionSize, 8 + index * 4);
    });

    /** @type {Buffer[]} */
    const fileBuffers = [header, ...payload];
    const fileName =
      name === true ? hashForName(fileBuffers, hashFunction) : name;
    const size = fileBuffers.reduce((sum, part) => sum + part.length, 0);
    backgroundJobs.push(writeFile(fileName, fileBuffers, size));

    return {
      size,
      name: fileName,
      backgroundJob:
        backgroundJobs.length === 1
          ? backgroundJobs[0]
          : Promise.all(backgroundJobs)
    };
  };
  /**
   * Reads a serialized file and splits it back into its sections.
   *
   * The file starts with a header (version, number of sections, one i32 size per
   * section). A non-negative size denotes a plain buffer section; consecutive
   * buffer sections are merged. A negative size denotes a pointer (u64 size
   * followed by a file name) which is turned into a lazy value that deserializes
   * the referenced file on demand.
   *
   * The file content is provided as a list of chunks. Sections may start or end
   * anywhere inside a chunk, including exactly on a chunk boundary.
   *
   * @param {FileMiddleware} middleware this
   * @param {string | false} name filename
   * @param {function(string | false): Promise<Buffer[]>} readFile read content of a file
   * @returns {Promise<BufferSerializableType[]>} deserialized data
   * @throws {Error} when the file is empty, has an unknown version or ends prematurely
   */
  const deserialize = async (middleware, name, readFile) => {
    const contents = await readFile(name);
    if (contents.length === 0) throw new Error("Empty file " + name);
    let contentsIndex = 0;
    let contentItem = contents[0];
    let contentItemLength = contentItem.length;
    let contentPosition = 0;
    if (contentItemLength === 0) throw new Error("Empty file " + name);

    // Moves the read cursor to the start of the next chunk.
    const nextContent = () => {
      if (contentsIndex + 1 >= contents.length) {
        // fail with a meaningful error instead of dereferencing a missing chunk
        throw new Error("Unexpected end of data");
      }
      contentsIndex++;
      contentItem = contents[contentsIndex];
      contentItemLength = contentItem.length;
      contentPosition = 0;
    };

    /**
     * Makes sure at least `n` contiguous bytes are available at the cursor.
     * When the bytes span several chunks they are copied into a temporary
     * chunk and the partially consumed source chunk is shortened in place.
     * @param {number} n number of bytes to ensure
     */
    const ensureData = n => {
      if (contentPosition === contentItemLength) {
        nextContent();
      }
      while (contentItemLength - contentPosition < n) {
        const remaining = contentItem.slice(contentPosition);
        let lengthFromNext = n - remaining.length;
        const buffers = [remaining];
        for (let i = contentsIndex + 1; i < contents.length; i++) {
          const l = contents[i].length;
          if (l > lengthFromNext) {
            buffers.push(contents[i].slice(0, lengthFromNext));
            contents[i] = contents[i].slice(lengthFromNext);
            lengthFromNext = 0;
            break;
          } else {
            buffers.push(contents[i]);
            contentsIndex = i;
            lengthFromNext -= l;
          }
        }
        if (lengthFromNext > 0) throw new Error("Unexpected end of data");
        contentItem = Buffer.concat(buffers, n);
        contentItemLength = n;
        contentPosition = 0;
      }
    };

    /**
     * @returns {number} the next unsigned 32 bit value
     */
    const readUInt32LE = () => {
      ensureData(4);
      const value = contentItem.readUInt32LE(contentPosition);
      contentPosition += 4;
      return value;
    };

    /**
     * @returns {number} the next signed 32 bit value
     */
    const readInt32LE = () => {
      ensureData(4);
      const value = contentItem.readInt32LE(contentPosition);
      contentPosition += 4;
      return value;
    };

    /**
     * @param {number} l length
     * @returns {Buffer} buffer containing the next `l` bytes
     */
    const readSlice = l => {
      ensureData(l);
      if (contentPosition === 0 && contentItemLength === l) {
        // the whole chunk is requested: hand it out without copying
        const result = contentItem;
        if (contentsIndex + 1 < contents.length) {
          nextContent();
        } else {
          contentPosition = l;
        }
        return result;
      }
      const result = contentItem.slice(contentPosition, contentPosition + l);
      contentPosition += l;
      // we clone the buffer here to allow the original content to be garbage collected
      return l * 2 < contentItem.buffer.byteLength ? Buffer.from(result) : result;
    };

    const version = readUInt32LE();
    if (version !== VERSION) {
      throw new Error("Invalid file version");
    }
    const sectionCount = readUInt32LE();
    const lengths = [];
    let lastLengthPositive = false;
    for (let i = 0; i < sectionCount; i++) {
      const value = readInt32LE();
      const valuePositive = value >= 0;
      if (lastLengthPositive && valuePositive) {
        // consecutive buffer sections are treated as one big section
        lengths[lengths.length - 1] += value;
      } else {
        lengths.push(value);
        lastLengthPositive = valuePositive;
      }
    }

    const result = [];
    for (const length of lengths) {
      if (length < 0) {
        // pointer to a lazy value stored in another file
        const slice = readSlice(-length);
        const size = readUInt64LE(slice, 0);
        const nameBuffer = slice.slice(8);
        const name = nameBuffer.toString();
        result.push(
          SerializerMiddleware.createLazy(
            memoize(() => deserialize(middleware, name, readFile)),
            middleware,
            {
              name,
              size
            },
            slice
          )
        );
        continue;
      }

      let remaining = length;
      // An empty section that starts on a chunk boundary contributes nothing.
      // Skipping it avoids advancing past the last chunk.
      if (remaining === 0 && contentPosition === contentItemLength) continue;
      do {
        // Advance exactly once when the cursor sits at the end of a chunk.
        // Advancing a second time would skip the chunk holding the data.
        if (contentPosition === contentItemLength) nextContent();
        const available = contentItemLength - contentPosition;
        if (contentPosition === 0 && remaining >= available) {
          // the whole chunk belongs to this section: reuse it as is
          result.push(contentItem);
          remaining -= available;
          contentPosition = contentItemLength;
        } else {
          // only a part of the chunk belongs to this section: create a view
          const take = Math.min(remaining, available);
          result.push(
            Buffer.from(
              contentItem.buffer,
              contentItem.byteOffset + contentPosition,
              take
            )
          );
          contentPosition += take;
          remaining -= take;
        }
      } while (remaining > 0);
    }
    return result;
  };
  /**
   * Serializer middleware that stores the serialized buffers in files and reads
   * them back. Files whose name ends with ".gz" or ".br" are transparently
   * compressed and decompressed.
   *
   * @typedef {BufferSerializableType[]} DeserializedType
   * @typedef {true} SerializedType
   * @extends {SerializerMiddleware<DeserializedType, SerializedType>}
   */
  class FileMiddleware extends SerializerMiddleware {
    /**
     * @param {IntermediateFileSystem} fs filesystem
     * @param {string | Hash} hashFunction hash function to use
     */
    constructor(fs, hashFunction = "md4") {
      super();
      this.fs = fs;
      this._hashFunction = hashFunction;
    }

    /**
     * Writes the data to `context.filename` (index file) and to sibling files
     * for lazy values. All files are first written with a "_" suffix and are
     * renamed to their final names once everything has been written.
     * @param {DeserializedType} data data
     * @param {object} context context object
     * @returns {SerializedType|Promise<SerializedType>} serialized data
     */
    serialize(data, context) {
      const { filename, extension = "" } = context;
      return new Promise((resolve, reject) => {
        mkdirp(this.fs, dirname(this.fs, filename), err => {
          if (err) return reject(err);

          // It's important that we don't touch existing files during serialization
          // because serialize may read existing files (when deserializing)
          const allWrittenFiles = new Set();
          const writeFile = async (name, content, size) => {
            const file = name
              ? join(this.fs, filename, `../${name}${extension}`)
              : filename;
            await new Promise((resolve, reject) => {
              let stream = this.fs.createWriteStream(file + "_");
              let compression;
              if (file.endsWith(".gz")) {
                compression = createGzip({
                  chunkSize: COMPRESSION_CHUNK_SIZE,
                  level: zConstants.Z_BEST_SPEED
                });
              } else if (file.endsWith(".br")) {
                compression = createBrotliCompress({
                  chunkSize: COMPRESSION_CHUNK_SIZE,
                  params: {
                    [zConstants.BROTLI_PARAM_MODE]: zConstants.BROTLI_MODE_TEXT,
                    [zConstants.BROTLI_PARAM_QUALITY]: 2,
                    [zConstants.BROTLI_PARAM_DISABLE_LITERAL_CONTEXT_MODELING]: true,
                    [zConstants.BROTLI_PARAM_SIZE_HINT]: size
                  }
                });
              }
              if (compression) {
                // Settle only once the whole pipeline is done, i.e. the
                // compressed data has reached the file stream. Resolving on the
                // compressor's "finish" would allow the rename below to happen
                // while the file is still being written.
                pipeline(compression, stream, err => {
                  if (err) return reject(err);
                  resolve();
                });
                stream = compression;
              } else {
                stream.on("error", err => reject(err));
                stream.on("finish", () => resolve());
              }
              // split into chunks for WRITE_LIMIT_CHUNK size
              const chunks = [];
              for (const b of content) {
                if (b.length < WRITE_LIMIT_CHUNK) {
                  chunks.push(b);
                } else {
                  for (let i = 0; i < b.length; i += WRITE_LIMIT_CHUNK) {
                    chunks.push(b.slice(i, i + WRITE_LIMIT_CHUNK));
                  }
                }
              }

              const len = chunks.length;
              let i = 0;
              const batchWrite = err => {
                // will be handled in "on" error handler
                if (err) return;

                if (i === len) {
                  stream.end();
                  return;
                }

                // queue up a batch of chunks up to the write limit
                // end is exclusive
                let end = i;
                let sum = chunks[end++].length;
                while (end < len) {
                  sum += chunks[end].length;
                  if (sum > WRITE_LIMIT_TOTAL) break;
                  end++;
                }
                while (i < end - 1) {
                  stream.write(chunks[i++]);
                }
                // continue with the next batch once the last chunk was written
                stream.write(chunks[i++], batchWrite);
              };
              batchWrite();
            });
            // the index file (name === false) is renamed separately as last step
            if (name) allWrittenFiles.add(file);
          };

          resolve(
            serialize(this, data, false, writeFile, this._hashFunction).then(
              async ({ backgroundJob }) => {
                await backgroundJob;

                // Rename the index file to disallow access during inconsistent file state
                // This is best effort: the error is ignored on purpose
                // (e.g. there is no previous index file yet)
                await new Promise(resolve =>
                  this.fs.rename(filename, filename + ".old", () => {
                    resolve();
                  })
                );

                // update all written files
                await Promise.all(
                  Array.from(
                    allWrittenFiles,
                    file =>
                      new Promise((resolve, reject) => {
                        this.fs.rename(file + "_", file, err => {
                          if (err) return reject(err);
                          resolve();
                        });
                      })
                  )
                );

                // As final step automatically update the index file to have a consistent pack again
                // The promise must use its own reject: the outer promise has
                // already been resolved at this point, so rejecting it would be
                // a no-op and this step would never settle on error.
                await new Promise((resolve, reject) => {
                  this.fs.rename(filename + "_", filename, err => {
                    if (err) return reject(err);
                    resolve();
                  });
                });
                return /** @type {true} */ (true);
              }
            )
          );
        });
      });
    }

    /**
     * Reads `context.filename` and returns its sections. Lazy values read
     * their sibling files on demand.
     * @param {SerializedType} data data
     * @param {object} context context object
     * @returns {DeserializedType|Promise<DeserializedType>} deserialized data
     */
    deserialize(data, context) {
      const { filename, extension = "" } = context;
      const readFile = name =>
        new Promise((resolve, reject) => {
          const file = name
            ? join(this.fs, filename, `../${name}${extension}`)
            : filename;
          this.fs.stat(file, (err, stats) => {
            if (err) {
              reject(err);
              return;
            }
            let remaining = /** @type {number} */ (stats.size);
            /** @type {Buffer | undefined} */
            let currentBuffer;
            /** @type {number | undefined} */
            let currentBufferUsed;
            const buf = [];
            /** @type {import("zlib").Zlib & import("stream").Transform | undefined} */
            let decompression;
            if (file.endsWith(".gz")) {
              decompression = createGunzip({
                chunkSize: DECOMPRESSION_CHUNK_SIZE
              });
            } else if (file.endsWith(".br")) {
              decompression = createBrotliDecompress({
                chunkSize: DECOMPRESSION_CHUNK_SIZE
              });
            }
            if (decompression) {
              // With decompression the result is only complete when both the
              // file has been read and the decompressor has emitted all data.
              // The read logic below keeps using resolve/reject, which now
              // settle the first of the two promises.
              let newResolve, newReject;
              resolve(
                Promise.all([
                  new Promise((rs, rj) => {
                    newResolve = rs;
                    newReject = rj;
                  }),
                  new Promise((resolve, reject) => {
                    decompression.on("data", chunk => buf.push(chunk));
                    decompression.on("end", () => resolve());
                    decompression.on("error", err => reject(err));
                  })
                ]).then(() => buf)
              );
              resolve = newResolve;
              reject = newReject;
            }
            this.fs.open(file, "r", (err, fd) => {
              if (err) {
                reject(err);
                return;
              }
              const read = () => {
                if (currentBuffer === undefined) {
                  currentBuffer = Buffer.allocUnsafeSlow(
                    Math.min(
                      constants.MAX_LENGTH,
                      remaining,
                      decompression ? DECOMPRESSION_CHUNK_SIZE : Infinity
                    )
                  );
                  currentBufferUsed = 0;
                }
                let readBuffer = currentBuffer;
                let readOffset = currentBufferUsed;
                let readLength = currentBuffer.length - currentBufferUsed;
                // values passed to fs.read must be valid int32 values
                if (readOffset > 0x7fffffff) {
                  readBuffer = currentBuffer.slice(readOffset);
                  readOffset = 0;
                }
                if (readLength > 0x7fffffff) {
                  readLength = 0x7fffffff;
                }
                this.fs.read(
                  fd,
                  readBuffer,
                  readOffset,
                  readLength,
                  null,
                  (err, bytesRead) => {
                    if (err) {
                      this.fs.close(fd, () => {
                        reject(err);
                      });
                      return;
                    }
                    if (bytesRead === 0 && remaining > 0) {
                      // The file is shorter than stat reported (e.g. it was
                      // truncated meanwhile). Without this check the loop
                      // would keep reading zero bytes forever.
                      this.fs.close(fd, () => {
                        reject(new Error(`Unexpected end of file ${file}`));
                      });
                      return;
                    }
                    currentBufferUsed += bytesRead;
                    remaining -= bytesRead;
                    if (currentBufferUsed === currentBuffer.length) {
                      // buffer is full: pass it on and start a new one
                      if (decompression) {
                        decompression.write(currentBuffer);
                      } else {
                        buf.push(currentBuffer);
                      }
                      currentBuffer = undefined;
                      if (remaining === 0) {
                        if (decompression) {
                          decompression.end();
                        }
                        this.fs.close(fd, err => {
                          if (err) {
                            reject(err);
                            return;
                          }
                          resolve(buf);
                        });
                        return;
                      }
                    }
                    read();
                  }
                );
              };
              read();
            });
          });
        });
      return deserialize(this, false, readFile);
    }
  }
  654. module.exports = FileMiddleware;