Update worker to avoid downloading entire wheel (#168)

The previous solution downloaded the entire wheel in-memory. This
solution reads lazily.

Closes https://github.com/astral-sh/puffin/issues/153.
This commit is contained in:
Charlie Marsh 2023-10-22 18:07:44 -07:00 committed by GitHub
parent 8f856dcadf
commit 888f42494e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 35 additions and 32 deletions

View file

@ -8,6 +8,7 @@
"name": "api",
"version": "0.0.0",
"dependencies": {
"@zip.js/zip.js": "^2.7.30",
"install": "^0.13.0",
"jszip": "^3.10.1",
"npm": "^10.2.1",
@ -339,6 +340,16 @@
"integrity": "sha512-TgfOX+mGY/NyNxJLIbDWrO9DjGoVSW9+aB8H2yy1fy32jsvxijhmyJI9fDFgvz3YP4lvJaq9DzdR/M1bOgVc9g==",
"dev": true
},
"node_modules/@zip.js/zip.js": {
"version": "2.7.30",
"resolved": "https://registry.npmjs.org/@zip.js/zip.js/-/zip.js-2.7.30.tgz",
"integrity": "sha512-nhMvQCj+TF1ATBqYzFds7v+yxPBhdDYHh8J341KtC1D2UrVBUIYcYK4Jy1/GiTsxOXEiKOXSUxvPG/XR+7jMqw==",
"engines": {
"bun": ">=0.7.0",
"deno": ">=1.0.0",
"node": ">=16.5.0"
}
},
"node_modules/anymatch": {
"version": "3.1.3",
"resolved": "https://registry.npmjs.org/anymatch/-/anymatch-3.1.3.tgz",

View file

@ -13,8 +13,8 @@
"fmt": "prettier --cache -w ."
},
"dependencies": {
"@zip.js/zip.js": "^2.7.30",
"install": "^0.13.0",
"jszip": "^3.10.1",
"npm": "^10.2.1",
"prettier": "^3.0.3"
}

View file

@ -1,4 +1,4 @@
import JSZip from "jszip";
import * as zip from "@zip.js/zip.js";
export interface Env {}
@ -27,7 +27,6 @@ export default {
// /packages/d2/3d/fa76db83bf75c4f8d338c2fd15c8d33fdd7ad23a9b5e57eb6c5de26b430e/click-7.1.2-py2.py3-none-any.whl
const url = new URL(request.url);
const path = url.pathname;
const query = url.search;
if (path.startsWith("/packages/")) {
// Given the path, extract `click-7.1.2`.
@ -35,22 +34,11 @@ export default {
const name = parts[parts.length - 1].split("-")[0];
const version = parts[parts.length - 1].split("-")[1];
// Extract the zip contents.
// Now, fetch "https://files.pythonhosted.org/packages/d2/3d/fa76db83bf75c4f8d338c2fd15c8d33fdd7ad23a9b5e57eb6c5de26b430e/click-7.1.2-py2.py3-none-any.whl"
response = await fetch(`https://files.pythonhosted.org${path}`, {
cf: {
// Always cache this fetch regardless of content type
// for a max of 5 seconds before revalidating the resource
cacheTtl: 5,
cacheEverything: true,
},
});
const buffer = await response.arrayBuffer();
const archive = await JSZip.loadAsync(buffer);
const file = await archive
.folder(`${name}-${version}.dist-info`)
?.file("METADATA")
?.async("string");
// Read the metadata.
const reader = new zip.ZipReader(
new zip.HttpRangeReader(`https://files.pythonhosted.org${path}`),
);
const file = await readMetadata(reader, name, version);
if (!file) {
return new Response("Not found", { status: 404 });
}
@ -64,19 +52,6 @@ export default {
},
});
ctx.waitUntil(cache.put(cacheKey, response.clone()));
} else if (path.startsWith("/simple/")) {
// Pass the request on to `https://pypi.org/`. Include query string.
// Propagate headers.
response = await fetch(`https://pypi.org${path}${query}`, {
cf: {
// Always cache this fetch regardless of content type
// for a max of 5 seconds before revalidating the resource
cacheTtl: 5,
cacheEverything: true,
},
});
ctx.waitUntil(cache.put(cacheKey, response.clone()));
} else {
return new Response("Not found", { status: 404 });
@ -88,3 +63,20 @@ export default {
return response;
},
};
/**
* Read the `METADATA` file from the given wheel.
*/
async function readMetadata(
reader: zip.ZipReader<any>,
name: string,
version: string,
) {
const entries = await reader.getEntriesGenerator();
for await (const entry of entries) {
if (entry.filename == `${name}-${version}.dist-info/METADATA`) {
return await entry.getData!(new zip.TextWriter());
}
}
return null;
}