feat: add generic saveHTMLAssets util

This commit is contained in:
Amin Yahyaabadi 2024-09-07 02:13:15 -07:00
parent f918fcc1fc
commit 8d16de0d38
No known key found for this signature in database
GPG Key ID: F52AF77F636088F0
8 changed files with 144 additions and 109 deletions

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -1,4 +1,4 @@
import { saveGitHubAssetList } from "../utils/github/fetch-assets.ts"
import { saveGitHubAssetList } from "../utils/asset/fetch-github-assets.ts"
/**
* Generate the list of all releases of a GitHub repository and save it to a json file

View File

@ -13,10 +13,10 @@ import { addUpdateAlternativesToRc, installAptPack } from "setup-apt"
import { installBrewPack } from "setup-brew"
import { rcOptions } from "../cli-options.js"
import { setupMacOSSDK } from "../macos-sdk/macos-sdk.js"
import { loadGitHubAssetList, matchAsset } from "../utils/asset/load-assets.js"
import { hasDnf } from "../utils/env/hasDnf.js"
import { isArch } from "../utils/env/isArch.js"
import { isUbuntu } from "../utils/env/isUbuntu.js"
import { loadGitHubAssetList, matchAsset } from "../utils/github/load-assets.js"
import { extract7Zip } from "../utils/setup/extract.js"
import { type InstallationInfo, type PackageInfo, setupBin } from "../utils/setup/setupBin.js"
import { setupChocoPack } from "../utils/setup/setupChocoPack.js"

View File

@ -1,9 +1,5 @@
import { mkdir, readFile, readdir, writeFile } from "fs/promises"
import he from "he"
import { DownloaderHelper } from "node-downloader-helper"
import JsonStringify from "safe-stable-stringify"
import { saveGitHubAssetList } from "../utils/github/fetch-assets.ts"
import { compareVersion } from "../utils/setup/version.ts"
import { saveGitHubAssetList } from "../utils/asset/fetch-github-assets.ts"
import { saveHTMLAssets } from "../utils/asset/fetch-html-assets.ts"
/**
* Generate the list of all releases of a GitHub repository and save it to a json file
@ -18,13 +14,22 @@ async function main() {
)
// go through https://releases.llvm.org/x.y.z and get all the assets
await fetchLLVMOrgReleases()
await saveHTMLAssets({
htmlDownloadDir: "./src/llvm/assets/",
path: "./src/llvm/llvm_org_releases.json",
*getAssetVersionAndURL() {
for (let major = 1; major <= 9; major++) {
for (let minor = 0; minor <= 9; minor++) {
for (let patch = 0; patch <= 9; patch++) {
const version = `${major}.${minor}.${patch}`
yield [version, `https://releases.llvm.org/${version}`] as [string, string]
}
main().catch((err) => {
console.error(err)
process.exit(1)
}
}
},
filterAssets: isAssetArchive,
})
}
function isAssetArchive(asset: string): boolean {
return asset.endsWith("tar.xz")
@ -33,96 +38,7 @@ function isAssetArchive(asset: string): boolean {
|| asset.endsWith("tar.gz")
}
async function fetchLLVMOrgReleases() {
const assetHTMLDir = "./src/llvm/assets/"
await fetchIndexFiles(assetHTMLDir)
const assets: Record<string, string[]> = await extractAssetsFromHTML(assetHTMLDir)
// sort the assets by version
const jsonStringify = JsonStringify.configure({
deterministic: compareVersion,
})
// write the assets to a json file
const data = jsonStringify(assets, null, 2)
await writeFile("./src/llvm/llvm_org_releases.json", data)
}
async function fetchIndexFiles(assetHTMLDir: string) {
const promises: Promise<void>[] = []
await mkdir(assetHTMLDir, { recursive: true })
for (let major = 1; major <= 9; major++) {
for (let minor = 0; minor <= 9; minor++) {
for (let patch = 0; patch <= 9; patch++) {
promises.push(fetchIndexFile(major, minor, patch, assetHTMLDir))
}
}
}
await Promise.all(promises)
return assetHTMLDir
}
async function fetchIndexFile(major: number, minor: number, patch: number, assetHTMLDir: string) {
try {
const version = `${major}.${minor}.${patch}`
const dl = new DownloaderHelper(
`https://releases.llvm.org/${version}`,
assetHTMLDir,
{
fileName: `${version}.html`,
override: {
skip: true,
},
},
)
dl.on("start", () => {
console.log(`Downloading ${version}`)
})
dl.on("error", (err) => {
console.error(`Failed to download ${version}.html: ${err}`)
})
await dl.start()
} catch (err) {
main().catch((err) => {
console.error(err)
}
}
async function extractAssetsFromHTML(assetHTMLDir: string) {
const assets: Record<string, string[]> = {}
const linkRegex = /href="([^"]+)"/g // match all href in the html
const indexFiles = await readdir(assetHTMLDir)
await Promise.all(indexFiles.map(async (indexFile) => {
const version = indexFile.replace(".html", "")
if (!(version in assets)) {
assets[version] = []
}
// read the html file
const body = await readFile(`${assetHTMLDir}/${indexFile}`, "utf8")
// parse the html via regex
let match: RegExpExecArray | null
// biome-ignore lint/suspicious/noAssignInExpressions: ignore
while ((match = linkRegex.exec(body)) !== null) {
const asset = match[1]
if (isAssetArchive(asset)) {
assets[version].push(he.decode(asset))
}
}
if (assets[version].length === 0) {
// eslint-disable-next-line @typescript-eslint/no-dynamic-delete
delete assets[version]
}
}))
return assets
}
process.exit(1)
})

View File

@ -0,0 +1,119 @@
import { mkdir, readFile, readdir, writeFile } from "fs/promises"
import he from "he"
import { DownloaderHelper } from "node-downloader-helper"
import JsonStringify from "safe-stable-stringify"
import { compareVersion } from "../setup/version.ts"
/** A `[version, url]` pair: a version label and the URL of the HTML page to download for it. */
type VersionAndURL = [version: string, url: string]

/** Settings consumed by `saveHTMLAssets` and its helpers. */
type Options = {
  /** Directory in which the downloaded HTML files are stored (one `<version>.html` per page). */
  htmlDownloadDir: string
  /** Location of the JSON file the extracted assets are written to. */
  path: string
  /**
   * Enumerates the pages to download.
   *
   * Each yielded tuple pairs a version with the URL that is fetched for that version.
   */
  getAssetVersionAndURL: () => Generator<VersionAndURL, void, unknown>
  /**
   * Optional predicate over the `href` values found in the HTML files.
   *
   * When given, values for which it returns `false` are not recorded as assets.
   */
  filterAssets?: (asset: string) => boolean
}
/**
 * Download the HTML pages described by `opts`, collect the assets they link to, and store them as JSON.
 *
 * The assets are the `href` values found in the downloaded HTML files, grouped by version.
 * The result is written to `opts.path`.
 */
export async function saveHTMLAssets(opts: Options) {
  // the pages have to be on disk before they can be parsed
  await fetchIndexFiles(opts)
  const assetsByVersion = await extractAssetsFromHTML(opts)

  // compareVersion decides the key order of the output, so the versions come out sorted
  const stringifySorted = JsonStringify.configure({ deterministic: compareVersion })

  // pretty-print with a 2-space indent and write the json file
  await writeFile(opts.path, stringifySorted(assetsByVersion, null, 2))
}
/**
 * Download every page yielded by `opts.getAssetVersionAndURL` into `opts.htmlDownloadDir`.
 *
 * The directory is created first (parents included). All downloads are then started
 * without waiting for one another and awaited together.
 */
async function fetchIndexFiles(opts: Options) {
  await mkdir(opts.htmlDownloadDir, { recursive: true })

  // one pending download per [version, url] pair, started while the generator is being drained
  const downloads = Array.from(
    opts.getAssetVersionAndURL(),
    ([version, url]) => fetchIndexFile(version, url, opts.htmlDownloadDir),
  )
  await Promise.all(downloads)
}
/**
 * Download a single page from `url` and store it as `<version>.html` inside `htmlDownloadDir`.
 *
 * Failures never propagate to the caller: download errors reported through the "error" event
 * and anything thrown while setting up or starting the download are only logged.
 */
async function fetchIndexFile(version: string, url: string, htmlDownloadDir: string) {
  try {
    const downloadOptions = {
      fileName: `${version}.html`,
      // NOTE(review): per node-downloader-helper, `skip` should leave an already existing file alone - confirm
      override: {
        skip: true,
      },
    }
    const downloader = new DownloaderHelper(url, htmlDownloadDir, downloadOptions)

    downloader.on("start", () => {
      console.log(`Downloading ${version}`)
    })
    downloader.on("error", (err) => {
      console.error(`Failed to download ${version}.html: ${err}`)
    })

    await downloader.start()
  } catch (err) {
    console.error(err)
  }
}
/**
 * Collect the asset links found in the downloaded HTML files.
 *
 * Every `*.html` file directly inside `opts.htmlDownloadDir` is read and each `href="..."` value in it
 * is treated as an asset candidate. A candidate is kept when `opts.filterAssets` is not given or
 * returns `true` for it (the filter sees the raw value, before HTML entities are decoded), and is
 * stored HTML-decoded.
 *
 * @param opts the options; only `htmlDownloadDir` and `filterAssets` are read here
 * @returns a map from version (the file name without its trailing `.html`) to the assets found for
 * that version. Versions without any kept asset are left out.
 */
async function extractAssetsFromHTML(opts: Options) {
  const htmlExtension = ".html"
  const linkRegex = /href="([^"]+)"/g // match all href in the html
  const assets: Record<string, string[]> = {}

  // The download directory can hold entries other than the `<version>.html` files (sub-directories,
  // OS/editor files, ...). Reading those would either reject or add bogus versions, so skip them.
  const dirEntries = await readdir(opts.htmlDownloadDir)
  const indexFiles = dirEntries.filter((entry) => entry.endsWith(htmlExtension))

  await Promise.all(indexFiles.map(async (indexFile) => {
    // drop only the trailing extension, a ".html" elsewhere in the name is part of the version
    const version = indexFile.slice(0, -htmlExtension.length)

    // read the html file
    const body = await readFile(`${opts.htmlDownloadDir}/${indexFile}`, "utf8")

    // matchAll iterates over a copy of the regex, so the shared `linkRegex` is never mutated
    const versionAssets: string[] = []
    for (const match of body.matchAll(linkRegex)) {
      const asset = match[1]
      if (opts.filterAssets !== undefined && !opts.filterAssets(asset)) {
        continue
      }
      versionAssets.push(he.decode(asset))
    }

    // versions without assets are not recorded at all
    if (versionAssets.length !== 0) {
      assets[version] = versionAssets
    }
  }))
  return assets
}