From 2c30fce7c8cf5d3dd0b4bd783c3da98b3625f893 Mon Sep 17 00:00:00 2001
From: Atreya Bain
Date: Tue, 19 Aug 2025 19:13:24 +0100
Subject: [PATCH] [add] implement vulnerability checking and advisory fetching, enhance repo processing, and add utility functions

---
 .gitignore                  |   8 +-
 src/bundle/index.mjs        |  16 +--
 src/libcalls.mjs            |   4 +-
 src/tsCalls.mjs             |   6 ++
 src_dataset/FILTER_LIST.mjs |   3 +-
 src_dataset/cache.mjs       |   4 +-
 src_dataset/index.mjs       |   4 +-
 src_dataset/mine.mjs        |  35 ++-
 src_vuln/index.mjs          |  65 ++++++++++++
 src_vuln/lib.mjs            |  26 +++++
 src_vuln/slicedeps.mjs      |  97 +++++++++++++++++
 success.txt                 | 204 ++++++++++++++++++++++++++++++++++++
 test_src/build-package.cjs  | 198 ++++++++++++++++++++++++++++++++++
 test_src/check_diff.cjs     |  45 ++++++++
 14 files changed, 700 insertions(+), 15 deletions(-)
 create mode 100644 src_vuln/index.mjs
 create mode 100644 src_vuln/lib.mjs
 create mode 100644 src_vuln/slicedeps.mjs
 create mode 100644 success.txt
 create mode 100644 test_src/build-package.cjs
 create mode 100644 test_src/check_diff.cjs

diff --git a/.gitignore b/.gitignore
index 3e93284..ccd0f24 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,4 +2,10 @@ dist
 output.csv
 output/
 node_modules/
-cache
\ No newline at end of file
+cache
+failed-install.txt
+success copy.txt
+current-processing.log
+current.log
+left_subtract.txt
+processed.log
diff --git a/src/bundle/index.mjs b/src/bundle/index.mjs
index ce0b935..cf28cf0 100644
--- a/src/bundle/index.mjs
+++ b/src/bundle/index.mjs
@@ -1,21 +1,25 @@
 import wp from 'webpack';
 import path from 'node:path'
 import {createRequire,builtinModules} from 'node:module'
+import { mkdirSync } from 'node:fs';
 
 /**
  *
  * @param {string} l library name
  * @param {string} moduleLocation module location
  * @param {string} outputPath
- * @returns
+ * @returns the compressed file path
  */
 export function wpCompress(l, moduleLocation,outputPath = path.resolve('./output/')) {
+    const basePackage = path.basename(path.resolve(moduleLocation));
+    const finalOutputPath = path.resolve(outputPath, basePackage);
+    mkdirSync(finalOutputPath, { recursive: true });
     return new Promise((resolve, reject) => {
-        
+
         const libraryLocation = extractFunctionForModule(l, moduleLocation);
-        console.log(libraryLocation);
+        // console.log(libraryLocation);
         const outputFile = l + '.bundle.cjs';
-        console.log(`[WebPack] Compressing ${l} in ${moduleLocation} to ${path.join(outputPath, outputFile)}`);
+        console.log(`[WebPack] Compressing ${l} in ${moduleLocation} to ${path.join(finalOutputPath, outputFile)}`);
         const moduleFallbackMap = builtinModules.reduce((prev, current) => {
             prev[current] = false;
             return prev;
@@ -36,7 +40,7 @@ export function wpCompress(l, moduleLocation,outputPath = path.resolve('./output
             fallback:moduleFallbackMap
         },
         output: {
-            path: outputPath,
+            path: finalOutputPath,
             filename: outputFile,
             clean: false,
             iife: false,
@@ -52,7 +56,7 @@ export function wpCompress(l, moduleLocation,outputPath = path.resolve('./output
                 // console.log(`[WebPack]`,stats?.toJson().errors);
                 reject(err || stats);
             }else{
-                resolve(path.resolve(outputPath, outputFile));
+                resolve(path.resolve(finalOutputPath, outputFile));
             }
         });
     });
diff --git a/src/libcalls.mjs b/src/libcalls.mjs
index 9e2271c..d383a55 100644
--- a/src/libcalls.mjs
+++ b/src/libcalls.mjs
@@ -82,7 +82,9 @@ export class LibraryTypesRecorder {
             return undefined;
         } else if(type.isNull()){
             return null;
-        } else if(type.isBigInt()){
+        } else if (type.isVoid()){
+            return undefined;
+        }else if(type.isBigInt()){
             return simpleFaker.number.bigInt();
         }else
         if (type.isString()) {
diff --git a/src/tsCalls.mjs b/src/tsCalls.mjs
index f5c267f..cdb4374 100644
--- a/src/tsCalls.mjs
+++ b/src/tsCalls.mjs
@@ -16,8 +16,14 @@ import {builtinModules} from 'node:module'
 export function getImportCallsAndArgumentTypes(importDecls, checker, mainFilePath, libraryTypesRecorder) {
     // const libraryTypesRecorder = new LibraryTypesRecorder(checker);
     for (const importStringDecl of importDecls) {
+        // console.log(importStringDecl);
         const importDecl = importStringDecl.getFirstAncestor();
+        const packageName = importStringDecl.getLiteralValue();
+        if(isNodeModule(packageName)) {
+            // just skip node module scanning.
+            continue;
+        }
 
         if (importDecl === undefined) {
             console.error("Import declaration is undefined for", importStringDecl.getText());
             continue;
diff --git a/src_dataset/FILTER_LIST.mjs b/src_dataset/FILTER_LIST.mjs
index f41ad08..7e50e3e 100644
--- a/src_dataset/FILTER_LIST.mjs
+++ b/src_dataset/FILTER_LIST.mjs
@@ -41,7 +41,8 @@ const FILTER_LIST = [
     "https://github.com/foliojs-fork/linebreaker",
     "https://github.com/segmentio/analytics.js-video-plugins",
     "https://github.com/cucumber/cucumber-expressions-javascript",
-    "https://github.com/jakwings/node-temp-fs"
+    "https://github.com/jakwings/node-temp-fs",
+    "https://github.com/bower/bower/tree/master/packages/*"
 ];
 
 const FILTER_LIST_REGEX = FILTER_LIST.map(GlobToRegExp)
diff --git a/src_dataset/cache.mjs b/src_dataset/cache.mjs
index bdb0c4b..45c710d 100644
--- a/src_dataset/cache.mjs
+++ b/src_dataset/cache.mjs
@@ -9,9 +9,9 @@ import { resolve } from "node:path";
  * @param {()=>Promise} asyncCallback
  * @returns {Promise}
  */
-export async function cacheFunctionOutput(fileName, asyncCallback, silent=false) {
+export async function cacheFunctionOutput(fileName, asyncCallback, silent=false,passthrough=false) {
     const fileLoc = resolve('../cache-repos', fileName);
-    if (existsSync(fileLoc)) {
+    if (!passthrough && existsSync(fileLoc)) {
         !silent && console.log("[cacher] Using cached ", fileLoc);
         const fileContents = (await readFile(fileLoc)).toString();
         return JSON.parse(fileContents);
diff --git a/src_dataset/index.mjs b/src_dataset/index.mjs
index 6a28053..b34e65a 100644
--- a/src_dataset/index.mjs
+++ b/src_dataset/index.mjs
@@ -30,9 +30,9 @@ const intermediateRepoList = await cacheFunctionOutput('repos.n2.json', async fu
 // const packageMap = new Map(packageList)
 console.log(`Total repos`,intermediateRepoList.length)
 
-const intermediateRepoListSmaller = intermediateRepoList.slice(0,10000);
+const intermediateRepoListSmaller = intermediateRepoList.slice(0,20000);
 
-const repoStatus = await processPromisesBatch(intermediateRepoListSmaller,20,cloneRepoAndCheck)
+const repoStatus = await processPromisesBatch(intermediateRepoListSmaller,40,cloneRepoAndCheck)
 
 const repoStatusString = csv.stringify(repoStatus);
 await fsp.writeFile('repostatus.csv', repoStatusString);
diff --git a/src_dataset/mine.mjs b/src_dataset/mine.mjs
index be80516..fa003a0 100644
--- a/src_dataset/mine.mjs
+++ b/src_dataset/mine.mjs
@@ -43,6 +43,15 @@ export async function cloneRepoAndCheck([repoName, repoGitUrl, downloadCount]) {
         // console.log(repoName, packageJSONContents.license)
         if (!hasAnyActualDependencies(packageJSONContents, repoName)) {
             // console.log("[git] skipping", repoName, "has no dependencies");
+            await removeUnnecessaryClone(repoPath);
+            // console.log("Cleaned up ", repoPath);
+            return [repoName, null];
+        }
+
+        if(isLikelyTypescriptProject(packageJSONContents)) {
+            await removeUnnecessaryClone(repoPath);
+            // console.warn("[git] Ignoring ", repoName, "because it is a typescript project.");
+            // console.log("Cleaned up ", repoPath);
             return [repoName, null];
         }
 
@@ -58,15 +67,37 @@ export async function cloneRepoAndCheck([repoName, repoGitUrl, downloadCount]) {
         }
         const packageFile = resolve(repoPath, 'package.json')
         if (!existsSync(packageFile)){
-            console.warn("[git] Unexpected package.json not found in", repoName, "at", packageFile);
+            // console.warn("[git] Unexpected package.json not found in", repoName, "at", packageFile);
             return [repoName, null];}
 
         // finally, return the test script if it exists
         return [repoName, ((packageJSONContents?.scripts?.test))]
     }
-    else return [repoName, null]
+    else{
+        await removeUnnecessaryClone(repoPath);
+
+        return [repoName, null]
+    }
 }
 
+function isLikelyTypescriptProject(packageJSONContents) {
+    if (packageJSONContents.devDependencies !== undefined) {
+        if (Object.keys(packageJSONContents.devDependencies).some(e => e.startsWith('typescript'))) {
+            return true;
+        }
+        if (Object.keys(packageJSONContents.dependencies).some(e => e.startsWith('typescript'))) {
+            return true;
+        }
+    }
+    return false;
+}
+async function removeUnnecessaryClone(repoPath) {
+    if(existsSync(repoPath)){
+        console.log("[git] unnecessary clone, removing", repoPath) ;
+        // while(true){}
+        await rm(repoPath, { recursive: true, force: true });
+    }
+}
 function filterRepo(repoGitUrl) {
     return matchFilterList(repoGitUrl);
diff --git a/src_vuln/index.mjs b/src_vuln/index.mjs
new file mode 100644
index 0000000..0103dfe
--- /dev/null
+++ b/src_vuln/index.mjs
@@ -0,0 +1,65 @@
+import { cacheFunctionOutput } from "../src_dataset/cache.mjs";
+import { bifurcateArray, getGithubTokenFromEnvironment } from "./lib.mjs";
+import { checkForParentDep, findSlicedDeps } from "./slicedeps.mjs";
+import { basename } from "path";
+
+const githubToken = getGithubTokenFromEnvironment();
+
+const vulnTargets = await findSlicedDeps();
+const affects = [...vulnTargets].join(',');
+
+// console.log(query)
+
+const res = await cacheFunctionOutput('advisories.json', async () => {
+    const query = `?ecosystem=npm&affects=${affects}`;
+    const res = await fetch('https://api.github.com/advisories'+query,
+        {
+            headers:{
+                Authorization: `Bearer ${githubToken}`,
+            }
+        }
+    );
+    const x = await res.json();
+    return x;
+},true, false);
+
+const cveMap = res.map(e=>({
+    summary: e.summary,
+    source: e.source_code_location,
+    severity: e.severity,
+    repo_name: basename(e.source_code_location),
+    cve: e.cve_id,
+    identifiers: e.identifiers,
+    cvss: e.cvss,
+    }));
+
+const [fullMaps,emptyMap]= bifurcateArray(cveMap, (e)=>e.source)
+
+
+// const slicedReposSoFar = await findSlicedDepsSoFar();
+const depMap = new Map();
+for(const depo of fullMaps){
+    if(!depMap.has(depo.repo_name)) {
+        depMap.set(depo.repo_name, []);
+    }
+    depMap.get(depo.repo_name).push(depo);
+}
+const depKeys = ([...depMap.keys()])
+console.log(depKeys)
+const repoKeys = await checkForParentDep(depKeys);
+console.log(repoKeys);
+// for(const repo of slicedReposSoFar) {
+//     const deps = await getDepsOfRepo(repo);
+//     console.log(repo,deps);
+//     const depCVEs = fullMaps.filter(e=>(deps).includes(e.repo_name));
+//     depMap.set(repo, depCVEs);
+// }
+console.log(cveMap.length, "advisories found");
+console.log(fullMaps.length, "advisories found");
+console.log(emptyMap.length, "advisories found");
+// what is pending
+// see what's been sliced so far. Find their dependencies, link back to
+
+
+
+
diff --git a/src_vuln/lib.mjs b/src_vuln/lib.mjs
new file mode 100644
index 0000000..abf1661
--- /dev/null
+++ b/src_vuln/lib.mjs
@@ -0,0 +1,26 @@
+import assert from "assert";
+
+/**
+ * Bifurcate an array into two arrays based on a predicate function.
+ * @template T
+ * @param {T[]} arr
+ * @param {(T)=>boolean} predicate
+ * @returns {[T[], T[]]}
+ */
+export function bifurcateArray(arr, predicate) {
+    const truthy = [];
+    const falsy = [];
+    for (const item of arr) {
+        if (predicate(item)) {
+            truthy.push(item);
+        } else {
+            falsy.push(item);
+        }
+    }
+    return [truthy, falsy];
+}
+export function getGithubTokenFromEnvironment() {
+    assert(process.env.GITHUB_TOKEN, "No token :(");
+    const githubToken = process.env.GITHUB_TOKEN;
+    return githubToken;
+}
diff --git a/src_vuln/slicedeps.mjs b/src_vuln/slicedeps.mjs
new file mode 100644
index 0000000..9fa64fa
--- /dev/null
+++ b/src_vuln/slicedeps.mjs
@@ -0,0 +1,97 @@
+import { readdir, opendir } from 'node:fs/promises'
+import path, { basename, dirname } from 'node:path';
+
+
+
+/**
+ * Finds all dependencies that are sliced by the slicer.
+ * this function will search find the list of dependencies that are sliced by the slicer.
+ * in the dist folder, theres a folder of repos. in that, there is a folder for each dependency.
+ * collate into a set and return it.
+ * Eg.
+ * dist/
+ * └── align-text/
+ *     └── kind-of
+ *     └── longest
+ *
+ * it will return kind-of and longest as sliced deps.
+ */
+export async function findSlicedDeps(){
+    /**
+     * @type {Set}
+     */
+    const slicedDeps = new Set();
+    const distPath = path.resolve('dist');
+
+    for await(const p of walk(distPath)) {
+        if(p.endsWith("package.json")) {slicedDeps.add(basename(dirname(p)))}
+        else continue;
+    }
+    return slicedDeps;
+}
+
+/**
+ * Given a list of deps, find the repos that have these
+ * @param {string[]} dependencies
+ */
+export async function checkForParentDep(dependencies){
+    // dep -> main
+    const map = await getReverseDeps();
+    const reposet = dependencies.flatMap(dep => (map.get(dep)??[]));
+    const repos = new Set(reposet);
+    return repos;
+}
+
+// for a given dep, find the list of main repo that has this dep. return map.
+async function getReverseDeps() {
+    const x = new Map();
+    const distPath = path.resolve('dist');
+
+    for await(const p of walk(distPath)) {
+        if (p.endsWith("package.json")) {
+            const repo = basename(dirname(dirname(p)));
+            const depName = basename(dirname(p));
+
+            if (!x.has(depName)) {
+                x.set(depName, []);
+            }
+            x.get(depName).push(repo);
+            // console.log(p,repo, depName);
+        }
+        else continue;
+    }
+    return x;
+}
+
+export async function findSlicedDepsSoFar() {
+    // return all folder names in the output directory.
+    const distPath = path.resolve('output');
+    const dirEntries = await readdir(distPath, { withFileTypes: true });
+    const repos = dirEntries.filter(dirent => dirent.isDirectory()).map(dirent => dirent.name);
+    return repos;
+}
+/**
+ *
+ * @param {string} repo
+ */
+export async function getDepsOfRepo(repo){
+    const distPath = path.resolve('output', repo);
+    const dirEntry = await readdir(distPath, { withFileTypes: true });
+    const deps = dirEntry.filter(dirent => dirent.isFile()).map(dirent => dirent.name.replace('.bundle.cjs',''));
+    return deps;
+}
+
+/**
+ * FS walk primitive
+ * Ref: https://gist.github.com/lovasoa/8691344
+ * @param {string} dir
+ * @returns {AsyncGenerator}
+ */
+async function* walk(dir) {
+    for await (const d of await opendir(dir)) {
+        const entry = path.join(dir, d.name);
+        if (d.isDirectory()) yield* walk(entry);
+        else if (d.isFile()) yield entry;
+    }
+}
+// checkForParentDep('thenify', 'glob-parent', 'terser', 'url-parse').then(console.log).catch(console.error);
\ No newline at end of file
diff --git a/success.txt b/success.txt
new file mode 100644
index 0000000..0a7b6a9
--- /dev/null
+++ b/success.txt
@@ -0,0 +1,204 @@
+glob-parent
+is-glob
+mime-types
+doctrine
+define-property
+jsonfile
+optionator
+http-errors
+type-check
+levn
+is-extendable
+esquery
+on-finished
+error-ex
+finalhandler
+content-disposition
+terser
+proxy-addr
+prop-types
+has-values
+has-value
+url-parse
+simple-swizzle
+compressible
+global-modules
+thenify
+mz
+clone-deep
+shallow-clone
+prettier-linter-helpers
+cors
+fd-slicer
+object.pick
+language-tags
+union-value
+object-copy
+static-extend
+memory-fs
+pinkie-promise
+hash.js
+pretty-error
+renderkid
+wbuf
+browserify-zlib
+hmac-drbg
+des.js
+dom-converter
+expand-tilde
+homedir-polyfill
+zip-stream
+crc32-stream
+one-time
+resolve-dir
+yargs-unparser
+# warning
+bplist-parser
+for-own
+md5
+is-relative
+is-absolute
+is-unc-path
+redis-parser
+recursive-readdir
+path-root
+lazy-cache
+# css-to-react-native
+parse-filepath
+request-progress
+jest-junit
+postcss-initial
+unixify
+cookie-parser
+saslprep
+window-size
+cookies
+keygrip
+contains-path
+fined
+object.defaults
+is-color-stop
+gonzales-pe
+bufferutil
+make-iterator
+glob-base
+uid-safe
+fancy-log
+object.map
+object.omit
+find-babel-config
+utf-8-validate
+mquery
+xlsx
+json-to-pretty-yaml
+easy-table
+named-placeholders
+is-dotfile
+parse-glob
+plugin-error
+is-equal-shallow
+original
+detective-typescript
+detective-es6
+json2mq
+create-error-class
+detective-cjs
+to-through
+resolve-options
+ansi-gray
+bcrypt
+mixin-object
+optimize-css-assets-webpack-plugin
+ordered-read-streams
+sync-fetch
+to-absolute-glob
+glogg
+unique-stream
+align-text
+gulplog
+blob
+center-align
+right-align
+wkx
+chai-as-promised
+json-pointer
+has-glob
+# promptly
+hot-shots
+semver-greatest-satisfied-range
+each-props
+is2
+levenary
+airbnb-prop-types
+remove-bom-stream
+remove-bom-buffer
+dotenv-defaults
+rework
+vizion
+array-sort
+default-compare
+pad-right
+passport-local
+console.table
+cli-tableau
+condense-newlines
+requireg
+object.reduce
+properties-reader
+array-initial
+default-resolution
+collection-map
+ansi-red
+broccoli-merge-trees
+eslint-plugin-react-native
+is-valid-path
+strip-hex-prefix
+uglify-es
+ansi-cyan
+method-override
+# readline2
+number-allocator
+has-gulplog
+ethjs-util
+unescape
+validate.io-integer
+stream-parser
+compute-gcd
+validate.io-integer-array
+compute-lcm
+set-getter
+passport-oauth2
+i18n-iso-countries
+sha1
+json-diff
+dreamopt
+highlight-es
+basic-auth-connect
+glob2base
+third-party-capital
+new-date
+webrtc-adapter
+xhr-request-promise
+contentful-resolve-response
+jest-sonar-reporter
+parse-author
+amd-name-resolver
+mocha-multi-reporters
+eslint-plugin-filenames
+apache-crypt
+semver-intersect
+fetch-ponyfill
+karma-mocha
+is-odd
+babel-plugin-ember-modules-api-polyfill
+csurf
+taketalk
+require-and-forget
+geojson-equality
+relative
+pkg-config
+rss-parser
+xml-but-prettier
+karma-spec-reporter
+speakeasy
+parsejson
diff --git a/test_src/build-package.cjs b/test_src/build-package.cjs
new file mode 100644
index 0000000..68b65c4
--- /dev/null
+++ b/test_src/build-package.cjs
@@ -0,0 +1,198 @@
+const {writeFile} = require('fs/promises');
+
+const libs = `glob-parent
+is-glob
+mime-types
+doctrine
+define-property
+jsonfile
+optionator
+http-errors
+type-check
+levn
+is-extendable
+esquery
+on-finished
+error-ex
+finalhandler
+content-disposition
+terser
+proxy-addr
+prop-types
+has-values
+has-value
+url-parse
+simple-swizzle
+compressible
+global-modules
+thenify
+mz
+clone-deep
+shallow-clone
+prettier-linter-helpers
+cors
+fd-slicer
+object.pick
+language-tags
+union-value
+object-copy
+static-extend
+memory-fs
+pinkie-promise
+hash.js
+pretty-error
+renderkid
+wbuf
+browserify-zlib
+hmac-drbg
+des.js
+dom-converter
+expand-tilde
+homedir-polyfill
+zip-stream
+crc32-stream
+one-time
+resolve-dir
+yargs-unparser
+warning
+bplist-parser
+for-own
+md5
+is-relative
+is-absolute
+is-unc-path
+redis-parser
+recursive-readdir
+path-root
+lazy-cache
+css-to-react-native
+parse-filepath
+request-progress
+jest-junit
+postcss-initial
+unixify
+cookie-parser
+saslprep
+window-size
+cookies
+keygrip
+contains-path
+fined
+object.defaults
+is-color-stop
+gonzales-pe
+bufferutil
+make-iterator
+glob-base
+uid-safe
+fancy-log
+object.map
+object.omit
+find-babel-config
+utf-8-validate
+mquery
+xlsx
+json-to-pretty-yaml
+easy-table
+named-placeholders
+is-dotfile
+parse-glob
+plugin-error
+is-equal-shallow
+original
+detective-typescript
+detective-es6
+json2mq
+create-error-class
+detective-cjs
+to-through
+resolve-options
+ansi-gray
+bcrypt
+mixin-object
+optimize-css-assets-webpack-plugin
+ordered-read-streams
+sync-fetch
+to-absolute-glob
+glogg
+unique-stream
+align-text
+gulplog
+blob
+center-align
+right-align
+wkx
+chai-as-promised
+json-pointer
+has-glob
+promptly
+hot-shots
+semver-greatest-satisfied-range
+each-props
+is2
+levenary
+airbnb-prop-types
+remove-bom-stream
+remove-bom-buffer
+dotenv-defaults
+rework
+vizion
+array-sort
+default-compare
+pad-right
+passport-local
+console.table
+cli-tableau
+condense-newlines
+requireg
+object.reduce
+properties-reader
+array-initial
+default-resolution
+collection-map
+ansi-red
+broccoli-merge-trees
+eslint-plugin-react-native
+is-valid-path
+strip-hex-prefix
+uglify-es
+ansi-cyan
+method-override
+readline2
+number-allocator
+has-gulplog
+ethjs-util
+unescape
+validate.io-integer
+stream-parser
+compute-gcd
+validate.io-integer-array
+compute-lcm
+set-getter
+passport-oauth2
+i18n-iso-countries
+sha1
+json-diff
+dreamopt
+highlight-es
+basic-auth-connect
+glob2base
+third-party-capital
+new-date
+webrtc-adapter
+xhr-request-promise
+contentful-resolve-response
+jest-sonar-reporter
+parse-author
+amd-name-resolver
+mocha-multi-reporters
+eslint-plugin-filenames
+apache-crypt`;
+const libsArray = libs.split('\n').map(e => e.trim());
+
+const newPackageJson = {
+    dependencies: Object.fromEntries(libsArray.map(lib => [lib, "latest"])),
+}
+
+console.log(JSON.stringify(newPackageJson, null, 2));
+writeFile('vulncheck.package.json', JSON.stringify(newPackageJson, null, 2));
\ No newline at end of file
diff --git a/test_src/check_diff.cjs b/test_src/check_diff.cjs
new file mode 100644
index 0000000..fa79255
--- /dev/null
+++ b/test_src/check_diff.cjs
@@ -0,0 +1,45 @@
+// diff the two csv files, and perform a left subtract and right subtract.
+const fsp = require('fs/promises')
+
+async function main(){
+    const file1 = 'success.txt';
+    const file2String = `mime-types
+http-errors
+source-map-support
+compressible
+global-modules
+thenify
+mz
+memory-fs
+pinkie-promise
+pretty-error
+renderkid
+hpack.js
+wbuf
+expand-tilde
+homedir-polyfill
+basic-auth
+for-own
+is-unc-path
+recursive-readdir
+path-root
+cookies
+bufferutil
+utf-8-validate
+easy-table
+is-dotfile`;
+    const file1String = await fsp.readFile(file1, 'utf8');
+    const f1Elements = file1String.split('\n').filter(Boolean);
+    const f2Elements = file2String.split('\n').filter(Boolean);
+    const leftSubtract = f1Elements.filter(x => !f2Elements.includes(x));
+    // const rightSubtract = f2Elements.filter(x => !f1Elements.includes(x));
+    console.log('Left Subtract:f1, not in f2');
+    const leftSubtractString = leftSubtract.join('\n')
+    await fsp.writeFile('left_subtract.txt', leftSubtractString, 'utf8');
+    // console.log('Right Subtract: done, but not in main list', rightSubtract);
+}
+
+
+main().catch(err => {
+    console.error('Error:', err);
+});
\ No newline at end of file
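
Illustrative sketch (not part of the patch above): src_vuln/index.mjs fetches a single page of advisories for the whole comma-separated `affects` list. If the set of sliced dependencies grows, that request may need to be batched and paginated. The snippet below shows one way to do this against the same endpoint; the batch size of 100 and the helper name fetchAdvisoriesFor are assumptions, and the per_page / Link-header pagination follows standard GitHub REST conventions rather than anything this patch introduces.

// sketch-fetch-advisories.mjs — hypothetical helper, not included in the commit above
import assert from 'node:assert';

const token = process.env.GITHUB_TOKEN;
assert(token, 'GITHUB_TOKEN is required');

// Fetch every page of GitHub security advisories affecting the given npm packages,
// querying in batches so the comma-separated `affects` list stays a manageable length.
export async function fetchAdvisoriesFor(packages) {
    const results = [];
    const BATCH = 100; // assumed batch size; tune as needed
    for (let i = 0; i < packages.length; i += BATCH) {
        const affects = packages.slice(i, i + BATCH).join(',');
        let url = `https://api.github.com/advisories?ecosystem=npm&per_page=100&affects=${encodeURIComponent(affects)}`;
        // Follow the Link response header until there is no rel="next" page left.
        while (url) {
            const res = await fetch(url, { headers: { Authorization: `Bearer ${token}` } });
            if (!res.ok) throw new Error(`GitHub API returned ${res.status}`);
            results.push(...await res.json());
            const next = (res.headers.get('link') ?? '').match(/<([^>]+)>;\s*rel="next"/);
            url = next ? next[1] : null;
        }
    }
    return results;
}

// Example usage, reusing the set produced by findSlicedDeps() in src_vuln/slicedeps.mjs:
// const advisories = await fetchAdvisoriesFor([...await findSlicedDeps()]);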