[add] implement vulnerability checking and advisory fetching, enhance repo processing, and add utility functions

This commit is contained in:
2025-08-19 19:13:24 +01:00
parent 52d0c7b649
commit 2c30fce7c8
14 changed files with 700 additions and 15 deletions

8
.gitignore vendored
View File

@@ -2,4 +2,10 @@ dist
output.csv
output/
node_modules/
cache
cache
failed-install.txt
success copy.txt
current-processing.log
current.log
left_subtract.txt
processed.log

View File

@@ -1,21 +1,25 @@
import wp from 'webpack';
import path from 'node:path'
import {createRequire,builtinModules} from 'node:module'
import { mkdirSync } from 'node:fs';
/**
*
* @param {string} l library name
* @param {string} moduleLocation module location
* @param {string} outputPath
* @returns
* @returns the compressed file path
*/
export function wpCompress(l, moduleLocation,outputPath = path.resolve('./output/')) {
const basePackage = path.basename(path.resolve(moduleLocation));
const finalOutputPath = path.resolve(outputPath, basePackage);
mkdirSync(finalOutputPath, { recursive: true });
return new Promise((resolve, reject) => {
const libraryLocation = extractFunctionForModule(l, moduleLocation);
console.log(libraryLocation);
// console.log(libraryLocation);
const outputFile = l + '.bundle.cjs';
console.log(`[WebPack] Compressing ${l} in ${moduleLocation} to ${path.join(outputPath, outputFile)}`);
console.log(`[WebPack] Compressing ${l} in ${moduleLocation} to ${path.join(finalOutputPath, outputFile)}`);
const moduleFallbackMap = builtinModules.reduce((prev, current) => {
prev[current] = false;
return prev;
@@ -36,7 +40,7 @@ export function wpCompress(l, moduleLocation,outputPath = path.resolve('./output
fallback:moduleFallbackMap
},
output: {
path: outputPath,
path: finalOutputPath,
filename: outputFile,
clean: false,
iife: false,
@@ -52,7 +56,7 @@ export function wpCompress(l, moduleLocation,outputPath = path.resolve('./output
// console.log(`[WebPack]`,stats?.toJson().errors);
reject(err || stats);
}else{
resolve(path.resolve(outputPath, outputFile));
resolve(path.resolve(finalOutputPath, outputFile));
}
});
});

View File

@@ -82,7 +82,9 @@ export class LibraryTypesRecorder {
return undefined;
} else if(type.isNull()){
return null;
} else if(type.isBigInt()){
} else if (type.isVoid()){
return undefined;
}else if(type.isBigInt()){
return simpleFaker.number.bigInt();
}else if (type.isString()) {

View File

@@ -16,8 +16,14 @@ import {builtinModules} from 'node:module'
export function getImportCallsAndArgumentTypes(importDecls, checker, mainFilePath, libraryTypesRecorder) {
// const libraryTypesRecorder = new LibraryTypesRecorder(checker);
for (const importStringDecl of importDecls) {
// console.log(importStringDecl);
const importDecl = importStringDecl.getFirstAncestor();
const packageName = importStringDecl.getLiteralValue();
if(isNodeModule(packageName)) {
// just skip node module scanning.
continue;
}
if (importDecl === undefined) {
console.error("Import declaration is undefined for", importStringDecl.getText());
continue;

View File

@@ -41,7 +41,8 @@ const FILTER_LIST = [
"https://github.com/foliojs-fork/linebreaker",
"https://github.com/segmentio/analytics.js-video-plugins",
"https://github.com/cucumber/cucumber-expressions-javascript",
"https://github.com/jakwings/node-temp-fs"
"https://github.com/jakwings/node-temp-fs",
"https://github.com/bower/bower/tree/master/packages/*"
];
const FILTER_LIST_REGEX = FILTER_LIST.map(GlobToRegExp)

View File

@@ -9,9 +9,9 @@ import { resolve } from "node:path";
* @param {()=>Promise<T>} asyncCallback
* @returns {Promise<T>}
*/
export async function cacheFunctionOutput(fileName, asyncCallback, silent=false) {
export async function cacheFunctionOutput(fileName, asyncCallback, silent=false,passthrough=false) {
const fileLoc = resolve('../cache-repos', fileName);
if (existsSync(fileLoc)) {
if (!passthrough && existsSync(fileLoc)) {
!silent && console.log("[cacher] Using cached ", fileLoc);
const fileContents = (await readFile(fileLoc)).toString();
return JSON.parse(fileContents);

View File

@@ -30,9 +30,9 @@ const intermediateRepoList = await cacheFunctionOutput('repos.n2.json', async fu
// const packageMap = new Map(packageList)
console.log(`Total repos`,intermediateRepoList.length)
const intermediateRepoListSmaller = intermediateRepoList.slice(0,10000);
const intermediateRepoListSmaller = intermediateRepoList.slice(0,20000);
const repoStatus = await processPromisesBatch(intermediateRepoListSmaller,20,cloneRepoAndCheck)
const repoStatus = await processPromisesBatch(intermediateRepoListSmaller,40,cloneRepoAndCheck)
const repoStatusString = csv.stringify(repoStatus);
await fsp.writeFile('repostatus.csv', repoStatusString);

View File

@@ -43,6 +43,15 @@ export async function cloneRepoAndCheck([repoName, repoGitUrl, downloadCount]) {
// console.log(repoName, packageJSONContents.license)
if (!hasAnyActualDependencies(packageJSONContents, repoName)) {
// console.log("[git] skipping", repoName, "has no dependencies");
await removeUnnecessaryClone(repoPath);
// console.log("Cleaned up ", repoPath);
return [repoName, null];
}
if(isLikelyTypescriptProject(packageJSONContents)) {
await removeUnnecessaryClone(repoPath);
// console.warn("[git] Ignoring ", repoName, "because it is a typescript project.");
// console.log("Cleaned up ", repoPath);
return [repoName, null];
}
@@ -58,15 +67,37 @@ export async function cloneRepoAndCheck([repoName, repoGitUrl, downloadCount]) {
}
const packageFile = resolve(repoPath, 'package.json')
if (!existsSync(packageFile)){
console.warn("[git] Unexpected package.json not found in", repoName, "at", packageFile);
// console.warn("[git] Unexpected package.json not found in", repoName, "at", packageFile);
return [repoName, null];}
// finally, return the test script if it exists
return [repoName, ((packageJSONContents?.scripts?.test))]
}
else return [repoName, null]
else{
await removeUnnecessaryClone(repoPath);
return [repoName, null]
}
}
/**
 * Heuristically decide whether a repo is a TypeScript project by looking for
 * any dependency or devDependency whose name starts with "typescript".
 * @param {object} packageJSONContents parsed package.json contents
 * @returns {boolean} true when a typescript* package is declared
 */
function isLikelyTypescriptProject(packageJSONContents) {
    // Bug fix: the original only inspected `dependencies` when
    // `devDependencies` existed, and crashed (Object.keys(undefined)) when
    // `devDependencies` was present but `dependencies` was missing.
    const declaresTypescript = (deps) =>
        deps !== undefined &&
        Object.keys(deps).some((name) => name.startsWith('typescript'));
    return (
        declaresTypescript(packageJSONContents.devDependencies) ||
        declaresTypescript(packageJSONContents.dependencies)
    );
}
/**
 * Delete a cloned repo directory that turned out not to be needed.
 * No-op when the path does not exist.
 * @param {string} repoPath absolute path of the clone to remove
 */
async function removeUnnecessaryClone(repoPath) {
    // Guard clause: nothing to clean up if the clone is already gone.
    if (!existsSync(repoPath)) {
        return;
    }
    console.log("[git] unnecessary clone, removing", repoPath);
    await rm(repoPath, { recursive: true, force: true });
}
function filterRepo(repoGitUrl) {
return matchFilterList(repoGitUrl);

65
src_vuln/index.mjs Normal file
View File

@@ -0,0 +1,65 @@
// Cross-references GitHub security advisories with the dependencies that the
// slicer has produced under dist/, then groups advisories by source repo.
import { cacheFunctionOutput } from "../src_dataset/cache.mjs";
import { bifurcateArray, getGithubTokenFromEnvironment } from "./lib.mjs";
import { checkForParentDep, findSlicedDeps } from "./slicedeps.mjs";
import { basename } from "path";
// Asserts GITHUB_TOKEN is set; the advisories request below is authenticated.
const githubToken = getGithubTokenFromEnvironment();
// Set of sliced dependency names found under dist/.
const vulnTargets = await findSlicedDeps();
// Comma-separated package list for the `affects` query parameter.
// NOTE(review): values are not URL-encoded — presumably fine for npm package
// names, but confirm; consider building the URL with URLSearchParams.
const affects = [...vulnTargets].join(',');
// console.log(query)
// Advisory list is cached on disk; trailing args are (silent=true,
// passthrough=false), so a previously cached advisories.json is reused.
const res = await cacheFunctionOutput('advisories.json', async () => {
    const query = `?ecosystem=npm&affects=${affects}`;
    const res = await fetch('https://api.github.com/advisories'+query,
        {
            headers:{
                Authorization: `Bearer ${githubToken}`,
            }
        }
    );
    const x = await res.json();
    // NOTE(review): assumed to be an array of advisory objects; an API error
    // response would be an object here and break the .map below — confirm.
    return x;
},true, false);
// Keep only the advisory fields used downstream.
const cveMap = res.map(e=>({
    summary: e.summary,
    source: e.source_code_location,
    severity: e.severity,
    // Repo name derived from the advisory's source_code_location URL.
    // NOTE(review): basename throws on null — source-less advisories appear
    // to carry '' rather than null; confirm against the API response.
    repo_name: basename(e.source_code_location),
    cve: e.cve_id,
    identifiers: e.identifiers,
    cvss: e.cvss,
}));
// Split advisories into those with a known source repo and those without.
const [fullMaps,emptyMap]= bifurcateArray(cveMap, (e)=>e.source)
// const slicedReposSoFar = await findSlicedDepsSoFar();
// Group advisories by repo name: repo_name -> [advisory, ...].
const depMap = new Map();
for(const depo of fullMaps){
    if(!depMap.has(depo.repo_name)) {
        depMap.set(depo.repo_name, []);
    }
    depMap.get(depo.repo_name).push(depo);
}
const depKeys = ([...depMap.keys()])
console.log(depKeys)
// Repos under dist/ that depend on any advisory-affected package.
const repoKeys = await checkForParentDep(depKeys);
console.log(repoKeys);
// for(const repo of slicedReposSoFar) {
//     const deps = await getDepsOfRepo(repo);
//     console.log(repo,deps);
//     const depCVEs = fullMaps.filter(e=>(deps).includes(e.repo_name));
//     depMap.set(repo, depCVEs);
// }
// NOTE(review): the next two messages reuse "advisories found" but actually
// count advisories with / without a source repo — the labels are misleading.
console.log(cveMap.length, "advisories found");
console.log(fullMaps.length, "advisories found");
console.log(emptyMap.length, "advisories found");
// what is pending
// see what's been sliced so far. Find their dependencies, link back to

26
src_vuln/lib.mjs Normal file
View File

@@ -0,0 +1,26 @@
import assert from "assert";
/**
 * Bifurcate an array into two arrays based on a predicate function.
 * @template T
 * @param {T[]} arr input elements (not mutated)
 * @param {(T)=>boolean} predicate decides which bucket an element lands in
 * @returns {[T[], T[]]} `[matching, nonMatching]`
 */
export function bifurcateArray(arr, predicate) {
    const matching = [];
    const nonMatching = [];
    for (const element of arr) {
        // Route each element to exactly one bucket.
        (predicate(element) ? matching : nonMatching).push(element);
    }
    return [matching, nonMatching];
}
/**
 * Read the GitHub API token from the GITHUB_TOKEN environment variable.
 * @returns {string} the token
 * @throws when GITHUB_TOKEN is unset or empty
 */
export function getGithubTokenFromEnvironment() {
    const githubToken = process.env.GITHUB_TOKEN;
    assert(githubToken, "No token :(");
    return githubToken;
}

97
src_vuln/slicedeps.mjs Normal file
View File

@@ -0,0 +1,97 @@
import { readdir, opendir } from 'node:fs/promises'
import path, { basename, dirname } from 'node:path';
/**
 * Finds all dependencies that are sliced by the slicer.
 * Walks the dist folder (which holds one folder per repo, each containing one
 * folder per dependency) and collates every dependency folder name — i.e. the
 * parent folder of each package.json — into a set.
 * Eg.
 * dist/
 * └── align-text/
 *     └── kind-of
 *     └── longest
 *
 * it will return kind-of and longest as sliced deps.
 * @returns {Promise<Set<string>>} sliced dependency names
 */
export async function findSlicedDeps() {
    /** @type {Set<string>} */
    const sliced = new Set();
    const distPath = path.resolve('dist');
    for await (const filePath of walk(distPath)) {
        // A dependency is identified by its package.json; its containing
        // folder name is the dependency name.
        if (!filePath.endsWith("package.json")) {
            continue;
        }
        sliced.add(basename(dirname(filePath)));
    }
    return sliced;
}
/**
 * Given a list of deps, find the repos that have these
 * @param {string[]} dependencies dependency names to look up
 * @returns {Promise<Set<string>>} repos containing any of the dependencies
 */
export async function checkForParentDep(dependencies) {
    // dep -> list of repos that include it
    const reverseDeps = await getReverseDeps();
    const parentRepos = new Set();
    for (const dep of dependencies) {
        for (const repo of reverseDeps.get(dep) ?? []) {
            parentRepos.add(repo);
        }
    }
    return parentRepos;
}
// for a given dep, find the list of main repo that has this dep. return map.
async function getReverseDeps() {
    /** @type {Map<string, string[]>} dep name -> repos that contain it */
    const reverse = new Map();
    const distPath = path.resolve('dist');
    for await (const filePath of walk(distPath)) {
        if (!filePath.endsWith("package.json")) {
            continue;
        }
        // Layout is dist/<repo>/<dep>/package.json.
        const depDir = dirname(filePath);
        const depName = basename(depDir);
        const repoName = basename(dirname(depDir));
        const repoList = reverse.get(depName);
        if (repoList === undefined) {
            reverse.set(depName, [repoName]);
        } else {
            repoList.push(repoName);
        }
    }
    return reverse;
}
/**
 * List the repos that have been sliced so far.
 * Each directory name under ./output corresponds to one processed repo.
 * @returns {Promise<string[]>} repo folder names
 */
export async function findSlicedDepsSoFar() {
    const outputPath = path.resolve('output');
    const entries = await readdir(outputPath, { withFileTypes: true });
    return entries
        .filter((entry) => entry.isDirectory())
        .map((entry) => entry.name);
}
/**
 * List the sliced dependency names for one repo by reading its output folder.
 * Each bundle file is named `<dep>.bundle.cjs`; the suffix is stripped.
 * @param {string} repo repo folder name under ./output
 * @returns {Promise<string[]>} dependency names
 */
export async function getDepsOfRepo(repo) {
    const repoOutput = path.resolve('output', repo);
    const entries = await readdir(repoOutput, { withFileTypes: true });
    const names = [];
    for (const entry of entries) {
        if (!entry.isFile()) continue;
        names.push(entry.name.replace('.bundle.cjs', ''));
    }
    return names;
}
/**
 * FS walk primitive: recursively yields the full path of every file under
 * `dir`, depth-first.
 * Ref: https://gist.github.com/lovasoa/8691344
 * @param {string} dir
 * @returns {AsyncGenerator<string>}
 */
async function* walk(dir) {
    const handle = await opendir(dir);
    for await (const dirent of handle) {
        const fullPath = path.join(dir, dirent.name);
        if (dirent.isFile()) {
            yield fullPath;
        } else if (dirent.isDirectory()) {
            yield* walk(fullPath);
        }
    }
}
// checkForParentDep('thenify', 'glob-parent', 'terser', 'url-parse').then(console.log).catch(console.error);

204
success.txt Normal file
View File

@@ -0,0 +1,204 @@
glob-parent
is-glob
mime-types
doctrine
define-property
jsonfile
optionator
http-errors
type-check
levn
is-extendable
esquery
on-finished
error-ex
finalhandler
content-disposition
terser
proxy-addr
prop-types
has-values
has-value
url-parse
simple-swizzle
compressible
global-modules
thenify
mz
clone-deep
shallow-clone
prettier-linter-helpers
cors
fd-slicer
object.pick
language-tags
union-value
object-copy
static-extend
memory-fs
pinkie-promise
hash.js
pretty-error
renderkid
wbuf
browserify-zlib
hmac-drbg
des.js
dom-converter
expand-tilde
homedir-polyfill
zip-stream
crc32-stream
one-time
resolve-dir
yargs-unparser
# warning
bplist-parser
for-own
md5
is-relative
is-absolute
is-unc-path
redis-parser
recursive-readdir
path-root
lazy-cache
# css-to-react-native
parse-filepath
request-progress
jest-junit
postcss-initial
unixify
cookie-parser
saslprep
window-size
cookies
keygrip
contains-path
fined
object.defaults
is-color-stop
gonzales-pe
bufferutil
make-iterator
glob-base
uid-safe
fancy-log
object.map
object.omit
find-babel-config
utf-8-validate
mquery
xlsx
json-to-pretty-yaml
easy-table
named-placeholders
is-dotfile
parse-glob
plugin-error
is-equal-shallow
original
detective-typescript
detective-es6
json2mq
create-error-class
detective-cjs
to-through
resolve-options
ansi-gray
bcrypt
mixin-object
optimize-css-assets-webpack-plugin
ordered-read-streams
sync-fetch
to-absolute-glob
glogg
unique-stream
align-text
gulplog
blob
center-align
right-align
wkx
chai-as-promised
json-pointer
has-glob
# promptly
hot-shots
semver-greatest-satisfied-range
each-props
is2
levenary
airbnb-prop-types
remove-bom-stream
remove-bom-buffer
dotenv-defaults
rework
vizion
array-sort
default-compare
pad-right
passport-local
console.table
cli-tableau
condense-newlines
requireg
object.reduce
properties-reader
array-initial
default-resolution
collection-map
ansi-red
broccoli-merge-trees
eslint-plugin-react-native
is-valid-path
strip-hex-prefix
uglify-es
ansi-cyan
method-override
# readline2
number-allocator
has-gulplog
ethjs-util
unescape
validate.io-integer
stream-parser
compute-gcd
validate.io-integer-array
compute-lcm
set-getter
passport-oauth2
i18n-iso-countries
sha1
json-diff
dreamopt
highlight-es
basic-auth-connect
glob2base
third-party-capital
new-date
webrtc-adapter
xhr-request-promise
contentful-resolve-response
jest-sonar-reporter
parse-author
amd-name-resolver
mocha-multi-reporters
eslint-plugin-filenames
apache-crypt
semver-intersect
fetch-ponyfill
karma-mocha
is-odd
babel-plugin-ember-modules-api-polyfill
csurf
taketalk
require-and-forget
geojson-equality
relative
pkg-config
rss-parser
xml-but-prettier
karma-spec-reporter
speakeasy
parsejson

198
test_src/build-package.cjs Normal file
View File

@@ -0,0 +1,198 @@
// Builds "vulncheck.package.json": a package.json depending on every library
// in the hard-coded list below at "latest", so the whole set can be installed
// in one go for vulnerability checking.
const {writeFile} = require('fs/promises');
const libs = `glob-parent
is-glob
mime-types
doctrine
define-property
jsonfile
optionator
http-errors
type-check
levn
is-extendable
esquery
on-finished
error-ex
finalhandler
content-disposition
terser
proxy-addr
prop-types
has-values
has-value
url-parse
simple-swizzle
compressible
global-modules
thenify
mz
clone-deep
shallow-clone
prettier-linter-helpers
cors
fd-slicer
object.pick
language-tags
union-value
object-copy
static-extend
memory-fs
pinkie-promise
hash.js
pretty-error
renderkid
wbuf
browserify-zlib
hmac-drbg
des.js
dom-converter
expand-tilde
homedir-polyfill
zip-stream
crc32-stream
one-time
resolve-dir
yargs-unparser
warning
bplist-parser
for-own
md5
is-relative
is-absolute
is-unc-path
redis-parser
recursive-readdir
path-root
lazy-cache
css-to-react-native
parse-filepath
request-progress
jest-junit
postcss-initial
unixify
cookie-parser
saslprep
window-size
cookies
keygrip
contains-path
fined
object.defaults
is-color-stop
gonzales-pe
bufferutil
make-iterator
glob-base
uid-safe
fancy-log
object.map
object.omit
find-babel-config
utf-8-validate
mquery
xlsx
json-to-pretty-yaml
easy-table
named-placeholders
is-dotfile
parse-glob
plugin-error
is-equal-shallow
original
detective-typescript
detective-es6
json2mq
create-error-class
detective-cjs
to-through
resolve-options
ansi-gray
bcrypt
mixin-object
optimize-css-assets-webpack-plugin
ordered-read-streams
sync-fetch
to-absolute-glob
glogg
unique-stream
align-text
gulplog
blob
center-align
right-align
wkx
chai-as-promised
json-pointer
has-glob
promptly
hot-shots
semver-greatest-satisfied-range
each-props
is2
levenary
airbnb-prop-types
remove-bom-stream
remove-bom-buffer
dotenv-defaults
rework
vizion
array-sort
default-compare
pad-right
passport-local
console.table
cli-tableau
condense-newlines
requireg
object.reduce
properties-reader
array-initial
default-resolution
collection-map
ansi-red
broccoli-merge-trees
eslint-plugin-react-native
is-valid-path
strip-hex-prefix
uglify-es
ansi-cyan
method-override
readline2
number-allocator
has-gulplog
ethjs-util
unescape
validate.io-integer
stream-parser
compute-gcd
validate.io-integer-array
compute-lcm
set-getter
passport-oauth2
i18n-iso-countries
sha1
json-diff
dreamopt
highlight-es
basic-auth-connect
glob2base
third-party-capital
new-date
webrtc-adapter
xhr-request-promise
contentful-resolve-response
jest-sonar-reporter
parse-author
amd-name-resolver
mocha-multi-reporters
eslint-plugin-filenames
apache-crypt`;
// One dependency name per line; trim and defensively drop blank lines.
const libsArray = libs.split('\n').map(e => e.trim()).filter(Boolean);
const newPackageJson = {
    dependencies: Object.fromEntries(libsArray.map(lib => [lib, "latest"])),
};
console.log(JSON.stringify(newPackageJson, null, 2));
// Bug fix: the write was a floating promise, so a failure was silently
// swallowed. Top-level await is unavailable in .cjs, so attach a catch that
// reports the error and sets a non-zero exit code.
writeFile('vulncheck.package.json', JSON.stringify(newPackageJson, null, 2))
    .catch((err) => {
        console.error('Failed to write vulncheck.package.json:', err);
        process.exitCode = 1;
    });

45
test_src/check_diff.cjs Normal file
View File

@@ -0,0 +1,45 @@
// diff the two csv files, and perform a left subtract and right subtract.
const fsp = require('fs/promises')
/**
 * Reads success.txt, subtracts the hard-coded "done" list from it, and writes
 * the remainder (entries still to process) to left_subtract.txt.
 */
async function main() {
    const file1 = 'success.txt';
    const file2String = `mime-types
http-errors
source-map-support
compressible
global-modules
thenify
mz
memory-fs
pinkie-promise
pretty-error
renderkid
hpack.js
wbuf
expand-tilde
homedir-polyfill
basic-auth
for-own
is-unc-path
recursive-readdir
path-root
cookies
bufferutil
utf-8-validate
easy-table
is-dotfile`;
    const file1String = await fsp.readFile(file1, 'utf8');
    // Trim entries so stray whitespace / CRLF line endings don't break
    // matching (consistent with build-package.cjs, which also trims).
    const f1Elements = file1String.split('\n').map((e) => e.trim()).filter(Boolean);
    // Use a Set for O(1) membership tests instead of Array.includes inside
    // the filter loop (accidental O(n^2)).
    const f2Elements = new Set(file2String.split('\n').map((e) => e.trim()).filter(Boolean));
    const leftSubtract = f1Elements.filter((x) => !f2Elements.has(x));
    // const rightSubtract = [...f2Elements].filter(x => !f1Elements.includes(x));
    console.log('Left Subtract:f1, not in f2');
    const leftSubtractString = leftSubtract.join('\n');
    await fsp.writeFile('left_subtract.txt', leftSubtractString, 'utf8');
    // console.log('Right Subtract: done, but not in main list', rightSubtract);
}
main().catch(err => {
    console.error('Error:', err);
    // Bug fix: surface the failure to callers/CI instead of exiting 0.
    process.exitCode = 1;
});