[add] more repos and generic filtering

This commit is contained in:
2025-08-11 13:56:13 +01:00
parent aa21c1d866
commit b044536a3b
9 changed files with 5320 additions and 27 deletions

View File

@@ -1,10 +1,10 @@
export const FILTER_LIST = [
import GlobToRegExp from "glob-to-regexp";
const FILTER_LIST = [
"https://github.com/substack/*",
"https://gitlab.com/contexttesting/zoroaster.git",
"https://github.com/Eternity-Bots",
"https://github.com/node-x-extras/x-path",
"https://github.com/substack/node-x256",
"https://github.com/substack/node-wordwrap",
"https://github.com/zkochan/packages/blob/main/which-pm-runs",
"https://github.com/webpack-contrib/webpack-addons",
"https://github.com/zznoillusion1026/MyImage",
@@ -15,23 +15,34 @@ export const FILTER_LIST = [
"https://github.com/nodelib/nodelib/tree/master/packages/fs/fs.scandir",
"https://github.com/nodelib/nodelib/tree/master/packages/fs/fs.walk",
"https://github.com/nodelib/nodelib/tree/master/packages/fs/fs.macchiato",
"https://github.com/substack/text-table",
"https://github.com/babel/babel/tree/master/packages/babel-plugin-syntax-object-rest-spread",
"https://github.com/babel/babel/tree/master/packages/babel-plugin-syntax-optional-catch-binding",
"https://github.com/babel/babel/tree/master/packages/babel-plugin-syntax-async-generators",
"https://github.com/babel/babel/tree/master/packages/babel-plugin-syntax-optional-chaining",
"https://github.com/babel/babel/tree/master/packages/babel-plugin-syntax-json-strings",
"https://github.com/babel/babel/tree/master/packages/babel-plugin-syntax-nullish-coalescing-operator",
"https://github.com/babel/babel/tree/master/packages/babel-plugin-syntax-bigint",
"https://github.com/babel/babel/tree/master/packages/babel-plugin-syntax-dynamic-import",
"https://github.com/substack/node-commondir",
"https://github.com/babel/babel/tree/master/packages/babel-plugin-syntax-export-namespace-from",
"https://github.com/substack/https-browserify",
"https://github.com/babel/babel/tree/master/packages/babel-runtime",
"https://github.com/paulmillr/async-each",
"https://github.com/yarnpkg/yarn/blob/master/packages",
"https://github.com/substack/semver-compare",
"https://github.com/substack/node-archy",
"https://github.com/substack/github-from-package",
"https://github.com/babel/babel/tree/master/packages/babel-core"
"https://github.com/emotion-js/emotion/tree/master/packages/stylis",
"https://github.com/kogosoftwarellc/open-api/tree/master/packages/openapi-types",
"https://github.com/thenativeweb/boolean",
"https://github.com/zkochan/packages/tree/master/read-yaml-file",
"https://github.com/johnotander/rgba-regex",
"https://github.com/adobe/react-spectrum/tree/main/packages/@internationalized/date",
"https://github.com/pnpm/pnpm/blob/main/packages",
"https://github.com/jhermsmeier/node-scuid",
"https://github.com/emotion-js/emotion/tree/master/packages/babel-plugin-emotion",
"https://github.com/emotion-js/emotion/tree/master/removed-packages/core",
"https://github.com/babel/babel/tree/master/packages/*",
"https://github.com/pugjs/pug/tree/master/packages/*",
"https://github.com/zkochan/packages/tree/master/*",
"https://github.com/Marak/Faker.js",
"https://github.com/ethanent/phin",
"https://github.com/Popmotion/popmotion/tree/master/packages/*",
"https://github.com/gulpjs/copy-prop"
];
// Compile every glob pattern once at module load, so each lookup only runs regex tests.
// The arrow wrapper forwards just the pattern: handing GlobToRegExp straight to `map`
// would also pass the element index as its second (options) argument.
const FILTER_LIST_REGEX = FILTER_LIST.map((pattern) => GlobToRegExp(pattern));
/**
 * Tests a repository URL against every compiled pattern in FILTER_LIST_REGEX.
 *
 * @param {string} repoUrl - Repository URL to check.
 * @returns {boolean} `true` as soon as one pattern matches, otherwise `false`.
 */
export function matchFilterList(repoUrl) {
  for (const pattern of FILTER_LIST_REGEX) {
    if (pattern.test(repoUrl)) {
      return true;
    }
  }
  return false;
}

View File

@@ -30,9 +30,9 @@ const intermediateRepoList = await cacheFunctionOutput('repos.json', async funct
// const packageMap = new Map(packageList)
// Report how many repositories were collected in total.
console.log(`Total repos`,intermediateRepoList.length)
// Cap the run to the leading entries of the list.
// NOTE(review): the two `slice` lines below look like the before/after pair of this
// diff hunk (4000 vs. 5000 entries) — only one of them should exist in the real file.
const intermediateRepoListSmaller = intermediateRepoList.slice(0,4000);
const intermediateRepoListSmaller = intermediateRepoList.slice(0,5000);
// Run cloneRepoAndCheck over the capped list through processPromisesBatch.
// NOTE(review): same before/after pairing here; the numeric argument (20 vs. 15) is
// presumably the batch size — confirm against processPromisesBatch.
const repoStatus = await processPromisesBatch(intermediateRepoListSmaller,20,cloneRepoAndCheck)
const repoStatus = await processPromisesBatch(intermediateRepoListSmaller,15,cloneRepoAndCheck)
// Serialise the collected results as CSV and write them to repostatus.csv.
const repoStatusString = csv.stringify(repoStatus);
await fsp.writeFile('repostatus.csv', repoStatusString);

View File

@@ -3,7 +3,7 @@ import { lstat, readFile,rm } from 'fs/promises'
import git from 'git-client'
import { resolve } from 'path'
import int from 'set.prototype.intersection';
import { FILTER_LIST } from './FILTER_LIST.mjs';
import { matchFilterList } from './FILTER_LIST.mjs';
/**
*
* @param {[string,string,number]} param0
@@ -12,7 +12,7 @@ import { FILTER_LIST } from './FILTER_LIST.mjs';
export async function cloneRepoAndCheck([repoName, repoGitUrl, downloadCount]) {
const repoPath = resolve('../cache-repos/repos', repoName)
if (FILTER_LIST.includes(repoGitUrl)) {
if (filterRepo(repoGitUrl)) {
console.log("[git] ignoring ", repoName)
return [repoName, null]
};
@@ -44,6 +44,12 @@ export async function cloneRepoAndCheck([repoName, repoGitUrl, downloadCount]) {
}
else return [repoName, null]
}
/**
 * Decides whether a repository should be skipped by delegating to matchFilterList.
 *
 * @param {string} repoGitUrl - Git URL of the repository.
 * @returns {boolean} The result of matchFilterList for that URL.
 */
function filterRepo(repoGitUrl) {
  const isFiltered = matchFilterList(repoGitUrl);
  return isFiltered;
}
function hasAnyActualDependencies(packageJSONContents, repoName) {
if (packageJSONContents.dependencies !== undefined && Object.keys(packageJSONContents.dependencies).length > 0) {
return true;