Files
safeImport/src_dataset/index.mjs

45 lines
1.8 KiB
JavaScript
Raw Normal View History

2025-07-09 13:26:51 +01:00
import * as csv from 'csv'
import fsp from 'fs/promises'
2025-07-26 13:44:32 +01:00
import { cloneRepoAndCheck } from './mine.mjs';
import { cacheFunctionOutput } from './cache.mjs';
import { processPromisesBatch } from './batch.mjs';
2025-07-09 13:26:51 +01:00
2025-07-26 13:44:32 +01:00
/**
 * Ranked list of packages that have a usable repository entry.
 *
 * Each element is a tuple `[packageName, repository, downloadCount]`, ordered
 * from the highest download count to the lowest.
 *
 * The list is produced through `cacheFunctionOutput('repos.json', ...)`;
 * presumably that helper persists/reuses the result under that name so the
 * two large JSON data sets are not re-processed on every run — confirm in
 * ./cache.mjs.
 */
const intermediateRepoList = await cacheFunctionOutput('repos.json', async function () {
    // Load both JSON data sets concurrently; they are independent of each other.
    const [packagesM, packageReposM] = await Promise.all([
        import('download-counts', { with: { type: 'json' } }),
        import('all-the-package-repos', { with: { type: 'json' } }),
    ]);
    const packages = packagesM.default;
    const packageRepos = packageReposM.default;

    // Keep packages with more than 100 downloads, skipping "@types/" packages.
    const packageList = Object.entries(packages)
        .filter(([name, downloads]) => downloads > 100 && !name.startsWith("@types/"));
    console.log('packagelist', packageList.length);

    /**
     * [package name, repository (may be missing at this stage), download count]
     * @type {[string, string | null | undefined, number][]}
     */
    const withRepos = packageList.map(([name, downloads]) => [
        name,
        // Only own properties count: a plain `packageRepos[name]` would return
        // inherited members (e.g. for a package called "constructor"), and
        // such a value would slip through the availability filter below.
        Object.hasOwn(packageRepos, name) ? packageRepos[name] : undefined,
        downloads,
    ]);
    console.log('withrepos', withRepos.length);

    // Filter out packages whose repository is not available.
    const withExactRepos = withRepos.filter(([, repo]) => repo != null && repo !== "");
    console.log('withreposCleaned', withExactRepos.length);

    // Sort in place by download count, highest first.
    withExactRepos.sort((a, b) => b[2] - a[2]);
    return withExactRepos;
});
// const packageMap = new Map(packageList)
2025-08-07 19:32:41 +01:00
// Number of entries taken from the front of the ranked list for this run.
const REPO_LIMIT = 4000;
// Second argument passed to processPromisesBatch (presumably the batch
// size / concurrency — confirm in ./batch.mjs).
const BATCH_SIZE = 20;

console.log(`Total repos`, intermediateRepoList.length);

// Run cloneRepoAndCheck over the first REPO_LIMIT entries of the list.
const selectedRepos = intermediateRepoList.slice(0, REPO_LIMIT);
const repoStatus = await processPromisesBatch(selectedRepos, BATCH_SIZE, cloneRepoAndCheck);

// Dump every result; the value returned by csv.stringify is handed straight
// to writeFile.
await fsp.writeFile('repostatus.csv', csv.stringify(repoStatus));

// Keep only results whose second field is truthy. Optional chaining already
// yields undefined for null/undefined entries, so those are dropped as well.
const minableRepositories = repoStatus.filter((entry) => Boolean(entry?.[1]));
await fsp.writeFile('minableRepositories2.csv', csv.stringify(minableRepositories));
// console.log("written results")