2025-07-09 13:26:51 +01:00
import * as csv from 'csv'
import fsp from 'fs/promises'
2025-07-26 13:44:32 +01:00
import { cloneRepoAndCheck } from './mine.mjs' ;
import { cacheFunctionOutput } from './cache.mjs' ;
import { processPromisesBatch } from './batch.mjs' ;
2025-07-09 13:26:51 +01:00
2025-07-26 13:44:32 +01:00
2025-08-14 21:29:36 +01:00
/**
 * Popular npm packages paired with their source repository, most-downloaded
 * first.
 *
 * The value is obtained through `cacheFunctionOutput` under the name
 * 'repos.n2.json'; the async callback below is what computes it.
 * NOTE(review): presumably the callback only runs when no cached result
 * exists — confirm against ./cache.mjs.
 *
 * Each entry is a `[packageName, repo, downloadCount]` triple.
 */
const intermediateRepoList = await cacheFunctionOutput('repos.n2.json', async function () {
  // Both data sets are JSON modules; load them concurrently.
  const [downloadsModule, reposModule] = await Promise.all([
    import('download-counts', { with: { type: 'json' } }),
    import('all-the-package-repos', { with: { type: 'json' } }),
  ]);
  const downloadCounts = downloadsModule.default;
  const repoByPackage = reposModule.default;

  // Keep packages above 100,000 downloads, skipping everything published
  // under the @types/ and @webassemblyjs/ scopes.
  const popularPackages = Object.entries(downloadCounts).filter(([name, downloads]) => {
    if (!(downloads > 100_000)) {
      return false;
    }
    return !name.startsWith('@types/') && !name.startsWith('@webassemblyjs/');
  });
  console.log('packagelist', popularPackages.length);

  /**
   * [package name, repo, download count]
   * @type {[string, string, number][]}
   */
  const withRepos = popularPackages.map(([name, downloads]) => [
    name,
    repoByPackage[name],
    downloads,
  ]);
  console.log('withrepos', withRepos.length);

  // Drop packages whose repo is missing (null, undefined or an empty string).
  const withExactRepos = withRepos.filter(([, repo]) => repo != null && repo !== '');
  console.log('withreposCleaned', withExactRepos.length);

  // Order by download count, highest first (in place).
  withExactRepos.sort((left, right) => right[2] - left[2]);
  return withExactRepos;
});
// const packageMap = new Map(packageList)
2025-08-07 19:32:41 +01:00
console.log(` Total repos `, intermediateRepoList.length);

// Only the first 20000 entries of the ranked list are processed.
const intermediateRepoListSmaller = intermediateRepoList.slice(0, 20000);

// Apply cloneRepoAndCheck to every shortlisted entry through the batching helper.
// NOTE(review): 40 is presumably the batch size / concurrency limit — confirm
// against ./batch.mjs.
const repoStatus = await processPromisesBatch(
  intermediateRepoListSmaller,
  40,
  cloneRepoAndCheck,
);

// Write the status of every processed entry.
// NOTE(review): depending on the csv package version, `stringify` called
// without a callback may return a stream rather than a string; confirm that
// `fsp.writeFile` receives a supported value.
const repoStatusString = csv.stringify(repoStatus);
await fsp.writeFile('repostatus.csv', repoStatusString);

// Keep only the entries whose second field is truthy; null/undefined entries
// are dropped as well.
const minableRepositories = repoStatus.filter((status) => Boolean(status?.[1]));
const output = csv.stringify(minableRepositories);
await fsp.writeFile('minableRepositories2.csv', output);
// console.log("written results")