diff --git a/.vscode/settings.json b/.vscode/settings.json index f02018d..df44008 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,5 +1,23 @@ { "files.watcherExclude": { - "cache/**": true + "cache/**": true, + "candidates/**": true, + }, + "search.exclude": { + "cache/**": true, + "candidates/**": true + }, + "files.exclude": { + "**/.git": true, + "**/.svn": true, + "**/.hg": true, + "**/.DS_Store": true, + "**/Thumbs.db": true, + "cache/**": true, + }, + "debug.javascript.codelens.npmScripts": "never", + + + } \ No newline at end of file diff --git a/src/tsCalls.mjs b/src/tsCalls.mjs index 7396f9e..66dfb20 100644 --- a/src/tsCalls.mjs +++ b/src/tsCalls.mjs @@ -85,8 +85,8 @@ export function getImportCallsAndArgumentTypes(importDecls, checker, mainFilePat } else if (importDecl.isKind(SyntaxKind.ImportDeclaration)) {// import {x,z} from 'module'; - console.log("Found import declaration", importDecl.getPos()); - console.log("Named imports", importDecl.getNamedImports().length); + // console.log("Found import declaration", importDecl.getPos()); + // console.log("Named imports", importDecl.getNamedImports().length); const namedImports = importDecl.getNamedImports(); for (const namedImport of namedImports) { @@ -134,7 +134,7 @@ function handleImportForGivenImport(checker, importStringLiteral,namedImport, ma console.error("Unhandled named import alias", aliasNode.getText()); } - console.log("Named import", namedImport.getNameNode().getText()); + // console.log("Named import", namedImport.getNameNode().getText()); const importNode = namedImport.getNameNode(); if (importNode.isKind(SyntaxKind.StringLiteral)) { throw Error("Unexpected string literal import node. Expected identifier"); @@ -156,7 +156,7 @@ function recordNamespaceImportIdentifierUsage(checker, importNode, mainFilePath, const referenceSourceFile = importRef.getDefinition().getSourceFile(); const comparePath = path.relative(mainFilePath, referenceSourceFile.getFilePath()); if (comparePath !== '') { - console.warn("Skipping import reference from other file", referenceSourceFile.getFilePath()); + // console.warn("Skipping import reference from other file", referenceSourceFile.getFilePath()); continue; } // const filePath = referenceSourceFile.getFilePath(); @@ -242,7 +242,7 @@ function recordImportedIdentifierUsage(checker, importNode, mainFilePath, librar const referenceSourceFile = importRef.getDefinition().getSourceFile(); const comparePath = path.relative(mainFilePath, referenceSourceFile.getFilePath()); if (comparePath !== '') { - console.warn("Skipping import reference from other file", referenceSourceFile.getFilePath()); + // console.warn("Skipping import reference from other file", referenceSourceFile.getFilePath()); continue; } // const filePath = referenceSourceFile.getFilePath(); diff --git a/src_dataset/FILTER_LIST.mjs b/src_dataset/FILTER_LIST.mjs index b488db2..139505e 100644 --- a/src_dataset/FILTER_LIST.mjs +++ b/src_dataset/FILTER_LIST.mjs @@ -31,5 +31,7 @@ export const FILTER_LIST = [ "https://github.com/paulmillr/async-each", "https://github.com/yarnpkg/yarn/blob/master/packages", "https://github.com/substack/semver-compare", - "https://github.com/substack/node-archy" + "https://github.com/substack/node-archy", + "https://github.com/substack/github-from-package", + "https://github.com/babel/babel/tree/master/packages/babel-core" ]; diff --git a/src_dataset/batch.mjs b/src_dataset/batch.mjs index 4c6de7c..d70c847 100644 --- a/src_dataset/batch.mjs +++ b/src_dataset/batch.mjs @@ -15,7 +15,7 @@ export async function processPromisesBatch( asyncCallback, ) { const results = []; - const fileHandle = await open('cache/progress.txt',"w+"); + const fileHandle = await open('../cache-repos/progress.txt',"w+"); for (let start = 0; start < items.length; start += limit) { const end = start + limit > items.length ? items.length : start + limit; diff --git a/src_dataset/cache.mjs b/src_dataset/cache.mjs index 44188c0..f270983 100644 --- a/src_dataset/cache.mjs +++ b/src_dataset/cache.mjs @@ -10,7 +10,7 @@ import { resolve } from "node:path"; * @returns {Promise} */ export async function cacheFunctionOutput(fileName, asyncCallback) { - const fileLoc = resolve('./cache', fileName); + const fileLoc = resolve('../cache-repos', fileName); if (existsSync(fileLoc)) { console.log("[cacher] Using cached ", fileLoc); const fileContents = (await readFile(fileLoc)).toString(); diff --git a/src_dataset/index.mjs b/src_dataset/index.mjs index 28776b6..d3a10fd 100644 --- a/src_dataset/index.mjs +++ b/src_dataset/index.mjs @@ -29,10 +29,10 @@ const intermediateRepoList = await cacheFunctionOutput('repos.json', async funct }) // const packageMap = new Map(packageList) -console.log(intermediateRepoList.length) -const intermediateRepoListSmaller = intermediateRepoList.slice(0,250); +console.log(`Total repos`,intermediateRepoList.length) +const intermediateRepoListSmaller = intermediateRepoList.slice(0,4000); -const repoStatus = await processPromisesBatch(intermediateRepoListSmaller,15,cloneRepoAndCheck) +const repoStatus = await processPromisesBatch(intermediateRepoListSmaller,20,cloneRepoAndCheck) const repoStatusString = csv.stringify(repoStatus); await fsp.writeFile('repostatus.csv', repoStatusString); diff --git a/src_dataset/mine.mjs b/src_dataset/mine.mjs index 95175fb..4b93fab 100644 --- a/src_dataset/mine.mjs +++ b/src_dataset/mine.mjs @@ -1,5 +1,5 @@ import { existsSync, } from 'fs' -import { lstat, readFile } from 'fs/promises' +import { lstat, readFile,rm } from 'fs/promises' import git from 'git-client' import { resolve } from 'path' import int from 'set.prototype.intersection'; @@ -10,7 +10,7 @@ import { FILTER_LIST } from './FILTER_LIST.mjs'; * @returns {Promise<[string,string|null]>} second argument is null if ineligible for slicing */ export async function cloneRepoAndCheck([repoName, repoGitUrl, downloadCount]) { - const repoPath = resolve('cache/repos', repoName) + const repoPath = resolve('../cache-repos/repos', repoName) if (FILTER_LIST.includes(repoGitUrl)) { console.log("[git] ignoring ", repoName) @@ -19,9 +19,11 @@ export async function cloneRepoAndCheck([repoName, repoGitUrl, downloadCount]) { // console.log('[git] fetching',repoName, repoGitUrl); await cacheCloneIdempotently(repoPath, repoName, repoGitUrl) - const tsConfigFileLocation = resolve(repoPath,'tsconfig.json'); + const tsConfigFileLocation = resolve(repoPath, 'tsconfig.json'); const tsConfigFileExists = existsSync(tsConfigFileLocation); - if(tsConfigFileExists) return [repoName, null]; + if (tsConfigFileExists){ + return [repoName, null]; + } const packageFile = resolve(repoPath, 'package.json') @@ -31,14 +33,15 @@ export async function cloneRepoAndCheck([repoName, repoGitUrl, downloadCount]) { // console.log(packageJSONContentsString); const packageJSONContents = JSON.parse(packageJSONContentsString) // console.log(repoName, packageJSONContents.license) - if(!hasAnyActualDependencies(packageJSONContents, repoName)) { - console.log("[git] skipping", repoName, "has no dependencies"); + if (!hasAnyActualDependencies(packageJSONContents, repoName)) { + // console.log("[git] skipping", repoName, "has no dependencies"); return [repoName, null]; } const hasDependencies = checkTestingDependencies(packageJSONContents, repoName); - if (hasDependencies) + if (hasDependencies) { return [repoName, ((packageJSONContents?.scripts?.test))] + } else return [repoName, null] } function hasAnyActualDependencies(packageJSONContents, repoName) { @@ -49,7 +52,7 @@ function hasAnyActualDependencies(packageJSONContents, repoName) { } function checkTestingDependencies(packageJSONContents, repoName) { - const testingLibraries = new Set(['mocha', 'istanbul']); + const testingLibraries = new Set(['mocha']); const dependencies = new Set(); if (packageJSONContents.dependencies !== undefined) { for (const dep of Object.keys(packageJSONContents.dependencies)) { @@ -78,10 +81,10 @@ async function cacheCloneIdempotently(repoPath, repoName, repoGitUrl) { if (!isDir) throw new Error(repoName, " is mangled. delete directory and re-clone.") else { // const path = await git.status({ $cwd: repoPath }) - + } } else { console.log("[git] cloning", repoGitUrl); - await git.clone(repoGitUrl, repoPath,{'single-branch':true,depth:1}) + await git.clone(repoGitUrl, repoPath, { 'single-branch': true, depth: 1 }) } }