Files
safeImport/src_dataset/mine.mjs

98 lines
3.3 KiB
JavaScript
Raw Normal View History

2025-07-26 13:44:32 +01:00
import { existsSync, } from 'fs'
2025-08-07 19:32:41 +01:00
import { lstat, readFile,rm } from 'fs/promises'
2025-07-26 13:44:32 +01:00
import git from 'git-client'
import { resolve } from 'path'
import int from 'set.prototype.intersection';
2025-08-11 13:56:13 +01:00
import { matchFilterList } from './FILTER_LIST.mjs';
2025-07-26 13:44:32 +01:00
/**
 * Makes sure a repository is present in the local clone cache and decides
 * whether it is eligible for slicing.
 *
 * A repository is rejected (second tuple element `null`) when any of these hold,
 * checked in this order:
 *  1. its git URL is matched by `filterRepo`,
 *  2. it has a `tsconfig.json` at its root,
 *  3. it has no `package.json` at its root,
 *  4. `hasAnyActualDependencies` reports no dependencies,
 *  5. `checkTestingDependencies` finds no known testing library,
 *  6. its `package.json` declares no `scripts.test` entry.
 *
 * @param {[string,string,number]} param0 `[repoName, repoGitUrl, downloadCount]`.
 *   `downloadCount` is accepted but not used by this function.
 * @returns {Promise<[string,string|null]>} `[repoName, testScript]`; the second
 *   element is null if the repository is ineligible for slicing.
 * @throws {SyntaxError} if `package.json` exists but is not valid JSON.
 */
export async function cloneRepoAndCheck([repoName, repoGitUrl, downloadCount]) {
    const repoPath = resolve('../cache-repos/repos', repoName);

    if (filterRepo(repoGitUrl)) {
        console.log("[git] ignoring ", repoName);
        return [repoName, null];
    }

    await cacheCloneIdempotently(repoPath, repoName, repoGitUrl);

    // A tsconfig.json at the repository root disqualifies the repository.
    if (existsSync(resolve(repoPath, 'tsconfig.json'))) {
        return [repoName, null];
    }

    const packageFile = resolve(repoPath, 'package.json');
    if (!existsSync(packageFile)) {
        return [repoName, null];
    }

    const packageJSONContents = JSON.parse(await readFile(packageFile, 'utf8'));

    if (!hasAnyActualDependencies(packageJSONContents, repoName)) {
        return [repoName, null];
    }
    if (!checkTestingDependencies(packageJSONContents, repoName)) {
        return [repoName, null];
    }

    // A testing library may be installed without a `scripts.test` entry; report
    // that as null (as documented) instead of leaking `undefined` to callers.
    return [repoName, packageJSONContents?.scripts?.test ?? null];
}
2025-08-11 13:56:13 +01:00
/**
 * Asks the filter list whether a repository should be ignored.
 *
 * @param {string} repoGitUrl git URL of the repository under consideration
 * @returns whatever `matchFilterList` returns for that URL; a truthy value
 *   makes `cloneRepoAndCheck` skip the repository.
 */
function filterRepo(repoGitUrl) {
    const verdict = matchFilterList(repoGitUrl);
    return verdict;
}
2025-08-03 15:55:42 +01:00
/**
 * Tells whether a parsed package.json declares at least one entry under
 * `dependencies`. `devDependencies` are deliberately not counted here.
 *
 * @param {object} packageJSONContents parsed contents of a package.json
 * @param {string} repoName repository name (currently unused)
 * @returns {boolean} true when `dependencies` has at least one key
 */
function hasAnyActualDependencies(packageJSONContents, repoName) {
    const runtimeDependencies = packageJSONContents.dependencies;
    // `== null` covers both a missing field and an explicit JSON `null`;
    // the latter would make Object.keys() throw.
    if (runtimeDependencies == null) {
        return false;
    }
    return Object.keys(runtimeDependencies).length > 0;
}
2025-07-26 13:44:32 +01:00
/**
 * Tells whether a parsed package.json lists a known testing library
 * (`mocha` or `jest`) under either `dependencies` or `devDependencies`.
 *
 * @param {object} packageJSONContents parsed contents of a package.json
 * @param {string} repoName repository name (currently unused)
 * @returns {boolean} true when at least one known testing library is declared
 */
function checkTestingDependencies(packageJSONContents, repoName) {
    const testingLibraries = ['mocha', 'jest'];

    // `?? {}` tolerates both a missing section and an explicit JSON `null`,
    // which would otherwise make Object.keys() throw.
    const declaredPackages = new Set([
        ...Object.keys(packageJSONContents.dependencies ?? {}),
        ...Object.keys(packageJSONContents.devDependencies ?? {}),
    ]);

    return testingLibraries.some((library) => declaredPackages.has(library));
}
/**
 * Ensures a checkout is present at `repoPath`, cloning only when nothing
 * exists there yet. An existing directory is left untouched, so repeated calls
 * do not re-clone.
 *
 * @param {string} repoPath location of the cached checkout
 * @param {string} repoName repository name, used in the error message
 * @param {string} repoGitUrl URL handed to `git.clone`
 * @returns {Promise<void>}
 * @throws {Error} if `repoPath` exists but is not a directory
 */
async function cacheCloneIdempotently(repoPath, repoName, repoGitUrl) {
    if (!existsSync(repoPath)) {
        console.log("[git] cloning", repoGitUrl);
        await git.clone(repoGitUrl, repoPath, { 'single-branch': true, depth: 1 });
        return;
    }

    // lstat does not follow symlinks, so a symlink at repoPath is rejected too.
    const stats = await lstat(repoPath);
    if (!stats.isDirectory()) {
        // Error takes a single message string; a second argument is treated as
        // options, which previously dropped the explanation from the message.
        throw new Error(`${repoName} is mangled. delete directory and re-clone.`);
    }
}