Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 127 additions & 0 deletions bin/diff-changes.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
import { isCosmeticOnlyJsonSchemaChange } from './diff-json-schema.js';
import type { ActorConfig, Commit } from './types.js';

/**
 * Input for `getChangedActors`.
 */
interface ShouldBuildAndTestOptions {
// Paths of the changed files; they are lowercased before being classified
filepathsChanged: string[];
// Actor configs; changed Actor folders are matched against their lowercased `actorName`
actorConfigs: ActorConfig[];
// Defaults to false; when false, cosmetic-only changes do not mark any Actor as changed
isLatest?: boolean;
// Forwarded to the cosmetic-change check that runs for `.json` files inside Actor folders
commits: Commit[];
}

/**
 * Checks whether a (lowercased) file path lies inside an Actor folder, i.e. `actors/<folder>/...`
 * or `standalone-actors/<folder>/...`, and if so derives the Actor name from the folder name.
 *
 * @param lowercaseFilePath Repository-relative file path, already lowercased.
 * @returns `{ isActorFolder: true, actorName }` where `actorName` is the folder name with its last
 * underscore replaced by `/`, or `{ isActorFolder: false }` when the path is not inside an Actor folder.
 */
export const maybeParseActorFolder = (lowercaseFilePath: string): { isActorFolder: true, actorName: string } | { isActorFolder: false } => {
    const folderName = /^(?:standalone-)?actors\/([^/]+)\/.+/.exec(lowercaseFilePath)?.[1];
    if (folderName === undefined) {
        return { isActorFolder: false };
    }

    // Usernames may contain underscores themselves (e.g. google_maps_email_extractor_standby-contact-details-scraper),
    // so only the last underscore separates the username from the Actor name.
    const actorName = folderName.replace(/_(?=[^_]*$)/, '/');
    return { isActorFolder: true, actorName };
};

/**
 * Tells whether a (lowercased) path is one of the top-level files or folders whose changes are ignored.
 * Matching is by prefix, so it also works for folders (entries ending with `/`).
 */
const isIgnoredTopLevelFile = (lowercaseFilePath: string) => {
    // The deprecated /code and /shared folders are treated as top-level code, so their prefix is dropped first
    const pathWithoutLegacyFolders = lowercaseFilePath.replace(/^code\//, '').replace(/^shared\//, '');

    // On top level, we should only have dev-only readme and .actor/ is just for apify push CLI (real Actor configs are in /actors)
    const ignoredPrefixes = ['.vscode/', '.gitignore', 'readme.md', '.husky/', '.eslintrc', 'eslint.config.mjs', '.prettierrc', '.editorconfig', '.actor/'];
    for (const ignoredPrefix of ignoredPrefixes) {
        if (pathWithoutLegacyFolders.startsWith(ignoredPrefix)) {
            return true;
        }
    }
    return false;
};

/**
 * Classification of a single changed file: how it impacts Actors (`impact`) and, unless it is ignored,
 * which Actors it affects (`includes`: one specific Actor config or `'all-actors'`).
 */
type FileChange =
// The file does not mark any Actor as changed
{ impact: 'ignored' } |
// Only things that influence how the Actor looks - e.g. README and CHANGELOG files, schema titles, descriptions, reordering, etc. We only need to rebuild on release
{ impact: 'cosmetic', includes: 'all-actors' | ActorConfig } |
// Influences how the Actor works - we need to run tests
{
impact: 'functional', includes: 'all-actors' | ActorConfig
};

/**
 * Classifies one changed file by its impact and by the Actors it affects.
 *
 * Rules, in order:
 * 1. Ignored top-level files -> `ignored`.
 * 2. Files outside Actor folders: a changelog is `cosmetic` for all Actors, anything else is
 *    `functional` for all Actors (we assume it can interact with the code).
 * 3. Files inside an Actor folder affect only that Actor: changelog/readme and cosmetic-only JSON changes
 *    are `cosmetic`, everything else is `functional`. If the folder has no matching Actor config,
 *    the file is `ignored` (and an error is logged).
 *
 * A changelog inside an Actor folder is attributed to that Actor only. Attributing it to `'all-actors'`
 * would rebuild every non-standalone Actor and would never rebuild a standalone Actor for its own changelog,
 * because `'all-actors'` does not include standalone Actors.
 *
 * @param lowercaseFilePath Repository-relative file path, already lowercased.
 * @param actorConfigs All Actor configs; matched by lowercased `actorName`.
 * @param commits Commit range forwarded to the JSON cosmetic-change check.
 */
const classifyFileChange = (lowercaseFilePath: string, actorConfigs: ActorConfig[], commits: Commit[]): FileChange => {
    if (isIgnoredTopLevelFile(lowercaseFilePath)) {
        return { impact: 'ignored' };
    }

    const isChangelog = lowercaseFilePath.endsWith('changelog.md');

    const actorFolderInfo = maybeParseActorFolder(lowercaseFilePath);
    if (!actorFolderInfo.isActorFolder) {
        if (isChangelog) {
            return { impact: 'cosmetic', includes: 'all-actors' };
        }
        // For any other files, we assume they can interact with the code
        return { impact: 'functional', includes: 'all-actors' };
    }

    const changedActorName = actorFolderInfo.actorName;
    const actorConfigChanged = actorConfigs.find(({ actorName }) => actorName.toLowerCase() === changedActorName);
    // This is some super weird case that happened once in the past but I don't remember the context anymore
    if (actorConfigChanged === undefined) {
        console.error('SHOULD NEVER HAPPEN: changes was found in an actor folder which no longer exists in the current commit, skipping this file', {
            actorName: changedActorName,
            lowercaseFilePath,
        });
        return { impact: 'ignored' };
    }

    if (isChangelog || lowercaseFilePath.endsWith('readme.md')) {
        return { impact: 'cosmetic', includes: actorConfigChanged };
    }
    // The git lookup only runs for .json files; when it fails, the check reports a non-cosmetic change
    if (lowercaseFilePath.endsWith('.json') && isCosmeticOnlyJsonSchemaChange(commits, lowercaseFilePath)) {
        return { impact: 'cosmetic', includes: actorConfigChanged };
    }

    return { impact: 'functional', includes: actorConfigChanged };
};

/**
 * Determines which Actors need to be built and tested for a set of changed files.
 *
 * Each changed path is lowercased and classified exactly once; the same classification is used both for
 * the decision and for the summary logs. Classifying a `.json` file may spawn git commands and log errors,
 * so re-classifying for logging would repeat that work and duplicate the error output.
 *
 * - `ignored` files never mark an Actor as changed.
 * - `cosmetic` files mark Actors as changed only when `isLatest` is true.
 * - `functional` files always mark Actors as changed.
 * - Changes that apply to `'all-actors'` cover only non-standalone Actors; standalone Actors are marked
 *   only by changes inside their own folder.
 *
 * @returns The changed Actor configs, de-duplicated by `folder`.
 */
export const getChangedActors = (
    { filepathsChanged, actorConfigs, isLatest = false, commits }: ShouldBuildAndTestOptions,
): ActorConfig[] => {
    // folder -> ActorConfig
    const actorsChangedMap = new Map<string, ActorConfig>();

    const actorConfigsWithoutStandalone = actorConfigs.filter(({ isStandalone }) => !isStandalone);

    const classifiedFiles = filepathsChanged.map((file) => {
        const lowercaseFilePath = file.toLowerCase();
        return { lowercaseFilePath, fileChange: classifyFileChange(lowercaseFilePath, actorConfigs, commits) };
    });

    for (const { fileChange } of classifiedFiles) {
        if (fileChange.impact === 'ignored') {
            continue;
        }

        if (fileChange.impact === 'cosmetic' && !isLatest) {
            continue;
        }

        if (fileChange.includes === 'all-actors') {
            // Standalone Actors are handled always via specific actors change, not all-actors
            for (const actorConfig of actorConfigsWithoutStandalone) {
                actorsChangedMap.set(actorConfig.folder, actorConfig);
            }
        } else {
            actorsChangedMap.set(fileChange.includes.folder, fileChange.includes);
        }
    }

    const actorsChanged = Array.from(actorsChangedMap.values());

    // All below here is just for logging
    const filesWithImpact = (impact: FileChange['impact']): string => classifiedFiles
        .filter(({ fileChange }) => fileChange.impact === impact)
        .map(({ lowercaseFilePath }) => lowercaseFilePath)
        .join(', ');

    console.error(`[DIFF]: Ignored files (don't trigger test or build): ${filesWithImpact('ignored')}`);
    console.error(`[DIFF]: Cosmetic files (should only trigger release build): ${filesWithImpact('cosmetic')}`);
    console.error(`[DIFF]: Functional files (trigger test & release build): ${filesWithImpact('functional')}`);

    if (actorsChanged.length > 0) {
        const miniactors = actorsChanged.filter((config) => !config.isStandalone).map((config) => config.actorName);
        const standaloneActors = actorsChanged.filter((config) => config.isStandalone).map((config) => config.actorName);
        console.error(`[DIFF]: MiniActors to be built and tested: ${miniactors.join(', ')}`);
        console.error(`[DIFF]: Standalone Actors to be built and tested: ${standaloneActors.join(', ')}`);
    } else {
        console.error(`[DIFF]: No relevant files changed, skipping builds and tests`);
    }

    return actorsChanged;
};
43 changes: 43 additions & 0 deletions bin/diff-json-schema.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import type { Commit } from './types.js';
import { spawnCommandInGhWorkspace } from './utils.js';

// Keys whose values only influence how the Actor looks; any change under such a key is cosmetic
const COSMETIC_JSON_FIELD_NAMES = new Set([
    'title', 'description', 'example', 'enumTitles', 'sectionCaption', 'sectionDescription',
]);

// JSON Schema keywords whose direct children are user-defined names (e.g. input field names), not schema keywords
const NAME_KEYED_JSON_FIELD_NAMES = new Set(['properties', 'patternProperties', 'definitions', '$defs']);

/** Type guard for non-null, non-array objects. */
const isPlainObject = (val: unknown): val is Record<string, unknown> =>
    typeof val === 'object' && val !== null && !Array.isArray(val);

/**
 * Recursively decides whether the difference between two parsed JSON values is cosmetic only.
 *
 * @param oldVal Value before the change (`undefined` when the key was added).
 * @param newVal Value after the change (`undefined` when the key was removed).
 * @param currentKey Key under which the values are stored in their parent object, if any.
 * @param isUserDefinedName True when `currentKey` is a user-defined name (a direct child of e.g. `properties`).
 * Such a key is never treated as a cosmetic keyword, so a field that happens to be called `title` or
 * `description` is still compared by its content.
 * @returns True when the values are equal or differ only under cosmetic keys. Differences in anything else,
 * including arrays and primitives, are reported as non-cosmetic.
 */
const isCosmeticObjectChange = (oldVal: unknown, newVal: unknown, currentKey?: string, isUserDefinedName = false): boolean => {
    // If the key itself is a cosmetic keyword, any change under it is fine
    if (!isUserDefinedName && currentKey && COSMETIC_JSON_FIELD_NAMES.has(currentKey)) return true;
    if (JSON.stringify(oldVal) === JSON.stringify(newVal)) return true;
    if (isPlainObject(oldVal) && isPlainObject(newVal)) {
        // A field that is itself named e.g. `properties` holds a regular schema, so its children are keywords again
        const childrenAreUserDefinedNames = !isUserDefinedName
            && currentKey !== undefined
            && NAME_KEYED_JSON_FIELD_NAMES.has(currentKey);
        // Comparing key by key (instead of by serialized form) makes key reordering cosmetic
        const allKeys = new Set([...Object.keys(oldVal), ...Object.keys(newVal)]);
        return [...allKeys].every((key) => isCosmeticObjectChange(oldVal[key], newVal[key], key, childrenAreUserDefinedNames));
    }
    return false;
};

/**
 * Returns true if the JSON file at `changedFilepath` differs between the parent of the first commit and
 * the last commit only cosmetically, as decided by `isCosmeticObjectChange`.
 *
 * The answer is conservative: it returns false (non-cosmetic) when `commits` is empty, or when either
 * version of the file cannot be read or parsed (e.g. the file was added or deleted in the range).
 *
 * NOTE(review): the path is interpolated into a `git show <ref>:<path>` command as-is. Callers in
 * diff-changes.ts pass a lowercased path, so a file whose real path contains uppercase characters will likely
 * fail the lookup and be treated as non-cosmetic - confirm whether the original-case path should be passed.
 *
 * @param commits Commit range; the first and last entries define the compared refs.
 * @param changedFilepath Repository-relative path of the changed JSON file.
 */
export const isCosmeticOnlyJsonSchemaChange = (commits: Commit[], changedFilepath: string): boolean => {
    const firstCommit = commits[0];
    const lastCommit = commits[commits.length - 1];
    if (firstCommit === undefined || lastCommit === undefined) {
        console.error(`No commits provided to compare ${changedFilepath}, treating it as a non-cosmetic change.`);
        return false;
    }

    // TODO: validate this is the right commit range
    const oldRef = `${firstCommit.sha}~`;
    const newRef = lastCommit.sha;
    let oldJson: unknown;
    let newJson: unknown;
    try {
        const oldContent = spawnCommandInGhWorkspace(`git show ${oldRef}:${changedFilepath}`);
        const newContent = spawnCommandInGhWorkspace(`git show ${newRef}:${changedFilepath}`);

        oldJson = JSON.parse(oldContent);
        newJson = JSON.parse(newContent);
    } catch {
        console.error(`Failed to get or parse JSON content for ${changedFilepath} at refs ${oldRef} and ${newRef}, maybe it is new file or deleted? Treating it as a non-cosmetic change.`);
        return false;
    }
    return isCosmeticObjectChange(oldJson, newJson);
};
17 changes: 13 additions & 4 deletions bin/git.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { Commit, Config } from './types.js';
import type { Commit, Config } from './types.js';
import { spawnCommandInGhWorkspace } from './utils.js';

export const GIT_FORMAT_SEPARATOR = '»¦«';
Expand All @@ -8,11 +8,13 @@ const GIT_LOG_FORMAT = ['%H', '%aN<%aE>', '%aD', '%s'].join(GIT_FORMAT_SEPARATOR
* Gets the list of changed files between the given commits (inclusive).
*/
export const getChangedFiles = (commits: Commit[]): string[] => {
    // Without commits there is no range to diff; reading commits[0].sha would throw
    if (commits.length === 0) {
        return [];
    }

    const changedFilesString = spawnCommandInGhWorkspace(
        `git diff --name-only ${commits[0].sha}~..${commits[commits.length - 1].sha}`,
    );

    // Splitting empty output or output with a trailing newline yields empty entries; drop them so that
    // an empty path is never treated as a changed file
    const changedFiles = changedFilesString.split('\n').filter((file) => file !== '');
    console.error(`Changed files (up to 50): ${changedFiles.slice(0, 50).join(', ')}`);
    return changedFiles;
};

/**
Expand All @@ -29,8 +31,15 @@ export const getCommits = ({ sourceBranch, targetBranch, baseCommit: baseCommitS
const baseCommitIndex = commits.findIndex((commit) => commit.sha === baseCommitSha);

const hasBaseCommit = baseCommitIndex !== -1;
if (hasBaseCommit) return commits.slice(baseCommitIndex + 1);
if (hasBaseCommit) {
const commitsUpToBaseCommit = commits.slice(baseCommitIndex + 1);
console.error(`Found base commit ${baseCommitSha} at index ${baseCommitIndex}, returning ${commitsUpToBaseCommit.length} commits after it`);
console.error(`Commits being returned: ${commitsUpToBaseCommit.map((c) => c.sha).join(', ')}`);
return commitsUpToBaseCommit;
}

console.error(`Base commit ${baseCommitSha} not found in the commit range, returning all ${commits.length} commits`);
console.error(`Commits being returned: ${commits.map((c) => c.sha).join(', ')}`);
return commits;
};

Expand Down
14 changes: 9 additions & 5 deletions bin/main.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
#!/usr/bin/env node

import process from 'process';
import process from 'node:process';

import yargs, { type Argv } from 'yargs';
import { hideBin } from 'yargs/helpers';

import { getRepoActors, getChangedActors, spawnCommandInGhWorkspace, setCwd } from './utils.js';
import { runBuilds } from './build.js';
import { getChangedActors } from './diff-changes.js';
import { getChangedFiles, getCommits } from './git.js';
import { getPushData } from './github.js';
import { notifyToSlack } from './slack.js';
import { reportTestResults } from './test-report.js';
import { getRepoActors, setCwd,spawnCommandInGhWorkspace } from './utils.js';

/**
* Middlewares to be run before every command execution
Expand Down Expand Up @@ -69,7 +71,7 @@ await yargs()
const commits = getCommits(args);
const changedFiles = getChangedFiles(commits);
const actorConfigs = await getRepoActors();
const { actorsChanged } = getChangedActors({ filepathsChanged: changedFiles, actorConfigs, isLatest: false });
const actorsChanged = getChangedActors({ filepathsChanged: changedFiles, actorConfigs, isLatest: false, commits });
console.log(JSON.stringify(actorsChanged));
})
.command(
Expand All @@ -93,9 +95,10 @@ await yargs()
const commits = getCommits(args);
const changedFiles = getChangedFiles(commits);
const actorConfigs = await getRepoActors();
const { actorsChanged } = getChangedActors({
const actorsChanged = getChangedActors({
filepathsChanged: changedFiles,
actorConfigs,
commits,
});
// https://github.com/apify-store/google-maps#:actors/lukaskrivka_google-maps-with-contact-details
// git@github.com:apify-store/google-maps#:actors/lukaskrivka_google-maps-with-contact-details
Expand Down Expand Up @@ -128,10 +131,11 @@ await yargs()
);
const isLatest = true;
const actorConfigs = await getRepoActors();
const { actorsChanged } = getChangedActors({
const actorsChanged = getChangedActors({
filepathsChanged: changedFiles,
actorConfigs,
isLatest,
commits,
});
const { dryRun, reportSlackChannel, releaseSlackChannel } = args;
const builds = await runBuilds({
Expand Down
121 changes: 2 additions & 119 deletions bin/utils.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import fs from 'node:fs/promises';
import { spawnSync } from 'node:child_process';
import fs from 'node:fs/promises';

import type {
ActorConfig,
Commit,
GitHubEvent,
} from './types.js';

Expand Down Expand Up @@ -92,120 +92,3 @@ export const getHeadCommitSha = (githubEvent: GitHubEvent) => {
? githubEvent.pull_request.head.sha
: githubEvent.head_commit.id;
};

/**
 * Result of `getChangedActors`.
 */
export interface GetChangedActorsResult {
// Actors that need to be built and tested, de-duplicated by folder
actorsChanged: ActorConfig[];
// True only when `isLatest` is false and a relevant changed file outside the Actor folders was found
codeChanged: boolean;
}

/**
 * Input for `getChangedActors`.
 */
interface ShouldBuildAndTestOptions {
// Paths of the changed files; they are lowercased before being matched
filepathsChanged: string[];
// Actor configs; changed Actor folders are matched against their lowercased `actorName`
actorConfigs: ActorConfig[];
// Defaults to false. When true, README/CHANGELOG-only changes also mark Actors as changed and `codeChanged` stays false
isLatest?: boolean;
}

/**
 * Tells whether a (lowercased) path starts with one of the ignored top-level files or folders.
 * Because matching is by prefix, folder entries (ending with `/`) cover everything inside them.
 */
const isIgnoredTopLevelFile = (lowercaseFilePath: string) => {
    // Deprecated /code and /shared folders count as top-level code, so their prefix is removed before matching
    const topLevelPath = lowercaseFilePath.replace(/^code\//, '').replace(/^shared\//, '');

    // On top level, we should only have dev-only readme and .actor/ is just for apify push CLI (real Actor configs are in /actors)
    const ignoredPrefixes = ['.vscode/', '.gitignore', 'readme.md', '.husky/', '.eslintrc', '.editorconfig', '.actor/'];
    const matchedPrefix = ignoredPrefixes.find((ignoredPrefix) => topLevelPath.startsWith(ignoredPrefix));
    return matchedPrefix !== undefined;
};

/**
 * Tells whether a (lowercased) path is a file that should only trigger the latest build:
 * any changelog, or a readme located under `actors/` or `standalone-actors/`.
 */
const isLatestBuildOnlyFile = (lowercaseFilePath: string) => {
    const isChangelog = lowercaseFilePath.endsWith('changelog.md');

    // Either in /actors or /standalone-actors, we need to rebuild readme but we don't rebuild top-level dev-only readme
    const isInActorFolders = ['actors/', 'standalone-actors/'].some((folder) => lowercaseFilePath.startsWith(folder));
    const isReadme = lowercaseFilePath.endsWith('readme.md');

    return isChangelog || (isInActorFolders && isReadme);
};

/**
 * Determines which Actors need to be built and tested for a set of changed files.
 *
 * Latest and devel are the same except that for latest we also rebuild with README and CHANGELOG files.
 *
 * - Ignored top-level files never mark an Actor as changed.
 * - A file inside `actors/<folder>/` or `standalone-actors/<folder>/` marks only the matching Actor.
 *   The Actor name is the folder name with its LAST underscore replaced by `/`, because usernames may
 *   contain underscores themselves.
 * - Any other file marks all non-standalone Actors and, when `isLatest` is false, sets `codeChanged`.
 *
 * @returns The changed Actors (de-duplicated by `folder`) and the `codeChanged` flag.
 */
export const getChangedActors = (
    { filepathsChanged, actorConfigs, isLatest = false }: ShouldBuildAndTestOptions,
): GetChangedActorsResult => {
    let codeChanged = false;
    // folder -> ActorConfig
    const actorsChangedMap = new Map<string, ActorConfig>();

    const actorConfigsWithoutStandalone = actorConfigs.filter(({ isStandalone }) => !isStandalone);

    const lowercaseFiles = filepathsChanged.map((file) => file.toLowerCase());

    for (const lowercaseFilePath of lowercaseFiles) {
        if (isIgnoredTopLevelFile(lowercaseFilePath)) {
            continue;
        }
        // First we check for specific actors that have configs in /actors or standalone actors in /standalone-actors
        // This matches both actors/username_actorName and standalone-actors/username_actorName
        const changedActorConfigMatch = lowercaseFilePath.match(/^(?:standalone-)?actors\/([^/]+)\/.+/);
        if (changedActorConfigMatch) {
            // Only the last underscore separates username from Actor name; usernames may contain underscores too
            const sanitizedActorName = changedActorConfigMatch[1].replace(/_(?=[^_]*$)/, '/');
            const actorConfigChanged = actorConfigs.find(({ actorName }) => actorName.toLowerCase() === sanitizedActorName);
            if (actorConfigChanged === undefined) {
                console.warn('changes was found in an actor folder which no longer exists in the current commit', {
                    actorName: sanitizedActorName,
                    actorFolderName: changedActorConfigMatch[1],
                });
                continue;
            }

            console.error(`actorConfigChanged ${actorConfigChanged.actorName}: sanitizedActorName ${sanitizedActorName} ${lowercaseFilePath} `);
            // These can be nested at various folders inside the actor folder
            if (isLatest || !isLatestBuildOnlyFile(lowercaseFilePath)) {
                // We assume other files are either actor.json or input_schema.json and those need to be tested
                // TODO: Check what changed in schema, we don't need to test description changes
                actorsChangedMap.set(actorConfigChanged.folder, actorConfigChanged);
            }
            continue;
        }

        // We check top level files (formerly in /code and /shared folders) that are shared among all non-standalone Actors
        // Standalone actors are always handled separately by name via changedActorConfigMatch
        if (isLatest || !isLatestBuildOnlyFile(lowercaseFilePath)) {
            codeChanged = !isLatest; // NOTE: code is changed only in PR
            for (const actorConfig of actorConfigsWithoutStandalone) {
                actorsChangedMap.set(actorConfig.folder, actorConfig);
            }
        }
    }

    const actorsChanged = Array.from(actorsChangedMap.values());

    // All below here is just for logging
    const ignoredFilesChanged = lowercaseFiles.filter((file) => isIgnoredTopLevelFile(file));
    console.error(`[DIFF]: Top level files changed that we ignore (don't trigger test or build): ${ignoredFilesChanged.join(', ')}`);

    const onlyLatestFilesChanged = lowercaseFiles.filter((file) => isLatestBuildOnlyFile(file));
    console.error(`[DIFF]: Files changed that only trigger latest build: ${onlyLatestFilesChanged.join(', ')}`);

    if (!isLatest && codeChanged) {
        console.error(`[DIFF]: All non-standalone Actors need to be built and tested (changes in top-level code)`);
    }

    if (actorsChanged.length > 0) {
        const miniactors = actorsChanged.filter((config) => !config.isStandalone).map((config) => config.actorName);
        const standaloneActors = actorsChanged.filter((config) => config.isStandalone).map((config) => config.actorName);
        console.error(`[DIFF]: MiniActors to be built and tested: ${miniactors.join(', ')}`);
        console.error(`[DIFF]: Standalone Actors to be built and tested: ${standaloneActors.join(', ')}`);
    } else {
        console.error(`[DIFF]: No relevant files changed, skipping builds and tests`);
    }

    return {
        actorsChanged,
        codeChanged,
    };
};
Loading
Loading