diff options
| author | bdunahu <bdunahu@operationnull.com> | 2026-04-27 22:16:12 -0400 |
|---|---|---|
| committer | bdunahu <bdunahu@operationnull.com> | 2026-04-28 00:15:09 -0400 |
| commit | 9e143d1d84817ec7e6d139d234f0fff07749621c (patch) | |
| tree | 7565eac131cc3528d33d5ea3597cdd8006fdb968 /src | |
Diffstat (limited to 'src')
| -rw-r--r-- | src/build-actions.scm | 74 | ||||
| -rw-r--r-- | src/config.scm | 22 | ||||
| -rw-r--r-- | src/crawl-actions-wrapper.scm | 61 | ||||
| -rwxr-xr-x | src/crawl-lockfiles.scm | 98 | ||||
| -rwxr-xr-x | src/crawl-newest-commits.scm | 104 | ||||
| -rw-r--r-- | src/crawl-type-wrapper.scm | 49 | ||||
| -rw-r--r-- | src/poll-repos.scm | 62 | ||||
| -rwxr-xr-x | src/scripts/actions_to_type.sh | 77 | ||||
| -rw-r--r-- | src/scripts/build-action.sh | 135 | ||||
| -rw-r--r-- | src/scripts/channels.scm | 20 | ||||
| -rwxr-xr-x | src/scripts/repos_to_actions_map.sh | 53 | ||||
| -rw-r--r-- | src/utils.scm | 137 |
12 files changed, 892 insertions, 0 deletions
diff --git a/src/build-actions.scm b/src/build-actions.scm
new file mode 100644
index 0000000..f2b1d02
--- /dev/null
+++ b/src/build-actions.scm
@@ -0,0 +1,74 @@
+;; kenku --- crawl and reproduce github actions
+;; Copyright © 2026 bdunahu <bdunahu@operationnull.com>
+;;
+;; This program is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+(define-module (src build-actions)
+  #:use-module (srfi srfi-1)
+  #:use-module (guix scripts environment)
+  #:use-module ((src utils) #:prefix util:)
+  #:use-module ((src config) #:prefix conf:)
+  #:use-module ((ice-9 rdelim))
+  #:use-module ((src crawl-lockfiles) #:prefix lock:)
+  #:export (build-actions))
+
+(define (make-in-vicinity dir f)
+  "Create the directory F inside DIR, along with any missing parents, and
+return its path."
+  (let ((dir (in-vicinity dir f)))
+    (util:mkdir-p dir)
+    dir))
+
+(define (build-actions)
+  "Run build-action.sh, inside a `guix shell' container, once for every line
+of lock:npm-file.
+
+Each input line holds a repository path, a commit and the path of the lockfile
+found for it.  Results are sorted by build-action.sh into four directories
+created under a fresh, timestamped directory in conf:cache-dir.  Lines with
+fewer than three fields are reported and skipped."
+  (let* ((build-action-sh (in-vicinity conf:scripts-dir "build-action.sh"))
+         (time (strftime "%Y-%m-%d_%H-%M-%S" (localtime (current-time))))
+         (dir (string-append conf:cache-dir "/build-" time))
+         (bf (make-in-vicinity dir "build-failures"))
+         (ma (make-in-vicinity dir "missing-artifact"))
+         (mr (make-in-vicinity dir "maybe-reproducible"))
+         (r (make-in-vicinity dir "reproducible"))
+         (packages '("bash"
+                     "coreutils"
+                     "diffoscope"
+                     "dos2unix"
+                     "findutils"
+                     "gawk"
+                     "git"
+                     "grep"
+                     "node"
+                     "nss-certs"
+                     "python"
+                     "sed"
+                     "xxd"
+                     "diffutils"))
+         ;; The command is run with `system*', so no shell ever sees these
+         ;; arguments and the regex needs no shell quoting.
+         (opts '("--network" "--emulate-fhs" "--container"
+                 "--preserve=^TERM$"))  ;diffoscope needs this
+         (guix-shell (append '("guix" "shell")
+                             opts
+                             packages
+                             (list "--" "bash" build-action-sh))))
+    (call-with-input-file lock:npm-file
+      (lambda (port)
+        (let loop ((line (read-line port)))
+          (unless (eof-object? line)
+            (let ((fields (remove string-null?
+                                  (string-split line char-set:whitespace))))
+              (if (< (length fields) 3)
+                  (format #t "Skipping malformed line: ~s~%" line)
+                  (let ((repo (first fields))
+                        (sha (second fields))
+                        (lock-dir (dirname (third fields))))
+                    (format #t "Starting build of ~a@~a~%" repo sha)
+                    ;; REPO and SHA come from crawled workflow files; passing
+                    ;; them as separate argv entries keeps them from being
+                    ;; interpreted by a shell.
+                    (apply system*
+                           (append guix-shell
+                                   (list repo sha
+                                         (string-append "./" lock-dir)
+                                         bf ma mr r))))))
+            (loop (read-line port))))))
+    (format #t "Done! Check ~a.~%" dir)))
diff --git a/src/config.scm b/src/config.scm
new file mode 100644
index 0000000..82d565f
--- /dev/null
+++ b/src/config.scm
@@ -0,0 +1,22 @@
+;; kenku --- crawl and reproduce github actions
+;; Copyright © 2026 bdunahu <bdunahu@operationnull.com>
+;;
+;; This program is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+(define-module (src config)
+  #:export (scripts-dir
+            cache-dir))
+
+;; Both paths are resolved against the working directory at load time.
+;; `in-vicinity' supplies the separating slash itself, so the file name
+;; must not start with one.
+(define scripts-dir (in-vicinity (getcwd) "src/scripts"))
+(define cache-dir (in-vicinity (getcwd) ".cache"))
diff --git a/src/crawl-actions-wrapper.scm b/src/crawl-actions-wrapper.scm
new file mode 100644
index 0000000..2f42dea
--- /dev/null
+++ b/src/crawl-actions-wrapper.scm
@@ -0,0 +1,61 @@
+;; kenku --- crawl and reproduce github actions
+;; Copyright © 2026 bdunahu <bdunahu@operationnull.com>
+;;
+;; This program is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+(define-module (src crawl-actions-wrapper)
+  #:use-module (srfi srfi-1)
+  #:use-module (ice-9 string-fun)
+  #:use-module ((src utils) #:prefix util:)
+  #:use-module ((src config) #:prefix conf:)
+  #:use-module (ice-9 textual-ports)
+  #:use-module ((src poll-repos) #:prefix poll:)
+  #:export (crawl-actions
+            external-file))
+
+(define mapdir (dirname poll:outfile))
+(define mapfile (in-vicinity mapdir "repos-to-actions-map.txt"))
+(define actiondir (in-vicinity conf:cache-dir "actions"))
+(define external-file (in-vicinity actiondir "external.txt"))
+(define docker-file (in-vicinity actiondir "docker.txt"))
+;; Output file -> regex an action must match to be written to that file.
+(define file->regex
+  `((,external-file .
+     "^[A-Za-z0-9][A-Za-z0-9_.-]*(/[A-Za-z0-9_.-]+)*(\\s(\\S+))?$")
+    (,docker-file . "^docker://")
+    (,(in-vicinity actiondir "internal.txt") .
+     "^\\./[A-Za-z0-9_./-]+(\\s(\\S+))?$")))
+
+(define (get-uniq-actions str)
+  "Parse STR, the contents of the repo-to-action map, into a list of unique
+actions.
+
+Each line has the form `REPO ACTION', optionally followed by a `#' comment.
+The result holds every distinct ACTION once, with each `@' replaced by a space
+(so `owner/name@ref' becomes `owner/name ref').  Lines that carry no ACTION
+field are skipped."
+  (define (kill-comments str)
+    (let ((pos (string-index str #\#)))
+      (if pos (substring str 0 pos) str)))
+  (define (line->action line)
+    ;; The second whitespace-separated field, or #f when there is none.
+    (let ((fields (remove string-null?
+                          (string-split (kill-comments line)
+                                        char-set:whitespace))))
+      (and (pair? fields)
+           (pair? (cdr fields))
+           (cadr fields))))
+  (let* ((lines (string-split str #\newline))
+         (actions (filter-map line->action lines)))
+    (map (lambda (a) (string-replace-substring a "@" " "))
+         (delete-duplicates actions))))
+
+(define (crawl-actions)
+  "Append the REPO/ACTION pairs reported by repos_to_actions_map.sh for every
+repository in poll:outfile to the map file, then normalize that file and sort
+its actions into the per-kind files named in `file->regex'."
+  (let* ((repos-to-actions-map-sh (in-vicinity conf:scripts-dir
+                                               "repos_to_actions_map.sh")))
+    (system (string-append repos-to-actions-map-sh " < "
+                           poll:outfile " >> " mapfile))) ;append mode
+  ;; again, we do this separately after writing everything to another file
+  ;; github can cut me off at any time. I would prefer the choice of
+  ;; restarting again with a manually modified input file than lose all data.
+  (util:normalize-file mapfile)
+  (util:filter-actions-on-regex mapfile file->regex get-uniq-actions))
diff --git a/src/crawl-lockfiles.scm b/src/crawl-lockfiles.scm
new file mode 100755
index 0000000..2ab37a0
--- /dev/null
+++ b/src/crawl-lockfiles.scm
@@ -0,0 +1,98 @@
+;; kenku --- crawl and reproduce github actions
+;; Copyright © 2026 bdunahu <bdunahu@operationnull.com>
+;;
+;; This program is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program. If not, see <https://www.gnu.org/licenses/>.
+;;
+;;
+;; This file reads, from the node action list written by crawl-type-wrapper,
+;; a list of repo identifiers and commits:
+;;
+;; 1fexd/gh-create-release-notes 0.0.18
+;; 1password/load-secrets-action 92467eb28f72e8255933372f1e0707c567ce2259
+;; 1password/load-secrets-action v3
+;; 2428392/gh-truncate-string-action b3ff790d21cf42af3ca7579146eedb93c8fb0757
+;; 2428392/gh-truncate-string-action v1.0.0
+;; 3ru/gpt-translate master
+;; 8398a7/action-slack 77eaa4f1c608a7d68b38af4e3f739dcd8cba273e
+;;
+;; It only cares about the first two columns, but writes a third. What does it
+;; write? It writes where it found a lockfile in the associated repo, if any.
+;; I did this because I noticed some repos were putting the lockfiles in a sub-
+;; directory with the action.yml. Repos can put it anywhere they want if they're
+;; mean, but I will not be downloading the repo at this phase or spending my
+;; tokens on a recursive find.
+
+;; The detected types of lockfiles are for npm, pnpm, and yarn.
+;;
+;; Depending on what it finds, it filters the result to a different output file
+;; automatically. Like the other files, since we're using the github REST API,
+;; set your $TOKEN env variable.
+
+(define-module (src crawl-lockfiles)
+  #:use-module ((src utils) #:prefix util:)
+  #:use-module ((src config) #:prefix conf:)
+  #:use-module ((src crawl-type-wrapper) #:prefix types:)
+  #:use-module ((ice-9 rdelim))
+  #:export (npm-file
+            crawl-lockfiles))
+
+(define outdir (in-vicinity conf:cache-dir "lock-friend"))
+(define npm-file (in-vicinity outdir "npm.txt"))
+(define github-api-template
+  "https://api.github.com/repos/~a/~a/contents/~a?ref=~a")
+;; Lockfile base name -> output file.  The empty name stands for "no
+;; lockfile found".
+(define lockfiles-to-outfile '(("package-lock.json" . "npm.txt")
+                               ("yarn.lock" . "alt-pm.txt")
+                               ("pnpm-lock.yaml" . "alt-pm.txt")
+                               ("pnpm-lock.yml" . "alt-pm.txt")
+                               ("" . "no-lock.txt")))
+
+(define (search-for-lockfiles owner name rest sha)
+  "Return the path of the first lockfile that exists in repository OWNER/NAME
+at revision SHA, or the empty string when there is none.
+
+Every known lockfile name is tried in the repository root first and then,
+when REST is non-empty, inside the sub-directory REST."
+  (let* ((lockfiles (filter (lambda (f) (not (string-null? f)))
+                            (map car lockfiles-to-outfile)))
+         (in-subdir (if (string-null? rest)
+                        '()
+                        (map (lambda (f) (in-vicinity rest f)) lockfiles))))
+    ;; The empty "no lockfile" key is deliberately not probed: its URL is the
+    ;; repository's contents root, so it would match before any
+    ;; sub-directory candidate is tried.
+    (let search ((files (append lockfiles in-subdir)))
+      (cond ((null? files) "")
+            ((util:url-exists?
+              (format #f github-api-template owner name (car files) sha))
+             (car files))
+            (else (search (cdr files)))))))
+
+(define (filter-to-file line)
+  "Look up the lockfile for the action named on LINE (`OWNER/NAME[/SUBDIR]
+REF ...') and append `PATH REF LOCKFILE' to the output file matching the kind
+of lockfile found.  Malformed lines are reported on the error port and
+skipped."
+  (let* ((parts (filter (lambda (s) (not (string-null? s)))
+                        (string-split line char-set:whitespace)))
+         (seg (if (null? parts)
+                  '()
+                  (string-split (car parts) #\/))))
+    (if (or (< (length parts) 2) (< (length seg) 2))
+        (format (current-error-port) "Skipping malformed line: ~s~%" line)
+        (let* ((paths (car parts))
+               (sha (cadr parts))
+               (owner (car seg))
+               (name (cadr seg))
+               (rest (string-join (list-tail seg 2) "/"))
+               (lock (search-for-lockfiles owner name rest sha))
+               ;; LOCK may carry a sub-directory; the table is keyed on the
+               ;; bare file name.
+               (kind (if (string-null? lock) "" (basename lock)))
+               (output (open-file (in-vicinity outdir
+                                               (assoc-ref lockfiles-to-outfile
+                                                          kind))
+                                  "a")))
+          ;; stream output
+          (format output "~a ~a ~a\n" paths sha lock)
+          (close output)))))
+
+(define (crawl-lockfiles)
+  "Classify every action listed in types:node-file by the lockfile it ships."
+  (util:mkdir-p outdir)
+  (call-with-input-file types:node-file
+    (lambda (port)
+      (let loop ((line (read-line port)))
+        (unless (eof-object? line)
+          (filter-to-file line)
+          (loop (read-line port)))))))
diff --git a/src/crawl-newest-commits.scm b/src/crawl-newest-commits.scm
new file mode 100755
index 0000000..150973e
--- /dev/null
+++ b/src/crawl-newest-commits.scm
@@ -0,0 +1,104 @@
+;; kenku --- crawl and reproduce github actions
+;; Copyright © 2026 bdunahu <bdunahu@operationnull.com>
+;;
+;; This program is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program. If not, see <https://www.gnu.org/licenses/>.
+;;
+;;
+;; This file reads, from the external action list written by
+;; crawl-actions-wrapper, a list of repo identifiers and commits:
+;;
+;; 1fexd/gh-create-release-notes 0.0.18
+;; 1password/load-secrets-action 92467eb28f72e8255933372f1e0707c567ce2259
+;; 1password/load-secrets-action v3
+;; 2428392/gh-truncate-string-action b3ff790d21cf42af3ca7579146eedb93c8fb0757
+;; 2428392/gh-truncate-string-action v1.0.0
+;; 3ru/gpt-translate master
+;; 8398a7/action-slack 77eaa4f1c608a7d68b38af4e3f739dcd8cba273e
+;;
+;; It only cares about the first two columns. It then collects all the commits
+;; associated with a repo and folds it into a single outputted line featuring
+;; the newest commit. Since I'm not downloading all the repos at this phase,
+;; it again uses the github API. Surprisingly, the limit of queries is quite
+;; large. Speaking of which, be sure to export your personal access token
+;; under the variable "TOKEN" if you want to run this script.
+
+(define-module (src crawl-newest-commits)
+  #:use-module (srfi srfi-1)
+  #:use-module (ice-9 regex)
+  #:use-module ((src crawl-actions-wrapper) #:prefix get-act:)
+  #:use-module ((src utils) #:prefix util:)
+  #:use-module ((ice-9 rdelim))
+  #:export (crawl-commits
+            outfile))
+
+(define outfile (in-vicinity (dirname get-act:external-file)
+                             "newest-external-commits.txt"))
+(define github-api-template "https://api.github.com/repos/~a/~a/commits")
+(define github-commit-api-template
+  (in-vicinity github-api-template "~a"))
+(define github-branch-api-template
+  (string-append github-api-template "?sha=~a&per_page=1"))
+;; A full commit id: exactly 40 hexadecimal digits and nothing else.
+(define git-commit-sha-regex "^[0-9a-fA-F]{40}$")
+
+(define (port-to-hash port)
+  "Read `REPO REF ...' lines from PORT and return a hash table mapping every
+REPO to the list of REFs seen for it.  Lines with fewer than two fields are
+ignored."
+  (let ((ht (make-hash-table)))
+    (let loop ((line (read-line port)))
+      (if (eof-object? line)
+          ht
+          (let ((fields (remove string-null?
+                                (string-split line char-set:whitespace))))
+            (when (>= (length fields) 2)
+              (util:append-to-values ht (first fields) (second fields)))
+            (loop (read-line port)))))))
+
+(define (traverse-and-get-epoch response)
+  "Return the committer date found at commit -> committer -> date in the
+parsed commit object RESPONSE, as seconds since the epoch.  Return #f when
+RESPONSE has no such field, as is the case for a GitHub error object."
+  (define (iso-8601-to-epoch date)
+    ;; Parse down to the second so that refs committed on the same day still
+    ;; compare correctly.  Anything after the seconds is ignored and the
+    ;; time is read as UTC.
+    (car (mktime (car (strptime "%Y-%m-%dT%H:%M:%S" date)) "UTC")))
+  (let ((date (fold (lambda (field alist)
+                      (and (pair? alist)
+                           (assoc-ref alist field)))
+                    response
+                    '("commit" "committer" "date"))))
+    (and (string? date)
+         (iso-8601-to-epoch date))))
+
+(define (compare-commits owner repo id recent-pair)
+  "Return the newer of RECENT-PAIR and (ID . EPOCH), where EPOCH is the
+committer date GitHub reports for ref ID of OWNER/REPO.  RECENT-PAIR is
+returned unchanged when ID cannot be resolved."
+  (let* ((is-sha (string-match git-commit-sha-regex id))
+         (url (format #f (if is-sha
+                             github-commit-api-template
+                             github-branch-api-template)
+                      owner repo id))
+         (response (util:url->scm url))
+         ;; A commit lookup answers with the commit object itself; a listing
+         ;; by branch or tag answers with a vector of commit objects, of
+         ;; which only one was requested (per_page=1).
+         (commit-obj (cond (is-sha response)
+                           ((and (vector? response)
+                                 (positive? (vector-length response)))
+                            (vector-ref response 0))
+                           (else #f)))
+         (epoch (and commit-obj (traverse-and-get-epoch commit-obj))))
+    (if (and epoch (> epoch (cdr recent-pair)))
+        (cons id epoch)
+        recent-pair)))
+
+(define (get-recent-commit repo ids)
+  "Append `REPO REF' to `outfile', REF being whichever of IDS points at the
+newest commit.  Nothing is written when REPO is not of the form OWNER/NAME or
+when none of IDS could be resolved."
+  (let ((parts (string-split repo #\/)))
+    (if (< (length parts) 2)
+        (format (current-error-port) "Skipping malformed repo: ~s~%" repo)
+        (let* ((owner (first parts))
+               (name (second parts))
+               (recent-commit (fold (lambda (id most-recent)
+                                      (compare-commits owner name id
+                                                       most-recent))
+                                    (cons "old-commit" -1)
+                                    ids)))
+          (if (negative? (cdr recent-commit))
+              (format (current-error-port)
+                      "No commit could be resolved for ~a~%" repo)
+              ;; stream responses; just in case github cuts me off.
+              ;; Closing the port flushes it.
+              (let ((output (open-file outfile "a")))
+                (format output "~a ~a\n" repo (car recent-commit))
+                (close output)))))))
+
+(define (crawl-commits)
+  "Write the newest ref of every repository in get-act:external-file to
+`outfile'."
+  (util:mkdir-p (dirname outfile))
+  (call-with-input-file get-act:external-file
+    (lambda (port)
+      (hash-for-each get-recent-commit (port-to-hash port)))))
diff --git a/src/crawl-type-wrapper.scm b/src/crawl-type-wrapper.scm
new file mode 100644
index 0000000..359ac51
--- /dev/null
+++ b/src/crawl-type-wrapper.scm
@@ -0,0 +1,49 @@
+;; kenku --- crawl and reproduce github actions
+;; Copyright © 2026 bdunahu <bdunahu@operationnull.com>
+;;
+;; This program is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+(define-module (src crawl-type-wrapper)
+  #:use-module (srfi srfi-1)
+  #:use-module (ice-9 string-fun)
+  #:use-module (ice-9 regex)
+  #:use-module ((src utils) #:prefix util:)
+  #:use-module ((src config) #:prefix conf:)
+  #:use-module ((src crawl-newest-commits) #:prefix new-commit:)
+  #:use-module (ice-9 textual-ports)
+  #:export (crawl-types
+            node-file))
+
+(define mapdir (dirname new-commit:outfile))
+(define mapfile (in-vicinity mapdir "external-to-type-map.txt"))
+(define typedir (in-vicinity conf:cache-dir "action-types"))
+(define node-file (in-vicinity typedir "node.txt"))
+;; Output file -> regex a line of the type map must match to land in it.
+(define file->regex `((,node-file . "node[0-9]{1,2}$")
+                      (,(in-vicinity typedir "docker.txt") . "docker$")
+                      (,(in-vicinity typedir "composite.txt") .
+                       "composite$")))
+
+(define (parse-actions str)
+  "Split STR into its non-empty lines."
+  (filter (lambda (s) (not (string=? s "")))
+          (string-split str #\newline)))
+
+(define (crawl-types)
+  "Append the type reported by actions_to_type.sh for every action in
+new-commit:outfile to the map file, then normalize that file and sort its
+lines into the per-type files named in `file->regex'."
+  (let* ((actions-to-type-sh (in-vicinity conf:scripts-dir
+                                          "actions_to_type.sh")))
+    (system (string-append actions-to-type-sh " < "
+                           new-commit:outfile " >> " mapfile))) ;append mode
+  (util:normalize-file mapfile)
+  (util:filter-actions-on-regex mapfile file->regex parse-actions))
diff --git a/src/poll-repos.scm b/src/poll-repos.scm
new file mode 100644
index 0000000..b7f37b1
--- /dev/null
+++ b/src/poll-repos.scm
@@ -0,0 +1,62 @@
+;; kenku --- crawl and reproduce github actions
+;; Copyright © 2026 bdunahu <bdunahu@operationnull.com>
+;;
+;; This program is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+(define-module (src poll-repos)
+  #:use-module (srfi srfi-1)
+  #:use-module ((src utils) #:prefix util:)
+  #:use-module ((src config) #:prefix conf:)
+  #:use-module (ice-9 pretty-print)
+  #:export (poll-repos
+            outfile))
+
+(define outdir (in-vicinity conf:cache-dir "repos"))
+(define outfile (in-vicinity outdir "repos.txt"))
+(define github-api-template
+  (string-append "https://api.github.com/search/repositories?q=~a"
+                 "&sort=stars&order=desc&per_page=100&page=~d")) ;100/page max
+
+(define pages 10)
+;;; you may manually change these to get interesting results.
+(define queries '(;;"stars:>5000" ;top-repos
+                  "stars:1000..5000+pushed:>2026-01-01"
+                  "stars:1000..5000+language:Python"
+                  "stars:1000..5000+language:C"
+                  "stars:1000..5000+language:Javascript"
+                  "stars:50..100"))
+
+(define (poll-repos)
+  "Run every search in `queries' against the GitHub repository search API,
+`pages' pages each, append the html_url of every hit to `outfile' (one per
+line), and finally normalize `outfile'."
+  (define (page->urls q p)
+    ;; URLs on page P of query Q; the empty list when the response carries
+    ;; no usable \"items\" (for instance an error object).  Only a
+    ;; User-Agent header is sent with this request.
+    (let* ((scm-obj (util:url->scm
+                     (format #f github-api-template q p)
+                     #:headers `((User-Agent . "curl 8.6.0"))))
+           (items (and (list? scm-obj) (assoc "items" scm-obj))))
+      (if (and items (vector? (cdr items)))
+          (map (lambda (repo)
+                 (cdr (assoc "html_url" repo)))
+               (vector->list (cdr items)))
+          '())))
+  (util:mkdir-p outdir)
+  (for-each
+   (lambda (q)
+     (let ((repos (append-map (lambda (p) (page->urls q p))
+                              (iota pages 1)))
+           (output (open-file outfile "a")))
+       ;; Terminate every URL with a newline so that the batches of
+       ;; consecutive queries do not run into each other.
+       (for-each (lambda (repo)
+                   (display repo output)
+                   (newline output))
+                 repos)
+       (close output)))
+   queries)
+  ;; we open the file and normalize it separately because I would prefer
+  ;; to have responses streamed in case github cuts me off or whatnot.
+  (util:normalize-file outfile))
diff --git a/src/scripts/actions_to_type.sh b/src/scripts/actions_to_type.sh
new file mode 100755
index 0000000..5d93955
--- /dev/null
+++ b/src/scripts/actions_to_type.sh
@@ -0,0 +1,77 @@
+#!/usr/bin/env bash
+# kenku --- crawl and reproduce github actions
+# Copyright © 2026 bdunahu <bdunahu@operationnull.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+# +# +# Takes a file with a repository identifier per line, like this: +# +# actions/deploy-pages v4 +# actions/upload-pages-artifact v4 +# astral-sh/setup-uv v7 +# actions/setup-python v2 +# actions/checkout v2 +# +# And writes an equivalent file with what it's using. 'using' in this case can +# be one of the standard action types in the github actions lingo: 'node', +# 'docker', or 'composite'. +# The `actions.yaml` or `actions.yml` file does a pretty good job explaining +# what is used or not. + +function curl_action { + curl -s \ + -H "Accept: application/vnd.github+json" \ + -H "Authorization: Bearer ${TOKEN}" \ + "https://api.github.com/repos/$1?ref=$2" +} + +function find_action { + owner=$(echo "$1" | awk -F'/' '{print $(1)}') + repo=$(echo "$1" | awk -F'/' '{print $(2)}') + fname=$(echo "$1" | cut -d'/' -f3-) + sha="$2" + + path="$owner/$repo/contents/$fname" + + if [[ "$path" =~ \.(yml|yaml)$ ]]; then + action=$(curl_action "$path" "$sha") + exists=true + else + for c in action.yml action.yaml; do + p="$path/$c" + + action=$(curl_action "$p" "$sha") + + # endpoint doesn't exist + exists=$(echo "$action" | jq -e 'has("message") | not') + [[ "$exists" == true ]] && break + done + fi + + using=$("$exists" && { + echo "$action" \ + | jq -r '.content' \ + | base64 -d \ + | grep -E '^\s*"?using"?:' \ + | sed -E 's/.*"?using"?:[[:space:]]*//' + } || echo "NO_FILE" + ) + [[ -z "$using" ]] && using="NO_USING" + echo "$1 $sha $using" +} + +while read -r action sha; do + find_action "$action" "$sha" +done diff --git a/src/scripts/build-action.sh b/src/scripts/build-action.sh new file mode 100644 index 0000000..9c1d502 --- /dev/null +++ b/src/scripts/build-action.sh @@ -0,0 +1,135 @@ +#!/usr/bin/env bash +# kenku --- crawl and reproduce github actions +# Copyright © 2026 bdunahu <bdunahu@operationnull.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free 
Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +function get_output_file { + local output_file action_file + action_file="action.yml" + [ ! -f "$action_file" ] && action_file="action.yaml" + output_file=$(grep -E '^\s*main:\s*' "${TO_ACT}${action_file}" | awk '{print $2}') + output_file=${output_file//\"/} + echo ${output_file//\'/} +} + +function script_exists { + npm run | grep -q "^ $1" +} + +ALL="$1" +SHA="$2" +DIR="$3" +BUILD_FAILURE_DIR="$4" +MISSING_ARTIFACT_DIR="$5" +MAYBE_REPRODUCIBLE_DIR="$6" +REPRODUCIBLE_DIR="$7" + +REPO_PRINTABLE="${ALL//\//_}_$SHA" +# holds temporary print strings + +REPO=$(echo "$ALL" | awk -F'/' '{print $(1)}') +TO_ACT=$(echo "$ALL" | awk -F'/' '{print $(2)}') +REPO="$REPO/$TO_ACT" +TO_ACT=$(echo "$ALL" | cut -d'/' -f3-) +[[ -n "$TO_ACT" ]] && TO_ACT="$TO_ACT/" + +BUILD_DIR=$(mktemp -d) +cd "$BUILD_DIR" + +echo "Cloning $REPO..." +git clone "https://github.com/$REPO.git" repo > /dev/null 2>&1 +cd repo +echo "Checking out $SHA" +git checkout "$SHA" > /dev/null 2>&1 + +# actions.yml refers to actions relatively, so if we parse +# ../dist/foo/ as the action final build dir, we need to +# refer to it relative to the curr dir of the script +# this is one way +DIST="$TO_ACT$(get_output_file)" +# some actions just name an index.js in the root dir. +# In cases they name a directory, we'll diff the full thing. +[ "$(dirname "$DIST")" != "." 
] && DIST="$(dirname "$DIST")" +REFERENCE_DIST="$DIST.1" +echo "Saving $DIST to $REFERENCE_DIST" + +mv "$DIST" "$REFERENCE_DIST" + +cd "$DIR" +echo "Installing packages..." + +INSTALL=$(npm ci 2>&1) + +echo "Attempting to build..." + +TMP="owo what's the build command" +# surely ONE of these will work... +if COMM="release"; script_exists "$COMM"; then + echo "Trying '$COMM'..." + TMP=$(npm run "$COMM" 2>&1) +elif COMM="package"; script_exists "$COMM"; then + echo "Trying '$COMM'..." + TMP=$(npm run "$COMM" 2>&1) +elif COMM="bundle"; script_exists "$COMM"; then + echo "Trying '$COMM'..." + TMP=$(npm run "$COMM" 2>&1) +elif COMM="build"; script_exists "$COMM"; then + echo "Trying '$COMM'..." + TMP=$(npm run "$COMM" 2>&1) +fi + +# if the build fails, the developers may have failed to setup +# the environment properly, or it uses deprecated features. +# It may be that there is no build command at all! This needs +# manual review. +if [[ $? -ne 0 ]]; then + echo "$ALL did not build." + echo "$INSTALL" > "$BUILD_FAILURE_DIR/${REPO_PRINTABLE}.log" + echo "$TMP" >> "$BUILD_FAILURE_DIR/${REPO_PRINTABLE}.log" + exit 0 #don't kill outer script +fi + +# return back to where we were +cd "$BUILD_DIR" +cd repo + +if [[ ! -d "$DIST" ]]; then + # TODO: if the dist dir wasn't produced, then the commands + # used were likely wrong + echo "$ALL did not produce artifacts." + echo "$INSTALL" >> "$MISSING_ARTIFACT_DIR/${REPO_PRINTABLE}.log" + echo "$TMP" >> "$MISSING_ARTIFACT_DIR/${REPO_PRINTABLE}.log" + exit 0 +fi + +# diffoscope will output binary diffs if line endings are not +# comparable. --force forces it to convert binary files too. 
+find "$REFERENCE_DIST" -type f -exec dos2unix --force {} \; + +# TODO: solves path errors in containers when using global install +mkdir -p "$HOME/.npm-global" +npm config set prefix "$HOME/.npm-global" +export PATH="$HOME/.npm-global/bin:$PATH" + +# diffoscope uses to compare pretty +npm install -g js-beautify + +diffoscope "$DIST" "$REFERENCE_DIST" \ + --exclude-directory-metadata=yes \ + --html "$MAYBE_REPRODUCIBLE_DIR/${REPO_PRINTABLE}.html" +if [[ ! -f "$MAYBE_REPRODUCIBLE_DIR/${REPO_PRINTABLE}.html" ]]; then + touch "$REPRODUCIBLE_DIR/${REPO_PRINTABLE}.flag" + exit 0 +fi diff --git a/src/scripts/channels.scm b/src/scripts/channels.scm new file mode 100644 index 0000000..b49849d --- /dev/null +++ b/src/scripts/channels.scm @@ -0,0 +1,20 @@ +(list (channel + (name 'guix) + (url "https://codeberg.org/guix/guix") + (branch "master") + (commit "c3c2f3be04364e3f616bfcc38875b112bedbe901") + (introduction + (make-channel-introduction + "9edb3f66fd807b096b48283debdcddccfea34bad" + (openpgp-fingerprint + "BBB0 2DDF 2CEA F6A8 0D1D E643 A2A0 6DF2 A33A 54FA")))) + (channel + (name 'tanelorn) + (url "https://git.operationnull.com/tanelorn.git") + (branch "master") + (commit "1c25e9f613d8654f800f9be1d4b232ec9de25aa0") + (introduction + (make-channel-introduction + "3960d45383c672f8aacab8e354824793256c9d29" + (openpgp-fingerprint + "5550 5CA6 9DE5 D342 7F31 F9AE 5F86 6C65 2A34 C996"))))) diff --git a/src/scripts/repos_to_actions_map.sh b/src/scripts/repos_to_actions_map.sh new file mode 100755 index 0000000..2216a5a --- /dev/null +++ b/src/scripts/repos_to_actions_map.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash +# kenku --- crawl and reproduce github actions +# Copyright © 2026 bdunahu <bdunahu@operationnull.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# Takes a file with one repo per line and outputs a master list +# of REPO, ACTION pairs. This script takes a long time to run. +# There is an obvious part of this script that is pretty gross. +# I got it working sometime in early march and have forgotten +# why it does what it does. + +function get_flows { + curl -s \ + -H "Accept: application/vnd.github+json" \ + -H "Authorization: Bearer $TOKEN" \ + "https://api.github.com/repos/$1/$2/contents/.github/workflows" +} + +function has_flows { + echo "$1" \ + | jq -e 'type == "object" and has("message") | not' >/dev/null 2>&1 +} + +function get_url { + local owner repo flows + owner=$(echo "$1" | awk -F'/' '{print $(NF-1)}') + repo=$(echo "$1" | awk -F'/' '{print $(NF)}') + + flows=$(get_flows "$owner" "$repo") + has_flows "$flows" && + echo "$flows" | jq -r '.[] | select(.type=="file") | .download_url' \ + | xargs -n1 sh -c ' + for url do + curl -s "$url" | grep -E "^\s*-?\s*uses:" | sed "s|.*uses:\s*|$1 |" + done + ' _ "$1" # passes arg to sh -c +} + +while read -r url; do + get_url "$url" +done diff --git a/src/utils.scm b/src/utils.scm new file mode 100644 index 0000000..ddcfb3f --- /dev/null +++ b/src/utils.scm @@ -0,0 +1,137 @@ +;; kenku --- crawl and reproduce github actions +;; Copyright © 2026 bdunahu <bdunahu@operationnull.com> +;; +;; This program is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. 
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+(define-module (src utils)
+  #:use-module (srfi srfi-1)
+  #:use-module (ice-9 match)
+  #:use-module (web client)
+  #:use-module (web response)
+  #:use-module (rnrs bytevectors)
+  #:use-module (json)
+  #:use-module (ice-9 receive)
+  #:use-module (ice-9 textual-ports)
+  #:use-module (ice-9 pretty-print)
+  #:use-module (ice-9 regex)
+  #:use-module (ice-9 popen)
+  #:export (default-headers
+            url-exists?
+            url->scm
+            shell->str
+            normalize-file
+            append-to-values
+            mkdir-p
+            filter-actions-on-regex))
+
+;; Headers sent with every GitHub request.  The Authorization header is
+;; left out when $TOKEN is unset, so that loading this module does not fail.
+(define default-headers
+  (let ((token (getenv "TOKEN")))
+    `((Accept . "application/vnd.github+json")
+      ,@(if token
+            `((Authorization . ,(string-append "Bearer " token)))
+            '())
+      (User-Agent . "curl 8.6.0")))) ;i lied
+
+(define (url-exists? url)
+  "Return #t when a request for URL, sent with `default-headers', is answered
+with status 200."
+  (receive (status body)
+      (http-request url #:headers default-headers)
+    (equal? 200 (response-code status))))
+
+(define* (url->scm url #:key (headers default-headers))
+  "Request URL with HEADERS and return the response body, decoded as UTF-8 and
+parsed as JSON."
+  (receive (status body)
+      (http-request url #:headers headers)
+    (json-string->scm (utf8->string body))))
+
+(define (shell->str . args)
+  "Run the command ARGS (program followed by its arguments) and return
+everything it writes to standard output as a string."
+  (let* ((port (apply open-pipe* OPEN_READ args))
+         (contents (get-string-all port)))
+    (close-pipe port)
+    contents))
+
+(define (normalize-file file)
+  "Rewrite FILE in place: every comma, double quote, single quote and carriage
+return is removed, then duplicate lines are dropped."
+  (let* ((unwanted (list #\, #\" #\' #\return))
+         (raw (call-with-input-file file get-string-all))
+         (cleaned (string-filter (lambda (c) (not (member c unwanted)))
+                                 raw))
+         (lines (delete-duplicates (string-split cleaned #\newline))))
+    (call-with-output-file file
+      (lambda (output)
+        (display (string-join lines "\n") output)))))
+
+(define (append-to-values hashtable k v)
+  "Add V to the front of the list stored under K in HASHTABLE, starting a new
+list when K is absent.  Return HASHTABLE."
+  (let ((existing (hash-ref hashtable k '())))
+    (hash-set! hashtable k (cons v existing))
+    hashtable))
+
+(define (mkdir-p dir)
+  "Yoinked from https://codeberg.org/guix/guix."
+  (define absolute?
+    (string-prefix? "/" dir))
+  (define not-slash
+    (char-set-complement (char-set #\/)))
+  (let loop ((components (string-tokenize dir not-slash))
+             (root (if absolute?
+                       ""
+                       ".")))
+    (match components
+      ((head tail ...)
+       (let ((path (string-append root "/" head)))
+         (catch 'system-error
+           (lambda ()
+             (mkdir path)
+             (loop tail path))
+           (lambda args
+             ;; an already existing component is not an error
+             (if (= EEXIST (system-error-errno args))
+                 (loop tail path)
+                 (apply throw args))))))
+      (() #t))))
+
+(define (filter-actions-on-regex file file->regex parse-f)
+  "This procedure is for the bash drop-ins only, which do not filter their
+outputs into files, but rather output lines which need to be filtered based on
+regex.
+
+FILE: the file to be filtered.
+FILE->REGEX: an alist mapping each output file name to the regex an item
+(usually a line of FILE) must match to be written to that file.
+PARSE-F: a procedure that turns the contents of FILE, given as a string, into
+the list of items.
+
+An output file is only (re)written when at least one item matched its regex."
+  (let ((ht (make-hash-table)))
+    (define (hash-actions-to-regex actions)
+      ;; Record every item under each output file whose regex it matches.
+      (for-each
+       (lambda (s)
+         (for-each
+          (lambda (pair)
+            (let ((file (car pair))
+                  (regex (cdr pair)))
+              (when (string-match regex s)
+                (append-to-values ht file s))))
+          file->regex))
+       actions)
+      ht)
+    (define (hash->files)
+      (hash-for-each (lambda (file actions)
+                       (mkdir-p (dirname file))
+                       (let ((output (open-file file "w")))
+                         (for-each (lambda (a) (format output "~a~%" a))
+                                   actions)
+                         (close output)))
+                     ht))
+    (let* ((str (call-with-input-file file get-string-all))
+           (actions (parse-f str)))
+      (hash-actions-to-regex actions)
+      (hash->files)))) |
