summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/build-actions.scm74
-rw-r--r--src/config.scm22
-rw-r--r--src/crawl-actions-wrapper.scm61
-rwxr-xr-xsrc/crawl-lockfiles.scm98
-rwxr-xr-xsrc/crawl-newest-commits.scm104
-rw-r--r--src/crawl-type-wrapper.scm49
-rw-r--r--src/poll-repos.scm62
-rwxr-xr-xsrc/scripts/actions_to_type.sh77
-rw-r--r--src/scripts/build-action.sh135
-rw-r--r--src/scripts/channels.scm20
-rwxr-xr-xsrc/scripts/repos_to_actions_map.sh53
-rw-r--r--src/utils.scm137
12 files changed, 892 insertions, 0 deletions
diff --git a/src/build-actions.scm b/src/build-actions.scm
new file mode 100644
index 0000000..f2b1d02
--- /dev/null
+++ b/src/build-actions.scm
@@ -0,0 +1,74 @@
+;; kenku --- crawl and reproduce github actions
+;; Copyright © 2026 bdunahu <bdunahu@operationnull.com>
+;;
+;; This program is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+(define-module (src build-actions)
+ #:use-module (srfi srfi-1)
+ #:use-module (guix scripts environment)
+ #:use-module ((src utils) #:prefix util:)
+ #:use-module ((src config) #:prefix conf:)
+ #:use-module ((ice-9 rdelim))
+ #:use-module ((src crawl-lockfiles) #:prefix lock:)
+ #:export (build-actions))
+
+(define (make-in-vicinity dir f)
+  "Join F onto DIR, create the resulting directory together with any
+missing parents, and return its path."
+  (let ((target (in-vicinity dir f)))
+    (util:mkdir-p target)
+    target))
+
+(define (build-actions)
+  "Rebuild every action listed in lock:npm-file inside a `guix shell'
+container and sort the outcomes into a fresh, timestamped directory below
+conf:cache-dir.
+
+Each input line is expected to hold three whitespace-separated fields:
+repository path, commit, and lockfile path relative to the repository root.
+Lines with fewer fields are reported on the error port and skipped.
+
+The build script is started with `system*', i.e. without an intermediate
+shell, so field contents taken from crawled data and paths containing blanks
+are passed through verbatim instead of being parsed as shell syntax."
+  (let* ((build-action-sh (in-vicinity conf:scripts-dir "build-action.sh"))
+         (time (strftime "%Y-%m-%d_%H-%M-%S" (localtime (current-time))))
+         (dir (string-append conf:cache-dir "/build-" time))
+         ;; One result directory per outcome; handed to the script below.
+         (bf (make-in-vicinity dir "build-failures"))
+         (ma (make-in-vicinity dir "missing-artifact"))
+         (mr (make-in-vicinity dir "maybe-reproducible"))
+         (r (make-in-vicinity dir "reproducible"))
+         (packages '("bash"
+                     "coreutils"
+                     "diffoscope"
+                     "dos2unix"
+                     "findutils"
+                     "gawk"
+                     "git"
+                     "grep"
+                     "node"
+                     "nss-certs"
+                     "python"
+                     "sed"
+                     "xxd"
+                     "diffutils"))
+         ;; No shell is involved, so the regex must not be shell-quoted.
+         (opts '("--network" "--emulate-fhs" "--container"
+                 "--preserve=^TERM$"))) ;diffoscope needs this
+    (call-with-input-file lock:npm-file
+      (lambda (port)
+        (let loop ((line (read-line port)))
+          (unless (eof-object? line)
+            (let ((fields (string-tokenize line)))
+              (if (< (length fields) 3)
+                  (format (current-error-port)
+                          "Skipping malformed line: ~a~%" line)
+                  (let ((repo (car fields))
+                        (sha (cadr fields))
+                        (lock-dir (dirname (caddr fields))))
+                    (format #t "Starting build of ~a@~a~%" repo sha)
+                    (apply system* "guix" "shell"
+                           (append opts
+                                   packages
+                                   (list "--" "bash" build-action-sh
+                                         repo sha
+                                         (string-append "./" lock-dir)
+                                         bf ma mr r))))))
+            (loop (read-line port))))))
+    (format #t "Done! Check ~a.~%" dir)))
diff --git a/src/config.scm b/src/config.scm
new file mode 100644
index 0000000..82d565f
--- /dev/null
+++ b/src/config.scm
@@ -0,0 +1,22 @@
+;; kenku --- crawl and reproduce github actions
+;; Copyright © 2026 bdunahu <bdunahu@operationnull.com>
+;;
+;; This program is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+(define-module (src config)
+ #:export (scripts-dir
+ cache-dir))
+
+;; Both locations are resolved against the directory the program is started
+;; from.  `in-vicinity' inserts the separator itself, so the second argument
+;; must not begin with a slash (that would yield "<cwd>//.cache").
+(define scripts-dir (in-vicinity (getcwd) "src/scripts"))
+(define cache-dir (in-vicinity (getcwd) ".cache"))
diff --git a/src/crawl-actions-wrapper.scm b/src/crawl-actions-wrapper.scm
new file mode 100644
index 0000000..2f42dea
--- /dev/null
+++ b/src/crawl-actions-wrapper.scm
@@ -0,0 +1,61 @@
+;; kenku --- crawl and reproduce github actions
+;; Copyright © 2026 bdunahu <bdunahu@operationnull.com>
+;;
+;; This program is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+(define-module (src crawl-actions-wrapper)
+ #:use-module (srfi srfi-1)
+ #:use-module (ice-9 string-fun)
+ #:use-module ((src utils) #:prefix util:)
+ #:use-module ((src config) #:prefix conf:)
+ #:use-module (ice-9 textual-ports)
+ #:use-module ((src poll-repos) #:prefix poll:)
+ #:export (crawl-actions
+ external-file))
+
+;; The raw repository-to-action map is kept next to the polled repo list.
+(define mapdir (dirname poll:outfile))
+(define mapfile (in-vicinity mapdir "repos-to-actions-map.txt"))
+;; Classified actions end up in one file per kind below this directory.
+(define actiondir (in-vicinity conf:cache-dir "actions"))
+(define external-file (in-vicinity actiondir "external.txt"))
+(define docker-file (in-vicinity actiondir "docker.txt"))
+;; Alist of (OUTPUT-FILE . REGEX), handed to util:filter-actions-on-regex:
+;; an action is written to OUTPUT-FILE when it matches REGEX.  The first and
+;; last patterns accept an optional second, whitespace-separated field.
+(define file->regex `((,external-file .
+ "^[A-Za-z0-9][A-Za-z0-9_.-]*(/[A-Za-z0-9_.-]+)*(\\s(\\S+))?$")
+ (,docker-file . "^docker://")
+ (,(in-vicinity actiondir "internal.txt") .
+ "^\\./[A-Za-z0-9_./-]+(\\s(\\S+))?$")))
+
+(define (get-uniq-actions str)
+  "Extract the distinct actions named in STR, the contents of the
+repository-to-action map.
+
+Every line is expected to look like \"REPO ACTION[@REF]\", optionally followed
+by a `#' comment.  Return the unique ACTION fields in first-seen order, with
+each `@' replaced by a space.  Lines that have no second field once the
+comment is removed are ignored."
+  (define (kill-comments line)
+    (let ((pos (string-index line #\#)))
+      (if pos (substring line 0 pos) line)))
+  (define (line->action line)
+    ;; Tokenizing, unlike splitting, tolerates runs of blanks and never
+    ;; produces an empty field.
+    (let ((fields (string-tokenize (kill-comments line))))
+      (and (pair? fields)
+           (pair? (cdr fields))
+           (cadr fields))))
+  (map (lambda (a) (string-replace-substring a "@" " "))
+       (delete-duplicates
+        (filter-map line->action (string-split str #\newline)))))
+
+(define (crawl-actions)
+  "Feed the polled repository list to repos_to_actions_map.sh, appending its
+output to `mapfile', then normalize `mapfile' and sort the actions it names
+into the per-kind files of `file->regex'."
+  (let ((script (in-vicinity conf:scripts-dir "repos_to_actions_map.sh")))
+    (system (string-append script " < " poll:outfile
+                           " >> " mapfile))) ;append mode
+  ;; Post-processing is deliberately a separate pass over the file: github
+  ;; can cut the crawl off at any time, and restarting from a manually
+  ;; modified input file beats losing all data.
+  (util:normalize-file mapfile)
+  (util:filter-actions-on-regex mapfile file->regex get-uniq-actions))
diff --git a/src/crawl-lockfiles.scm b/src/crawl-lockfiles.scm
new file mode 100755
index 0000000..2ab37a0
--- /dev/null
+++ b/src/crawl-lockfiles.scm
@@ -0,0 +1,98 @@
+;; kenku --- crawl and reproduce github actions
+;; Copyright © 2026 bdunahu <bdunahu@operationnull.com>
+;;
+;; This program is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program. If not, see <https://www.gnu.org/licenses/>.
+;;
+;;
+;; This module reads, from the node-action list written by crawl-type-wrapper
+;; (types:node-file), a list of repo identifiers and commits:
+;;
+;; 1fexd/gh-create-release-notes 0.0.18
+;; 1password/load-secrets-action 92467eb28f72e8255933372f1e0707c567ce2259
+;; 1password/load-secrets-action v3
+;; 2428392/gh-truncate-string-action b3ff790d21cf42af3ca7579146eedb93c8fb0757
+;; 2428392/gh-truncate-string-action v1.0.0
+;; 3ru/gpt-translate master
+;; 8398a7/action-slack 77eaa4f1c608a7d68b38af4e3f739dcd8cba273e
+;;
+;; It only cares about the first two columns, but writes a third. What does it
+;; write? It writes where it found a lockfile in the associated repo, if any.
+;; I did this because I noticed some repos were putting the lockfiles in a sub-
+;; directory with the action.yml. Repos can put it anywhere they want if they're
+;; mean, but I will not be downloading the repo at this phase or spending my
+;; tokens on a recursive find.
+
+;; The detected types of lockfiles are for npm, pnpm, and yarn.
+;;
+;; Depending on what it finds, it filters the result to a different output file
+;; automatically. Like the other files, since we're using the github REST API,
+;; set your $TOKEN env variable.
+
+(define-module (src crawl-lockfiles)
+ #:use-module ((src utils) #:prefix util:)
+ #:use-module ((src config) #:prefix conf:)
+ #:use-module ((src crawl-type-wrapper) #:prefix types:)
+ #:use-module ((ice-9 rdelim))
+ #:export (npm-file
+ crawl-lockfiles))
+
+;; Directory receiving the per-package-manager result files.
+(define outdir (in-vicinity conf:cache-dir "lock-friend"))
+;; Exported: list of actions for which an npm lockfile was found.
+(define npm-file (in-vicinity outdir "npm.txt"))
+;; Slots, in order: owner, repository name, path inside the repository, ref.
+(define github-api-template
+ "https://api.github.com/repos/~a/~a/contents/~a?ref=~a")
+;; Alist of (LOCKFILE-NAME . RESULT-FILE).  The entry keyed by the empty
+;; string selects the result file for actions without a lockfile.
+(define lockfiles-to-outfile '(("package-lock.json" . "npm.txt")
+ ("yarn.lock" . "alt-pm.txt")
+ ("pnpm-lock.yaml" . "alt-pm.txt")
+ ("pnpm-lock.yml" . "alt-pm.txt")
+ ("" . "no-lock.txt")))
+
+(define (search-for-lockfiles owner name rest sha)
+  "Return the repository-relative path of the first lockfile found in
+OWNER/NAME at ref SHA, or \"\" when none is found.
+
+The repository root is probed first, then REST, the subdirectory holding the
+action (skipped when REST is empty, where it would only repeat the root
+probes).  The empty name is never sent to the API: it is merely the key of
+the catch-all entry of `lockfiles-to-outfile', and probing it would match the
+repository root before any subdirectory candidate is tried."
+  (define lockfiles
+    (filter (lambda (f) (not (string-null? f)))
+            (map car lockfiles-to-outfile)))
+  (define candidates
+    (if (string-null? rest)
+        lockfiles
+        (append lockfiles
+                (map (lambda (f) (in-vicinity rest f)) lockfiles))))
+  (let search ((files candidates))
+    (cond ((null? files) "")
+          ((util:url-exists?
+            (format #f github-api-template owner name (car files) sha))
+           (car files))
+          (else (search (cdr files))))))
+
+(define (filter-to-file line)
+  "Look up the lockfile of the action described by LINE and append
+\"PATH SHA LOCKFILE\" to the result file matching the lockfile's kind.
+
+LINE starts with \"OWNER/NAME[/SUBDIR...] SHA\"; further fields are ignored.
+The result file is selected by the lockfile's base name, so that a lockfile
+found in a subdirectory is filed under its package manager."
+  (define (file-name path)
+    ;; Last component of PATH; \"\" stays \"\".
+    (let ((slash (string-rindex path #\/)))
+      (if slash (substring path (+ slash 1)) path)))
+  (let* ((parts (string-split line char-set:whitespace))
+         (paths (car parts))
+         (sha (cadr parts))
+         (seg (string-split paths #\/))
+         (owner (car seg))
+         (name (cadr seg))
+         (rest (string-join (list-tail seg 2) "/"))
+         (lock (search-for-lockfiles owner name rest sha))
+         (output (open-file (in-vicinity outdir
+                                         (assoc-ref lockfiles-to-outfile
+                                                    (file-name lock)))
+                            "a")))
+    ;; stream output
+    (format output "~a ~a ~a\n" paths sha lock)
+    (close output)))
+
+(define (crawl-lockfiles)
+  "Create `outdir' and run `filter-to-file' on every line of
+types:node-file."
+  (util:mkdir-p outdir)
+  (call-with-input-file types:node-file
+    (lambda (port)
+      (do ((line (read-line port) (read-line port)))
+          ((eof-object? line))
+        (filter-to-file line)))))
diff --git a/src/crawl-newest-commits.scm b/src/crawl-newest-commits.scm
new file mode 100755
index 0000000..150973e
--- /dev/null
+++ b/src/crawl-newest-commits.scm
@@ -0,0 +1,104 @@
+;; kenku --- crawl and reproduce github actions
+;; Copyright © 2026 bdunahu <bdunahu@operationnull.com>
+;;
+;; This program is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program. If not, see <https://www.gnu.org/licenses/>.
+;;
+;;
+;; This module reads, from the external-action list written by
+;; crawl-actions-wrapper (external-file), a list of repo identifiers and commits:
+;;
+;; 1fexd/gh-create-release-notes 0.0.18
+;; 1password/load-secrets-action 92467eb28f72e8255933372f1e0707c567ce2259
+;; 1password/load-secrets-action v3
+;; 2428392/gh-truncate-string-action b3ff790d21cf42af3ca7579146eedb93c8fb0757
+;; 2428392/gh-truncate-string-action v1.0.0
+;; 3ru/gpt-translate master
+;; 8398a7/action-slack 77eaa4f1c608a7d68b38af4e3f739dcd8cba273e
+;;
+;; It only cares about the first two columns. It then collects all the commits
+;; associated with a repo and folds it into a single outputted line featuring
+;; the newest commit. Since I'm not downloading all the repos at this phase,
+;; it again uses the github API. Surprisingly, the limit of queries is quite
+;; large. Speaking of which, be sure to export your personal access token
+;; under the variable "TOKEN" if you want to run this script.
+
+(define-module (src crawl-newest-commits)
+ #:use-module (srfi srfi-1)
+ #:use-module (ice-9 regex)
+ #:use-module ((src crawl-actions-wrapper) #:prefix get-act:)
+ #:use-module ((src utils) #:prefix util:)
+ #:use-module ((ice-9 rdelim))
+ #:export (crawl-commits
+ outfile))
+
+;; Exported: one "REPO REF" line per repository is appended here.
+(define outfile (in-vicinity (dirname get-act:external-file)
+ "newest-external-commits.txt"))
+;; Slots: owner, repository name.
+(define github-api-template "https://api.github.com/repos/~a/~a/commits")
+;; Adds a third slot as a further path component (".../commits/~a").
+(define github-commit-api-template
+ (in-vicinity github-api-template "~a"))
+;; Adds a third slot as the `sha' query parameter and asks for one result.
+(define github-branch-api-template
+ (string-append github-api-template "?sha=~a&per_page=1"))
+;; A full commit id: exactly 40 hexadecimal digits.  Anchored at both ends so
+;; that a longer ref which merely contains such a run is not taken for one.
+(define git-commit-sha-regex "^[0-9A-Fa-f]{40}$")
+
+(define (port-to-hash port)
+  "Read \"REPO COMMIT ...\" lines from PORT until end of file and return a
+hash table mapping each REPO to the list of COMMITs read for it.
+
+Only the first two fields of a line are used.  Lines with fewer than two
+fields (for instance an action referenced without any ref) are skipped, as
+they name no commit to look up."
+  (let loop ((ht (make-hash-table))
+             (line (read-line port)))
+    (if (eof-object? line)
+        ht
+        (let ((fields (string-tokenize line)))
+          (when (and (pair? fields) (pair? (cdr fields)))
+            ;; `append-to-values' updates HT in place.
+            (util:append-to-values ht (car fields) (cadr fields)))
+          (loop ht (read-line port))))))
+
+(define (traverse-and-get-epoch response)
+  "Return the committer date found under commit -> committer -> date in the
+alist RESPONSE, as seconds since the epoch.  Return #f when RESPONSE has a
+top-level \"message\" entry, which is treated as an error reply."
+  (define (iso-8601-to-epoch date)
+    ;; Parse down to the second: with day resolution two commits made on the
+    ;; same day cannot be ordered.  `strptime' stops before the trailing zone
+    ;; designator; the time is interpreted as UTC rather than in whatever
+    ;; zone the crawler happens to run in.
+    (car (mktime (car (strptime "%Y-%m-%dT%H:%M:%S" date)) "UTC")))
+  (and (not (assoc "message" response))
+       (iso-8601-to-epoch
+        (fold (lambda (field alist) (cdr (assoc field alist)))
+              response
+              '("commit" "committer" "date")))))
+
+(define (compare-commits owner repo id recent-pair)
+  "Return whichever of ID and RECENT-PAIR names the newer commit of
+OWNER/REPO, as a pair (REF . EPOCH).
+
+ID is looked up through the single-commit endpoint when it looks like a full
+commit id, and otherwise through the commit listing, whose first entry is
+used.  When no commit date can be obtained for ID (error reply, empty
+listing), RECENT-PAIR is returned unchanged instead of aborting the crawl."
+  (let* ((is-sha (string-match git-commit-sha-regex id))
+         (url (format #f (if is-sha
+                             github-commit-api-template
+                             github-branch-api-template)
+                      owner repo id))
+         (reply (util:url->scm url))
+         ;; The whole object is passed on so that a top-level "message"
+         ;; entry of an error reply is always visible to the check in
+         ;; `traverse-and-get-epoch'.
+         (commit (cond (is-sha reply)
+                       ((and (vector? reply)
+                             (positive? (vector-length reply)))
+                        (vector-ref reply 0))
+                       (else #f)))
+         (epoch (and (pair? commit)
+                     (traverse-and-get-epoch commit))))
+    (if (and (number? epoch)
+             (> epoch (cdr recent-pair)))
+        (cons id epoch)
+        recent-pair)))
+
+(define (get-recent-commit repo ids)
+  "Append \"REPO REF\" to `outfile', where REF is the element of IDS that
+`compare-commits' ranks newest.  REPO is \"OWNER/NAME\", optionally followed
+by further path components, which are ignored for the lookup.  The
+placeholder \"old-commit\" is written when no element of IDS wins."
+  (let* ((parts (list-head (string-split repo #\/) 2))
+         (owner (car parts))
+         (name (cadr parts))
+         (recent-commit (fold (lambda (id most-recent)
+                                (compare-commits owner name id most-recent))
+                              (cons "old-commit" -1)
+                              ids))
+         (output (open-file outfile "a")))
+    ;; stream responses; just in case github cuts me off.  Closing the port
+    ;; flushes it, so no `~!' directive is needed (core `format' does not
+    ;; know it unless (ice-9 format) has been loaded).
+    (format output "~a ~a\n" repo (car recent-commit))
+    (close output)))
+
+(define (crawl-commits)
+  "Group the refs listed in get-act:external-file by repository and run
+`get-recent-commit' on every group."
+  (util:mkdir-p (dirname outfile))
+  (let ((refs-by-repo
+         (call-with-input-file get-act:external-file port-to-hash)))
+    (hash-for-each get-recent-commit refs-by-repo)))
diff --git a/src/crawl-type-wrapper.scm b/src/crawl-type-wrapper.scm
new file mode 100644
index 0000000..359ac51
--- /dev/null
+++ b/src/crawl-type-wrapper.scm
@@ -0,0 +1,49 @@
+;; kenku --- crawl and reproduce github actions
+;; Copyright © 2026 bdunahu <bdunahu@operationnull.com>
+;;
+;; This program is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+(define-module (src crawl-type-wrapper)
+ #:use-module (srfi srfi-1)
+ #:use-module (ice-9 string-fun)
+ #:use-module (ice-9 regex)
+ #:use-module ((src utils) #:prefix util:)
+ #:use-module ((src config) #:prefix conf:)
+ #:use-module ((src crawl-newest-commits) #:prefix new-commit:)
+ #:use-module (ice-9 textual-ports)
+ #:export (crawl-types
+ node-file))
+
+;; The raw action-to-type map is kept next to the newest-commit list.
+(define mapdir (dirname new-commit:outfile))
+(define mapfile (in-vicinity mapdir "external-to-type-map.txt"))
+;; Classified lines end up in one file per action type below this directory.
+(define typedir (in-vicinity conf:cache-dir "action-types"))
+;; Exported: lines whose type field names a node runtime.
+(define node-file (in-vicinity typedir "node.txt"))
+;; Alist of (OUTPUT-FILE . REGEX), handed to util:filter-actions-on-regex.
+;; Every pattern is anchored at the end of the line only.
+(define file->regex `((,node-file . "node[0-9]{1,2}$")
+ (,(in-vicinity typedir "docker.txt") . "docker$")
+ (,(in-vicinity typedir "composite.txt") .
+ "composite$")))
+
+(define (parse-actions str)
+  "Split STR at newlines and return its non-empty lines, in order."
+  (remove string-null? (string-split str #\newline)))
+
+;; NOTE(review): bound without a value, not exported, and not referenced in
+;; the rest of this module; presumably a leftover placeholder -- confirm
+;; before removing.
+(define filter-actions)
+
+(define (crawl-types)
+  "Feed the newest-commit list to actions_to_type.sh, appending its output to
+`mapfile', then normalize `mapfile' and sort its lines into the per-type
+files of `file->regex'."
+  (let ((script (in-vicinity conf:scripts-dir "actions_to_type.sh")))
+    (system (string-append script " < " new-commit:outfile
+                           " >> " mapfile))) ;append mode
+  (util:normalize-file mapfile)
+  (util:filter-actions-on-regex mapfile file->regex parse-actions))
diff --git a/src/poll-repos.scm b/src/poll-repos.scm
new file mode 100644
index 0000000..b7f37b1
--- /dev/null
+++ b/src/poll-repos.scm
@@ -0,0 +1,62 @@
+;; kenku --- crawl and reproduce github actions
+;; Copyright © 2026 bdunahu <bdunahu@operationnull.com>
+;;
+;; This program is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+(define-module (src poll-repos)
+ #:use-module (srfi srfi-1)
+ #:use-module ((src utils) #:prefix util:)
+ #:use-module ((src config) #:prefix conf:)
+ #:use-module (ice-9 pretty-print)
+ #:export (poll-repos
+ outfile))
+
+;; Directory and file (exported) receiving the polled repository URLs.
+(define outdir (in-vicinity conf:cache-dir "repos"))
+(define outfile (in-vicinity outdir "repos.txt"))
+;; Repository search URL.  The first slot takes the query, the second the
+;; page number.  Both are `~a' slots, which every `format' understands; `~d'
+;; is only available once (ice-9 format) has been loaded.
+(define github-api-template
+  (string-append "https://api.github.com/search/repositories?q=~a"
+                 "&sort=stars&order=desc&per_page=100&page=~a")) ;100/page max
+
+;; Number of result pages requested per query, starting at page 1.
+(define pages 10)
+;;; you may manually change these to get interesting results.
+;;; Each string is inserted verbatim as the `q' parameter of the search URL.
+(define queries '(;;"stars:>5000" ;top-repos
+ "stars:1000..5000+pushed:>2026-01-01"
+ "stars:1000..5000+language:Python"
+ "stars:1000..5000+language:C"
+ "stars:1000..5000+language:Javascript"
+ "stars:50..100"))
+
+(define (poll-repos)
+  "Run every search in `queries' against the GitHub repository search,
+`pages' pages each, and append the \"html_url\" of every hit to `outfile',
+one per line.  Afterwards `outfile' is normalized in a separate pass."
+  (define (page-urls query page)
+    ;; A reply without "items" contributes nothing instead of derailing the
+    ;; whole query.
+    (let* ((scm-obj (util:url->scm
+                     (format #f github-api-template query page)
+                     #:headers `((User-Agent . "curl 8.6.0"))))
+           (items (assoc "items" scm-obj)))
+      (if items
+          (map (lambda (repo)
+                 (cdr (assoc "html_url" repo)))
+               (vector->list (cdr items)))
+          '())))
+  (util:mkdir-p outdir)
+  (for-each
+   (lambda (q)
+     (let ((repos (append-map (lambda (p) (page-urls q p))
+                              (iota pages 1)))
+           (output (open-file outfile "a")))
+       ;; Terminate every line, the last one included: the file is reopened
+       ;; in append mode for each query, and an unterminated final URL would
+       ;; fuse with the first URL of the next batch.
+       (for-each (lambda (url)
+                   (display url output)
+                   (newline output))
+                 repos)
+       (close output)))
+   queries)
+  ;; we open the file and normalize it separately because I would prefer
+  ;; to have responses streamed in case github cuts me off or whatnot.
+  (util:normalize-file outfile))
diff --git a/src/scripts/actions_to_type.sh b/src/scripts/actions_to_type.sh
new file mode 100755
index 0000000..5d93955
--- /dev/null
+++ b/src/scripts/actions_to_type.sh
@@ -0,0 +1,77 @@
+#!/usr/bin/env bash
+# kenku --- crawl and reproduce github actions
+# Copyright © 2026 bdunahu <bdunahu@operationnull.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+#
+# Takes a file with a repository identifier per line, like this:
+#
+# actions/deploy-pages v4
+# actions/upload-pages-artifact v4
+# astral-sh/setup-uv v7
+# actions/setup-python v2
+# actions/checkout v2
+#
+# And writes an equivalent file with what it's using. 'using' in this case can
+# be one of the standard action types in the github actions lingo: 'node',
+# 'docker', or 'composite'.
+# The action's metadata file (`action.yml` or `action.yaml`) does a pretty good job explaining
+# what is used or not.
+
+function curl_action {
+ curl -s \
+ -H "Accept: application/vnd.github+json" \
+ -H "Authorization: Bearer ${TOKEN}" \
+ "https://api.github.com/repos/$1?ref=$2"
+}
+
+function find_action {
+ owner=$(echo "$1" | awk -F'/' '{print $(1)}')
+ repo=$(echo "$1" | awk -F'/' '{print $(2)}')
+ fname=$(echo "$1" | cut -d'/' -f3-)
+ sha="$2"
+
+ path="$owner/$repo/contents/$fname"
+
+ if [[ "$path" =~ \.(yml|yaml)$ ]]; then
+ action=$(curl_action "$path" "$sha")
+ exists=true
+ else
+ for c in action.yml action.yaml; do
+ p="$path/$c"
+
+ action=$(curl_action "$p" "$sha")
+
+ # endpoint doesn't exist
+ exists=$(echo "$action" | jq -e 'has("message") | not')
+ [[ "$exists" == true ]] && break
+ done
+ fi
+
+ using=$("$exists" && {
+ echo "$action" \
+ | jq -r '.content' \
+ | base64 -d \
+ | grep -E '^\s*"?using"?:' \
+ | sed -E 's/.*"?using"?:[[:space:]]*//'
+ } || echo "NO_FILE"
+ )
+ [[ -z "$using" ]] && using="NO_USING"
+ echo "$1 $sha $using"
+}
+
+# Classify every "ACTION REF" pair read from standard input.
+while read -r spec ref; do
+    find_action "$spec" "$ref"
+done
diff --git a/src/scripts/build-action.sh b/src/scripts/build-action.sh
new file mode 100644
index 0000000..9c1d502
--- /dev/null
+++ b/src/scripts/build-action.sh
@@ -0,0 +1,135 @@
+#!/usr/bin/env bash
+# kenku --- crawl and reproduce github actions
+# Copyright © 2026 bdunahu <bdunahu@operationnull.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+function get_output_file {
+ local output_file action_file
+ action_file="action.yml"
+ [ ! -f "$action_file" ] && action_file="action.yaml"
+ output_file=$(grep -E '^\s*main:\s*' "${TO_ACT}${action_file}" | awk '{print $2}')
+ output_file=${output_file//\"/}
+ echo ${output_file//\'/}
+}
+
+function script_exists {
+ npm run | grep -q "^ $1"
+}
+
+# Positional arguments:
+#   $1  action as OWNER/REPO[/PATH]
+#   $2  git ref to check out
+#   $3  directory (relative to the checkout) in which `npm ci' is run
+#   $4-$7  result directories: build failures, missing artifacts,
+#          maybe-reproducible reports, reproducible flags
+ALL="$1"
+SHA="$2"
+DIR="$3"
+BUILD_FAILURE_DIR="$4"
+MISSING_ARTIFACT_DIR="$5"
+MAYBE_REPRODUCIBLE_DIR="$6"
+REPRODUCIBLE_DIR="$7"
+
+# ALL with every "/" turned into "_", followed by the ref; names result files.
+REPO_PRINTABLE="${ALL//\//_}_$SHA"
+# holds temporary print strings
+
+# Split ALL into REPO (OWNER/REPO) and TO_ACT (the remaining path, with a
+# trailing slash when non-empty).
+REPO=$(echo "$ALL" | awk -F'/' '{print $(1)}')
+TO_ACT=$(echo "$ALL" | awk -F'/' '{print $(2)}')
+REPO="$REPO/$TO_ACT"
+TO_ACT=$(echo "$ALL" | cut -d'/' -f3-)
+[[ -n "$TO_ACT" ]] && TO_ACT="$TO_ACT/"
+
+# Work in a fresh temporary directory.
+# NOTE(review): the exit status of clone/checkout is not examined.
+BUILD_DIR=$(mktemp -d)
+cd "$BUILD_DIR"
+
+echo "Cloning $REPO..."
+git clone "https://github.com/$REPO.git" repo > /dev/null 2>&1
+cd repo
+echo "Checking out $SHA"
+git checkout "$SHA" > /dev/null 2>&1
+
+# action.yml refers to the entry point relative to itself, so if we parse
+# ../dist/foo/ as the action final build dir, we need to
+# refer to it relative to the curr dir of the script
+# this is one way
+DIST="$TO_ACT$(get_output_file)"
+# some actions just name an index.js in the root dir.
+# In cases they name a directory, we'll diff the full thing.
+[ "$(dirname "$DIST")" != "." ] && DIST="$(dirname "$DIST")"
+REFERENCE_DIST="$DIST.1"
+echo "Saving $DIST to $REFERENCE_DIST"
+
+# Move the committed artifact aside; the build is expected to recreate DIST.
+mv "$DIST" "$REFERENCE_DIST"
+
+cd "$DIR"
+echo "Installing packages..."
+
+INSTALL=$(npm ci 2>&1)
+
+echo "Attempting to build..."
+
+TMP="owo what's the build command"
+# surely ONE of these will work...
+if COMM="release"; script_exists "$COMM"; then
+ echo "Trying '$COMM'..."
+ TMP=$(npm run "$COMM" 2>&1)
+elif COMM="package"; script_exists "$COMM"; then
+ echo "Trying '$COMM'..."
+ TMP=$(npm run "$COMM" 2>&1)
+elif COMM="bundle"; script_exists "$COMM"; then
+ echo "Trying '$COMM'..."
+ TMP=$(npm run "$COMM" 2>&1)
+elif COMM="build"; script_exists "$COMM"; then
+ echo "Trying '$COMM'..."
+ TMP=$(npm run "$COMM" 2>&1)
+fi
+
+# if the build fails, the developers may have failed to setup
+# the environment properly, or it uses deprecated features.
+# It may be that there is no build command at all! This needs
+# manual review.
+if [[ $? -ne 0 ]]; then
+ echo "$ALL did not build."
+ echo "$INSTALL" > "$BUILD_FAILURE_DIR/${REPO_PRINTABLE}.log"
+ echo "$TMP" >> "$BUILD_FAILURE_DIR/${REPO_PRINTABLE}.log"
+ exit 0 #don't kill outer script
+fi
+
+# return back to where we were
+cd "$BUILD_DIR"
+cd repo
+
+# DIST is a directory for most actions, but a plain file when the entry point
+# sits directly in the top-level directory; hence test for existence rather
+# than for a directory.
+if [[ ! -e "$DIST" ]]; then
+    # TODO: if the dist dir wasn't produced, then the commands
+    # used were likely wrong
+    echo "$ALL did not produce artifacts."
+    echo "$INSTALL" >> "$MISSING_ARTIFACT_DIR/${REPO_PRINTABLE}.log"
+    echo "$TMP" >> "$MISSING_ARTIFACT_DIR/${REPO_PRINTABLE}.log"
+    exit 0
+fi
+
+# diffoscope will output binary diffs if line endings are not
+# comparable. --force forces it to convert binary files too.
+find "$REFERENCE_DIST" -type f -exec dos2unix --force {} \;
+
+# TODO: solves path errors in containers when using global install
+mkdir -p "$HOME/.npm-global"
+npm config set prefix "$HOME/.npm-global"
+export PATH="$HOME/.npm-global/bin:$PATH"
+
+# installed so that diffoscope can compare prettified JavaScript
+npm install -g js-beautify
+
+# The absence of a report afterwards is taken to mean that diffoscope found
+# no difference.
+diffoscope "$DIST" "$REFERENCE_DIST" \
+    --exclude-directory-metadata=yes \
+    --html "$MAYBE_REPRODUCIBLE_DIR/${REPO_PRINTABLE}.html"
+if [[ ! -f "$MAYBE_REPRODUCIBLE_DIR/${REPO_PRINTABLE}.html" ]]; then
+    touch "$REPRODUCIBLE_DIR/${REPO_PRINTABLE}.flag"
+    exit 0
+fi
diff --git a/src/scripts/channels.scm b/src/scripts/channels.scm
new file mode 100644
index 0000000..b49849d
--- /dev/null
+++ b/src/scripts/channels.scm
@@ -0,0 +1,20 @@
+;; Channel specification: a list of two channels, `guix' and `tanelorn', each
+;; pinned to a fixed commit and carrying a channel introduction (commit plus
+;; OpenPGP fingerprint).
+;; NOTE(review): presumably meant to be passed to `guix pull -C' or
+;; `guix time-machine -C' -- confirm against the project documentation.
+(list (channel
+ (name 'guix)
+ (url "https://codeberg.org/guix/guix")
+ (branch "master")
+ (commit "c3c2f3be04364e3f616bfcc38875b112bedbe901")
+ (introduction
+ (make-channel-introduction
+ "9edb3f66fd807b096b48283debdcddccfea34bad"
+ (openpgp-fingerprint
+ "BBB0 2DDF 2CEA F6A8 0D1D E643 A2A0 6DF2 A33A 54FA"))))
+ (channel
+ (name 'tanelorn)
+ (url "https://git.operationnull.com/tanelorn.git")
+ (branch "master")
+ (commit "1c25e9f613d8654f800f9be1d4b232ec9de25aa0")
+ (introduction
+ (make-channel-introduction
+ "3960d45383c672f8aacab8e354824793256c9d29"
+ (openpgp-fingerprint
+ "5550 5CA6 9DE5 D342 7F31 F9AE 5F86 6C65 2A34 C996")))))
diff --git a/src/scripts/repos_to_actions_map.sh b/src/scripts/repos_to_actions_map.sh
new file mode 100755
index 0000000..2216a5a
--- /dev/null
+++ b/src/scripts/repos_to_actions_map.sh
@@ -0,0 +1,53 @@
+#!/usr/bin/env bash
+# kenku --- crawl and reproduce github actions
+# Copyright © 2026 bdunahu <bdunahu@operationnull.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# Takes a file with one repo per line and outputs a master list
+# of REPO, ACTION pairs. This script takes a long time to run.
+# There is an obvious part of this script that is pretty gross.
+# I got it working sometime in early march and have forgotten
+# why it does what it does.
+
+function get_flows {
+ curl -s \
+ -H "Accept: application/vnd.github+json" \
+ -H "Authorization: Bearer $TOKEN" \
+ "https://api.github.com/repos/$1/$2/contents/.github/workflows"
+}
+
+function has_flows {
+ echo "$1" \
+ | jq -e 'type == "object" and has("message") | not' >/dev/null 2>&1
+}
+
+function get_url {
+ local owner repo flows
+ owner=$(echo "$1" | awk -F'/' '{print $(NF-1)}')
+ repo=$(echo "$1" | awk -F'/' '{print $(NF)}')
+
+ flows=$(get_flows "$owner" "$repo")
+ has_flows "$flows" &&
+ echo "$flows" | jq -r '.[] | select(.type=="file") | .download_url' \
+ | xargs -n1 sh -c '
+ for url do
+ curl -s "$url" | grep -E "^\s*-?\s*uses:" | sed "s|.*uses:\s*|$1 |"
+ done
+ ' _ "$1" # passes arg to sh -c
+}
+
+# Map every repository URL read from standard input.
+while read -r repo_url; do
+    get_url "$repo_url"
+done
diff --git a/src/utils.scm b/src/utils.scm
new file mode 100644
index 0000000..ddcfb3f
--- /dev/null
+++ b/src/utils.scm
@@ -0,0 +1,137 @@
+;; kenku --- crawl and reproduce github actions
+;; Copyright © 2026 bdunahu <bdunahu@operationnull.com>
+;;
+;; This program is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+(define-module (src utils)
+ #:use-module (srfi srfi-1)
+ #:use-module (ice-9 match)
+ #:use-module (web client)
+ #:use-module (web response)
+ #:use-module (rnrs bytevectors)
+ #:use-module (json)
+ #:use-module (ice-9 receive)
+ #:use-module (ice-9 textual-ports)
+ #:use-module (ice-9 pretty-print)
+ #:use-module (ice-9 regex)
+ #:use-module (ice-9 popen)
+ #:export (default-headers
+ url-exists?
+ url->scm
+ shell->str
+ normalize-file
+ append-to-values
+ mkdir-p
+ filter-actions-on-regex))
+
+;; Headers sent with GitHub API requests unless a caller supplies its own.
+;; The Authorization entry is only present when the TOKEN environment
+;; variable is set; without it, loading this module no longer fails and
+;; requests simply go out unauthenticated.
+(define default-headers
+  (let ((token (getenv "TOKEN")))
+    `((Accept . "application/vnd.github+json")
+      ,@(if token
+            `((Authorization . ,(string-append "Bearer " token)))
+            '())
+      (User-Agent . "curl 8.6.0")))) ;i lied
+
+(define (url-exists? url)
+  "Request URL with `default-headers' and return #t when the response code
+is 200, #f otherwise."
+  (call-with-values
+      (lambda () (http-request url #:headers default-headers))
+    (lambda (response . _)
+      (= 200 (response-code response)))))
+
+(define* (url->scm url #:key (headers default-headers))
+  "Request URL with HEADERS (by default `default-headers'), decode the
+response body as UTF-8 and return it parsed as JSON."
+  (call-with-values
+      (lambda () (http-request url #:headers headers))
+    (lambda (response payload)
+      (json-string->scm (utf8->string payload)))))
+
+(define (shell->str . args)
+  "Run the command given by ARGS (program name followed by its arguments)
+and return everything it writes to standard output as one string."
+  (let* ((pipe (apply open-pipe* OPEN_READ args))
+         (captured (get-string-all pipe)))
+    (close-pipe pipe)
+    captured))
+
+(define (normalize-file file)
+  "Rewrite FILE in place with every comma, double quote, single quote and
+carriage return removed, and with repeated lines dropped (the first
+occurrence of each line is kept)."
+  (define junk (char-set #\, #\" #\' #\return))
+  (define (clean text)
+    (string-filter (lambda (c) (not (char-set-contains? junk c)))
+                   text))
+  (let* ((contents (call-with-input-file file get-string-all))
+         (unique-lines (delete-duplicates
+                        (string-split (clean contents) #\newline)))
+         (port (open-file file "w")))
+    (display (string-join unique-lines "\n") port)
+    (close port)))
+
+(define (append-to-values hashtable k v)
+  "Put V in front of the list stored under K in HASHTABLE, starting from the
+empty list when K has no entry yet.  Return HASHTABLE."
+  (hash-set! hashtable k (cons v (hash-ref hashtable k '())))
+  hashtable)
+
+;; Create directory DIR together with any missing ancestors.  A component
+;; that already exists is not an error; any other failure of `mkdir' is
+;; re-thrown.  Returns #t.
+(define (mkdir-p dir)
+ "Yoinked from https://codeberg.org/guix/guix."
+ (define absolute?
+ (string-prefix? "/" dir))
+ (define not-slash
+ (char-set-complement (char-set #\/)))
+ ;; COMPONENTS: path components still to create; ROOT: the path built so
+ ;; far ("" for an absolute DIR, "." for a relative one).
+ (let loop ((components (string-tokenize dir not-slash))
+ (root (if absolute?
+ ""
+ ".")))
+ (match components
+ ((head tail ...)
+ (let ((path (string-append root "/" head)))
+ (catch 'system-error
+ (lambda ()
+ (mkdir path)
+ (loop tail path))
+ (lambda args
+ ;; EEXIST: the component is already there, carry on below it.
+ (if (= EEXIST (system-error-errno args))
+ (loop tail path)
+ (apply throw args))))))
+ (() #t))))
+
+(define (filter-actions-on-regex file file->regex parse-f)
+  "This procedure is for the bash drop-ins only, which do not filter their
+outputs into files, but rather output lines which need to be filtered based on
+regex.
+
+FILE: the file to be filtered.
+FILE->REGEX: an alist mapping output file names to the regex an item (usually
+a line of FILE) has to match to be written to that file.  An item is written
+to every file whose regex it matches.
+PARSE-F: a procedure turning the contents of FILE, given as one string, into
+the list of items.
+
+Output files are overwritten, one item per line; a file for which no item
+matched is left untouched.  Directories are created as needed."
+  (let ((ht (make-hash-table)))
+    ;; Collect, per output file, the items matching its regex.
+    (define (hash-actions-to-regex actions)
+      (for-each
+       (lambda (item)
+         (for-each
+          (lambda (pair)
+            (let ((target (car pair))
+                  (regex (cdr pair)))
+              (when (string-match regex item)
+                (append-to-values ht target item))))
+          file->regex))
+       actions)
+      ht)
+    ;; Write every collected list to its file.
+    (define (hash->files)
+      (hash-for-each (lambda (target items)
+                       (mkdir-p (dirname target))
+                       (let ((output (open-file target "w")))
+                         ;; `for-each', not `map': this is done for effect,
+                         ;; and the lines must be written in list order.
+                         (for-each (lambda (a) (format output "~a~%" a))
+                                   items)
+                         (close output)))
+                     ht))
+    (let* ((str (call-with-input-file file get-string-all))
+           (actions (parse-f str)))
+      (hash-actions-to-regex actions)
+      (hash->files))))