diff options
| author | bdunahu <bdunahu@operationnull.com> | 2026-04-27 22:16:12 -0400 |
|---|---|---|
| committer | bdunahu <bdunahu@operationnull.com> | 2026-04-28 00:15:09 -0400 |
| commit | 9e143d1d84817ec7e6d139d234f0fff07749621c (patch) | |
| tree | 7565eac131cc3528d33d5ea3597cdd8006fdb968 /src/crawl-newest-commits.scm | |
Diffstat (limited to 'src/crawl-newest-commits.scm')
| -rwxr-xr-x | src/crawl-newest-commits.scm | 104 |
1 files changed, 104 insertions, 0 deletions
diff --git a/src/crawl-newest-commits.scm b/src/crawl-newest-commits.scm new file mode 100755 index 0000000..150973e --- /dev/null +++ b/src/crawl-newest-commits.scm @@ -0,0 +1,104 @@ +;; kenku --- crawl and reproduce github actions +;; Copyright © 2026 bdunahu <bdunahu@operationnull.com> +;; +;; This program is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; This program is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with this program. If not, see <https://www.gnu.org/licenses/>. +;; +;; +;; This file reads from standard in a list of repo identifiers and commits: +;; +;; 1fexd/gh-create-release-notes 0.0.18 +;; 1password/load-secrets-action 92467eb28f72e8255933372f1e0707c567ce2259 +;; 1password/load-secrets-action v3 +;; 2428392/gh-truncate-string-action b3ff790d21cf42af3ca7579146eedb93c8fb0757 +;; 2428392/gh-truncate-string-action v1.0.0 +;; 3ru/gpt-translate master +;; 8398a7/action-slack 77eaa4f1c608a7d68b38af4e3f739dcd8cba273e +;; +;; It only cares about the first two columns. It then collects all the commits +;; associated with a repo and folds it into a single outputted line featuring +;; the newest commit. Since I'm not downloading all the repos at this phase, +;; it again uses the github API. Surprisingly, the limit of queries is quite +;; large. Speaking of which, be sure to export your personal access token +;; under the variable "TOKEN" if you want to run this script. + +(define-module (src crawl-newest-commits) + #:use-module (srfi srfi-1) + #:use-module (ice-9 regex) + #:use-module ((src crawl-actions-wrapper) #:prefix get-act:) + #:use-module ((src utils) #:prefix util:) + #:use-module ((ice-9 rdelim)) + #:export (crawl-commits + outfile)) + +(define outfile (in-vicinity (dirname get-act:external-file) + "newest-external-commits.txt")) +(define github-api-template "https://api.github.com/repos/~a/~a/commits") +(define github-commit-api-template + (in-vicinity github-api-template "~a")) +(define github-branch-api-template + (string-append github-api-template "?sha=~a&per_page=1")) +(define git-commit-sha-regex"[0-9a-b]{40}") + +(define (port-to-hash port) + (let loop ((ht (make-hash-table))) + (let ((line (read-line port))) + (if (eof-object? line) + ht + (loop (let* ((node-action + (string-split line char-set:whitespace)) + (repo-commit (list-head node-action 2))) + (apply util:append-to-values (cons* ht repo-commit)))))))) + +(define (traverse-and-get-epoch response) + (define (iso-8601-to-epoch date) + (string->number (strftime "%s" (car (strptime "%F" date))))) + (unless (assoc "message" response) + (iso-8601-to-epoch + (fold (lambda (field alist) (cdr (assoc field alist))) + response + '("commit" "committer" "date"))))) + +(define (compare-commits owner repo id recent-pair) + (let* ((is-sha (string-match git-commit-sha-regex id)) + (url (format #f (if is-sha + github-commit-api-template + github-branch-api-template) + owner repo id)) + (commit-obj (util:url->scm url)) + (epoch (traverse-and-get-epoch + (if is-sha (cdr commit-obj) + (cdr (cdr (vector-ref commit-obj 0)))))) + (current-pair (cons id epoch))) + (if (> (cdr current-pair) (cdr recent-pair)) + current-pair + recent-pair))) + +(define (get-recent-commit repo ids) + (let* ((parts (list-head (string-split repo #\/) 2)) + (owner (car parts)) + (name (cadr parts)) + (recent-commit (fold (lambda (id most-recent) + (compare-commits owner name id most-recent)) + (cons "old-commit" -1) + ids)) + (output (open-file outfile "a"))) + ;; stream responses; just in case github cuts me off. + (format output "~a ~a\n~!" repo (car recent-commit)) + (close output))) + +(define (crawl-commits) + (util:mkdir-p (dirname outfile)) + (call-with-input-file get-act:external-file + (lambda (port) + (hash-for-each get-recent-commit (port-to-hash port))))) |
