summaryrefslogtreecommitdiff
path: root/src/crawl-newest-commits.scm
diff options
context:
space:
mode:
authorbdunahu <bdunahu@operationnull.com>2026-04-27 22:16:12 -0400
committerbdunahu <bdunahu@operationnull.com>2026-04-28 00:15:09 -0400
commit9e143d1d84817ec7e6d139d234f0fff07749621c (patch)
tree7565eac131cc3528d33d5ea3597cdd8006fdb968 /src/crawl-newest-commits.scm
initial commitHEADmaster
Diffstat (limited to 'src/crawl-newest-commits.scm')
-rwxr-xr-xsrc/crawl-newest-commits.scm104
1 files changed, 104 insertions, 0 deletions
diff --git a/src/crawl-newest-commits.scm b/src/crawl-newest-commits.scm
new file mode 100755
index 0000000..150973e
--- /dev/null
+++ b/src/crawl-newest-commits.scm
@@ -0,0 +1,104 @@
+;; kenku --- crawl and reproduce github actions
+;; Copyright © 2026 bdunahu <bdunahu@operationnull.com>
+;;
+;; This program is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program. If not, see <https://www.gnu.org/licenses/>.
+;;
+;;
+;; This file reads from standard in a list of repo identifiers and commits:
+;;
+;; 1fexd/gh-create-release-notes 0.0.18
+;; 1password/load-secrets-action 92467eb28f72e8255933372f1e0707c567ce2259
+;; 1password/load-secrets-action v3
+;; 2428392/gh-truncate-string-action b3ff790d21cf42af3ca7579146eedb93c8fb0757
+;; 2428392/gh-truncate-string-action v1.0.0
+;; 3ru/gpt-translate master
+;; 8398a7/action-slack 77eaa4f1c608a7d68b38af4e3f739dcd8cba273e
+;;
+;; It only cares about the first two columns. It then collects all the commits
+;; associated with a repo and folds it into a single outputted line featuring
+;; the newest commit. Since I'm not downloading all the repos at this phase,
+;; it again uses the github API. Surprisingly, the limit of queries is quite
+;; large. Speaking of which, be sure to export your personal access token
+;; under the variable "TOKEN" if you want to run this script.
+
+(define-module (src crawl-newest-commits)
+ #:use-module (srfi srfi-1)
+ #:use-module (ice-9 regex)
+ #:use-module ((src crawl-actions-wrapper) #:prefix get-act:)
+ #:use-module ((src utils) #:prefix util:)
+ #:use-module ((ice-9 rdelim))
+ #:export (crawl-commits
+ outfile))
+
+(define outfile (in-vicinity (dirname get-act:external-file)
+ "newest-external-commits.txt"))
+(define github-api-template "https://api.github.com/repos/~a/~a/commits")
+(define github-commit-api-template
+ (in-vicinity github-api-template "~a"))
+(define github-branch-api-template
+ (string-append github-api-template "?sha=~a&per_page=1"))
+(define git-commit-sha-regex"[0-9a-b]{40}")
+
+(define (port-to-hash port)
+ (let loop ((ht (make-hash-table)))
+ (let ((line (read-line port)))
+ (if (eof-object? line)
+ ht
+ (loop (let* ((node-action
+ (string-split line char-set:whitespace))
+ (repo-commit (list-head node-action 2)))
+ (apply util:append-to-values (cons* ht repo-commit))))))))
+
+(define (traverse-and-get-epoch response)
+ (define (iso-8601-to-epoch date)
+ (string->number (strftime "%s" (car (strptime "%F" date)))))
+ (unless (assoc "message" response)
+ (iso-8601-to-epoch
+ (fold (lambda (field alist) (cdr (assoc field alist)))
+ response
+ '("commit" "committer" "date")))))
+
+(define (compare-commits owner repo id recent-pair)
+ (let* ((is-sha (string-match git-commit-sha-regex id))
+ (url (format #f (if is-sha
+ github-commit-api-template
+ github-branch-api-template)
+ owner repo id))
+ (commit-obj (util:url->scm url))
+ (epoch (traverse-and-get-epoch
+ (if is-sha (cdr commit-obj)
+ (cdr (cdr (vector-ref commit-obj 0))))))
+ (current-pair (cons id epoch)))
+ (if (> (cdr current-pair) (cdr recent-pair))
+ current-pair
+ recent-pair)))
+
+(define (get-recent-commit repo ids)
+ (let* ((parts (list-head (string-split repo #\/) 2))
+ (owner (car parts))
+ (name (cadr parts))
+ (recent-commit (fold (lambda (id most-recent)
+ (compare-commits owner name id most-recent))
+ (cons "old-commit" -1)
+ ids))
+ (output (open-file outfile "a")))
+ ;; stream responses; just in case github cuts me off.
+ (format output "~a ~a\n~!" repo (car recent-commit))
+ (close output)))
+
+(define (crawl-commits)
+ (util:mkdir-p (dirname outfile))
+ (call-with-input-file get-act:external-file
+ (lambda (port)
+ (hash-for-each get-recent-commit (port-to-hash port)))))