summaryrefslogtreecommitdiff
path: root/src/crawl-newest-commits.scm
blob: 150973ec0da9f8f00a150121cd5cb4f94fbd6ca8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
;; kenku --- crawl and reproduce github actions
;; Copyright © 2026 bdunahu <bdunahu@operationnull.com>
;;
;; This program is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;;
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with this program.  If not, see <https://www.gnu.org/licenses/>.
;;
;;
;; This file reads from standard in a list of repo identifiers and commits:
;;
;; 1fexd/gh-create-release-notes 0.0.18
;; 1password/load-secrets-action 92467eb28f72e8255933372f1e0707c567ce2259
;; 1password/load-secrets-action v3
;; 2428392/gh-truncate-string-action b3ff790d21cf42af3ca7579146eedb93c8fb0757
;; 2428392/gh-truncate-string-action v1.0.0
;; 3ru/gpt-translate master
;; 8398a7/action-slack 77eaa4f1c608a7d68b38af4e3f739dcd8cba273e
;;
;; It only cares about the first two columns. It then collects all the commits
;; associated with a repo and folds it into a single outputted line featuring
;; the newest commit. Since I'm not downloading all the repos at this phase,
;; it again uses the github API. Surprisingly, the limit of queries is quite
;; large. Speaking of which, be sure to export your personal access token
;; under the variable "TOKEN" if you want to run this script.

(define-module (src crawl-newest-commits)
  #:use-module (srfi srfi-1)
  #:use-module (ice-9 regex)
  #:use-module ((src crawl-actions-wrapper) #:prefix get-act:)
  #:use-module ((src utils) #:prefix util:)
  #:use-module ((ice-9 rdelim))
  #:export (crawl-commits
            outfile))

(define outfile (in-vicinity (dirname get-act:external-file)
                             "newest-external-commits.txt"))
(define github-api-template "https://api.github.com/repos/~a/~a/commits")
(define github-commit-api-template
  (in-vicinity github-api-template "~a"))
(define github-branch-api-template
  (string-append github-api-template "?sha=~a&per_page=1"))
(define git-commit-sha-regex"[0-9a-b]{40}")

(define (port-to-hash port)
  (let loop ((ht (make-hash-table)))
    (let ((line (read-line port)))
      (if (eof-object? line)
          ht
          (loop (let* ((node-action
                        (string-split line char-set:whitespace))
                       (repo-commit (list-head node-action 2)))
                  (apply util:append-to-values (cons* ht repo-commit))))))))

(define (traverse-and-get-epoch response)
  (define (iso-8601-to-epoch date)
    (string->number (strftime "%s" (car (strptime "%F" date)))))
  (unless (assoc "message" response)
    (iso-8601-to-epoch
     (fold (lambda (field alist) (cdr (assoc field alist)))
           response
           '("commit" "committer" "date")))))

(define (compare-commits owner repo id recent-pair)
  (let* ((is-sha (string-match git-commit-sha-regex id))
         (url (format #f (if is-sha
                             github-commit-api-template
                             github-branch-api-template)
                      owner repo id))
         (commit-obj (util:url->scm url))
         (epoch (traverse-and-get-epoch
                 (if is-sha (cdr commit-obj)
                     (cdr (cdr (vector-ref commit-obj 0))))))
         (current-pair (cons id epoch)))
    (if (> (cdr current-pair) (cdr recent-pair))
        current-pair
        recent-pair)))

(define (get-recent-commit repo ids)
  (let* ((parts (list-head (string-split repo #\/) 2))
         (owner (car parts))
         (name (cadr parts))
         (recent-commit (fold (lambda (id most-recent)
                                (compare-commits owner name id most-recent))
                              (cons "old-commit" -1)
                              ids))
         (output (open-file outfile "a")))
    ;; stream responses; just in case github cuts me off.
    (format output "~a ~a\n~!" repo (car recent-commit))
    (close output)))

(define (crawl-commits)
  (util:mkdir-p (dirname outfile))
  (call-with-input-file get-act:external-file
    (lambda (port)
      (hash-for-each get-recent-commit (port-to-hash port)))))