summaryrefslogtreecommitdiff
path: root/src/poll-repos.scm
blob: b7f37b14dc5a192b0a67e77654488e46c07acb08 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
;; kenku --- crawl and reproduce github actions
;; Copyright © 2026 bdunahu <bdunahu@operationnull.com>
;;
;; This program is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;;
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with this program.  If not, see <https://www.gnu.org/licenses/>.

(define-module (src poll-repos)
  #:use-module (srfi srfi-1)
  #:use-module ((src utils) #:prefix util:)
  #:use-module ((src config) #:prefix conf:)
  #:use-module (ice-9 pretty-print)
  #:export (poll-repos
            outfile))

;; Directory this module writes into; a "repos" entry under the configured
;; cache directory.
(define outdir (in-vicinity conf:cache-dir "repos"))
;; File that `poll-repos' appends repository URLs to.  Exported so other
;; modules can locate the result.
(define outfile (in-vicinity outdir "repos.txt"))
;; `format' template for one GitHub repository-search request: ~a receives
;; the search query and ~d the page number.  Results are requested sorted by
;; star count, descending, 100 per page.
(define github-api-template
  (string-append "https://api.github.com/search/repositories?q=~a"
                 "&sort=stars&order=desc&per_page=100&page=~d")) ;100/page max

;; Number of result pages requested per query; `poll-repos' numbers them
;; starting at 1.
;; NOTE(review): 10 pages x 100 results presumably matches GitHub's cap of
;; 1000 results per search -- confirm before raising this.
(define pages 10)
;;; Search queries, one full crawl of `pages' pages each.  Every string is
;;; inserted into the ~a slot of `github-api-template' by `format' with no
;;; escaping done in this module, so it must already be in query-string form
;;; (e.g. `+' between qualifiers).
;;; You may manually change these to get interesting results.
(define queries '(;;"stars:>5000"        ;top-repos
                  "stars:1000..5000+pushed:>2026-01-01"
                  "stars:1000..5000+language:Python"
                  "stars:1000..5000+language:C"
                  "stars:1000..5000+language:Javascript"
                  "stars:50..100"))

(define (poll-repos)
  "Crawl the GitHub repository-search API and record the repositories found.

For every query in `queries', request pages 1 through `pages' using
`github-api-template', collect the `html_url' field of each returned
repository, and append those URLs to `outfile', one per line.  Once all
queries are done, `outfile' is passed to `util:normalize-file'.

A response that carries no `items' key (for example an API error object)
contributes no URLs instead of aborting the crawl.  The return value is
whatever `util:normalize-file' returns."
  (util:mkdir-p outdir)
  (for-each
   (lambda (q)
     ;; Fetch everything for this query first, and only then open the
     ;; output file, so a failing request cannot leave a port dangling.
     (let ((repos (append-map
                   (lambda (p)
                     (let* ((scm-obj
                             (util:url->scm
                              (format #f github-api-template q p)
                              #:headers `((User-Agent . "curl 8.6.0"))))
                            (items (assoc "items" scm-obj)))
                       ;; `append-map' requires a list from every call, so a
                       ;; page without `items' must yield '() rather than
                       ;; the unspecified value `when' would produce.
                       (if items
                           (map (lambda (repo)
                                  (cdr (assoc "html_url" repo)))
                                (vector->list (cdr items)))
                           '())))
                   (iota pages 1))))
       (let ((output (open-file outfile "a")))
         (dynamic-wind
           (const #t)
           (lambda ()
             ;; The 'suffix grammar terminates every URL with a newline.
             ;; Because the file is opened in append mode, a missing final
             ;; newline would glue this query's last URL to the next
             ;; query's first one.
             (display (string-join repos "\n" 'suffix) output))
           ;; Close the port even if writing raises.
           (lambda () (close-port output))))))
   queries)
  ;; Results are appended query by query and the file is normalized in a
  ;; separate final pass, so that whatever was already fetched is on disk
  ;; if GitHub cuts the crawl off part-way through.
  (util:normalize-file outfile))