From 8e27ef2a45052ffc7a5eb275e99af34b9195fa72 Mon Sep 17 00:00:00 2001 From: Gene Pasquet Date: Fri, 7 Mar 2025 05:21:23 +0000 Subject: Added tracker --- .gitignore | 1 + newsphaser.asd | 11 +++-- package.lisp | 41 ++++++++++++++++++ phaser.lisp | 126 ------------------------------------------------------ reporter.lisp | 96 ++++++++++++++++++++++++++++++++++++++++++ toots.lisp | 20 +++++++++ tracker.lisp | 131 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ unhtml.lisp | 5 --- 8 files changed, 297 insertions(+), 134 deletions(-) create mode 100644 package.lisp delete mode 100644 phaser.lisp create mode 100644 reporter.lisp create mode 100644 toots.lisp create mode 100644 tracker.lisp diff --git a/.gitignore b/.gitignore index 544c6c9..b30cc76 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ .env *~ \#*\# +*.fasl diff --git a/newsphaser.asd b/newsphaser.asd index e1bbc48..0482950 100644 --- a/newsphaser.asd +++ b/newsphaser.asd @@ -24,8 +24,13 @@ :description "A Mastodon bot to aggregate posts by popularity." :homepage "https://github.com/Etenil/newsphaser" :serial T - :components ((:file "unhtml") - (:file "phaser")) + :components ((:file "package") + (:file "unhtml") + (:file "toots") + (:file "tracker") + (:file "reporter")) :depends-on (:tooter :cl-dotenv - :plump)) + :cl-redis + :plump + :alexandria)) diff --git a/package.lisp b/package.lisp new file mode 100644 index 0000000..65a686c --- /dev/null +++ b/package.lisp @@ -0,0 +1,41 @@ +(defpackage :phaser.unhtml + (:use :common-lisp) + (:export + #:text-excerpt)) + +(defpackage :phaser.toots + (:use :cl) + (:export + #:get-timeline + #:*client* + #:get-status)) + +(defpackage :phaser.tracker + (:use :cl) + (:import-from :phaser.toots #:get-timeline) + (:import-from :phaser.toots #:get-status) + (:export + #:track-statuses + #:collect-tracking)) + +(defpackage :phaser.reporter + (:use + :common-lisp) + (:import-from :phaser.unhtml #:text-excerpt) + (:import-from :phaser.tracker #:track-statuses) + (:import-from :phaser.toots #:*client*) + (:export + #:reblog-top-statuses)) + +(defpackage #:phaser + (:use + #:cl + #:phaser.unhtml + #:phaser.toots + #:phaser.tracker + #:phaser.reporter) + (:shadowing-import-from #:phaser.reporter #:block)) + +(dolist (package '(#:phaser.reporter #:phaser.tracker)) + (do-symbols (symbol package) + (export symbol '#:phaser))) diff --git a/phaser.lisp b/phaser.lisp deleted file mode 100644 index 2fcda54..0000000 --- a/phaser.lisp +++ /dev/null @@ -1,126 +0,0 @@ -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; NewsPhaser, Mastodon posts aggregator -;; Copyright (C) 2025 Gene Pasquet -;; -;; This program is free software: you can redistribute it and/or modify -;; it under the terms of the GNU General Public License as published by -;; the Free Software Foundation, either version 3 of the License, or -;; (at your option) any later version. -;; -;; This program is distributed in the hope that it will be useful, -;; but WITHOUT ANY WARRANTY; without even the implied warranty of -;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -;; GNU General Public License for more details. -;; -;; You should have received a copy of the GNU General Public License -;; along with this program. If not, see . -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(defpackage :phaser - (:use - :common-lisp) - (:import-from :phaser.unhtml #:text-excerpt) - (:export - #:get-timeline - #:load-timeline - #:load-top-statuses - #:reblog-top-statuses - #:summarize-top-statuses)) - -(in-package :phaser) - -(ignore-errors - ;; Don't signal if .env is missing. - (.env:load-env (merge-pathnames ".env"))) - -(defvar *client* (make-instance 'tooter:client - :base (uiop:getenv "MASTODON_URL") - :name "Newsphaser bot" - :key (uiop:getenv "MASTODON_KEY") - :secret (uiop:getenv "MASTODON_SECRET") - :access-token (uiop:getenv "MASTODON_TOKEN"))) -(defvar *timeline-page-size* 50 - "Maximum number of statuses retrieved from the timeline for each request") - -(defvar *timeline-min-age* (* 24 3600) - "Minimum age of statuses before being considered for ranking") -(defvar *timeline-max-age* (* 48 3600) ; 48h - "Maximum age of statuses to be retrieved from timelines.") - -(defvar *timeline-min-timestamp* (- (get-universal-time) *timeline-max-age*)) -(defvar *timeline-max-timestamp* (- (get-universal-time) *timeline-min-age*)) - -(defvar *pop-favs-factor* 1) -(defvar *pop-boosts-factor* 1) -(defvar *pop-replies-factor* 1) -(defvar *max-requests* 20) - -(defvar *top-statuses-to-boost* 10) - -(defun outdated-statuses (page) - (remove-if - (lambda (status) - (> (tooter:created-at status) *timeline-min-timestamp*)) - page)) - -(defun last-page-p (page) - (or (null page) - (not (null (outdated-statuses page))))) - -(defun flatten (structure) - (cond ((null structure) nil) - ((atom structure) (list structure)) - (t (mapcan #'flatten structure)))) - -(defun load-timeline-pages (min-id &optional (pages '()) (iter-num 0)) - "Load statuses from the timeline up to *TIMELINE-MAX-AGE* or MIN-ID." - (let* ((max-id (if (null pages) - nil - (tooter:id (car (last (car pages)))))) - (page (tooter:timeline *client* :home - :limit *timeline-page-size* - :max-id max-id - :min-id min-id)) - (statuses-and-page (if (null pages) - (list page) - (cons page pages)))) - (if (or (last-page-p page) (>= iter-num *max-requests*)) - (flatten statuses-and-page) - (load-timeline-pages min-id statuses-and-page (1+ iter-num))))) - -(defun load-timeline () - (remove-if (lambda (status) - (let ((status-timestamp (tooter:created-at status))) - (or (< status-timestamp *timeline-min-timestamp*) - (> status-timestamp *timeline-max-timestamp*)))) - (load-timeline-pages nil))) - -(defun status-popularity (status) - (/ (+ (* (tooter:replies-count status) *pop-replies-factor*) - (* (tooter:favourites-count status) *pop-favs-factor*) - (* (tooter:reblogs-count status) *pop-boosts-factor*)) - (- (get-universal-time) (tooter:created-at status)))) - -(defun get-timeline () - (tooter:timeline *client* :home)) - -(defun load-top-statuses () - (let* ((timeline (load-timeline)) - (ranked-statuses (sort (mapcar (lambda (status) - `(,(status-popularity status) . ,status)) - timeline) - '> :key 'car))) - (mapcar 'cdr (butlast ranked-statuses (- (length ranked-statuses) 10))))) - - -(defun summarize-top-statuses () - (let ((statuses (load-top-statuses))) - (mapcar (lambda (status) - (text-excerpt (tooter:content status))) - statuses))) - -(defun reblog-top-statuses () - (let ((top-statuses (load-top-statuses))) - (mapc (lambda (status) - (tooter:favourite *client* status)) - top-statuses))) diff --git a/reporter.lisp b/reporter.lisp new file mode 100644 index 0000000..48ec0fa --- /dev/null +++ b/reporter.lisp @@ -0,0 +1,96 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; NewsPhaser, Mastodon posts aggregator +;; Copyright (C) 2025 Gene Pasquet +;; +;; This program is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; This program is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with this program. If not, see . +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(in-package :phaser) + +(defvar *timeline-page-size* 50 + "Maximum number of statuses retrieved from the timeline for each request") +(defvar *timeline-min-age* (* 24 3600) + "Minimum age of statuses before being considered for ranking") +(defvar *timeline-max-age* (* 48 3600) ; 48h + "Maximum age of statuses to be retrieved from timelines.") + +(defvar *timeline-min-timestamp* (- (get-universal-time) *timeline-max-age*)) +(defvar *timeline-max-timestamp* (- (get-universal-time) *timeline-min-age*)) + +(defvar *pop-favs-factor* 1) +(defvar *pop-boosts-factor* 1) +(defvar *pop-replies-factor* 1) +(defvar *max-requests* 20) + +(defvar *top-statuses-to-boost* 10) + +(defun outdated-statuses (page) + (remove-if + (lambda (status) + (> (tooter:created-at status) *timeline-min-timestamp*)) + page)) + +(defun last-page-p (page) + (or (null page) + (not (null (outdated-statuses page))))) + +(defun load-timeline-pages (min-id &optional (pages '()) (iter-num 0)) + "Load statuses from the timeline up to *TIMELINE-MAX-AGE* or MIN-ID." + (let* ((max-id (if (null pages) + nil + (tooter:id (car (last (car pages)))))) + (page (tooter:timeline *client* :home + :limit *timeline-page-size* + :max-id max-id + :min-id min-id)) + (statuses-and-page (if (null pages) + (list page) + (cons page pages)))) + (if (or (last-page-p page) (>= iter-num *max-requests*)) + (alexandria:flatten statuses-and-page) + (load-timeline-pages min-id statuses-and-page (1+ iter-num))))) + +(defun load-timeline () + (remove-if (lambda (status) + (let ((status-timestamp (tooter:created-at status))) + (or (< status-timestamp *timeline-min-timestamp*) + (> status-timestamp *timeline-max-timestamp*)))) + (load-timeline-pages nil))) + +(defun status-popularity (status) + (/ (+ (* (tooter:replies-count status) *pop-replies-factor*) + (* (tooter:favourites-count status) *pop-favs-factor*) + (* (tooter:reblogs-count status) *pop-boosts-factor*)) + (- (get-universal-time) (tooter:created-at status)))) + +(defun load-top-statuses () + (let* ((timeline (load-timeline)) + (ranked-statuses (sort (mapcar (lambda (status) + `(,(status-popularity status) . ,status)) + timeline) + '> :key 'car))) + (mapcar 'cdr (butlast ranked-statuses (- (length ranked-statuses) 10))))) + + +(defun summarize-top-statuses () + (let ((statuses (load-top-statuses))) + (mapcar (lambda (status) + (text-excerpt (tooter:content status))) + statuses))) + +(defun reblog-top-statuses () + (let ((top-statuses (load-top-statuses))) + (mapc (lambda (status) + (tooter:favourite *client* status)) + top-statuses))) diff --git a/toots.lisp b/toots.lisp new file mode 100644 index 0000000..f36c9eb --- /dev/null +++ b/toots.lisp @@ -0,0 +1,20 @@ +(in-package :phaser.toots) + +(ignore-errors + ;; Don't signal if .env is missing. + (.env:load-env (merge-pathnames ".env"))) + + +(defvar *client* + (make-instance 'tooter:client + :base (uiop:getenv "MASTODON_URL") + :name "Newsphaser bot" + :key (uiop:getenv "MASTODON_KEY") + :secret (uiop:getenv "MASTODON_SECRET") + :access-token (uiop:getenv "MASTODON_TOKEN"))) + +(defun get-timeline (&key (timeline :home)) + (tooter:timeline *client* timeline)) + +(defun get-status (id) + (tooter:find-status *client* id)) diff --git a/tracker.lisp b/tracker.lisp new file mode 100644 index 0000000..d1ccd78 --- /dev/null +++ b/tracker.lisp @@ -0,0 +1,131 @@ +(in-package :phaser.tracker) + +(defvar *max-num* 100) +(defvar *max-age* (* 14 24 3600)) ; Two weeks +(defvar *tracking-key-fmt* "HISTORY-~a") +(defvar *index-key* "STATUSES") + +(defun tracking-key-status-id (key) + (subseq key (length (format nil *tracking-key-fmt* "")))) + +(defun status-data-key (status) + (format nil "status-~a" (tooter:id status))) + +(defstruct status-tracker + time id) + +(defun status-tracker-to-cons (tracker) + (cons (status-tracker-time tracker) + (status-tracker-id tracker))) + +(defun status-tracker-from-cons (pair) + (make-status-tracker :time (car pair) + :id (cdr pair))) + +(defun status-tracker-from-status (status) + (make-status-tracker :time (tooter:created-at status) + :id (tooter:id status))) + +(defun read-from-redis (key) + (let ((data (red:get key))) + (when data + (read-from-string data)))) + +(defun read-list-from-redis (key) + (let ((data (red:lrange key 0 -1))) + (mapcar #'read-from-string data))) + +(defun load-status-trackers () + (mapcar #'status-tracker-from-cons + (read-from-redis *index-key*))) + +(defun save-status-trakers (trackers) + (let ((data (mapcar #'status-tracker-to-cons trackers))) + (red:set *index-key* data) + trackers)) + +(defun rand-pick (sequence) + "Picks a random number of items from sequence" + (subseq sequence 0 (random (length sequence)))) + +(defun strip-tracked-statuses (tracked-statuses new-statuses) + (remove-if + (lambda (status) + (find (tooter:id status) + tracked-statuses + :test (lambda (id tracker) + (eql id (status-tracker-id tracker))))) + new-statuses)) + +(defun get-new-statuses (tracked-statuses) + (let* ((statuses (get-timeline)) + (num-statuses-to-pick (- *max-num* (length tracked-statuses))) + (new-statuses (rand-pick + (strip-tracked-statuses tracked-statuses statuses)))) + (if (> (length new-statuses) num-statuses-to-pick) + (subseq new-statuses 0 num-statuses-to-pick) + new-statuses))) + +(defun track-status (tracker) + (let ((status (get-status (status-tracker-id tracker)))) + (red:lpush (format nil *tracking-key-fmt* (tooter:id status)) + (list + (cons 'time (get-universal-time)) + (cons 'favourites (tooter:favourites-count status)) + (cons 'replies (tooter:replies-count status)) + (cons 'reblogs (tooter:reblogs-count status)))))) + +(defun track-statuses () + (redis:with-connection () + (let ((tracked-statuses (load-status-trackers))) + + ;; Fill up the tracked status pool + (when (< (length tracked-statuses) *max-num*) + (setf tracked-statuses (nconc + tracked-statuses + (mapcar + #'status-tracker-from-status + (get-new-statuses tracked-statuses))))) + + ;; for each status, request details + (mapc #'track-status tracked-statuses) + + ;; if status is older than max-age, remove from list of tracked statuses + (setf tracked-statuses (remove-if + (lambda (tracker) + (< (status-tracker-time tracker) + (- (get-universal-time) *max-age*))) + tracked-statuses)) + + ;; Persist tracking list + (save-status-trakers tracked-statuses) + )) + ) + +(defun collect-status-tracking (id) + (read-list-from-redis (format nil *tracking-key-fmt* id))) + +(defun collect-tracking () + (redis:with-connection () + (let* ((history-keys (red:keys (format nil *tracking-key-fmt* "*"))) + (status-history (mapcar (lambda (history-key) + (let ((id (tracking-key-status-id history-key))) + (cons + id + (collect-status-tracking id)))) + history-keys))) + ;; status-history + (format nil "~{~a~%~}" + (alexandria:flatten + (mapcar (lambda (item) + (mapcar + (lambda (entry) + (format nil "~a,~a,~a,~a,~a" + (car item) + (cdr (assoc 'time entry)) + (cdr (assoc 'favourites entry)) + (cdr (assoc 'reblogs entry)) + (cdr (assoc 'replies entry)))) + (cdr item))) + status-history))) + ))) diff --git a/unhtml.lisp b/unhtml.lisp index 7ecea1b..18bab64 100644 --- a/unhtml.lisp +++ b/unhtml.lisp @@ -1,8 +1,3 @@ -(defpackage :phaser.unhtml - (:use :common-lisp) - (:export - #:text-excerpt)) - (in-package :phaser.unhtml) (defun node-has-text (node) -- cgit v1.2.3