Jan Andrle 0a5326b80f
All checks were successful
Update RSS / update-rss (push) Successful in 5m44s
🐛 Fallback for missing author and pull
2024-12-10 10:09:42 +01:00

199 lines
6.3 KiB
JavaScript
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env -S npx nodejsscript
/* jshint esversion: 11,-W097, -W040, module: true, node: true, expr: true, undef: true *//* global echo, $, pipe, s, fetch, cyclicLoop */
// Base URL of the drop pages; a drop identifier is appended to it when fetching.
const url_drops= "https://pagenotfound.cz/drop/";
// Package metadata: `version`/`description` feed the CLI definition, `homepage` the feed's self link.
const { version, description, homepage }= s.cat("package.json").xargs(JSON.parse);
/**
* @typedef {Object} Article
* @property {string} title
* @property {string} perex
* @property {string} author
* @property {string} loc
* @property {string} drop
* */
/**
* @typedef {Object} Drop
* @property {string} drop
* @property {string} date
* */
/**
* @typedef {Object} Sitemap
* @property {Article[]} articles
* @property {Drop[]} drops
* */
/**
* @typedef {Object} State
* @property {Sitemap} json
* @property {string[]} changed Changed files
* */
/**
 * Formats a date as an RFC-822 date-time in GMT, the format RSS expects in `<pubDate>`
 * (for example `Wed, 02 Oct 2002 08:00:00 GMT`).
 *
 * `Date.prototype.toUTCString` is specified to produce exactly this shape with fixed
 * English day/month names. Locale-driven `Intl` formatting is not a stable source for it:
 * the text depends on the ICU/CLDR data of the runtime (recent `en-GB` data abbreviates
 * September as "Sept", which is not a valid RFC-822 month).
 * @param {string} date Anything the `Date` constructor can parse
 * @returns {string}
 * @throws {RangeError} When `date` cannot be parsed
 * */
const pubDate= date=> {
	const D= new Date(date);
	// `toUTCString` would return the text "Invalid Date"; fail loudly instead of emitting it into the feed
	if(Number.isNaN(D.getTime()))
		throw new RangeError(`Invalid date for pubDate: ${date}`);
	return D.toUTCString();
};
// Command line interface:
// - `pull` scrapes the newest drop, regenerates the feed and (with `--git`) syncs the repository,
// - `only-rss` regenerates the feed from the already stored sitemap.
$.api()
	.version(version)
	.describe(description)
	.command("pull", "Update article list")
	.option("--git", "Update git repository")
	.action(async function pull(options){
		const { git: is_git= false }= options;
		if(is_git) s.run`git pull --rebase`;
		const state= await sitemap();
		const { changed }= await toRSS(state);
		const listing= changed.length ? changed.join(", ") : "—";
		echo("Changed files:", listing);
		if(is_git) gitCommit(changed, "pull");
		$.exit(0);
	})
	.command("only-rss", "Update RSS from known sitemap")
	.action(async function onlyRSS(){
		const known= knownSitemap();
		// pretend the sitemap changed so that the feed is always regenerated
		const status= await toRSS({ json: known.json, changed: [ known.path ] });
		echo({ status });
		$.exit(0);
	})
	.parse();
/**
 * Commits and pushes the given files under a temporary "Bot" git identity.
 * Nothing happens when no files are given or when `git diff --numstat` prints nothing.
 * @param {string[]} files Paths handed over to `git add`
 * @param {string} [des="not specified"] Suffix of the commit message
 * */
function gitCommit(files, des= "not specified"){
	if(!files.length)
		return echo("Nothig todo");
	const pending= s.run`git diff --numstat`.trim();
	if(!pending)
		return echo("Nothig todo");

	echo("Diff to save");
	// the address is kept reversed in the source and restored only at runtime
	const email= Array.from("zc.murtnec@naj.elrdna").reverse().join("");
	s.run`git config user.name "Bot"`;
	s.run`git config user.email "${email}"`;
	s.run`git add ${files}`;
	s.run`git commit -m "Updated by bot ${des}"`;
	s.run`git push`;
	// drop the temporary identity again
	s.run`git config --remove-section user`;
}
/**
 * Renders all known articles as an RSS 2.0 feed and writes it to `rss.xml`.
 *
 * Nothing is written when `changed` is empty (the feed is derived only from `json`,
 * so it cannot differ from the previously generated one).
 *
 * All values coming from the sitemap are XML-escaped, because they were scraped as plain
 * text and may contain `&`, `<` and similar characters. An article whose drop has no entry
 * in `json.drops` is still listed, only without `<pubDate>` (the element is optional in RSS 2.0).
 * @param {State} state
 * @returns {Promise<State>} The same `json`; `changed` is extended with `rss.xml` when the feed was written
 * */
async function toRSS({ json, changed }){
	if(!changed.length) return { json, changed };
	const path= "rss.xml";
	const host= "https://pagenotfound.cz";
	const entities= { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&apos;" };
	/** Escapes a value for use in XML element content or in an attribute value */
	const xml= value=> String(value).replace(/[&<>"']/g, ch=> entities[ch]);
	// drop → date lookup built once; the first entry wins, as with `Array.prototype.find`
	const drop_dates= new Map();
	for(const { drop, date } of json.drops)
		if(!drop_dates.has(drop)) drop_dates.set(drop, date);
	const articles= json.articles.map(function({ title, perex, author, loc, drop }){
		const link= xml(host+loc);
		const lines= [
			`<title>${xml(title)}</title>`,
			`<link>${link}</link>`,
			`<guid>${link}</guid>`,
			`<description>${xml(perex)}</description>`,
			`<dc:creator>${xml(author)}</dc:creator>`,
		];
		if(drop_dates.has(drop))
			lines.push(`<pubDate>${xml(pubDate(drop_dates.get(drop)))}</pubDate>`);
		lines.push(`<category>${xml(drop)}</category>`);
		return [
			"\t<item>",
			...lines.map(l=> "\t\t"+l),
			"\t</item>"
		].join("\n");
	});
	const description = [
		"Page not found jsme založili z touhy po zábavné, kvalitní a inovativní žurnalistice.",
		"Chceme vám tu nabízet komplexní long ready, nečekané bonusy, multimediální obsah",
		"a hlavně texty, které nám samotným v ostatních médiích chybí. Budujte spolu s námi",
		"komunitu Page not found, dejte nám zpětnou vazbu na první drop, přihlaste se",
		"k odebírání newsletterů. Společně s vámi budeme moct naše cíle plnit rychleji."
	].join(" ");
	s.echo([
		`<?xml version="1.0" encoding="UTF-8" ?>`,
		`<rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:atom="http://www.w3.org/2005/Atom">`,
		"<channel>",
		` <title>Pagenotfound.cz</title>`,
		` <link>${host}</link>`,
		` <description>${xml(description)}</description>`,
		` <atom:link href="${xml(homepage)}/raw/branch/main/rss.xml" rel="self" type="application/rss+xml" />`,
		...articles,
		"</channel>",
		"</rss>"
	].join("\n")).to(path);
	return { json, changed: [...changed, path] };
}
import { JSDOM } from "jsdom";
/**
 * Scrapes the page of the newest drop and prepends articles that are not yet known
 * to the stored sitemap. The sitemap file is rewritten only when a new article was found.
 * @returns {Promise<State>} `changed` holds the sitemap path when the file was rewritten, otherwise it is empty
 * */
async function sitemap(){
	const { json, path }= knownSitemap();
	await syncDrops(json);
	const drop_last= json.drops[0].drop;
	const response= await fetch(url_drops+drop_last);
	if(response.status !== 200) return { json, changed: [] };

	const known_locs= dropArticles(drop_last, json);
	const { document }= new JSDOM(await response.text()).window;
	/** Trimmed text of the (required) element matching `selector` inside the tile */
	const text= (el_article, selector)=> el_article.querySelector(selector).textContent.trim();
	const fresh= [];
	for(const el_article of document.querySelectorAll("article")){
		const loc= el_article.querySelector("a")?.href;
		if(!loc){
			echo("Article without link:", el_article.textContent);
			continue;
		}
		if(known_locs.has(loc)) continue;
		fresh.push({
			title: text(el_article, "h2"),
			perex: text(el_article, "[class^=ArticleTile_perex]"),
			// tiles without an author element are attributed to "Redakce"
			author: (el_article.querySelector("[class^=ArticleTile_author]")?.textContent || "Redakce").trim(),
			loc,
			drop: drop_last,
		});
	}
	if(fresh.length === 0) return { json, changed: [] };

	// newest first — `dropArticles` relies on this ordering
	json.articles.unshift(...fresh);
	const serialized= JSON.stringify(json, null, "\t");
	s.echo(serialized).to(path);
	return { json, changed: [ path ] };
}
/**
 * Loads the sitemap stored by a previous run.
 * @returns {{ json: Sitemap, path: string }} Parsed `sitemap.json`, or an empty sitemap when the file does not exist
 * */
function knownSitemap(){
	const path= "sitemap.json";
	if(!s.test("-f", path))
		return { json: { drops: [], articles: [] }, path };
	/** @type {Sitemap} */
	const json= s.cat(path).xargs(JSON.parse);
	return { json, path };
}
/**
 * Collects locations of the articles belonging to `drop` from the head of the list.
 *
 * Only the leading run of articles is inspected: the scan ends at the first article
 * of another drop, so the list is expected to be ordered from newest to oldest.
 * @param {Drop["drop"]} drop
 * @param {Sitemap} json
 * @returns {Set<Article["loc"]>}
 * */
function dropArticles(drop, { articles }){
	const end= articles.findIndex(article=> article.drop !== drop);
	const leading= end === -1 ? articles : articles.slice(0, end);
	return new Set(leading.map(article=> article.loc));
}
/**
 * Probes (HTTP `HEAD`) whether the drop following the newest known one is already
 * published and, if so, registers it at the front of `json.drops`.
 *
 * The name of the next drop is derived by incrementing the numeric suffix of the newest
 * known drop while keeping its zero padding (illustrative: `x-009` → `x-010`).
 * `json` is mutated in place; nothing is written to disk here.
 * @param {Sitemap} json
 * @returns {Promise<Sitemap>} The same `json` object
 * @throws {TypeError} When `json.drops` is empty, as there is no drop to derive the next name from
 * */
async function syncDrops(json){
	const [ newest ]= json.drops;
	if(!newest)
		throw new TypeError("Cannot look for a new drop: the sitemap contains no drops");
	const { drop: drop_last }= newest;
	const i_index= drop_last.search(/\d/);
	// without a numeric part the next name cannot be derived, so there is nothing to probe
	if(i_index === -1) return json;
	const pre= drop_last.slice(0, i_index);
	const index= pipe(
		Number,
		i=> i+1,
		i=> i.toString().padStart(drop_last.length - i_index, "0"),
	)(drop_last.slice(i_index));
	const drop= pre+index;
	const res= await fetch(url_drops+drop, { method: "HEAD" });
	if(res.status !== 200) return json;
	// `new Date(null)` is the Unix epoch and an unparsable value makes `toISOString` throw,
	// so use the current time when the `Date` header is missing or malformed.
	const header= res.headers.get("date");
	const seen= header ? new Date(header) : new Date(NaN);
	const date= (Number.isNaN(seen.getTime()) ? new Date() : seen).toISOString();
	json.drops.unshift({ drop, date });
	return json;
}