#!/usr/bin/env -S npx nodejsscript
/* jshint esversion: 11,-W097, -W040, module: true, node: true, expr: true, undef: true */
/* global echo, $, pipe, s, fetch, cyclicLoop */
import { JSDOM } from "jsdom";

/** Base URL of a drop page; the drop identifier is appended to it. */
const url_drops= "https://pagenotfound.cz/drop/";
/**
 * Characters that must not appear verbatim in XML character data and their
 * replacements. Declared before the CLI chain because `.parse()` may run an
 * action (and therefore `toRSS`) before the rest of the module is evaluated.
 * */
const xml_entities= { "&": "&amp;", "<": "&lt;", ">": "&gt;", "\"": "&quot;", "'": "&apos;" };
const { version, description }= s.cat("package.json").xargs(JSON.parse);

/**
 * @typedef {Object} Article
 * @property {string} title
 * @property {string} perex
 * @property {string} author
 * @property {string} loc Link to the article as found on the drop page
 * @property {string} drop Identifier of the drop the article belongs to
 * */
/**
 * @typedef {Object} Drop
 * @property {string} drop Drop identifier (the last URL segment of the drop page)
 * @property {string} date ISO 8601 date-time
 * */
/**
 * @typedef {Object} Sitemap
 * @property {Article[]} articles Sorted from newest to oldest
 * @property {Drop[]} drops Sorted from newest to oldest
 * */
/**
 * @typedef {Object} State
 * @property {Sitemap} json
 * @property {string[]} changed Changed files
 * */

/**
 * Formats a date as an RFC-822 date-time (required by RSS `pubDate`),
 * e.g. `Wed, 02 Oct 2002 08:00:00 GMT`.
 *
 * `Date.prototype.toUTCString` is used because its output format is fixed by
 * the ECMAScript specification, whereas `Intl.DateTimeFormat` output depends
 * on the ICU/locale data shipped with the runtime.
 * @param {string} date Anything `new Date()` can parse
 * @returns {string}
 * @throws {RangeError} When `date` cannot be parsed
 * */
function pubDate(date){
	const D= new Date(date);
	if(Number.isNaN(D.getTime()))
		throw new RangeError(`Invalid date: ${date}`);
	return D.toUTCString();
}

/**
 * Escapes text so it can be placed inside an XML element.
 * @param {unknown} text
 * @returns {string}
 * */
function escapeXML(text){
	return String(text).replace(/[&<>"']/g, ch=> xml_entities[ch]);
}

$.api()
.version(version)
.describe(description)
.command("pull", "Update article list")
.option("--git", "Update git repository")
.action(async function pull({ git: is_git= false }){
	if(is_git) s.run`git pull --rebase`;
	const state= await sitemap();
	const { changed }= await toRSS(state);
	echo("Changed files:", changed.length ? changed.join(", ") : "—");
	if(is_git) gitCommit(changed, "pull");
	$.exit(0);
})
.command("only-rss", "Update RSS from known sitemap")
.action(async function onlyRSS(){
	const { path, json }= knownSitemap();
	// Passing the sitemap path as "changed" forces the feed to be regenerated.
	const status= await toRSS({ json, changed: [ path ] });
	echo({ status });
	$.exit(0);
})
.parse();

/**
 * Commits and pushes the given files under a temporary bot identity.
 * Does nothing when no file is listed or `git diff --numstat` prints nothing.
 * @param {string[]} files Files to add to the commit
 * @param {string} [des] Short description appended to the commit message
 * */
function gitCommit(files, des= "not specified"){
	if(!files.length || !s.run`git diff --numstat`.trim())
		return echo("Nothig todo");
	echo("Diff to save");
	s.run`git config user.name "Bot"`;
	try {
		// The address is stored reversed, presumably to hide it from scrapers.
		s.run`git config user.email "${"zc.murtnec@naj.elrdna".split("").reverse().join("")}"`;
		s.run`git add ${files}`;
		s.run`git commit -m "Updated by bot – ${des}"`;
		s.run`git push`;
	} finally {
		// Always drop the temporary identity, even when a step above throws.
		s.run`git config --remove-section user`;
	}
}

/**
 * Regenerates `rss.xml` from the sitemap. Skipped when nothing has changed.
 *
 * NOTE(review): the element names below were reconstructed from the RSS 2.0
 * specification because the templates contained no markup at all — confirm
 * them against the published feed. One additional channel-level line existed
 * whose content could not be recovered; it is left out.
 * @param {State} state
 * @returns {Promise<State>} The same sitemap with `rss.xml` appended to `changed`
 * @throws {Error} When an article references a drop missing from `json.drops`
 * */
async function toRSS({ json, changed }){
	if(!changed.length) return { json, changed };

	const path= "rss.xml";
	const host= "https://pagenotfound.cz";
	// Index drop dates once instead of scanning `json.drops` for every article.
	// The first occurrence wins, as `Array.prototype.find` would have returned.
	const drop_dates= new Map();
	for(const { drop, date } of json.drops)
		if(!drop_dates.has(drop)) drop_dates.set(drop, date);

	const articles= json.articles.map(function({ title, perex, author, loc, drop }){
		if(!drop_dates.has(drop))
			throw new Error(`Article "${loc}" references unknown drop "${drop}"`);
		const url= escapeXML(host+loc);
		return [
			"<item>",
			...[
				`<title>${escapeXML(title)}</title>`,
				`<link>${url}</link>`,
				`<guid>${url}</guid>`,
				`<description>${escapeXML(perex)}</description>`,
				`<author>${escapeXML(author)}</author>`,
				`<pubDate>${pubDate(drop_dates.get(drop))}</pubDate>`,
				`<category>${escapeXML(drop)}</category>`,
			].map(l=> "\t"+l),
			"</item>"
		].map(l=> "\t"+l).join("\n");
	});
	const channel_description= [
		"Page not found jsme založili z touhy po zábavné, kvalitní a inovativní žurnalistice.",
		"Chceme vám tu nabízet komplexní long ready, nečekané bonusy, multimediální obsah",
		"a hlavně texty, které nám samotným v ostatních médiích chybí. Budujte spolu s námi",
		"komunitu Page not found, dejte nám zpětnou vazbu na první drop, přihlaste se",
		"k odebírání newsletterů. Společně s vámi budeme moct naše cíle plnit rychleji."
	].join(" ");

	s.echo([
		`<?xml version="1.0" encoding="UTF-8"?>`,
		`<rss version="2.0">`,
		"<channel>",
		`\t<title>Pagenotfound.cz</title>`,
		`\t<link>${host}</link>`,
		`\t<description>${escapeXML(channel_description)}</description>`,
		...articles,
		"</channel>",
		"</rss>"
	].join("\n")).to(path);
	return { json, changed: [...changed, path] };
}

/**
 * Looks for a newer drop, scrapes the newest drop page and prepends articles
 * not yet known to the sitemap file.
 * @returns {Promise<State>} `changed` lists the sitemap file when it was rewritten
 * */
async function sitemap(){
	const { json, path }= knownSitemap();
	if(!json.drops.length){
		// Without a known drop there is no identifier to build the page URL from.
		echo(`No known drop in '${path}', nothing to synchronise`);
		return { json, changed: [] };
	}
	await syncDrops(json);
	const [ { drop: drop_last } ]= json.drops;

	const res= await fetch(url_drops+drop_last);
	if(res.status !== 200) return { json, changed: [] };

	const drop_articles= dropArticles(drop_last, json);
	const dom= new JSDOM(await res.text());
	const diff= [];
	for(const article of dom.window.document.querySelectorAll("article")){
		const loc= article.querySelector("a")?.href;
		if(!loc){
			echo("Article without link:", article.textContent);
			continue;
		}
		if(drop_articles.has(loc)) continue;
		const title= article.querySelector("h2")?.textContent.trim();
		if(!title){
			echo("Article without title:", loc);
			continue;
		}
		// Remember the link so a tile repeated on the page is added only once.
		drop_articles.add(loc);
		diff.push({
			title,
			perex: article.querySelector("[class^=ArticleTile_perex]")?.textContent.trim() ?? "",
			author: (article.querySelector("[class^=ArticleTile_author]")?.textContent || "Redakce").trim(),
			loc,
			drop: drop_last,
		});
	}
	if(!diff.length) return { json, changed: [] };

	// Newest first, see `dropArticles`.
	json.articles.unshift(...diff);
	s.echo(JSON.stringify(json, null, "\t")).to(path);
	return { json, changed: [ path ] };
}

/**
 * Reads `sitemap.json`, falling back to an empty sitemap when the file does not exist.
 * @returns {{ json: Sitemap, path: string }}
 * */
function knownSitemap(){
	const path= "sitemap.json";
	/** @type {Sitemap} */
	const json= s.test("-f", path)
		? s.cat(path).xargs(JSON.parse)
		: { drops: [], articles: [] };
	return { json, path };
}

/**
 * Collects links of the already known articles of the given drop.
 * Assumes that articles are sorted from newest to oldest, so scanning stops
 * at the first article belonging to another drop.
 * @param {Drop["drop"]} drop
 * @param {Sitemap} json
 * @returns {Set<string>}
 * */
function dropArticles(drop, { articles }){
	const out= new Set();
	for(const article of articles){
		if(article.drop !== drop) break;
		out.add(article.loc);
	}
	return out;
}

/**
 * Checks (HEAD request) whether the drop following the newest known one exists
 * and, if so, prepends it to `json.drops`. The next identifier is derived by
 * incrementing the trailing number while keeping its zero padding.
 * @param {Sitemap} json Mutated in place
 * @returns {Promise<Sitemap>} The same `json` object
 * */
async function syncDrops(json){
	const [ latest ]= json.drops;
	if(!latest) return json;

	// Split into a prefix and the trailing digits.
	const parts= /^(.*?)(\d+)$/.exec(latest.drop);
	if(!parts) return json;
	const [ , pre, digits ]= parts;
	const drop= pre + String(Number(digits)+1).padStart(digits.length, "0");

	const res= await fetch(url_drops+drop, { method: "HEAD" });
	if(res.status !== 200) return json;

	// Fall back to "now" when the `Date` header is missing or unparsable;
	// `new Date(null)` would otherwise silently yield 1970-01-01.
	const header_date= new Date(res.headers.get("date") ?? NaN);
	const date= (Number.isNaN(header_date.getTime()) ? new Date() : header_date).toISOString();
	json.drops.unshift({ drop, date });
	return json;
}