199 lines
6.3 KiB
JavaScript
Raw Normal View History

2024-06-26 14:57:18 +02:00
#!/usr/bin/env -S npx nodejsscript
/* jshint esversion: 11,-W097, -W040, module: true, node: true, expr: true, undef: true *//* global echo, $, pipe, s, fetch, cyclicLoop */
// Base URL of the drop pages; a drop identifier is appended to it when fetching.
const url_drops= "https://pagenotfound.cz/drop/";
// Metadata read from `package.json` (path exactly as given to `s.cat`): `version` and
// `description` are passed to the CLI definition, `homepage` is used for the feed's self link.
const { version, description, homepage }= s.cat("package.json").xargs(JSON.parse);
2024-06-26 14:57:18 +02:00
/**
* @typedef {Object} Article
* @property {string} title
* @property {string} perex
* @property {string} author
* @property {string} loc
* @property {string} drop
* */
/**
* @typedef {Object} Drop
* @property {string} drop
* @property {string} date
* */
/**
* @typedef {Object} Sitemap
* @property {Article[]} articles
* @property {Drop[]} drops
* */
/**
* @typedef {Object} State
* @property {Sitemap} json
* @property {string[]} changed Changed files
* */
/**
 * Formats a date the way RSS `<pubDate>` requires (RFC-822 date-time, always in GMT),
 * e.g. `Wed, 02 Oct 2002 08:00:00 GMT`.
 *
 * `Date.prototype.toUTCString()` produces exactly this layout by specification, so the
 * result does not depend on the locale data shipped with the runtime.
 * @param {string} date Anything `new Date()` can parse (the sitemap stores ISO strings)
 * @returns {string}
 * @throws {RangeError} When `date` cannot be parsed
 * */
const pubDate= function pubDateInner(date){
	const parsed= new Date(date);
	// `toUTCString()` would return the text "Invalid Date"; fail loudly instead of
	// writing that into the feed.
	if(Number.isNaN(parsed.getTime()))
		throw new RangeError(`Invalid time value: ${date}`);
	return parsed.toUTCString();
};
2024-06-26 14:57:18 +02:00
// Command-line interface.
//   pull [--git]  fetch new articles, regenerate the feed and (with --git) commit the result
//   only-rss      regenerate the feed from the sitemap already stored on disk
$.api()
	.version(version)
	.describe(description)
	.command("pull", "Update article list")
	.option("--git", "Update git repository")
	.action(async function pull(options){
		const { git: is_git= false }= options;
		if(is_git) s.run`git pull --rebase`;

		const state= await sitemap();
		const { changed }= await toRSS(state);
		const listing= changed.length ? changed.join(", ") : "—";
		echo("Changed files:", listing);

		if(is_git) gitCommit(changed, "pull");
		$.exit(0);
	})
	.command("only-rss", "Update RSS from known sitemap")
	.action(async function onlyRSS(){
		const known= knownSitemap();
		// the sitemap path is reported as changed so that the feed is always rewritten
		const status= await toRSS({ json: known.json, changed: [ known.path ] });
		echo({ status });
		$.exit(0);
	})
	.parse();
2024-06-27 10:52:51 +02:00
/**
 * Commits and pushes the given files under a temporary bot identity.
 *
 * Nothing happens when `files` is empty or `git diff --numstat` prints nothing.
 * The `user` section written to the git config is removed again even when staging,
 * committing or pushing throws.
 * @param {string[]} files Paths to stage
 * @param {string} [des="not specified"] Text appended to the commit message
 * */
function gitCommit(files, des= "not specified"){
	// `git diff` is only run when there is at least one file to consider
	const has_diff= files.length && s.run`git diff --numstat`.trim();
	if(!has_diff)
		return echo("Nothing to do");
	echo("Diff to save");
	s.run`git config user.name "Bot"`;
	try {
		// the address is stored reversed so that it does not appear verbatim in the source
		s.run`git config user.email "${"zc.murtnec@naj.elrdna".split("").reverse().join("")}"`;
		// NOTE(review): relies on how `s.run` interpolates an array — confirm that several
		// paths end up as separate arguments.
		s.run`git add ${files}`;
		s.run`git commit -m "Updated by bot ${des}"`;
		s.run`git push`;
	} finally {
		s.run`git config --remove-section user`;
	}
}
/**
 * Regenerates `rss.xml` from the sitemap. Does nothing when `changed` is empty.
 *
 * Every value taken from the sitemap is XML-escaped before it is placed into the feed.
 * An article whose drop has no entry in `json.drops` is emitted without `<pubDate>`
 * (the element is optional in RSS) instead of aborting the whole feed.
 * @param {State} state
 * @returns {Promise<State>} The same `json`; `changed` is extended by `rss.xml` when the feed was written
 * */
async function toRSS({ json, changed }){
	if(!changed.length) return { json, changed };
	const path= "rss.xml";
	const host= "https://pagenotfound.cz";
	// drop → date lookup built once; the first entry wins, as a linear search would
	const drop_dates= new Map();
	for(const { drop, date } of json.drops)
		if(!drop_dates.has(drop)) drop_dates.set(drop, date);

	const articles= json.articles.map(function({ title, perex, author, loc, drop }){
		const url= escapeXML(host+loc);
		const fields= [
			`<title>${escapeXML(title)}</title>`,
			`<link>${url}</link>`,
			`<guid>${url}</guid>`,
			`<description>${escapeXML(perex)}</description>`,
			`<dc:creator>${escapeXML(author)}</dc:creator>`,
		];
		if(drop_dates.has(drop))
			fields.push(`<pubDate>${pubDate(drop_dates.get(drop))}</pubDate>`);
		fields.push(`<category>${escapeXML(drop)}</category>`);
		return [
			"<item>",
			...fields.map(l=> "\t"+l),
			"</item>"
		].map(l=> "\t"+l).join("\n");
	});
	const description = [
		"Page not found jsme založili z touhy po zábavné, kvalitní a inovativní žurnalistice.",
		"Chceme vám tu nabízet komplexní long ready, nečekané bonusy, multimediální obsah",
		"a hlavně texty, které nám samotným v ostatních médiích chybí. Budujte spolu s námi",
		"komunitu Page not found, dejte nám zpětnou vazbu na první drop, přihlaste se",
		"k odebírání newsletterů. Společně s vámi budeme moct naše cíle plnit rychleji."
	].join(" ");
	s.echo([
		`<?xml version="1.0" encoding="UTF-8" ?>`,
		`<rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:atom="http://www.w3.org/2005/Atom">`,
		"<channel>",
		` <title>Pagenotfound.cz</title>`,
		` <link>${escapeXML(host)}</link>`,
		` <description>${escapeXML(description)}</description>`,
		` <atom:link href="${escapeXML(homepage)}/raw/branch/main/rss.xml" rel="self" type="application/rss+xml" />`,
		...articles,
		"</channel>",
		"</rss>"
	].join("\n")).to(path);
	return { json, changed: [...changed, path] };
}
/**
 * Escapes the characters that are special in XML text and double-quoted attribute values.
 * @param {unknown} text Converted with `String()` first
 * @returns {string}
 * */
function escapeXML(text){
	const entities= { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;" };
	return String(text).replace(/[&<>"]/g, ch=> entities[ch]);
}
2024-06-26 15:52:59 +02:00
import { JSDOM } from "jsdom";
/**
 * Brings the stored sitemap up to date with the page of the newest drop.
 *
 * Steps: load the known sitemap, let `syncDrops` register a newly published drop, download
 * the newest drop's page and prepend every article that is not yet listed for that drop.
 * The sitemap file is rewritten only when at least one article was added.
 *
 * Tiles without a link or without a heading are reported and skipped; a link that occurs in
 * several tiles of the page is added once.
 * @returns {Promise<State>} `changed` holds the sitemap path when the file was rewritten, otherwise it is empty
 * @throws {Error} When the sitemap knows no drop at all (there is nothing to start from)
 * */
async function sitemap(){
	const { json, path }= knownSitemap();
	if(!json.drops.length)
		throw new Error(`No drop is known in '${path}', at least one drop entry is required`);
	await syncDrops(json);
	const [ { drop: drop_last } ]= json.drops;
	const res= await fetch(url_drops+drop_last);
	if(res.status !== 200) return { json, changed: [] };

	// locations already listed for this drop; extended below so that repeated tiles are ignored
	const seen= dropArticles(drop_last, json);
	const dom= new JSDOM(await res.text());
	const diff= [];
	for(const article of dom.window.document.querySelectorAll("article")){
		const loc= article.querySelector("a")?.href;
		if(!loc){
			echo("Article without link:", article.textContent);
			continue;
		}
		if(seen.has(loc)) continue;
		const heading= article.querySelector("h2");
		if(!heading){
			echo("Article without title:", loc);
			continue;
		}
		seen.add(loc);
		diff.push({
			title: heading.textContent.trim(),
			perex: (article.querySelector("[class^=ArticleTile_perex]")?.textContent ?? "").trim(),
			author: (article.querySelector("[class^=ArticleTile_author]")?.textContent || "Redakce").trim(),
			loc,
			drop: drop_last,
		});
	}
	if(!diff.length) return { json, changed: [] };

	json.articles.unshift(...diff);
	s.echo(JSON.stringify(json, null, "\t")).to(path);
	return { json, changed: [ path ] };
}
/**
 * Loads the sitemap stored in `sitemap.json`; when that file does not exist,
 * an empty sitemap is returned instead.
 * @returns {{ json: Sitemap, path: string }}
 * */
function knownSitemap(){
	const path= "sitemap.json";
	if(!s.test("-f", path))
		return { json: { drops: [], articles: [] }, path };
	/** @type {Sitemap} */
	const json= s.cat(path).xargs(JSON.parse);
	return { json, path };
}
/**
 * Collects the locations of the articles at the head of the list that belong to `drop`.
 *
 * Only the uninterrupted leading run is taken: the list is expected to be sorted from
 * newest to oldest, so the first article of another drop ends the collection.
 * @param {string} drop Drop identifier (see `Drop`)
 * @param {Sitemap} json
 * @returns {Set<string>} Article locations (see `Article`)
 * */
function dropArticles(drop, { articles }){
	const boundary= articles.findIndex(article=> article.drop !== drop);
	const leading= boundary === -1 ? articles : articles.slice(0, boundary);
	return new Set(leading.map(article=> article.loc));
}
2024-06-26 14:57:18 +02:00
/**
 * Checks whether the drop following the newest known one is already published and, if so,
 * puts it at the front of `json.drops`.
 *
 * The next identifier is derived by incrementing the numeric suffix of the newest drop
 * while keeping its zero padding. Existence is probed with a `HEAD` request.
 * The stored date is the response's `Date` header; when the header is missing or cannot be
 * parsed, the current time is used instead.
 *
 * `json` is returned untouched when no drop is known, when the newest identifier has no
 * integer suffix, or when the probe does not answer with status 200.
 * @param {Sitemap} json Mutated in place
 * @returns {Promise<Sitemap>} The same `json` object
 * */
async function syncDrops(json){
	const [ newest ]= json.drops;
	if(!newest) return json;
	const { drop: drop_last }= newest;
	const i_index= drop_last.search(/\d/);
	if(i_index < 0) return json;
	const pre= drop_last.slice(0, i_index);
	const digits= drop_last.slice(i_index);
	const current= Number(digits);
	if(!Number.isInteger(current)) return json;
	const index= String(current + 1).padStart(digits.length, "0");
	const drop= pre+index;

	const res= await fetch(url_drops+drop, { method: "HEAD" });
	if(res.status !== 200) return json;

	const header= res.headers.get("date");
	// `new Date(null)` would silently be 1970-01-01, hence the explicit check
	const served= header ? new Date(header) : null;
	const is_usable= served !== null && !Number.isNaN(served.getTime());
	const date= (is_usable ? served : new Date()).toISOString();
	json.drops.unshift({ drop, date });
	return json;
}