2024-06-26 14:57:18 +02:00
|
|
|
|
#!/usr/bin/env -S npx nodejsscript
|
|
|
|
|
/* jshint esversion: 11,-W097, -W040, module: true, node: true, expr: true, undef: true *//* global echo, $, pipe, s, fetch, cyclicLoop */
|
|
|
|
|
/** Address prefix of the drop pages; a drop identifier is appended to it to get the page URL. */
const url_drops= "https://pagenotfound.cz/drop/";
/*
 * Metadata read from the package manifest in the current working directory:
 * `version` and `description` are passed to the CLI definition, `homepage`
 * is used to build the self-link of the generated RSS feed.
 * */
const package_json= s.cat("package.json").xargs(JSON.parse);
const { version, description, homepage }= package_json;
|
2024-06-26 14:57:18 +02:00
|
|
|
|
/**
|
|
|
|
|
* @typedef {Object} Article
|
|
|
|
|
* @property {string} title
|
|
|
|
|
* @property {string} perex
|
|
|
|
|
* @property {string} author
|
|
|
|
|
* @property {string} loc
|
|
|
|
|
* @property {string} drop
|
|
|
|
|
* */
|
|
|
|
|
/**
|
|
|
|
|
* @typedef {Object} Drop
|
|
|
|
|
* @property {string} drop
|
|
|
|
|
* @property {string} date
|
|
|
|
|
* */
|
|
|
|
|
/**
|
|
|
|
|
* @typedef {Object} Sitemap
|
|
|
|
|
* @property {Article[]} articles
|
|
|
|
|
* @property {Drop[]} drops
|
|
|
|
|
* */
|
2024-06-27 16:35:15 +02:00
|
|
|
|
/**
|
|
|
|
|
* @typedef {Object} State
|
|
|
|
|
* @property {Sitemap} json
|
|
|
|
|
* @property {string[]} changed Changed files
|
|
|
|
|
* */
|
2024-07-11 15:51:28 +02:00
|
|
|
|
/**
 * Formats a date as an RFC-822 date-time, which is what RSS expects inside
 * `<pubDate>` (e.g. `Wed, 02 Oct 2002 08:00:00 GMT`).
 *
 * The result comes from `Date.prototype.toUTCString`, whose output format is
 * fixed by the ECMAScript specification. Locale-driven formatting is avoided
 * on purpose: its month abbreviations and separators depend on the locale data
 * shipped with the runtime (e.g. `Sept` instead of `Sep`) and therefore may
 * stop being valid RFC-822 after a runtime upgrade.
 *
 * @param {string} date Anything the `Date` constructor can parse
 * @returns {string} The date expressed in GMT
 * @throws {RangeError} When `date` cannot be parsed
 * */
const pubDate= function pubDateInner(date){
	const D= new Date(date);
	// fail loudly instead of writing "Invalid Date" into the feed
	if(Number.isNaN(D.getTime()))
		throw new RangeError(`Invalid time value: ${date}`);
	return D.toUTCString();
};
|
2024-06-26 14:57:18 +02:00
|
|
|
|
|
|
|
|
|
/*
 * Command line interface:
 * - `pull [--git]` refreshes the sitemap and the RSS feed; with `--git` the
 *   repository is rebased first and the changed files are committed afterwards.
 * - `only-rss` regenerates the RSS feed from the already known sitemap.
 * Both commands end the process explicitly with exit code 0.
 * */
$.api()
	.version(version)
	.describe(description)
	.command("pull", "Update article list")
		.option("--git", "Update git repository")
		.action(async function pull(options){
			const { git: with_git= false }= options;
			if(with_git) s.run`git pull --rebase`;

			const state= await toRSS(await sitemap());
			const { changed }= state;
			const summary= changed.length ? changed.join(", ") : "—";
			echo("Changed files:", summary);

			if(with_git) gitCommit(changed, "pull");
			$.exit(0);
		})
	.command("only-rss", "Update RSS from known sitemap")
		.action(async function onlyRSS(){
			const known= knownSitemap();
			// the sitemap is reported as changed so that `toRSS` does not skip the generation
			const status= await toRSS({ json: known.json, changed: [ known.path ] });
			echo({ status });
			$.exit(0);
		})
	.parse();
|
|
|
|
|
|
2024-06-27 10:52:51 +02:00
|
|
|
|
/**
 * Commits the given files under a temporary bot identity and pushes the result.
 *
 * Nothing happens when no files are given or when `git diff --numstat`
 * prints nothing.
 * NOTE(review): `git diff` without arguments does not list untracked files,
 * so a newly created (never committed) file alone does not trigger a commit —
 * confirm this is intended.
 *
 * @param {string[]} files Paths to stage
 * @param {string} [des="not specified"] Short reason appended to the commit message
 * @returns {*} Result of `echo` when there is nothing to commit, otherwise `undefined`
 * */
function gitCommit(files, des= "not specified"){
	if(!files.length || !s.run`git diff --numstat`.trim())
		return echo("Nothing to do");

	echo("Diff to save");
	s.run`git config user.name "Bot"`;
	try {
		// the address is stored reversed so that it is not directly readable in the source
		s.run`git config user.email "${"zc.murtnec@naj.elrdna".split("").reverse().join("")}"`;
		s.run`git add ${files}`;
		s.run`git commit -m "Updated by bot – ${des}"`;
		s.run`git push`;
	} finally {
		// always drop the temporary identity, even when one of the commands above throws
		s.run`git config --remove-section user`;
	}
}
|
2024-06-27 16:35:15 +02:00
|
|
|
|
/**
 * Escapes the characters with a special meaning in XML, so that arbitrary text
 * can be placed inside an element or a quoted attribute value.
 * @param {*} text Converted to a string before escaping
 * @returns {string}
 * */
function escapeXML(text){
	const entities= { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&apos;" };
	return String(text).replace(/[&<>"']/g, ch=> entities[ch]);
}
/**
 * Generates `rss.xml` from the sitemap.
 *
 * The feed is written only when `changed` is not empty; in that case the path
 * of the feed is appended to the returned list of changed files. All values
 * coming from the sitemap are XML-escaped. The `<pubDate>` of an article is
 * the date of its drop; it is left out when the drop is not listed in
 * `json.drops`.
 *
 * @param {State} state
 * @returns {Promise<State>} state
 * */
async function toRSS({ json, changed }){
	if(!changed.length) return { json, changed };

	const path= "rss.xml";
	const host= "https://pagenotfound.cz";
	// one lookup table instead of searching the drops for every article (first occurrence wins)
	const drop_dates= new Map();
	for(const { drop, date } of json.drops)
		if(!drop_dates.has(drop)) drop_dates.set(drop, date);

	const articles= json.articles.map(function({ title, perex, author, loc, drop }){
		const link= escapeXML(host+loc);
		const date= drop_dates.get(drop);
		const fields= [
			`<title>${escapeXML(title)}</title>`,
			`<link>${link}</link>`,
			`<guid>${link}</guid>`,
			`<description>${escapeXML(perex)}</description>`,
			`<dc:creator>${escapeXML(author)}</dc:creator>`,
			date ? `<pubDate>${pubDate(date)}</pubDate>` : null,
			`<category>${escapeXML(drop)}</category>`,
		].filter(Boolean);
		return [
			"<item>",
			...fields.map(l=> "\t"+l),
			"</item>"
		].map(l=> "\t"+l).join("\n");
	});

	const description = [
		"Page not found jsme založili z touhy po zábavné, kvalitní a inovativní žurnalistice.",
		"Chceme vám tu nabízet komplexní long ready, nečekané bonusy, multimediální obsah",
		"a hlavně texty, které nám samotným v ostatních médiích chybí. Budujte spolu s námi",
		"komunitu Page not found, dejte nám zpětnou vazbu na první drop, přihlaste se",
		"k odebírání newsletterů. Společně s vámi budeme moct naše cíle plnit rychleji."
	].join(" ");
	s.echo([
		`<?xml version="1.0" encoding="UTF-8" ?>`,
		`<rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:atom="http://www.w3.org/2005/Atom">`,
		"<channel>",
		` <title>Pagenotfound.cz</title>`,
		` <link>${host}</link>`,
		` <description>${description}</description>`,
		` <atom:link href="${escapeXML(homepage)}/raw/branch/main/rss.xml" rel="self" type="application/rss+xml" />`,
		...articles,
		"</channel>",
		"</rss>"
	].join("\n")).to(path);
	return { json, changed: [...changed, path] };
}
|
2024-06-26 15:52:59 +02:00
|
|
|
|
import { JSDOM } from "jsdom";
|
2024-06-27 16:35:15 +02:00
|
|
|
|
/**
 * Updates the known sitemap with articles that appeared on the page of the
 * latest drop.
 *
 * The list of drops is synchronised first, then the page of the newest drop is
 * downloaded and every `article` element whose link is not yet known for that
 * drop is prepended to `json.articles`. The sitemap file is rewritten only
 * when at least one new article was found.
 *
 * Tiles without a link or without a heading are reported and skipped; a
 * missing perex becomes an empty string and a missing author `"Redakce"`.
 *
 * @returns {Promise<State>} `changed` holds the sitemap path when it was rewritten, otherwise it is empty
 * @throws {Error} When the known sitemap contains no drop to start from
 * */
async function sitemap(){
	const { json, path }= knownSitemap();
	if(!json.drops.length)
		throw new Error(`No known drop in '${path}', at least one is needed to start from`);
	await syncDrops(json);
	const [ { drop: drop_last } ]= json.drops;

	const res= await fetch(url_drops+drop_last);
	if(res.status !== 200) return { json, changed: [] };

	const drop_articles= dropArticles(drop_last, json);
	const dom= new JSDOM(await res.text());
	const diff= [];
	for(const article of dom.window.document.querySelectorAll("article")){
		const loc= article.querySelector("a")?.href;
		if(!loc){
			echo("Article without link:", article.textContent);
			continue;
		}
		if(drop_articles.has(loc)) continue;
		const title_el= article.querySelector("h2");
		if(!title_el){
			echo("Article without title:", loc);
			continue;
		}
		diff.push({
			title: title_el.textContent.trim(),
			perex: (article.querySelector("[class^=ArticleTile_perex]")?.textContent ?? "").trim(),
			author: (article.querySelector("[class^=ArticleTile_author]")?.textContent || "Redakce").trim(),
			loc,
			drop: drop_last,
		});
		// the same link may be present on the page more than once
		drop_articles.add(loc);
	}
	if(!diff.length) return { json, changed: [] };

	json.articles.unshift(...diff);
	s.echo(JSON.stringify(json, null, "\t")).to(path);
	return { json, changed: [ path ] };
}
|
2024-08-28 21:14:11 +02:00
|
|
|
|
/**
 * Provides the sitemap as it was stored by a previous run.
 * @returns {{ json: Sitemap, path: string }} The parsed content of `sitemap.json`
 * (an empty sitemap when the file does not exist) together with the file path
 * */
function knownSitemap(){
	const path= "sitemap.json";
	if(!s.test("-f", path))
		return { json: { drops: [], articles: [] }, path };

	/** @type {Sitemap} */
	const json= s.cat(path).xargs(JSON.parse);
	return { json, path };
}
|
2024-07-08 10:33:33 +02:00
|
|
|
|
/**
 * Collects the links of the articles already known for the given drop.
 *
 * Only the leading run of articles belonging to `drop` is taken into account:
 * the articles are expected to be sorted from the newest to the oldest, so the
 * scan ends at the first article of another drop.
 *
 * @param {Drop["drop"]} drop
 * @param {Sitemap} json
 * @returns {Set<Article["loc"]>}
 * */
function dropArticles(drop, { articles }){
	const first_other= articles.findIndex(article=> article.drop !== drop);
	const leading= first_other === -1 ? articles : articles.slice(0, first_other);
	return new Set(leading.map(article=> article.loc));
}
|
2024-06-26 14:57:18 +02:00
|
|
|
|
/**
 * Checks whether the drop following the latest known one is already published
 * and registers it when it is.
 *
 * The identifier of the next drop is derived by increasing the trailing number
 * of the latest identifier while keeping its zero padding (`drop-009` →
 * `drop-010`). The drop is considered published when a `HEAD` request for its
 * page responds with status 200. Its date is taken from the `Date` response
 * header; the current time is used when the header is missing or unparsable.
 *
 * The sitemap is modified in place (the new drop is prepended to `json.drops`)
 * and left untouched when there is no known drop, the latest identifier does
 * not end with a number, or the next drop is not available.
 *
 * @param {Sitemap} json
 * @returns {Promise<Sitemap>} The very same `json`
 * */
async function syncDrops(json){
	if(!json.drops.length) return json;
	const [ { drop: drop_last } ]= json.drops;
	const counter= /\d+$/.exec(drop_last);
	if(!counter) return json;

	const [ digits ]= counter;
	const pre= drop_last.slice(0, counter.index);
	const index= String(Number(digits)+1).padStart(digits.length, "0");
	const drop= pre+index;

	const res= await fetch(url_drops+drop, { method: "HEAD" });
	if(res.status !== 200) return json;

	// `new Date(null)` would silently give 1970-01-01, hence the explicit invalid fallback
	const published= new Date(res.headers.get("date") ?? NaN);
	const date= (Number.isNaN(published.getTime()) ? new Date() : published).toISOString();
	json.drops.unshift({ drop, date });
	return json;
}
|