Jan Andrle
1ffe1ab293
All checks were successful
Update RSS / update-rss (push) Successful in 5m17s
currently new drop detection not working!!!
199 lines
6.3 KiB
JavaScript
Executable File
199 lines
6.3 KiB
JavaScript
Executable File
#!/usr/bin/env -S npx nodejsscript
|
||
/* jshint esversion: 11,-W097, -W040, module: true, node: true, expr: true, undef: true *//* global echo, $, pipe, s, fetch, cyclicLoop */
|
||
// Base URL of a drop page; a drop identifier is appended to it before fetching.
const url_drops= "https://pagenotfound.cz/drop/";
|
||
// Metadata parsed from package.json (relative path — presumably resolved against the working directory):
// `version` and `description` feed the CLI definition, `homepage` the self link of the RSS feed.
const { version, description, homepage }= s.cat("package.json").xargs(JSON.parse);
|
||
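/**
 * @typedef {Object} Article
 * @property {string} title Headline of the article
 * @property {string} perex Lead paragraph, used as the RSS item description
 * @property {string} author Used as the RSS item creator
 * @property {string} loc Link target of the article; appended to the site host to build RSS links
 * @property {string} drop Identifier of the drop the article belongs to
 * */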
|
||
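/**
 * @typedef {Object} Drop
 * @property {string} drop Identifier of the drop (appended to the drops URL)
 * @property {string} date Date of the drop; passed to `new Date()` when building the RSS `pubDate`
 * */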
|
||
/**
 * @typedef {Object} Sitemap
 * @property {Article[]} articles Known articles, newest first
 * @property {Drop[]} drops Known drops, newest first
 * */
|
||
/**
 * @typedef {Object} State
 * @property {Sitemap} json
 * @property {string[]} changed Changed files
 * */
|
||
/**
 * Formats a date for the RSS `<pubDate>` element, which must be an RFC-822 date-time
 * (e.g. `Wed, 02 Oct 2002 08:00:00 GMT`). The result is always expressed in UTC.
 *
 * `Date.prototype.toUTCString()` is specified to produce exactly this shape, so the output
 * does not depend on the locale data (ICU/CLDR) bundled with the runtime.
 *
 * @param {string} date Any value understood by the `Date` constructor (the sitemap stores ISO strings)
 * @returns {string} RFC-822 date-time in GMT
 * @throws {RangeError} When `date` cannot be parsed
 * */
const pubDate= date=> {
	const D= new Date(date);
	// keep failing loudly on unparsable input instead of emitting "Invalid Date" into the feed
	if(Number.isNaN(D.getTime()))
		throw new RangeError(`Invalid time value: ${date}`);
	return D.toUTCString();
};
|
||
|
||
// Command line interface:
//  - `pull [--git]` looks for new articles, regenerates the RSS feed and (with `--git`) commits the result,
//  - `only-rss` regenerates the RSS feed from the already known sitemap.
$.api()
	.version(version)
	.describe(description)
	.command("pull", "Update article list")
	.option("--git", "Update git repository")
	.action(async function pull({ git: is_git= false }){
		// start from the latest remote state so the bot commit can be pushed afterwards
		if(is_git) s.run`git pull --rebase`;
		const state= await sitemap();
		const { changed }= await toRSS(state);
		echo("Changed files:", changed.length ? changed.join(", ") : "—");
		if(is_git) gitCommit(changed, "pull");
		$.exit(0);
	})
	.command("only-rss", "Update RSS from known sitemap")
	.action(async function onlyRSS(){
		const { path, json }= knownSitemap();
		// report the sitemap as changed, otherwise `toRSS` skips the regeneration
		const status= await toRSS({ json, changed: [ path ] });
		echo({ status });
		$.exit(0);
	})
	.parse();
|
||
|
||
/**
 * Commits and pushes the given files under a temporary "Bot" git identity.
 *
 * Does nothing (besides logging) when no files are given or when `git diff --numstat`
 * reports no difference. The temporary `user` section of the git config is removed again
 * even if adding, committing or pushing throws.
 *
 * @param {string[]} files Files to add to the commit
 * @param {string} [des="not specified"] Short description appended to the commit message
 * @returns {*} The result of `echo` when there is nothing to do, otherwise `undefined`
 * */
function gitCommit(files, des= "not specified"){
	if(!files.length || !s.run`git diff --numstat`.trim())
		return echo("Nothig todo");

	echo("Diff to save");
	s.run`git config user.name "Bot"`;
	try {
		// the address is stored reversed in the source and restored here
		s.run`git config user.email "${"zc.murtnec@naj.elrdna".split("").reverse().join("")}"`;
		s.run`git add ${files}`;
		s.run`git commit -m "Updated by bot – ${des}"`;
		s.run`git push`;
	} finally {
		// never leave the bot identity behind in the repository config
		s.run`git config --remove-section user`;
	}
}
|
||
/**
 * Regenerates `rss.xml` from the sitemap, unless nothing has changed.
 *
 * Every value taken from the sitemap (title, perex, author, link, drop) is XML-escaped,
 * so characters such as `&` or `<` in an article cannot break the feed.
 *
 * @param {State} state
 * @returns {Promise<State>} The given `json`; `changed` is extended by `rss.xml` when the feed was written
 * @throws {Error} When an article references a drop that is not listed in `json.drops`
 * */
async function toRSS({ json, changed }){
	// nothing changed ⇒ the existing feed is still up to date
	if(!changed.length) return { json, changed };

	const path= "rss.xml";
	const host= "https://pagenotfound.cz";

	const xml_entities= { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&apos;" };
	/** @param {unknown} text */
	const escapeXML= text=> String(text).replace(/[&<>"']/g, ch=> xml_entities[ch]);

	// drop → date lookup; the first occurrence wins (same as `Array.prototype.find`)
	const drop_dates= new Map();
	for(const { drop, date } of json.drops)
		if(!drop_dates.has(drop)) drop_dates.set(drop, date);

	const articles= json.articles.map(function({ title, perex, author, loc, drop }){
		if(!drop_dates.has(drop))
			throw new Error(`Article "${title}" references unknown drop "${drop}"`);
		const link= escapeXML(host+loc);
		return [
			"<item>",
			...[
				`<title>${escapeXML(title)}</title>`,
				`<link>${link}</link>`,
				`<guid>${link}</guid>`,
				`<description>${escapeXML(perex)}</description>`,
				`<dc:creator>${escapeXML(author)}</dc:creator>`,
				`<pubDate>${pubDate(drop_dates.get(drop))}</pubDate>`,
				`<category>${escapeXML(drop)}</category>`,
			].map(l=> "\t"+l),
			"</item>"
		].map(l=> "\t"+l).join("\n");
	});

	const description = [
		"Page not found jsme založili z touhy po zábavné, kvalitní a inovativní žurnalistice.",
		"Chceme vám tu nabízet komplexní long ready, nečekané bonusy, multimediální obsah",
		"a hlavně texty, které nám samotným v ostatních médiích chybí. Budujte spolu s námi",
		"komunitu Page not found, dejte nám zpětnou vazbu na první drop, přihlaste se",
		"k odebírání newsletterů. Společně s vámi budeme moct naše cíle plnit rychleji."
	].join(" ");
	s.echo([
		`<?xml version="1.0" encoding="UTF-8" ?>`,
		`<rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:atom="http://www.w3.org/2005/Atom">`,
		"<channel>",
		` <title>Pagenotfound.cz</title>`,
		` <link>${host}</link>`,
		` <description>${description}</description>`,
		` <atom:link href="${escapeXML(homepage)}/raw/branch/main/rss.xml" rel="self" type="application/rss+xml" />`,
		...articles,
		"</channel>",
		"</rss>"
	].join("\n")).to(path);
	return { json, changed: [...changed, path] };
}
|
||
import { JSDOM } from "jsdom";
|
||
/**
 * Looks for articles of the newest drop that are not part of the known sitemap yet.
 *
 * Steps: load the known sitemap, let `syncDrops` probe for a newer drop, download the page
 * of the newest drop and collect every article tile whose link is not known for that drop.
 * New articles are prepended to `json.articles` and the sitemap file is rewritten.
 * Note that a drop newly added by `syncDrops` is therefore only persisted together with
 * at least one new article.
 *
 * Tiles without a link or without title/perex/author elements are logged and skipped,
 * so one unexpected tile does not abort the whole run.
 *
 * @returns {Promise<State>} `changed` holds the sitemap path when new articles were stored, otherwise it is empty
 * */
async function sitemap(){
	const { json, path }= knownSitemap();
	const unchanged= { json, changed: [] };
	// without any known drop there is nothing to derive the drop URL from
	if(!json.drops?.length){
		echo("No known drop in", path);
		return unchanged;
	}
	await syncDrops(json);
	const [ { drop: drop_last } ]= json.drops;

	const res= await fetch(url_drops+drop_last);
	if(res.status !== 200) return unchanged;

	const drop_articles= dropArticles(drop_last, json);
	const dom= new JSDOM(await res.text());
	/** Trimmed text of the first match inside the tile, `undefined` when there is no match */
	const text= (article, selector)=> article.querySelector(selector)?.textContent.trim();
	const diff= [];
	for(const article of dom.window.document.querySelectorAll("article")){
		const loc= article.querySelector("a")?.href;
		if(!loc){
			echo("Article without link:", article.textContent);
			continue;
		}
		if(drop_articles.has(loc)) continue;

		const title= text(article, "h2");
		const perex= text(article, "[class^=ArticleTile_perex]");
		const author= text(article, "[class^=ArticleTile_author]");
		if(title === undefined || perex === undefined || author === undefined){
			echo("Article with incomplete markup:", loc);
			continue;
		}
		diff.push({ title, perex, author, loc, drop: drop_last });
	}
	if(!diff.length) return unchanged;

	json.articles.unshift(...diff);
	s.echo(JSON.stringify(json, null, "\t")).to(path);
	return { json, changed: [ path ] };
}
|
||
/**
 * Loads the sitemap stored in `sitemap.json`.
 *
 * When the file does not exist an empty sitemap is returned. When the file exists but lacks
 * the `drops` or `articles` key, the missing key defaults to an empty list so callers can
 * always iterate both; existing keys keep their values and their order.
 *
 * @returns {{ json: Sitemap, path: string }} The sitemap and the path it is stored at
 * */
function knownSitemap(){
	const path= "sitemap.json";
	if(!s.test("-f", path))
		return { json: { drops: [], articles: [] }, path };

	const parsed= s.cat(path).xargs(JSON.parse);
	/** @type {Sitemap} */
	const json= { ...parsed, drops: parsed?.drops ?? [], articles: parsed?.articles ?? [] };
	return { json, path };
}
|
||
/**
 * Collects the links of the already known articles of the given drop.
 *
 * Assumes that articles are sorted from newest to oldest: only the leading run of articles
 * belonging to `drop` is considered and the scan stops at the first article of another drop.
 *
 * @param {Drop["drop"]} drop Identifier of the drop
 * @param {Sitemap} json
 * @returns {Set<Article["loc"]>} Links (`loc`) of the known articles of the drop
 * */
function dropArticles(drop, { articles }){
	const out= new Set();
	for(const article of articles){
		if(article.drop !== drop) break;
		out.add(article.loc);
	}
	return out;
}
|
||
/**
 * Checks whether the drop following the newest known one is already published and,
 * if so, prepends it to `json.drops` (the given object is mutated).
 *
 * The next identifier is derived by incrementing the numeric suffix of the newest known
 * drop while keeping its zero padding. The drop date is taken from the `Date` header of
 * the response; when that header is missing or unparsable the current time is used.
 *
 * Nothing is changed when no drop is known yet, when the newest identifier has no numeric
 * suffix, or when the probed page does not answer with status 200.
 *
 * @param {Sitemap} json
 * @returns {Promise<Sitemap>} The given `json`
 * */
async function syncDrops(json){
	const [ newest ]= json.drops;
	if(!newest) return json;

	const { drop: drop_last }= newest;
	const i_index= drop_last.search(/\d/);
	if(i_index < 0) return json;
	const current= Number(drop_last.slice(i_index));
	if(!Number.isInteger(current)) return json;

	const pre= drop_last.slice(0, i_index);
	const index= String(current + 1).padStart(drop_last.length - i_index, "0");
	const drop= pre+index;
	// HEAD is enough, only the status and the headers are of interest
	const res= await fetch(url_drops+drop, { method: "HEAD" });
	if(res.status !== 200) return json;

	const header_time= Date.parse(res.headers.get("date") ?? "");
	const date= new Date(Number.isNaN(header_time) ? Date.now() : header_time).toISOString();
	json.drops.unshift({ drop, date });
	return json;
}
|