Compare commits
85 Commits
5700e740f8
...
main
Author | SHA1 | Date | |
---|---|---|---|
c8f411f46f | |||
9c58b975b4 | |||
548c6b9a62 | |||
46f4f39603 | |||
dd6d51fc39 | |||
631386dcd3 | |||
64a16c6afd | |||
33a19b324d | |||
ccd578cea5 | |||
d8d2b3cb56 | |||
11f7bef325 | |||
6efdd8177a | |||
c0955cf0a3 | |||
de7e7fd4f0 | |||
784444d1fc | |||
59af72a72c | |||
41b7ac3688 | |||
c18aa5d278 | |||
1747eadc54 | |||
7d56bb1f59 | |||
158b96c7c5 | |||
950868bfb4 | |||
3d1ac373e3
|
|||
b1db05311f | |||
4e8cc1bd94 | |||
8c766b0ac3 | |||
13441a8788 | |||
e494ea3ff6 | |||
b3fed778f3 | |||
85cdbe7a9e | |||
c09b247142 | |||
26b75634d5 | |||
7d04475878 | |||
da9a6d9e9d | |||
44da85f0d0 | |||
9d55e4ebd9 | |||
0a5326b80f
|
|||
c74512e208 | |||
8f0a06a526
|
|||
2f3caba17e | |||
77ecfa309a | |||
8568922457 | |||
39e8ff4b4c | |||
7abd1571b9 | |||
898b7d7ea5 | |||
e33cc9d756 | |||
9b5084bd63 | |||
fbf3e60e0d | |||
81c8d52b00 | |||
ffd2f41a13 | |||
1aab450bf7 | |||
46765180ae | |||
55e5e9dc7d | |||
a56d920a67 | |||
000216ae77
|
|||
9e16a8ea95 | |||
633ca5465b | |||
1ffe1ab293
|
|||
200669791c | |||
fd723e72e9 | |||
b146530fe2 | |||
fe224be1d3 | |||
97ef8e3fb2 | |||
f20337c45e | |||
6fbb45210f | |||
4404a4b962 | |||
ded37d1178 | |||
71acab3ce5
|
|||
a576caff53 | |||
e080684fb2 | |||
5d174d2f19
|
|||
ce93c7241f | |||
7ebcf6e2d2
|
|||
8f0deba912
|
|||
1b2f55079c | |||
5a1c52565e
|
|||
6c6c269dbd
|
|||
df82405ed8
|
|||
39bb5ef62d
|
|||
4bbd88a563
|
|||
0054ceb6f2
|
|||
add7993460
|
|||
db2af48953
|
|||
597e21895a
|
|||
837cea046d
|
@ -2,10 +2,10 @@ name: Update RSS
|
|||||||
on:
|
on:
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
schedule:
|
schedule:
|
||||||
- cron: '0 3 * * *' # daily at 3am
|
- cron: '39 19 * * *' # daily at 19:39 Prague
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
Explore-Gitea-Actions:
|
update-rss:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: https://gitea.com/actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
|
- uses: https://gitea.com/actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
|
||||||
@ -14,4 +14,4 @@ jobs:
|
|||||||
node-version: lts/*
|
node-version: lts/*
|
||||||
cache: 'npm'
|
cache: 'npm'
|
||||||
- run: npm ci
|
- run: npm ci
|
||||||
- run: npx nodejsscript cli.js pull
|
- run: npx nodejsscript cli.mjs pull --git
|
||||||
|
134
cli.mjs
134
cli.mjs
@ -1,7 +1,7 @@
|
|||||||
#!/usr/bin/env -S npx nodejsscript
|
#!/usr/bin/env -S npx nodejsscript
|
||||||
/* jshint esversion: 11,-W097, -W040, module: true, node: true, expr: true, undef: true *//* global echo, $, pipe, s, fetch, cyclicLoop */
|
/* jshint esversion: 11,-W097, -W040, module: true, node: true, expr: true, undef: true *//* global echo, $, pipe, s, fetch, cyclicLoop */
|
||||||
const url_drops= "https://pagenotfound.cz/drop/";
|
const url_drops= "https://pagenotfound.cz/drop/";
|
||||||
const { version, description }= s.cat("package.json").xargs(JSON.parse);
|
const { version, description, homepage }= s.cat("package.json").xargs(JSON.parse);
|
||||||
/**
|
/**
|
||||||
* @typedef {Object} Article
|
* @typedef {Object} Article
|
||||||
* @property {string} title
|
* @property {string} title
|
||||||
@ -20,76 +20,168 @@ const { version, description }= s.cat("package.json").xargs(JSON.parse);
|
|||||||
* @property {Article[]} articles
|
* @property {Article[]} articles
|
||||||
* @property {Drop[]} drops
|
* @property {Drop[]} drops
|
||||||
* */
|
* */
|
||||||
|
/**
|
||||||
|
* @typedef {Object} State
|
||||||
|
* @property {Sitemap} json
|
||||||
|
* @property {string[]} changed Changed files
|
||||||
|
* */
|
||||||
|
const pubDate= (function pubDateInner(){ // pubDate must be an RFC-822 date-time
|
||||||
|
const intl= new Intl.DateTimeFormat("en-gb", {
|
||||||
|
/* Wed, 02 Oct 2002 */ weekday: "short", day: "2-digit", month: "short", year: "numeric",
|
||||||
|
/* 08:00:00 */ hour12: false, hour: "2-digit", minute: "2-digit", second: "2-digit",
|
||||||
|
/* EST */ timeZoneName: "longOffset", timeZone: "UTC",
|
||||||
|
});
|
||||||
|
/** @param {string} date */
|
||||||
|
return date=> {
|
||||||
|
const D= new Date(date);
|
||||||
|
const d= intl.format(D);
|
||||||
|
// remove redundant second comma to make it RFC-822
|
||||||
|
const red_comma_i= d.indexOf(",", d.indexOf(",")+1);
|
||||||
|
return d.slice(0, red_comma_i) + d.slice(red_comma_i+1);
|
||||||
|
};
|
||||||
|
})();
|
||||||
|
|
||||||
$.api()
|
$.api()
|
||||||
.version(version)
|
.version(version)
|
||||||
.describe(description)
|
.describe(description)
|
||||||
.command("pull", "Update article list")
|
.command("pull", "Update article list")
|
||||||
.action(async function pull(){
|
.option("--git", "Update git repository")
|
||||||
const json= await sitemap();
|
.action(async function pull({ git: is_git= false }){
|
||||||
toRSS(json);
|
if(is_git) s.run`git pull --rebase`;
|
||||||
|
const { changed }= await sitemap().then(toRSS);
|
||||||
|
echo("Changed files:", changed.length ? changed.join(", ") : "—");
|
||||||
|
if(is_git) gitCommit(changed, "pull");
|
||||||
|
$.exit(0);
|
||||||
|
})
|
||||||
|
.command("only-rss", "Update RSS from known sitemap")
|
||||||
|
.action(async function onlyRSS(){
|
||||||
|
const { path, json }= knownSitemap();
|
||||||
|
const status= await toRSS({ json, changed: [ path ] });
|
||||||
|
echo({ status });
|
||||||
$.exit(0);
|
$.exit(0);
|
||||||
})
|
})
|
||||||
.parse();
|
.parse();
|
||||||
|
|
||||||
/** @param {Sitemap} json */
|
function gitCommit(files, des= "not specified"){
|
||||||
async function toRSS(json){
|
if(!files.length || !s.run`git diff --numstat`.trim())
|
||||||
|
return echo("Nothig todo");
|
||||||
|
|
||||||
|
echo("Diff to save");
|
||||||
|
s.run`git config user.name "Bot"`;
|
||||||
|
s.run`git config user.email "${"zc.murtnec@naj.elrdna".split("").reverse().join("")}"`;
|
||||||
|
s.run`git add ${files}`;
|
||||||
|
s.run`git commit -m "Updated by bot – ${des}"`;
|
||||||
|
s.run`git push`;
|
||||||
|
s.run`git config --remove-section user`;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* @param {State} state
|
||||||
|
* @returns {State} state
|
||||||
|
* */
|
||||||
|
async function toRSS({ json, changed }){
|
||||||
|
if(!changed.length) return { json, changed };
|
||||||
|
|
||||||
const path= "rss.xml";
|
const path= "rss.xml";
|
||||||
const host= "https://pagenotfound.cz";
|
const host= "https://pagenotfound.cz";
|
||||||
const articles= json.articles.map(function({ title, perex, author, loc, drop }){
|
const articles= json.articles.map(function({ title, perex, author, loc, drop }){
|
||||||
return [
|
return [
|
||||||
"<item>",
|
"<item>",
|
||||||
...[
|
...[
|
||||||
`<title>${title}</title>`,
|
`<title>${encodeToXml(title)}</title>`,
|
||||||
`<link>${host+loc}</link>`,
|
`<link>${host+loc}</link>`,
|
||||||
`<description>${perex}</description>`,
|
`<guid>${host+loc}</guid>`,
|
||||||
`<author>${author}</author>`,
|
`<description>${encodeToXml(perex)}</description>`,
|
||||||
`<pubDate>${json.drops.find(d=> d.drop === drop).date}</pubDate>`,
|
`<dc:creator>${encodeToXml(author)}</dc:creator>`,
|
||||||
|
`<pubDate>${pubDate(json.drops.find(d=> d.drop === drop).date)}</pubDate>`,
|
||||||
`<category>${drop}</category>`,
|
`<category>${drop}</category>`,
|
||||||
].map(l=> "\t"+l),
|
].map(l=> "\t"+l),
|
||||||
"</item>"
|
"</item>"
|
||||||
].map(l=> "\t"+l).join("\n");
|
].map(l=> "\t"+l).join("\n");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
const description = [
|
||||||
|
"Page not found jsme založili z touhy po zábavné, kvalitní a inovativní žurnalistice.",
|
||||||
|
"Chceme vám tu nabízet komplexní long ready, nečekané bonusy, multimediální obsah",
|
||||||
|
"a hlavně texty, které nám samotným v ostatních médiích chybí. Budujte spolu s námi",
|
||||||
|
"komunitu Page not found, dejte nám zpětnou vazbu na první drop, přihlaste se",
|
||||||
|
"k odebírání newsletterů. Společně s vámi budeme moct naše cíle plnit rychleji."
|
||||||
|
].join(" ");
|
||||||
s.echo([
|
s.echo([
|
||||||
`<?xml version="1.0" encoding="UTF-8" ?>`,
|
`<?xml version="1.0" encoding="UTF-8" ?>`,
|
||||||
`<rss version="2.0">`,
|
`<rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:atom="http://www.w3.org/2005/Atom">`,
|
||||||
"<channel>",
|
"<channel>",
|
||||||
` <title>Pagenotfound.cz</title>`,
|
` <title>Pagenotfound.cz</title>`,
|
||||||
` <link>${host}</link>`,
|
` <link>${host}</link>`,
|
||||||
|
` <description>${description}</description>`,
|
||||||
|
` <atom:link href="${homepage}/raw/branch/main/rss.xml" rel="self" type="application/rss+xml" />`,
|
||||||
...articles,
|
...articles,
|
||||||
"</channel>",
|
"</channel>",
|
||||||
"</rss>"
|
"</rss>"
|
||||||
].join("\n")).to(path);
|
].join("\n")).to(path);
|
||||||
|
return { json, changed: [...changed, path] };
|
||||||
}
|
}
|
||||||
import { JSDOM } from "jsdom";
|
import { JSDOM } from "jsdom";
|
||||||
|
/** @returns {Promise<State>} */
|
||||||
async function sitemap(){
|
async function sitemap(){
|
||||||
const path= "sitemap.json";
|
const { json, path }= knownSitemap();
|
||||||
|
|
||||||
/** @type {Sitemap} */
|
|
||||||
const json= s.test("-f", path) ? s.cat(path).xargs(JSON.parse) : { drops: [], articles: [] };
|
|
||||||
const [ article_last= { drop: "" } ]= json.articles;
|
|
||||||
await syncDrops(json);
|
await syncDrops(json);
|
||||||
const [ { drop: drop_last } ]= json.drops;
|
const [ { drop: drop_last } ]= json.drops;
|
||||||
if(drop_last === article_last.drop) return json;
|
|
||||||
|
|
||||||
const res= await fetch(url_drops+drop_last);
|
const res= await fetch(url_drops+drop_last);
|
||||||
if(res.status !== 200) return;
|
if(res.status !== 200) return { json, changed: [] };
|
||||||
|
|
||||||
|
const drop_articles= dropArticles(drop_last, json);
|
||||||
const dom= new JSDOM(await res.text());
|
const dom= new JSDOM(await res.text());
|
||||||
const diff= [];
|
const diff= [];
|
||||||
for(const article of dom.window.document.querySelectorAll("article")){
|
for(const article of dom.window.document.querySelectorAll("article")){
|
||||||
|
const loc= article.querySelector("a")?.href;
|
||||||
|
if(!loc){
|
||||||
|
echo("Article without link:", article.textContent);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if(drop_articles.has(loc)) continue;
|
||||||
diff.push({
|
diff.push({
|
||||||
title: article.querySelector("h2").textContent.trim(),
|
title: article.querySelector("h2").textContent.trim(),
|
||||||
perex: article.querySelector("[class^=ArticleTile_perex]").textContent.trim(),
|
perex: article.querySelector("[class^=ArticleTile_perex]").textContent.trim(),
|
||||||
author: article.querySelector("[class^=ArticleTile_author]").textContent.trim(),
|
author: (article.querySelector("[class^=ArticleTile_author]")?.textContent || "Redakce").trim(),
|
||||||
loc: article.querySelector("a").href,
|
loc,
|
||||||
drop: drop_last,
|
drop: drop_last,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
if(!diff.length) return { json, changed: [] };
|
||||||
|
|
||||||
json.articles.unshift(...diff);
|
json.articles.unshift(...diff);
|
||||||
s.echo(JSON.stringify(json, null, "\t")).to(path);
|
s.echo(JSON.stringify(json, null, "\t")).to(path);
|
||||||
return json;
|
return { json, changed: [ path ] };
|
||||||
|
}
|
||||||
|
function encodeToXml(str){
|
||||||
|
if(!str) return str;
|
||||||
|
return str
|
||||||
|
.replace(/&/g, "&amp;")
|
||||||
|
.replace(/"/g, "&quot;")
|
||||||
|
.replace(/'/g, "&apos;")
|
||||||
|
.replace(/</g, "&lt;")
|
||||||
|
.replace(/>/g, "&gt;");
|
||||||
|
}
|
||||||
|
function knownSitemap(){
|
||||||
|
const path= "sitemap.json";
|
||||||
|
/** @type {Sitemap} */
|
||||||
|
const json= s.test("-f", path) ? s.cat(path).xargs(JSON.parse) : { drops: [], articles: [] };
|
||||||
|
return { json, path };
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Assumes that articles are sorted from newest to oldest
|
||||||
|
* @param {Drop.drop} drop
|
||||||
|
* @param {Sitemap} json
|
||||||
|
* @returns {Set<Article.loc>}
|
||||||
|
* */
|
||||||
|
function dropArticles(drop, { articles }){
|
||||||
|
const out= new Set();
|
||||||
|
for(const article of articles){
|
||||||
|
if(article.drop !== drop) break;
|
||||||
|
out.add(article.loc);
|
||||||
|
}
|
||||||
|
return out;
|
||||||
}
|
}
|
||||||
/** @param {Sitemap} json */
|
/** @param {Sitemap} json */
|
||||||
async function syncDrops(json){
|
async function syncDrops(json){
|
||||||
|
2180
package-lock.json
generated
2180
package-lock.json
generated
File diff suppressed because it is too large
Load Diff
42
package.json
42
package.json
@ -1,18 +1,28 @@
|
|||||||
{
|
{
|
||||||
"name": "pagenotfound-cli",
|
"name": "pagenotfound-cli",
|
||||||
"version": "1.0.0",
|
"version": "1.2.3",
|
||||||
"description": "Utility primary for generating RSS feed for Pagenotfound",
|
"description": "Utility primary for generating RSS feed for Pagenotfound",
|
||||||
"bin": "cli.js",
|
"bin": "cli.js",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"test": "echo \"Error: no test specified\" && exit 1"
|
"test": "echo \"Error: no test specified\" && exit 1"
|
||||||
},
|
},
|
||||||
"author": "",
|
"author": "Jan Andrle <andrle.jan@centrum.cz>",
|
||||||
"license": "MIT",
|
"repository": {
|
||||||
"dependencies": {
|
"type": "git",
|
||||||
"jsdom": "~24.1",
|
"url": "https://gitea.jaandrle.cz/jaandrle/pagenotfound-cli.git"
|
||||||
"nodejsscript": "~1.0"
|
},
|
||||||
},
|
"homepage": "https://gitea.jaandrle.cz/jaandrle/pagenotfound-cli",
|
||||||
"engines": {
|
"bugs": {
|
||||||
"node": ">=18.19"
|
"url": "https://gitea.jaandrle.cz/jaandrle/pagenotfound-cli/issues"
|
||||||
}
|
},
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"jsdom": "~25.0"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"nodejsscript": "~1.0"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=18.19"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
2229
sitemap.json
2229
sitemap.json
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user