Compare commits
85 Commits
5700e740f8
...
main
Author | SHA1 | Date | |
---|---|---|---|
c8f411f46f | |||
9c58b975b4 | |||
548c6b9a62 | |||
46f4f39603 | |||
dd6d51fc39 | |||
631386dcd3 | |||
64a16c6afd | |||
33a19b324d | |||
ccd578cea5 | |||
d8d2b3cb56 | |||
11f7bef325 | |||
6efdd8177a | |||
c0955cf0a3 | |||
de7e7fd4f0 | |||
784444d1fc | |||
59af72a72c | |||
41b7ac3688 | |||
c18aa5d278 | |||
1747eadc54 | |||
7d56bb1f59 | |||
158b96c7c5 | |||
950868bfb4 | |||
3d1ac373e3
|
|||
b1db05311f | |||
4e8cc1bd94 | |||
8c766b0ac3 | |||
13441a8788 | |||
e494ea3ff6 | |||
b3fed778f3 | |||
85cdbe7a9e | |||
c09b247142 | |||
26b75634d5 | |||
7d04475878 | |||
da9a6d9e9d | |||
44da85f0d0 | |||
9d55e4ebd9 | |||
0a5326b80f
|
|||
c74512e208 | |||
8f0a06a526
|
|||
2f3caba17e | |||
77ecfa309a | |||
8568922457 | |||
39e8ff4b4c | |||
7abd1571b9 | |||
898b7d7ea5 | |||
e33cc9d756 | |||
9b5084bd63 | |||
fbf3e60e0d | |||
81c8d52b00 | |||
ffd2f41a13 | |||
1aab450bf7 | |||
46765180ae | |||
55e5e9dc7d | |||
a56d920a67 | |||
000216ae77
|
|||
9e16a8ea95 | |||
633ca5465b | |||
1ffe1ab293
|
|||
200669791c | |||
fd723e72e9 | |||
b146530fe2 | |||
fe224be1d3 | |||
97ef8e3fb2 | |||
f20337c45e | |||
6fbb45210f | |||
4404a4b962 | |||
ded37d1178 | |||
71acab3ce5
|
|||
a576caff53 | |||
e080684fb2 | |||
5d174d2f19
|
|||
ce93c7241f | |||
7ebcf6e2d2
|
|||
8f0deba912
|
|||
1b2f55079c | |||
5a1c52565e
|
|||
6c6c269dbd
|
|||
df82405ed8
|
|||
39bb5ef62d
|
|||
4bbd88a563
|
|||
0054ceb6f2
|
|||
add7993460
|
|||
db2af48953
|
|||
597e21895a
|
|||
837cea046d
|
@ -2,10 +2,10 @@ name: Update RSS
|
||||
on:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
- cron: '0 3 * * *' # daily at 3am
|
||||
- cron: '39 19 * * *' # daily at 19:39 Prague
|
||||
|
||||
jobs:
|
||||
Explore-Gitea-Actions:
|
||||
update-rss:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: https://gitea.com/actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
|
||||
@ -14,4 +14,4 @@ jobs:
|
||||
node-version: lts/*
|
||||
cache: 'npm'
|
||||
- run: npm ci
|
||||
- run: npx nodejsscript cli.js pull
|
||||
- run: npx nodejsscript cli.mjs pull --git
|
||||
|
134
cli.mjs
134
cli.mjs
@ -1,7 +1,7 @@
|
||||
#!/usr/bin/env -S npx nodejsscript
|
||||
/* jshint esversion: 11,-W097, -W040, module: true, node: true, expr: true, undef: true *//* global echo, $, pipe, s, fetch, cyclicLoop */
|
||||
const url_drops= "https://pagenotfound.cz/drop/";
|
||||
const { version, description }= s.cat("package.json").xargs(JSON.parse);
|
||||
const { version, description, homepage }= s.cat("package.json").xargs(JSON.parse);
|
||||
/**
|
||||
* @typedef {Object} Article
|
||||
* @property {string} title
|
||||
@ -20,76 +20,168 @@ const { version, description }= s.cat("package.json").xargs(JSON.parse);
|
||||
* @property {Article[]} articles
|
||||
* @property {Drop[]} drops
|
||||
* */
|
||||
/**
|
||||
* @typedef {Object} State
|
||||
* @property {Sitemap} json
|
||||
* @property {string[]} changed Changed files
|
||||
* */
|
||||
/**
 * Formats a date as an RFC-822 date-time with a four-digit year, the form RSS
 * expects in `<pubDate>` (e.g. `Wed, 02 Oct 2002 08:00:00 GMT`).
 *
 * `Date.prototype.toUTCString` is used because its output format is fixed by the
 * ECMAScript specification. Output of `Intl.DateTimeFormat` depends on the
 * ICU/CLDR data shipped with the runtime (for example September may be
 * abbreviated as "Sept"), which is not guaranteed to be a valid RFC-822 date.
 *
 * @param {string} date Value accepted by the `Date` constructor
 * @returns {string} Date-time in UTC, always suffixed with `GMT`
 * @throws {RangeError} When `date` cannot be parsed
 * */
const pubDate= date=> {
	const D= new Date(date);
	// `toUTCString` would return "Invalid Date" here; fail loudly instead of
	// writing that text into the feed
	if(Number.isNaN(D.getTime()))
		throw new RangeError("Invalid time value");
	return D.toUTCString();
};
|
||||
|
||||
$.api()
|
||||
.version(version)
|
||||
.describe(description)
|
||||
.command("pull", "Update article list")
|
||||
.action(async function pull(){
|
||||
const json= await sitemap();
|
||||
toRSS(json);
|
||||
.option("--git", "Update git repository")
|
||||
.action(async function pull({ git: is_git= false }){
|
||||
if(is_git) s.run`git pull --rebase`;
|
||||
const { changed }= await sitemap().then(toRSS);
|
||||
echo("Changed files:", changed.length ? changed.join(", ") : "—");
|
||||
if(is_git) gitCommit(changed, "pull");
|
||||
$.exit(0);
|
||||
})
|
||||
.command("only-rss", "Update RSS from known sitemap")
|
||||
.action(async function onlyRSS(){
|
||||
const { path, json }= knownSitemap();
|
||||
const status= await toRSS({ json, changed: [ path ] });
|
||||
echo({ status });
|
||||
$.exit(0);
|
||||
})
|
||||
.parse();
|
||||
|
||||
/** @param {Sitemap} json */
|
||||
async function toRSS(json){
|
||||
/**
 * Stages, commits and pushes the given files under a temporary "Bot" git identity.
 *
 * Nothing is done when `files` is empty or when `git diff --numstat` prints
 * nothing. The temporary `user` config section is removed again even when one
 * of the git steps throws, so a failed run does not leave the bot identity
 * behind in the repository config.
 *
 * @param {string[]} files Paths handed over to `git add`
 * @param {string} [des="not specified"] Text appended to the commit message
 * */
function gitCommit(files, des= "not specified"){
	if(!files.length || !s.run`git diff --numstat`.trim())
		return echo("Nothig todo");

	echo("Diff to save");
	try {
		s.run`git config user.name "Bot"`;
		// the address is kept reversed in the source and restored at runtime
		s.run`git config user.email "${"zc.murtnec@naj.elrdna".split("").reverse().join("")}"`;
		s.run`git add ${files}`;
		s.run`git commit -m "Updated by bot – ${des}"`;
		s.run`git push`;
	} finally {
		// always drop the temporary identity, also on failure
		s.run`git config --remove-section user`;
	}
}
|
||||
/**
|
||||
* @param {State} state
|
||||
* @returns {State} state
|
||||
* */
|
||||
async function toRSS({ json, changed }){
|
||||
if(!changed.length) return { json, changed };
|
||||
|
||||
const path= "rss.xml";
|
||||
const host= "https://pagenotfound.cz";
|
||||
const articles= json.articles.map(function({ title, perex, author, loc, drop }){
|
||||
return [
|
||||
"<item>",
|
||||
...[
|
||||
`<title>${title}</title>`,
|
||||
`<title>${encodeToXml(title)}</title>`,
|
||||
`<link>${host+loc}</link>`,
|
||||
`<description>${perex}</description>`,
|
||||
`<author>${author}</author>`,
|
||||
`<pubDate>${json.drops.find(d=> d.drop === drop).date}</pubDate>`,
|
||||
`<guid>${host+loc}</guid>`,
|
||||
`<description>${encodeToXml(perex)}</description>`,
|
||||
`<dc:creator>${encodeToXml(author)}</dc:creator>`,
|
||||
`<pubDate>${pubDate(json.drops.find(d=> d.drop === drop).date)}</pubDate>`,
|
||||
`<category>${drop}</category>`,
|
||||
].map(l=> "\t"+l),
|
||||
"</item>"
|
||||
].map(l=> "\t"+l).join("\n");
|
||||
});
|
||||
|
||||
const description = [
|
||||
"Page not found jsme založili z touhy po zábavné, kvalitní a inovativní žurnalistice.",
|
||||
"Chceme vám tu nabízet komplexní long ready, nečekané bonusy, multimediální obsah",
|
||||
"a hlavně texty, které nám samotným v ostatních médiích chybí. Budujte spolu s námi",
|
||||
"komunitu Page not found, dejte nám zpětnou vazbu na první drop, přihlaste se",
|
||||
"k odebírání newsletterů. Společně s vámi budeme moct naše cíle plnit rychleji."
|
||||
].join(" ");
|
||||
s.echo([
|
||||
`<?xml version="1.0" encoding="UTF-8" ?>`,
|
||||
`<rss version="2.0">`,
|
||||
`<rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:atom="http://www.w3.org/2005/Atom">`,
|
||||
"<channel>",
|
||||
` <title>Pagenotfound.cz</title>`,
|
||||
` <link>${host}</link>`,
|
||||
` <description>${description}</description>`,
|
||||
` <atom:link href="${homepage}/raw/branch/main/rss.xml" rel="self" type="application/rss+xml" />`,
|
||||
...articles,
|
||||
"</channel>",
|
||||
"</rss>"
|
||||
].join("\n")).to(path);
|
||||
return { json, changed: [...changed, path] };
|
||||
}
|
||||
import { JSDOM } from "jsdom";
|
||||
/** @returns {Promise<State>} */
|
||||
async function sitemap(){
|
||||
const path= "sitemap.json";
|
||||
|
||||
/** @type {Sitemap} */
|
||||
const json= s.test("-f", path) ? s.cat(path).xargs(JSON.parse) : { drops: [], articles: [] };
|
||||
const [ article_last= { drop: "" } ]= json.articles;
|
||||
const { json, path }= knownSitemap();
|
||||
await syncDrops(json);
|
||||
const [ { drop: drop_last } ]= json.drops;
|
||||
if(drop_last === article_last.drop) return json;
|
||||
|
||||
const res= await fetch(url_drops+drop_last);
|
||||
if(res.status !== 200) return;
|
||||
if(res.status !== 200) return { json, changed: [] };
|
||||
|
||||
const drop_articles= dropArticles(drop_last, json);
|
||||
const dom= new JSDOM(await res.text());
|
||||
const diff= [];
|
||||
for(const article of dom.window.document.querySelectorAll("article")){
|
||||
const loc= article.querySelector("a")?.href;
|
||||
if(!loc){
|
||||
echo("Article without link:", article.textContent);
|
||||
continue;
|
||||
}
|
||||
if(drop_articles.has(loc)) continue;
|
||||
diff.push({
|
||||
title: article.querySelector("h2").textContent.trim(),
|
||||
perex: article.querySelector("[class^=ArticleTile_perex]").textContent.trim(),
|
||||
author: article.querySelector("[class^=ArticleTile_author]").textContent.trim(),
|
||||
loc: article.querySelector("a").href,
|
||||
author: (article.querySelector("[class^=ArticleTile_author]")?.textContent || "Redakce").trim(),
|
||||
loc,
|
||||
drop: drop_last,
|
||||
});
|
||||
}
|
||||
if(!diff.length) return { json, changed: [] };
|
||||
|
||||
json.articles.unshift(...diff);
|
||||
s.echo(JSON.stringify(json, null, "\t")).to(path);
|
||||
return json;
|
||||
return { json, changed: [ path ] };
|
||||
}
|
||||
/**
 * Escapes the five characters with special meaning in XML (`&`, `"`, `'`, `<`, `>`)
 * so that the text can be placed into element content or attribute values.
 *
 * @param {string} str Raw text
 * @returns {string} Escaped text; a falsy value (e.g. `""` or `undefined`) is returned unchanged
 * */
function encodeToXml(str){
	if(!str) return str;
	const entities= {
		"&": "&amp;",
		"\"": "&quot;",
		"'": "&apos;",
		"<": "&lt;",
		">": "&gt;",
	};
	// single pass, so the `&` of an already produced entity is never escaped twice
	return str.replace(/[&"'<>]/g, ch=> entities[ch]);
}
|
||||
/**
 * Reads the stored sitemap from the relative path `sitemap.json`.
 * When no such file exists an empty sitemap (no drops, no articles) is returned instead.
 *
 * @returns {{ json: Sitemap, path: string }} Parsed sitemap together with the path it belongs to
 * */
function knownSitemap(){
	const path= "sitemap.json";
	if(!s.test("-f", path))
		return { json: { drops: [], articles: [] }, path };

	/** @type {Sitemap} */
	const json= s.cat(path).xargs(JSON.parse);
	return { json, path };
}
|
||||
/**
 * Collects the locations of the leading articles that belong to `drop`.
 * Only the uninterrupted run at the start of the list is taken; scanning ends at
 * the first article of a different drop, so `articles` is expected to be ordered
 * from newest to oldest.
 *
 * @param {Drop["drop"]} drop Identifier of the drop to collect
 * @param {Sitemap} json Sitemap whose `articles` are scanned
 * @returns {Set<Article["loc"]>} Locations of the matching articles
 * */
function dropArticles(drop, { articles }){
	const end= articles.findIndex(article=> article.drop !== drop);
	const head= end === -1 ? articles : articles.slice(0, end);
	return new Set(head.map(({ loc })=> loc));
}
|
||||
/** @param {Sitemap} json */
|
||||
async function syncDrops(json){
|
||||
|
2180
package-lock.json
generated
2180
package-lock.json
generated
File diff suppressed because it is too large
Load Diff
42
package.json
42
package.json
@ -1,18 +1,28 @@
|
||||
{
|
||||
"name": "pagenotfound-cli",
|
||||
"version": "1.0.0",
|
||||
"description": "Utility primary for generating RSS feed for Pagenotfound",
|
||||
"bin": "cli.js",
|
||||
"scripts": {
|
||||
"test": "echo \"Error: no test specified\" && exit 1"
|
||||
},
|
||||
"author": "",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"jsdom": "~24.1",
|
||||
"nodejsscript": "~1.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18.19"
|
||||
}
|
||||
"name": "pagenotfound-cli",
|
||||
"version": "1.2.3",
|
||||
"description": "Utility primary for generating RSS feed for Pagenotfound",
|
||||
"bin": "cli.js",
|
||||
"scripts": {
|
||||
"test": "echo \"Error: no test specified\" && exit 1"
|
||||
},
|
||||
"author": "Jan Andrle <andrle.jan@centrum.cz>",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "https://gitea.jaandrle.cz/jaandrle/pagenotfound-cli.git"
|
||||
},
|
||||
"homepage": "https://gitea.jaandrle.cz/jaandrle/pagenotfound-cli",
|
||||
"bugs": {
|
||||
"url": "https://gitea.jaandrle.cz/jaandrle/pagenotfound-cli/issues"
|
||||
},
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"jsdom": "~25.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"nodejsscript": "~1.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18.19"
|
||||
}
|
||||
}
|
||||
|
2229
sitemap.json
2229
sitemap.json
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user