Refactor and move ~/bin to ~/.local/bin

This commit is contained in:
2025-09-09 13:19:23 +02:00
parent b8cf5a87ab
commit fe4109c0f1
43 changed files with 514 additions and 21 deletions

182
.local/bin/§pdftk-data.mjs Executable file
View File

@@ -0,0 +1,182 @@
#!/usr/bin/env nodejsscript
/* jshint esversion: 11,-W097, -W040, module: true, node: true, expr: true, undef: true *//* global echo, $, pipe, s, fetch, cyclicLoop */
// Prefix of temporary files; they are created relative to the working directory
// because pdftk has issue reading from /tmp :-(
const tmp= "tmp-";
if(!s.which("pdftk")) $.error("pdftk not found");
$.api()
.version("2025-08-05")
.describe([
	"A small wrapper around 'pdftk' to extract data from pdf into the JSON.",
	"Or to update the PDF with data from JSON.",
	"Bookmarks are converted to JSON object with key=PageNumber, value= Title (no. of spaces= level-1).",
])
// with `--debug` the temporary files are kept (see the `debug` checks below and in `extract`)
.option("--debug", "Debug mode")
.command("extract <file_pdf> [file_info]", "Extract data from PDF.")
.action(function extractCMD(file_pdf, file_info, { debug }){
	if(!s.test("-f", file_pdf)) $.error("PDF File not found");
	// default target: `<pdf name>.json` (the directory part of `file_pdf` is dropped by `filename`)
	if(!file_info) file_info= filename(file_pdf) + ".json";
	// `debug` must be forwarded, otherwise the raw dump is always removed
	const info= extract(file_pdf, debug);
	s.echo(info).to(file_info);
	$.exit(0);
})
.command("update <file_pdf> [file_info]", "Update PDF with data from JSON.")
.action(function update(file_pdf, file_info, { debug }){
	if(!s.test("-f", file_pdf)) $.error("PDF File not found");
	if(!file_info) file_info= filename(file_pdf) + ".json";
	if(!s.test("-f", file_info)) $.error("Info File not found");
	// `.html` files are read via `infoFromHTML`, anything else is treated as JSON
	const infoIsHtml= file_info.endsWith(".html");
	const info= infoIsHtml ? infoFromHTML(file_info, file_pdf, debug) : infoFromJSON(file_info);
	const temp= `${tmp}${tmpname(file_pdf)}.info`;
	s.echo(info).to(temp);
	// pdftk reads from a copy so that its output can be written to the original path
	const tmp_pdf= `${tmp}${tmpname(file_pdf)}.pdf`;
	s.cp(file_pdf, tmp_pdf);
	s.run`pdftk ${tmp_pdf} update_info_utf8 ${temp} output ${file_pdf}`;
	if(!debug){
		s.rm(tmp_pdf);
		s.rm(temp);
	}
	$.exit(0);
})
.command("convert <file_info>", "Converts between JSON and raw text.")
.action(function convert(file_info){
	if(!s.test("-f", file_info)) $.error("Info File not found");
	// direction is chosen by extension: `.json` → raw text, anything else → JSON
	const ext= file_info.slice(file_info.lastIndexOf("."));
	const info= ext===".json" ? infoFromJSON(file_info) : infoToJSON(file_info);
	echo(info);
	$.exit(0);
})
.parse();
/**
 * Dumps the PDF data with `pdftk … dump_data_utf8` into a temporary `.info`
 * file and converts that file with `infoToJSON`.
 * @param {string} file_pdf Path to the PDF file.
 * @param {boolean} [debug] When truthy, the temporary dump file is kept.
 * @returns Whatever `infoToJSON` returns for the dump file.
 */
function extract(file_pdf, debug){
	const dump_file= tmp + tmpname(file_pdf) + ".info";
	s.run`pdftk ${file_pdf} dump_data_utf8 output ${dump_file}`;
	const json= infoToJSON(dump_file);
	if(debug) return json;
	s.rm(dump_file);
	return json;
}
/**
 * Returns the last `/`-separated segment of `path` without its extension.
 * The extension is searched only inside that last segment, so dots in
 * directory names, names without extension and dotfiles are kept intact.
 * @param {string} path
 * @returns {string}
 */
function filename(path){
	const base= path.slice(path.lastIndexOf("/")+1);
	const dot= base.lastIndexOf(".");
	// `dot===0` is a dotfile (no extension), `dot===-1` means no extension at all
	return dot>0 ? base.slice(0, dot) : base;
}
/**
 * Builds a name for a temporary file: `filename(path)`, a dash and the
 * current `Date.now()` timestamp.
 * @param {string} path
 * @returns {string}
 */
function tmpname(path){
	const base= filename(path);
	return `${base}-${Date.now()}`;
}
/**
 * Builds the pdftk info text for `file_pdf` with the `Info` records overridden
 * by the `<head>` of an HTML file.
 *
 * The HTML is scanned line by line (one tag per line is expected): `<title>`
 * sets `Info.Title` and every `<meta name="…" content="…">` sets the `Info`
 * record named by `name` with its first letter upper-cased. Scanning starts at
 * a line beginning with `<head` and stops at a line beginning with `</head>`.
 * `<meta>` tags without a usable `name`/`content` pair are skipped.
 *
 * @param {string} file_info Path to the HTML file.
 * @param {string} file_pdf Path to the PDF whose current data is used as the base.
 * @param {boolean} [debug] When truthy, temporary files are kept.
 * @returns Whatever `infoFromJSON` returns for the merged data.
 */
function infoFromHTML(file_info, file_pdf, debug){
	const info_orig= JSON.parse(extract(file_pdf, debug));
	// the extracted data may have no `Info` records at all
	if(!info_orig.Info) info_orig.Info= {};
	/** Value of the quoted attribute captured by `pattern`, `undefined` when absent. */
	const attribute= (line, pattern)=> line.match(pattern)?.[2];
	let isInside= false;
	for(const line_raw of s.cat(file_info).trim().split("\n")){
		const line= line_raw.trim();
		if(line.startsWith("<head")){
			isInside= true;
			continue;
		}
		if(!line || !isInside) continue;
		if(line.startsWith("</head>")) break;
		if(line.startsWith("<title>")){
			info_orig.Info.Title= line.slice(7).replace("</title>", "").trim();
			continue;
		}
		if(!line.startsWith("<meta") || !line.includes("name=")) continue;
		const key= attribute(line, /name=("|')(.*?)(\1)/);
		const value= attribute(line, /content=("|')(.*?)(\1)/);
		// e.g. `<meta name="viewport">` or `name=""` carries nothing to store
		if(!key || value===undefined) continue;
		info_orig.Info[key[0].toUpperCase()+key.slice(1)]= value;
	}
	// `infoFromJSON` reads from a file, so the merged data goes through a temporary one
	const tmp_json= `${tmp}${tmpname(file_pdf)}.json`;
	s.echo(JSON.stringify(info_orig, null, "\t")).to(tmp_json);
	const out= infoFromJSON(tmp_json);
	if(!debug) s.rm(tmp_json);
	return out;
}
/**
 * Converts the JSON representation back into `Key: Value` info text
 * (the input of `pdftk … update_info_utf8` in this script).
 *
 * - `Bookmark` is an object `{ "<PageNumber>…": "<indent><Title>" }`; the number
 *   of leading spaces of the value is `Level-1`. A missing `Bookmark` is treated
 *   as empty and a value made only of spaces becomes an empty title.
 * - arrays of objects become `<Key>Begin` blocks with `<Key><SubKey>: value` lines,
 * - plain objects become `<Key>Begin` / `<Key>Key` / `<Key>Value` triples,
 * - everything else becomes a single `<Key>: value` line.
 *
 * @param {string} file_info Path to the JSON file.
 * @returns {string} Lines joined by `\n`.
 */
function infoFromJSON(file_info){
	const info= s.cat(file_info).xargs(JSON.parse);
	const output= [];
	info.Bookmark= Object.entries(info.Bookmark || {})
		.map(/** @param {[string, string]} _ */([page, title])=> {
			const text= String(title);
			const indent= text.search(/[^ ]/);
			// no non-space character: everything is indentation (avoids `Level: 0`)
			const level= indent===-1 ? text.length : indent;
			return {
				// keys may carry a suffix (`12-0`), only the leading number is used
				PageNumber: Number.parseInt(page, 10),
				Title: text.slice(level),
				Level: level+1,
			};
		});
	for(const [key, value] of Object.entries(info)){
		if(Array.isArray(value)){
			for(const record of value){
				output.push(key+"Begin");
				for(const [subkey, subvalue] of Object.entries(record))
					output.push(`${key}${subkey}: ${subvalue}`);
			}
			continue;
		}
		if(typeof value==="object"){
			for(const [subkey, subvalue] of Object.entries(value))
				output.push(key+"Begin", `${key}Key: ${subkey}`, `${key}Value: ${subvalue}`);
			continue;
		}
		output.push(`${key}: ${value}`);
	}
	return output.join("\n");
}
/**
 * Converts `Key: Value` info text (as written by `pdftk … dump_data_utf8` in
 * this script) into a JSON string.
 *
 * - `InfoBegin` / `InfoKey` / `InfoValue` triples are merged into one `Info` object,
 * - any other `<Name>Begin` block becomes an item of the `<Name>` array, with
 *   the `<Name>` prefix removed from its keys,
 * - remaining `Key: Value` lines become top-level string properties,
 * - `Bookmark` is finally flattened to `{ "<PageNumber>-<index>": "<indent><Title>" }`
 *   where the indent is `Level-1` spaces (always present, `{}` when there are none).
 *
 * Blank lines are ignored everywhere and a value may itself contain `": "`.
 *
 * @param {string} file_info Path to the text file.
 * @returns {string} JSON indented with tabs.
 */
function infoToJSON(file_info){
	/**
	 * Splits a trimmed line on the first `": "` only.
	 * @returns {[string, string|undefined]}
	 */
	const parseLine= content=> {
		const at= content.indexOf(": ");
		if(at!==-1) return [ content.slice(0, at), content.slice(at+2) ];
		// lines are trimmed, so an empty value leaves just `Key:`
		if(content.endsWith(":")) return [ content.slice(0, -1), "" ];
		return [ content, undefined ];
	};
	const output= new Map();
	const data= s.cat(file_info).split("\n");
	for(let line= 0; line<data.length; line++){
		const content= data[line].trim();
		if(!content) continue;
		if("InfoBegin"===content){
			const info= output.has("Info") ? output.get("Info") : {};
			const curr= [];
			for(line++; line<data.length; line++){
				const content= data[line].trim();
				if(!content) continue;
				const [key, value]= parseLine(content);
				const index= ["InfoKey", "InfoValue"].indexOf(key);
				// not part of this record: step back so the outer loop handles the line
				if(index===-1){ line--; break; }
				curr[index]= value;
			}
			const [key, value]= curr;
			// `InfoBegin` without `InfoKey` has nothing to store
			if(key!==undefined) info[key]= value;
			output.set("Info", info);
			continue;
		}
		if(content.endsWith("Begin")){
			const name= content.slice(0, -"Begin".length);
			const output_curr= output.has(name) ? output.get(name) : [];
			const curr= {};
			for(line++; line<data.length; line++){
				const content= data[line].trim();
				if(!content) continue;
				const [key, value]= parseLine(content);
				// foreign key or the next `<Name>Begin`: step back for the outer loop
				if(!key.startsWith(name) || key === (name+"Begin")){ line--; break; }
				curr[key.replace(name, "")]= value;
			}
			output_curr.push(curr);
			output.set(name, output_curr);
			continue;
		}
		const [key, value]= parseLine(content);
		output.set(key, value);
	}
	const bookmarks= (output.get("Bookmark") || []).map(({ PageNumber, Title, Level }, i)=> {
		// a missing or non-positive level is treated as top level (no indentation)
		const indent= Math.max(0, (Number(Level) || 1) - 1);
		return [ PageNumber+"-"+i, " ".repeat(indent) + (Title ?? "") ];
	});
	output.set("Bookmark", Object.fromEntries(bookmarks));
	return JSON.stringify(Object.fromEntries(output), null, "\t");
}