Files
dotfiles/bin/§pdftk-data.mjs
2025-09-03 10:49:03 +02:00

145 lines
4.7 KiB
JavaScript
Executable File
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env nodejsscript
/* jshint esversion: 11,-W097, -W040, module: true, node: true, expr: true, undef: true *//* global echo, $, pipe, s, fetch, cyclicLoop */
// Prefix for temporary files. pdftk has issues reading from /tmp, so the
// temporary files are created via a relative path (next to where the script runs).
const tmp= "tmp-";
// Fail early when the `pdftk` executable cannot be located.
if(!s.which("pdftk")) $.error("pdftk not found");
$.api()
.version("2025-08-05")
.describe([
	"A small wrapper around 'pdftk' to extract data from pdf into the JSON.",
	"Or to update the PDF with data from JSON.",
	"Bookmarks are converted to JSON object with key=PageNumber, value= Title (no. of spaces= level-1).",
])
.option("--debug", "Debug mode")
.command("extract <file_pdf> [file_info]", "Extract data from PDF.")
.action(function extract(file_pdf, file_info, { debug }){
	if(!s.test("-f", file_pdf)) $.error("PDF File not found");
	// Default target: "<pdf base name>.json" (relative path, no directory part).
	if(!file_info) file_info= filename(file_pdf) + ".json";
	// Dump the raw pdftk metadata into a temporary file and convert it to JSON.
	const temp= `${tmp}${tmpname(file_pdf)}.info`;
	s.run`pdftk ${file_pdf} dump_data_utf8 output ${temp}`;
	const info= infoToJSON(temp);
	// In debug mode the raw dump is kept for inspection.
	if(!debug) s.rm(temp);
	s.echo(info).to(file_info);
	$.exit(0);
})
.command("update <file_pdf> [file_info]", "Update PDF with data from JSON.")
.action(function update(file_pdf, file_info, { debug }){
	if(!s.test("-f", file_pdf)) $.error("PDF File not found");
	if(!file_info) file_info= filename(file_pdf) + ".json";
	if(!s.test("-f", file_info)) $.error("Info File not found");
	// Convert the JSON back to the raw pdftk format and store it in a temporary file.
	const info= infoFromJSON(file_info);
	const temp= `${tmp}${tmpname(file_pdf)}.info`;
	s.echo(info).to(temp);
	// pdftk reads from a temporary copy and writes its output to the original path.
	const tmp_pdf= `${tmp}${tmpname(file_pdf)}.pdf`;
	s.cp(file_pdf, tmp_pdf);
	s.run`pdftk ${tmp_pdf} update_info_utf8 ${temp} output ${file_pdf}`;
	// In debug mode both temporary files are kept for inspection.
	if(!debug){
		s.rm(tmp_pdf);
		s.rm(temp);
	}
	$.exit(0);
})
.command("convert <file_info>", "Converts between JSON and raw text.")
.action(function convert(file_info){
	if(!s.test("-f", file_info)) $.error("Info File not found");
	// The direction is chosen by the extension: *.json → raw text, anything else → JSON.
	const info= file_info.endsWith(".json") ? infoFromJSON(file_info) : infoToJSON(file_info);
	echo(info);
	$.exit(0);
})
.parse();
/**
 * Returns the last segment of `path` (text after the final "/") with its
 * last extension removed.
 *
 * The extension is looked up inside the last segment only, so dots in
 * directory names (e.g. "./file", "a.b/file") do not affect the result.
 * A segment without an extension, or one whose only dot is the leading one
 * (e.g. ".bashrc"), is returned unchanged.
 *
 * @param {string} path
 * @returns {string}
 */
function filename(path){
	const base= path.slice(path.lastIndexOf("/")+1);
	const dot= base.lastIndexOf(".");
	// dot<=0 → no extension at all, or a dotfile
	return dot>0 ? base.slice(0, dot) : base;
}
/**
 * Builds a name for a temporary file: the result of `filename(path)`
 * followed by "-" and the current timestamp in milliseconds.
 *
 * @param {string} path
 * @returns {string}
 */
function tmpname(path){
	const base= filename(path);
	return `${base}-${Date.now()}`;
}
/**
 * Reads the JSON info file and converts it to the line-based text that is
 * later passed to `pdftk … update_info_utf8`.
 *
 * Conversion rules, applied to every top-level key of the JSON:
 * - `Bookmark` (object `PageNumber → Title`): the number of leading spaces
 *   in the title plus one becomes the level; bookmarks are then emitted like
 *   any other array of records. A missing `Bookmark` key means "no bookmarks".
 * - array of objects: for each item a `<Key>Begin` line followed by
 *   `<Key><SubKey>: <value>` lines.
 * - plain object (e.g. `Info`): for each entry the three lines
 *   `<Key>Begin`, `<Key>Key: <subkey>`, `<Key>Value: <value>`.
 * - anything else (including `null`): a single `<Key>: <value>` line.
 *
 * @param {string} file_info Path to the JSON file.
 * @returns {string} The generated lines joined with "\n".
 */
function infoFromJSON(file_info){
	const info= s.cat(file_info).xargs(JSON.parse);
	const bookmarks= Object.entries(info.Bookmark ?? {})
		.map(/** @param {[string, string]} _ */([PageNumber, Title])=> {
			const firstNonSpace= Title.search(/[^ ]/);
			// Empty or all-space title: every character counts as indentation,
			// which mirrors how `infoToJSON` encodes an empty title.
			const indent= firstNonSpace===-1 ? Title.length : firstNonSpace;
			return {
				PageNumber,
				Title: Title.slice(indent),
				Level: indent+1,
			};
		});
	const output= [];
	// Spreading keeps the original position of `Bookmark` among the keys.
	for(const [key, value] of Object.entries({ ...info, Bookmark: bookmarks })){
		if(Array.isArray(value)){
			for(const record of value){
				output.push(`${key}Begin`);
				for(const [subkey, subvalue] of Object.entries(record))
					output.push(`${key}${subkey}: ${subvalue}`);
			}
			continue;
		}
		// `typeof null` is also "object", but it has no entries to iterate.
		if(value!==null && typeof value==="object"){
			for(const [subkey, subvalue] of Object.entries(value)){
				output.push(
					`${key}Begin`,
					`${key}Key: ${subkey}`,
					`${key}Value: ${subvalue}`,
				);
			}
			continue;
		}
		output.push(`${key}: ${value}`);
	}
	return output.join("\n");
}
/**
 * Reads a raw info file (the text written by `pdftk … dump_data_utf8`) and
 * converts it to a JSON string.
 *
 * Parsing rules (blank lines are ignored, every line is trimmed):
 * - `InfoBegin` followed by `InfoKey: k` / `InfoValue: v` lines → entry
 *   `k: v` in the `Info` object.
 * - `<Name>Begin` followed by `<Name><SubKey>: v` lines → one object
 *   `{ SubKey: v, … }` appended to the `<Name>` array.
 * - any other `Key: value` line → top-level `Key: value`.
 *
 * Finally `Bookmark` (always present in the result) is turned into an object
 * `PageNumber → Title`, where the title is prefixed with `Level-1` spaces.
 * Because the page number is used as the key, several bookmarks pointing to
 * the same page collapse into the last one.
 *
 * @param {string} file_info Path to the raw info file.
 * @returns {string} JSON text indented with tabs.
 */
function infoToJSON(file_info){
	/**
	 * Splits "Key: value" at the FIRST ": " only, so that values containing
	 * ": " stay intact. "Key:" (empty value after trimming) gives an empty string.
	 * @param {string} content Trimmed, non-empty line.
	 * @returns {[string, string|undefined]}
	 */
	const splitRecord= content=> {
		const separator= content.indexOf(": ");
		if(separator!==-1) return [content.slice(0, separator), content.slice(separator+2)];
		if(content.endsWith(":")) return [content.slice(0, -1), ""];
		return [content, undefined];
	};
	const infoKeys= ["InfoKey", "InfoValue"];
	const suffix= "Begin";

	const output= new Map();
	// Dropping blank lines up-front keeps the record loops below simple.
	const data= s.cat(file_info).split("\n")
		.map(line=> line.trim())
		.filter(Boolean);
	let line= 0;
	while(line<data.length){
		const content= data[line++];
		if("InfoBegin"===content){
			const info= output.get("Info") ?? {};
			const curr= [];
			// Consume the following InfoKey/InfoValue lines; stop at the first other line.
			for(; line<data.length; line++){
				const [key, value]= splitRecord(data[line]);
				const index= infoKeys.indexOf(key);
				if(index===-1) break;
				curr[index]= value;
			}
			if(curr.length){
				const [key, value]= curr;
				info[key]= value;
			}
			output.set("Info", info);
			continue;
		}
		if(content.endsWith(suffix)){
			const name= content.slice(0, -suffix.length);
			const output_curr= output.get(name) ?? [];
			const curr= {};
			// Consume lines prefixed with the record name; the next "<Name>Begin"
			// (or any unrelated key) ends the current record.
			for(; line<data.length; line++){
				const [key, value]= splitRecord(data[line]);
				if(!key.startsWith(name) || key===content) break;
				curr[key.slice(name.length)]= value;
			}
			output_curr.push(curr);
			output.set(name, output_curr);
			continue;
		}
		const [key, value]= splitRecord(content);
		output.set(key, value);
	}
	const bookmarks= (output.get("Bookmark") || [])
		.map(({ PageNumber, Title, Level })=> {
			// Missing/invalid level → no indentation (and `repeat` never gets a negative count).
			const indent= Math.max(0, Number(Level)-1 || 0);
			return [PageNumber, " ".repeat(indent) + (Title ?? "")];
		});
	output.set("Bookmark", Object.fromEntries(bookmarks));
	return JSON.stringify(Object.fromEntries(output), null, "\t");
}