#!/usr/bin/env nodejsscript
/* jshint esversion: 11,-W097, -W040, module: true, node: true, expr: true, undef: true *//* global echo, $, pipe, s, fetch, cyclicLoop */
/**
 * Prefix of all temporary files. It is a relative path on purpose:
 * pdftk has issues reading from /tmp, so temporary files are put next to
 * the place the script is run from.
 */
const tmp= "tmp-";

if(!s.which("pdftk")) $.error("pdftk not found");

$.api()
.version("2025-08-05")
.describe([
	"A small wrapper around 'pdftk' to extract data from pdf into the JSON.",
	"Or to update the PDF with data from JSON.",
	"Bookmarks are converted to JSON object with key=PageNumber, value= Title (no. of spaces= level-1).",
])
.option("--debug", "Debug mode")
.command("extract <file_pdf> [file_info]", "Extract data from PDF.")
// Dumps the PDF metadata with pdftk and stores it as JSON in `file_info`
// (defaults to `<pdf name>.json`). With `--debug` the raw dump is kept.
.action(function extract(file_pdf, file_info, { debug }){
	if(!s.test("-f", file_pdf)) $.error("PDF File not found");
	if(!file_info) file_info= filename(file_pdf) + ".json";

	const temp= `${tmp}${tmpname(file_pdf)}.info`;
	s.run`pdftk ${file_pdf} dump_data_utf8 output ${temp}`;
	const info= infoToJSON(temp);
	if(!debug) s.rm(temp);

	s.echo(info).to(file_info);
	$.exit(0);
})
.command("update <file_pdf> [file_info]", "Update PDF with data from JSON.")
// Converts `file_info` (defaults to `<pdf name>.json`) to the pdftk text
// format and writes it into the PDF. pdftk reads from a temporary copy of the
// PDF and writes its output over `file_pdf`. With `--debug` the temporary
// files are kept.
.action(function update(file_pdf, file_info, { debug }){
	if(!s.test("-f", file_pdf)) $.error("PDF File not found");
	if(!file_info) file_info= filename(file_pdf) + ".json";
	if(!s.test("-f", file_info)) $.error("Info File not found");

	const info= infoFromJSON(file_info);
	const temp= `${tmp}${tmpname(file_pdf)}.info`;
	s.echo(info).to(temp);

	const tmp_pdf= `${tmp}${tmpname(file_pdf)}.pdf`;
	s.cp(file_pdf, tmp_pdf);
	s.run`pdftk ${tmp_pdf} update_info_utf8 ${temp} output ${file_pdf}`;
	if(!debug){
		s.rm(tmp_pdf);
		s.rm(temp);
	}
	$.exit(0);
})
.command("convert <file_info>", "Converts between JSON and raw text.")
// Prints the converted content: `*.json` files are converted to the pdftk
// text format, anything else is treated as pdftk text and converted to JSON.
.action(function convert(file_info){
	if(!s.test("-f", file_info)) $.error("Info File not found");
	const info= file_info.endsWith(".json")
		? infoFromJSON(file_info)
		: infoToJSON(file_info);
	echo(info);
	$.exit(0);
})
.parse();

/**
 * Returns the last segment of `path` without its extension.
 * A path without an extension (or a dot-file such as `.foo`) is returned
 * whole, and dots in directory names are ignored.
 * @param {string} path
 * @returns {string}
 */
function filename(path){
	const base= path.slice(path.lastIndexOf("/")+1);
	const dot= base.lastIndexOf(".");
	// `dot===0` is a dot-file, `dot===-1` means there is no extension at all
	return dot>0 ? base.slice(0, dot) : base;
}
/**
 * Returns a name for a temporary file derived from `path`:
 * the file name (see `filename`) suffixed with the current timestamp.
 * @param {string} path
 * @returns {string}
 */
function tmpname(path){
	return `${filename(path)}-${Date.now()}`;
}
/**
 * Reads the JSON file and converts it to the text format passed to
 * `pdftk … update_info_utf8`:
 *
 * - `Bookmark` is expected to be an object `{ [PageNumber]: Title }` where the
 *   number of leading spaces in `Title` is `level-1`; it is expanded into
 *   `BookmarkBegin` records with `PageNumber`, `Title` and `Level`,
 * - arrays are written as `<Key>Begin` followed by `<Key><SubKey>: <value>` lines,
 * - other objects are written as `<Key>Begin`, `<Key>Key: …`, `<Key>Value: …`
 *   triplets,
 * - everything else is written as `<Key>: <value>`.
 *
 * @param {string} file_info Path to the JSON file.
 * @returns {string} Lines joined with `\n`.
 */
function infoFromJSON(file_info){
	const parsed= s.cat(file_info).xargs(JSON.parse);
	const info= {
		...parsed,
		// a missing `Bookmark` key means "no bookmarks", not a crash
		Bookmark: Object.entries(parsed.Bookmark ?? {})
			.map(([ PageNumber, rawTitle ])=> {
				const Title= String(rawTitle);
				// `search` returns -1 for empty/all-space titles → treat as top level
				const indent= Math.max(0, Title.search(/[^ ]/));
				return {
					PageNumber,
					Title: Title.slice(indent),
					Level: indent+1,
				};
			}),
	};

	const output= [];
	for(const [ key, value ] of Object.entries(info)){
		if(Array.isArray(value)){
			const records= value.flatMap(record=> [
				`${key}Begin`,
				...Object.entries(record).map(([ subkey, subvalue ])=> `${key}${subkey}: ${subvalue}`),
			]);
			output.push(...records);
			continue;
		}
		// `typeof null` is "object" too, so it has to be excluded explicitly
		if(value!==null && typeof value==="object"){
			const records= Object.entries(value).flatMap(([ subkey, subvalue ])=> [
				`${key}Begin`,
				`${key}Key: ${subkey}`,
				`${key}Value: ${subvalue}`,
			]);
			output.push(...records);
			continue;
		}
		output.push(`${key}: ${value}`);
	}
	return output.join("\n");
}
/**
 * Splits one `Name: value` line into `[ name, value ]`.
 * Only the first colon separates and exactly one space after it is dropped,
 * so values may contain colons and keep any further leading whitespace.
 * @param {string} text
 * @returns {[ string, string ]}
 */
function parseInfoLine(text){
	const colon= text.indexOf(":");
	if(colon<0) return [ text.trim(), "" ];
	return [ text.slice(0, colon), text.slice(colon+1).replace(/^ /, "") ];
}
/**
 * Reads the text file (as produced by `pdftk … dump_data_utf8`) and converts
 * it to JSON — the inverse of `infoFromJSON`:
 *
 * - `<Key>: <value>` lines become string properties,
 * - `<Key>Begin` records holding a `<Key>Key`/`<Key>Value` pair are collected
 *   into one object per `<Key>`,
 * - other `<Key>Begin` records are collected into an array of objects,
 * - `Bookmark` records are folded into `{ [PageNumber]: Title }` where `Title`
 *   is prefixed with `level-1` spaces (always present, `{}` when there are no
 *   bookmarks).
 *
 * NOTE(review): the body of the parsing loop was not readable in the reviewed
 * copy of this file; it is reconstructed from `infoFromJSON` — verify it
 * against real `pdftk dump_data_utf8` output.
 *
 * @param {string} file_info Path to the text file.
 * @returns {string} JSON text indented with tabs.
 */
function infoToJSON(file_info){
	const output= new Map();
	const data= s.cat(file_info).split("\n");
	let line= 0;
	const isEnd= line=> line>=data.length;
	for(; line<data.length; line++){
		const current= data[line];
		if(!current.trim()) continue;

		const group= /^(\w+)Begin\s*$/.exec(current);
		if(!group){
			const [ key, value ]= parseInfoLine(current);
			output.set(key, value);
			continue;
		}

		// consume all following `<Key>…: …` lines belonging to this record
		const key= group[1];
		const record= {};
		while(!isEnd(line+1) && data[line+1].startsWith(key) && data[line+1].includes(":")){
			line+= 1;
			const [ name, value ]= parseInfoLine(data[line]);
			record[name.slice(key.length)]= value;
		}

		if(Object.prototype.hasOwnProperty.call(record, "Key")){
			const dict= output.get(key) ?? {};
			dict[record.Key]= record.Value ?? "";
			output.set(key, dict);
			continue;
		}
		const list= output.get(key) ?? [];
		list.push(record);
		output.set(key, list);
	}

	output.set("Bookmark", pipe(
		items=> items.map(({ PageNumber, Title, Level })=> {
			// a missing/zero level must not reach `" ".repeat()` as a negative count
			const indent= Math.max(0, (Number(Level) || 1) - 1);
			return [ PageNumber, " ".repeat(indent) + (Title ?? "") ];
		}),
		Object.fromEntries,
	)(output.get("Bookmark") || []));
	return pipe(
		Object.fromEntries,
		o=> JSON.stringify(o, null, "\t"),
	)(output.entries());
}