PyMuPDF for Node JS
npm install pymupdf-node
Import into your source code with:
import * as PyMuPDFNode from "pymupdf-node";
Load the PyMuPDF wheel into the pymupdf module with:
const pymupdf = await PyMuPDFNode.loadPyMuPDF("node_modules/pymupdf-node/pymupdf/pymupdf-1.26.0-cp312-abi3-pyodide_2024_0_wasm32.whl");
If you also need PyMuPDF4LLM, load its wheel into the pymupdf4LLM module with:
const pymupdf4LLM = await PyMuPDFNode.loadPyMuPDF4LLM("node_modules/pymupdf-node/pymupdf/pymupdf4llm-0.0.24-py3-none-any.whl");
// Open the PDF and copy page 0.
let doc = pymupdf.open("test.pdf");
doc.copy_page(0);

// Options passed to PyMuPDF4LLM's to_markdown() for this document.
const markdownOptions = {
  page_chunks: false,
  write_images: false,
  ignore_images: true,
  image_path: "",
  extract_words: false,
  show_progress: false,
};
const markdown = pymupdf4LLM.to_markdown(doc, markdownOptions);
console.log(markdown);
PyMuPDF Node brings PyMuPDF to JavaScript environments: you call the PyMuPDF Python API as if it were JavaScript. See the PyMuPDF API documentation for details.
import * as PyMuPDFNode from "../dist/pymupdf-node.js";
import * as fs from "fs";
// Wheel files handed to the two loaders below.
const PYMUPDF_WHEEL_PATH = "../pymupdf/pymupdf-1.26.0-cp312-abi3-pyodide_2024_0_wasm32.whl";
const PYMUPDF4LLM_WHEEL_PATH = "../pymupdf/pymupdf4llm-0.0.24-py3-none-any.whl";

// Load PyMuPDF first, then PyMuPDF4LLM; both loaders are awaited before the script continues.
const pymupdf = await PyMuPDFNode.loadPyMuPDF(PYMUPDF_WHEEL_PATH);
const pymupdf4LLM = await PyMuPDFNode.loadPyMuPDF4LLM(PYMUPDF4LLM_WHEEL_PATH);
/**
 * Print a banner that marks the start of a section in the console output.
 *
 * The banner is preceded by a blank line and wraps the title in `=====` markers.
 *
 * @param {string} title - Text shown between the markers.
 * @returns {void}
 */
function logSection(title) {
  const banner = `\n===== ${title} =====`;
  console.log(banner);
}
/**
 * Print a section banner for `label`, convert `doc` to Markdown with
 * PyMuPDF4LLM and write the result to the console.
 *
 * @param {object} doc - Document passed to the PyMuPDF4LLM calls.
 * @param {string} [label="Document Snapshot"] - Caption appended to the banner.
 * @returns {void}
 */
function printMarkdown(doc, label = "Document Snapshot") {
  logSection(`📄 Print Markdown: ${label}`);

  // NOTE(review): both header helpers are constructed but never passed to
  // to_markdown(); presumably this only exercises their constructors — confirm.
  const headerIdentifier = new pymupdf4LLM.IdentifyHeaders(doc);
  const tocHeaderInfo = new pymupdf4LLM.TocHeaders(doc);

  const markdownOptions = {
    page_chunks: false,
    write_images: false,
    ignore_images: true,
    image_path: "",
    extract_words: false,
    show_progress: false,
  };
  const markdown = pymupdf4LLM.to_markdown(doc, markdownOptions);
  console.log(markdown);
}
/**
 * Open "test.pdf" again so each scenario starts from a document that earlier
 * scenarios have not modified.
 *
 * @returns {object} Whatever `pymupdf.open` returns for "test.pdf".
 */
function openFreshDoc() {
  const freshDocument = pymupdf.open("test.pdf");
  return freshDocument;
}
// Document under test; every scenario replaces it with a freshly opened copy.
let doc;

/**
 * Run one scenario: reopen the PDF, print the section banner, apply `action`
 * to the document and, when a label is given, print a Markdown snapshot.
 *
 * @param {string} title - Section banner text.
 * @param {(pdf: object) => void} action - Operation performed on the fresh document.
 * @param {string} [snapshotLabel] - Label for printMarkdown(); omitted for
 *   scenarios that print no snapshot.
 */
function runScenario(title, action, snapshotLabel) {
  doc = openFreshDoc();
  logSection(title);
  action(doc);
  if (snapshotLabel !== undefined) {
    printMarkdown(doc, snapshotLabel);
  }
}

runScenario("1. Initial Page Count", (pdf) => {
  console.log("Page count:", pdf.page_count);
}, "Initial PDF");

runScenario("2. Copy Page", (pdf) => {
  pdf.copy_page(0);
  console.log("Copied page 0 to the end");
}, "After copy_page(0)");

runScenario("3. Add New Page (end)", (pdf) => {
  pdf.new_page();
  console.log("Added blank page at the end");
}, "After new_page()");

runScenario("4. Add New Page (index 1, custom size)", (pdf) => {
  pdf.new_page(1, 400, 500);
  console.log("Inserted blank page at index 1 (400x500)");
}, "After new_page(1, 400, 500)");

runScenario("5. Insert Page with Text", (pdf) => {
  const insertedLines = pdf.insert_page({ pno: 0, text: "Inserted Page Content" });
  console.log(`Inserted text page at index 0 (lines inserted: ${insertedLines})`);
}, "After insert_page()");

runScenario("6. Delete Last Page", (pdf) => {
  pdf.delete_page(pdf.page_count - 1);
  console.log("Deleted last page");
}, "After delete_page()");

runScenario("7. Delete Pages by Array", (pdf) => {
  pdf.delete_pages([1, 2]);
  console.log("Deleted pages at index 1 and 2");
}, "After delete_pages([1, 2])");

runScenario("8. Delete Page Range 0–1", (pdf) => {
  pdf.delete_pages(0, 1);
  console.log("Deleted pages from index 0 to 1");
}, "After delete_pages(0, 1)");

// Scenarios 9-11 only log their results; no Markdown snapshot is printed.
runScenario("9. Set & Get Page Labels", (pdf) => {
  pdf.set_page_labels([{ startpage: 0, prefix: "L-", style: "D", firstpagenum: 1 }]);
  console.log("Page labels:", pdf.get_page_labels());
  console.log("Page numbers with label 'L-1':", pdf.get_page_numbers("L-1"));
});

runScenario("10. Bake Document", (pdf) => {
  pdf.bake(true, true);
  console.log("Baked document (annotations + widgets)");
});

runScenario("11. Scrub Metadata", (pdf) => {
  pdf.scrub({ metadata: true, javascript: true });
  console.log("Scrubbed metadata and javascript");
});

// Scenario 12 opens its second document before the banner is printed, so it
// is written out inline instead of going through runScenario().
doc = openFreshDoc();
const sourceDoc = openFreshDoc();
logSection("12. Insert PDF Page from Another Doc");
doc.insert_pdf(sourceDoc, { from_page: 0, to_page: 0 });
console.log("Inserted first page of another doc");
printMarkdown(doc, "After insert_pdf()");
// Scenario 13: embed the bytes of test.pdf into a fresh copy of the document.
doc = openFreshDoc();
logSection("13. Embed File");
const buffer = fs.readFileSync("test.pdf");
// A Node Buffer can be a view into a larger, shared ArrayBuffer (small
// allocations come from a pool), so `buffer.buffer` alone may contain
// unrelated bytes. Slice it to exactly the file's byte range.
const embeddedBytes = buffer.buffer.slice(
  buffer.byteOffset,
  buffer.byteOffset + buffer.byteLength,
);
const xref = doc.embfile_add("sample", embeddedBytes, "test.pdf", "test.pdf", "Embedded sample PDF");
console.log("Embedded file XREF:", xref);
// Scenario 14: add a range of annotations, a link and text to page 0, adjust
// pages 1 and 2, then save the result for manual inspection.
doc = openFreshDoc();
logSection("14. Add annot, link, text, etc");
const page = doc.load_page(0);
page.add_caret_annot([10, 10]);
page.add_text_annot([10, 30], 'This is a text annotation.');
page.add_freetext_annot([50, 30, 300, 80], 'This is a free_text annotation.', { fontsize: 14, richtext: true, border_color: [0, 0, 0], border_width: 2 });
// A Node Buffer can be a view into a larger, shared ArrayBuffer (small
// allocations come from a pool), so slice the backing store to exactly the
// file's bytes before attaching it.
const attachment = fs.readFileSync('./test.pdf');
const attachmentBytes = attachment.buffer.slice(
  attachment.byteOffset,
  attachment.byteOffset + attachment.byteLength,
);
page.add_file_annot([10, 70], attachmentBytes, 'pymupdf4node.pdf');
page.add_ink_annot([
  [[10, 90], [30, 110], [50, 90]],
]);
page.add_line_annot([10, 110], [30, 130]);
page.add_rect_annot([10, 150, 30, 170]);
page.add_circle_annot([10, 170, 30, 190]);
page.add_polyline_annot([[10, 190], [30, 210], [50, 190]]);
// Polygon annotation, so both the polyline and the polygon type are exercised.
page.add_polygon_annot([[10, 210], [30, 230], [50, 210]]);
page.add_underline_annot({ quads: [[10, 230, 50, 240]] });
page.add_strikeout_annot({ quads: [[10, 250, 50, 260]] });
page.add_squiggly_annot({ quads: [[10, 270, 50, 280]] });
page.add_highlight_annot({ quads: [[10, 290, 50, 300]] });
page.add_stamp_annot([10, 310, 50, 350], 3);
// The first redaction is applied right away; the second one is added
// afterwards and is not applied before the document is saved.
page.add_redact_annot([10, 370, 50, 410], { text: 'pymupdf4node redact', fill: [0, 0, 0], text_color: [1, 1, 1] });
page.apply_redactions();
page.add_redact_annot([10, 430, 50, 470], { cross_out: false });
page.insert_link({ from: [10, 490, 50, 530], kind: 2, uri: 'https://pymupdf.readthedocs.io/en/latest/vars.html#linkdest-kinds' });
page.insert_text([10, 550], 'Inserted Text');
// Add a second free-text annotation and delete it again immediately.
page.delete_annot(page.add_freetext_annot([310, 30, 570, 80], 'This is a free_text annotation.', { fontsize: 14, richtext: true, border_color: [0, 0, 0], border_width: 2 }));
doc.load_page(1).set_rotation(180);
doc.load_page(2).set_cropbox([10, 10, 100, 100]);
await doc.save('./pdf/output.pdf');
console.log("Please check the PDF located at the \"tests/pdf/output.pdf\" location.");
// Scenario 15: print every table found on `page` as Markdown.
logSection('15. Find tables in page');
const results = page.find_tables();
results.tables.forEach((table) => {
  console.log(table.to_markdown());
});

// Scenario 16: print the document's XML metadata.
logSection('16. Get XML metadata');
const metadata = doc.get_xml_metadata();
console.log("XML metadata=", metadata);