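With WebViewer, it is possible to extract and download the images embedded in a PDF. The following sample shows how this can be achieved: it downloads every image as a PNG file when a document is loaded into the viewer. The sample enables the full API (`fullAPI: true`) and expects a `saveAs` function to be available on the page (for example, the one provided by FileSaver.js).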
// Create the viewer inside the #viewer element and continue once the instance is ready.
// `fullAPI: true` is set here; the code below relies on the `PDFNet` object taken from
// the instance (presumably only available with the full API — confirm against the WebViewer docs).
Webviewer({
initialDoc: './newsletter.pdf', //sample file with images
path: '/lib',
enableFilePicker: true,
fullAPI: true,
}, document.getElementById('viewer')).then(instance => {
const { docViewer, PDFNet } = instance;
// NOTE(review): `documentViewer` is not declared anywhere in the visible code. Outside strict
// mode this assignment creates a global; in strict mode / ES modules it throws a ReferenceError.
// Confirm whether a declaration exists elsewhere or whether the global is intentional.
documentViewer = docViewer;
// Running count of images seen so far; incremented by imageExtract and used in its log
// output and in the downloaded file names.
let image_counter = 0;
/**
 * Walks every element the given reader yields, logs details about each image
 * and downloads each image XObject as a PNG file.
 *
 * Form XObjects are entered recursively so that images nested inside them are
 * visited as well. Inline images are logged but not exported, because only
 * elements of type `e_image` are converted through their XObject.
 *
 * Uses `PDFNet`, `image_counter` and `saveAs` from the enclosing scope.
 *
 * @param {object} reader - Element reader that has already been started on a
 *   page or form; must provide `next()`, `formBegin()` and `end()`.
 * @returns {Promise<void>} Resolves once `reader.next()` has returned `null`.
 */
const imageExtract = async (reader) => {
  const { Type } = PDFNet.Element;
  let element;
  while ((element = await reader.next()) !== null) {
    // Query the type once; it is needed both for dispatch and for the export check.
    const type = await element.getType();
    switch (type) {
      case Type.e_image:
      case Type.e_inline_image: {
        image_counter += 1;
        console.log(`--> Image: ${image_counter}`);
        console.log(` Width: ${await element.getImageWidth()}`);
        console.log(` Height: ${await element.getImageHeight()}`);
        console.log(` BPC: ${await element.getBitsPerComponent()}`);

        // (m_h, m_v) is logged as the first corner; transforming (1, 1)
        // through the CTM gives the second one.
        const ctm = await element.getCTM();
        const corner = await ctm.mult(1, 1);
        console.log(
          ` Coords: x1=${ctm.m_h.toFixed(2)}, y1=${ctm.m_v.toFixed(2)}`
          + `, x2=${corner.x.toFixed(2)}, y2=${corner.y.toFixed(2)}`,
        );

        if (type === Type.e_image) {
          const image = await PDFNet.Image.createFromObj(await element.getXObject());
          // Write the PNG into a memory filter, then switch the same filter to
          // input mode to read the encoded bytes back.
          const filter = await PDFNet.Filter.createMemoryFilter(65536, false);
          const writer = await PDFNet.FilterWriter.create(filter);
          await image.exportAsPngFromStream(writer);
          await writer.flush();
          await filter.memoryFilterSetAsInputFilter();
          const pngReader = await PDFNet.FilterReader.create(filter);
          const pngBuffer = await pngReader.readAllIntoBuffer();
          const file = new Blob([pngBuffer], { type: 'image/png' });
          saveAs(file, `image${image_counter}.png`);
        }
        break;
      }
      case Type.e_form: // Process form XObjects
        // Await both calls so the nested traversal only starts once the form
        // has been entered, and so a failure rejects this function's promise
        // instead of becoming an unhandled rejection.
        await reader.formBegin();
        await imageExtract(reader);
        await reader.end();
        break;
      default:
        break;
    }
  }
};
/**
 * Extracts the images of the document currently loaded in the viewer.
 *
 * Images are found by traversing the display list of every page. With this
 * approach it is possible to obtain image positioning information and DPI.
 *
 * Uses `PDFNet`, `docViewer` and `imageExtract` from the enclosing scope.
 * Any failure is caught and logged, so the returned promise never rejects.
 *
 * @returns {Promise<void>} Resolves after all pages have been processed, or
 *   after an error has been logged.
 */
const extract = async () => {
  try {
    await PDFNet.initialize();
    const pdfDoc = await docViewer.getDocument().getPDFDoc();
    await pdfDoc.initSecurityHandler();
    const reader = await PDFNet.ElementReader.create();
    const itr = await pdfDoc.getPageIterator(1);
    // Read every page
    for (; await itr.hasNext(); await itr.next()) {
      const page = await itr.current();
      // Each reader call is awaited so pages are processed strictly in order
      // and any rejection lands in the catch below.
      await reader.beginOnPage(page);
      await imageExtract(reader);
      await reader.end();
    }
    console.log('Done.');
  } catch (error) {
    console.log({ error });
  }
};
// Register a handler for the viewer's 'documentLoaded' event that starts the extraction.
// The promise returned by extract() is not awaited here.
docViewer.on('documentLoaded', () => {
extract();
})
// Closes the callback passed to Webviewer(...).then(...).
});