PDF转图片
在开发过程中,我们遇到了PDF转图片的需求,这里主要介绍如何使用Java(Apache PDFBox)来实现
Java Apache PdfBox使用
首先我们要引入Apache Pdfbox的包
gradle
compile 'org.apache.pdfbox:pdfbox:2.0.14'
maven
<!-- https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox --> <dependency> <groupId>org.apache.pdfbox</groupId> <artifactId>pdfbox</artifactId> <version>2.0.14</version> </dependency>
接下来几行代码搞定
import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPageTree; import org.apache.pdfbox.rendering.ImageType; import org.apache.pdfbox.rendering.PDFRenderer; import javax.imageio.ImageIO; import java.awt.image.BufferedImage; import java.io.File; public class App { public static void main(String[] args) { Long currentTimeMillis = System.currentTimeMillis(); File dir = new File("."); File[] filesList = dir.listFiles(); if (filesList != null) { for (File file : filesList) { if (file.isFile() && getFileExtension(file.getName()).equalsIgnoreCase("pdf")) { convert(file); } } } long diff = System.currentTimeMillis() - currentTimeMillis; System.out.print(diff); } private static String getFileExtension(String fileName) { if (fileName == null || fileName.equals("")) return "undefined"; int dotIndex = fileName.lastIndexOf("."); return (dotIndex == -1) ? "" : fileName.substring(dotIndex + 1); } private static void convert(File sourceFile) { try { String destinationDir = sourceFile.getName().replace(".pdf", "") + "/"; // converted images from pdf document are saved here File destinationFile = new File(destinationDir); if (!destinationFile.exists()) { boolean fileCreated = destinationFile.mkdir(); if (fileCreated) System.out.println("Folder Created -> "+ destinationFile.getAbsolutePath()); } if (sourceFile.exists()) { System.out.println("Images copied to Folder: "+ destinationFile.getName()); PDDocument document = PDDocument.load(sourceFile); PDPageTree pdPageTree = document.getDocumentCatalog().getPages(); System.out.println("Total files to be converted -> "+ pdPageTree.getCount()); PDFRenderer pdfRenderer = new PDFRenderer(document); String fileName = sourceFile.getName().replace(".pdf", ""); int pageNumber = 1; for (int page = 0; page < document.getNumberOfPages(); ++page) { BufferedImage image = pdfRenderer.renderImageWithDPI(page, 150, ImageType.RGB); File outputFile = new File(destinationDir + fileName +"_"+ pageNumber +".jpg"); 
System.out.println("Image Created -> "+ outputFile.getName()); ImageIO.write(image, "jpg", outputFile); pageNumber++; } document.close(); System.out.println("Converted Images are saved at -> "+ destinationFile.getAbsolutePath()); } else { System.err.println(sourceFile.getName() +" File not exists"); } } catch (Exception e) { e.printStackTrace(); } } }
这里主要通过PDFBox读取并渲染PDF的每一页内容,再使用BufferedImage与ImageIO输出图片