Java · 2020年6月30日 0

PDF转图片

PDF转图片

在开发过程中,我们遇到了将PDF转换为图片的需求,这里主要介绍如何使用Java(Apache PDFBox)来实现。

Java Apache PdfBox使用

首先我们要引入Apache Pdfbox的包

gradle

compile 'org.apache.pdfbox:pdfbox:2.0.14'

maven

<!-- https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox -->
<dependency>
   <groupId>org.apache.pdfbox</groupId>
   <artifactId>pdfbox</artifactId>
   <version>2.0.14</version>
</dependency>

接下来几行代码搞定


import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPageTree;
import org.apache.pdfbox.rendering.ImageType;
import org.apache.pdfbox.rendering.PDFRenderer;

import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.File;


/**
 * Command-line utility that renders every PDF file found in the current
 * working directory to one JPEG image per page.
 *
 * <p>For a source file {@code report.pdf} the images are written to a folder
 * named {@code report} (created in the working directory if it does not
 * exist) as {@code report_1.jpg}, {@code report_2.jpg}, and so on.
 */
public class App {

   /** Resolution, in dots per inch, passed to the renderer for each page. */
   private static final int RENDER_DPI = 150;

   /**
    * Converts every regular file in the working directory whose extension is
    * {@code pdf} (case-insensitive) and then prints the elapsed time in
    * milliseconds.
    *
    * @param args ignored
    */
   public static void main(String[] args) {
       long startMillis = System.currentTimeMillis();

       File[] filesList = new File(".").listFiles();
       // listFiles() returns null when the directory cannot be read.
       if (filesList != null) {
           for (File file : filesList) {
               if (file.isFile() && "pdf".equalsIgnoreCase(getFileExtension(file.getName()))) {
                   convert(file);
               }
           }
       }

       System.out.print(System.currentTimeMillis() - startMillis);
   }

   /**
    * Returns the text after the last dot of {@code fileName}.
    *
    * <p>A name that is {@code null}, has no dot, or whose only dot is the
    * first character (a dot-file such as {@code .pdf}) has no extension.
    *
    * @param fileName file name without any directory part; may be {@code null}
    * @return the extension without the dot, or an empty string if there is none
    */
   private static String getFileExtension(String fileName) {
       if (fileName == null) {
           return "";
       }
       int dotIndex = fileName.lastIndexOf('.');
       return (dotIndex <= 0) ? "" : fileName.substring(dotIndex + 1);
   }

   /**
    * Returns {@code fileName} with its last extension removed, using the same
    * rule as {@link #getFileExtension(String)} so the two always agree.
    *
    * @param fileName non-null file name without any directory part
    * @return the name up to (not including) the last dot, or the whole name
    *         if it has no extension
    */
   private static String stripExtension(String fileName) {
       int dotIndex = fileName.lastIndexOf('.');
       return (dotIndex <= 0) ? fileName : fileName.substring(0, dotIndex);
   }

   /**
    * Renders each page of {@code sourceFile} to a JPEG file.
    *
    * <p>Failures are reported on {@code System.err} and do not propagate, so
    * one unreadable document does not stop the remaining files from being
    * processed by {@link #main(String[])}.
    *
    * @param sourceFile the PDF document to convert
    */
   private static void convert(File sourceFile) {
       if (!sourceFile.exists()) {
           System.err.println(sourceFile.getName() +" File not exists");
           return;
       }

       // Strip the extension positionally rather than with replace(".pdf", ""):
       // replace is case-sensitive (so "A.PDF" would keep its extension and the
       // folder name would collide with the source file) and removes every
       // occurrence of ".pdf", not just the trailing one.
       String baseName = stripExtension(sourceFile.getName());

       File destinationFile = new File(baseName); // converted images are saved here
       if (!destinationFile.exists() && destinationFile.mkdir()) {
           System.out.println("Folder Created -> "+ destinationFile.getAbsolutePath());
       }
       if (!destinationFile.isDirectory()) {
           // mkdir failed, or a regular file with that name is in the way.
           System.err.println("Cannot create output folder -> "+ destinationFile.getAbsolutePath());
           return;
       }

       System.out.println("Images copied to Folder: "+ destinationFile.getName());

       // try-with-resources closes the document even when rendering or
       // writing a page throws.
       try (PDDocument document = PDDocument.load(sourceFile)) {
           int pageCount = document.getNumberOfPages();
           System.out.println("Total files to be converted -> "+ pageCount);

           PDFRenderer pdfRenderer = new PDFRenderer(document);
           for (int page = 0; page < pageCount; ++page) {
               BufferedImage image = pdfRenderer.renderImageWithDPI(page, RENDER_DPI, ImageType.RGB);
               // Output files are numbered from 1, page indices from 0.
               File outputFile = new File(destinationFile, baseName +"_"+ (page + 1) +".jpg");
               // ImageIO.write returns false when no suitable writer is found.
               if (ImageIO.write(image, "jpg", outputFile)) {
                   System.out.println("Image Created -> "+ outputFile.getName());
               } else {
                   System.err.println("No image writer available for -> "+ outputFile.getName());
               }
           }
           System.out.println("Converted Images are saved at -> "+ destinationFile.getAbsolutePath());
       } catch (Exception e) {
           // Deliberately broad: this is the per-file boundary of a batch run,
           // and a failure on one document must not abort the others.
           System.err.println("Failed to convert "+ sourceFile.getName());
           e.printStackTrace();
       }
   }
}

这里主要使用了BufferedImage与ImageIO输出图片,通过PDFBox获取到PDF的内容

Share this: