forked from mindee/mindee-api-java
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathPdfBoxApi.java
More file actions
91 lines (77 loc) · 2.84 KB
/
PdfBoxApi.java
File metadata and controls
91 lines (77 loc) · 2.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
package com.mindee.pdf;
import com.mindee.MindeeException;
import com.mindee.input.PageOptions;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
/**
* Allows performing various operations on PDFs.
*/
public final class PdfBoxApi implements PdfOperation {
@Override
public SplitPdf split(SplitQuery splitQuery) throws IOException {
if (!checkPdfOpen(splitQuery.getFile())) {
throw new MindeeException("This document cannot be open and cannot be split.");
}
try (PDDocument originalDocument = Loader.loadPDF(splitQuery.getFile())) {
try (PDDocument splitDocument = new PDDocument()) {
int totalOriginalPages = countPages(splitQuery.getFile());
if (totalOriginalPages < splitQuery.getPageOptions().getOnMinPages()) {
return new SplitPdf(splitQuery.getFile(), totalOriginalPages);
}
List<Integer> pageRange = getPageRanges(splitQuery.getPageOptions(), totalOriginalPages);
pageRange.stream()
.filter(i -> i < totalOriginalPages)
.forEach(i -> splitDocument.addPage(originalDocument.getPage(i)));
try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream()) {
splitDocument.save(outputStream);
byte[] splitPdf = outputStream.toByteArray();
return new SplitPdf(splitPdf, countPages(splitPdf));
}
}
}
}
private List<Integer> getPageRanges(PageOptions pageOptions, Integer numberOfPages) {
Set<Integer> pages = Optional.ofNullable(pageOptions.getPageIndexes())
.map(Collection::stream)
.orElseGet(Stream::empty)
.filter(x -> x > (numberOfPages) * (-1) && x <= (numberOfPages - 1))
.map(x -> (numberOfPages + x) % numberOfPages)
.collect(Collectors.toSet());
List<Integer> allPages = IntStream.range(0, numberOfPages).boxed().collect(Collectors.toList());
switch (pageOptions.getOperation()) {
case KEEP_ONLY:
return new ArrayList<>(pages);
case REMOVE:
allPages.removeAll(pages);
return allPages;
default:
return allPages;
}
}
private boolean checkPdfOpen(byte[] documentFile) {
boolean opens = false;
try {
Loader.loadPDF(documentFile).close();
opens = true;
} catch (IOException e) {
e.printStackTrace();
}
return opens;
}
private int countPages(byte[] documentFile) throws IOException {
PDDocument document = Loader.loadPDF(documentFile);
int pageCount = document.getNumberOfPages();
document.close();
return pageCount;
}
}