Hi:
I am writing a Java version of “search text in PDF & highlight”, based on a C# version posted in this group.
I don’t 100% understand the code: I notice that it doesn’t pass the word itself to the highlight function, only
the bbox (which I assume is the word’s bounding box).
So if I use txt.begin(currPage), it outputs the images, but obviously nothing is highlighted.
If I use txt.begin(currPage, word_bbox), it doesn’t output anything at all. I think word_bbox doesn’t point to anything, so it can’t find
any text to highlight.
Please look at the code below; it should give you a better idea of what I want to do.
cheers
gary
Code:
package pdfsearch;
import java.io.File;
import pdftron.Common.PDFNetException;
import pdftron.PDF.Annot;
import pdftron.PDF.ColorPt;
import pdftron.PDF.ColorSpace;
import pdftron.PDF.Element;
import pdftron.PDF.ElementBuilder;
import pdftron.PDF.ElementWriter;
import pdftron.PDF.GState;
import pdftron.PDF.PDFDoc;
import pdftron.PDF.PDFDraw;
import pdftron.PDF.PDFNet;
import pdftron.PDF.Page;
import pdftron.PDF.PageIterator;
import pdftron.PDF.Rect;
import pdftron.PDF.TextExtractor;
import pdftron.SDF.Obj;
public class PDFSearch
{
static boolean containThisWord(String serchTerm, String needleTerm)
{
String[] array = serchTerm.split(" ");
for(String searchWord: array)
{
int condi = searchWord.compareToIgnoreCase(needleTerm);
if(condi == 0)
{
return true;
}
}
return false;
}
// Use PDFNet to generate appearance stream for highlight
static Obj CreateHighlightAppearance(PDFDoc doc, Rect bbox, ColorPt
higlight_color)
{
Obj stm = null;
try
{
// Create a button appearance stream
ElementBuilder build = new ElementBuilder();
ElementWriter writer = new ElementWriter();
writer.begin(doc);
// Draw background
double x1 = bbox.getX1();
double y1 = bbox.getY1();
double x2 = bbox.getX2();
double y2 = bbox.getY2();
Element element = build.createRect(x1- 2, y1, x2 +
2, y2);
element.setPathFill(true);
element.setPathStroke(false);
GState gs = element.getGState();
gs.setFillColorSpace(ColorSpace.createDeviceRGB());
gs.setFillColor(higlight_color);
gs.setBlendMode(GState.e_bl_multiply);
writer.writeElement(element);
stm = writer.end();
// Set the bounding box
stm.putRect("BBox", x1, y1, x2, y2);
stm.putName("Subtype", "Form");
}
catch(PDFNetException e)
{
e.printStackTrace();
}
return stm;
}
// Create Highlight Annotation.
static Annot CreateHighlightAnnot(PDFDoc doc, Rect bbox, ColorPt
highlight_color)
{
Annot a = null;
try
{
a = Annot.create(doc, Annot.e_Highlight, bbox);
a.setColor(highlight_color);
a.setAppearance(CreateHighlightAppearance(doc, bbox, highlight_color));
double x1 = bbox.getX1();
double y1 = bbox.getY1();
double x2 = bbox.getX2();
double y2 = bbox.getY2();
Obj quads = doc.createIndirectArray();
quads.pushBackNumber(x1);
quads.pushBackNumber(y2);
quads.pushBackNumber(x2);
quads.pushBackNumber(y2);
quads.pushBackNumber(x1);
quads.pushBackNumber(y1);
quads.pushBackNumber(x2);
quads.pushBackNumber(y1);
a.getSDFObj().put("QuadPoints", quads);
}
catch(PDFNetException e)
{
e.printStackTrace();
}
return a;
}
public static void main(String[] args)
{
// Commmand line input
// Get current directory path
File file = new File("");
String abolutePath = file.getAbsolutePath();
PDFNet.initialize();
PDFNet.setResourcesPath(abolutePath + "/resources");
String inputPath = abolutePath + "/input";
String outputPath = abolutePath + "/output";
try
{
String searchTerm = "google file system";
PDFDoc doc = new PDFDoc(inputPath + "/input.pdf");
doc.initSecurityHandler();
ColorPt highlight_color = new ColorPt(1, 1, 0); // Yellow
TextExtractor txt = new TextExtractor(); // Used to extract words
Rect word_bbox = new Rect();
int dpi = 150;
PDFDraw draw=new PDFDraw(dpi);
// Go through each page
int pageNum = 0;
for (PageIterator itr=doc.getPageIterator(); itr.hasNext();)
{
Page currPage = (Page)(itr.next());
// Read the page.
txt.begin(currPage, word_bbox);
//txt.begin(currPage);
//Extract words one by one.
for (TextExtractor.Line line = txt.getFirstLine(); line.isValid(); line=line.getNextLine())
{
for (TextExtractor.Word word = line.getFirstWord(); word.isValid(); word=word.getNextWord())
{
String wordStr = word.getString();
if(containThisWord(searchTerm, wordStr) == true)
{
currPage.annotPushBack(CreateHighlightAnnot(doc, word_bbox,
highlight_color));
}
}
//String outname = String.Format("{0}out{1:d}.jpg", outputPath, (Object)"0"); // itr.getPageNumber() //*
String outname = outputPath + "/" + "out" + pageNum + ".jpg";
draw.export(currPage, outname, "jpg");
}
++pageNum;
//test
if(pageNum > 2)
{
break;
}
}
// Complete search and highlight
System.out.println("Search & highlight completed");
}
catch(PDFNetException e)
{
e.printStackTrace();
}
}
}