//--------------------------------------------------------------------------------------- // Copyright (c) 2001-2020 by PDFTron Systems Inc. All Rights Reserved. // Consult legal.txt regarding legal and license information. //--------------------------------------------------------------------------------------- #include #include #include #include #include #include #include #include using namespace pdftron; using namespace std; using namespace PDF; using namespace Common; void ProcessElements(PDFDoc& doc, Page& page, ElementReader& reader) { for (Element element=reader.Next(); element; element = reader.Next()) // Read page contents { switch (element.GetType()) { case Element::e_text: // Process text strings... { CharIterator itr = element.GetCharIterator(); GState gs = element.GetGState(); Font font = gs.GetFont(); double font_size = gs.GetFontSize(); double horiz_spacing = gs.GetHorizontalScale() / 100.0; Matrix2D text_mtx = element.GetTextMatrix(); Matrix2D pos(1, 0, 0, 1, 0, 0); Matrix2D font_mtx(font_size * horiz_spacing, 0, 0, font_size, 0, 0); double units_per_em = font.GetUnitsPerEm(); font_mtx *= Matrix2D(1.0 / units_per_em, 0, 0, -1.0 / units_per_em, 0, 0); ElementBuilder bldr; for (; itr.HasNext(); itr.Next()) { pos.m_h = itr.Current().x; pos.m_v = itr.Current().y; Matrix2D path_mtx(text_mtx); path_mtx *= pos; path_mtx *= font_mtx; PathData pathData = font.GetGlyphPath(itr.Current().char_code, false, &path_mtx); Element tempElement = bldr.CreatePath(pathData.GetPoints(), pathData.GetOperators()); Rect bbox; tempElement.GetBBox(bbox); /////////////////////////////////////////////////////////////////// // Add an annotation to show that the bounding box is as expected // just for debug purposes Annots::Square sqr = Annots::Square::Create(doc, bbox); sqr.SetColor(ColorPt(1, 0, 1), 3); sqr.SetBorderStyle(BorderStyle(BorderStyle::e_solid, 0.25)); sqr.RefreshAppearance(); page.AnnotPushBack(sqr); /////////////////////////////////////////////////////////////////// } } break; case Element::e_form: // Process form XObjects { reader.FormBegin(); ProcessElements(doc, page, reader); reader.End(); } break; } } } int main(int argc, char *argv[]) { int ret = 0; PDFNet::Initialize(); string input_path = "../../TestFiles/"; string output_path = "../../TestFiles/Output/"; string input_filename = "newsletter.pdf"; string output_filename = "newsletter_bboxes.pdf"; try // Extract text data from all pages in the document { PDFDoc doc((input_path + "newsletter.pdf").c_str()); doc.InitSecurityHandler(); int pgnum = doc.GetPageCount(); PageIterator itr; ElementReader page_reader; for (itr = doc.GetPageIterator(); itr.HasNext(); itr.Next()) // Read every page { page_reader.Begin(itr.Current()); ProcessElements(doc, itr.Current(), page_reader); page_reader.End(); } doc.Save(output_path + output_filename, SDF::SDFDoc::e_linearized, 0); cout << "Done." << endl; } catch(std::exception& e) { cout << e.what() << endl; ret = 1; } catch(...) { cout << "Unknown Exception" << endl; ret = 1; } PDFNet::Terminate(); return ret; }