Q:
How can I get the bounding box for each character in the text run?
---
A:
GetBBox() returns a scaled box using BBox, ascent, and descent
information that is available in the font dictionary. This calculation
is quite fast, but unfortunately this information is often incorrect
(due to the abundance of poor PDF producers out there).
Also, the returned bounding box applies to the entire text run and not
to individual characters. For applications where accurate bbox
information is important, there are a couple of alternatives:
a) Obtain a bounding box by finding the extents of the transformed glyph
outline.
b) Use CharIterator->x/y and element->GState info (such as the font
size, char and word spacing, CTM and text matrix, and font.GetWidth(),
etc) to compute the bounding box.
In case you prefer to go with method a), you can obtain the bounding
box for each character by obtaining the glyph path and calling
GetPathBBox() on the returned points.
GetPathBBox() accepts a transform matrix (path_mtx) which can be
calculated as follows (for C# or VB.NET sample code similar to
GetPathBBox, please see ElementReaderAdvanced ):
=====
void MyFunct(...)
{
CharIterator itr = element->CharBegin(); CharIterator end =
element->CharEnd();
GState* gs = element->GetGState();
PDF::Font font = gs->GetFont();
double font_size = gs->GetFontSize();
double horiz_spacing = gs->GetHorizontalScale() / 100.0; Matrix2D
font_mtx(font_size * horiz_spacing, 0, 0, font_size, 0, 0);
if (font.GetType() != Font::e_Type3) {
double units_per_em = font.GetUnitsPerEm();
font_mtx *= Matrix2D(1.0/units_per_em, 0, 0, -1.0/units_per_em, 0,
0);
Matrix2D text_mtx(element->GetTextMatrix());
Matrix2D pos(1, 0, 0, 1, 0, 0);
for (; itr!=end; ++itr) {
pos.m_h = itr->x; pos.m_v = itr->y;
Matrix2D path_mtx(text_mtx * pos * font_mtx);
...
font.GetGlyphPath(itr->char_code, m_path_oprs, m_path_data, true);
// transofrm all glyph points using path_mtx
{
int sz = m_path_data.size();
assert(sz%2 == 0);
Real* pt = &m_path_data[0];
Real* pt_end = pt + sz;
for (++pt; pt<pt_end; pt+=2) path_mtx.Mult(*(pt-1), *pt);
}
Rect bbox;
GetPathBBox(&m_path_data[0], int(m_path_data.size()), (char*)
&m_path_oprs[0], int(m_path_oprs.size()), bbox.x1, bbox.y1, bbox.x2,
bbox.y2);
}
// Expands the box [min_x, max_x] x [min_y, max_y] so that it contains (x, y).
static void ExtendBBox(double x, double y,
                       double &min_x, double &min_y,
                       double &max_x, double &max_y) {
  if (x < min_x) min_x = x;
  if (x > max_x) max_x = x;
  if (y < min_y) min_y = y;
  if (y > max_y) max_y = y;
}

// Expands the box so that it contains 'num_points' consecutive (x, y) pairs
// starting at 'pts'.
static void ExtendBBoxByPoints(const double* pts, int num_points,
                               double &min_x, double &min_y,
                               double &max_x, double &max_y) {
  for (int i = 0; i < num_points; ++i) {
    ExtendBBox(pts[2*i], pts[2*i + 1], min_x, min_y, max_x, max_y);
  }
}

// Computes the bounding box of all points referenced by a path.
//
//   data, data_sz - flat array of path coordinates and its length
//   opr, opr_sz   - array of path segment operators and its length
//   min_x, min_y, max_x, max_y - receive the resulting box
//
// Operands consumed per operator: moveto/lineto take one point, conicto two
// points, cubicto three points, and rect takes x, y, width, height. Any other
// operator is skipped and consumes no data.
//
// For curves every listed point (including control points) is added to the
// box, so the result may be larger than the tight bounds of the curve.
//
// Returns false if 'opr' is empty (outputs untouched), if 'data' runs out
// before an operator's operands could be read (outputs hold the box
// accumulated so far), or if no point was added to the box.
bool GetPathBBox(const double* data, int data_sz, const char* opr, int opr_sz,
                 double &min_x, double &min_y, double &max_x, double &max_y) {
  const char* const opr_end = opr + opr_sz;
  if (opr >= opr_end) return false;

  const double* data_itr = data;
  const double* const data_end = data + data_sz;

  // Sentinel used to seed the box and to detect "nothing was added".
  static const double invalid_value = 1e300;
  max_x = max_y = -invalid_value;
  min_x = min_y = invalid_value;

  for (const char* opr_itr = opr; opr_itr < opr_end; ++opr_itr) {
    // The remaining operand count is compared as a difference so that no
    // pointer beyond the end of 'data' is ever formed.
    switch (*opr_itr) {
    case Element::e_moveto:
    case Element::e_lineto:
      if (data_end - data_itr < 2) return false;
      ExtendBBoxByPoints(data_itr, 1, min_x, min_y, max_x, max_y);
      data_itr += 2;
      break;

    case Element::e_rect:
      if (data_end - data_itr < 4) return false;
      // First corner, then the opposite corner (x + width, y + height).
      ExtendBBox(data_itr[0], data_itr[1], min_x, min_y, max_x, max_y);
      ExtendBBox(data_itr[0] + data_itr[2], data_itr[1] + data_itr[3],
                 min_x, min_y, max_x, max_y);
      data_itr += 4;
      break;

    case Element::e_cubicto:
      if (data_end - data_itr < 6) return false;
      ExtendBBoxByPoints(data_itr, 3, min_x, min_y, max_x, max_y);
      data_itr += 6;
      break;

    case Element::e_conicto:
      if (data_end - data_itr < 4) return false;
      ExtendBBoxByPoints(data_itr, 2, min_x, min_y, max_x, max_y);
      data_itr += 4;
      break;
    }
  }

  return (min_x != invalid_value && min_y != invalid_value);
}
//////////////////////////////////////////////////////////////
The second option may look along the following lines:
void MyFunct(...)
{
CharIterator itr = element->CharBegin(); CharIterator end = element-
CharEnd();
GState* gs = element->GetGState();
Font font = gs->GetFont();
double font_size = gs->GetFontSize();
double horiz_spacing = gs->GetHorizontalScale() / 100.0; Matrix2D
font_mtx(font_size * horiz_spacing, 0, 0, font_size, 0, 0);
if (font.GetType() != Font::e_Type3) {
double units_per_em = font.GetUnitsPerEm();
font_mtx *= Matrix2D(1.0/units_per_em, 0, 0, -1.0/units_per_em, 0,
0);
Matrix2D text_mtx(element->GetTextMatrix());
Matrix2D pos(1, 0, 0, 1, 0, 0);
for (; itr!=end; ++itr) {
pos.m_h = itr->x; pos.m_v = itr->y;
Matrix2D path_mtx(text_mtx * pos * font_mtx);
...
double width = font.GetWidth(itr->char_code);
// transofrm the width using path_mtx
double x = mtx.m_a * width;
double y = mtx.m_b * width;
// 'width' now represents character advance width in 'PDF User
Space'.
width = sqrt(x*x + y*y);
}