How can I get the bounding box for each character in the text run?

Q:
How can I get the bounding box for each character in the text run?
---

A:

GetBBox() returns a scaled box using the BBox, ascent, and descent
information that is available in the font dictionary. This calculation
is quite fast, but unfortunately this information is often incorrect
(due to the abundance of poor PDF producers out there).

Also, the returned bounding box applies to the entire text run and not
to individual characters. For applications where accurate bbox
information is important, there are a couple of alternatives:

a) Obtain a bounding box by finding the extents of the transformed glyph
outline.
b) Use CharIterator->x/y and element->GState info (such as the font
size, char and word spacing, CTM and text matrix, and font.GetWidth(),
etc) to compute the bounding box.

In case you prefer to go with method a), you can obtain the bounding
box for each character by obtaining the glyph path and calling
GetPathBBox() on the returned points.

GetPathBBox() accepts a transform matrix (path_mtx) which can be
calculated as follows (for C# or VB.NET sample code similar to
GetPathBBox, please see ElementReaderAdvanced ):

=====
void MyFunct(...)
{
CharIterator itr = element->CharBegin(); CharIterator end =
element->CharEnd();

GState* gs = element->GetGState();
PDF::Font font = gs->GetFont();

double font_size = gs->GetFontSize();
double horiz_spacing = gs->GetHorizontalScale() / 100.0; Matrix2D
font_mtx(font_size * horiz_spacing, 0, 0, font_size, 0, 0);

if (font.GetType() != Font::e_Type3) {
  double units_per_em = font.GetUnitsPerEm();
  font_mtx *= Matrix2D(1.0/units_per_em, 0, 0, -1.0/units_per_em, 0,
0);
  Matrix2D text_mtx(element->GetTextMatrix());
  Matrix2D pos(1, 0, 0, 1, 0, 0);

  for (; itr!=end; ++itr) {
    pos.m_h = itr->x; pos.m_v = itr->y;
    Matrix2D path_mtx(text_mtx * pos * font_mtx);
    ...
    font.GetGlyphPath(itr->char_code, m_path_oprs, m_path_data, true);

    // transofrm all glyph points using path_mtx
    {
  int sz = m_path_data.size();
  assert(sz%2 == 0);
  Real* pt = &m_path_data[0];
  Real* pt_end = pt + sz;
  for (++pt; pt<pt_end; pt+=2) path_mtx.Mult(*(pt-1), *pt);
    }

    Rect bbox;
    GetPathBBox(&m_path_data[0], int(m_path_data.size()), (char*)
&m_path_oprs[0], int(m_path_oprs.size()), bbox.x1, bbox.y1, bbox.x2,
bbox.y2);

}

bool GetPathBBox(const double* data, int data_sz, const char* opr, int
opr_sz, double &min_x, double &min_y, double &max_x, double &max_y) {
  const char *opr_itr = opr, *opr_end = opr + opr_sz;
  if (opr_itr >= opr_end) return false;

  const double *data_itr = data, *data_end = data + data_sz;
  double x, y;
  static const double invalid_value = 1e300;
  max_x = max_y = -invalid_value;
  min_x = min_y = invalid_value;
  for (; opr_itr<opr_end; ++opr_itr)
  {
    switch(*opr_itr)
    {
    case Element::e_moveto:
      if (!(data_itr+2<=data_end)) return false;
      x = *data_itr; ++data_itr;
      if (x < min_x) min_x = x;
      if (x > max_x) max_x = x;

      y = *data_itr; ++data_itr;
      if (y < min_y) min_y = y;
      if (y > max_y) max_y = y;
      break;
    case Element::e_lineto:
      if (!(data_itr+2<=data_end)) return false;
      x = *data_itr; ++data_itr;
      if (x < min_x) min_x = x;
      if (x > max_x) max_x = x;

      y = *data_itr; ++data_itr;
      if (y < min_y) min_y = y;
      if (y > max_y) max_y = y;

      break;
    case Element::e_rect:
      if (!(data_itr+4<=data_end)) return false;

      x = *data_itr; ++data_itr;
      if (x < min_x) min_x = x;
      if (x > max_x) max_x = x;

      y = *data_itr; ++data_itr;
      if (y < min_y) min_y = y;
      if (y > max_y) max_y = y;

      x = x + *data_itr; ++data_itr;
      if (x < min_x) min_x = x;
      if (x > max_x) max_x = x;

      y = y + *data_itr; ++data_itr;
      if (y < min_y) min_y = y;
      if (y > max_y) max_y = y;

      break;
    case Element::e_cubicto:
      if (!(data_itr+6<=data_end)) return false;

      x = *data_itr; ++data_itr;
      if (x < min_x) min_x = x;
      if (x > max_x) max_x = x;

      y = *data_itr; ++data_itr;
      if (y < min_y) min_y = y;
      if (y > max_y) max_y = y;

      x = *data_itr; ++data_itr;
      if (x < min_x) min_x = x;
      if (x > max_x) max_x = x;

      y = *data_itr; ++data_itr;
      if (y < min_y) min_y = y;
      if (y > max_y) max_y = y;

      x = *data_itr; ++data_itr;
      if (x < min_x) min_x = x;
      if (x > max_x) max_x = x;

      y = *data_itr; ++data_itr;
      if (y < min_y) min_y = y;
      if (y > max_y) max_y = y;

      break;
    case Element::e_conicto:
      if (!(data_itr+4<=data_end)) return false;

      x = *data_itr; ++data_itr;
      if (x < min_x) min_x = x;
      if (x > max_x) max_x = x;

      y = *data_itr; ++data_itr;
      if (y < min_y) min_y = y;
      if (y > max_y) max_y = y;

      x = *data_itr; ++data_itr;
      if (x < min_x) min_x = x;
      if (x > max_x) max_x = x;

      y = *data_itr; ++data_itr;
      if (y < min_y) min_y = y;
      if (y > max_y) max_y = y;

      break;
    }
  }

  return (min_x != invalid_value && min_y != invalid_value);
}

//////////////////////////////////////////////////////////////

The second option may look along the following lines:

void MyFunct(...)
{
CharIterator itr = element->CharBegin(); CharIterator end = element-

CharEnd();

GState* gs = element->GetGState();
Font font = gs->GetFont();

double font_size = gs->GetFontSize();
double horiz_spacing = gs->GetHorizontalScale() / 100.0; Matrix2D
font_mtx(font_size * horiz_spacing, 0, 0, font_size, 0, 0);

if (font.GetType() != Font::e_Type3) {
  double units_per_em = font.GetUnitsPerEm();
  font_mtx *= Matrix2D(1.0/units_per_em, 0, 0, -1.0/units_per_em, 0,
0);
  Matrix2D text_mtx(element->GetTextMatrix());
  Matrix2D pos(1, 0, 0, 1, 0, 0);

  for (; itr!=end; ++itr) {
    pos.m_h = itr->x; pos.m_v = itr->y;
    Matrix2D path_mtx(text_mtx * pos * font_mtx);
    ...
    double width = font.GetWidth(itr->char_code);

    // transofrm the width using path_mtx
    double x = mtx.m_a * width;
    double y = mtx.m_b * width;

    // 'width' now represents character advance width in 'PDF User
Space'.
    width = sqrt(x*x + y*y);
}

Q:

How can I get the bounding box for each character in the text run? I
saw an implementation in C++, but I am looking for C# code.
---

A:
The following is yet another example of how to obtain the bounding box for
each character in the text run (implemented in C#):

// Builds an untransformed box for the character the iterator currently points at.
//   itr         - iterator positioned on the character of interest
//   font        - font of the text run, used to look up the character's width
//   horiz_scale - horizontal scale as a fraction (1.0 == 100%)
//   font_sz     - font size of the text run
//   ascent      - offset added to the character's y to get the top edge
//   descent     - offset added to the character's y to get the bottom edge
// The box starts at the character's x and extends to the right by the
// character's width, which is divided by 1000 and then scaled by
// horiz_scale * font_sz.
static Rect GetGlyphBBox(CharIterator itr, pdftron.PDF.Font font,
double horiz_scale, double font_sz, double ascent, double descent)
{
    // Simple fonts are looked up by character code directly; other fonts
    // need the code mapped to a CID first.
    double advance;
    if (font.IsSimple())
    {
        advance = font.GetWidth(itr.Current().char_code) / 1000.0;
    }
    else
    {
        int cid = font.MapToCID(itr.Current().char_code);
        advance = font.GetWidth(cid) / 1000.0;
    }
    advance *= horiz_scale * font_sz;

    Rect glyph_box = new Rect();
    glyph_box.x1 = itr.Current().x;
    glyph_box.x2 = glyph_box.x1 + advance;
    glyph_box.y1 = itr.Current().y + descent;
    glyph_box.y2 = itr.Current().y + ascent;
    return glyph_box;
}

// Transforms the four corners of 'inn' by 'mtx' and returns the axis-aligned
// rectangle that encloses the transformed corners.
static Rect GetBBoxTransfRect(Rect inn, Matrix2D mtx)
{
    // Corners in order: (x1,y1), (x2,y1), (x2,y2), (x1,y2).
    double[] cx = { inn.x1, inn.x2, inn.x2, inn.x1 };
    double[] cy = { inn.y1, inn.y1, inn.y2, inn.y2 };

    for (int i = 0; i < 4; ++i)
    {
        mtx.Mult(ref cx[i], ref cy[i]);
    }

    double lo_x = cx[0], hi_x = cx[0];
    double lo_y = cy[0], hi_y = cy[0];
    for (int i = 1; i < 4; ++i)
    {
        lo_x = Math.Min(lo_x, cx[i]);
        lo_y = Math.Min(lo_y, cy[i]);
        hi_x = Math.Max(hi_x, cx[i]);
        hi_y = Math.Max(hi_y, cy[i]);
    }

    return new Rect(lo_x, lo_y, hi_x, hi_y);
}

and the following is an example of how to use the above functions:

// Visit every element returned by the reader; only text elements are processed.
while ((element = reader.Next()) != null)
{
    if (element.GetType() != Element.Type.e_text)
    {
        continue;
    }

    GState run_gs = element.GetGState();
    pdftron.PDF.Font run_font = run_gs.GetFont();
    double run_font_sz = run_gs.GetFontSize();

    // Split the font size into a part above and a part below the baseline,
    // in proportion to the font's bounding box.
    Rect font_bbox = run_font.GetBBox();
    double font_bbox_height = font_bbox.y2 - font_bbox.y1;
    double run_descent = run_font_sz * font_bbox.y1 / font_bbox_height;
    double run_ascent = run_font_sz * font_bbox.y2 / font_bbox_height;

    // The horizontal scale is stored as a percentage.
    double run_horiz_scale = run_gs.GetHorizontalScale() / 100.0;

    // Combined matrix: text matrix followed by the CTM.
    Matrix2D run_text_mtx = element.GetTextMatrix();
    Matrix2D run_mtx = element.GetCTM() * run_text_mtx;

    CharIterator ch_itr = element.CharBegin();
    CharIterator ch_end = element.CharEnd();
    while (ch_itr != ch_end)
    {
        // Box of the current character, then the enclosing rectangle of
        // that box after applying the combined matrix.
        Rect glyph_rect = GetGlyphBBox(ch_itr, run_font, run_horiz_scale,
            run_font_sz, run_ascent, run_descent);
        glyph_rect = GetBBoxTransfRect(glyph_rect, run_mtx);

        ch_itr.Next();
    }
}