$extrastylesheet
Olena  User documentation 2.1
An Image Processing Platform
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
merging_hdoc.hh
1 // Copyright (C) 2010, 2011, 2012, 2013 EPITA Research and Development
2 // Laboratory (LRDE)
3 //
4 // This file is part of Olena.
5 //
6 // Olena is free software: you can redistribute it and/or modify it under
7 // the terms of the GNU General Public License as published by the Free
8 // Software Foundation, version 2 of the License.
9 //
10 // Olena is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 // General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with Olena. If not, see <http://www.gnu.org/licenses/>.
17 //
18 // As a special exception, you may use this file as part of a free
19 // software project without restriction. Specifically, if other files
20 // instantiate templates or use macros or inline functions from this
21 // file, or you compile this file and link it with other files to produce
22 // an executable, this file does not by itself cause the resulting
23 // executable to be covered by the GNU General Public License. This
24 // exception does not however invalidate any other reasons why the
25 // executable file might be covered by the GNU General Public License.
26 
27 #ifndef SCRIBO_TEXT_MERGING_HDOC_HH
28 # define SCRIBO_TEXT_MERGING_HDOC_HH
29 
33 
34 
35 #include <iostream>
36 #include <fstream>
37 #include <sstream>
38 #include <vector>
39 #include <set>
40 #include <algorithm>
41 
42 #include <mln/core/image/image2d.hh>
43 #include <mln/core/image/dmorph/image_if.hh>
44 #include <mln/util/array.hh>
45 #include <mln/io/pbm/load.hh>
46 #include <mln/io/pgm/save.hh>
47 
48 #include <mln/data/fill.hh>
49 #include <mln/data/wrap.hh>
50 
51 #include <mln/make/box2d.hh>
52 
53 #include <mln/value/rgb8.hh>
54 #include <mln/io/ppm/save.hh>
55 
56 #include <mln/draw/box.hh>
57 #include <mln/data/stretch.hh>
58 #include <mln/data/wrap.hh>
59 #include <mln/util/timer.hh>
60 
61 #include <scribo/text/look_like_text_lines.hh>
62 
63 
64 namespace scribo
65 {
66 
67  namespace text
68  {
69 
70  using namespace mln;
71 
72 
// Merge the lines of \p lines (horizontal-document heuristics).
//
// \param[in] lines  The line set to process. It is taken by const
//                   reference; the definition further down in this file
//                   forwards it to internal::draw_boxes() together with
//                   the domain of lines.components().labeled_image().
//
// \return The line set produced by internal::draw_boxes().
86  template <typename L>
87  line_set<L>
88  merging_hdoc(const scribo::line_set<L>& lines);
89 
90 
91 # ifndef MLN_INCLUDE_ONLY
92 
93 
94  namespace internal
95  {
96 
97  using namespace mln;
98  using value::int_u8;
99 
100 
101  template <typename T, typename T2>
102  void draw_box(image2d<T>& input, const box2d& b, T2 l)
103  {
104  const unsigned
105  delta = input.delta_offset(dpoint2d(1,0)),
106  nrows = b.nrows(),
107  ncols = b.ncols();
108  T* p_start = & input(b.pmin());
109  T* ptr = p_start;
110  for (unsigned r = 0; r < nrows; ++r)
111  {
112  ptr = p_start;
113  for (unsigned c = 0; c < ncols; ++c)
114  *ptr++ = l;
115  p_start += delta;
116  }
117  }
118 
119 
120 
121 
122  template <typename T, typename T2>
123  void draw_box(image2d<T>& input,
124  int pmin_row, int pmin_col,
125  int pmax_row, int pmax_col,
126  T2 l)
127  {
128  if (pmax_row < pmin_row || pmax_col < pmin_col)
129  return;
130 
131  const unsigned
132  input_nrows_1 = input.nrows() - 1,
133  input_ncols_1 = input.ncols() - 1;
134  if (pmin_row < 0) pmin_row = 0;
135  if (pmin_col < 0) pmin_col = 0;
136  if (pmax_row > input_nrows_1) pmax_row = input_nrows_1;
137  if (pmax_col > input_ncols_1) pmax_col = input_ncols_1;
138 
139  const unsigned
140  delta = input.delta_offset(dpoint2d(1,0)),
141  nrows = pmax_row - pmin_row + 1,
142  ncols = pmax_col - pmin_col + 1;
143  T* p_start = & input.at_(pmin_row, pmin_col);
144  T* ptr = p_start;
145  for (unsigned r = 0; r < nrows; ++r)
146  {
147  ptr = p_start;
148  for (unsigned c = 0; c < ncols; ++c)
149  *ptr++ = l;
150  p_start += delta;
151  }
152  }
153 
154 
155 
156 
157  inline
158  unsigned my_find_root(mln::util::array<unsigned>& parent, unsigned x)
159  {
160  if (parent[x] == x)
161  return x;
162  return parent[x] = my_find_root(parent, parent[x]);
163  }
164 
165 
/// Order two labels so that \p l1 holds the smallest one.
///
/// The labels are exchanged unless \p l2 is strictly greater than
/// \p l1. Both arguments are taken by reference: with by-value
/// parameters the exchange would only affect local copies and the call
/// would have no effect for the caller.
///
/// \param[in,out] l1  On return, the smallest of the two labels.
/// \param[in,out] l2  On return, the greatest of the two labels.
inline
void swap_ordering(unsigned& l1, unsigned& l2)
{
  if (l2 > l1)
    return;
  const unsigned l1_ = l1;
  l1 = l2;
  l2 = l1_;
}
175 
176 
177 
178  template <typename L>
179  unsigned do_union(scribo::line_set<L>& lines,
180  unsigned l1,
181  unsigned l2,
183  {
184  l1 = my_find_root(parent, l1);
185  l2 = my_find_root(parent, l2);
186  if (l1 == l2)
187  return l1;
188 
189  swap_ordering(l1, l2);
190  parent[l2] = l1; // The smallest label value is root.
191 
192  line_info<L>& l1_info = lines(l1);
193  line_info<L>& l2_info = lines(l2);
194 
195  if (l2_info.card() > l1_info.card())
196  {
197  // we transfer data from the largest item to the root one.
198  scribo::line_info<L> tmp = l1_info;
199  std::swap(l1_info, l2_info);
200  l1_info.fast_merge(tmp);
201 
202  // We must set manually the tag for lines(l2) since it is
203  // not used directly in merge process so its tag cannot be
204  // updated automatically.
205  l2_info.update_tag(line::Merged);
206  l2_info.set_hidden(true);
207  }
208  else
209  l1_info.fast_merge(l2_info);
210 
211  // l1's tag is automatically set to line::Needs_Precise_Stats_Update
212  // l2's tag is automatically set to line::Merged
213 
214  return l1;
215  }
216 
217 
218 
219 
220  inline
221  box2d enlarge(const box2d& b, int delta)
222  {
223  box2d b_(point2d(b.pmin().row(), b.pmin().col() - delta),
224  point2d(b.pmax().row(), b.pmax().col() + delta));
225  return b_;
226  }
227 
228 
229  template <typename L>
230  bool between_separators(const scribo::line_set<L>& lines,
231  const line_id_t& l1_,
232  const line_id_t& l2_)
233  {
234  const scribo::line_info<L>& l1 = lines(l1_);
235  const scribo::line_info<L>& l2 = lines(l2_);
236 
237  // No separators found in image.
238  mln_precondition(lines.components().has_separators());
239 
240  const box2d& l1_bbox = l1.bbox();
241  const box2d& l2_bbox = l2.bbox();
242 
243  const unsigned
244  col1 = l1_bbox.pcenter().col(),
245  col2 = l2_bbox.pcenter().col();
246  const mln_ch_value(L, bool)&
247  separators = lines.components().separators();
248 
249  // Checking for separators starting from 1 / 4, 3/ 4 and the
250  // center of the box
251  typedef const bool* sep_ptr_t;
252  sep_ptr_t sep_ptr, sep_ptr_top, sep_ptr_bottom, end;
253 
254  if (col1 < col2)
255  {
256  const unsigned quarter =
257  ((l1_bbox.pcenter().row() - l1_bbox.pmin().row()) >> 1);
258 
259  sep_ptr = &separators(l1_bbox.pcenter());
260  sep_ptr_top = &separators(point2d(l1_bbox.pmin().row() + quarter,
261  l1_bbox.pcenter().col()));
262  sep_ptr_bottom = &separators(point2d(l1_bbox.pmax().row() - quarter,
263  l1_bbox.pcenter().col()));
264  end = sep_ptr + col2 - col1;
265  }
266  else
267  {
268  const unsigned quarter =
269  ((l2_bbox.pcenter().row() - l2_bbox.pmin().row()) >> 1);
270 
271  sep_ptr = &separators(l2_bbox.pcenter());
272  sep_ptr_top = &separators(point2d(l2_bbox.pmin().row() + quarter,
273  l2_bbox.pcenter().col()));
274  sep_ptr_bottom = &separators(point2d(l2_bbox.pmax().row() - quarter,
275  l2_bbox.pcenter().col()));
276  end = sep_ptr + col1 - col2;
277  }
278 
279  // If sep_ptr is true, then a separator is reached.
280  while (!*sep_ptr && !*sep_ptr_top && !*sep_ptr_bottom && sep_ptr != end)
281  {
282  ++sep_ptr;
283  ++sep_ptr_top;
284  ++sep_ptr_bottom;
285  }
286 
287  return (*sep_ptr || *sep_ptr_top || *sep_ptr_bottom);
288  }
289 
290 
299  template <typename L>
300  bool lines_can_merge(scribo::line_set<L>& lines,
301  const scribo::line_id_t& l1_,
302  const scribo::line_id_t& l2_)
303  {
304  scribo::line_info<L>& l1 = lines(l1_);
305  scribo::line_info<L>& l2 = lines(l2_);
306 
307  // Parameters.
308  const float x_ratio_max = 1.7f;
309  const float baseline_delta_max =
310  0.5f * std::min(l1.x_height(), l2.x_height());
311 
312  const box2d& l1_bbox = l1.bbox();
313  const box2d& l2_bbox = l2.bbox();
314 
315  const point2d& l1_pmin = l1_bbox.pmin();
316  const point2d& l2_pmin = l2_bbox.pmin();
317  const point2d& l1_pmax = l1_bbox.pmax();
318  const point2d& l2_pmax = l2_bbox.pmax();
319 
320  const bool l1_has_separators = lines.components().has_separators();
321  const bool l1_l2_between_separators = (l1_has_separators) ?
322  between_separators(lines, l1_, l2_) : false;
323  const float l_ted_cw = l2.char_width();
324 
325  const float dx = std::max(l1_pmin.col(), l2_pmin.col())
326  - std::min(l1_pmax.col(), l2_pmax.col());
327  const float dy = std::max(l1_pmin.row(), l2_pmin.row())
328  - std::min(l1_pmax.row(), l2_pmax.row());
329 
330  // Particular case of "
331  {
332  if (// Must have 2 characters
333  (l1.card() == 2
334  // The box height must be smaller than the touched line x height
335  && l1_bbox.height() < l2.x_height())
336  // The line must be vertically and horizontally close to
337  // the touched line
338  && (dx < l_ted_cw && dy < 0)
339  // No separator between the two lines
340  && not (l1_l2_between_separators))
341  {
342  // Line is then considered as punctuation
343  l1.update_type(line::Punctuation);
344  return true;
345  }
346  }
347 
348  // Particular case like merging between a line and [5]
349  {
350  const mln::def::coord
351  top_row_l2 = l2_pmin.row(),
352  top_row_l1 = l1_pmin.row(),
353  bot_row = l2_pmax.row();
354  const float x1 = l1.x_height(), x2 = l2.x_height();
355  const float x_ratio = std::max(x1, x2) / std::min(x1, x2);
356 
357  if (// No separator
358  !l1_l2_between_separators
359  // The x height ration must be lower than 2
360  && (x_ratio < 2.0f)
361  // Baseline alignment
362  && (std::abs(bot_row - l1.baseline()) < baseline_delta_max)
363  // The top of the boxes must be aligned
364  && (std::abs(top_row_l2 - top_row_l1) < 5)
365  // Distance between the line and the touched line.
366  && dx < 5.0f * l_ted_cw)
367  {
368  return true;
369  }
370  }
371 
372  // Similarity of x_height.
373  {
374  const float x1 = l1.x_height(), x2 = l2.x_height();
375  const float x_ratio = std::max(x1, x2) / std::min(x1, x2);
376  if (x_ratio > x_ratio_max)
377  return false;
378  }
379 
380  // Same baseline.
381  {
382  if (std::abs(l1.baseline() - l2.baseline()) > baseline_delta_max)
383  return false;
384  }
385 
386  // left / right
387  const unsigned
388  col1 = l1_bbox.pcenter().col(),
389  col2 = l2_bbox.pcenter().col();
390  if (col1 < col2)
391  {
392  if ((col1 + l1_bbox.width() / 4) >= (col2 - l2_bbox.width() / 4))
393  return false;
394  }
395  else
396  if ((col2 + l2_bbox.width() / 4) >= (col1 - l1_bbox.width() / 4))
397  return false;
398 
399  // Check that there is no separator in between.
400  if (l1_has_separators)
401  return ! l1_l2_between_separators;
402 
403  return true;
404  }
405 
406 
407 
408 
409  inline
410  int horizontal_distance(const box2d& l1,
411  const box2d& l2)
412  {
413  if (l1.pcenter().col() < l2.pcenter().col())
414  return l2.pmin().col() - l1.pmax().col();
415  else
416  return l1.pmin().col() - l2.pmax().col();
417  }
418 
419 
420 
421 
437  template <typename L>
438  bool non_text_and_text_can_merge(scribo::line_set<L>& lines,
439  const scribo::line_id_t& l_cur_, // current
440  const scribo::line_id_t l_ted_) // touched
441  {
442  scribo::line_info<L>& l_cur = lines(l_cur_);
443  scribo::line_info<L>& l_ted = lines(l_ted_);
444 
445  if (l_cur.type() == line::Text || l_ted.type() != line::Text)
446  return false;
447  // the current object is a NON-textline
448  // the background (touched) object is a textline
449 
450 
451  // Check that there is no separator in between.
452  if (lines.components().has_separators()
453  && between_separators(lines, l_cur_, l_ted_))
454  return false;
455 
456  const box2d& l_cur_bbox = l_cur.bbox();
457  const box2d& l_ted_bbox = l_ted.bbox();
458 
459  const point2d& l_cur_pmin = l_cur_bbox.pmin();
460  const point2d& l_ted_pmin = l_ted_bbox.pmin();
461  const point2d& l_cur_pmax = l_cur_bbox.pmax();
462  const point2d& l_ted_pmax = l_ted_bbox.pmax();
463 
464  const float dx = std::max(l_cur_pmin.col(), l_ted_pmin.col())
465  - std::min(l_cur_pmax.col(), l_ted_pmax.col());
466  const float l_ted_cw = l_ted.char_width();
467  const float l_ted_x_height = l_ted.x_height();
468 
469  const unsigned l_cur_height = l_cur_bbox.height();
470  const unsigned l_cur_width = l_cur_bbox.width();
471 
472  // General case (for tiny components like --> ',:."; <--):
473  if (l_cur_height < l_ted_x_height
474  && l_cur_height > 0.05f * l_ted_x_height
475  && float(l_cur_width) / float(l_cur.card()) < l_ted.char_width()
476  && dx < 2 * l_ted_cw
477  && l_cur_pmin.row() < l_ted.baseline())
478  {
479  l_cur.update_type(line::Punctuation);
480  return true;
481  }
482 
483  // Special case for '---':
484  if (// small height:
485  l_cur_height < l_ted_x_height
486  // // not so long width:
487  && l_cur_width > 0.8 * l_ted_cw
488  && l_cur_width < 5 * l_ted_cw
489  // align with the 'x' center:
490  && std::abs((l_ted.baseline() + l_ted.meanline()) / 2 - l_cur.bbox().pcenter().row()) < 7
491  // tiny spacing:
492  && unsigned(horizontal_distance(l_cur_bbox, l_ted_bbox)) < 2 * l_ted_cw
493  )
494  {
495  return true;
496  }
497 
498 
499  // Special case
500 
501  // Looking for alignement.
502  const mln::def::coord
503  top_row = l_cur.bbox().pmin().row(),
504  bot_row = l_cur.bbox().pmax().row();
505 
506  const box2d& l_ted_ebbox = l_ted.ebbox();
507 
508  if ((std::abs(bot_row - l_ted.baseline()) < 5
509  || std::abs(bot_row - l_ted_ebbox.pmax().row()) < 5)
510  &&
511  (std::abs(top_row - l_ted.meanline()) < 5
512  || std::abs(top_row - l_ted_ebbox.pmin().row()) < 5)
513  && dx < 5.0f * l_ted_cw)
514  {
515  return true;
516  }
517 
518  return false;
519  }
520 
521 
522 
523 
524 
601  // FIXME:
602  //
603  // Important note: after merging two lines, we draw the
604  // merged line over the existing one; we have to ensure that we
605  // cover the previous rectangle (otherwise we have a label in
606  // 'billboard' that is not used anymore! and it can mix up the
607  // detection of upcoming merges...) so this delta has to remain
608  // the same during one pass. Another solution (yet more costly)
609  // could be of erasing the previous rectangle before re-drawing...
610  //
611  template <typename L>
612  void
613  one_merge_pass(unsigned ith_pass,
614  const box2d& domain,
615  std::vector<scribo::line_id_t>& v, // Ids sorted by bbox size.
616  scribo::line_set<L>& lines, // Tagged Lines (looks_like_a_text_line?)
618  {
619  image2d<unsigned> billboard(domain);
620  data::fill(billboard, 0);
621 
622 # ifndef SCRIBO_NDEBUG
623  image2d<value::int_u8> log(domain);
624  data::fill(log, 0);
625 # endif // ! SCRIBO_NDEBUG
626 
627  const unsigned n = v.size();
628  unsigned l_;
629 
630  unsigned
631  count_txtline_IN_txtline = 0,
632  count_txtline_IN_junk = 0,
633  count_two_lines_merge = 0,
634  count_new_txtline = 0,
635  count_comp_IN_txtline = 0,
636  count_comp_HITS_txtline = 0,
637  count_WTF = 0;
638 
639  for (int i = n - 1; i >= 0; --i)
640  {
641  unsigned l = v[i];
642 
643  if (parent[l] != l) // not a root, so has already merged, thus ignore it
644  continue;
645 
646  const box2d& b = lines(l).bbox();
647 
648  // unsigned tl, tr, ml, mc, mr, bl, br;
649 
650  const box2d& b_ = lines(l).ebbox();
651 
652  /*
653  tl tr
654  x---------------x
655  | |
656  | mc |
657  ml x x x mr
658  | |
659  | |
660  x---------------x
661  bl br
662 
663  */
664 
665 
666  const unsigned tl = billboard(b_.pmin());
667  const unsigned tr = billboard.at_(b_.pmin().row(), b_.pmax().col());
668  const unsigned ml = billboard.at_(b_.pcenter().row(), b_.pmin().col());
669  const unsigned mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col());
670  const unsigned mr = billboard.at_(b_.pcenter().row(), b_.pmax().col());
671  const unsigned bl = billboard.at_(b_.pmax().row(), b_.pmin().col());
672  const unsigned br = billboard(b_.pmax());
673 
674  typedef std::set<unsigned> set_t;
675  std::set<unsigned> labels;
676  labels.insert(tl);
677  labels.insert(tl);
678  labels.insert(tr);
679  labels.insert(ml);
680  labels.insert(mc);
681  labels.insert(mr);
682  labels.insert(bl);
683  labels.insert(br);
684 
685 
686  for (set_t::const_iterator it = labels.begin();
687  it != labels.end();
688  ++it)
689  {
690  if (*it == 0)
691  continue;
692 
693  if (lines(*it).type() != line::Text)
694  std::cerr << "outch: we have hit, so drawn, a non-text..." << std::endl;
695  }
696 
697 
698  if (labels.size() == 1) // Same behavior for all anchors.
699  {
700  if (mc != 0)
701  {
702  // Main case: it is an "included" box (falling in an already drawn box)
703 
704  const line_info<L>& l_info = lines(l);
705  const line_info<L>& mc_info = lines(mc);
706 
707  if (l_info.type() == line::Text) // the current object IS a text line
708  {
709  if (mc_info.type() == line::Text) // included in a text line => weird
710  {
711  ++count_txtline_IN_txtline;
712 
713  // Particular case of "
714  // {
715  // if ((lines(l).card() == 2 &&
716  // lines(l).bbox().height() < lines(mc).x_height()) &&
717  // not (lines(l).holder().components().has_separators()
718  // && between_separators(lines(l),
719  // lines(mc))))
720 
721  const box2d& l_bbox = l_info.bbox();
722  const box2d& mc_bbox = mc_info.bbox();
723 
724  const point2d& l_pmin = l_bbox.pmin();
725  const point2d& mc_pmin = mc_bbox.pmin();
726  const point2d& l_pmax = l_bbox.pmax();
727  const point2d& mc_pmax = mc_bbox.pmax();
728 
729  const float dx = std::max(l_pmin.col(), mc_pmin.col())
730  - std::min(l_pmax.col(), mc_pmax.col());
731  const float dy = std::max(l_pmin.row(), mc_pmin.row())
732  - std::min(l_pmax.row(), mc_pmax.row());
733  const float l_ted_cw = mc_info.char_width();
734 
735  // We accept a line included into another only if it
736  // is horizontally close to the line's bbox and
737  // vertically aligned
738  // Obviously no separators between the two lines
739  if ((l_info.card() <= 5 ||
740  (std::abs(l_info.baseline() - mc_info.baseline())
741  < 5 && std::abs(l_info.meanline() -
742  mc_info.meanline()) < 5))
743  && dx < l_ted_cw && dy < 0
744  && not (lines.components().has_separators()
745  && between_separators(lines, l, mc)))
746  l = do_union(lines, l, mc, parent);
747  // }
748 
749 # ifndef SCRIBO_NDEBUG
750  // Log:
751  draw_box(log, b, 126);
752 # endif // ! SCRIBO_NDEBUG
753  }
754 
755  else // FIXME: Remove! since included in a
756  // non-text-line, so not drawn, so inclusion
757  // impossible!!!!!!!!!!
758  {
759  std::cerr << "error: should NOT happen (a text line included in a NON-text-line (so not drawn!!!)" << std::endl;
760  ++count_txtline_IN_junk;
761 
762  // a non-text-line (probably a drawing or a frame) includes a text line
763  draw_box(billboard, lines(l).ebbox(), l);
764 
765 # ifndef SCRIBO_NDEBUG
766  // Log:
767  draw_box(log, b, 100);
768 # endif // ! SCRIBO_NDEBUG
769  }
770 
771  }
772  else // the current object is NOT a text line
773  {
774  if (lines(mc).type() == line::Text) // included in a text line
775  {
776  ++count_comp_IN_txtline;
777 
778  // The current object is supposed to be punctuation
779  // since it is fully included in the bbox of a line
780  // of text, so we always want to merge this object
781  // with the line.
782  //
783  // However, in case of a bad quality document, we
784  // may not always want to merge since this object
785  // could be noise or garbage... So adding new
786  // criterions could fix this issue.
787  //
788  if (!non_text_and_text_can_merge(lines, l, mc))
789  continue;
790 
791  // Avoid the case when a large title ebbox overlap
792  // with a text column. In that case, the title may
793  // merge with punctuation from the text.
794  // if (lines(l).holder().components().has_separators()
795  // && between_separators(lines(l), lines(mc)))
796  // continue;
797 
798  // Mark current line as punctuation.
799  lines(l).update_type(line::Punctuation);
800 
801  // Merge non-text-line #l into text line #mc.
802  l_ = do_union(lines, mc, l, parent);
803  // We have to re-draw the original largest text line since
804  // it may change of label (take the one of the included line).
805  draw_box(billboard, lines(l_).ebbox(), l_);
806 
807 # ifndef SCRIBO_NDEBUG
808  // Log:
809  draw_box(log, b, 128);
810 # endif // ! SCRIBO_NDEBUG
811  }
812  }
813  }
814  else // mc == 0
815  {
816  // Main case: it is a "new" box, that might be drawn in the background.
817 
818  // we only draw this box if it is a text-line!!!
819  if (lines(l).type() == line::Text)
820  {
821  ++count_new_txtline;
822  draw_box(billboard, lines(l).ebbox(), l);
823 # ifndef SCRIBO_NDEBUG
824  // Log:
825  draw_box(log, b, 127);
826 # endif // ! SCRIBO_NDEBUG
827  }
828 # ifndef SCRIBO_NDEBUG
829  else
830  draw_box(log, b, 1);
831 # endif // ! SCRIBO_NDEBUG
832  }
833  }
834  else
835  {
836  l_ = l; // current label.
837 
838  // Particular cases.
839  for (set_t::const_iterator it = labels.begin();
840  it != labels.end();
841  ++it)
842  {
843  unsigned lcand = *it;
844 
845  if (lcand == 0) // Skip background.
846  continue;
847 
848  // if (lines(lcand).type() != line::Text)
849  // std::cerr << "again!" << std::endl;
850 
851 
852  if (lines(l_).type() == line::Text)
853  {
854  // l_ and lcand look like text line chunks.
855  if (lines_can_merge(lines, l_, lcand))
856  {
857  ++count_two_lines_merge;
858  l_ = do_union(lines, l_, lcand, parent);
859 
860  draw_box(billboard, lines(l_).ebbox(), l_);
861 
862 # ifndef SCRIBO_NDEBUG
863  // Log:
864  draw_box(log, b, 151);
865 # endif // ! SCRIBO_NDEBUG
866 
867  continue;
868  }
869  else
870  {
871  ++count_WTF;
872 
873 # ifndef SCRIBO_NDEBUG
874  // Log:
875  draw_box(log, b, 255);
876 # endif // ! SCRIBO_NDEBUG
877 
878  // (*) SEE BELOW
879  draw_box(billboard, lines(l_).ebbox(), l_);
880  }
881  }
882  else
883  {
884  // l_ does NOT looks like a text line chunk.
885  ++count_comp_HITS_txtline;
886  if (non_text_and_text_can_merge(lines, l_, lcand))
887  // a petouille merges with a text line?
888  {
889  ++count_comp_HITS_txtline;
890  l_ = do_union(lines, l_, lcand, parent);
891  draw_box(billboard, lines(l_).ebbox(), l_);
892 
893 # ifndef SCRIBO_NDEBUG
894  // Log:
895  draw_box(log, b, 169);
896 # endif // ! SCRIBO_NDEBUG
897 
898  continue;
899  }
900 # ifndef SCRIBO_NDEBUG
901  else
902  {
903  // Log:
904  draw_box(log, b, 254);
905  }
906 # endif // ! SCRIBO_NDEBUG
907  }
908 
909 
910  /* (*) Text lines verticaly overlap.
911 
912  --------------------------
913  | l |
914  | |
915  --------------------------
916  | lcand |
917  | |
918  --------------------------
919 
920  or
921 
922  --------------------------
923  | l |
924  | |
925  |---------------------------
926  |------------------------- |
927  | lcand |
928  ----------------------------
929 
930  or
931 
932  --------------------------
933  | lcand |
934  | |
935  |---------------------------
936  |------------------------- |
937  | l |
938  ----------------------------
939 
940  */
941 
942  }
943  }
944 
945  }
946 
947 
948  (void) ith_pass;
949  }
950 
951 
952 
953 
954  template <typename L>
955  struct order_lines_id
956  {
957  order_lines_id(const scribo::line_set<L>& lines)
958  : lines_(lines)
959  {
960  }
961 
962  bool operator()(const scribo::line_id_t& l1, const scribo::line_id_t& l2) const
963  {
964  const unsigned l1_nsites = lines_(l1).bbox().nsites();
965  const unsigned l2_nsites = lines_(l2).bbox().nsites();
966 
967  if (l1_nsites == l2_nsites)
968  return l1 < l2;
969  return l1_nsites < l2_nsites;
970  }
971 
972  scribo::line_set<L> lines_;
973  };
974 
975 
976  template <typename L>
978  draw_boxes(const box2d& input_domain,
979  const scribo::line_set<L>& lines_)
980  {
981  scribo::line_set<L> lines = lines_.duplicate();
982  const unsigned n = lines.nelements();
983 
984  order_lines_id<L> func(lines);
985  std::vector<scribo::line_id_t> v;
986  v.reserve(n);
987 
988  // Union-find parent data, used to merge lines.
989  mln::util::array<unsigned> parent(n + 1);
990 
991  // Initialize data
992  parent(0) = 0;
993  for (unsigned l = 1; l < parent.nelements(); ++l)
994  {
995  v.push_back(l);
996  parent[l] = l;
997  }
998 
999  // Sort lines by bbox.nelements() and ids.
1000  std::sort(v.begin(), v.end(), func);
1001 
1002  // Setting lines as text lines according to specific criterions.
1003  for_all_lines(l, lines)
1004  if (looks_like_a_text_line(lines(l)))
1005  lines(l).update_type(line::Text);
1006 
1007 
1008  // First pass
1009  one_merge_pass(1, input_domain, v, lines, parent);
1010 
1011  //lines.force_stats_update();
1012 
1013  // Sort lines by bbox.nelements() and ids again!
1014  // line may have grown differently since the first pass.
1015  std::sort(v.begin(), v.end(), func);
1016 
1017 
1018  // Second pass
1019  one_merge_pass(2, input_domain, v, lines, parent); // <- last pass
1020 
1021  lines.force_stats_update();
1022 
1023  return lines;
1024  }
1025 
1026  } // end of namespace scribo::text::internal
1027 
1028 
1029 
1030  // Facade
1031 
1032  template <typename L>
1033  line_set<L>
1034  merging_hdoc(const scribo::line_set<L>& lines)
1035  {
1036  using namespace mln;
1037 
1038  scribo::line_set<L> output
1039  = internal::draw_boxes(lines.components().labeled_image().domain(),
1040  lines);
1041  return output;
1042  }
1043 
1044 # endif // ! MLN_INCLUDE_ONLY
1045 
1046  } // end of namespace scribo::text
1047 
1048 } // end of namespace scribo
1049 
1050 #endif // ! SCRIBO_TEXT_MERGING_HDOC_HH