$extrastylesheet
Olena  User documentation 2.1
An Image Processing Platform
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
merging.hh
1 // Copyright (C) 2010, 2012, 2013 EPITA Research and Development Laboratory
2 // (LRDE)
3 //
4 // This file is part of Olena.
5 //
6 // Olena is free software: you can redistribute it and/or modify it under
7 // the terms of the GNU General Public License as published by the Free
8 // Software Foundation, version 2 of the License.
9 //
10 // Olena is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 // General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with Olena. If not, see <http://www.gnu.org/licenses/>.
17 //
18 // As a special exception, you may use this file as part of a free
19 // software project without restriction. Specifically, if other files
20 // instantiate templates or use macros or inline functions from this
21 // file, or you compile this file and link it with other files to produce
22 // an executable, this file does not by itself cause the resulting
23 // executable to be covered by the GNU General Public License. This
24 // exception does not however invalidate any other reasons why the
25 // executable file might be covered by the GNU General Public License.
26 
27 #ifndef SCRIBO_TEXT_MERGING_HH
28 # define SCRIBO_TEXT_MERGING_HH
29 
33 
34 
35 #include <iostream>
36 #include <fstream>
37 #include <sstream>
38 #include <vector>
39 #include <set>
40 #include <algorithm>
41 
42 #include <mln/core/image/image2d.hh>
43 #include <mln/core/image/dmorph/image_if.hh>
44 #include <mln/util/array.hh>
45 #include <mln/io/pbm/load.hh>
46 #include <mln/io/pgm/save.hh>
47 
48 #include <mln/data/fill.hh>
49 #include <mln/data/wrap.hh>
50 
51 #include <mln/make/box2d.hh>
52 
53 #include <mln/value/rgb8.hh>
54 #include <mln/io/ppm/save.hh>
55 
56 #include <mln/draw/box.hh>
57 #include <mln/data/stretch.hh>
58 #include <mln/data/wrap.hh>
59 #include <mln/util/timer.hh>
60 
61 #include <scribo/text/look_like_text_lines.hh>
62 
63 
64 namespace scribo
65 {
66 
67  namespace text
68  {
69 
70  using namespace mln;
71 
72 
// Facade of the text-line merging step: takes a set of lines and returns
// a set of lines.  This is only the declaration; the definition further
// down in this file is compiled unless MLN_INCLUDE_ONLY is defined.
86  template <typename L>
87  line_set<L>
88  merging(const scribo::line_set<L>& lines);
89 
90 
91 # ifndef MLN_INCLUDE_ONLY
92 
93 
94  namespace internal
95  {
96 
97  using namespace mln;
98  using value::int_u8;
99 
100 
101  template <typename T, typename T2>
102  void draw_box(image2d<T>& input, const box2d& b, T2 l)
103  {
104  const unsigned
105  delta = input.delta_offset(dpoint2d(1,0)),
106  nrows = b.nrows(),
107  ncols = b.ncols();
108  T* p_start = & input(b.pmin());
109  T* ptr = p_start;
110  for (unsigned r = 0; r < nrows; ++r)
111  {
112  ptr = p_start;
113  for (unsigned c = 0; c < ncols; ++c)
114  *ptr++ = l;
115  p_start += delta;
116  }
117  }
118 
119 
120 
121 
122  template <typename T, typename T2>
123  void draw_box(image2d<T>& input,
124  int pmin_row, int pmin_col,
125  int pmax_row, int pmax_col,
126  T2 l)
127  {
128  if (pmax_row < pmin_row || pmax_col < pmin_col)
129  return;
130 
131  const unsigned
132  input_nrows_1 = input.nrows() - 1,
133  input_ncols_1 = input.ncols() - 1;
134  if (pmin_row < 0) pmin_row = 0;
135  if (pmin_col < 0) pmin_col = 0;
136  if (pmax_row > input_nrows_1) pmax_row = input_nrows_1;
137  if (pmax_col > input_ncols_1) pmax_col = input_ncols_1;
138 
139  const unsigned
140  delta = input.delta_offset(dpoint2d(1,0)),
141  nrows = pmax_row - pmin_row + 1,
142  ncols = pmax_col - pmin_col + 1;
143  T* p_start = & input.at_(pmin_row, pmin_col);
144  T* ptr = p_start;
145  for (unsigned r = 0; r < nrows; ++r)
146  {
147  ptr = p_start;
148  for (unsigned c = 0; c < ncols; ++c)
149  *ptr++ = l;
150  p_start += delta;
151  }
152  }
153 
154 
155 
156 
157  inline
158  unsigned my_find_root(mln::util::array<unsigned>& parent, unsigned x)
159  {
160  if (parent[x] == x)
161  return x;
162  return parent[x] = my_find_root(parent, parent[x]);
163  }
164 
165 
// Reorder two labels in place so that, on return, l1 <= l2.
//
// The arguments are taken by reference: taking them by value would make
// the exchange affect local copies only and leave the caller's labels
// untouched.
inline
void swap_ordering(unsigned& l1, unsigned& l2)
{
  if (l2 > l1)
    return; // Already ordered.

  const unsigned former_l1 = l1;
  l1 = l2;
  l2 = former_l1;
}
175 
176 
177 
178  template <typename L>
179  unsigned do_union(scribo::line_set<L>& lines,
180  unsigned l1,
181  unsigned l2,
183  {
184  l1 = my_find_root(parent, l1);
185  l2 = my_find_root(parent, l2);
186  if (l1 == l2)
187  {
188  return l1;
189  }
190 
191  swap_ordering(l1, l2);
192  parent[l2] = l1; // The smallest label value is root.
193 
194  line_info<L>& l1_info = lines(l1);
195  line_info<L>& l2_info = lines(l2);
196 
197  if (l2_info.card() > l1_info.card())
198  {
199  // we transfer data from the largest item to the root one.
200  scribo::line_info<L> tmp = l1_info;
201  std::swap(l1_info, l2_info);
202  l1_info.fast_merge(tmp);
203 
204  // We must set manually the tag for lines(l2) since it is
205  // not used directly in merge process so its tag cannot be
206  // updated automatically.
207  l2_info.update_tag(line::Merged);
208  l2_info.set_hidden(true);
209  }
210  else
211  l1_info.fast_merge(l2_info);
212 
213  // l1's tag is automatically set to line::Needs_Precise_Stats_Update
214  // l2's tag is automatically set to line::Merged
215 
216  return l1;
217  }
218 
219 
220 
221 
222  inline
223  box2d enlarge(const box2d& b, int delta)
224  {
225  box2d b_(point2d(b.pmin().row(), b.pmin().col() - delta),
226  point2d(b.pmax().row(), b.pmax().col() + delta));
227  return b_;
228  }
229 
230 
231  template <typename L>
232  bool between_separators(const line_set<L>& lines,
233  const line_id_t& l1_,
234  const line_id_t& l2_)
235  {
236  // No separators found in image.
237  mln_precondition(lines.components().has_separators());
238 
239  const scribo::line_info<L>& l1 = lines(l1_);
240  const scribo::line_info<L>& l2 = lines(l2_);
241 
242  const box2d& l1_bbox = l1.bbox();
243  const box2d& l2_bbox = l2.bbox();
244 
245  const unsigned
246  col1 = l1_bbox.pcenter().col(),
247  col2 = l2_bbox.pcenter().col();
248  const mln_ch_value(L, bool)&
249  separators = lines.components().separators();
250 
251  // Checking for separators starting from 1 / 4, 3/ 4 and the
252  // center of the box
253  typedef const bool* sep_ptr_t;
254  sep_ptr_t sep_ptr, sep_ptr_top, sep_ptr_bottom, end;
255 
256  if (col1 < col2)
257  {
258  const unsigned quarter =
259  ((l1_bbox.pcenter().row() - l1_bbox.pmin().row()) >> 1);
260 
261  sep_ptr = &separators(l1_bbox.pcenter());
262  sep_ptr_top = &separators(point2d(l1_bbox.pmin().row() + quarter,
263  l1_bbox.pcenter().col()));
264  sep_ptr_bottom = &separators(point2d(l1_bbox.pmax().row() - quarter,
265  l1_bbox.pcenter().col()));
266  end = sep_ptr + col2 - col1;
267  }
268  else
269  {
270  const unsigned quarter =
271  ((l2_bbox.pcenter().row() - l2_bbox.pmin().row()) >> 1);
272 
273  sep_ptr = &separators(l2_bbox.pcenter());
274  sep_ptr_top = &separators(point2d(l2_bbox.pmin().row() + quarter,
275  l2_bbox.pcenter().col()));
276  sep_ptr_bottom = &separators(point2d(l2_bbox.pmax().row() - quarter,
277  l2_bbox.pcenter().col()));
278  end = sep_ptr + col1 - col2;
279  }
280 
281  // If sep_ptr is true, then a separator is reached.
282  while (!*sep_ptr && !*sep_ptr_top && !*sep_ptr_bottom && sep_ptr != end)
283  {
284  ++sep_ptr;
285  ++sep_ptr_top;
286  ++sep_ptr_bottom;
287  }
288 
289  return (*sep_ptr || *sep_ptr_top || *sep_ptr_bottom);
290  }
291 
292 
301  template <typename L>
302  bool lines_can_merge(scribo::line_set<L>& lines,
303  const line_id_t& l1_,
304  const line_id_t& l2_)
305  {
306  scribo::line_info<L>& l1 = lines(l1_);
307  const scribo::line_info<L>& l2 = lines(l2_);
308 
309  // Parameters.
310  const float x_ratio_max = 1.7f;
311  const float baseline_delta_max =
312  0.5f * std::min(l1.x_height(), l2.x_height());
313 
314  const box2d& l1_bbox = l1.bbox();
315  const box2d& l2_bbox = l2.bbox();
316 
317  const point2d& l1_pmin = l1_bbox.pmin();
318  const point2d& l2_pmin = l2_bbox.pmin();
319  const point2d& l1_pmax = l1_bbox.pmax();
320  const point2d& l2_pmax = l2_bbox.pmax();
321 
322  const bool l1_has_separators = lines.components().has_separators();
323  const bool l1_l2_between_separators = (l1_has_separators) ?
324  between_separators(lines, l1_, l2_) : false;
325  const float l_ted_cw = l2.char_width();
326 
327  const float dx = std::max(l1_pmin.col(), l2_pmin.col())
328  - std::min(l1_pmax.col(), l2_pmax.col());
329  const float dy = std::max(l1_pmin.row(), l2_pmin.row())
330  - std::min(l1_pmax.row(), l2_pmax.row());
331 
332  // Particular case of "
333  {
334  if (// Must have 2 characters
335  (l1.card() == 2
336  // The box height must be smaller than the touched line x height
337  && l1_bbox.height() < l2.x_height())
338  // The line must be vertically and horizontally close to
339  // the touched line
340  && (dx < l_ted_cw && dy < 0)
341  // No separator between the two lines
342  && not (l1_l2_between_separators))
343  {
344  // Line is then considered as punctuation
345  l1.update_type(line::Punctuation);
346  return true;
347  }
348  }
349 
350  // Particular case like merging between a line and [5]
351  {
352  const mln::def::coord
353  top_row_l2 = l2_pmin.row(),
354  top_row_l1 = l1_pmin.row(),
355  bot_row = l2_pmax.row();
356  const float x1 = l1.x_height(), x2 = l2.x_height();
357  const float x_ratio = std::max(x1, x2) / std::min(x1, x2);
358 
359  if (// No separator
360  !l1_l2_between_separators
361  // The x height ration must be lower than 2
362  && (x_ratio < 2.0f)
363  // Baseline alignment
364  && (std::abs(bot_row - l1.baseline()) < baseline_delta_max)
365  // The top of the boxes must be aligned
366  && (std::abs(top_row_l2 - top_row_l1) < 5)
367  // Distance between the line and the touched line.
368  && dx < 5.0f * l_ted_cw)
369  {
370  return true;
371  }
372  }
373 
374  // Similarity of x_height.
375  {
376  const float x1 = l1.x_height(), x2 = l2.x_height();
377  const float x_ratio = std::max(x1, x2) / std::min(x1, x2);
378  if (x_ratio > x_ratio_max)
379  return false;
380  }
381 
382  // Same baseline.
383  {
384  if (std::abs(l1.baseline() - l2.baseline()) > baseline_delta_max)
385  return false;
386  }
387 
388  // left / right
389  const unsigned
390  col1 = l1_bbox.pcenter().col(),
391  col2 = l2_bbox.pcenter().col();
392  if (col1 < col2)
393  {
394  if ((col1 + l1_bbox.width() / 4) >= (col2 - l2_bbox.width() / 4))
395  return false;
396  }
397  else
398  if ((col2 + l2_bbox.width() / 4) >= (col1 - l1_bbox.width() / 4))
399  return false;
400 
401  // Check that there is no separator in between.
402  if (l1_has_separators)
403  return ! l1_l2_between_separators;
404 
405  return true;
406  }
407 
408 
409 
410 
411  inline
412  int horizontal_distance(const box2d& l1,
413  const box2d& l2)
414  {
415  if (l1.pcenter().col() < l2.pcenter().col())
416  return l2.pmin().col() - l1.pmax().col();
417  else
418  return l1.pmin().col() - l2.pmax().col();
419  }
420 
421 
422 
423 
439  template <typename L>
440  bool non_text_and_text_can_merge(line_set<L>& lines,
441  const line_id_t& l_cur_, // current
442  const line_id_t& l_ted_) // touched
443  {
444  scribo::line_info<L>& l_cur = lines(l_cur_);
445  const scribo::line_info<L>& l_ted = lines(l_ted_);
446 
447 
448  if (l_cur.type() == line::Text || l_ted.type() != line::Text)
449  return false;
450  // the current object is a NON-textline
451  // the background (touched) object is a textline
452 
453 
454  // Check that there is no separator in between.
455  if (lines.components().has_separators()
456  && between_separators(lines, l_cur_, l_ted_))
457  return false;
458 
459  const box2d& l_cur_bbox = l_cur.bbox();
460  const box2d& l_ted_bbox = l_ted.bbox();
461 
462  const point2d& l_cur_pmin = l_cur_bbox.pmin();
463  const point2d& l_ted_pmin = l_ted_bbox.pmin();
464  const point2d& l_cur_pmax = l_cur_bbox.pmax();
465  const point2d& l_ted_pmax = l_ted_bbox.pmax();
466 
467  const float dx = std::max(l_cur_pmin.col(), l_ted_pmin.col())
468  - std::min(l_cur_pmax.col(), l_ted_pmax.col());
469  const float l_ted_cw = l_ted.char_width();
470  const float l_ted_x_height = l_ted.x_height();
471 
472  const unsigned l_cur_height = l_cur_bbox.height();
473  const unsigned l_cur_width = l_cur_bbox.width();
474 
475  // General case (for tiny components like --> ',:."; <--):
476  if (l_cur_height < l_ted_x_height
477  && l_cur_height > 0.05f * l_ted_x_height
478  && float(l_cur_width) / float(l_cur.card()) < l_ted.char_width()
479  && dx < l_ted_cw
480  && l_cur_pmin.row() < l_ted_pmax.row())
481  {
482  l_cur.update_type(line::Punctuation);
483  return true;
484  }
485 
486  // Special case for '---':
487  if (// small height:
488  l_cur_height < l_ted_x_height
489  // // not so long width:
490  && l_cur_width > 0.8 * l_ted_cw
491  && l_cur_width < 5 * l_ted_cw
492  // align with the 'x' center:
493  && std::abs((l_ted.baseline() + l_ted.meanline()) / 2 - l_cur.bbox().pcenter().row()) < 7
494  // tiny spacing:
495  && unsigned(horizontal_distance(l_cur_bbox, l_ted_bbox)) < 2 * l_ted_cw
496  )
497  {
498  return true;
499  }
500 
501 
502  // Special case
503 
504  // Looking for alignement.
505  const mln::def::coord
506  top_row = l_cur.bbox().pmin().row(),
507  bot_row = l_cur.bbox().pmax().row();
508 
509  const box2d& l_ted_ebbox = l_ted.ebbox();
510 
511  if ((std::abs(bot_row - l_ted.baseline()) < 5
512  || std::abs(bot_row - l_ted_ebbox.pmax().row()) < 5)
513  &&
514  (std::abs(top_row - l_ted.meanline()) < 5
515  || std::abs(top_row - l_ted_ebbox.pmin().row()) < 5)
516  && dx < 5.0f * l_ted_cw)
517  {
518  return true;
519  }
520 
521  return false;
522  }
523 
524 
525 
526 
527 
549 
604  // FIXME:
605  //
606  // Important note: after merging two lines, we draw the
607  // merged line over the existing one; we have to ensure that we
608  // cover the previous rectangle (otherwise we have a label in
609  // 'billboard' that is not used anymore! and it can mix up the
610  // detection of upcoming merges...) so this delta has to remain
611  // the same during one pass. Another solution (yet more costly)
612  // could be of erasing the previous rectangle before re-drawing...
613  //
614  template <typename L>
615  void
616  one_merge_pass(unsigned ith_pass,
617  const box2d& domain,
618  std::vector<scribo::line_id_t>& v, // Ids sorted by bbox size.
619  scribo::line_set<L>& lines, // Tagged Lines (looks_like_a_text_line?)
621  {
622  image2d<unsigned> billboard(domain);
623  data::fill(billboard, 0);
624 
625  image2d<value::int_u8> log(domain);
626  data::fill(log, 0);
627 
628  const unsigned n = v.size();
629  unsigned l_;
630 
631  unsigned
632  count_txtline_IN_txtline = 0,
633  count_txtline_IN_junk = 0,
634  count_two_lines_merge = 0,
635  count_new_txtline = 0,
636  count_comp_IN_txtline = 0,
637  count_comp_HITS_txtline = 0,
638  count_WTF = 0;
639 
640  for (int i = n - 1; i >= 0; --i)
641  {
642  unsigned l = v[i];
643 
644  if (parent[l] != l) // not a root, so has already merged, thus ignore it
645  continue;
646 
647  const box2d& b = lines(l).bbox();
648 
649  const box2d& b_ = lines(l).ebbox();
650 
651  /*
652  tl tr
653  x---------------x
654  | |
655  | mc |
656  ml x x x mr
657  | |
658  | |
659  x---------------x
660  bl br
661 
662  */
663 
664 
665  const unsigned tl = billboard(b_.pmin());
666  const unsigned tr = billboard.at_(b_.pmin().row(), b_.pmax().col());
667  const unsigned ml = billboard.at_(b_.pcenter().row(), b_.pmin().col());
668  const unsigned mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col());
669  const unsigned mr = billboard.at_(b_.pcenter().row(), b_.pmax().col());
670  const unsigned bl = billboard.at_(b_.pmax().row(), b_.pmin().col());
671  const unsigned br = billboard(b_.pmax());
672 
673  typedef std::set<unsigned> set_t;
674  std::set<unsigned> labels;
675  labels.insert(tl);
676  labels.insert(tl);
677  labels.insert(tr);
678  labels.insert(ml);
679  labels.insert(mc);
680  labels.insert(mr);
681  labels.insert(bl);
682  labels.insert(br);
683 
684 
685  for (set_t::const_iterator it = labels.begin();
686  it != labels.end();
687  ++it)
688  {
689  if (*it == 0)
690  continue;
691 
692  if (lines(*it).type() != line::Text)
693  std::cerr << "outch: we have hit, so drawn, a non-text..." << std::endl;
694  }
695 
696 
697  if (labels.size() == 1) // Same behavior for all anchors.
698  {
699  if (mc != 0)
700  {
701  // Main case: it is an "included" box (falling in an already drawn box)
702 
703  const line_info<L>& l_info = lines(l);
704  const line_info<L>& mc_info = lines(mc);
705 
706  if (l_info.type() == line::Text) // the current object IS a text line
707  {
708  if (mc_info.type() == line::Text) // included in a text line => weird
709  {
710  ++count_txtline_IN_txtline;
711 
712  // Particular case of "
713  // {
714  // if ((lines(l).card() == 2 &&
715  // lines(l).bbox().height() < lines(mc).x_height()) &&
716  // not (lines(l).holder().components().has_separators()
717  // && between_separators(lines(l),
718  // lines(mc))))
719 
720  const box2d& l_bbox = l_info.bbox();
721  const box2d& mc_bbox = mc_info.bbox();
722 
723  const point2d& l_pmin = l_bbox.pmin();
724  const point2d& mc_pmin = mc_bbox.pmin();
725  const point2d& l_pmax = l_bbox.pmax();
726  const point2d& mc_pmax = mc_bbox.pmax();
727 
728  const float dx = std::max(l_pmin.col(), mc_pmin.col())
729  - std::min(l_pmax.col(), mc_pmax.col());
730  const float dy = std::max(l_pmin.row(), mc_pmin.row())
731  - std::min(l_pmax.row(), mc_pmax.row());
732  const float l_ted_cw = mc_info.char_width();
733 
734  // We accept a line included into another only if it
735  // is horizontally close to the line's bbox and
736  // vertically aligned
737  // Obviously no separators between the two lines
738  if ((l_info.card() <= 5 ||
739  (std::abs(l_info.baseline() - mc_info.baseline()) < 5
740  && std::abs(l_info.meanline() - mc_info.meanline()) < 5))
741  && dx < l_ted_cw && dy < 0
742  && not (lines.components().has_separators()
743  && between_separators(lines, l, mc)))
744  l = do_union(lines, l, mc, parent);
745  // }
746 
749 // if ((std::abs(lines(l).ascent() - lines(mc).ascent()) >= 5)
750 // || (std::abs(lines(l).descent() - lines(mc).descent()) >= 5))
751 // continue;
752 
753 // // FIXME: Is it valid?
754 // // A text line is included in another text line.
755 // // They are merged.
756 // //
757 // l_ = do_union(lines, mc, l, parent);
758 // draw_box(billboard, lines(l_).ebbox(), l_);
759 
760  // Log:
761  draw_box(log, b, 126);
762 
763  }
764 
765  else // FIXME: Remove! since included in a non-text-line, so not drawn, so inclusion impossible!!!!!!!!!!
766  {
767  std::cerr << "error: should NOT happen (a text line included in a NON-text-line (so not drawn!!!)" << std::endl;
768  ++count_txtline_IN_junk;
769 
770  // a non-text-line (probably a drawing or a frame) includes a text line
771  draw_box(billboard, lines(l).ebbox(), l);
772  // Log:
773  draw_box(log, b, 100);
774  }
775 
776  }
777  else // the current object is NOT a text line
778  {
779  if (lines(mc).type() == line::Text) // included in a text line
780  {
781  ++count_comp_IN_txtline;
782 
783  // The current object is supposed to be punctuation
784  // since it is fully included in the bbox of a line
785  // of text, so we always want to merge this object
786  // with the line.
787  //
788  // However, in case of a bad quality document, we
789  // may not always want to merge since this object
790  // could be noise or garbage... So adding new
791  // criterions could fix this issue.
792  //
793  if (!non_text_and_text_can_merge(lines, l, mc))
794  continue;
795 
796  // Avoid the case when a large title ebbox overlap
797  // with a text column. In that case, the title may
798  // merge with punctuation from the text.
799  // if (lines(l).holder().components().has_separators()
800  // && between_separators(lines(l), lines(mc)))
801  // continue;
802 
803  // Mark current line as punctuation.
804  lines(l).update_type(line::Punctuation);
805 
806  // Merge non-text-line #l into text line #mc.
807  l_ = do_union(lines, mc, l, parent);
808  // We have to re-draw the original largest text line since
809  // it may change of label (take the one of the included line).
810  draw_box(billboard, lines(l_).ebbox(), l_);
811 
812  // Log:
813  draw_box(log, b, 128);
814  }
815  }
816  }
817  else // mc == 0
818  {
819  // Main case: it is a "new" box, that might be drawn in the background.
820 
821  // we only draw this box if it is a text-line!!!
822  if (lines(l).type() == line::Text)
823  {
824  ++count_new_txtline;
825  draw_box(billboard, lines(l).ebbox(), l);
826  // Log:
827  draw_box(log, b, 127);
828  }
829  else
830  draw_box(log, b, 1);
831  }
832  }
833  else
834  {
835  l_ = l; // current label.
836 
837  // Particular cases.
838  for (set_t::const_iterator it = labels.begin();
839  it != labels.end();
840  ++it)
841  {
842  unsigned lcand = *it;
843 
844  if (lcand == 0) // Skip background.
845  continue;
846 
847  if (lines(l_).type() == line::Text)
848  {
849  // l_ and lcand look like text line chunks.
850  if (lines_can_merge(lines, l_, lcand))
851  {
852  ++count_two_lines_merge;
853  l_ = do_union(lines, l_, lcand, parent);
854 
855  draw_box(billboard, lines(l_).ebbox(), l_);
856  // Log:
857  draw_box(log, b, 151);
858  continue;
859  }
860  else
861  {
862  ++count_WTF;
863  // Log:
864  draw_box(log, b, 255);
865 
866  // (*) SEE BELOW
867  draw_box(billboard, lines(l_).ebbox(), l_);
868  }
869  }
870  else
871  {
872  // l_ does NOT looks like a text line chunk.
873  ++count_comp_HITS_txtline;
874  if (non_text_and_text_can_merge(lines, l_, lcand))
875  // a petouille merges with a text line?
876  {
877  ++count_comp_HITS_txtline;
878  l_ = do_union(lines, l_, lcand, parent);
879  draw_box(billboard, lines(l_).ebbox(), l_);
880 
881  // Log:
882  draw_box(log, b, 169);
883  continue;
884  }
885  else
886  {
887  // Log:
888  draw_box(log, b, 254);
889  }
890  }
891 
892 
893  /* (*) Text lines verticaly overlap.
894 
895  --------------------------
896  | l |
897  | |
898  --------------------------
899  | lcand |
900  | |
901  --------------------------
902 
903  or
904 
905  --------------------------
906  | l |
907  | |
908  |---------------------------
909  |------------------------- |
910  | lcand |
911  ----------------------------
912 
913  or
914 
915  --------------------------
916  | lcand |
917  | |
918  |---------------------------
919  |------------------------- |
920  | l |
921  ----------------------------
922 
923  */
924 
925  }
926  }
927 
928  }
929 
930 
931  (void) ith_pass;
932 // if (ith_pass == 1)
933 // {
934 // mln::io::pgm::save(log, "log_1.pgm");
935 // mln::io::pgm::save(data::wrap(int_u8(), billboard), "log_1e.pgm");
936 // }
937 // else if (ith_pass == 2)
938 // {
939 // mln::io::pgm::save(log, "log_2.pgm");
940 // mln::io::pgm::save(data::wrap(int_u8(), billboard), "log_2e.pgm");
941 // }
942 // else if (ith_pass == 3)
943 // {
944 // mln::io::pgm::save(log, "log_3.pgm");
945 // mln::io::pgm::save(data::wrap(int_u8(), billboard), "log_3e.pgm");
946 // }
947  }
948 
949 
950 
951 
952  template <typename L>
953  struct order_lines_id
954  {
955  order_lines_id(const scribo::line_set<L>& lines)
956  : lines_(lines)
957  {
958  }
959 
960  bool operator()(const scribo::line_id_t& l1, const scribo::line_id_t& l2) const
961  {
962  const unsigned l1_nsites = lines_(l1).bbox().nsites();
963  const unsigned l2_nsites = lines_(l2).bbox().nsites();
964 
965  if (l1_nsites == l2_nsites)
966  return l1 < l2;
967  return l1_nsites < l2_nsites;
968  }
969 
970  scribo::line_set<L> lines_;
971  };
972 
973 
974  template <typename L>
976  draw_boxes(const box2d& input_domain,
977  const scribo::line_set<L>& lines_)
978  {
979  scribo::line_set<L> lines = lines_.duplicate();
980  const unsigned n = lines.nelements();
981 
982  order_lines_id<L> func(lines);
983  std::vector<scribo::line_id_t> v;
984  v.reserve(n);
985 
986  // Union-find parent data, used to merge lines.
987  mln::util::array<unsigned> parent(n + 1);
988 
989  // Initialize data
990  parent(0) = 0;
991  for (unsigned l = 1; l < parent.nelements(); ++l)
992  {
993  v.push_back(l);
994  parent[l] = l;
995  }
996 
997  // Sort lines by bbox.nelements() and ids.
998  std::sort(v.begin(), v.end(), func);
999 
1000 
1001  // Setting lines as text lines according to specific criterions.
1002  for_all_lines(l, lines)
1003  if (looks_like_a_text_line(lines(l)))
1004  lines(l).update_type(line::Text);
1005 
1006 
1007  // First pass
1008  one_merge_pass(1, input_domain, v, lines, parent);
1009 
1010 
1011 // lines.force_stats_update();
1012 
1013  // Sort lines by bbox.nelements() and ids again!
1014  // line may have grown differently since the first pass.
1015  std::sort(v.begin(), v.end(), func);
1016 
1017 
1018  // Second pass
1019  one_merge_pass(2, input_domain, v, lines, parent); // <- last pass
1020 
1021 
1022  lines.force_stats_update();
1023 
1024  return lines;
1025  }
1026 
1027  } // end of namespace scribo::text::internal
1028 
1029 
1030 
1031  // Facade
1032 
1033  template <typename L>
1034  line_set<L>
1035  merging(const scribo::line_set<L>& lines)
1036  {
1037  using namespace mln;
1038 
1039  scribo::line_set<L> output
1040  = internal::draw_boxes(lines.components().labeled_image().domain(),
1041  lines);
1042  return output;
1043  }
1044 
1045 # endif // ! MLN_INCLUDE_ONLY
1046 
1047  } // end of namespace scribo::text
1048 
1049 } // end of namespace scribo
1050 
1051 #endif // ! SCRIBO_TEXT_MERGING_HH