$extrastylesheet
Olena  User documentation 2.1
An Image Processing Platform
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
alignments.hh
1 // Copyright (C) 2011, 2013 EPITA Research and Development Laboratory
2 // (LRDE)
3 //
4 // This file is part of Olena.
5 //
6 // Olena is free software: you can redistribute it and/or modify it under
7 // the terms of the GNU General Public License as published by the Free
8 // Software Foundation, version 2 of the License.
9 //
10 // Olena is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 // General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with Olena. If not, see <http://www.gnu.org/licenses/>.
17 //
18 // As a special exception, you may use this file as part of a free
19 // software project without restriction. Specifically, if other files
20 // instantiate templates or use macros or inline functions from this
21 // file, or you compile this file and link it with other files to produce
22 // an executable, this file does not by itself cause the resulting
23 // executable to be covered by the GNU General Public License. This
24 // exception does not however invalidate any other reasons why the
25 // executable file might be covered by the GNU General Public License.
26 
27 #ifndef SCRIBO_PRIMITIVE_EXTRACT_ALIGNMENTS_HH
28 # define SCRIBO_PRIMITIVE_EXTRACT_ALIGNMENTS_HH
29 
33 
34 # include <iostream>
35 
36 # include <mln/core/image/image2d.hh>
37 # include <mln/core/alias/neighb2d.hh>
38 
39 # include <mln/data/convert.hh>
40 
41 # include <mln/value/rgb8.hh>
42 # include <mln/value/label_16.hh>
43 # include <mln/value/int_u.hh>
44 # include <mln/literal/colors.hh>
45 
46 # include <mln/util/array.hh>
47 # include <mln/util/couple.hh>
48 
49 # include <mln/io/pbm/load.hh>
50 
51 # include <mln/util/couple.hh>
52 
53 # include <scribo/core/def/lbl_type.hh>
54 # include <scribo/primitive/extract/components.hh>
55 # include <scribo/filter/object_links_aligned.hh>
56 # include <scribo/filter/object_links_bbox_overlap.hh>
57 # include <scribo/filter/object_groups_small.hh>
58 # include <scribo/preprocessing/denoise_fg.hh>
59 # include <scribo/primitive/link/internal/link_single_dmax_ratio_aligned_delta_base.hh>
60 # include <scribo/primitive/link/internal/dmax_default.hh>
61 # include <scribo/primitive/link/internal/dmax_hrules.hh>
62 # include <scribo/primitive/link/merge_double_link_closest_aligned.hh>
63 
64 # include <scribo/debug/usage.hh>
65 # include <scribo/debug/links_image.hh>
66 
67 # include <scribo/core/document.hh>
68 # include <scribo/core/component_set.hh>
69 # include <scribo/core/line_set.hh>
70 # include <scribo/primitive/extract/components.hh>
71 # include <scribo/primitive/group/from_single_link.hh>
72 # include <scribo/primitive/group/apply.hh>
73 # include <scribo/primitive/link/with_single_left_link_dmax_ratio.hh>
74 # include <scribo/primitive/link/with_single_right_link_dmax_ratio.hh>
75 # include <scribo/primitive/link/merge_double_link.hh>
76 # include <scribo/primitive/link/internal/dmax_width_and_height.hh>
77 
78 # include <scribo/preprocessing/rotate_90.hh>
79 # include <scribo/filter/object_links_bbox_h_ratio.hh>
80 
81 
82 namespace scribo
83 {
84 
85  namespace primitive
86  {
87 
88  namespace extract
89  {
90 
// Forward declaration of the alignment-extraction facade; the definition
// appears further down in this header.
//
// \param doc          Document to process. The definition checks
//                     doc.is_valid() and doc.has_text() as preconditions.
// \param dmax_ratio   Ratio forwarded to the dmax_hrules functor used when
//                     looking up links.
// \param delta_pixel  Tolerance forwarded to the link functors.
//
// NOTE(review): listing line 167 (the return type) is absent from this
// extract; restore it from the original header before compiling.
166  template <typename L>
168  alignments(const document<L>& doc,
169  float dmax_ratio, unsigned delta_pixel);
170 
171 
172 
173 # ifndef MLN_INCLUDE_ONLY
174 
175  } // end of namespace scribo::primitive::extract
176 
177  } // end of namespace scribo::primitive
178 
179 
180 
181 
182  namespace internal
183  {
184 
185  template <typename V>
186  point2d
187  point_row_adjust(const point2d& pi, const image2d<V>& ima)
188  {
189  if (ima.domain().has(pi))
190  return pi;
191 
192  point2d po = pi;
193 
194  if (po.row() < ima.domain().pmin().row())
195  while (!ima.domain().has(po))
196  ++po.row();
197  else
198  while (!ima.domain().has(po))
199  --po.row();
200 
201  return po;
202  }
203 
204  template <typename V>
205  point2d
206  point_col_adjust(const point2d& pi, const image2d<V>& ima)
207  {
208  if (ima.domain().has(pi))
209  return pi;
210 
211  point2d po = pi;
212 
213  if (po.col() < ima.domain().pmin().col())
214  while (!ima.domain().has(po))
215  ++po.col();
216  else
217  while (!ima.domain().has(po))
218  --po.col();
219 
220  return po;
221  }
222 
223  } // end of namespace scribo::internal
224 
225 
226 
227 
228  namespace primitive
229  {
230 
231  namespace link
232  {
233 
234  namespace internal
235  {
236 
237  template <typename L, typename F>
238  class single_right_dmax_ratio_aligned_delta_functor
239  : public link_single_dmax_ratio_aligned_delta_base<L, F, single_right_dmax_ratio_aligned_delta_functor<L,F> >
240  {
241  typedef single_right_dmax_ratio_aligned_delta_functor<L,F> self_t;
242  typedef link_single_dmax_ratio_aligned_delta_base<L, F, self_t> super_;
243 
244  public:
245  typedef mln_site(L) P;
246 
247  single_right_dmax_ratio_aligned_delta_functor(
248  const component_set<L>& components,
249  const DMax_Functor<F>& dmax_f,
250  unsigned delta,
251  anchor::Direction delta_direction,
252  const L& bbox_ima,
253  unsigned delta_ws_lookup)
254  : super_(components, dmax_f, delta, delta_direction),
255  bbox_ima_(bbox_ima), delta_ws_lookup_(delta_ws_lookup)
256  {
257 # ifndef SCRIBO_NDEBUG
258  debug_ = data::convert(value::rgb8(), data::convert(bool(), bbox_ima));
259 # endif // ! SCRIBO_NDEBUG
260  }
261 
262  void compute_next_site_(P& p)
263  {
264  ++p.col();
265  }
266 
267 
268  inline
269  bool
270  valid_link_(unsigned current_object,
271  const P& start_point,
272  const P& p,
273  anchor::Type anchor)
274  {
275  if (!super_::valid_link_(current_object, start_point, p, anchor))
276  return false;
277 
278  mln_value(L) nbh = this->labeled_image_(p);
279 
280  switch(anchor)
281  {
282  // Top
283  case anchor::StrictTopCenter:
284  {
285  point2d p(std::min(this->components_(current_object).bbox().pmin().row(),
286  this->components_(nbh).bbox().pmin().row()) - delta_ws_lookup_,
287  this->components_(current_object).bbox().pmin().col());
288  p = scribo::internal::point_row_adjust(p, bbox_ima_);
289 
290  for (; p.col() <= this->components_(nbh).bbox().pmax().col()
291  && (bbox_ima_(p) == 0);)
292  {
293 # ifndef SCRIBO_NDEBUG
294  debug_(p) = literal::violet;
295 # endif // ! SCRIBO_NDEBUG
296  ++p.col();
297  }
298 
299  return bbox_ima_(p) == 0;
300  }
301 
302  // Bottom
303  case anchor::StrictBottomCenter:
304  {
305  point2d p(std::max(this->components_(current_object).bbox().pmax().row(),
306  this->components_(nbh).bbox().pmax().row()) + delta_ws_lookup_,
307  this->components_(current_object).bbox().pmin().col());
308  p = scribo::internal::point_row_adjust(p, bbox_ima_);
309 
310  for (; p.col() <= this->components_(nbh).bbox().pmax().col()
311  && (bbox_ima_(p) == 0);)
312  {
313 # ifndef SCRIBO_NDEBUG
314  debug_(p) = literal::violet;
315 # endif // ! SCRIBO_NDEBUG
316  ++p.col();
317  }
318 
319  return bbox_ima_(p) == 0;
320  }
321 
322  default:
323  mln_trace_warning("anchor not handled!");
324  }
325 
326  return false;
327  }
328 
329 
330  L bbox_ima_;
331  unsigned delta_ws_lookup_;
332 
333 # ifndef SCRIBO_NDEBUG
334  image2d<value::rgb8> debug_;
335 # endif // ! SCRIBO_NDEBUG
336  };
337 
338 
339  template <typename L, typename F>
340  class single_left_dmax_ratio_aligned_delta_functor
341  : public link_single_dmax_ratio_aligned_delta_base<L, F, single_left_dmax_ratio_aligned_delta_functor<L,F> >
342  {
343  typedef single_left_dmax_ratio_aligned_delta_functor<L,F> self_t;
344  typedef link_single_dmax_ratio_aligned_delta_base<L, F, self_t> super_;
345 
346  public:
347  typedef mln_site(L) P;
348 
349  single_left_dmax_ratio_aligned_delta_functor(
350  const component_set<L>& components,
351  const DMax_Functor<F>& dmax_f,
352  unsigned delta,
353  anchor::Direction delta_direction,
354  const L& bbox_ima,
355  unsigned delta_ws_lookup)
356  : super_(components, dmax_f, delta, delta_direction),
357  bbox_ima_(bbox_ima), delta_ws_lookup_(delta_ws_lookup)
358  {
359 # ifndef SCRIBO_NDEBUG
360  debug_ = data::convert(value::rgb8(), data::convert(bool(), bbox_ima));
361 # endif // ! SCRIBO_NDEBUG
362  }
363 
364  void compute_next_site_(P& p)
365  {
366  --p.col();
367  }
368 
369 
370  inline
371  bool
372  valid_link_(unsigned current_object,
373  const P& start_point,
374  const P& p,
375  anchor::Type anchor)
376  {
377  if (!super_::valid_link_(current_object, start_point, p, anchor))
378  return false;
379 
380  mln_value(L) nbh = this->labeled_image_(p);
381 
382  switch (anchor)
383  {
384 
385  // Top
386  case anchor::StrictTopCenter:
387  {
388  point2d p(std::min(this->components_(nbh).bbox().pmin().row(),
389  this->components_(current_object).bbox().pmin().row()) - delta_ws_lookup_,
390  this->components_(current_object).bbox().pmax().col());
391  p = scribo::internal::point_row_adjust(p, bbox_ima_);
392 
393  for (; p.col() > this->components_(nbh).bbox().pmin().col()
394  && (bbox_ima_(p) == 0);)
395  {
396 # ifndef SCRIBO_NDEBUG
397  debug_(p) = literal::violet;
398 # endif // ! SCRIBO_NDEBUG
399  --p.col();
400  }
401 
402  return bbox_ima_(p) == 0;
403  }
404 
405  // Bottom
406  case anchor::StrictBottomCenter:
407  {
408  point2d p(std::max(this->components_(nbh).bbox().pmax().row(),
409  this->components_(current_object).bbox().pmax().row()) + delta_ws_lookup_,
410  this->components_(current_object).bbox().pmax().col());
411  p = scribo::internal::point_row_adjust(p, bbox_ima_);
412 
413  for (; p.col() > this->components_(nbh).bbox().pmin().col()
414  && (bbox_ima_(p) == 0);)
415  {
416 # ifndef SCRIBO_NDEBUG
417  debug_(p) = literal::violet;
418 # endif // ! SCRIBO_NDEBUG
419  --p.col();
420  }
421 
422  return bbox_ima_(p) == 0;
423  }
424 
425  default:
426  mln_trace_warning("anchor not handled!");
427  }
428 
429  return false;
430  }
431 
432 
433  L bbox_ima_;
434  unsigned delta_ws_lookup_;
435 
436 # ifndef SCRIBO_NDEBUG
437  image2d<value::rgb8> debug_;
438 # endif // ! SCRIBO_NDEBUG
439  };
440 
441 
442  } // end of namespace scribo::primitive::link::internal
443 
444  } // end of namespace scribo::primitive::link
445 
446 
447  namespace extract
448  {
449 
450  namespace internal
451  {
452 
453  template <typename L>
454  bool pass_comp_criterion(const line_info<L>& line)
455  {
456  return line.bbox().height() < 301;// (line.pixel_area() < 10000);
457  }
458 
459 
460  inline
461  bool
462  is_valid_left_right_skewed_delimitor(const box2d& brot,
463  const image2d<bool> input,
464  unsigned local_delta)
465  {
466  if (brot.pmax().col() - brot.pmin().col() > 6)
467  {
468  point2d
469  p1 = brot.pmin(),
470  p2 = brot.pmax();
471 
472  // Handle left to right alignment
473  p1.col() -= local_delta;
474  p2.col() -= local_delta;
475  p_line2d l2d(p1, p2);
476  mln_piter_(p_line2d) p(l2d);
477  for_all(p)
478  if (input(p))
479  return false;
480 
481  return true;
482  }
483 
484  return false;
485  }
486 
487 
488  inline
489  bool
490  is_valid_right_left_skewed_delimitor(const box2d& brot,
491  const image2d<bool> input,
492  unsigned local_delta)
493  {
494  if (brot.pmax().col() - brot.pmin().col() > 6)
495  {
496  point2d
497  p1 = brot.pmin(),
498  p2 = brot.pmax();
499 
500  // Handle right to left alignment
501  p1.col() = brot.pmax().col() - local_delta;
502  p2.col() = brot.pmin().col() - local_delta;
503  p_line2d l2d(p1, p2);
504  mln_piter_(p_line2d) p(l2d);
505  for_all(p)
506  if (input(p))
507  return false;
508 
509  return true;
510  }
511 
512  return false;
513  }
514 
515 
516  inline
517  box2d fast_rotate_positive(const box2d& box, const box2d& rbox)
518  {
519  box2d b(point2d(box.pmin().col(),
520  rbox.ncols() - box.pmin().row() - 1),
521  point2d(box.pmax().col(),
522  rbox.ncols() - box.pmax().row() - 1));
523  return b;
524  }
525 
526 
527  inline
528  box2d fast_rotate_negative(const box2d& box, const box2d& rbox)
529  {
530  mln::def::coord max_row = rbox.ncols() - box.pmin().col() - 1;
531  mln::def::coord min_row = rbox.ncols() - box.pmax().col() - 1;
532 
533  box2d b(point2d(min_row,
534  box.pmin().row()),
535  point2d(max_row,
536  box.pmax().row()));
537 
538  return b;
539  }
540 
541 
542  inline
543  unsigned
544  compute_reliable_ldelta(const value::int_u12& char_width,
545  const value::int_u12& char_space)
546  {
547  return 1.2 * char_width + char_space;
548  }
549 
550 
551  inline
552  unsigned
553  compute_unreliable_ldelta(const value::int_u12& max_char_width)
554  {
555  return max_char_width * 2; // FIXME: reduce to 1.5 ? Problem
556  // with 2 or 3 lines besides an
557  // image.
558  }
559 
560 
561  } // end of namespace scribo::primitive::extract::internal
562 
563 
565  // FACADES //
567 
568 
// Extract alignment delimitors from the lines of a document.
//
// Outline of what the code below does:
//  - checks that the document is valid and has text, and fetches its
//    binary image and line set;
//  - draws, in a label image defined on the transposed input domain
//    (rbbox), the rotated bounding box of every valid line accepted by
//    internal::pass_comp_criterion, and builds a component_set from it;
//  - for the StrictTopCenter anchor, then for the StrictBottomCenter
//    anchor: computes right and left links with the functors defined
//    above, filters links with object_links_bbox_overlap (0.80), builds
//    groups, filters them with object_groups_small (min_card = 3),
//    gathers per-group character statistics, invalidates groups of 3 or 4
//    elements when a label is met in the scanned whitespace strip, and
//    draws one delimitor segment per remaining group in `delimitors`;
//  - extracts the components of `delimitors` (c8 connectivity, type
//    component::WhitespaceSeparator) and returns `output`, built from
//    those components and the delimitor image.
//
// \param doc          Document to process.
// \param dmax_ratio   Ratio given to the dmax_hrules functor.
// \param delta_pixel  Tolerance given to the link functors.
//
// NOTE(review): this extract lacks several listing lines (570, 643-644,
// 675, 715, 878-879, 910, 946, 1114): the return type and the beginning
// of some declarations/statements are missing.  Each gap is flagged
// below; restore them from the original header.
569  template <typename L>
// NOTE(review): listing line 570 (return type) is missing here.
571  alignments(const document<L>& doc,
572  float dmax_ratio, unsigned delta_pixel)
573  {
574  mln_trace("scribo::primitive::extract::alignments");
575  mln_precondition(doc.is_valid());
576  mln_precondition(doc.has_text());
577  const mln_ch_value(L,bool)& input = doc.binary_image();
578  mln_assertion(input.is_valid());
579 
// min_card is handed to object_groups_small; delta is both the row
// offset given to the link functors (whitespace lookup) and the column
// shift applied to the delimitor segments.
580  unsigned min_card = 3;
581  unsigned delta = 5;
582 
583  // 0. Get low level structures in document data.
584  typedef mln_value(L) V;
585  const object_groups<L>& groups = doc.paragraphs().lines().groups();
586  const line_set<L>& lines = doc.lines();
587 
588  (void) groups; // Avoid warnings when debug is disabled.
589 
590  // 1. Construct an image of group bounding boxes.
591  //
592  // This image is used later to group bounding boxes. It is
593  // rotated in order to make grouping faster (linear memory
594  // reading).
595  //
// rbbox is the input domain with the row and column of pmax swapped.
596  box2d rbbox = box2d(input.domain().pmin(),
597  point2d(input.domain().pmax().col(),
598  input.domain().pmax().row()));
599 
600  L bbox_ima(rbbox);
601  data::fill(bbox_ima, 0);
602  for_all_lines(l, lines)
603  if (lines(l).is_valid()
604  && internal::pass_comp_criterion(lines(l)))
605  mln::draw::box(bbox_ima,
606  internal::fast_rotate_positive(lines(l).bbox(),
607  rbbox),
608  l);
609 
610  // Compute component information
611  //
612  // FIXME: not useful since we have all required information
613  // in the line_set. However a component_set is needed in link
614  // functors...
615  component_set<L>
616  components = component_set<L>(bbox_ima, lines.nelements());
617 
618 
619  // Dmax functor used for TOP and BOTTOM links.
620  primitive::link::internal::dmax_hrules dmax_func(dmax_ratio, 100);
621 
622 
623  // Output image
624  image2d<bool> delimitors;
625  initialize(delimitors, input);
626  data::fill(delimitors, false);
627 
628 # ifndef SCRIBO_NDEBUG
629  image2d<value::rgb8> debug;
630  initialize(debug, components.labeled_image());
631  data::fill(debug, literal::black);
632  scribo::draw::bounding_boxes(debug, components, literal::blue);
633 # endif // ! SCRIBO_NDEBUG
634 
635 
636 
637 
638  // 2.1. BEGIN OF TOP LINKS PROCESSING
639  {
640  object_links<L> top_links;
641  object_groups<L> top_groups;
642 
// NOTE(review): listing lines 643-644 (the type of the two arrays
// declared below) are missing from this extract.
645  top_median_char_width,
646  top_median_char_space;
647 
648  mln::util::array<unsigned> top_max_char_width;
649 
650 
651 
652  // 2.1.1. Find TOP links.
653  {
654  object_links<L> right, left;
655 
656  // link right
657  {
658  primitive::link::internal::single_right_dmax_ratio_aligned_delta_functor<
659  L,primitive::link::internal::dmax_hrules>
660  functor(components, dmax_func, delta_pixel, anchor::Vertical, bbox_ima, delta);
661 
662  right = primitive::link::compute(functor, anchor::StrictTopCenter);
663  }
664 
665  // Link left
666  {
667  primitive::link::internal::single_left_dmax_ratio_aligned_delta_functor<
668  L,primitive::link::internal::dmax_hrules>
669  functor(components, dmax_func, delta_pixel, anchor::Vertical, bbox_ima, delta);
670 
671  left = primitive::link::compute(functor, anchor::StrictTopCenter);
672  }
673 
674  // Merge links
// NOTE(review): listing line 675 (beginning of the merge call that
// presumably assigns top_links) is missing from this extract.
676  anchor::StrictTopCenter);
677 
678  // Remove links if component bboxes overlap too much.
679  top_links = filter::object_links_bbox_overlap(top_links, 0.80f);
680 
681  // Remove groups with not enough links.
682  top_groups = primitive::group::from_single_link(top_links);
683  top_groups = filter::object_groups_small(top_groups, min_card);
684 
685  // Compute char_width and char_space statistics.
686  //
687  // Here, we also compute max_char_width, in case other
688  // statistics are not significant enough for next steps.
689  //
690  // To avoid biased statistics, char_space and char_width are
691  // used from lines with at least 2 components.
692  //
693  top_median_char_space.resize(top_groups.nelements());
694  top_median_char_width.resize(top_groups.nelements());
695  top_max_char_width.resize(top_groups.nelements());
696 
697  for_all_groups(g, top_groups)
698  if (top_groups(g).is_valid())
699  for_all_elements(e, top_groups(g).component_ids())
700  {
701  unsigned l = top_groups(g).component_ids()(e);
702  if (lines(l).card() > 1)
703  {
704  top_median_char_space(g).take(lines(l).char_space());
705  top_median_char_width(g).take(lines(l).char_width());
706  }
707  else if (top_max_char_width(g) < lines(l).char_width())
708  top_max_char_width(g) = lines(l).char_width();
709  }
710  }
711 
712 
713  // 2.1.2. Check TOP whitespaces
714  {
// NOTE(review): listing line 715 (type of group_bbox, an array of
// accumulators given the take()/to_result() calls below) is missing
// from this extract.
716  group_bbox(top_groups.nelements());
717 
718  // Compute group bboxes
719  for_all_groups(g, top_groups)
720  for_all_elements(e, top_groups(g).component_ids())
721  {
722  unsigned l = top_groups(g).component_ids()(e);
// p: largest column of the linked component's bbox, on its first row.
723  point2d p = components(top_links(l)).bbox().pmax();
724  p.row() = components(top_links(l)).bbox().pmin().row();
725 
726  if (top_groups(g).is_valid())// && top_links.is_linked(l))
727  {
728  group_bbox(g).take(p);
729  group_bbox(g).take(components(l).bbox().pmin());
730 
731 # ifndef SCRIBO_NDEBUG
732  // Draw first component bbox.
733  mln::draw::box(debug,
734  internal::fast_rotate_positive(
735  groups.components()(lines(l).component_ids()[0]).bbox(),
736  rbbox),
737  literal::cyan);
738 
739  mln::draw::box(debug,
740  internal::fast_rotate_positive(
741  groups.components()(lines(top_links(l)).component_ids()[0]).bbox(),
742  rbbox),
743  literal::cyan);
744 # endif // ! SCRIBO_NDEBUG
745  }
746 # ifndef SCRIBO_NDEBUG
747  else if (top_groups(g).card() > 1)
748  {
749  mln::draw::line(debug,
750  components(l).bbox().pmin(),
751  p,
752  literal::orange);
753 
754  // Draw first component bbox.
755  mln::draw::box(debug,
756  internal::fast_rotate_positive(
757  groups.components()(lines(l).component_ids()[0]).bbox(),
758  rbbox),
759  literal::cyan);
760 
761  mln::draw::box(debug,
762  internal::fast_rotate_positive(
763  groups.components()(lines(top_links(l)).component_ids()[0]).bbox(),
764  rbbox),
765  literal::cyan);
766  }
767 # endif // ! SCRIBO_NDEBUG
768  }
769 
770  // Looking for whitespaces (if needed)
771  for_all_groups(g, top_groups)
772  if (top_groups(g).is_valid())
773  {
774  unsigned ldelta = delta;
775 
776  // Special case : handle low connected groups.
777  if (top_groups(g).card() >= 3 && top_groups(g).card() < 5)
778  {
779  // Stats are not reliable, prefer using max char width.
780  if (top_median_char_width(g).card() == 0)
781  ldelta = internal::compute_unreliable_ldelta(top_max_char_width(g));
782  else
783  ldelta = internal::compute_reliable_ldelta(top_median_char_width(g),
784  top_median_char_space(g));
785 
786  point2d p(group_bbox(g).to_result().pmin().row() - ldelta,
787  group_bbox(g).to_result().pmin().col());
788 
789  p = scribo::internal::point_row_adjust(p, bbox_ima);
790 
791  // Checking whitespace area
// NOTE(review): when no label is met, p ends one column past the
// group bbox and is read by the test that follows the loop; confirm
// that this site is always inside bbox_ima.
792  for (; p.col() <= group_bbox(g).to_result().pmax().col()
793  && (bbox_ima(p) == 0);)
794  ++p.col();
795 
796  if (bbox_ima(p) != 0)
797  {
798  top_groups(g).invalidate();
799 
800 # ifndef SCRIBO_NDEBUG
801  mln::draw::line(debug,
802  scribo::internal::point_row_adjust(
803  point2d(group_bbox(g).to_result().pmin().row() - ldelta,
804  group_bbox(g).to_result().pmin().col()), debug),
805  scribo::internal::point_row_adjust(
806  point2d(group_bbox(g).to_result().pmin().row() - ldelta,
807  group_bbox(g).to_result().pmax().col()), debug),
808  literal::red);
809 # endif // ! SCRIBO_NDEBUG
810  continue;
811  }
812  }
813 
814 
// Bring the group bbox back into the frame of the input image.
815  box2d
816  brot = internal::fast_rotate_negative(group_bbox(g).to_result(),
817  rbbox);
818 
819  // Checking if the delimitor is skewed.
820  if (internal::is_valid_left_right_skewed_delimitor(brot, input,
821  delta))
822  {
823  mln::draw::line(delimitors,
824  scribo::internal::point_col_adjust(
825  point2d(brot.pmin().row(),
826  brot.pmin().col() - delta), delimitors),
827  scribo::internal::point_col_adjust(
828  point2d(brot.pmax().row(),
829  brot.pmax().col() - delta), delimitors),
830  true);
831  }
832  else if (internal::is_valid_right_left_skewed_delimitor(brot, input,
833  delta))
834  {
835  mln::draw::line(delimitors,
836  scribo::internal::point_col_adjust(
837  point2d(brot.pmin().row(),
838  brot.pmax().col() - delta), delimitors),
839  scribo::internal::point_col_adjust(
840  point2d(brot.pmax().row(),
841  brot.pmin().col() - delta), delimitors),
842  true);
843  }
844  else
845  {
// Neither diagonal is free: draw a vertical segment at pmin.col - delta.
846  mln::draw::line(delimitors,
847  scribo::internal::point_col_adjust(
848  point2d(brot.pmin().row(),
849  brot.pmin().col() - delta), delimitors),
850  scribo::internal::point_col_adjust(
851  point2d(brot.pmax().row(),
852  brot.pmin().col() - delta), delimitors),
853  true);
854  }
855 
856 # ifndef SCRIBO_NDEBUG
857  mln::draw::line(debug,
858  scribo::internal::point_row_adjust(
859  point2d(group_bbox(g).to_result().pmin().row() - ldelta,
860  group_bbox(g).to_result().pmin().col()), debug),
861  scribo::internal::point_row_adjust(
862  point2d(group_bbox(g).to_result().pmin().row() - ldelta,
863  group_bbox(g).to_result().pmax().col()), debug),
864  literal::green);
865 # endif // ! SCRIBO_NDEBUG
866  }
867  }
868 
869  } // END OF TOP LINKS PROCESSING
870 
871 
872 
873  // 2.2. BEGIN OF BOTTOM LINKS PROCESSING
874  {
875  object_links<L> bot_links;
876  object_groups<L> bot_groups;
877 
// NOTE(review): listing lines 878-879 (the type of the two arrays
// declared below) are missing from this extract.
880  bot_median_char_width,
881  bot_median_char_space;
882 
883  mln::util::array<unsigned> bot_max_char_width;
884 
885  // 2.2.1. Find BOTTOM links.
886  {
887  object_links<L> right, left;
888 
889  // link right
890  {
891  primitive::link::internal::single_right_dmax_ratio_aligned_delta_functor<
892  L,primitive::link::internal::dmax_hrules>
893  functor(components, dmax_func, delta_pixel, anchor::Vertical,
894  bbox_ima, delta);
895 
896  right = primitive::link::compute(functor, anchor::StrictBottomCenter);
897  }
898 
899  // Link left
900  {
901  primitive::link::internal::single_left_dmax_ratio_aligned_delta_functor<
902  L,primitive::link::internal::dmax_hrules>
903  functor(components, dmax_func, delta_pixel, anchor::Vertical,
904  bbox_ima, delta);
905 
906  left = primitive::link::compute(functor, anchor::StrictBottomCenter);
907  }
908 
909  // Merge links
// NOTE(review): listing line 910 (beginning of the merge call that
// presumably assigns bot_links) is missing from this extract.
911  anchor::StrictBottomCenter);
912 
913  // Remove links if component bboxes overlap too much.
914  bot_links = filter::object_links_bbox_overlap(bot_links, 0.80f);
915 
916  // Remove groups with not enough links.
917  bot_groups = primitive::group::from_single_link(bot_links);
918 
919  bot_groups = filter::object_groups_small(bot_groups, min_card);
920 
921  bot_median_char_space.resize(bot_groups.nelements());
922  bot_median_char_width.resize(bot_groups.nelements());
923  bot_max_char_width.resize(bot_groups.nelements());
924 
925  for_all_groups(g, bot_groups)
926  if (bot_groups(g).is_valid())
927  for_all_elements(e, bot_groups(g).component_ids())
928  {
929  unsigned l = bot_groups(g).component_ids()(e);
930 
931  if (lines(l).card() > 1)
932  {
933  bot_median_char_space(g).take(lines(l).char_space());
934  bot_median_char_width(g).take(lines(l).char_width());
935  }
936  // This data will be used if alignments are made with single
937  // components only.
938  else if (bot_max_char_width(g) < lines(l).char_width())
939  bot_max_char_width(g) = lines(l).char_width();
940  }
941  }
942 
943 
944  // 2.2.2. Check BOTTOM whitespaces
945  {
// NOTE(review): listing line 946 (type of group_bbox) is missing from
// this extract.
947  group_bbox(bot_groups.nelements());
948 
949  // Compute group bboxes
950  for_all_groups(g, bot_groups)
951  for_all_elements(e, bot_groups(g).component_ids())
952  {
953  unsigned l = bot_groups(g).component_ids()(e);
// p: smallest column of the component's bbox, on its last row.
954  point2d p = components(l).bbox().pmin();
955  p.row() = components(l).bbox().pmax().row();
956 
957  if (bot_groups(g).is_valid())// && bot_links.is_linked(l))
958  {
959  if (bot_groups.group_of(l).is_valid())
960  {
961  group_bbox(g).take(p);
962  group_bbox(g).take(components(bot_links(l)).bbox().pmax());
963 
964 # ifndef SCRIBO_NDEBUG
965  // Draw first component box
966  mln::draw::box(debug,
967  internal::fast_rotate_positive(
968  groups.components()(lines(l).component_ids()[lines(l).card() - 1]).bbox(),
969  rbbox),
970  literal::cyan);
971 
972  mln::draw::box(debug,
973  internal::fast_rotate_positive(
974  groups.components()(lines(bot_links(l)).component_ids()[lines(bot_links(l)).card() - 1]).bbox(),
975  rbbox),
976  literal::cyan);
977 # endif // ! SCRIBO_NDEBUG
978  }
979  }
980 # ifndef SCRIBO_NDEBUG
981  else if (bot_groups(g).card() > 1)
982  {
983  mln::draw::line(debug,
984  p,
985  components(bot_links(l)).bbox().pmax(),
986  literal::orange);
987 
988  // Draw first component box
989  mln::draw::box(debug,
990  internal::fast_rotate_positive(
991  groups.components()(lines(l).component_ids()[lines(l).card() - 1]).bbox(),
992  rbbox),
993  literal::cyan);
994 
995  mln::draw::box(debug,
996  internal::fast_rotate_positive(
997  groups.components()(lines(bot_links(l)).component_ids()[lines(bot_links(l)).card() - 1]).bbox(),
998  rbbox),
999  literal::cyan);
1000  }
1001 # endif // ! SCRIBO_NDEBUG
1002  }
1003 
1004  // Looking for whitespaces
1005  for_all_groups(g, bot_groups)
1006  if (bot_groups(g).is_valid())
1007  {
1008  unsigned ldelta = delta;
1009  // Special case : handle low connected groups.
1010  if (bot_groups(g).card() >= 3 && bot_groups(g).card() < 5)
1011  {
1012  // Stats are not reliable, prefer using max char width.
1013  if (bot_median_char_width(g).card() == 0)
1014  ldelta = internal::compute_unreliable_ldelta(bot_max_char_width(g));
1015  else
1016  ldelta = internal::compute_reliable_ldelta(bot_median_char_width(g),
1017  bot_median_char_space(g));
1018 
// NOTE(review): the start point is taken from bot_groups(g).bbox()
// whereas the loop bound, the debug drawings and the TOP pass all use
// group_bbox(g).to_result(); confirm that this difference is intended.
1019  point2d p(bot_groups(g).bbox().pmax().row() + ldelta,
1020  bot_groups(g).bbox().pmin().col());
1021  p = scribo::internal::point_row_adjust(p, bbox_ima);
1022 
1023  // Checking whitespace area
1024  for (; p.col() <= group_bbox(g).to_result().pmax().col()
1025  && (bbox_ima(p) == 0);)
1026  ++p.col();
1027 
1028  if (bbox_ima(p) != 0)
1029  {
1030  bot_groups(g).invalidate();
1031 
1032 # ifndef SCRIBO_NDEBUG
1033  mln::draw::line(debug,
1034  scribo::internal::point_row_adjust(
1035  point2d(group_bbox(g).to_result().pmax().row() + ldelta,
1036  group_bbox(g).to_result().pmin().col()), debug),
1037  scribo::internal::point_row_adjust(
1038  point2d(group_bbox(g).to_result().pmax().row() + ldelta,
1039  group_bbox(g).to_result().pmax().col()), debug),
1040  literal::red);
1041 # endif // ! SCRIBO_NDEBUG
1042  continue;
1043  }
1044  }
1045 
1046  box2d
1047  brot = internal::fast_rotate_negative(group_bbox(g).to_result(),
1048  rbbox);
1049 
1050  // Checking if delimitor is skewed.
// `- delta` negates an unsigned value: the helpers subtract it from a
// coordinate, which amounts to shifting the tested segment by +delta
// columns through modular arithmetic.
1051  if (internal::is_valid_left_right_skewed_delimitor(brot, input,
1052  - delta))
1053  {
1054  mln::draw::line(delimitors,
1055  scribo::internal::point_col_adjust(
1056  point2d(brot.pmin().row(),
1057  brot.pmin().col() + delta),
1058  delimitors),
1059  scribo::internal::point_col_adjust(
1060  point2d(brot.pmax().row(),
1061  brot.pmax().col() + delta),
1062  delimitors),
1063  true);
1064  }
1065  else if (internal::is_valid_right_left_skewed_delimitor(brot,
1066  input,
1067  - delta))
1068  {
1069  mln::draw::line(delimitors,
1070  scribo::internal::point_col_adjust(
1071  point2d(brot.pmin().row(),
1072  brot.pmax().col() + delta),
1073  delimitors),
1074  scribo::internal::point_col_adjust(
1075  point2d(brot.pmax().row(),
1076  brot.pmin().col() + delta),
1077  delimitors),
1078  true);
1079  }
1080  else
1081  {
// Neither diagonal is free: draw a vertical segment at pmax.col + delta.
1082  mln::draw::line(delimitors,
1083  scribo::internal::point_col_adjust(
1084  point2d(brot.pmin().row(),
1085  brot.pmax().col() + delta),
1086  delimitors),
1087  scribo::internal::point_col_adjust(
1088  point2d(brot.pmax().row(),
1089  brot.pmax().col() + delta),
1090  delimitors),
1091  true);
1092  }
1093 
1094 # ifndef SCRIBO_NDEBUG
1095  mln::draw::line(debug,
1096  scribo::internal::point_row_adjust(
1097  point2d(group_bbox(g).to_result().pmax().row() + ldelta,
1098  group_bbox(g).to_result().pmin().col()), debug),
1099  scribo::internal::point_row_adjust(
1100  point2d(group_bbox(g).to_result().pmax().row() + ldelta,
1101  group_bbox(g).to_result().pmax().col()), debug),
1102  literal::green);
1103 # endif // ! SCRIBO_NDEBUG
1104  }
1105  }
1106 
1107  } // END OF BOTTOM LINKS PROCESSING
1108 
1109 
// 3. Turn the drawn delimitors into a component set.
1110  V ndelim;
1111  component_set<L>
1112  delim_comps = primitive::extract::components(delimitors, c8(), ndelim, component::WhitespaceSeparator);
1113 
// NOTE(review): listing line 1114 (type of `output`) is missing from
// this extract.
1115  output(delim_comps, delimitors);
1116 
1117 # ifndef SCRIBO_NDEBUG
1118  debug = preprocessing::rotate_90(debug, true);
1119  debug::logger().log_image(debug::All,
1120  debug, "alignments_debug");
1121 # endif // ! SCRIBO_NDEBUG
1122 
1123  return output;
1124  }
1125 
1126 # endif // ! MLN_INCLUDE_ONLY
1127 
1128  } // end of namespace scribo::primitive::extract
1129 
1130  } // end of namespace scribo::primitive
1131 
1132 } // end of namespace scribo
1133 
1134 #endif // ! SCRIBO_PRIMITIVE_EXTRACT_ALIGNMENTS_HH