27 #ifndef SCRIBO_PRIMITIVE_EXTRACT_ALIGNMENTS_HH
28 # define SCRIBO_PRIMITIVE_EXTRACT_ALIGNMENTS_HH
36 # include <mln/core/image/image2d.hh>
37 # include <mln/core/alias/neighb2d.hh>
39 # include <mln/data/convert.hh>
41 # include <mln/value/rgb8.hh>
42 # include <mln/value/label_16.hh>
43 # include <mln/value/int_u.hh>
44 # include <mln/literal/colors.hh>
46 # include <mln/util/array.hh>
47 # include <mln/util/couple.hh>
49 # include <mln/io/pbm/load.hh>
51 # include <mln/util/couple.hh>
53 # include <scribo/core/def/lbl_type.hh>
54 # include <scribo/primitive/extract/components.hh>
55 # include <scribo/filter/object_links_aligned.hh>
56 # include <scribo/filter/object_links_bbox_overlap.hh>
57 # include <scribo/filter/object_groups_small.hh>
58 # include <scribo/preprocessing/denoise_fg.hh>
59 # include <scribo/primitive/link/internal/link_single_dmax_ratio_aligned_delta_base.hh>
60 # include <scribo/primitive/link/internal/dmax_default.hh>
61 # include <scribo/primitive/link/internal/dmax_hrules.hh>
62 # include <scribo/primitive/link/merge_double_link_closest_aligned.hh>
64 # include <scribo/debug/usage.hh>
65 # include <scribo/debug/links_image.hh>
67 # include <scribo/core/document.hh>
68 # include <scribo/core/component_set.hh>
69 # include <scribo/core/line_set.hh>
70 # include <scribo/primitive/extract/components.hh>
71 # include <scribo/primitive/group/from_single_link.hh>
72 # include <scribo/primitive/group/apply.hh>
73 # include <scribo/primitive/link/with_single_left_link_dmax_ratio.hh>
74 # include <scribo/primitive/link/with_single_right_link_dmax_ratio.hh>
75 # include <scribo/primitive/link/merge_double_link.hh>
76 # include <scribo/primitive/link/internal/dmax_width_and_height.hh>
78 # include <scribo/preprocessing/rotate_90.hh>
79 # include <scribo/filter/object_links_bbox_h_ratio.hh>
// Forward declaration of a routine templated on the labeled-image type L.
// Only its last two parameters are visible in this excerpt: a float
// `dmax_ratio` and an unsigned `delta_pixel`.
// NOTE(review): presumably the declaration of extract::alignments, whose
// definition appears further down with the same trailing parameters - confirm.
166 template <
typename L>
169 float dmax_ratio,
unsigned delta_pixel);
173 # ifndef MLN_INCLUDE_ONLY
185 template <
typename V>
204 template <
typename V>
// Link functor that adds a whitespace check on top of the validation done by
// link_single_dmax_ratio_aligned_delta_base. The class passes itself as the
// last template argument of its base.
//
// L is the labeled-image type, F the dmax functor type.
// NOTE(review): several constructor parameters, braces and loop bodies are
// not visible in this excerpt; comments below only describe the visible lines.
237 template <
typename L,
typename F>
238 class single_right_dmax_ratio_aligned_delta_functor
239 :
public link_single_dmax_ratio_aligned_delta_base<L, F, single_right_dmax_ratio_aligned_delta_functor<L,F> >
241 typedef single_right_dmax_ratio_aligned_delta_functor<L,F> self_t;
242 typedef link_single_dmax_ratio_aligned_delta_base<L, F, self_t> super_;
// Site (point) type of the labeled image.
245 typedef mln_site(L) P;
// Constructor: forwards components, dmax_f, delta and delta_direction to the
// base class, and stores bbox_ima and delta_ws_lookup for the whitespace
// lookup performed in valid_link_().
247 single_right_dmax_ratio_aligned_delta_functor(
249 const DMax_Functor<F>& dmax_f,
251 anchor::Direction delta_direction,
253 unsigned delta_ws_lookup)
254 : super_(components, dmax_f, delta, delta_direction),
255 bbox_ima_(bbox_ima), delta_ws_lookup_(delta_ws_lookup)
257 # ifndef SCRIBO_NDEBUG
259 # endif // ! SCRIBO_NDEBUG
// Body not visible in this excerpt; presumably moves p to the next site to
// inspect while looking for a neighbor - TODO confirm.
262 void compute_next_site_(P& p)
// Link validation hook. The base-class check is evaluated first; the
// statement executed when it fails is not visible (presumably the link is
// rejected - confirm).
270 valid_link_(
unsigned current_object,
271 const P& start_point,
275 if (!super_::valid_link_(current_object, start_point, p, anchor))
// Label found at p, used below as the candidate neighbor's component id.
278 mln_value(L) nbh = this->labeled_image_(p);
// Top anchor: the lookup row is derived from the neighbor's bbox top row
// minus delta_ws_lookup_, the start column from current_object's bbox left
// column.
283 case anchor::StrictTopCenter:
286 this->components_(nbh).
bbox().pmin().row()) - delta_ws_lookup_,
287 this->components_(current_object).
bbox().pmin().col());
// Adjust the row of p with respect to bbox_ima_ (presumably clamping it into
// the image domain - see point_row_adjust).
288 p = scribo::internal::point_row_adjust(p, bbox_ima_);
// Scan continues while p's column has not passed the neighbor's bbox right
// column and bbox_ima_ is 0 at p. The column increment is not visible here.
290 for (; p.col() <= this->components_(nbh).bbox().pmax().col()
291 && (bbox_ima_(p) == 0);)
293 # ifndef SCRIBO_NDEBUG
// Debug builds only: mark the visited site in violet.
294 debug_(p) = literal::violet;
295 # endif // ! SCRIBO_NDEBUG
// Result is true when bbox_ima_ is still 0 at the site where the scan stopped.
299 return bbox_ima_(p) == 0;
// Bottom anchor: same lookup, but the row is derived from the neighbor's bbox
// bottom row plus delta_ws_lookup_.
303 case anchor::StrictBottomCenter:
306 this->components_(nbh).
bbox().pmax().row()) + delta_ws_lookup_,
307 this->components_(current_object).
bbox().pmin().col());
308 p = scribo::internal::point_row_adjust(p, bbox_ima_);
310 for (; p.col() <= this->components_(nbh).bbox().pmax().col()
311 && (bbox_ima_(p) == 0);)
313 # ifndef SCRIBO_NDEBUG
314 debug_(p) = literal::violet;
315 # endif // ! SCRIBO_NDEBUG
319 return bbox_ima_(p) == 0;
// Any other anchor only emits a trace warning (the value returned in that
// case is not visible in this excerpt).
323 mln_trace_warning(
"anchor not handled!");
// Row offset applied when looking for whitespace (subtracted for the top
// anchor, added for the bottom anchor).
331 unsigned delta_ws_lookup_;
333 # ifndef SCRIBO_NDEBUG
335 # endif // ! SCRIBO_NDEBUG
// Leftward counterpart of single_right_dmax_ratio_aligned_delta_functor: it
// adds a whitespace check on top of the validation done by
// link_single_dmax_ratio_aligned_delta_base. The class passes itself as the
// last template argument of its base.
//
// L is the labeled-image type, F the dmax functor type.
// NOTE(review): some constructor parameters, braces and loop bodies are not
// visible in this excerpt; comments below only describe the visible lines.
339 template <
typename L,
typename F>
340 class single_left_dmax_ratio_aligned_delta_functor
341 :
public link_single_dmax_ratio_aligned_delta_base<L, F, single_left_dmax_ratio_aligned_delta_functor<L,F> >
343 typedef single_left_dmax_ratio_aligned_delta_functor<L,F> self_t;
344 typedef link_single_dmax_ratio_aligned_delta_base<L, F, self_t> super_;
// Site (point) type of the labeled image.
347 typedef mln_site(L) P;
// Constructor: forwards components, dmax_f, delta and delta_direction to the
// base class, and stores bbox_ima and delta_ws_lookup for the whitespace
// lookup performed in valid_link_().
349 single_left_dmax_ratio_aligned_delta_functor(
350 const component_set<L>& components,
351 const DMax_Functor<F>& dmax_f,
353 anchor::Direction delta_direction,
355 unsigned delta_ws_lookup)
356 : super_(components, dmax_f, delta, delta_direction),
357 bbox_ima_(bbox_ima), delta_ws_lookup_(delta_ws_lookup)
359 # ifndef SCRIBO_NDEBUG
361 # endif // ! SCRIBO_NDEBUG
// Body not visible in this excerpt; presumably moves p to the next site to
// inspect while looking for a neighbor - TODO confirm.
364 void compute_next_site_(P& p)
// Link validation hook. The base-class check is evaluated first; the
// statement executed when it fails is not visible (presumably the link is
// rejected - confirm).
372 valid_link_(
unsigned current_object,
373 const P& start_point,
377 if (!super_::valid_link_(current_object, start_point, p, anchor))
// Label found at p, used below as the candidate neighbor's component id.
380 mln_value(L) nbh = this->labeled_image_(p);
// Top anchor: the lookup row is derived from current_object's bbox top row
// minus delta_ws_lookup_, the start column from current_object's bbox right
// column.
// NOTE(review): the right functor derives the row from the neighbor (nbh)
// bbox, whereas this one uses current_object - confirm the asymmetry is
// intended.
386 case anchor::StrictTopCenter:
389 this->components_(current_object).
bbox().pmin().row()) - delta_ws_lookup_,
390 this->components_(current_object).
bbox().pmax().col());
// Adjust the row of p with respect to bbox_ima_ (presumably clamping it into
// the image domain - see point_row_adjust).
391 p = scribo::internal::point_row_adjust(p, bbox_ima_);
// Scan continues while p's column is strictly greater than the neighbor's
// bbox left column and bbox_ima_ is 0 at p. The column update is not visible
// here (presumably a decrement, since the scan goes leftwards).
393 for (; p.col() > this->components_(nbh).bbox().pmin().col()
394 && (bbox_ima_(p) == 0);)
396 # ifndef SCRIBO_NDEBUG
// Debug builds only: mark the visited site in violet.
397 debug_(p) = literal::violet;
398 # endif // ! SCRIBO_NDEBUG
// Result is true when bbox_ima_ is still 0 at the site where the scan stopped.
402 return bbox_ima_(p) == 0;
// Bottom anchor: same lookup, but the row is derived from current_object's
// bbox bottom row plus delta_ws_lookup_.
406 case anchor::StrictBottomCenter:
409 this->components_(current_object).
bbox().pmax().row()) + delta_ws_lookup_,
410 this->components_(current_object).
bbox().pmax().col());
411 p = scribo::internal::point_row_adjust(p, bbox_ima_);
413 for (; p.col() > this->components_(nbh).bbox().pmin().col()
414 && (bbox_ima_(p) == 0);)
416 # ifndef SCRIBO_NDEBUG
417 debug_(p) = literal::violet;
418 # endif // ! SCRIBO_NDEBUG
422 return bbox_ima_(p) == 0;
// Any other anchor only emits a trace warning (the value returned in that
// case is not visible in this excerpt).
426 mln_trace_warning(
"anchor not handled!");
// Row offset applied when looking for whitespace (subtracted for the top
// anchor, added for the bottom anchor).
434 unsigned delta_ws_lookup_;
436 # ifndef SCRIBO_NDEBUG
438 # endif // ! SCRIBO_NDEBUG
// Selection predicate on a text line: returns true when the height of the
// line's bounding box is strictly less than 301.
// NOTE(review): the threshold is hard-coded; its unit is presumably pixels -
// confirm.
453 template <
typename L>
454 bool pass_comp_criterion(
const line_info<L>&
line)
456 return line.bbox().height() < 301;
// Test used by the alignment extraction on a candidate delimitor box `brot`.
// Only part of this function is visible in this excerpt: its return type,
// its middle parameter(s) and most of its body are missing. Call sites in
// this file pass `brot`, `input` and further arguments, and use the result as
// a condition.
// Visible behavior: the columns of the two points p1 and p2 are both shifted
// left by local_delta.
// NOTE(review): how p1/p2 are initialised and what is tested afterwards
// cannot be told from here.
462 is_valid_left_right_skewed_delimitor(
const box2d& brot,
464 unsigned local_delta)
473 p1.
col() -= local_delta;
474 p2.col() -= local_delta;
// Test used by the alignment extraction on a candidate delimitor box `brot`.
// Only part of this function is visible in this excerpt: its return type,
// its middle parameter(s) and most of its body are missing. Call sites in
// this file use the result as a condition.
// Visible behavior:
//  - a branch is taken when the column extent of brot
//    (pmax().col() - pmin().col()) is greater than 6; what that branch does is
//    not visible;
//  - p1's column is set to brot's right column minus local_delta and p2's
//    column to brot's left column minus local_delta.
// NOTE(review): the literal 6 is a hard-coded threshold whose rationale is
// not visible here.
490 is_valid_right_left_skewed_delimitor(const
box2d& brot,
492 unsigned local_delta)
494 if (brot.pmax().col() - brot.pmin().col() > 6)
501 p1.
col() = brot.pmax().col() - local_delta;
502 p2.col() = brot.pmin().col() - local_delta;
520 rbox.ncols() - box.pmin().row() - 1),
522 rbox.ncols() - box.pmax().row() - 1));
// Computes a local delta from character statistics:
//   1.2 * char_width + char_space.
// The return type is not visible in this excerpt; callers in this file
// assign the result to an `unsigned`, so any fractional part is dropped at
// that point at the latest.
// Called when at least one width sample was collected for the group
// (see the callers' `card() == 0` test).
544 compute_reliable_ldelta(
const value::int_u12& char_width,
545 const value::int_u12& char_space)
547 return 1.2 * char_width + char_space;
// Fallback local delta: twice the given maximum character width.
// Callers in this file use it when no width sample was collected for the
// group (their width accumulator reports card() == 0).
// The return type is not visible in this excerpt.
553 compute_unreliable_ldelta(
const value::int_u12& max_char_width)
555 return max_char_width * 2;
// Definition of the routine traced as "scribo::primitive::extract::alignments".
//
// Visible parameters: `dmax_ratio` (forwarded to the dmax_hrules functor) and
// `delta_pixel` (forwarded to the left/right link functors). The body also
// uses `doc`, `input` and `delta`, whose declarations are not visible in this
// excerpt (presumably the leading parameters - confirm).
//
// Flow, as far as the visible lines show:
//  1. Draw the rotated bounding box of every valid line passing
//     pass_comp_criterion() into bbox_ima and wrap it in a component_set.
//  2. Top pass: build links/groups with the StrictTopCenter anchor, collect
//     per-group character statistics, compute each group's bounding box,
//     run a whitespace lookup that may invalidate the group, then rotate the
//     box back and test it as a skewed delimitor.
//  3. Bottom pass: same processing with the StrictBottomCenter anchor.
//  4. Build `output` from delim_comps and delimitors.
// NOTE(review): many lines (declarations, braces, loop bodies) are missing
// from this excerpt; comments below only describe what is visible.
569 template <
typename L>
572 float dmax_ratio,
unsigned delta_pixel)
574 mln_trace(
"scribo::primitive::extract::alignments");
// The document must be valid and contain text; the input image must be valid.
575 mln_precondition(doc.is_valid());
576 mln_precondition(doc.has_text());
578 mln_assertion(input.is_valid());
// NOTE(review): the use of min_card is not visible in this excerpt.
580 unsigned min_card = 3;
584 typedef mln_value(L) V;
585 const object_groups<L>& groups = doc.paragraphs().lines().groups();
586 const line_set<L>& lines = doc.lines();
// Corner point built from input's pmax with column and row swapped
// (point2d takes the row first), i.e. the extent of the transposed image.
597 point2d(input.domain().pmax().col(),
598 input.domain().pmax().row()));
// Draw the rotated bbox of each valid line that passes the height criterion.
602 for_all_lines(l, lines)
603 if (lines(l).is_valid()
604 && internal::pass_comp_criterion(lines(l)))
605 mln::draw::box(bbox_ima,
606 internal::fast_rotate_positive(lines(l).
bbox(),
// Component set built on bbox_ima with one slot per line.
616 components = component_set<L>(bbox_ima, lines.nelements());
// NOTE(review): the meaning of the literal 100 is defined by dmax_hrules -
// confirm against its constructor.
620 primitive::link::internal::dmax_hrules dmax_func(dmax_ratio, 100);
625 initialize(delimitors, input);
628 # ifndef SCRIBO_NDEBUG
// Debug image shares the geometry of the components' labeled image.
630 initialize(debug, components.labeled_image());
633 # endif // ! SCRIBO_NDEBUG
// ---- Top pass -------------------------------------------------------------
640 object_links<L> top_links;
641 object_groups<L> top_groups;
645 top_median_char_width,
646 top_median_char_space;
// Right-looking link functor (Vertical delta direction).
658 primitive::link::internal::single_right_dmax_ratio_aligned_delta_functor<
659 L,primitive::link::internal::dmax_hrules>
660 functor(components, dmax_func, delta_pixel, anchor::Vertical, bbox_ima, delta);
// Left-looking link functor (Vertical delta direction).
667 primitive::link::internal::single_left_dmax_ratio_aligned_delta_functor<
668 L,primitive::link::internal::dmax_hrules>
669 functor(components, dmax_func, delta_pixel, anchor::Vertical, bbox_ima, delta);
676 anchor::StrictTopCenter);
// One statistics slot per group.
693 top_median_char_space.
resize(top_groups.nelements());
694 top_median_char_width.
resize(top_groups.nelements());
695 top_max_char_width.
resize(top_groups.nelements());
// For every element of every valid group: lines with more than one component
// feed the char-space/char-width accumulators; other lines only update the
// group's maximum char width.
697 for_all_groups(g, top_groups)
698 if (top_groups(g).is_valid())
699 for_all_elements(e, top_groups(g).component_ids())
701 unsigned l = top_groups(g).component_ids()(e);
702 if (lines(l).card() > 1)
704 top_median_char_space(g).take(lines(l).char_space());
705 top_median_char_width(g).take(lines(l).char_width());
707 else if (top_max_char_width(g) < lines(l).char_width())
708 top_max_char_width(g) = lines(l).char_width();
// One bounding-box accumulator per group.
716 group_bbox(top_groups.nelements());
719 for_all_groups(g, top_groups)
720 for_all_elements(e, top_groups(g).component_ids())
722 unsigned l = top_groups(g).component_ids()(e);
// Points of valid groups are accumulated into the group's bbox.
726 if (top_groups(g).is_valid())
728 group_bbox(g).take(p);
731 # ifndef SCRIBO_NDEBUG
// Debug only: uses the rotated bbox of the first component of line l and of
// the line it is linked to.
734 internal::fast_rotate_positive(
735 groups.components()(lines(l).component_ids()[0]).
bbox(),
740 internal::fast_rotate_positive(
741 groups.components()(lines(top_links(l)).component_ids()[0]).
bbox(),
744 # endif // ! SCRIBO_NDEBUG
746 # ifndef SCRIBO_NDEBUG
// Debug only: groups that are not valid but hold more than one element.
747 else if (top_groups(g).
card() > 1)
756 internal::fast_rotate_positive(
757 groups.components()(lines(l).component_ids()[0]).
bbox(),
762 internal::fast_rotate_positive(
763 groups.components()(lines(top_links(l)).component_ids()[0]).
bbox(),
767 # endif // ! SCRIBO_NDEBUG
// Whitespace lookup and delimitor extraction for each valid top group.
771 for_all_groups(g, top_groups)
772 if (top_groups(g).is_valid())
// Local delta defaults to `delta`; for groups of 3 or 4 elements it is
// recomputed from character statistics (fallback on the max char width when
// no width sample was collected).
774 unsigned ldelta =
delta;
777 if (top_groups(g).
card() >= 3 && top_groups(g).
card() < 5)
780 if (top_median_char_width(g).
card() == 0)
781 ldelta = internal::compute_unreliable_ldelta(top_max_char_width(g));
783 ldelta = internal::compute_reliable_ldelta(top_median_char_width(g),
784 top_median_char_space(g));
789 p = scribo::internal::point_row_adjust(p, bbox_ima);
// Scan columns up to the group's bbox right column while bbox_ima is 0 at p
// (the column increment is not visible here).
792 for (; p.
col() <= group_bbox(g).to_result().pmax().col()
793 && (bbox_ima(p) == 0);)
// A non-zero value at the stop position leads to the group being invalidated.
796 if (bbox_ima(p) != 0)
798 top_groups(g).invalidate();
800 # ifndef SCRIBO_NDEBUG
802 scribo::internal::point_row_adjust(
804 group_bbox(g).
to_result().pmin().col()), debug),
805 scribo::internal::point_row_adjust(
807 group_bbox(g).
to_result().pmax().col()), debug),
809 # endif // ! SCRIBO_NDEBUG
// Rotate the group's bbox back (inverse of fast_rotate_positive, presumably)
// before testing it as a skewed delimitor.
816 brot = internal::fast_rotate_negative(group_bbox(g).
to_result(),
820 if (internal::is_valid_left_right_skewed_delimitor(brot, input,
824 scribo::internal::point_col_adjust(
827 scribo::internal::point_col_adjust(
832 else if (internal::is_valid_right_left_skewed_delimitor(brot, input,
836 scribo::internal::point_col_adjust(
839 scribo::internal::point_col_adjust(
847 scribo::internal::point_col_adjust(
850 scribo::internal::point_col_adjust(
856 # ifndef SCRIBO_NDEBUG
858 scribo::internal::point_row_adjust(
860 group_bbox(g).
to_result().pmin().col()), debug),
861 scribo::internal::point_row_adjust(
863 group_bbox(g).
to_result().pmax().col()), debug),
865 # endif // ! SCRIBO_NDEBUG
// ---- Bottom pass ----------------------------------------------------------
875 object_links<L> bot_links;
876 object_groups<L> bot_groups;
880 bot_median_char_width,
881 bot_median_char_space;
887 object_links<L> right, left;
// Right-looking link functor (Vertical delta direction).
891 primitive::link::internal::single_right_dmax_ratio_aligned_delta_functor<
892 L,primitive::link::internal::dmax_hrules>
893 functor(components, dmax_func, delta_pixel, anchor::Vertical,
// Left-looking link functor (Vertical delta direction).
901 primitive::link::internal::single_left_dmax_ratio_aligned_delta_functor<
902 L,primitive::link::internal::dmax_hrules>
903 functor(components, dmax_func, delta_pixel, anchor::Vertical,
911 anchor::StrictBottomCenter);
// One statistics slot per group.
921 bot_median_char_space.
resize(bot_groups.nelements());
922 bot_median_char_width.
resize(bot_groups.nelements());
923 bot_max_char_width.
resize(bot_groups.nelements());
// Same statistics gathering as in the top pass.
925 for_all_groups(g, bot_groups)
926 if (bot_groups(g).is_valid())
927 for_all_elements(e, bot_groups(g).component_ids())
929 unsigned l = bot_groups(g).component_ids()(e);
931 if (lines(l).card() > 1)
933 bot_median_char_space(g).take(lines(l).char_space());
934 bot_median_char_width(g).take(lines(l).char_width());
938 else if (bot_max_char_width(g) < lines(l).char_width())
939 bot_max_char_width(g) = lines(l).char_width();
// One bounding-box accumulator per group.
947 group_bbox(bot_groups.nelements());
950 for_all_groups(g, bot_groups)
951 for_all_elements(e, bot_groups(g).component_ids())
953 unsigned l = bot_groups(g).component_ids()(e);
957 if (bot_groups(g).is_valid())
// NOTE(review): this extra group_of(l) validity test has no counterpart in
// the visible lines of the top pass - confirm whether that is intended.
959 if (bot_groups.group_of(l).is_valid())
961 group_bbox(g).take(p);
964 # ifndef SCRIBO_NDEBUG
// Debug only: uses the rotated bbox of the last component of line l and of
// the line it is linked to (the top pass uses the first component).
967 internal::fast_rotate_positive(
968 groups.components()(lines(l).component_ids()[lines(l).card() - 1]).
bbox(),
973 internal::fast_rotate_positive(
974 groups.components()(lines(bot_links(l)).component_ids()[lines(bot_links(l)).card() - 1]).
bbox(),
977 # endif // ! SCRIBO_NDEBUG
980 # ifndef SCRIBO_NDEBUG
// Debug only: groups that are not valid but hold more than one element.
981 else if (bot_groups(g).
card() > 1)
990 internal::fast_rotate_positive(
991 groups.components()(lines(l).component_ids()[lines(l).card() - 1]).
bbox(),
996 internal::fast_rotate_positive(
997 groups.components()(lines(bot_links(l)).component_ids()[lines(bot_links(l)).card() - 1]).
bbox(),
1001 # endif // ! SCRIBO_NDEBUG
// Whitespace lookup and delimitor extraction for each valid bottom group.
1005 for_all_groups(g, bot_groups)
1006 if (bot_groups(g).is_valid())
// Local delta defaults to `delta`; for groups of 3 or 4 elements it is
// recomputed from character statistics (fallback on the max char width when
// no width sample was collected).
1008 unsigned ldelta =
delta;
1010 if (bot_groups(g).
card() >= 3 && bot_groups(g).
card() < 5)
1013 if (bot_median_char_width(g).
card() == 0)
1014 ldelta = internal::compute_unreliable_ldelta(bot_max_char_width(g));
1016 ldelta = internal::compute_reliable_ldelta(bot_median_char_width(g),
1017 bot_median_char_space(g));
// Lookup starts ldelta rows below the group's bbox bottom row, at the bbox
// left column.
1019 point2d p(bot_groups(g).
bbox().pmax().row() + ldelta,
1020 bot_groups(g).
bbox().pmin().col());
1021 p = scribo::internal::point_row_adjust(p, bbox_ima);
// Scan columns up to the group's bbox right column while bbox_ima is 0 at p
// (the column increment is not visible here).
1024 for (; p.
col() <= group_bbox(g).to_result().pmax().col()
1025 && (bbox_ima(p) == 0);)
// A non-zero value at the stop position leads to the group being invalidated.
1028 if (bbox_ima(p) != 0)
1030 bot_groups(g).invalidate();
1032 # ifndef SCRIBO_NDEBUG
1034 scribo::internal::point_row_adjust(
1036 group_bbox(g).
to_result().pmin().col()), debug),
1037 scribo::internal::point_row_adjust(
1039 group_bbox(g).
to_result().pmax().col()), debug),
1041 # endif // ! SCRIBO_NDEBUG
// Rotate the group's bbox back (inverse of fast_rotate_positive, presumably)
// before testing it as a skewed delimitor.
1047 brot = internal::fast_rotate_negative(group_bbox(g).
to_result(),
1051 if (internal::is_valid_left_right_skewed_delimitor(brot, input,
1055 scribo::internal::point_col_adjust(
1059 scribo::internal::point_col_adjust(
1065 else if (internal::is_valid_right_left_skewed_delimitor(brot,
1070 scribo::internal::point_col_adjust(
1074 scribo::internal::point_col_adjust(
1083 scribo::internal::point_col_adjust(
1087 scribo::internal::point_col_adjust(
1094 # ifndef SCRIBO_NDEBUG
1096 scribo::internal::point_row_adjust(
1098 group_bbox(g).
to_result().pmin().col()), debug),
1099 scribo::internal::point_row_adjust(
1101 group_bbox(g).
to_result().pmax().col()), debug),
1103 # endif // ! SCRIBO_NDEBUG
// Result built from the delimitor components and the delimitors image.
1115 output(delim_comps, delimitors);
1117 # ifndef SCRIBO_NDEBUG
// Debug only: the debug image is handed over under the name "alignments_debug".
1120 debug,
"alignments_debug");
1121 # endif // ! SCRIBO_NDEBUG
1126 # endif // ! MLN_INCLUDE_ONLY
1134 #endif // ! SCRIBO_PRIMITIVE_EXTRACT_ALIGNMENTS_HH