27 #ifndef SCRIBO_TEXT_MERGING_HDOC_HH
28 # define SCRIBO_TEXT_MERGING_HDOC_HH
42 #include <mln/core/image/image2d.hh>
43 #include <mln/core/image/dmorph/image_if.hh>
44 #include <mln/util/array.hh>
45 #include <mln/io/pbm/load.hh>
46 #include <mln/io/pgm/save.hh>
48 #include <mln/data/fill.hh>
49 #include <mln/data/wrap.hh>
51 #include <mln/make/box2d.hh>
53 #include <mln/value/rgb8.hh>
54 #include <mln/io/ppm/save.hh>
56 #include <mln/draw/box.hh>
57 #include <mln/data/stretch.hh>
58 #include <mln/data/wrap.hh>
59 #include <mln/util/timer.hh>
61 #include <scribo/text/look_like_text_lines.hh>
91 # ifndef MLN_INCLUDE_ONLY
// Helper templated on T and T2. Its name and parameter list are elided in
// this excerpt; presumably the box-drawing helper invoked as draw_box(...)
// further down -- TODO confirm.
101 template <
typename T,
typename T2>
// Start from the address of the pixel located at the box's pmin() corner.
108 T* p_start = & input(b.
pmin());
// Walk the box row by row, then column by column (nrows/ncols are computed
// on lines not visible here; the loop bodies are elided as well).
110 for (
unsigned r = 0; r <
nrows; ++r)
113 for (
unsigned c = 0; c <
ncols; ++c)
// Variant of the helper above that receives the box as four explicit corner
// coordinates (name and remaining parameters are elided in this excerpt).
122 template <
typename T,
typename T2>
124 int pmin_row,
int pmin_col,
125 int pmax_row,
int pmax_col,
// Guard against an inverted (empty) extent; the action taken when the test
// holds is not visible here.
128 if (pmax_row < pmin_row || pmax_col < pmin_col)
// Last valid row / column index of the input.
132 input_nrows_1 = input.
nrows() - 1,
133 input_ncols_1 = input.
ncols() - 1;
// Clamp the requested corners so they stay within [0, nrows-1] x [0, ncols-1].
134 if (pmin_row < 0) pmin_row = 0;
135 if (pmin_col < 0) pmin_col = 0;
136 if (pmax_row > input_nrows_1) pmax_row = input_nrows_1;
137 if (pmax_col > input_ncols_1) pmax_col = input_ncols_1;
// Both corners are inclusive, hence the "+ 1" in the extents.
141 nrows = pmax_row - pmin_row + 1,
142 ncols = pmax_col - pmin_col + 1;
// Start from the address of the pixel at the (clamped) top-left corner.
143 T* p_start = & input.
at_(pmin_row, pmin_col);
// Row-major walk over the clamped extent (loop bodies elided).
145 for (
unsigned r = 0; r <
nrows; ++r)
148 for (
unsigned c = 0; c <
ncols; ++c)
// Recursive step of the root lookup: resolve the root of parent[x], store it
// back into parent[x] (path compression) and return it. The base case is on
// lines not visible in this excerpt.
162 return parent[x] = my_find_root(parent, parent[x]);
// Takes two line ids by value; the body is elided in this excerpt, so the
// exact ordering rule cannot be stated from here. It is called on the two
// resolved roots before a union is performed.
167 void swap_ordering(
unsigned l1,
unsigned l2)
// Union step over the `parent` forest (function name and signature elided;
// presumably the do_union(lines, l1, l2, parent) called by the merge pass --
// TODO confirm).
178 template <
typename L>
// Work on the representative (root) of each line.
184 l1 = my_find_root(parent, l1);
185 l2 = my_find_root(parent, l2);
189 swap_ordering(l1, l2);
192 line_info<L>& l1_info = lines(l1);
193 line_info<L>& l2_info = lines(l2);
// When l2 holds more components than l1, the two infos are exchanged before
// merging. `tmp` is declared on a line not visible here.
195 if (l2_info.card() > l1_info.card())
199 std::swap(l1_info, l2_info);
200 l1_info.fast_merge(tmp);
// The absorbed line is tagged as merged and hidden.
205 l2_info.update_tag(line::Merged);
206 l2_info.set_hidden(
true);
// Direct merge of l2 into l1 (presumably the branch taken when no exchange
// is needed -- the branching structure is elided).
209 l1_info.fast_merge(l2_info);
// Scans separator data between two lines identified by l1_ and l2_ (function
// name elided; presumably between_separators(lines, l1_, l2_) as called
// elsewhere in this file -- TODO confirm).
229 template <
typename L>
231 const line_id_t& l1_,
232 const line_id_t& l2_)
// Only meaningful when the component set carries separator information.
238 mln_precondition(lines.
components().has_separators());
240 const box2d& l1_bbox = l1.bbox();
241 const box2d& l2_bbox = l2.bbox();
// Three read-only cursors are advanced over boolean data (middle, top and
// bottom rows, judging by their names), plus an end marker for the middle one.
251 typedef const bool* sep_ptr_t;
252 sep_ptr_t sep_ptr, sep_ptr_top, sep_ptr_bottom, end;
// Half of the distance between the box centre row and its top row.
256 const unsigned quarter =
257 ((l1_bbox.pcenter().row() - l1_bbox.pmin().row()) >> 1);
261 l1_bbox.pcenter().col()));
263 l1_bbox.pcenter().col()));
// The scan spans col2 - col1 elements from the starting cursor.
264 end = sep_ptr + col2 - col1;
// Mirror case: same computation with the roles of col1 and col2 exchanged
// (most of this branch is elided).
268 const unsigned quarter =
276 end = sep_ptr + col1 - col2;
// Loop while none of the three cursors sits on a set element and the end
// marker has not been reached. Note that the cursors are dereferenced before
// `sep_ptr != end` is tested, so the element at `end` itself is read.
280 while (!*sep_ptr && !*sep_ptr_top && !*sep_ptr_bottom && sep_ptr != end)
// True when at least one cursor stopped on a set element.
287 return (*sep_ptr || *sep_ptr_top || *sep_ptr_bottom);
// Merge predicate between two lines l1 and l2 (name and signature elided;
// presumably lines_can_merge(lines, l1_, l2_) -- TODO confirm).
299 template <
typename L>
// Thresholds: maximum x-height ratio, and maximum baseline offset expressed
// as half of the smaller x-height of the two lines.
308 const float x_ratio_max = 1.7f;
309 const float baseline_delta_max =
310 0.5f *
std::min(l1.x_height(), l2.x_height());
312 const box2d& l1_bbox = l1.bbox();
313 const box2d& l2_bbox = l2.bbox();
315 const point2d& l1_pmin = l1_bbox.pmin();
317 const point2d& l1_pmax = l1_bbox.pmax();
// NOTE(review): despite its name, this flag is read from the whole component
// set (lines.components()), not from l1 specifically.
320 const bool l1_has_separators = lines.
components().has_separators();
// The separator scan is only run when separator information exists.
321 const bool l1_l2_between_separators = (l1_has_separators) ?
322 between_separators(lines, l1_, l2_) :
false;
323 const float l_ted_cw = l2.char_width();
// Tail of a condition (its beginning is elided): l1 lower than l2's x-height,
// dx below one char width of l2 with a negative dy, and no separator between
// the two lines. When it holds, l1 is retyped as punctuation.
335 && l1_bbox.height() < l2.x_height())
338 && (dx < l_ted_cw && dy < 0)
340 && not (l1_l2_between_separators))
343 l1.update_type(line::Punctuation);
351 top_row_l2 = l2_pmin.
row(),
352 top_row_l1 = l1_pmin.
row(),
353 bot_row = l2_pmax.
row();
354 const float x1 = l1.x_height(), x2 = l2.x_height();
// Fragment of a second condition: no separator in between, l2's bottom row
// close to l1's baseline, top rows within 5 rows of each other, and dx
// below five char widths of l2.
358 !l1_l2_between_separators
362 && (
std::abs(bot_row - l1.baseline()) < baseline_delta_max)
364 && (
std::abs(top_row_l2 - top_row_l1) < 5)
366 && dx < 5.0f * l_ted_cw)
// Same declaration as above; presumably in a different scope (braces elided).
374 const float x1 = l1.x_height(), x2 = l2.x_height();
// Tests against the thresholds defined at the top; the outcome of each test
// is on lines not visible here. x_ratio is computed on an elided line.
376 if (x_ratio > x_ratio_max)
382 if (
std::abs(l1.baseline() - l2.baseline()) > baseline_delta_max)
388 col1 = l1_bbox.pcenter().col(),
// Compare the centre columns shifted by a quarter of each box width, in both
// directions (col2 is defined on an elided line).
392 if ((col1 + l1_bbox.width() / 4) >= (col2 - l2_bbox.width() / 4))
396 if ((col2 + l2_bbox.width() / 4) >= (col1 - l1_bbox.width() / 4))
// With separator information available, the answer is the negation of
// "a separator lies between the two lines".
400 if (l1_has_separators)
401 return ! l1_l2_between_separators;
// Returns an int computed from two boxes; only the first parameter is visible
// in this excerpt (the second one and the body are elided).
410 int horizontal_distance(
const box2d& l1,
// Merge predicate between a current line l_cur and a text line l_ted (name
// and signature elided; presumably non_text_and_text_can_merge -- TODO
// confirm).
437 template <
typename L>
// Guard on the line types: l_cur being text, or l_ted not being text. The
// action taken when the guard holds is not visible here.
445 if (l_cur.type() == line::Text || l_ted.type() != line::Text)
// Tail of a condition involving the separator scan (beginning elided).
453 && between_separators(lines, l_cur_, l_ted_))
456 const box2d& l_cur_bbox = l_cur.bbox();
457 const box2d& l_ted_bbox = l_ted.bbox();
466 const float l_ted_cw = l_ted.char_width();
467 const float l_ted_x_height = l_ted.x_height();
469 const unsigned l_cur_height = l_cur_bbox.height();
470 const unsigned l_cur_width = l_cur_bbox.width();
// l_cur is retyped as punctuation when: its height is below l_ted's x-height
// but above 5% of it, its average width per component is below l_ted's char
// width, and its top row is above l_ted's baseline row.
473 if (l_cur_height < l_ted_x_height
474 && l_cur_height > 0.05f * l_ted_x_height
475 &&
float(l_cur_width) /
float(l_cur.card()) < l_ted.char_width()
477 && l_cur_pmin.
row() < l_ted.baseline())
479 l_cur.update_type(line::Punctuation);
// Fragment of a second condition: low height, width between 0.8 and 5 char
// widths, centre row within 7 rows of the midpoint between l_ted's baseline
// and meanline, and horizontal distance below two char widths.
// NOTE(review): horizontal_distance returns int and is cast to unsigned here;
// a negative distance would become a very large value -- confirm intended.
485 l_cur_height < l_ted_x_height
487 && l_cur_width > 0.8 * l_ted_cw
488 && l_cur_width < 5 * l_ted_cw
490 &&
std::abs((l_ted.baseline() + l_ted.meanline()) / 2 - l_cur.bbox().
pcenter().
row()) < 7
492 &&
unsigned(horizontal_distance(l_cur_bbox, l_ted_bbox)) < 2 * l_ted_cw
503 top_row = l_cur.bbox().
pmin().
row(),
504 bot_row = l_cur.bbox().
pmax().
row();
// Fragment of a third condition: l_cur's bottom row within 5 rows of l_ted's
// baseline, its top row within 5 rows of l_ted's meanline, and dx below five
// char widths (how the sub-conditions are combined is elided).
508 if ((
std::abs(bot_row - l_ted.baseline()) < 5
511 (
std::abs(top_row - l_ted.meanline()) < 5
513 && dx < 5.0f * l_ted_cw)
// One merging pass over the line ids stored in v. Several parameters and
// most braces / else branches are elided in this excerpt, so the comments
// below only describe what the visible statements do.
611 template <
typename L>
613 one_merge_pass(
unsigned ith_pass,
615 std::vector<scribo::line_id_t>& v,
622 # ifndef SCRIBO_NDEBUG
625 # endif // ! SCRIBO_NDEBUG
627 const unsigned n = v.size();
// Per-pass statistics counters, all starting at zero.
631 count_txtline_IN_txtline = 0,
632 count_txtline_IN_junk = 0,
633 count_two_lines_merge = 0,
634 count_new_txtline = 0,
635 count_comp_IN_txtline = 0,
636 count_comp_HITS_txtline = 0,
// Iterate from the last element of v down to the first.
639 for (
int i = n - 1; i >= 0; --i)
// b is the line's bounding box, b_ its extended bounding box (ebbox()).
646 const box2d& b = lines(l).bbox();
650 const box2d& b_ = lines(l).ebbox();
// Labels read from the billboard image at corners of the extended box:
// pmin(), (pmax().row(), pmin().col()) and pmax().
666 const unsigned tl = billboard(b_.
pmin());
671 const unsigned bl = billboard.at_(b_.
pmax().
row(), b_.
pmin().
col());
672 const unsigned br = billboard(b_.
pmax());
// Set of labels considered for this line (the insertions are elided).
674 typedef std::set<unsigned> set_t;
675 std::set<unsigned> labels;
// Sanity check over the collected labels: a non-text line in the set is
// reported on std::cerr.
686 for (set_t::const_iterator it = labels.begin();
693 if (lines(*it).type() != line::Text)
694 std::cerr <<
"outch: we have hit, so drawn, a non-text..." << std::endl;
// Case with exactly one collected label; `mc` is defined on an elided line.
698 if (labels.size() == 1)
704 const line_info<L>& l_info = lines(l);
705 const line_info<L>& mc_info = lines(mc);
707 if (l_info.type() == line::Text)
709 if (mc_info.type() == line::Text)
// Both l and mc are text lines.
711 ++count_txtline_IN_txtline;
733 const float l_ted_cw = mc_info.char_width();
// Fragment of the condition under which l and mc are united (parts of the
// expression are elided).
739 if ((l_info.card() <= 5 ||
740 (
std::abs(l_info.baseline() - mc_info.baseline())
742 mc_info.meanline()) < 5))
743 && dx < l_ted_cw && dy < 0
745 && between_separators(lines, l, mc)))
746 l = do_union(lines, l, mc, parent);
749 # ifndef SCRIBO_NDEBUG
751 draw_box(log, b, 126);
752 # endif // ! SCRIBO_NDEBUG
// l is text but mc is not: reported as an unexpected situation.
759 std::cerr <<
"error: should NOT happen (a text line included in a NON-text-line (so not drawn!!!)" << std::endl;
760 ++count_txtline_IN_junk;
// Stamp the line's extended box into the billboard with label l.
763 draw_box(billboard, lines(l).ebbox(), l);
765 # ifndef SCRIBO_NDEBUG
767 draw_box(log, b, 100);
768 # endif // ! SCRIBO_NDEBUG
// l is not text while mc is text.
774 if (lines(mc).type() == line::Text)
776 ++count_comp_IN_txtline;
788 if (!non_text_and_text_can_merge(lines, l, mc))
799 lines(l).update_type(line::Punctuation);
// Note the argument order: mc comes first in this union.
802 l_ = do_union(lines, mc, l, parent);
805 draw_box(billboard, lines(l_).ebbox(), l_);
807 # ifndef SCRIBO_NDEBUG
809 draw_box(log, b, 128);
810 # endif // ! SCRIBO_NDEBUG
// A text line is drawn into the billboard under its own label.
819 if (lines(l).type() == line::Text)
822 draw_box(billboard, lines(l).ebbox(), l);
823 # ifndef SCRIBO_NDEBUG
825 draw_box(log, b, 127);
826 # endif // ! SCRIBO_NDEBUG
828 # ifndef SCRIBO_NDEBUG
831 # endif // ! SCRIBO_NDEBUG
// Walk every collected label as a merge candidate for the current line.
839 for (set_t::const_iterator it = labels.begin();
843 unsigned lcand = *it;
852 if (lines(l_).type() == line::Text)
// Text line against candidate: unite when the merge predicate accepts, then
// redraw the resulting line in the billboard.
855 if (lines_can_merge(lines, l_, lcand))
857 ++count_two_lines_merge;
858 l_ = do_union(lines, l_, lcand, parent);
860 draw_box(billboard, lines(l_).ebbox(), l_);
862 # ifndef SCRIBO_NDEBUG
864 draw_box(log, b, 151);
865 # endif // ! SCRIBO_NDEBUG
873 # ifndef SCRIBO_NDEBUG
875 draw_box(log, b, 255);
876 # endif // ! SCRIBO_NDEBUG
879 draw_box(billboard, lines(l_).ebbox(), l_);
// Non-text line against candidate.
// NOTE(review): count_comp_HITS_txtline is incremented here and again right
// after the predicate below, so an accepted merge appears to be counted
// twice -- confirm whether that is intended.
885 ++count_comp_HITS_txtline;
886 if (non_text_and_text_can_merge(lines, l_, lcand))
889 ++count_comp_HITS_txtline;
890 l_ = do_union(lines, l_, lcand, parent);
891 draw_box(billboard, lines(l_).ebbox(), l_);
893 # ifndef SCRIBO_NDEBUG
895 draw_box(log, b, 169);
896 # endif // ! SCRIBO_NDEBUG
900 # ifndef SCRIBO_NDEBUG
904 draw_box(log, b, 254);
906 # endif // ! SCRIBO_NDEBUG
// Comparison functor over line ids, used with std::sort on the id vector.
// Constructor, members and the operator() signature are elided here.
954 template <
typename L>
955 struct order_lines_id
// Lines are compared on the number of sites of their bounding boxes.
964 const unsigned l1_nsites = lines_(l1).bbox().nsites();
965 const unsigned l2_nsites = lines_(l2).bbox().nsites();
// Equal sizes are handled separately (the tie-break itself is on an elided
// line); otherwise the smaller box orders first.
967 if (l1_nsites == l2_nsites)
969 return l1_nsites < l2_nsites;
// Driver of the merging procedure: builds the vector of line ids, sorts it,
// and runs two merge passes. Return type, remaining parameters and several
// statements are elided in this excerpt.
976 template <
typename L>
978 draw_boxes(
const box2d& input_domain,
984 order_lines_id<L> func(lines);
985 std::vector<scribo::line_id_t> v;
// Ids start at 1; the loop body (presumably filling v) is elided.
993 for (
unsigned l = 1; l < parent.
nelements(); ++l)
1000 std::sort(v.begin(), v.end(), func);
// Retype every line that passes the looks_like_a_text_line test as text.
1003 for_all_lines(l, lines)
1004 if (looks_like_a_text_line(lines(l)))
1005 lines(l).update_type(
line::Text);
// First pass, then re-sort and run a second pass.
1009 one_merge_pass(1, input_domain, v, lines, parent);
1015 std::sort(v.begin(), v.end(), func);
1019 one_merge_pass(2, input_domain, v, lines, parent);
// Ask the line set to recompute its statistics after the merges.
1021 lines.force_stats_update();
// Public entry point fragment (name and signature elided; presumably
// merging_hdoc, judging by the include guard -- TODO confirm). It forwards to
// internal::draw_boxes with the domain of the components' labeled image.
1032 template <typename L>
1036 using namespace mln;
1039 = internal::draw_boxes(lines.components().labeled_image().domain(),
1044 # endif // ! MLN_INCLUDE_ONLY
1050 #endif // ! SCRIBO_TEXT_MERGING_HDOC_HH