27 #ifndef SCRIBO_TOOLCHAIN_INTERNAL_TEXT_IN_DOC_FUNCTOR_HH
28 # define SCRIBO_TOOLCHAIN_INTERNAL_TEXT_IN_DOC_FUNCTOR_HH
34 # ifndef SCRIBO_NDEBUG
35 # include <mln/util/timer.hh>
36 # endif // ! SCRIBO_NDEBUG
38 # include <scribo/core/def/lbl_type.hh>
40 # include <scribo/primitive/extract/components.hh>
41 # include <scribo/primitive/extract/vertical_separators.hh>
42 # include <scribo/primitive/extract/separators_nonvisible.hh>
44 # include <scribo/primitive/remove/separators.hh>
46 # include <scribo/filter/object_links_bbox_h_ratio.hh>
47 # include <scribo/filter/objects_small.hh>
49 # include <scribo/primitive/group/from_single_link.hh>
51 # include <scribo/primitive/link/merge_double_link.hh>
52 # include <scribo/primitive/link/internal/dmax_width_and_height.hh>
53 # include <scribo/primitive/link/with_single_left_link_dmax_ratio.hh>
54 # include <scribo/primitive/link/with_single_right_link_dmax_ratio.hh>
56 # include <scribo/preprocessing/denoise_fg.hh>
58 # include <scribo/text/recognition.hh>
59 # include <scribo/text/merging.hh>
61 # include <scribo/make/debug_filename.hh>
63 # include <scribo/debug/logger.hh>
64 # include <scribo/debug/decision_image.hh>
65 # include <scribo/debug/bboxes_image.hh>
66 # include <scribo/debug/linked_bboxes_image.hh>
67 # include <scribo/debug/bboxes_enlarged_image.hh>
68 # include <scribo/debug/mean_and_base_lines_image.hh>
69 # include <scribo/debug/looks_like_a_text_line_image.hh>
71 # include <scribo/toolchain/internal/toolchain_functor.hh>
// Step count for this functor; the out-of-line definition appears further
// down in this file.
95 virtual int nsteps()
const;
// Switches for the optional stages. The constructor's initialiser list sets
// all three to true.
107 bool enable_denoising;
108 bool enable_line_seps;
109 bool enable_whitespace_seps;
// OCR language setting -- presumably forwarded to the text recognition
// stage; the use site is not visible in this excerpt (TODO confirm).
115 std::string ocr_language;
// The three hooks below are declared only when SCRIBO_NDEBUG is not defined.
118 # ifndef SCRIBO_NDEBUG
122 virtual void on_start();
123 virtual void on_end();
124 virtual void on_progress();
128 # endif // ! SCRIBO_NDEBUG
136 # ifndef MLN_INCLUDE_ONLY
// Constructor member-initialiser list (the constructor's signature line is
// not visible in this excerpt). Every optional stage starts enabled.
138 template <
typename I>
140 : enable_denoising(true),
141 enable_line_seps(true),
142 enable_whitespace_seps(true),
// Main processing entry point of the functor, applied to an input image.
// Only fragments of the body are visible in this excerpt; the comments below
// describe what those fragments show and nothing more.
153 template <
typename I>
155 text_in_doc_functor<I>::operator()(
const Image<I>& input)
// input_cleaned starts out as the exact (concrete-typed) input image.
162 input_cleaned = exact(input);
// Line-separator stage, guarded by enable_line_seps. Two progress labels
// follow this check; whether both sit inside the guarded scope cannot be
// determined from this excerpt.
164 if (enable_line_seps)
166 on_new_progress_label(
"Find vertical separators...");
173 on_new_progress_label(
"Remove separators...");
// Whitespace-separator stage, guarded by enable_whitespace_seps.
181 if (enable_whitespace_seps)
184 on_new_progress_label(
"Find whitespace separators...");
// The image/name pairs below are presumably arguments to debug image-logging
// calls; the calls themselves are not visible here (TODO confirm).
193 if (enable_whitespace_seps)
195 whitespaces,
"whitespaces");
197 if (enable_line_seps)
203 input_cleaned,
"input_wo_vseparators");
// Denoising stage, guarded by enable_denoising; input_cleaned is afterwards
// paired with the name "denoised".
208 if (enable_denoising)
210 on_new_progress_label(
"Denoise...");
216 input_cleaned,
"denoised");
222 on_new_progress_label(
"Finding components...");
// When whitespace separators are enabled they are added to the component
// set's separators; components.separators() is then paired with the name
// "all_separators".
232 if (enable_line_seps)
234 if (enable_whitespace_seps)
235 components.add_separators(whitespaces);
239 components.separators(),
"all_separators");
242 on_new_progress_label(
"Filtering components");
// Linking stage: a left and a right object_links<L> are built, both using
// the dmax_width_and_height(1) distance functor.
250 on_new_progress_label(
"Linking objects...");
252 object_links<L> left_link
254 primitive::link::internal::dmax_width_and_height(1),
256 object_links<L> right_link
258 primitive::link::internal::dmax_width_and_height(1),
264 debug::AuxiliaryResults,
// Link filtering stage: produces hratio_filtered_links and logs
// hratio_decision_image at the AuxiliaryResults level.
285 on_new_progress_label(
"Filtering objects");
288 object_links<L> hratio_filtered_links
297 hratio_filtered_links,
299 debug::
logger().log_image(debug::AuxiliaryResults,
300 hratio_decision_image,
301 "hratio_links_decision_image");
// Line rebuilding: groups are derived from the filtered links, then a line
// set is built from those groups.
307 on_new_progress_label("Rebuilding lines");
310 groups = primitive::group::from_single_link(hratio_filtered_links);
316 lines = scribo::make::line_set(groups);
// Work guarded by the debug logger being enabled. The "step1_*" / "step2_*"
// names appear below, but the calls consuming them are not visible here.
321 if (debug::
logger().is_enabled())
332 "step1_bboxes_enlarged");
337 "step1_looks_like_a_text_line");
367 "step2_looks_like_a_text_line");
// For every line whose tag is none of Merged, Ignored or Pathological, write
// one space-separated record to `file`: bbox pmin (row, col), bbox pmax
// (row, col), card, baseline, x_height, meanline, d_height, a_height,
// char_space, char_width. Each record is terminated with std::endl.
382 for_all_lines(l, lines)
383 if (lines(l).tag() !=
line::Merged
384 && lines(l).tag() !=
line::Ignored
385 && lines(l).tag() !=
line::Pathological)
387 file << lines(l).bbox().pmin().row() <<
" "
388 << lines(l).bbox().pmin().col() <<
" "
389 << lines(l).bbox().pmax().row() <<
" "
390 << lines(l).bbox().pmax().col() <<
" "
391 << lines(l).card() <<
" "
392 << lines(l).baseline() <<
" "
393 << lines(l).x_height() <<
" "
394 << lines(l).meanline() <<
" "
395 << lines(l).d_height() <<
" "
396 << lines(l).a_height() <<
" "
397 << lines(l).char_space() <<
" "
398 << lines(l).char_width() << std::endl;
410 on_new_progress_label(
"Recognizing text");
// Returns 6 plus one for each of enable_denoising, enable_line_seps and
// enable_whitespace_seps that is true (each bool contributes 0 or 1).
// NOTE(review): operator() shows ten on_new_progress_label calls, two of
// which follow the enable_line_seps check -- confirm that this count matches
// the number of progress notifications actually emitted.
426 text_in_doc_functor<I>::nsteps()
const
428 return 6 + enable_denoising + enable_line_seps
429 + enable_whitespace_seps;
433 # ifndef SCRIBO_NDEBUG
// Debug-only hook (this section is guarded by `# ifndef SCRIBO_NDEBUG`).
// Its body is not visible in this excerpt.
435 template <
typename I>
437 text_in_doc_functor<I>::on_start()
// Debug-only hook: prints "Total time: " followed by `gt` to std::cout and
// flushes with std::endl. `gt` is presumably a timer declared elsewhere in
// the class -- TODO confirm.
443 template <
typename I>
445 text_in_doc_functor<I>::on_end()
449 std::cout <<
"Total time: " << gt << std::endl;
// Debug-only hook: prints `t` to std::cout and flushes with std::endl.
// `t` is presumably a per-step timer declared elsewhere in the class --
// TODO confirm.
452 template <
typename I>
454 text_in_doc_functor<I>::on_progress()
458 std::cout << t << std::endl;
463 # endif // ! SCRIBO_NDEBUG
466 # endif // ! MLN_INCLUDE_ONLY
475 #endif // ! SCRIBO_TOOLCHAIN_INTERNAL_TEXT_IN_DOC_FUNCTOR_HH