27 #ifndef SCRIBO_TOOLCHAIN_INTERNAL_TEXT_IN_DOC_PREPROCESS_FUNCTOR_HH
28 # define SCRIBO_TOOLCHAIN_INTERNAL_TEXT_IN_DOC_PREPROCESS_FUNCTOR_HH
35 #include <mln/core/concept/image.hh>
36 #include <mln/data/transform.hh>
37 #include <mln/data/convert.hh>
38 #include <mln/fun/v2v/rgb_to_luma.hh>
40 #include <mln/subsampling/antialiased.hh>
41 #include <mln/util/timer.hh>
43 #include <scribo/binarization/sauvola.hh>
44 #include <scribo/binarization/sauvola_ms.hh>
46 #include <scribo/preprocessing/split_bg_fg.hh>
47 #include <scribo/preprocessing/deskew.hh>
48 #include <scribo/preprocessing/denoise.hh>
50 #include <scribo/toolchain/internal/toolchain_functor.hh>
/// Selector for the binarization algorithm applied by the preprocessing
/// functor. The code visible in this file compares against the enumerators
/// Sauvola and SauvolaMs; the complete enumerator list is not visible in
/// this excerpt.
69 enum Binarization_Algo
/// Declaration part of the preprocessing functor, parameterized by the input
/// image type I. NOTE(review): the class head itself is not visible in this
/// excerpt; presumably this introduces text_in_doc_preprocess_functor<I>.
114 template <
typename I>
/// Number of progress steps for the current option values (see the
/// definition: two fixed steps plus one per enabled optional stage).
121 virtual int nsteps()
const;
// Option flags. Each one gates the stage of operator() that reports the
// progress label quoted below.
/// Gates the "Subsample" stage.
135 bool enable_subsample;
/// Gates the "Foreground Extraction" stage.
136 bool enable_fg_extraction;
/// Gates the "Remove noise" stage.
138 bool enable_denoising;
/// Chooses the binarization branch taken in operator() (Sauvola,
/// SauvolaMs, or the remaining "Binary conversion" branch).
140 Binarization_Algo binarization_algo;
/// NOTE(review): presumably the window size handed to the Sauvola
/// binarization; its use is not visible in this excerpt - confirm.
142 unsigned sauvola_win;
// The three hooks below exist only when SCRIBO_NDEBUG is not defined, i.e.
// in debug builds.
153 # ifndef SCRIBO_NDEBUG
157 virtual void on_start();
158 virtual void on_end();
159 virtual void on_progress();
163 # endif // ! SCRIBO_NDEBUG
/// Derives a scale value from the dimensions of \p ima_ (its definition
/// compares nrows()/ncols() against 2500 and 5000). It is called from the
/// "Subsample" stage of operator().
166 unsigned find_best_scale(
const Image<I>& ima_);
171 # ifndef MLN_INCLUDE_ONLY
// Member-initializer list of the functor's constructor (the signature line
// is not visible in this excerpt). Every optional stage - subsampling,
// foreground extraction, deskew, denoising - starts disabled, and the
// default binarization algorithm is SauvolaMs.
173 template <
typename I>
175 : enable_subsample(false),
176 enable_fg_extraction(false),
177 enable_deskew(false),
178 enable_denoising(false),
179 binarization_algo(SauvolaMs),
// Runs the preprocessing chain on input_. Each stage announces itself with
// on_new_progress_label() before doing its work. Visible stage order:
// Subsample, Foreground Extraction, gray-scale conversion, Deskew,
// binarization, Remove noise.
// NOTE(review): many statements of this function (return type, braces,
// the actual processing calls) are not visible in this excerpt.
193 template <
typename I>
195 text_in_doc_preprocess_functor<I>::operator()(const
Image<I>& input_)
197 mln_trace(
"scribo::toolchain::text_in_doc_preprocess");
// Recover the concrete image type and require a valid image.
199 const I& input =
exact(input_);
200 mln_precondition(input.is_valid());
// Optional stage: subsampling, with the scale obtained from
// find_best_scale(input_rgb).
208 if (enable_subsample)
210 on_new_progress_label(
"Subsample");
213 find_best_scale(input_rgb));
// Optional stage: foreground extraction.
221 if (enable_fg_extraction)
223 on_new_progress_label(
"Foreground Extraction");
// rlambda starts as lambda and is then reassigned to
// 1.2 * (nrows + ncols); the floating-point product is implicitly
// converted to unsigned by the assignment.
// NOTE(review): the condition guarding the reassignment is not visible
// in this excerpt.
227 unsigned rlambda = lambda;
229 rlambda = 1.2 * (input.nrows() + input.ncols());
// Continue with the second component of the split result.
// NOTE(review): presumably the foreground image - confirm against
// scribo::preprocessing::split_bg_fg.
235 input_rgb = res.second();
// Gray-scale conversion stage; no enabling flag for it is visible here.
243 on_new_progress_label(
"Convert to gray-scale image");
// Deskew stage. NOTE(review): its guard is not visible in this excerpt;
// presumably enable_deskew - confirm.
254 on_new_progress_label(
"Deskew");
// Binarization: dispatch on binarization_algo.
264 if (binarization_algo == Sauvola)
266 on_new_progress_label(
"Binarization (Sauvola)");
269 else if (binarization_algo == SauvolaMs)
271 on_new_progress_label(
"Binarization (Sauvola Multi-scale)");
// NOTE(review): presumably the fallback branch taken when neither Sauvola
// variant is selected; the 'else' line itself is not visible here.
280 on_new_progress_label(
"Binarization (Binary conversion)");
// Optional stage: noise removal.
289 if (enable_denoising)
291 on_new_progress_label(
"Remove noise");
// Number of progress steps for the current configuration: two steps are
// always counted, plus one for each enabled optional stage. Each bool flag
// is promoted to 0 or 1 in the sum.
// NOTE(review): the two fixed steps are presumably the gray-scale
// conversion and the binarization - confirm against operator().
307 text_in_doc_preprocess_functor<I>::nsteps()
const
309 return 2 + enable_denoising + enable_deskew
310 + enable_fg_extraction + enable_subsample;
// Chooses a scale from the dimensions of ima_ by testing two size classes:
// both dimensions strictly between 2500 and 5000, and both dimensions
// strictly greater than 5000.
// NOTE(review): the returned values are not visible in this excerpt. Also,
// an image with a dimension equal to exactly 2500 or 5000, or with one
// dimension in each class, matches neither visible condition; check that
// the fall-through result is the intended one for those sizes.
314 template <
typename I>
316 text_in_doc_preprocess_functor<I>::find_best_scale(
const Image<I>& ima_)
318 const I& ima =
exact(ima_);
// Size class 1: 2500 < nrows < 5000 and 2500 < ncols < 5000.
319 if (ima.nrows() > 2500
320 && ima.nrows() < 5000
321 && ima.ncols() > 2500
322 && ima.ncols() < 5000)
// Size class 2: nrows > 5000 and ncols > 5000.
325 if (ima.nrows() > 5000
326 && ima.ncols() > 5000)
333 # ifndef SCRIBO_NDEBUG
// Debug-only hook (this section is compiled only when SCRIBO_NDEBUG is not
// defined).
// NOTE(review): the body is not visible in this excerpt; presumably it
// starts the timers printed by on_end() and on_progress() - confirm.
335 template <
typename I>
337 text_in_doc_preprocess_functor<I>::on_start()
// Debug-only hook: writes "Total time: " followed by gt to std::cout and
// ends the line with std::endl.
// NOTE(review): gt is not declared in the visible lines; presumably a
// timer covering the whole run - confirm.
343 template <
typename I>
345 text_in_doc_preprocess_functor<I>::on_end()
349 std::cout <<
"Total time: " << gt << std::endl;
// Debug-only hook: writes t to std::cout and ends the line with std::endl.
// NOTE(review): t is not declared in the visible lines; presumably a
// per-step timer - confirm.
352 template <
typename I>
354 text_in_doc_preprocess_functor<I>::on_progress()
358 std::cout << t << std::endl;
363 # endif // ! SCRIBO_NDEBUG
366 # endif // ! MLN_INCLUDE_ONLY
375 #endif // ! SCRIBO_TOOLCHAIN_INTERNAL_TEXT_IN_DOC_PREPROCESS_FUNCTOR_HH