$extrastylesheet
Olena  User documentation 2.1
An Image Processing Platform
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
text_in_doc_preprocess_functor.hh
1 // Copyright (C) 2010, 2011, 2012 EPITA Research and Development
2 // Laboratory (LRDE)
3 //
4 // This file is part of Olena.
5 //
6 // Olena is free software: you can redistribute it and/or modify it under
7 // the terms of the GNU General Public License as published by the Free
8 // Software Foundation, version 2 of the License.
9 //
10 // Olena is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 // General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with Olena. If not, see <http://www.gnu.org/licenses/>.
17 //
18 // As a special exception, you may use this file as part of a free
19 // software project without restriction. Specifically, if other files
20 // instantiate templates or use macros or inline functions from this
21 // file, or you compile this file and link it with other files to produce
22 // an executable, this file does not by itself cause the resulting
23 // executable to be covered by the GNU General Public License. This
24 // exception does not however invalidate any other reasons why the
25 // executable file might be covered by the GNU General Public License.
26 
27 #ifndef SCRIBO_TOOLCHAIN_INTERNAL_TEXT_IN_DOC_PREPROCESS_FUNCTOR_HH
28 # define SCRIBO_TOOLCHAIN_INTERNAL_TEXT_IN_DOC_PREPROCESS_FUNCTOR_HH
29 
34 
35 #include <mln/core/concept/image.hh>
36 #include <mln/data/transform.hh>
37 #include <mln/data/convert.hh>
38 #include <mln/fun/v2v/rgb_to_luma.hh>
39 
40 #include <mln/subsampling/antialiased.hh>
41 #include <mln/util/timer.hh>
42 
43 #include <scribo/binarization/sauvola.hh>
44 #include <scribo/binarization/sauvola_ms.hh>
45 
46 #include <scribo/preprocessing/split_bg_fg.hh>
47 #include <scribo/preprocessing/deskew.hh>
48 #include <scribo/preprocessing/denoise.hh>
49 
50 #include <scribo/toolchain/internal/toolchain_functor.hh>
51 
52 namespace scribo
53 {
54 
55  namespace toolchain
56  {
57 
58  namespace internal
59  {
60 
61  using namespace mln;
62 
/// Binarization method selected through
/// text_in_doc_preprocess_functor::binarization_algo.
enum Binarization_Algo
{
  /// Plain conversion to bool through mln::data::convert.
  Convert = 0,
  /// scribo::binarization::sauvola.
  Sauvola = 1,
  /// scribo::binarization::sauvola_ms (multi-scale Sauvola).
  SauvolaMs = 2
};
75 
76 
// Declaration of the text-in-document preprocessing functor, parameterized
// by the input image type I and derived from Toolchain_Functor.
// NOTE(review): this listing skips source line 115, which presumably holds
// the `struct text_in_doc_preprocess_functor` head, and line 119, which
// presumably declares the default constructor defined further down --
// confirm against the original header.
114  template <typename I>
116  : public Toolchain_Functor
117  {
118 
120 
// Returns 2 plus one for each enabled optional stage (see the definition).
121  virtual int nsteps() const;
122 
123  //===============
124  // Core function
125  //===============
126 
// Runs the preprocessing chain on input_ and returns the binarized result
// (the `output` member).
127  mln_ch_value(I,bool) operator()(const Image<I>& input_);
128 
129 
130  //=========
131  // Options
132  //=========
133 
134  // Settings
// Each flag enables one optional stage of operator(); the constructor
// sets all four to false.
// enable_subsample: subsample the input with mln::subsampling::antialiased.
135  bool enable_subsample;
// enable_fg_extraction: run scribo::preprocessing::split_bg_fg and continue
// with the second element of its result.
136  bool enable_fg_extraction;
// enable_deskew: run scribo::preprocessing::deskew on the gray-level image.
137  bool enable_deskew;
// enable_denoising: run scribo::preprocessing::denoise on the binary output.
138  bool enable_denoising;
139 
// Binarization method used by operator(); the constructor selects SauvolaMs.
140  Binarization_Algo binarization_algo;
141 
// sauvola_win and sauvola_k2/k3/k4 are forwarded to
// scribo::binarization::sauvola_ms only; the single-scale Sauvola branch
// does not read them. Constructor defaults: 101 and 0.34 for each k.
// presumably k2/k3/k4 are per-scale Sauvola k parameters -- TODO confirm.
142  unsigned sauvola_win;
143  double sauvola_k2;
144  double sauvola_k3;
145  double sauvola_k4;
// Passed to split_bg_fg; when 0 (the constructor default), operator()
// uses 1.2 * (nrows + ncols) of the input image instead.
146  unsigned lambda;
147 
148  // Results
// fg and bg are assigned only when enable_fg_extraction is set
// (bg = first, fg = second element of the split_bg_fg result).
149  mln_concrete(I) fg;
150  mln_concrete(I) bg;
// Binarization result, replaced by the denoised image when denoising is on.
151  image2d<bool> output;
152 
153 # ifndef SCRIBO_NDEBUG
154  //=============
155  // DEBUG TOOLS
156  //=============
// Timing hooks, declared only when SCRIBO_NDEBUG is not defined.
// NOTE(review): operator() calls them unconditionally, so Toolchain_Functor
// presumably provides versions of its own -- confirm.
157  virtual void on_start();
158  virtual void on_end();
159  virtual void on_progress();
160 
// Timer started in on_start() and stopped/printed in on_end().
162  mln::util::timer gt;
163 # endif // ! SCRIBO_NDEBUG
164 
165  private: // Methods
// Returns the subsampling factor (1, 2 or 3) chosen from the image size.
166  unsigned find_best_scale(const Image<I>& ima_);
167 
168  };
169 
170 
171 # ifndef MLN_INCLUDE_ONLY
172 
// Default constructor: initializes every option member.
// NOTE(review): this listing skips source line 174, which presumably holds
// the `text_in_doc_preprocess_functor<I>::text_in_doc_preprocess_functor()`
// signature -- confirm against the original header.
173  template <typename I>
// All optional stages are disabled by default.
175  : enable_subsample(false),
176  enable_fg_extraction(false),
177  enable_deskew(false),
178  enable_denoising(false),
// Multi-scale Sauvola is the default binarization.
179  binarization_algo(SauvolaMs),
180  sauvola_win(101),
181  sauvola_k2(0.34),
182  sauvola_k3(0.34),
183  sauvola_k4(0.34),
// 0 makes operator() derive the split_bg_fg lambda from the image size.
184  lambda(0)
185  {
186  }
187 
188 
189  //===============
190  // Core function
191  //===============
192 
// Runs the preprocessing chain on input_: optional subsampling, optional
// foreground extraction, gray-scale conversion, optional deskew,
// binarization, optional denoising. Returns the `output` member.
// NOTE(review): this listing skips source lines 225, 244 and 246, so the
// definition of res_t, the declaration of intensity_ima and the second
// argument of mln::data::transform are not visible here.
193  template <typename I>
194  mln_ch_value(I,bool)
195  text_in_doc_preprocess_functor<I>::operator()(const Image<I>& input_)
196  {
197  mln_trace("scribo::toolchain::text_in_doc_preprocess");
198 
199  const I& input = exact(input_);
200  mln_precondition(input.is_valid());
201 
// Working image; reassigned by the subsample and foreground stages below.
202  mln_concrete(I) input_rgb = input;
203 
204  on_start();
205 
206  // Subsample
207  //----------
208  if (enable_subsample)
209  {
210  on_new_progress_label("Subsample");
211 
// The factor (1, 2 or 3) comes from find_best_scale().
212  input_rgb = mln::subsampling::antialiased(input_rgb,
213  find_best_scale(input_rgb));
214 
215  on_progress();
216  }
217 
218 
219  // Remove background
220  //------------------
221  if (enable_fg_extraction)
222  {
223  on_new_progress_label("Foreground Extraction");
224 
// presumably the missing line 225 is the typedef of res_t -- TODO confirm.
226 
// A lambda of 0 means "derive it from the image size".
// NOTE(review): the size is read from `input`, not from the possibly
// subsampled `input_rgb`, and the double product is implicitly converted
// to unsigned -- confirm both are intended.
227  unsigned rlambda = lambda;
228  if (!lambda)
229  rlambda = 1.2 * (input.nrows() + input.ncols());
230 
231  res_t res = scribo::preprocessing::split_bg_fg(input_rgb, rlambda, 32);
232 
// Keep both parts as results; the chain continues on the second one.
233  bg = res.first();
234  fg = res.second();
235  input_rgb = res.second();
236 
237  on_progress();
238  }
239 
240 
241  // Convert to grayscale image (always happens).
242  //---------------------------------------------
243  on_new_progress_label("Convert to gray-scale image");
// presumably the missing lines 244 and 246 declare intensity_ima and pass
// the rgb_to_luma functor from the included header -- TODO confirm.
245  intensity_ima = mln::data::transform(input_rgb,
247  on_progress();
248 
249 
250  // Deskew
251  //-------
252  if (enable_deskew)
253  {
254  on_new_progress_label("Deskew");
255 
256  intensity_ima = scribo::preprocessing::deskew(intensity_ima);
257 
258  on_progress();
259  }
260 
261 
262  // Binarization (always happens)
263  //------------------------------
264  if (binarization_algo == Sauvola)
265  {
266  on_new_progress_label("Binarization (Sauvola)");
// NOTE(review): only the image is passed here, so sauvola_win is not
// used by this branch -- confirm the library default is wanted.
267  output = scribo::binarization::sauvola(intensity_ima);
268  }
269  else if (binarization_algo == SauvolaMs)
270  {
271  on_new_progress_label("Binarization (Sauvola Multi-scale)");
// The meaning of the literal 3 is not visible here -- see sauvola_ms.
272  output = scribo::binarization::sauvola_ms(intensity_ima,
273  sauvola_win, 3,
274  sauvola_k2,
275  sauvola_k3,
276  sauvola_k4);
277  }
278  else // binarization_algo == Convert
279  {
280  on_new_progress_label("Binarization (Binary conversion)");
281  output = mln::data::convert(bool(), intensity_ima);
282  }
283 
// A single progress tick covers whichever binarization branch ran.
284  on_progress();
285 
286 
287  // Denoise
288  //--------
289  if (enable_denoising)
290  {
291  on_new_progress_label("Remove noise");
292 
// The meaning of the two literal 2 arguments is not visible here.
293  output = scribo::preprocessing::denoise(output, c8(), 2, 2);
294 
295  on_progress();
296  }
297 
298  on_end();
299 
// output is an image2d<bool>; the declared return type is
// mln_ch_value(I,bool).
300  return output;
301  }
302 
303 
304 
305  template<typename I>
306  int
307  text_in_doc_preprocess_functor<I>::nsteps() const
308  {
309  return 2 + enable_denoising + enable_deskew
310  + enable_fg_extraction + enable_subsample;
311  }
312 
313 
314  template <typename I>
315  unsigned
316  text_in_doc_preprocess_functor<I>::find_best_scale(const Image<I>& ima_)
317  {
318  const I& ima = exact(ima_);
319  if (ima.nrows() > 2500
320  && ima.nrows() < 5000
321  && ima.ncols() > 2500
322  && ima.ncols() < 5000)
323  return 2;
324 
325  if (ima.nrows() > 5000
326  && ima.ncols() > 5000)
327  return 3;
328 
329  return 1;
330  }
331 
332 
333 # ifndef SCRIBO_NDEBUG
334 
335  template <typename I>
336  void
337  text_in_doc_preprocess_functor<I>::on_start()
338  {
339  gt.start();
340  t.start();
341  }
342 
343  template <typename I>
344  void
345  text_in_doc_preprocess_functor<I>::on_end()
346  {
347  gt.stop();
348  if (verbose)
349  std::cout << "Total time: " << gt << std::endl;
350  }
351 
352  template <typename I>
353  void
354  text_in_doc_preprocess_functor<I>::on_progress()
355  {
356  t.stop();
357  if (verbose)
358  std::cout << t << std::endl;
359  t.restart();
360  }
361 
362 
363 # endif // ! SCRIBO_NDEBUG
364 
365 
366 # endif // ! MLN_INCLUDE_ONLY
367 
368 
369  } // end of namespace scribo::toolchain::internal
370 
371  } // end of namespace scribo::toolchain
372 
373 } // end of namespace scribo
374 
375 #endif // ! SCRIBO_TOOLCHAIN_INTERNAL_TEXT_IN_DOC_PREPROCESS_FUNCTOR_HH