Olena User documentation 2.1
An Image Processing Platform
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
non_text_components.cc
1 // Copyright (C) 2011, 2012, 2013 EPITA Research and Development
2 // Laboratory (LRDE)
3 //
4 // This file is part of Olena.
5 //
6 // Olena is free software: you can redistribute it and/or modify it under
7 // the terms of the GNU General Public License as published by the Free
8 // Software Foundation, version 2 of the License.
9 //
10 // Olena is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 // General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with Olena. If not, see <http://www.gnu.org/licenses/>.
17 //
18 // As a special exception, you may use this file as part of a free
19 // software project without restriction. Specifically, if other files
20 // instantiate templates or use macros or inline functions from this
21 // file, or you compile this file and link it with other files to produce
22 // an executable, this file does not by itself cause the resulting
23 // executable to be covered by the GNU General Public License. This
24 // exception does not however invalidate any other reasons why the
25 // executable file might be covered by the GNU General Public License.
26 
27 
28 #include <libgen.h>
29 #include <fstream>
30 #include <iostream>
31 
32 #include <mln/core/image/image2d.hh>
33 
34 #include <mln/io/pbm/save.hh>
35 #include <mln/io/magick/load.hh>
36 
37 #include <scribo/toolchain/content_in_doc.hh>
38 #include <scribo/toolchain/text_in_doc_preprocess.hh>
39 
40 #include <scribo/core/document.hh>
41 
42 #include <scribo/debug/usage.hh>
43 
44 #include <scribo/preprocessing/crop_without_localization.hh>
45 #include <scribo/preprocessing/crop.hh>
46 
47 #include <scribo/io/xml/save.hh>
48 #include <scribo/io/img/save.hh>
49 
50 
// Descriptions of the command-line arguments, passed to
// scribo::debug::usage() by main when the argument count is wrong.
// Each row is a { name, description } pair of C strings; the table ends
// with a { 0, 0 } row (presumably the end marker usage() looks for --
// TODO confirm against scribo/debug/usage.hh).
51 const char *args_desc[][2] =
52 {
53  { "input.*", "An image." },
54  { "non_text_comps.pbm", "Non text components mask." },
55  { "enable_debug", "Enable debug image output. Set to 1 or 0." },
56  { "enable_tabstops", "Enable tabstops detection. Set to 1 or 0." },
57  {0, 0}
58 };
59 
60 
61 
// Entry point: extracts the non-text components mask of a document image.
//
// Command line (see args_desc and the usage string below):
//   argv[1]  input image, read with mln::io::magick::load
//   argv[2]  output PBM file that receives the mask
//   argv[3]  optional; a non-zero integer (per atoi) selects the Special
//            logger level, anything else selects None
//   argv[4]  optional; a non-zero integer (per atoi) sets
//            find_whitespace_seps
//
// Returns the value of scribo::debug::usage() when argc is not 3, 4 or 5.
// Otherwise control reaches the end of main without a return statement,
// which in C++ is equivalent to returning 0.
//
// NOTE(review): the numbering embedded in this listing skips 77, 85, 86,
// 89, 93 and 114. The declarations of `input`, `t`, `input_preproc` and
// `doc` are not visible here and are presumably on those missing lines --
// restore them from the original file before building.
62 int main(int argc, char* argv[])
63 {
64  using namespace scribo;
65  using namespace mln;
66 
// Accept two mandatory arguments plus up to two optional flags.
// NOTE(review): the description string ends with "mask/"; this looks like
// a typo for "mask." -- confirm before changing the user-facing text.
67  if (argc != 4 && argc != 3 && argc != 5)
68  return scribo::debug::usage(argv,
69  "Extract non text components mask/",
70  "input.* non_text_comps.pbm [enable_debug] [enable_tabstops]",
71  args_desc);
72 
// Build the output name stem: the base name of the input path with its
// last four characters removed.
// NOTE(review): this assumes a dot plus a three-letter extension. For
// "page.jpeg" the stem becomes "page.", and for a base name shorter than
// four characters size() - 4 wraps around (unsigned) so erase() throws
// std::out_of_range. Searching for the last '.' would be more robust.
// NOTE(review): POSIX basename() from <libgen.h> is allowed to modify its
// argument, and argv[1] is used again below to load the image -- confirm
// the implementation in use leaves the path intact.
73  std::string out_img = basename(argv[1]);
74  out_img.erase(out_img.size() - 4);
75 
// NOTE(review): filename_prefix is not used on any line visible in this
// listing; its use is presumably on the missing line 77 -- TODO confirm.
76  std::string filename_prefix = out_img + "_debug";
// Select the logger level from the optional third argument.
78  if (argc > 3 && atoi(argv[3]))
79  scribo::debug::logger().set_level(scribo::debug::Special);
80  else
81  scribo::debug::logger().set_level(scribo::debug::None);
82 
83  mln_trace("main");
84 
// Read the input image from the path given as first argument.
87  mln::io::magick::load(input, argv[1]);
88 
// Timing starts after the image is loaded and stops after content_in_doc
// returns, so it covers preprocessing and analysis but not file I/O.
90  t.start();
91 
92  // Preprocess document
// The meaning of the positional arguments (false, 0, 0.34, false, false)
// is defined by toolchain::text_in_doc_preprocess and is not visible here.
94  input_preproc = toolchain::text_in_doc_preprocess(input, false, 0, 0.34,
95  false, false);
96 
97 
// Fixed options for the toolchain; only find_whitespace_seps depends on
// the command line (optional fourth argument, "enable_tabstops").
98  bool denoise = true;
99  std::string language = "";
100  bool find_line_seps = true;
101  bool find_whitespace_seps = (argc > 4 && atoi(argv[4]));
102 
// Echo the effective options. `denoise` is not printed; the value shown
// as "debug" is what the logger reports through is_enabled().
103  std::cout << "Running with the following options :"
104  << " ocr_language = " << language
105  << " | find_lines_seps = " << find_line_seps
106  << " | find_whitespace_seps = " << find_whitespace_seps
107  << " | debug = " << scribo::debug::logger().is_enabled()
108  << std::endl;
109 
110  // Run document toolchain.
111 
112  // Text
113  std::cout << "Analysing document..." << std::endl;
// `language` is empty, so !language.empty() passes false for the
// parameter that precedes the language string -- presumably the switch
// that enables OCR, TODO confirm against content_in_doc.hh.
115  doc = scribo::toolchain::content_in_doc(input, input_preproc, denoise,
116  find_line_seps, find_whitespace_seps,
117  !language.empty(), language);
118  t.stop();
119  std::cout << t << std::endl;
120 
// Convert the labeled image of the document's elements to a boolean image
// and write it to the path given as second argument.
121  mln::io::pbm::save(data::convert(bool(), doc.elements().labeled_image()), argv[2]);
122 
// These two images are written unconditionally, whatever the value of the
// enable_debug argument; their names derive from the input name stem.
123  scribo::io::img::save(doc, out_img + "_debug_wo_image.png", scribo::io::img::DebugWoImage);
124  scribo::io::img::save(doc, out_img + "_debug_with_image.png", scribo::io::img::DebugWithImage);
125 
126 }