$extrastylesheet
Olena  User documentation 2.1
An Image Processing Platform
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
pbm_text_in_doc.cc
1 // Copyright (C) 2009, 2010, 2011, 2013 EPITA Research and Development
2 // Laboratory (LRDE)
3 //
4 // This file is part of Olena.
5 //
6 // Olena is free software: you can redistribute it and/or modify it under
7 // the terms of the GNU General Public License as published by the Free
8 // Software Foundation, version 2 of the License.
9 //
10 // Olena is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 // General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with Olena. If not, see <http://www.gnu.org/licenses/>.
17 //
18 // As a special exception, you may use this file as part of a free
19 // software project without restriction. Specifically, if other files
20 // instantiate templates or use macros or inline functions from this
21 // file, or you compile this file and link it with other files to produce
22 // an executable, this file does not by itself cause the resulting
23 // executable to be covered by the GNU General Public License. This
24 // exception does not however invalidate any other reasons why the
25 // executable file might be covered by the GNU General Public License.
26 
27 #include <libgen.h>
28 #include <fstream>
29 #include <iostream>
30 
31 #include <mln/core/image/image2d.hh>
32 #include <mln/core/alias/neighb2d.hh>
33 
34 #include <mln/io/pbm/all.hh>
35 
36 
37 #include <scribo/toolchain/text_in_doc.hh>
38 
39 #include <scribo/core/document.hh>
40 #include <scribo/core/line_set.hh>
41 
42 #include <scribo/debug/option_parser.hh>
43 
44 #include <scribo/preprocessing/crop_without_localization.hh>
45 
46 #include <scribo/io/text_boxes/save.hh>
47 
48 
49 static const scribo::debug::arg_data arg_desc[] =
50 {
51  { "input.pbm", "A binary image." },
52  { "output.txt", "Text output." },
53  {0, 0}
54 };
55 
56 
57 // --enable/disable-<name>
58 static const scribo::debug::toggle_data toggle_desc[] =
59 {
60  // name, description, default value
61  { "denoising", "Performs a denoising. (default: enabled)", true },
62  { "find-delims", "Find text alignements and whitespaces "
63  "to improve layout detection. (default: enabled)", true },
64  { "find-seps", "Find separators in document (default: enabled)", true },
65  {0, 0, false}
66 };
67 
68 
69 // --<name> <args>
70 static const scribo::debug::opt_data opt_desc[] =
71 {
72  // name, description, arguments, check args function, number of args, default arg
73  { "crop", "Crop input image before processing it.",
74  "<pmin_row> <pmin_col> <pmax_row> <pmax_col>", 0, 4, 0 },
75  { "debug-prefix", "Enable debug image outputs. Prefix image name with that "
76  "given prefix.", "<prefix>", 0, 1, 0 },
77  { "ocr-lang", "Set the language to be recognized by the OCR (Tesseract). "
78  "According to your system, you can choose between eng (default), "
79  "fra, deu, ita, nld, por, spa, vie",
80  "<lang>", scribo::debug::check_ocr_lang, 1, "eng" },
81  { "verbose", "Enable verbose mode", 0, 0, 0, 0 },
82  {0, 0, 0, 0, 0, 0}
83 };
84 
85 
86 
87 int main(int argc, char* argv[])
88 {
89  using namespace scribo;
90  using namespace mln;
91 
92  scribo::debug::option_parser options(arg_desc, toggle_desc, opt_desc);
93 
94  if (!options.parse(argc, argv))
95  return 1;
96 
97  // Enable debug output.
98  if (options.is_set("debug-prefix"))
99  {
100  scribo::debug::logger().set_filename_prefix(options.opt_value("debug-prefix").c_str());
101  scribo::debug::logger().set_level(scribo::debug::All);
102  scribo::make::internal::debug_filename_prefix = options.opt_value("debug-prefix").c_str();
103  }
104 
105  bool verbose = options.is_set("verbose");
106 
107  mln_trace("main");
108 
110 
111  image2d<bool> input;
112  const char *input_name = options.arg("input.pbm");
113  mln::io::pbm::load(input, input_name);
114 
115  // Optional Cropping
116  point2d crop_shift = literal::origin;
117  if (options.is_set("crop"))
118  {
119  std::vector<const char *> values = options.opt_values("crop");
121  minr = atoi(values[0]),
122  minc = atoi(values[1]),
123  maxr = atoi(values[2]),
124  maxc = atoi(values[3]);
125 
126  if (verbose)
127  std::cout << "> Image cropped from (" << minr << "," << minc << ")"
128  << " to (" << maxr << "," << maxc << ")" << std::endl;
129 
130  box2d roi = mln::make::box2d(minr, minc, maxr, maxc);
131  input = preprocessing::crop_without_localization(input, roi);
132  crop_shift = point2d(minr, minc);
133 
134  scribo::debug::logger().log_image(scribo::debug::Results, input,
135  "input_cropped.pbm");
136  }
137 
138  bool denoise = options.is_enabled("denoising");
139  std::string language = options.opt_value("ocr-lang");
140  bool find_line_seps = options.is_enabled("find-seps");
141  bool find_whitespace_seps = options.is_enabled("find-delims");
142 
143  if (verbose)
144  std::cout << "Running with the following options :"
145  << " ocr language = " << language
146  << " | find_lines_seps = " << find_line_seps
147  << " | find_whitespace_seps = " << find_whitespace_seps
148  << " | debug = " << scribo::debug::logger().is_enabled()
149  << std::endl;
150 
151  // Run document toolchain.
153  lines = scribo::toolchain::text_in_doc(input, denoise,
154  language, find_line_seps,
155  find_whitespace_seps, verbose);
156 
157  scribo::document<L> doc(input_name);
158 
159  // Specify shift due to potential previous crop.
160  scribo::io::text_boxes::save(lines, options.arg("output.txt"), crop_shift);
161 
162 }