$extrastylesheet
Olena  User documentation 2.1
An Image Processing Platform
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
text_recognition_in_picture.cc
1 // Copyright (C) 2009, 2010, 2011, 2013, 2014 EPITA Research and Development
2 // Laboratory (LRDE).
3 //
4 // This file is part of Olena.
5 //
6 // Olena is free software: you can redistribute it and/or modify it under
7 // the terms of the GNU General Public License as published by the Free
8 // Software Foundation, version 2 of the License.
9 //
10 // Olena is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 // General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with Olena. If not, see <http://www.gnu.org/licenses/>.
17 //
18 // As a special exception, you may use this file as part of a free
19 // software project without restriction. Specifically, if other files
20 // instantiate templates or use macros or inline functions from this
21 // file, or you compile this file and link it with other files to produce
22 // an executable, this file does not by itself cause the resulting
23 // executable to be covered by the GNU General Public License. This
24 // exception does not however invalidate any other reasons why the
25 // executable file might be covered by the GNU General Public License.
26 
27 #include <libgen.h>
28 #include <iostream>
29 
30 #include <mln/core/image/image2d.hh>
31 #include <mln/core/image/imorph/tr_image.hh>
32 #include <mln/core/alias/neighb2d.hh>
33 
34 #include <mln/data/stretch.hh>
35 
36 #include <mln/io/pbm/all.hh>
37 #include <mln/io/ppm/save.hh>
38 #include <mln/io/magick/all.hh>
39 
40 #include <mln/math/min.hh>
41 
42 #include <mln/logical/not.hh>
43 
44 #include <mln/literal/colors.hh>
45 #include <mln/value/rgb8.hh>
46 
47 #include <mln/fun/v2v/rgb_to_luma.hh>
48 
49 #include <mln/data/wrap.hh>
50 
51 #include <mln/draw/box.hh>
52 
53 #include <mln/geom/translate.hh>
54 
55 #include <mln/subsampling/antialiased.hh>
56 
57 #include <scribo/draw/bounding_boxes.hh>
58 #include <scribo/draw/groups_bboxes.hh>
59 
60 #include <scribo/binarization/sauvola_ms.hh>
61 #include <scribo/binarization/sauvola.hh>
62 
63 #include <scribo/primitive/extract/components.hh>
64 
65 #include <scribo/primitive/link/merge_double_link.hh>
66 #include <scribo/primitive/link/with_single_left_link.hh>
67 #include <scribo/primitive/link/with_single_right_link.hh>
68 
69 #include <scribo/primitive/group/apply.hh>
70 #include <scribo/primitive/group/from_double_link.hh>
71 #include <scribo/primitive/group/from_single_link.hh>
72 
73 #include <scribo/primitive/regroup/from_single_left_link.hh>
74 #include <scribo/primitive/regroup/from_single_left_link_wrt_h_ratio.hh>
75 
76 #include <scribo/filter/objects_size_ratio.hh>
77 
78 #include <scribo/filter/object_groups_with_holes.hh>
79 
80 #include <scribo/filter/object_links_bbox_h_ratio.hh>
81 #include <scribo/filter/object_links_bbox_overlap.hh>
82 
83 #include <scribo/filter/common/objects_photo.hh>
84 
85 #include <scribo/filter/object_groups_small.hh>
86 #include <scribo/filter/object_groups_mean_width.hh>
87 
88 #include <scribo/debug/highlight_text_area.hh>
89 
90 #include <scribo/debug/decision_image.hh>
91 
92 #include <scribo/debug/option_parser.hh>
93 
94 #include <scribo/preprocessing/split_bg_fg.hh>
95 #include <scribo/preprocessing/rotate_90.hh>
96 
97 #include <scribo/debug/logger.hh>
98 #include <scribo/toolchain/text_in_picture.hh>
99 
100 #include <mln/util/timer.hh>
101 
102 
103 #include <scribo/afp/components.hh>
104 #include <scribo/afp/link.hh>
105 
106 #include <scribo/core/line_set.hh>
107 #include <scribo/text/recognition.hh>
108 #include <scribo/text/look_like_text_lines.hh>
109 
110 
111 #include <scribo/io/text_boxes/save.hh>
112 
113 // Positional arguments as { name, description } pairs; main() reads them back with options.arg(<name>). The final {0, 0} entry presumably marks the end of the table.
114 static const scribo::debug::arg_data arg_desc[] =
115 {
116  { "input.*", "An image." },
117  { "output.ppm", "A color image where the text is highlighted." },
118  { "output.txt", "Recognized text with its position." },
119  {0, 0}
120 };
121 
122 // --enable/disable-<name> toggles; main() queries them with options.is_enabled(<name>).
123 static const scribo::debug::toggle_data toggle_desc[] =
124 {
125  // name, description, default value (the final all-zero entry presumably marks the end of the table)
126  { "fg-extraction", "Detect and split foreground/background components. (default: disabled)", false },
127  { "ms-bin", "Use a multi-scale binarization. (default: enabled)", true },
128  {0, 0, false}
129 };
130 
131 // Options that main() tests with options.is_set(<name>) and reads with options.opt_value(<name>).
132 // --<name> <args>
133 static const scribo::debug::opt_data opt_desc[] =
134 {
135  // name, description, arguments, check args function, number of args, default arg
136  { "debug-prefix", "Enable debug image outputs. Prefix image name with that "
137  "given prefix.", "<prefix>", 0, 1, 0 },
138  { "lambda", "Set the maximum area of the background objects. It is only useful if fg-extraction is enabled.", "<size>",
139  0, 1, "0" },
140  { "max-dim-size", "Set the maximum size of the largest image dimension.", "<size>", 0, 1, "1024" },
141  { "ocr-lang", "Set the language to be recognized by the OCR (Tesseract). "
142  "According to your system, you can choose between eng (default), "
143  "fra, deu, ita, nld, por, spa, vie",
144  "<lang>", scribo::debug::check_ocr_lang, 1, "eng" },
145  { "verbose", "Enable verbose mode", 0, 0, 0, 0 },
146  {0, 0, 0, 0, 0, 0}
147 };
148 
149 // Entry point: parses the command line, loads the image named by argv[1],
150 // configures and runs `f` on it, then passes lines built from f.groups to
151 // text::recognition and saves them. Returns 1 when option parsing fails.
152 int main(int argc, char* argv[])
153 {
154  using namespace scribo;
155  using namespace scribo::primitive;
156  using namespace mln;
157 // Build the command-line parser from the three description tables (arg_desc, toggle_desc, opt_desc).
158  scribo::debug::option_parser options(arg_desc, toggle_desc, opt_desc);
159 
160  if (!options.parse(argc, argv))
161  return 1;
162 // When --debug-prefix is given, hand the prefix to the logger and set its level to All.
163  if (options.is_set("debug-prefix"))
164  {
165  scribo::debug::logger().set_filename_prefix(options.opt_value("debug-prefix").c_str());
166  scribo::debug::logger().set_level(scribo::debug::All);
167  }
168 
169  mln_trace("main");
170 // Load the input into an image2d<value::rgb8>. NOTE(review): this reads argv[1] directly instead of a parsed argument; confirm argv[1] is always the input path.
171  typedef image2d<value::rgb8> I;
172  I input_rgb;
173  mln::io::magick::load(input_rgb, argv[1]);
174 
175 
176 // Read the remaining settings. NOTE(review): atoi() returns int and the result is stored in an unsigned, so a negative option value would wrap; confirm inputs are validated.
177  bool verbose = options.is_set("verbose");
178  unsigned max_dim_size = atoi(options.opt_value("max-dim-size").c_str());
179  bool fg_extraction = options.is_enabled("fg-extraction");
180  bool multi_scale_bin = options.is_enabled("ms-bin");
181  unsigned lambda = atoi(options.opt_value("lambda").c_str());
182 // Echo the effective settings in verbose mode.
183  if (verbose)
184  std::cout << "Using max_dim_size = " << max_dim_size
185  << " - fg_extraction = " << fg_extraction
186  << " - multi_scale_bin = " << multi_scale_bin
187  << " - lambda = " << lambda << std::endl;
188 // NOTE(review): listing lines 189 and 191 are absent from this view, so the declarations of `L` and `f` are not visible here.
190 // Copy the settings read above into `f`.
192  f.enable_bg_removal = fg_extraction;
193  f.enable_multi_scale_bin = multi_scale_bin;
194  f.max_dim_size = max_dim_size;
195  f.lambda = lambda;
196  f.verbose = verbose;
197 // Apply `f` to the loaded image; the result is kept as a component_set<L>.
198  component_set<L> output = f(input_rgb);
199 
200  // // Grouping groups together if possible.
201  // groups = regroup::from_single_left_link_wrt_h_ratio(filtered_thin_groups,
202  // conf.regroup_dmax,
203  // conf.bbox_h_ratio);
204 // NOTE(review): listing line 205 is absent from this view; the next line is only the tail of a call that receives the "output.ppm" argument.
206  options.arg("output.ppm"));
207 // Build a line_set from f.groups, run text::recognition with the --ocr-lang value, and save the result to the "output.txt" argument.
208  scribo::line_set<L> lines = scribo::make::line_set(f.groups);
210  text::recognition(lines, options.opt_value("ocr-lang").c_str());
211  scribo::io::text_boxes::save(lines, options.arg("output.txt"));
212 // NOTE(review): the count printed below is output.nelements() (the component_set), not a count taken from `lines`; confirm the message matches.
213  if (verbose)
214  std::cout << output.nelements() << " text lines found." << std::endl;
215 }