$extrastylesheet
Olena  User documentation 2.1
An Image Processing Platform
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
content_in_hdoc_hbr.cc
1 // Copyright (C) 2013 EPITA Research and Development Laboratory (LRDE)
2 //
3 // This file is part of Olena.
4 //
5 // Olena is free software: you can redistribute it and/or modify it under
6 // the terms of the GNU General Public License as published by the Free
7 // Software Foundation, version 2 of the License.
8 //
9 // Olena is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 // General Public License for more details.
13 //
14 // You should have received a copy of the GNU General Public License
15 // along with Olena. If not, see <http://www.gnu.org/licenses/>.
16 //
17 // As a special exception, you may use this file as part of a free
18 // software project without restriction. Specifically, if other files
19 // instantiate templates or use macros or inline functions from this
20 // file, or you compile this file and link it with other files to produce
21 // an executable, this file does not by itself cause the resulting
22 // executable to be covered by the GNU General Public License. This
23 // exception does not however invalidate any other reasons why the
24 // executable file might be covered by the GNU General Public License.
25 
26 # define SCRIBO_NOCR
27 
28 #include <libgen.h>
29 #include <fstream>
30 #include <iostream>
31 
32 #include <mln/core/image/image2d.hh>
33 
34 #include <mln/io/magick/load.hh>
35 
36 #include <scribo/toolchain/content_in_hdoc.hh>
37 #include <scribo/toolchain/text_in_doc_preprocess.hh>
38 
39 #include <scribo/core/document.hh>
40 
41 #include <scribo/debug/usage.hh>
42 #include <scribo/debug/logger.hh>
43 
44 #include <scribo/preprocessing/crop_without_localization.hh>
45 #include <scribo/preprocessing/crop.hh>
46 
47 #include <scribo/io/xml/save.hh>
48 #include <scribo/io/img/save.hh>
49 
50 
51 #include <mln/core/alias/neighb2d.hh>
52 #include <mln/labeling/compute.hh>
53 #include <mln/labeling/foreground.hh>
54 #include <mln/util/timer.hh>
55 
56 
57 
/// Descriptions of the command-line arguments, passed to
/// scribo::debug::usage() from main().
///
/// Each row is a { name, description } pair; the table is terminated by
/// a { 0, 0 } sentinel row.
const char *args_desc[][2] =
{
  { "input.tif", "An image." },
  { "out.xml", "Result of the document analysis (PAGE format)." },
  { 0, 0 }
};
64 
65 
66 
67 int main(int argc, char* argv[])
68 {
69  using namespace scribo;
70  using namespace mln;
71 
72  if (argc != 3)
73  return scribo::debug::usage(argv,
74  "Document Image Analysis in Historical Books"
75  " for Historical Book Recognition Contest 2013",
76  "input.tif out.xml",
77  args_desc);
78 
79  mln_trace("main");
80 
82  t.start();
83 
86  mln::io::magick::load(input, argv[1]);
87 
88  mln::debug::internal::filename_prefix = basename(argv[1]);
89 
90  // Preprocess document
91  image2d<bool> input_preproc;
92  {
93  input_preproc = toolchain::text_in_doc_preprocess(input, false, 0, 0.34,
94  false, false);
95 
96  // Cleanup components on borders
97  {
98  typedef scribo::def::lbl_type V;
99  V nlabels;
100  image2d<V> lbl = labeling::foreground(input_preproc, c8(), nlabels);
102  bbox = labeling::compute(accu::shape::bbox<point2d>(), lbl, nlabels);
103 
104  const box2d& b = input.domain();
105  for_all_ncomponents(e, nlabels)
106  if (bbox(e).pmin().row() == b.pmin().row()
107  || bbox(e).pmax().row() == b.pmax().row()
108  || bbox(e).pmin().col() == b.pmin().col()
109  || bbox(e).pmax().col() == b.pmax().col())
110  data::fill(((input_preproc | bbox(e)).rw()
111  | (pw::value(lbl) == pw::cst(e))).rw(), false);
112  }
113  }
114 
115  bool denoise = 1;
116  std::string language = "";
117  bool find_line_seps = true;
118  bool find_whitespace_seps = true;
119 
120  // Run document toolchain.
121 
122  // Text
123  std::cout << "Analysing document..." << std::endl;
124  document<L>
125  doc = scribo::toolchain::content_in_hdoc(input, input_preproc, denoise,
126  find_line_seps, find_whitespace_seps,
127  !language.empty(), language);
128 
129  doc.set_filename(basename(argv[1]));
130 
131  // Saving results
132  std::cout << "Saving results..." << std::endl;
133  scribo::io::xml::save(doc, argv[2], scribo::io::xml::Page);
134  std::cout << "End of process - " << t << std::endl;
135 
136  // scribo::io::img::save(doc, mln::debug::filename("debug_wo_image.png"),
137  // scribo::io::img::DebugWoImage);
138  // scribo::io::img::save(doc, mln::debug::filename("debug_with_image.png"),
139  // scribo::io::img::DebugWithImage);
140 
141  // scribo::io::img::internal::reduction_factor = 3;
142 
143  // scribo::io::img::save(doc, mln::debug::filename("debug_wo_image_30p.png"),
144  // scribo::io::img::DebugWoImage);
145  // scribo::io::img::save(doc, mln::debug::filename("debug_with_image_30p.png"),
146  // scribo::io::img::DebugWithImage);
147 
148 }