$extrastylesheet
Olena  User documentation 2.1
An Image Processing Platform
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
content_in_hdoc_hdlac.cc
1 // Copyright (C) 2010, 2011, 2012, 2013 EPITA Research and Development
2 // Laboratory (LRDE)
3 //
4 // This file is part of Olena.
5 //
6 // Olena is free software: you can redistribute it and/or modify it under
7 // the terms of the GNU General Public License as published by the Free
8 // Software Foundation, version 2 of the License.
9 //
10 // Olena is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 // General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with Olena. If not, see <http://www.gnu.org/licenses/>.
17 //
18 // As a special exception, you may use this file as part of a free
19 // software project without restriction. Specifically, if other files
20 // instantiate templates or use macros or inline functions from this
21 // file, or you compile this file and link it with other files to produce
22 // an executable, this file does not by itself cause the resulting
23 // executable to be covered by the GNU General Public License. This
24 // exception does not however invalidate any other reasons why the
25 // executable file might be covered by the GNU General Public License.
26 
27 # define SCRIBO_NOCR
28 
29 #include <libgen.h>
30 #include <fstream>
31 #include <iostream>
32 
33 #include <mln/core/image/image2d.hh>
34 
35 #include <mln/io/magick/load.hh>
36 
37 #include <scribo/toolchain/content_in_hdoc.hh>
38 #include <scribo/toolchain/text_in_doc_preprocess.hh>
39 
40 #include <scribo/core/document.hh>
41 
42 #include <scribo/debug/usage.hh>
43 #include <scribo/debug/logger.hh>
44 
45 #include <scribo/preprocessing/crop_without_localization.hh>
46 #include <scribo/preprocessing/crop.hh>
47 
48 #include <scribo/io/xml/save.hh>
49 #include <scribo/io/img/save.hh>
50 
51 
52 #include <mln/core/alias/neighb2d.hh>
53 #include <mln/labeling/compute.hh>
54 #include <mln/labeling/foreground.hh>
55 #include <mln/util/timer.hh>
56 
57 
58 // Name/description pairs for the two command-line arguments; passed to scribo::debug::usage() in main().
59 const char *args_desc[][2] =
60 {
61  { "input.tif", "An image." },
62  { "out.xml", "Result of the document analysis." },
63  {0, 0} // Null pair closing the table (presumably the terminator usage() looks for -- confirm).
64 };
65 
66 
67 
68 int main(int argc, char* argv[])
69 {
70  using namespace scribo;
71  using namespace mln;
72 
73  if (argc != 3)
74  return scribo::debug::usage(argv,
75  "Document Image Analysis in Historical Documents",
76  "input.tif out.xml",
77  args_desc);
78 
79  mln_trace("main");
80 
82  t.start();
83 
86  mln::io::magick::load(input, argv[1]);
87 
88  mln::debug::internal::filename_prefix = basename(argv[1]);
89 
90  // Preprocess document
91  image2d<bool> input_preproc;
92  {
93  input_preproc = toolchain::text_in_doc_preprocess(input, false, 0, 0.34,
94  false, false);
95 
96  // Cleanup components on borders
97  {
98  typedef scribo::def::lbl_type V;
99  V nlabels;
100  image2d<V> lbl = labeling::foreground(input_preproc, c8(), nlabels);
102  bbox = labeling::compute(accu::shape::bbox<point2d>(), lbl, nlabels);
103 
104  const box2d& b = input.domain();
105  for_all_ncomponents(e, nlabels)
106  if (bbox(e).pmin().row() == b.pmin().row()
107  || bbox(e).pmax().row() == b.pmax().row()
108  || bbox(e).pmin().col() == b.pmin().col()
109  || bbox(e).pmax().col() == b.pmax().col())
110  data::fill(((input_preproc | bbox(e)).rw()
111  | (pw::value(lbl) == pw::cst(e))).rw(), false);
112  }
113  }
114 
115  bool denoise = 1;
116  std::string language = "";
117  bool find_line_seps = true;
118  bool find_whitespace_seps = true;
119 
120  // Run document toolchain.
121 
122  // Text
123  std::cout << "Analysing document..." << std::endl;
124  document<L>
125  doc = scribo::toolchain::content_in_hdoc(input, input_preproc, denoise,
126  find_line_seps, find_whitespace_seps,
127  !language.empty(), language);
128 
129  doc.set_filename(basename(argv[1]));
130 
131  // Saving results
132  std::cout << "Saving results..." << std::endl;
133  scribo::io::xml::save(doc, argv[2], scribo::io::xml::Page);
134  std::cout << "End of process - " << t << std::endl;
135 
136  scribo::io::img::save(doc, mln::debug::filename("debug_wo_image.png"),
137  scribo::io::img::DebugWoImage);
138  scribo::io::img::save(doc, mln::debug::filename("debug_with_image.png"),
139  scribo::io::img::DebugWithImage);
140 
141 
142 }