$extrastylesheet
Olena  User documentation 2.1
An Image Processing Platform
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
content_in_doc_dae.cc
1 // Copyright (C) 2011, 2012, 2013 EPITA Research and Development
2 // Laboratory (LRDE)
3 //
4 // This file is part of Olena.
5 //
6 // Olena is free software: you can redistribute it and/or modify it under
7 // the terms of the GNU General Public License as published by the Free
8 // Software Foundation, version 2 of the License.
9 //
10 // Olena is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 // General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with Olena. If not, see <http://www.gnu.org/licenses/>.
17 //
18 // As a special exception, you may use this file as part of a free
19 // software project without restriction. Specifically, if other files
20 // instantiate templates or use macros or inline functions from this
21 // file, or you compile this file and link it with other files to produce
22 // an executable, this file does not by itself cause the resulting
23 // executable to be covered by the GNU General Public License. This
24 // exception does not however invalidate any other reasons why the
25 // executable file might be covered by the GNU General Public License.
26 
27 #define SCRIBO_NOCR
28 
29 #include <libgen.h>
30 #include <fstream>
31 #include <iostream>
32 
33 #include <mln/core/image/image2d.hh>
34 
35 #include <mln/io/pbm/save.hh>
36 #include <mln/io/magick/load.hh>
37 
38 #include <scribo/toolchain/content_in_doc.hh>
39 #include <scribo/toolchain/text_in_doc_preprocess.hh>
40 
41 #include <scribo/core/document.hh>
42 
43 #include <scribo/debug/usage.hh>
44 #include <scribo/debug/logger.hh>
45 
46 #include <scribo/preprocessing/crop_without_localization.hh>
47 #include <scribo/preprocessing/crop.hh>
48 
49 #include <scribo/io/xml/save.hh>
50 
51 
// Descriptions of the program's two command-line arguments, passed to
// scribo::debug::usage() from main() when the argument count is wrong.
// Each row is a { name, description } pair of C strings.
// NOTE(review): the trailing { 0, 0 } row is presumably the end-of-table
// marker that usage() looks for -- confirm against scribo/debug/usage.hh.
52 const char *args_desc[][2] =
53 {
54  { "input.*", "An image." },
55  { "output_dir", "Output directory" },
56 
57  {0, 0}
58 };
59 
60 
61 
// Program entry point.
//
// Expects exactly two arguments: argv[1] is the input image path and
// argv[2] is the output directory. The image is loaded and preprocessed,
// scribo::toolchain::content_in_doc() is run on it, and one PBM image is
// written per paragraph of the resulting document, named
// "<argv[2]>/<basename(argv[1])>.<paragraph id>.pbm".
//
// Returns whatever scribo::debug::usage() returns when argc != 3.
// Otherwise control falls off the end of main, which in C++ is equivalent
// to `return 0`.
//
// NOTE(review): this listing skips its own lines 75-76 and 93, so the
// declarations of `L`, `input` and `doc` are not visible here. `L` is used
// below as a template argument (paragraph_set<L>, paragraph_info<L>,
// line_info<L>) -- confirm the exact declarations against the original file.
62 int main(int argc, char* argv[])
63 {
64  using namespace scribo;
65  using namespace mln;
66 
// Wrong argument count: hand off to the usage helper and return its result.
67  if (argc != 3)
68  return scribo::debug::usage(argv,
69  "Find paragraph segmentation and produces images for each paragraph.",
70  "input.* output_dir",
71  args_desc);
72 
// NOTE(review): mln_trace is not defined in this file; presumably a
// tracing/debug macro from the mln headers -- confirm.
73  mln_trace("main");
74 
// Load the image named by argv[1] into `input` (declared on a listing line
// that is missing from this view).
77  mln::io::magick::load(input, argv[1]);
78 
79  // Preprocess document
// The result is a boolean image. The meaning of the literal arguments
// (false, 0, 0.34, false, false) is not visible here -- see
// scribo/toolchain/text_in_doc_preprocess.hh.
80  image2d<bool> input_preproc;
81  input_preproc = toolchain::text_in_doc_preprocess(input, false, 0, 0.34,
82  false, false);
83 
// Fixed settings forwarded to content_in_doc() below. `language` is empty,
// so the `!language.empty()` argument passed below evaluates to false.
84  bool denoise = true;
85  std::string language = "";
86  bool find_line_seps = true;
87  bool find_whitespace_seps = true;
88 
89  // Run document toolchain.
90 
91  // Text
92  std::cout << "Analysing document..." << std::endl;
// `doc` is declared on listing line 93, which is missing from this view.
94  doc = scribo::toolchain::content_in_doc(input, input_preproc, denoise,
95  find_line_seps, find_whitespace_seps,
96  !language.empty(), language);
97 
// A single boolean image with the same domain as `input` is reused for every
// paragraph; it is refilled with `true` at the start of each iteration.
98  const paragraph_set<L>& par_set = doc.paragraphs();
99  image2d<bool> output(input.domain());
100  for_all_paragraphs(p, par_set)
101  {
// NOTE(review): no header for data::fill is included directly in this file;
// presumably it arrives transitively -- confirm.
102  data::fill(output, true);
103  const paragraph_info<L>& current_par = par_set(p);
104  const mln::util::array<line_id_t>& line_ids = current_par.line_ids();
105  const unsigned nelements = line_ids.nelements();
106 
// Draw the components of every line of the current paragraph into `output`,
// passing `false` as the drawn value.
// NOTE(review): no header for scribo::draw::line_components is included
// directly in this file -- confirm it is reachable through the other includes.
107  for (unsigned i = 0; i < nelements; ++i)
108  {
109  const line_id_t& line_id = line_ids(i);
110  const line_info<L>& current_line = par_set.lines()(line_id);
111 
112  scribo::draw::line_components(output, par_set.lines(), current_line, false);
113  }
114 
// Build the output path from the output directory, the input file's base
// name and the paragraph id, then save the paragraph image as PBM.
// NOTE(review): <sstream> is not among the visible includes although
// std::stringstream is used here. Also, POSIX allows basename() (from
// <libgen.h>) to modify its argument, and it is called on argv[1] once per
// paragraph -- verify this is safe on the target platforms.
115  std::stringstream ss;
116  ss << argv[2] << "/" << basename(argv[1]) << "." << p << ".pbm";
117  mln::io::pbm::save(output, ss.str());
118  }
119 
120 }