$extrastylesheet
Olena  User documentation 2.1
An Image Processing Platform
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
non_text_kmean.hh
1 // Copyright (C) 2010, 2011, 2013 EPITA Research and Development Laboratory
2 // (LRDE)
3 //
4 // This file is part of Olena.
5 //
6 // Olena is free software: you can redistribute it and/or modify it under
7 // the terms of the GNU General Public License as published by the Free
8 // Software Foundation, version 2 of the License.
9 //
10 // Olena is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 // General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with Olena. If not, see <http://www.gnu.org/licenses/>.
17 //
18 // As a special exception, you may use this file as part of a free
19 // software project without restriction. Specifically, if other files
20 // instantiate templates or use macros or inline functions from this
21 // file, or you compile this file and link it with other files to produce
22 // an executable, this file does not by itself cause the resulting
23 // executable to be covered by the GNU General Public License. This
24 // exception does not however invalidate any other reasons why the
25 // executable file might be covered by the GNU General Public License.
26 
32 
33 #ifndef SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_KMEAN_HH
34 # define SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_KMEAN_HH
35 
36 # include <mln/core/image/image2d.hh>
37 # include <mln/core/alias/neighb2d.hh>
38 # include <mln/data/fill.hh>
39 # include <mln/util/array.hh>
40 # include <mln/labeling/compute.hh>
41 # include <mln/labeling/relabel.hh>
42 # include <mln/accu/math/count.hh>
43 # include <mln/pw/all.hh>
44 
45 # include <mln/draw/box_plain.hh>
46 
47 # include <mln/value/label_8.hh>
48 # include <mln/value/rgb.hh>
49 # include <mln/value/rgb8.hh>
50 
51 # include <scribo/core/macros.hh>
52 # include <scribo/core/component_set.hh>
53 # include <scribo/core/document.hh>
54 # include <scribo/core/line_set.hh>
55 # include <scribo/core/def/lbl_type.hh>
56 # include <scribo/filter/objects_small.hh>
57 
58 # include <mln/clustering/kmean_rgb.hh>
59 # include <mln/fun/v2v/rgb8_to_rgbn.hh>
60 
61 namespace scribo
62 {
63 
64  namespace primitive
65  {
66 
67  namespace extract
68  {
69 
70  using namespace mln;
71 
72 
/// \brief Extract the non-text elements of a document using a k-means
/// clustering of its colors.
///
/// As implemented by the definition in this file: the bounding boxes of
/// the lines typed line::Text are masked out, the remaining pixels of
/// doc.image() are clustered into 3 color classes, the most populated
/// class is discarded as background, and the connected components (c8)
/// of what is left are returned.  Components whose four bounding-box
/// corners are already covered by previously kept boxes are tagged
/// component::Ignored.
///
/// \param[in] doc    A valid document; its lines and its image are read.
/// \param[in] input  A valid image; the definition only uses it to
///                   initialize the working mask (the colors are taken
///                   from doc.image()).
///
/// \return The set of extracted non-text components.
73  template <typename L, typename I>
74  component_set<L>
75  non_text_kmean(const document<L>& doc, const Image<I>& input);
76 
77 
78 # ifndef MLN_INCLUDE_ONLY
79 
80 
81  namespace internal
82  {
83 
84  template <typename L>
85  struct order_bbox
86  {
87  order_bbox(const scribo::component_set<L>& comps)
88  : comps_(comps)
89  {
90  }
91 
92  bool operator()(const unsigned& c1, const unsigned& c2) const
93  {
94  if (comps_(c1).bbox().nsites() == comps_(c2).bbox().nsites())
95  return c1 > c2;
96  return comps_(c1).bbox().nsites() > comps_(c2).bbox().nsites();
97  }
98 
100  };
101 
102  } // end of namespace scribo::primitive::extract::internal
103 
104 
105 
106  // FACADE
107 
108  template <typename L, typename I>
109  component_set<L>
110  non_text_kmean(const document<L>& doc, const Image<I>& input_)
111  {
112  mln_trace("scribo::primitive::extract::non_text_kmean");
113 
114  const I& input = exact(input_);
115  mln_precondition(doc.is_valid());
116  mln_precondition(input.is_valid());
117 
118  const line_set<L>& lines = doc.lines();
119 
120  // Element extraction
121 
122  image2d<value::label_8> img_lbl8;
123  {
124  image2d<bool> content;
125  initialize(content, input);
126  data::fill(content, true);
127 
128  for_all_lines(l, lines)
129  if (lines(l).type() == line::Text)
130  data::fill((content | lines(l).bbox()).rw(), false);
131 
132  typedef mln::value::rgb<5> t_rgb5;
133  typedef mln::fun::v2v::rgb8_to_rgbn<5> t_rgb8_to_rgb5;
134 
135  image2d<t_rgb5>
136  img_rgb5 = mln::data::transform(doc.image(), t_rgb8_to_rgb5());
137 
138  img_lbl8 =
139  mln::clustering::kmean_rgb<double,5>(
140  (img_rgb5 | pw::value(content)), 3, 10, 10).unmorph_();
141  data::fill((img_lbl8 | !pw::value(content)).rw(), 0u);
142 
143  mln::util::array<unsigned>
144  card = mln::labeling::compute(accu::math::count<value::label_8>(),
145  img_lbl8, img_lbl8, 3);
146 
147  unsigned max = 0, bg_id = 0;
148  for_all_ncomponents(c, 3)
149  if (card(c) > max)
150  {
151  max = card(c);
152  bg_id = c;
153  }
154 
155  mln::fun::i2v::array<bool> f(4, true);
156  f(0) = false;
157  f(bg_id) = false;
158  labeling::relabel_inplace(img_lbl8, 4, f);
159  }
160 
161 
162  component_set<L> output;
163 
164  {
165  image2d<bool> elts;
166  initialize(elts, img_lbl8);
167  data::fill(elts, false);
168  data::fill((elts | (pw::value(img_lbl8) != pw::cst(0))).rw(), true);
169 
170  scribo::def::lbl_type nlabels;
171  elts = filter::components_small(elts, c8(), nlabels, 40);
172 
173  output = primitive::extract::components(elts, c8(), nlabels);
174  }
175 
176 
177  {
178  // FIXME: We would like to use the convex hull instead of the bbox.
179  internal::order_bbox<L> func(output);
180  util::array<unsigned> box_ordered_comps;
181  for (unsigned i = 1; i < output.nelements(); ++i)
182  box_ordered_comps.append(i);
183  std::sort(box_ordered_comps.hook_std_vector_().begin(),
184  box_ordered_comps.hook_std_vector_().end(), func);
185 
186  image2d<bool> merged_elts;
187  initialize(merged_elts, img_lbl8);
188  data::fill(merged_elts, false);
189  for (unsigned i = 0; i < box_ordered_comps.nelements(); ++i)
190  {
191  unsigned c = box_ordered_comps(i);
192  point2d
193  pminright = output(c).bbox().pmin(),
194  pmaxleft = output(c).bbox().pmax();
195  pminright.col() = output(c).bbox().pmax().col();
196  pmaxleft.col() = output(c).bbox().pmin().col();
197 
198  if (merged_elts(output(c).bbox().pmin())
199  && merged_elts(output(c).bbox().pmax())
200  && merged_elts(pminright)
201  && merged_elts(pmaxleft))
202  output(c).update_tag(component::Ignored);
203  else
204  mln::draw::box_plain(merged_elts, output(c).bbox(), true);
205  }
206  }
207 
208  return output;
209  }
210 
211 # endif // ! MLN_INCLUDE_ONLY
212 
213 
214  } // end of namespace scribo::primitive::extract
215 
216  } // end of namespace scribo::primitive
217 
218 } // end of namespace scribo
219 
220 #endif // ! SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_KMEAN_HH