33 #ifndef SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_HH
34 # define SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_HH
36 # include <mln/morpho/elementary/dilation.hh>
39 # include <scribo/make/text_components_image.hh>
40 # include <scribo/make/text_blocks_image.hh>
42 # include <scribo/primitive/extract/internal/union.hh>
43 # include <scribo/debug/logger.hh>
45 # include <mln/literal/black.hh>
48 #include <mln/util/timer.hh>
49 #include <mln/io/pbm/save.hh>
80 non_text(
const document<L>& doc,
unsigned nlines);
83 # ifndef MLN_INCLUDE_ONLY
91 learn(
const document<L>& doc,
99 seps = doc.paragraphs().lines().components().separators();
105 std::cerr <<
" txt.border() = " << txt.
border()
106 <<
" - txtblocks.border() = " << txtblocks.
border()
107 <<
" - input.border() = " << input.
border()
108 <<
" - seps.border() = " << seps.
border()
110 std::cerr <<
"different sizes for borders! Resizing..." << std::endl;
// Quantization factors derived from nbits:
//   q_div = 2^(8 - nbits); used further down as the divisor applied to each
//           rgb8 channel (c.red() / q_div, ...) to obtain a histogram cell
//           coordinate.
//   q     = 2^nbits; presumably the number of cells per channel -- its use
//           is not visible in this excerpt, TODO confirm.
// NOTE(review): std::pow computes in floating point and the result is
// converted to unsigned. This looks like it assumes nbits <= 8 (otherwise
// q_div would not be a usable divisor) -- confirm against callers.
121 const unsigned q_div = std::pow(2.f, (
int)(8 - nbits));
122 const unsigned q =
unsigned(std::pow(2.f, (
int)nbits));
134 for (
unsigned i = 0; i < nelements; ++i)
135 if (txtblocks.
element(i) ==
true)
138 const value::rgb8& c = input.
element(i);
139 ++h_bg.at_(c.red() / q_div, c.green() / q_div, c.blue() / q_div);
// Occupancy tally of the histogram h_bg:
//   key   = number of items found in a histogram cell,
//   value = number of cells holding exactly that many items.
// It is filled by ++ncells_with_nitems[nitems_in_c] while iterating over
// h_bg's domain, and later walked with rbegin()/rend(), i.e. from the
// largest item count down to the smallest (std::map keeps keys ordered).
143 typedef std::map<unsigned, unsigned> map_t;
144 map_t ncells_with_nitems;
146 mln_piter_(
box3d) c(h_bg.domain());
149 unsigned nitems_in_c = h_bg(c);
150 ++ncells_with_nitems[ nitems_in_c ];
// Minimum number of items a histogram cell must hold to be flagged in bg
// (see bg(c) = (h_bg(c) >= n_items_min) below). It is assigned in the
// reverse walk over ncells_with_nitems once the accumulated count N exceeds
// p_cover * n_bg. The initial value 0 doubles as a "threshold not found"
// marker, which is tested right after that walk.
155 unsigned n_items_min = 0;
157 map_t::const_reverse_iterator i;
159 for (i = ncells_with_nitems.rbegin(); i != ncells_with_nitems.rend(); ++i)
161 unsigned nitems = i->first, ncells = i->second;
162 N += nitems * ncells;
163 if (
float(N) > p_cover *
float(n_bg))
165 n_items_min = nitems;
170 if (n_items_min == 0)
176 mln_piter_(
box3d) c(h_bg.domain());
178 bg(c) = (h_bg(c) >= n_items_min);
185 initialize(output, input);
187 for (
unsigned i = 0; i < nelements; ++i)
192 const value::rgb8& c = input.element(i);
193 output.element(i) = ! bg.at_(c.red() / q_div, c.green() / q_div, c.blue() / q_div);
214 unsigned max_area = 0;
221 parent,
area, max_area
229 const unsigned nelements = input.
nelements();
230 const bool* p_i = input.
buffer();
231 bool* p_o = output.buffer();
232 const unsigned* p_a =
area.buffer();
233 const unsigned* p_par = parent.buffer();
235 for (
unsigned i = 0; i < nelements; ++i)
242 *p_o = (*p_a != max_area);
244 *p_o = output.element(*p_par);
259 parent,
area, max_area
268 const unsigned nelements = input.
nelements();
269 bool* p_o = output.buffer();
270 const unsigned* p_a =
area.buffer();
271 const unsigned* p_par = parent.buffer();
273 for (
unsigned i = 0; i < nelements; ++i)
278 *p_o = (*p_a > lambda);
280 *p_o = output.element(*p_par);
298 template <
typename L>
300 non_text(
const document<L>& doc,
unsigned nlines)
302 mln_trace(
"scribo::primitive::extract::non_text");
308 mln_precondition(doc.is_valid());
309 mln_precondition(doc.has_text());
// Threshold forwarded to internal::cleaning() below.
// NOTE(review): presumably an area threshold in pixels (an area buffer is
// compared against a value named lambda elsewhere in this file) -- confirm.
// The value 1000 is hard-coded; no rationale is visible in this excerpt.
318 unsigned lambda = 1000;
331 txt,
"txt_components");
333 txtblocks,
"txt_blocks");
// Two-stage construction of the image that the component extraction below
// operates on:
//   1. internal::learn() derives an image from the document, the text
//      component image (txt), the text block image (txtblocks) and the
//      parameters nbits and p;
//   2. internal::cleaning() post-processes that image using lambda.
// NOTE(review): presumably a binary "non-text candidate" mask is produced
// here -- the declaration of element_image is not visible, confirm its type.
337 element_image = internal::learn(doc, txt, txtblocks, nbits, p);
338 element_image = internal::cleaning(element_image, lambda);
342 elements = primitive::extract::
components(element_image,
348 elements.labeled_image(),
349 "non_text_components");
355 # endif // ! MLN_INCLUDE_ONLY
364 #endif // ! SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_HH