$extrastylesheet
Olena  User documentation 2.1
An Image Processing Platform
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
extract_paragraphs.hh
1 // Copyright (C) 2010, 2011, 2012 EPITA Research and Development
2 // Laboratory (LRDE)
3 //
4 // This file is part of Olena.
5 //
6 // Olena is free software: you can redistribute it and/or modify it under
7 // the terms of the GNU General Public License as published by the Free
8 // Software Foundation, version 2 of the License.
9 //
10 // Olena is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 // General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with Olena. If not, see <http://www.gnu.org/licenses/>.
17 //
18 // As a special exception, you may use this file as part of a free
19 // software project without restriction. Specifically, if other files
20 // instantiate templates or use macros or inline functions from this
21 // file, or you compile this file and link it with other files to produce
22 // an executable, this file does not by itself cause the resulting
23 // executable to be covered by the GNU General Public License. This
24 // exception does not however invalidate any other reasons why the
25 // executable file might be covered by the GNU General Public License.
26 
30 
31 
32 #ifndef SCRIBO_TEXT_EXTRACT_PARAGRAPHS_HH
33 # define SCRIBO_TEXT_EXTRACT_PARAGRAPHS_HH
34 
35 #include <mln/util/array.hh>
36 #include <mln/accu/shape/bbox.hh>
37 #include <mln/core/image/image2d.hh>
38 #include <mln/core/alias/neighb2d.hh>
39 #include <mln/draw/box.hh>
40 #include <mln/data/convert.hh>
41 #include <mln/value/int_u16.hh>
42 #include <mln/value/label_16.hh>
43 #include <mln/value/int_u8.hh>
44 #include <mln/value/rgb8.hh>
45 #include <mln/io/ppm/save.hh>
46 #include <mln/io/pgm/save.hh>
47 #include <mln/geom/rotate.hh>
48 #include <mln/literal/colors.hh>
49 
50 #include <scribo/core/def/lbl_type.hh>
51 #include <scribo/core/macros.hh>
52 #include <scribo/core/line_set.hh>
53 #include <scribo/core/line_links.hh>
54 #include <scribo/core/line_info.hh>
55 #include <scribo/core/paragraph_set.hh>
56 
57 using namespace mln;
58 
59 namespace scribo
60 {
61 
62  namespace text
63  {
64 
// Declaration of the paragraph extraction entry point: it takes a set of
// lines and a binary image and returns a paragraph_set<L>.
// NOTE(review): the definition is not visible in this part of the file;
// confirm there how `input` is used and whether `lines` is modified (it is
// taken by non-const reference).
68  template <typename L>
69  inline
70  paragraph_set<L>
71  extract_paragraphs(line_set<L>& lines,
72  const image2d<bool>& input);
73 
74 
75 # ifndef MLN_INCLUDE_ONLY
76 
77  namespace internal
78  {
79 
80 //-------------------------------------
81 // Extracting root of links
82 //-------------------------------------
83  template <typename T>
84  inline
85  unsigned
86  find_root(mln::util::array<T>& parent, unsigned x)
87  {
88  unsigned tmp_x = x;
89 
90  while (parent(tmp_x) != tmp_x)
91  tmp_x = parent(tmp_x);
92 
93  while (parent(x) != x)
94  {
95  const unsigned tmp = parent(x);
96  x = parent(x);
97  parent(tmp) = tmp_x;
98  }
99 
100  return x;
101  }
102 
103 
104 //---------------------------------------------------------------------
105 // Checks if there is a horizontal separator between the two lines
106 //---------------------------------------------------------------------
107 
108  template <typename L>
109  inline
110  bool
111  between_horizontal_separator(const line_set<L>& lines,
112  const scribo::line_id_t& l1_,
113  const scribo::line_id_t& l2_)
114  {
115  // No separators found in image.
116  mln_precondition(lines.components().has_separators());
117 
118  const scribo::line_info<L>& l1 = lines(l1_);
119  const scribo::line_info<L>& l2 = lines(l2_);
120 
121  const box2d& l1_bbox = l1.bbox();
122  const box2d& l2_bbox = l2.bbox();
123 
124  unsigned
125  row1 = l1_bbox.pcenter().row(),
126  row2 = l2_bbox.pcenter().row();
127  const mln_ch_value(L, bool)&
128  separators = lines.components().separators();
129 
130  unsigned row;
131  unsigned col_ptr;
132  unsigned left_col_ptr;
133  unsigned right_col_ptr;
134  unsigned end;
135 
136  if (row1 < row2)
137  {
138  row1 = l1_bbox.pmax().row();
139  row2 = l2_bbox.pmin().row();
140 
141  const unsigned quarter =
142  ((l1_bbox.pcenter().col() - l1_bbox.pmin().col()) >> 2);
143 
144  row = l1_bbox.pcenter().row();
145  col_ptr = l1_bbox.pcenter().col();
146  left_col_ptr = l1_bbox.pmin().col() + quarter;
147  right_col_ptr = l1_bbox.pmax().col() - quarter;
148  end = row2;
149  }
150  else
151  {
152  row2 = l2_bbox.pmax().row();
153  row1 = l1_bbox.pmin().row();
154 
155  const unsigned quarter =
156  ((l2_bbox.pcenter().col() - l2_bbox.pmin().col()) >> 2);
157 
158  row = l2_bbox.pcenter().row();
159  col_ptr = l2_bbox.pcenter().col();
160  left_col_ptr = l2_bbox.pmin().col() + quarter;
161  right_col_ptr = l2_bbox.pmax().col() - quarter;
162  end = row1;
163  }
164 
165  // If sep_ptr is true, then a separator is reached.
166  while (row < end)
167  {
168  ++row;
169  if (separators.at_(row, col_ptr)
170  || separators.at_(row, left_col_ptr)
171  || separators.at_(row, right_col_ptr))
172  return true;
173  }
174 
175  return false;
176  }
177 
178 //---------------------------------------------------------------------
179 // This method aims to cut the links between lines that do not fit the
180 // different criteria
181 //---------------------------------------------------------------------
182 
// Overview
// --------
// `output` starts as a duplicate of `left` and is the only link set that is
// modified; `right` is only read.  A link is cut by making a line its own
// link:
//   - output(l) = l                  cuts the current line from its left
//                                    neighbor;
//   - output(right_nbh) = right_nbh  cuts the right neighbor from the
//                                    current line.
// For every text line the criteria are tried in this order, and most cuts
// are followed by `continue`, so at most one of them is applied per line
// after the separator test:
//   1. a separator lies between the line and a neighbor (only when the
//      components carry separators);
//   2. the distances to the neighbors and their x-heights / character
//      widths differ too much;
//   3. the indentation patterns drawn in the three diagrams below.
// Judging from those diagrams, the "left" neighbors are the lines drawn
// under the current line and the "right" neighbor the one drawn above it
// -- TODO confirm against process_left_link / process_right_link.
//
// NOTE(review): most cuts of the right link are guarded by
// `output(right_nbh) == l`; the two cuts made in the first and third
// indentation cases are not.  Confirm this is intended.
183  template <typename L>
184  inline
185  void paragraph_links(const line_links<L>& left,
186  const line_links<L>& right,
187  line_links<L>& output,
188  const line_set<L>& lines)
189  {
190  output = left.duplicate();
191 
192  // const unsigned nlines = lines.nelements();
193 
194  // image2d<value::rgb8> links = data::convert(value::rgb8(), input);
195  // for (unsigned l = 0; l < nlines; ++l)
196  // {
197  // mln::draw::line(links, lines(l).bbox().pcenter(), lines(left(l)).bbox().pcenter(), literal::red);
198  // }
199  // mln::io::ppm::save(links, "out_links.ppm");
200 
201  // For each line
202  for_all_lines(l, lines)
203  if (lines(l).is_textline())
204  {
205  // Neighbors
// The left neighbors are read from `output`, so links already cut while
// processing previous lines are taken into account; the right neighbor
// is read from the unmodified `right` links.
206  line_id_t left_nbh = output(l);
207  line_id_t right_nbh = right(l);
208  line_id_t lol_nbh = output(left_nbh);
209 
// A separator between the current line and its right neighbor: cut the
// link if the right neighbor is linked to the current line, and from now
// on treat the current line as having no right neighbor.
210  if (lines.components().has_separators() &&
211  between_horizontal_separator(lines, right_nbh, l))
212  {
213  if (output(right_nbh) == l)
214  {
215  output(right_nbh) = right_nbh;
216  right_nbh = l;
217  }
218  }
// A separator between the current line and its left neighbor: cut the
// link and treat the current line as having no left neighbor.
219  if (lines.components().has_separators() &&
220  between_horizontal_separator(lines, l, left_nbh))
221  {
222  output(l) = l;
223  left_nbh = l;
224  lol_nbh = l;
225  }
226 
227  // Line features
228  const float x_height = lines(l).x_height();
229  const float left_x_height = lines(left_nbh).x_height();
230  const float right_x_height = lines(right_nbh).x_height();
231 
232  const box2d& left_line_bbox = lines(left_nbh).bbox();
233  const box2d& current_line_bbox = lines(l).bbox();
234  const box2d& right_line_bbox = lines(right_nbh).bbox();
235  const box2d& lol_line_bbox = lines(lol_nbh).bbox(); // lol : left neighbor of the left neighbor
236 
237  const int lline_col_min = left_line_bbox.pmin().col();
238  const int cline_col_min = current_line_bbox.pmin().col();
239  const int rline_col_min = right_line_bbox.pmin().col();
240  const int lolline_col_min = lol_line_bbox.pmin().col();
241 
242  const int lline_col_max = left_line_bbox.pmax().col();
243  const int cline_col_max = current_line_bbox.pmax().col();
244  const int rline_col_max = right_line_bbox.pmax().col();
245 
246  const int lline_cw = lines(left_nbh).char_width();
247  const int cline_cw = lines(l).char_width();
248  const int rline_cw = lines(right_nbh).char_width();
249  // Maximal x variation to consider two lines vertically aligned
250  const int delta_alignment = cline_cw;
251 
252  // Checks the baseline distances of the two neighbors
253  {
254  // Current line baseline
255  const int c_baseline = lines(l).baseline();
256 
257  // Baseline distance with the left and right neighbors
258  const int lc_baseline = lines(left_nbh).baseline() - c_baseline;
259  const int rc_baseline = c_baseline -lines(right_nbh).baseline();
260 
261  // Max and min baseline distances between the two neighbors
262  const float delta_baseline_max = std::max(lc_baseline, rc_baseline);
263  const float delta_baseline_min = std::min(lc_baseline,
264  rc_baseline);
265 
266  // Only two lines, meaning the current line has only one neighbor
267  bool two_lines = false;
268 
269  // If the current line has no left neighbor
// (lc_baseline is 0 when left_nbh == l; it is also 0 for a distinct
// neighbor having the same baseline.)
270  if (lc_baseline == 0)
271  {
272  // ror : right neighbor of the right neighbor
273  const line_id_t ror_nbh = right(right_nbh);
274  //const box2d& ror_line_bbox = lines(ror_nbh).bbox();
275 
276  // If the current line has a ror
277  if (ror_nbh != right_nbh
278  && output(ror_nbh) == right_nbh)
279  {
280  // Distance between the current line and the right neighbor
281  const float right_distance = lines(l).meanline() - lines(right_nbh).baseline();
282  // Distance between the right neighbor and the ror
283  const float ror_distance = lines(right_nbh).meanline() - lines(ror_nbh).baseline();
284  // ror x_height
285  const float ror_x_height = lines(ror_nbh).x_height();
286 
287  // Conditions to cut the link between the current line
288  // and its right neighbor
289  if (right_distance > 1.4f * ror_distance
290  && std::max(ror_x_height, right_x_height) <
291  1.2f * std::min(ror_x_height, right_x_height)
292  && output(right_nbh) == l)
293  {
294  output(right_nbh) = right_nbh;
295  continue;
296  }
297  }
298  // Otherwise we only have a group of two lines
299  else
300  {
301  // We determine the distance between the two lines
302  const float distance = lines(l).meanline() - lines(right_nbh).baseline();
303  two_lines = true;
304 
305  // If the distance between the two lines is greater than
306  // twice the minimum x height of the two lines then we cut
307  // the link between them
308  if (distance > 2.0f * std::min(x_height, right_x_height)
309  && output(right_nbh) == l)
310  {
311  output(right_nbh) = right_nbh;
312  continue;
313  }
314  }
315 
316  // Lines features
317  const float min_x_height = std::min(x_height, right_x_height);
318  const float max_x_height = std::max(x_height, right_x_height);
319  const float min_char_width = std::min(rline_cw, cline_cw);
320  const float max_char_width = std::max(rline_cw, cline_cw);
321 
322  // Condition to cut the link between the current line and
323  // its right neighbor
324  if ((max_x_height > min_x_height * 1.2f) &&
325  !(max_char_width <= 1.2f * min_char_width))
326  {
327  if (output(right_nbh) == l)
328  {
329  output(right_nbh) = right_nbh;
330  continue;
331  }
332  }
333 
334  // If we only have two lines we stop the study
335  if (two_lines)
336  continue;
337  }
338  // If the current line has no right neighbor
339  else if (rc_baseline == 0)
340  {
341  // lol : left neighbor of the left neighbor
342 
343  // If the left neighbor of the current line has a left neighbor
344  if (lol_nbh != left_nbh)
345  {
346  // Distance between the current line and its left neighbor
347  const float left_distance = lines(left_nbh).meanline() -
348  lines(l).baseline();
349  // Distance between the left neighbor and the left
350  // neighbor of its left neighbor
351  const float lol_distance = lines(lol_nbh).meanline() -
352  lines(left_nbh).baseline();
353  // lol x height
354  const float lol_x_height = lines(lol_nbh).x_height();
355 
356  // Conditions to cut the link between the current line
357  // and its left neighbor
358  if (left_distance > 1.4f * lol_distance
359  && std::max(lol_x_height, left_x_height) <
360  1.2f * std::min(lol_x_height, left_x_height))
361  {
362  output(l) = l;
363  continue;
364  }
365  }
366  // Otherwise we only have a group of two lines
367  else
368  {
369  // Distance between the current line and its left neighbor
370  const float distance = lines(left_nbh).meanline() -
371  lines(l).baseline();
372 
373  two_lines = true;
374 
375  // If the distance is greater than twice the min x height
376  // of the two lines
377  if (distance > 2.0f * std::min(x_height, left_x_height))
378  {
379  output(l) = l;
380  continue;
381  }
382  }
383 
384  // Lines features
385  const float min_x_height = std::min(x_height, left_x_height);
386  const float max_x_height = std::max(x_height, left_x_height);
387  const float min_char_width = std::min(lline_cw, cline_cw);
388  const float max_char_width = std::max(lline_cw, cline_cw);
389 
390  // Condition to cut the link between the current line and
391  // its left neighbor
392  if ((max_x_height > min_x_height * 1.2f) &&
393  !(max_char_width <= 1.2f * min_char_width))
394  {
395  output(l) = l;
396  continue;
397  }
398 
399  // If we only have two lines we stop the study
400  if (two_lines)
401  continue;
402  }
403  // The current line has at least one left and one right neighbor
404  else if (delta_baseline_max >= 1.1f * delta_baseline_min)
405  {
406  // Distance between the left and the current line
407  const float left_distance =
408  lines(left_nbh).meanline() - lines(l).baseline();
409  // Distance between the right and the current line
410  const float right_distance =
411  lines(l).meanline() - lines(right_nbh).baseline();
412 
413  // If the left line is too far compared to the right one
414  // we cut the link with it
415  if ((left_distance > 1.2f * right_distance
416  && std::max(x_height, left_x_height) > 1.2f *
417  std::min(x_height, left_x_height))
418  || (left_distance > 2.0 * right_distance))
419  {
420  output(l) = l;
421  continue;
422  }
423  // If the right line is too far compared to the left one
424  // we cut the link with it
425  else if (((right_distance > 1.2f * left_distance
426  && std::max(x_height, right_x_height) > 1.2f *
427  std::min(x_height, right_x_height))
428  || (right_distance > 2.0f * left_distance))
429  && output(right_nbh) == l)
430  {
431  output(right_nbh) = right_nbh;
432  continue;
433  }
434 
435  // If the distance between the baseline of the left
436  // neighbor and the baseline of the current line is
437  // greater than the one between the current line baseline
438  // and the right line baseline we have to study the text
439  // features of the right and left lines
// (The farther neighbor is tested first, then the other one.)
440  if (lc_baseline > rc_baseline)
441  {
442  const float cw_max = std::max(lline_cw, cline_cw);
443  const float cw_min = std::min(lline_cw, cline_cw);
444  const float min_x_height = std::min(x_height, left_x_height);
445  const float max_x_height = std::max(x_height, left_x_height);
446 
447  if ((max_x_height > min_x_height * 1.2f) &&
448  !(cw_max <= 1.2f * cw_min))
449  {
450  output(l) = l;
451  continue;
452  }
453 
454  {
455  const float min_x_height = std::min(x_height, right_x_height);
456  const float max_x_height = std::max(x_height, right_x_height);
457  const float cw_max = std::max(rline_cw, cline_cw);
458  const float cw_min = std::min(rline_cw, cline_cw);
459 
460  if ((max_x_height > min_x_height * 1.2f)
461  && !(cw_max <= 1.2f * cw_min)
462  && output(right_nbh) == l)
463  {
464  output(right_nbh) = right_nbh;
465  continue;
466  }
467  }
468  }
469  else
470  {
471  const float cw_max = std::max(rline_cw, cline_cw);
472  const float cw_min = std::min(rline_cw, cline_cw);
473  const float min_x_height = std::min(x_height, right_x_height);
474  const float max_x_height = std::max(x_height, right_x_height);
475 
476  if ((max_x_height > min_x_height * 1.2f)
477  && !(cw_max <= 1.2f * cw_min)
478  && output(right_nbh) == l)
479  {
480  output(right_nbh) = right_nbh;
481  continue;
482  }
483 
484  {
485  const float min_x_height = std::min(x_height, left_x_height);
486  const float max_x_height = std::max(x_height, left_x_height);
487  const float cw_max = std::max(lline_cw, cline_cw);
488  const float cw_min = std::min(lline_cw, cline_cw);
489 
490  if ((max_x_height > min_x_height * 1.2f)
491  && !(cw_max <= 1.2f * cw_min))
492  {
493  output(l) = l;
494  continue;
495  }
496  }
497  }
498  }
499  }
500 
501  // If we arrive here, it means that the lines in the
502  // neighborhood of the current line are quite similar. We can
503  // then begin to study the indentations in order to determine
504  // the beginning of new paragraphs
505 
506 //-----------------------------------------------------------------------------
507 // ___________________________
508 // |___________________________|
509 // ________________________
510 // |________________________|
511 // ___________________________
512 // |___________________________|
513 // ___________________________
514 // |___________________________|
515 //
516 // Simple case : paragraphs are justified on the left. We try to find any
517 // indentation like above.
518 //
519 //-----------------------------------------------------------------------------
520 
521  {
522  // Check if the current line neighbors are aligned
523  bool left_right_aligned = false;
524  bool left_lol_aligned = false;
525  const int dx_lr = std::abs(lline_col_min - rline_col_min);
526  const int dx_llol = std::abs(lline_col_min - lolline_col_min);
527 
528  if (dx_lr < delta_alignment)
529  left_right_aligned = true;
530 
531  if (dx_llol < delta_alignment)
532  left_lol_aligned = true;
533 
534  if (left_right_aligned && left_lol_aligned)
535  {
536  const int left_right_col_min = std::min(lline_col_min, rline_col_min);
537  const int dx_lrc = std::abs(left_right_col_min - cline_col_min);
538  const float l_char_width = 1.5f * lines(l).char_width();
539 
// The current line starts between 1.5 and 4.5 character widths to the
// right of its aligned neighbors: cut the link of the right neighbor.
// NOTE(review): no `output(right_nbh) == l` guard here.
540  if (dx_lrc > l_char_width &&
541  dx_lrc < 3.0f * l_char_width &&
542  cline_col_min > rline_col_min &&
543  cline_col_min > lline_col_min)
544  {
545  output(right_nbh) = right_nbh;
546  continue;
547  }
548  }
549  }
550 
551 //-----------------------------------------------------------------------------
552 // ___________________________
553 // |___________________________|
554 // ___________________
555 // |___________________| End of the paragraph - Current line
556 // ________________________
557 // |________________________| Beginning of a new one
558 // ___________________________
559 // |___________________________| Left of left of current line
560 //
561 // End of paragraph case : we try to find an end to the current paragraph
562 //
563 //-----------------------------------------------------------------------------
564 
565  {
566  // Check if the current line neighbors are aligned
567  bool left_right_max_aligned = false;
568  bool left_current_min_aligned = false;
569  bool lol_current_min_aligned = false;
// True when the left neighbor is linked to itself in `output`.
570  const bool lol_is_left = output(left_nbh) == left_nbh;
571  const int dx_lr_max = std::abs(lline_col_max - rline_col_max);
572  const int dx_lc_min = std::abs(lline_col_min - cline_col_min);
573  const int dx_lolc_min = std::abs(lolline_col_min - cline_col_min);
574 
575  if (dx_lr_max < delta_alignment)
576  left_right_max_aligned = true;
577 
578  if (dx_lc_min < delta_alignment)
579  left_current_min_aligned = true;
580 
581  if (dx_lolc_min < delta_alignment)
582  lol_current_min_aligned = true;
583 
584  if (!left_current_min_aligned && left_right_max_aligned &&
585  (lol_current_min_aligned || lol_is_left))
586  {
587  const int dx_lrc = std::abs(lline_col_max - cline_col_max);
588  const int l_char_width = lines(l).char_width();
589 
590  if (dx_lrc > l_char_width &&
591  cline_col_max < lline_col_max &&
592  cline_col_min < lline_col_min &&
593  (lline_col_min > lolline_col_min || lol_is_left))
594  {
595  output(l) = l;
596  continue;
597  }
598  }
599  }
600 
601 
602 //-----------------------------------------------------------------------------
603 // ___________________________
604 // |___________________________|
605 // ___________________________
606 // |___________________________|
607 // ________________________
608 // |________________________|
609 //
610 // Simple case : paragraphs are justified on the left. We try to find any
611 // indentation like above at the end of a column.
612 //
613 //-----------------------------------------------------------------------------
614 
// Only when the current line has no left neighbor (it is its own left
// link).
615  if (left_nbh == l)
616  {
617  const line_id_t ror_nbh = right(right_nbh);
618  const box2d& ror_line_bbox = lines(ror_nbh).bbox();
619  const int rorline_col_min = ror_line_bbox.pmin().col();
620 
621  bool right_ror_min_aligned = false;
622  const int dx_rror_min = std::abs(rline_col_min - rorline_col_min);
623 
624  if (dx_rror_min < delta_alignment)
625  right_ror_min_aligned = true;
626 
627  if (right_ror_min_aligned)
628  {
629  const int right_ror_col_min = std::min(rline_col_min, rorline_col_min);
630  const int dx_rrorc = std::abs(right_ror_col_min - cline_col_min);
631  const float l_char_width = 1.5f * lines(l).char_width();
632 
// NOTE(review): no `output(right_nbh) == l` guard here either.
633  if (dx_rrorc > l_char_width &&
634  dx_rrorc < 3.0f * l_char_width &&
635  cline_col_min > rline_col_min &&
636  cline_col_max >= rline_col_max)
637  {
638  output(right_nbh) = right_nbh;
639  continue;
640  }
641  }
642  }
643  }
644 
645 
646  // Only debug
647 
648  // {
649  // image2d<value::rgb8> debug = data::convert(value::rgb8(), input);
650 
651  // for (unsigned i = 0; i < output.nelements(); ++i)
652  // output(i) = scribo::make::internal::find_root(output, i);
653 
654  // mln::util::array<accu::shape::bbox<point2d> > nbbox(output.nelements());
655  // for_all_lines(l, lines)
656  // if (lines(l).is_textline())
657  // {
658  // // if (lines(i).is_textline())
659  // // {
660  // // mln::draw::box(debug, lines(i).bbox(), literal::red);
661  // nbbox(output(l)).take(lines(l).bbox());
662  // // }
663  // }
664 
665  // for (unsigned i = 0; i < nbbox.nelements(); ++i)
666  // if (nbbox(i).is_valid())
667  // {
668  // box2d b = nbbox(i).to_result();
669  // mln::draw::box(debug, b, literal::orange);
670  // b.enlarge(1);
671  // mln::draw::box(debug, b, literal::orange);
672  // b.enlarge(1);
673  // mln::draw::box(debug, b, literal::orange);
674  // }
675 
676  // mln::io::ppm::save(debug, "out_paragraph.ppm");
677  // }
678 
679  }
680 
681 //-------------------------------------------------------------
682 // Preparation of the lines before linking them.
683 // For each line we draw the top and the bottom of it.
684 // Assuming than i is the number of the line. Then the top of the line
685 // will be affected with the value 2 * i in the block image and the
686 // bottom with 2 * i + 1.
687 //
688 //-------------------------------------------------------------
689 
690  template <typename L>
691  inline
692  void prepare_lines(const box2d& domain,
693  const line_set<L>& lines,
694  L& blocks,
696  {
697  std::map< int, std::vector< const box2d* > > drawn_lines;
698  // const unsigned nlines = lines.nelements();
699 
700  // For each line
701  //for (unsigned l = 0; l < nlines; ++l)
702  for_all_lines(l, lines)
703  if (lines(l).is_textline())
704  {
705  // Rotation of the bounding box
706  box2d b = geom::rotate(lines(l).bbox(), -90, domain.pcenter());
707 // rbbox.append(b);
708  rbbox(l) = b;
709 
710  const unsigned index = l + 1;
711  const unsigned even_index = 2 * index;
712  const unsigned odd_index = even_index + 1;
713 
714  // Top of the line
715  {
716  bool not_finished = true;
717  int col_offset = 0;
718 
719  while (not_finished)
720  {
721  // Looking for a column in the image to draw the top of the
722  // line
723 
724  const int col = b.pmax().col() + col_offset;
725  std::map< int, std::vector< const box2d* > >::iterator it
726  = drawn_lines.find(col);
727 
728  if (it != drawn_lines.end())
729  {
730  const std::vector< const box2d* >& lines = (*it).second;
731  const unsigned nb_lines = lines.size();
732  unsigned i = 0;
733 
734  for (i = 0; i < nb_lines; ++i)
735  {
736  const box2d* box = lines[i];
737  const int min_row = std::max(b.pmin().row(), box->pmin().row());
738  const int max_row = std::min(b.pmax().row(), box->pmax().row());
739 
740  if (min_row - max_row <= 0)
741  break;
742  }
743 
744  if (i == nb_lines)
745  {
746  mln::draw::line(blocks, point2d(b.pmin().row(), col),
747  point2d(b.pmax().row(), col), even_index);
748  not_finished = false;
749  drawn_lines[col].push_back(&(rbbox[l]));
750  }
751  else
752  ++col_offset;
753  }
754  else
755  {
756  mln::draw::line(blocks, point2d(b.pmin().row(), col),
757  point2d(b.pmax().row(), col), even_index);
758  not_finished = false;
759  drawn_lines[col].push_back(&(rbbox[l]));
760  }
761  }
762  }
763 
764  // Bottom of the line
765  {
766  bool not_finished = true;
767  int col_offset = 0;
768 
769  while (not_finished)
770  {
771  // Looking for a column in the image to draw the bottom of
772  // the line
773 
774  const int col = b.pmin().col() - col_offset;
775  std::map< int, std::vector< const box2d* > >::iterator it
776  = drawn_lines.find(col);
777 
778  if (it != drawn_lines.end())
779  {
780  const std::vector< const box2d* >& lines = (*it).second;
781  const unsigned nb_lines = lines.size();
782  unsigned i = 0;
783 
784  for (i = 0; i < nb_lines; ++i)
785  {
786  const box2d* box = lines[i];
787  const int min_row = std::max(b.pmin().row(), box->pmin().row());
788  const int max_row = std::min(b.pmax().row(), box->pmax().row());
789 
790  if (min_row - max_row <= 0)
791  break;
792  }
793 
794  if (i == nb_lines)
795  {
796  mln::draw::line(blocks, point2d(b.pmin().row(), col),
797  point2d(b.pmax().row(), col), odd_index);
798  not_finished = false;
799  drawn_lines[col].push_back(&(rbbox[l]));
800  }
801  else
802  ++col_offset;
803  }
804  else
805  {
806  mln::draw::line(blocks, point2d(b.pmin().row(), col),
807  point2d(b.pmax().row(), col), odd_index);
808  not_finished = false;
809  drawn_lines[col].push_back(&(rbbox[l]));
810  }
811  }
812  }
813  }
814  }
815 
// Compute the left link of every text line.
//
// `blocks` is expected to hold the borders drawn by prepare_lines (even
// label 2 * (l + 1) for the top of line l, odd label 2 * (l + 1) + 1 for
// its bottom) and `rbbox` the rotated boxes.  A label v is mapped back to
// its line id with (v >> 1) - 1.
//
// Every text line is first linked to itself and every other line to 0.
// Then, for each text line, the image buffer is walked backwards from two
// starting points of its rotated box, over at most nleft + 1 pixels, until
// an even (top) label is met.  The walk then goes on until the matching
// bottom label is met; if a whole line (top then bottom) is met before
// that, this inner line becomes the neighbor, otherwise the first line
// met does.
//
// NOTE(review): decrementing the pointers is presumably a move of one
// column inside the same image row -- confirm against the memory layout
// of L.
816  template <typename L>
817  inline
818  void
819  process_left_link(L& blocks,
820  const mln::util::array<box2d>& rbbox,
821  const line_set<L>& lines,
822  line_links<L>& left)
823  {
824  typedef scribo::def::lbl_type V;
825 
826  // At the beginning each line is its own neighbor
827  for_all_lines(l, lines)
828  if (lines(l).is_textline())
829  left(l) = l;
830  else
831  left(l) = 0;
832 
833  // const unsigned nlines = lines.nelements();
834 
835  // For each line
836  for_all_lines(l, lines)
837  if (lines(l).is_textline())
838  {
839  // Max distance for the line search
840  int dmax = 1.5f * lines(l).x_height();
841 
842  // Starting points in the current line box
// c is the center of the rotated box; q has the same column and lies a
// quarter of the way from the minimum row to the center row.
843  point2d c = rbbox(l).pcenter();
844  point2d q(rbbox(l).pmin().row() + ((c.row() - rbbox(l).pmin().row()) / 4), c.col());
845 
// Half of the column extent of the rotated box.
846  int
847  midcol = (rbbox(l).pmax().col()
848  - rbbox(l).pmin().col()) / 2;
849 
850  // Left
851  {
852  // Room available on the left, up to the first column of the image
853  int
854  nleftima = c.col() - blocks.domain().pmin().col(),
855  // Search distance on the left
856  nleft = std::min(nleftima, midcol + dmax);
857 
858  V
859  // Starting points in the box
860  *p = &blocks(c),
861  *p2 = &blocks(q),
862  // End of search
863  *pstop = p - nleft - 1,
864  // Line neighbor
865  *nbh_p = 0;
866 
867  // While we haven't found a neighbor or reached the limit
868  for (; p != pstop; --p, --p2)
869  {
870  if (*p2 != literal::zero // Not the background
871  && ((*p2 % 2) == 0) // Even label: looking for the top of a line
872  && left((*p2 >> 1) - 1) != l) // No loops
873  {
874  // Neighbor found, we stop the search
875  nbh_p = p2;
876  break;
877  }
878 
879  if (*p != literal::zero // Not the background
880  && ((*p % 2) == 0) // Even label: looking for the top of a line
881  && left((*p >> 1) - 1) != l) // No loops
882  {
883  // Neighbor found, we stop the search
884  nbh_p = p;
885  break;
886  }
887  }
888 
889  // If a neighbor was found, then we have found the top of the
890  // line. We are then looking for the bottom of the encountered
891  // line. If during the search process we find a complete line
892  // included in the touched line, this line is considered as
893  // the neighbor under certain conditions (see below)
894 
895  //---------------------------------------------------------------
896  // _________________________ |
897  // |_________________________| => Current line | Search direction
898  // v
899  // => First encountered top line
900  // __________________________________________________ 2Q
901  // | Q |
902  // | _________________________ |2P
903  // | |_____________P___________| => Second top |2P + 1
904  // | line |
905  // |__________________________________________________|2Q + 1
906  //
907  //
908  //---------------------------------------------------------------
909 
910  if (nbh_p)
911  {
// Even (top) labels met so far, the first neighbor included.
912  std::vector<V> lines_nbh;
// Label of the bottom of the first encountered line.
913  const V end_p = *nbh_p + 1;
914  const V* nbh_p_copy = nbh_p;
915 
// NOTE(review): this walk only stops on end_p or on a complete inner
// line; it is not bounded by pstop nor by the buffer limits -- confirm
// that end_p is always met.
916  for (; *nbh_p != end_p; --nbh_p)
917  {
918  if ((*nbh_p) != literal::zero) // Not the background
919  {
920  if ((*nbh_p) % 2 == 0)// We have found the top of
921  // another line
922  lines_nbh.push_back(*nbh_p);
923  else
924  {
925  // We have found the bottom of a line. We are looking if
926  // we have already encountered the top of this
927  // line. If so, we link the current line with this one
928  // under certain conditions:
929 
930  if (std::find(lines_nbh.begin(), lines_nbh.end(),
931  (*nbh_p) - 1) != lines_nbh.end())
932  {
933  // If we can link the complete line with the current line
934  if (// It must be in the search range
935  nbh_p > pstop
936  // Avoid loops
937  && left(((*nbh_p - 1) >> 1) - 1) != l)
938  left(l) = ((*nbh_p - 1) >> 1) - 1;
939 
940  // We have found a complete line so we stop the search
941  break;
942  }
943  }
944  }
945  }
946 
947 
948  // If we haven't found any included line in the first
949  // neighbor, then the line is considered as the neighbor of
950  // the current line
951  if (*nbh_p == end_p)
952  left(l) = (*nbh_p_copy >> 1) - 1;
953  }
954  }
955  }
956  }
957 
958 
959  // We assume that the lines have been rotated
960  template <typename L>
961  inline
962  void
963  process_right_link(L& blocks,
964  const mln::util::array<box2d>& rbbox,
965  const line_set<L>& lines,
966  line_links<L>& right)
967  {
968  typedef scribo::def::lbl_type V;
969 
970  // At the beginning each line is its own neighbor
971  for_all_lines(l, lines)
972  if (lines(l).is_textline())
973  right(l) = l;
974  else
975  right(l) = 0;
976 
977  // const unsigned nlines = lines.nelements();
978 
979  // For each line
980  for_all_lines(l, lines)
981  if (lines(l).is_textline())
982  {
983  // Max distance for the line search
984  int dmax = 1.5f * lines(l).x_height();
985 
986  // Starting points in the current line box
987  point2d c = rbbox(l).pcenter();
988  point2d q(rbbox(l).pmax().row() - ((rbbox(l).pmax().row() - c.row()) / 4), c.col());
989 
990  int
991  midcol = (rbbox(l).pmax().col()
992  - rbbox(l).pmin().col()) / 2;
993 
994  // Right
995  {
996  int
997  nrightima = geom::ncols(blocks) - c.col() + blocks.domain().pmin().col(),
998  nright = std::min(nrightima, midcol + dmax);
999 
1000  V
1001  // Starting points in the box
1002  *p = &blocks(c),
1003  *p2 = &blocks(q),
1004  // End of search
1005  *pstop = p + nright - 1,
1006  // Line neighbor
1007  *nbh_p = 0;
1008 
1009  // While we haven't found a neighbor or reached the limit
1010  for (; p != pstop; ++p, ++p2)
1011  {
1012  if (*p2 != literal::zero // Not the background
1013  && ((*p2 % 2) == 1) // Looking for the bottom of a line
1014  && right(((*p2 - 1) >> 1) - 1) != l) // No loops
1015  {
1016  // Neightbor found, we stop the research
1017  nbh_p = p2;
1018  break;
1019  }
1020 
1021  if (*p != literal::zero // Not the background
1022  && ((*p % 2) == 1) // Looking for the bottom of a line
1023  && right(((*p - 1) >> 1) - 1) != l) // No loops
1024  {
1025  // Neightbor found, we stop the research
1026  nbh_p = p;
1027  break;
1028  }
1029  }
1030 
1031  // If a neighbor was found, then we have found the bottom of the
1032  // line. We are then looking for the top of the encountered
1033  // line. If during the search process we find a complete line
1034  // included in the touched line, this line is considered as
1035  // the neighbor under certain conditions (see below)
1036 
1037  //---------------------------------------------------------------
1038  //
1039  //
1040  // __________________________________________________ 2Q
1041  // | Q |
1042  // | _________________________ |2P
1043  // | |_____________P___________| => Second bottom |2P + 1
1044  // | line |
1045  // |__________________________________________________|2Q + 1
1046  // => First encountered bottom line
1047  // _________________________ ^
1048  // |_________________________| => Current line | Search direction
1049  // |
1050  //---------------------------------------------------------------
1051 
1052  if (nbh_p)
1053  {
1054  std::vector<V> lines_nbh;
1055  const V end_p = *nbh_p - 1;
1056  const V* nbh_p_copy = nbh_p;
1057 
1058  for (; *nbh_p != end_p; ++nbh_p)
1059  {
1060  if (*nbh_p != literal::zero) // Not the background
1061  {
1062  if (*nbh_p % 2 == 1) // We have found the bottom of
1063  // another line
1064  lines_nbh.push_back(*nbh_p);
1065  else
1066  {
1067  // We have found the top of a line. We are looking if
1068  //we have already encountered the bottom of this
1069  // line. If so, we link the current line with this one
1070  // under certain conditions:
1071 
1072  if (std::find(lines_nbh.begin(), lines_nbh.end(),
1073  *nbh_p + 1) != lines_nbh.end())
1074  {
1075  // If we can link the complete line with the current line
1076  if (// It must be in the search range
1077  nbh_p < pstop
1078  // Avoid loops
1079  && right((*nbh_p >> 1) - 1) != l)
1080  right(l) = (*nbh_p >> 1) - 1;
1081 
1082  // We have found a complete line, so we stop the search
1083  break;
1084  }
1085  }
1086  }
1087  }
1088 
1089  // If we haven't found any included line in the first
1090  // neighbor, then the line is considered as the neighbor of
1091  // the current line
1092 
1093  if (*nbh_p == end_p)
1094  right(l) = ((*nbh_p_copy - 1) >> 1) - 1;
1095  }
1096  }
1097  }
1098  }
1099 
1100 //-----------------------------------------------------------------------
1101 // Finalizing the links by merging information extracted from the left
1102 // and right links
1103 //-----------------------------------------------------------------------
1104 
1105  template< typename L >
1106  inline
1107  void finalize_links(line_links<L>& left,
1108  line_links<L>& right,
1109  const line_set<L>& lines)
1110  {
1111  // const unsigned nlines = lines.nelements();
1112 
1113  for_all_lines(l, lines)
1114  if (lines(l).is_textline())
1115  {
1116  const unsigned left_value = left(l);
1117  const unsigned right_value = right(l);
1118 
1119  // If the right neighbor of my left neighbor is itself then its
1120  // right neighbor is me
1121  {
1122  line_id_t& v = right(left_value);
1123 
1124  if (v == left_value)
1125  v = l;
1126  }
1127 
1128  // If the left neighbor of my right neighbor is itself then its
1129  // left neighbor is me
1130  {
1131  line_id_t& v = left(right_value);
1132 
1133  if (v == right_value)
1134  v = l;
1135  }
1136  }
1137  }
1138 
1139  } // end of namespace scribo::text::internal
1140 
1141  template <typename L>
1142  inline
1143  paragraph_set<L>
1144  extract_paragraphs(line_set<L>& lines,
1145  const image2d<bool>& input)
1146  {
1147  typedef scribo::def::lbl_type V;
1148 
1149  image2d<V> blocks(geom::rotate(input.domain(), -90, input.domain().pcenter()));
1150  data::fill(blocks, 0);
1151 
1153  line_links<L> left(lines);
1154  left(0) = 0;
1155  line_links<L> right(lines);
1156  right(0) = 0;
1157  line_links<L> output(lines);
1158  output(0) = 0;
1159 
1160  rbbox.resize(lines.nelements() + 1);
1161 
1162  // Prepare lines
1163  internal::prepare_lines(input.domain(), lines , blocks, rbbox);
1164  // Link left
1165  internal::process_left_link(blocks, rbbox, lines , left);
1166  // Link right
1167  internal::process_right_link(blocks, rbbox, lines , right);
1168  // Finalize links
1169  internal::finalize_links(left, right, lines );
1170  // Extract paragraphs
1171  internal::paragraph_links(left, right, output, lines);
1172 
1173  paragraph_set<L> par_set = make::paragraph(output);
1174  return par_set;
1175  }
1176 
1177 # endif // ! MLN_INCLUDE_ONLY
1178 
1179  } // end of namespace scribo::text
1180 
1181 } // end of namespace scribo
1182 
1183 #endif // ! SCRIBO_TEXT_EXTRACT_PARAGRAPHS_HH