$extrastylesheet
Olena  User documentation 2.1
An Image Processing Platform
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
scribo::primitive::extract Namespace Reference

Functions

template<typename L >
mln::util::couple
< component_set< L >, typename
mln::trait::ch_value< L, bool >
::ret
alignments (const document< L > &doc, float dmax_ratio, unsigned delta_pixel)
template<typename I , typename V >
mln::trait::ch_value< I, bool >
::ret 
canvas (const Image< I > &input_, const mln::util::array< box< typename I::site > > &hlines_, const mln::util::array< box< typename I::site > > &vlines_, unsigned max_dist_lines)
template<typename I , typename N , typename V >
mln::util::couple
< mln::util::array< box
< typename I::site >
>, mln::util::array< box
< typename I::site > > > 
cells (const Image< I > &input, const Neighborhood< N > &nbh, const V &label_type)
template<typename I , typename J , typename N , typename V >
component_set< typename
mln::trait::ch_value< I, V >
::ret
components (const Image< I > &input, const Image< J > &binary_input, const Neighborhood< N > &nbh, V &ncomponents, component::Type type=component::Undefined)
template<typename I , typename N , typename V >
component_set< typename
mln::trait::ch_value< I, V >
::ret
components (const Image< I > &binary_input, const Neighborhood< N > &nbh, V &ncomponents, component::Type type=component::Undefined)
template<typename I >
mln::trait::concrete< I >::ret horizontal_separators (const Image< I > &input, unsigned line_length)
template<typename I , typename N , typename V , typename W >
component_set< typename
mln::trait::ch_value< I, V >
::ret
lines_discontinued (const Image< I > &input_, const Neighborhood< N > &nbh_, V &nlines, const Window< W > &win_, unsigned rank_k)
template<typename I , typename N , typename V >
component_set< typename
mln::trait::ch_value< I, V >
::ret
lines_h_discontinued (const Image< I > &input, const Neighborhood< N > &nbh, V &nlines, unsigned line_length, unsigned rank_k)
template<typename I >
mln::trait::concrete< I >::ret lines_h_pattern (const Image< I > &input, unsigned length, unsigned delta)
template<typename I , typename N , typename V >
component_set< typename
mln::trait::ch_value< I, V >
::ret
lines_h_single (const Image< I > &input, const Neighborhood< N > &nbh, const V &nlines, unsigned min_line_length, float w_h_ratio)
template<typename L >
component_set< L > lines_h_single (const component_set< L > &components, unsigned min_line_length, float w_h_ratio)
template<typename I , typename N , typename V >
component_set< typename
mln::trait::ch_value< I, V >
::ret
lines_h_thick_and_single (const Image< I > &input, const Neighborhood< N > &nbh, V &nlines, unsigned min_line_length, float h_w_ratio)
template<typename I >
mln::trait::concrete< I >::ret lines_h_thick_and_thin (const Image< I > &binary_image, unsigned length, unsigned delta, float p_few=0.2, float p_enough=0.6, float ratio=8)
template<typename I , typename W >
mln::trait::concrete< I >::ret lines_pattern (const Image< I > &input_, unsigned length, unsigned dir, const Window< W > &win_)
template<typename I , typename N , typename V , typename W >
component_set< typename
mln::trait::ch_value< I, V >
::ret
lines_thick (const Image< I > &input_, const Neighborhood< N > &nbh_, V &nlines, unsigned line_length)
template<typename I , typename N , typename V , typename W >
component_set< typename
mln::trait::ch_value< I, V >
::ret
lines_thick (const Image< I > &input_, const Neighborhood< N > &nbh_, V &nlines, const Window< W > &win_)
template<typename I , typename N , typename V >
component_set< typename
mln::trait::ch_value< I, V >
::ret
lines_v_discontinued (const Image< I > &input, const Neighborhood< N > &nbh, V &nlines, unsigned line_length, unsigned rank_k)
template<typename I >
mln::trait::concrete< I >::ret lines_v_pattern (const Image< I > &input, unsigned length, unsigned delta)
template<typename I , typename N , typename V >
component_set< typename
mln::trait::ch_value< I, V >
::ret
lines_v_single (const Image< I > &input, const Neighborhood< N > &nbh, const V &nlines, unsigned min_line_length, float h_w_ratio)
template<typename L >
component_set< L > lines_v_single (const component_set< L > &components, unsigned min_line_length, float h_w_ratio)
template<typename I , typename N , typename V >
component_set< typename
mln::trait::ch_value< I, V >
::ret
lines_v_thick_and_single (const Image< I > &input, const Neighborhood< N > &nbh, V &nlines, unsigned min_line_length, float h_w_ratio)
template<typename L >
component_set< L > non_text (const document< L > &doc, unsigned nlines)
template<typename L >
component_set< L > non_text_hdoc (const document< L > &doc, unsigned closing_size)
template<typename L , typename I >
component_set< L > non_text_kmean (const document< L > &doc, const Image< I > &input)
template<typename I >
mln::trait::concrete< I >::ret separators (const Image< I > &input, unsigned line_length)
template<typename I >
mln::trait::concrete< I >::ret separators_nonvisible (const Image< I > &in_)
template<typename I >
mln::trait::concrete< I >::ret vertical_separators (const Image< I > &input, unsigned line_length)

Detailed Description

Namespace of primitive extraction related routines.

Function Documentation

template<typename L >
mln::util::couple<component_set<L>, typename mln::trait::ch_value< L , bool >::ret> scribo::primitive::extract::alignments ( const document< L > &  doc,
float  dmax_ratio,
unsigned  delta_pixel 
)

Find page delimiters from tabstops and whitespaces.

Precondition
Separators should be removed from the input document image.
Text in doc must be constructed from components grouped by lines with a very strict criterion in order to keep spaces between words and paragraphs. This first grouping is necessary to avoid false positives (e.g. inside the text blocks).
doc must have text (doc.has_text() returns true).

Internal description:

1) Build an image of line bboxes. 2) For TOP and BOTTOM: 2.a) Link bboxes. Links are validated only if:

  • Alignment difference is less than delta_pixel.
  • Bboxes are not too far
  • No component is located at 5 pixels along the aligned side.

2.b) Invalidate groups if there are fewer than 3 links. 2.c) Invalidate groups if a component is located at a specific distance from the aligned side.

This method handles skewed alignments and draws skewed lines if possible. Examples:

        |x              |x
        | x              \x
        |  x       ->     \x
        |   x              \x
       |x
       | x
       |  x
       | x
       |x

Here, the 'x' are aligned pair by pair but globally they are not. Here we cannot draw skewed lines without processing every link and looking for that pattern. Moreover, in step 2.c alignment is validated by looking for other components in the supposed "whitespace area" from the aligned side. With such an alignment there are more chances that a component intersects that line. Here, we would like to split links/alignments into two groups in order to get something like this:

        \x
         \x
          \x
         /x
        /x
Parameters
[in]docA document information with text lines.
[in]dmax_ratioThe ratio used to compute the maximum lookup distance while linking up components.
[in]delta_pixelThe maximum number of pixels allowed for alignment delta precision.
template<typename I , typename V >
mln::trait::ch_value< I , bool >::ret scribo::primitive::extract::canvas ( const Image< I > &  input_,
const mln::util::array< box< typename I::site > > &  hlines_,
const mln::util::array< box< typename I::site > > &  vlines_,
unsigned  max_dist_lines 
)

Rebuild a table from its line bounding boxes.

Parameters
[in]input_A binary image.
[in]hlines_Horizontal line bounding boxes.
[in]vlines_Vertical line bounding boxes.
[in]max_dist_linesThe maximum distance allowed between vertical and horizontal lines to connect them to each other.
Returns
The canvas as a binary image. Canvas lines are set to true.
template<typename I , typename N , typename V >
mln::util::couple<mln::util::array<box<typename I ::site> >, mln::util::array<box<typename I ::site> > > scribo::primitive::extract::cells ( const Image< I > &  input,
const Neighborhood< N > &  nbh,
const V &  label_type 
)

Extract canvas cells from a binary image.

Use arbitrary criteria.

Parameters
[in]inputA binary image.
[in]nbhA neighborhood.
[in]label_typeType of the labeled image.
Returns
A list of cell bounding boxes.
template<typename L >
component_set<L> scribo::primitive::extract::non_text ( const document< L > &  doc,
unsigned  nlines 
)

Extract non text components.

This method takes text localization into account and tries to learn the background colors to deduce the relevant non text components.

Parameters
[in]docA document structure. It must have paragraph information.
[in]nlinesThe number of lines needed in a paragraph to consider the latter during the background color learning.
Returns
A component set of non text components.
template<typename L >
component_set<L> scribo::primitive::extract::non_text_hdoc ( const document< L > &  doc,
unsigned  closing_size 
)

Extract non text components.

Variant adapted for historical documents.