$extrastylesheet
Functions | |
template<typename L > | |
mln::util::couple < component_set< L >, typename mln::trait::ch_value< L, bool > ::ret > | alignments (const document< L > &doc, float dmax_ratio, unsigned delta_pixel) |
template<typename I , typename V > | |
mln::trait::ch_value< I, bool > ::ret | canvas (const Image< I > &input_, const mln::util::array< box< typename I::site > > &hlines_, const mln::util::array< box< typename I::site > > &vlines_, unsigned max_dist_lines) |
template<typename I , typename N , typename V > | |
mln::util::couple < mln::util::array< box < typename I::site > >, mln::util::array< box < typename I::site > > > | cells (const Image< I > &input, const Neighborhood< N > &nbh, const V &label_type) |
template<typename I , typename J , typename N , typename V > | |
component_set< typename mln::trait::ch_value< I, V > ::ret > | components (const Image< I > &input, const Image< J > &binary_input, const Neighborhood< N > &nbh, V &ncomponents, component::Type type=component::Undefined) |
template<typename I , typename N , typename V > | |
component_set< typename mln::trait::ch_value< I, V > ::ret > | components (const Image< I > &binary_input, const Neighborhood< N > &nbh, V &ncomponents, component::Type type=component::Undefined) |
template<typename I > | |
mln::trait::concrete< I >::ret | horizontal_separators (const Image< I > &input, unsigned line_length) |
template<typename I , typename N , typename V , typename W > | |
component_set< typename mln::trait::ch_value< I, V > ::ret > | lines_discontinued (const Image< I > &input_, const Neighborhood< N > &nbh_, V &nlines, const Window< W > &win_, unsigned rank_k) |
template<typename I , typename N , typename V > | |
component_set< typename mln::trait::ch_value< I, V > ::ret > | lines_h_discontinued (const Image< I > &input, const Neighborhood< N > &nbh, V &nlines, unsigned line_length, unsigned rank_k) |
template<typename I > | |
mln::trait::concrete< I >::ret | lines_h_pattern (const Image< I > &input, unsigned length, unsigned delta) |
template<typename I , typename N , typename V > | |
component_set< typename mln::trait::ch_value< I, V > ::ret > | lines_h_single (const Image< I > &input, const Neighborhood< N > &nbh, const V &nlines, unsigned min_line_length, float w_h_ratio) |
template<typename L > | |
component_set< L > | lines_h_single (const component_set< L > &components, unsigned min_line_length, float w_h_ratio) |
template<typename I , typename N , typename V > | |
component_set< typename mln::trait::ch_value< I, V > ::ret > | lines_h_thick_and_single (const Image< I > &input, const Neighborhood< N > &nbh, V &nlines, unsigned min_line_length, float h_w_ratio) |
template<typename I > | |
mln::trait::concrete< I >::ret | lines_h_thick_and_thin (const Image< I > &binary_image, unsigned length, unsigned delta, float p_few=0.2, float p_enough=0.6, float ratio=8) |
template<typename I , typename W > | |
mln::trait::concrete< I >::ret | lines_pattern (const Image< I > &input_, unsigned length, unsigned dir, const Window< W > &win_) |
template<typename I , typename N , typename V , typename W > | |
component_set< typename mln::trait::ch_value< I, V > ::ret > | lines_thick (const Image< I > &input_, const Neighborhood< N > &nbh_, V &nlines, unsigned line_length) |
template<typename I , typename N , typename V , typename W > | |
component_set< typename mln::trait::ch_value< I, V > ::ret > | lines_thick (const Image< I > &input_, const Neighborhood< N > &nbh_, V &nlines, const Window< W > &win_) |
template<typename I , typename N , typename V > | |
component_set< typename mln::trait::ch_value< I, V > ::ret > | lines_v_discontinued (const Image< I > &input, const Neighborhood< N > &nbh, V &nlines, unsigned line_length, unsigned rank_k) |
template<typename I > | |
mln::trait::concrete< I >::ret | lines_v_pattern (const Image< I > &input, unsigned length, unsigned delta) |
template<typename I , typename N , typename V > | |
component_set< typename mln::trait::ch_value< I, V > ::ret > | lines_v_single (const Image< I > &input, const Neighborhood< N > &nbh, const V &nlines, unsigned min_line_length, float h_w_ratio) |
template<typename L > | |
component_set< L > | lines_v_single (const component_set< L > &components, unsigned min_line_length, float h_w_ratio) |
template<typename I , typename N , typename V > | |
component_set< typename mln::trait::ch_value< I, V > ::ret > | lines_v_thick_and_single (const Image< I > &input, const Neighborhood< N > &nbh, V &nlines, unsigned min_line_length, float h_w_ratio) |
template<typename L > | |
component_set< L > | non_text (const document< L > &doc, unsigned nlines) |
template<typename L > | |
component_set< L > | non_text_hdoc (const document< L > &doc, unsigned closing_size) |
template<typename L , typename I > | |
component_set< L > | non_text_kmean (const document< L > &doc, const Image< I > &input) |
template<typename I > | |
mln::trait::concrete< I >::ret | separators (const Image< I > &input, unsigned line_length) |
template<typename I > | |
mln::trait::concrete< I >::ret | separators_nonvisible (const Image< I > &in_) |
template<typename I > | |
mln::trait::concrete< I >::ret | vertical_separators (const Image< I > &input, unsigned line_length) |
Namespace of primitive extraction related routines.
mln::util::couple<component_set<L>, typename mln::trait::ch_value< L , bool >::ret> scribo::primitive::extract::alignments | ( | const document< L > & | doc, |
float | dmax_ratio, | ||
unsigned | delta_pixel | ||
) |
Find page delimiters from tabstops and whitespace.
doc
must be constructed from components grouped by lines with a very strict criterion in order to keep spaces between words and paragraphs. This first grouping is necessary to avoid false positives (e.g. inside the text blocks). doc
must have text (doc.has_text() returns true). 1) Build an image of line bboxes. 2) For TOP and BOTTOM: 2.a) Link bboxes. Links are validated only if:
delta_pixel
. This method handles skewed alignments and draws skewed lines if possible. Examples:
|x |x | x \x | x -> \x | x \x
|x | x | x | x |x
Here, the 'x' are aligned pair by pair but globally they are not. In this case we cannot draw skewed lines without processing every link and looking for that pattern. Moreover, in step 2.c the alignment is validated by looking for other components in the supposed "whitespace area" from the aligned side. With such an alignment there is a higher chance that a component intersects with that line. Here, we would like to split the links/alignments into two groups in order to get something like this:
\x \x \x /x /x
[in] | doc | A document information with text lines. |
[in] | dmax_ratio | The ratio used to compute the maximum lookup distance while linking up components. |
[in] | delta_pixel | The maximum number of pixels allowed for alignment delta precision. |
mln::trait::ch_value< I , bool >::ret scribo::primitive::extract::canvas | ( | const Image< I > & | input_, |
const mln::util::array< box< typename I::site > > & | hlines_, | ||
const mln::util::array< box< typename I::site > > & | vlines_, | ||
unsigned | max_dist_lines | ||
) |
Rebuild a table from its line bounding boxes.
[in] | input_ | A binary image. |
[in] | hlines_ | Horizontal line bounding boxes. |
[in] | vlines_ | vertical line bounding boxes. |
[in] | max_dist_lines | The maximum distance allowed between vertical and horizontal lines to connect them to each other. |
mln::util::couple<mln::util::array<box<typename I ::site> >, mln::util::array<box<typename I ::site> > > scribo::primitive::extract::cells | ( | const Image< I > & | input, |
const Neighborhood< N > & | nbh, | ||
const V & | label_type | ||
) |
Extract canvas cells from a binary image.
Uses arbitrary criteria.
[in] | input | A binary image. |
[in] | nbh | A neighborhood. |
[in] | label_type | Type of the labeled image. |
component_set<L> scribo::primitive::extract::non_text | ( | const document< L > & | doc, |
unsigned | nlines | ||
) |
Extract non text components.
This method takes text localization into account and tries to learn the background colors to deduce the relevant non text components.
[in] | doc | A document structure. It must have paragraph information. |
[in] | nlines | The number of lines needed in a paragraph to consider the latter during the background color learning. |
component_set<L> scribo::primitive::extract::non_text_hdoc | ( | const document< L > & | doc, |
unsigned | closing_size | ||
) |
Extract non text components.
Variant adapted for historical documents.