26 #ifndef SCRIBO_IO_XML_INTERNAL_EXTENDED_PAGE_XML_VISITOR_HH
27 # define SCRIBO_IO_XML_INTERNAL_EXTENDED_PAGE_XML_VISITOR_HH
35 # include <mln/morpho/elementary/gradient_internal.hh>
36 # include <mln/pw/all.hh>
37 # include <mln/core/image/dmorph/image_if.hh>
39 # include <scribo/core/def/lbl_type.hh>
41 # include <scribo/core/internal/doc_serializer.hh>
42 # include <scribo/core/document.hh>
43 # include <scribo/core/component_set.hh>
44 # include <scribo/core/paragraph_set.hh>
45 # include <scribo/core/object_groups.hh>
46 # include <scribo/core/object_links.hh>
47 # include <scribo/core/line_links.hh>
48 # include <scribo/core/line_info.hh>
50 # include <scribo/io/xml/internal/print_box_coords.hh>
51 # include <scribo/io/xml/internal/print_image_coords.hh>
52 # include <scribo/io/xml/internal/print_page_preambule.hh>
53 # include <scribo/io/xml/internal/compute_text_colour.hh>
55 # include <scribo/text/paragraphs_closing.hh>
57 # include <scribo/util/component_precise_outline.hh>
58 # include <scribo/util/color_to_hex.hh>
94 std::ofstream& output;
100 # ifndef MLN_INCLUDE_ONLY
103 template <
typename L>
// Visit a whole document.
//
// Writes the page preamble, lets the document's paragraphs and each
// optional component set it reports (elements, vertical/horizontal line
// separators, whitespace separators) accept this visitor, then writes
// the closing </Page> and </PcGts> tags to `output`.
//
// NOTE(review): only part of this definition is visible in this
// excerpt (return type, braces and possibly some guards are missing).
113 template <
typename L>
115 extended_page_xml_visitor<L>::visit(
const document<L>& doc)
const
// Preamble; the third argument is passed as `false`.
118 print_PAGE_preambule(output, doc,
false);
// Text paragraphs.
// NOTE(review): any guard preceding this call is not visible here.
122 doc.paragraphs().accept(*
this);
// Elements are visited only when the document reports having some.
126 if (doc.has_elements())
127 doc.elements().accept(*
this);
// Vertical, then horizontal, line separators (each only if present).
130 if (doc.has_vline_seps())
131 doc.vline_seps_comps().accept(*
this);
132 if (doc.has_hline_seps())
133 doc.hline_seps_comps().accept(*
this);
// Whitespace separators (only if present).
136 if (doc.has_whitespace_seps())
137 doc.whitespace_seps_comps().accept(*
this);
// Close the page and the root element.
139 output <<
" </Page>" << std::endl;
140 output <<
"</PcGts>" << std::endl;
// Component-set traversal: every component `c` of `comp_set` for which
// `comp_set(c).is_valid()` holds accepts this visitor; the others are
// skipped.
//
// NOTE(review): the function signature is not visible in this excerpt;
// presumably this is the visit() overload taking a component set --
// confirm against the full file.
146 template <
typename L>
151 for_all_comps(c, comp_set)
152 if (comp_set(c).is_valid())
153 comp_set(c).accept(*this);
// Emit the XML region describing a single component `info`.
//
// Four region kinds are written by the visible code:
// WhitespaceSeparatorRegion, VerticalSeparatorRegion,
// HorizontalSeparatorRegion and ImageRegion. Each one carries the
// component bounding box as x_min/y_min/x_max/y_max attributes (column
// maps to x, row maps to y), followed by the outline coordinates
// printed by internal::print_image_coords, and then its closing tag.
//
// NOTE(review): the signature and the logic selecting which region is
// emitted (presumably a dispatch on the component type) are not visible
// in this excerpt -- confirm against the full file.
159 template <typename L>
// Outline of the component, computed from `lbl_ | info.bbox()` and `id`.
167 par = scribo::util::component_precise_outline(lbl_ | info.bbox(), id);
// --- Whitespace separator region.
173 output <<
" <WhitespaceSeparatorRegion id=\"wss"
176 <<
" x_min=\"" << info.bbox().pmin().col() <<
"\""
177 <<
" y_min=\"" << info.bbox().pmin().row() <<
"\""
178 <<
" x_max=\"" << info.bbox().pmax().col() <<
"\""
179 <<
" y_max=\"" << info.bbox().pmax().row() <<
"\""
// Outline coordinates of the region.
182 internal::print_image_coords(output, par,
" ");
184 output <<
" </WhitespaceSeparatorRegion>" << std::endl;
// --- Vertical separator region; orientation and colours are written
// as fixed literal values.
190 output <<
" <VerticalSeparatorRegion id=\"vlsr" << info.id()
191 <<
"\" orientation=\"0.000000\" "
192 <<
" colour=\"Black\" "
193 <<
" bgColour=\"White\""
194 <<
" x_min=\"" << info.bbox().pmin().col() <<
"\""
195 <<
" y_min=\"" << info.bbox().pmin().row() <<
"\""
196 <<
" x_max=\"" << info.bbox().pmax().col() <<
"\""
197 <<
" y_max=\"" << info.bbox().pmax().row() <<
"\""
// Outline coordinates of the region.
200 internal::print_image_coords(output, par,
" ");
202 output <<
" </VerticalSeparatorRegion>" << std::endl;
// --- Horizontal separator region; same fixed attributes as the
// vertical one, with an "hlsr" id prefix.
208 output <<
" <HorizontalSeparatorRegion id=\"hlsr" << info.id()
209 <<
"\" orientation=\"0.000000\" "
210 <<
" colour=\"Black\" "
211 <<
" bgColour=\"White\""
212 <<
" x_min=\"" << info.bbox().pmin().col() <<
"\""
213 <<
" y_min=\"" << info.bbox().pmin().row() <<
"\""
214 <<
" x_max=\"" << info.bbox().pmax().col() <<
"\""
215 <<
" y_max=\"" << info.bbox().pmax().row() <<
"\""
// Outline coordinates of the region.
218 internal::print_image_coords(output, par,
" ");
220 output <<
" </HorizontalSeparatorRegion>" << std::endl;
// --- Image region; colourDepth, orientation, embText and bgColour are
// written as fixed literal values.
228 output <<
" <ImageRegion id=\"ir" << info.id()
229 <<
"\" colourDepth=\"colour\""
230 <<
" orientation=\"0.000000\" "
231 <<
" embText=\"No\" "
232 <<
" bgColour=\"White\""
233 <<
" x_min=\"" << info.bbox().pmin().col() <<
"\""
234 <<
" y_min=\"" << info.bbox().pmin().row() <<
"\""
235 <<
" x_max=\"" << info.bbox().pmax().col() <<
"\""
236 <<
" y_max=\"" << info.bbox().pmax().row() <<
"\""
// Outline coordinates of the region.
239 internal::print_image_coords(output, par,
" ");
241 output <<
" </ImageRegion>" << std::endl;
// Visit a paragraph set: write one <TextRegion> element per valid
// paragraph.
//
// For each paragraph `p` with `parset(p).is_valid()`, the region
// carries the paragraph id, typographic attributes read from the line
// `lines(fid)`, the paragraph colour and its reliability, and the
// outline coordinates. Every line of the paragraph then accepts this
// visitor before the region is closed.
//
// NOTE(review): several lines of this definition (return type, braces,
// declarations of `b`, `par` and `line_ids`) are not visible in this
// excerpt.
250 template <
typename L>
252 extended_page_xml_visitor<L>::visit(
const paragraph_set<L>& parset)
const
254 const line_set<L>& lines = parset.lines();
// Image produced by text::paragraphs_closing; the paragraph outlines
// below are computed from it.
257 L par_clo = text::paragraphs_closing(parset);
259 for_all_paragraphs(p, parset)
260 if (parset(p).is_valid())
// Outline of paragraph `p`, computed from `par_clo | b` and `p`.
264 par = scribo::util::component_precise_outline(par_clo | b, p);
// The line at index 0 of `line_ids` supplies the line-level attributes
// of the whole region.
// NOTE(review): assumes `line_ids` is non-empty for a valid paragraph
// -- confirm.
270 line_id_t fid = line_ids(0);
271 output <<
" <TextRegion id=\"" <<
p
272 <<
"\" orientation=\"" << lines(fid).orientation()
273 <<
"\" readingOrientation=\"" << lines(fid).reading_orientation()
274 <<
"\" readingDirection=\"" << lines(fid).reading_direction()
275 <<
"\" type=\"" << lines(fid).type()
276 <<
"\" reverseVideo=\"" << (lines(fid).reverse_video() ?
"true" :
"false")
277 <<
"\" indented=\"" << (lines(fid).indented() ?
"true" :
"false")
278 <<
"\" textColour=\"" << internal::compute_text_colour(parset(p).color())
279 <<
"\" kerning=\"" << lines(fid).char_space();
// Colour attributes come from the paragraph; the metric attributes
// come from `lines(fid)`.
283 output <<
"\" color=\"" << scribo::util::color_to_hex(parset(p).color())
284 <<
"\" colorReliability=\"" << parset(p).color_reliability()
285 <<
"\" baseline=\"" << lines(fid).baseline()
286 <<
"\" meanline=\"" << lines(fid).meanline()
287 <<
"\" xHeight=\"" << lines(fid).x_height()
288 <<
"\" dHeight=\"" << lines(fid).d_height()
289 <<
"\" aHeight=\"" << lines(fid).a_height()
290 <<
"\" charWidth=\"" << lines(fid).char_width();
// Outline coordinates of the paragraph.
295 internal::print_image_coords(output, par,
" ");
// Each line of the paragraph accepts this visitor.
298 for_all_paragraph_lines(lid, line_ids)
300 line_id_t l = line_ids(lid);
301 lines(l).accept(*
this);
305 output <<
" </TextRegion>" << std::endl;
// Visit a single text line: write a <Line> element to `output`.
//
// The element carries the line text (`line.html_text()`), its id, and
// the line attributes streamed below (boldness, colour, orientation,
// metrics, text confidence), followed by the bounding-box coordinates
// and the closing </Line> tag.
//
// NOTE(review): return type and braces of this definition are not
// visible in this excerpt.
310 template <
typename L>
312 extended_page_xml_visitor<L>::visit(
const line_info<L>&
line)
const
// The text attribute is written from html_text().
316 output <<
" <Line text=\"" << line.html_text() <<
"\" ";
// Remaining attributes; the final chunk closes the opening tag.
321 output <<
"id=\"" << line.id()
322 <<
"\" boldness=\"" << line.boldness()
323 <<
"\" boldnessReliability=\"" << line.boldness_reliability()
324 <<
"\" color=\"" << scribo::util::color_to_hex(line.color())
325 <<
"\" colorReliability=\"" << line.color_reliability()
326 <<
"\" orientation=\"" << line.orientation()
327 <<
"\" readingOrientation=\"" << line.reading_orientation()
328 <<
"\" readingDirection=\"" << line.reading_direction()
329 <<
"\" type=\"" << line.type()
330 <<
"\" reverseVideo=\"" << (line.reverse_video() ?
"true" :
"false")
331 <<
"\" indented=\"" << (line.indented() ?
"true" :
"false")
332 <<
"\" textColour=\"" << internal::compute_text_colour(line.color())
333 <<
"\" kerning=\"" << line.char_space()
334 <<
"\" baseline=\"" << line.baseline()
335 <<
"\" meanline=\"" << line.meanline()
336 <<
"\" xHeight=\"" << line.x_height()
337 <<
"\" dHeight=\"" << line.d_height()
338 <<
"\" aHeight=\"" << line.a_height()
339 <<
"\" charWidth=\"" << line.char_width()
340 <<
"\" textConfidence=\"" << line.text_confidence()
341 <<
"\">" << std::endl;
// Lines are described by their bounding box rather than an outline.
343 internal::print_box_coords(output, line.bbox(),
" ");
345 output <<
" </Line>" << std::endl;
348 #endif // MLN_INCLUDE_ONLY
358 #endif // SCRIBO_IO_XML_INTERNAL_EXTENDED_PAGE_XML_VISITOR_HH