26 #ifndef SCRIBO_IO_XML_INTERNAL_FULL_XML_VISITOR_HH
27 # define SCRIBO_IO_XML_INTERNAL_FULL_XML_VISITOR_HH
36 # include <QByteArray>
38 # include <scribo/core/internal/doc_serializer.hh>
39 # include <scribo/core/document.hh>
40 # include <scribo/core/component_set.hh>
41 # include <scribo/core/paragraph_set.hh>
42 # include <scribo/core/object_groups.hh>
43 # include <scribo/core/object_links.hh>
44 # include <scribo/core/line_links.hh>
45 # include <scribo/core/line_info.hh>
47 # include <scribo/io/xml/internal/save_image_to_xml.hh>
48 # include <scribo/io/xml/internal/print_box_coords.hh>
49 # include <scribo/io/xml/internal/print_page_preambule.hh>
50 # include <scribo/io/xml/internal/compute_text_colour.hh>
52 # include <scribo/util/color_to_hex.hh>
99 std::ofstream& output;
104 # ifndef MLN_INCLUDE_ONLY
// Constructor taking the output file stream `out`. It switches the stream
// named `output` to fixed floating-point notation and sets its precision
// to 26.
// NOTE(review): the member-initializer list and the braces are not visible
// in this excerpt; presumably `output` is bound to `out` here -- confirm.
108 full_xml_visitor::full_xml_visitor(std::ofstream& out)
111 output << std::setiosflags(std::ios::fixed);
112 output.precision(26);
// Visits a whole document<L> and writes it to `output`.
// In order, the visible code emits: the page preamble, the line links and
// groups, a <TextData> summary, the paragraph links, the <Elements>
// section, the horizontal / vertical / whitespace separator sections, and
// finally the closing </Page> and </PcGts> tags.
// NOTE(review): the declarations of `lines` and `parset`, the braces and
// any guard around the text part are not visible in this excerpt.
119 template <
typename L>
121 full_xml_visitor::visit(
const document<L>& doc)
const
// Page preamble, written by print_PAGE_preambule (last argument: false).
123 print_PAGE_preambule(output, doc,
false);
// Dispatch this visitor to the links and then the groups reached from
// `lines`.
136 lines.
links().accept(*
this);
139 lines.
groups().accept(*
this);
// <TextData> carries the element counts of `lines` and `parset`.
148 output <<
" <TextData nlines=\"" << lines.
nelements() <<
"\" "
149 <<
" nparagraphs=\"" << parset.nelements() <<
"\" />" << std::endl;
// Dispatch this visitor to the links reached from `parset`.
152 parset.links().accept(*
this);
// <Elements>: only written when the document has elements; every valid
// component of the element set is visited individually.
161 if (doc.has_elements())
163 const component_set<L>& elts = doc.elements();
165 output <<
" <Elements>" << std::endl;
168 for_all_comps(e, elts)
169 if (elts(e).is_valid())
170 elts(e).accept(*this);
172 output << " </Elements>" << std::endl;
// Horizontal separators: the component set is visited as a whole, then
// each of its components, then the separator image is saved under the
// name "HlinesSeparatorsImage".
177 if (doc.has_hline_seps())
179 output <<
" <HlinesSeparators>" << std::endl;
180 doc.hline_seps_comps().accept(*
this);
182 for_all_comps(c, doc.hline_seps_comps())
183 doc.hline_seps_comps()(c).accept(*
this);
185 save_image_to_xml(output, doc.hline_seps(),
186 "HlinesSeparatorsImage");
188 output <<
" </HlinesSeparators>" << std::endl;
// Vertical separators: same layout as the horizontal ones, image saved
// under the name "VlinesSeparatorsImage".
190 if (doc.has_vline_seps())
192 output <<
" <VlinesSeparators>" << std::endl;
193 doc.vline_seps_comps().accept(*
this);
195 for_all_comps(c, doc.vline_seps_comps())
196 doc.vline_seps_comps()(c).accept(*
this);
198 save_image_to_xml(output, doc.vline_seps(),
199 "VlinesSeparatorsImage");
201 output <<
" </VlinesSeparators>" << std::endl;
// Whitespace separators: same layout again, image saved under the name
// "WhitespacesDelimitorsImage".
206 if (doc.has_whitespace_seps())
208 const component_set<L>&
209 whitespace_seps_comps = doc.whitespace_seps_comps();
211 output <<
" <WhitespacesDelimitors>" << std::endl;
212 whitespace_seps_comps.accept(*
this);
214 for_all_comps(c, whitespace_seps_comps)
215 whitespace_seps_comps(c).accept(*this);
217 save_image_to_xml(output, doc.whitespace_seps(),
218 "WhitespacesDelimitorsImage");
220 output << " </WhitespacesDelimitors>" << std::endl;
// Close the page and the root element.
223 output << " </Page>" << std::endl;
224 output << "</PcGts>" << std::endl;
// Visits a line_links<L> object and writes a <LineLinks> section holding
// one <LineLink> element per link `l`; the `to` attribute is llinks(l).
// NOTE(review): the line writing the attribute that precedes `to` is not
// visible in this excerpt (the next visible literal starts with a closing
// quote) -- confirm against the full file.
231 template <typename L>
233 full_xml_visitor::visit(const line_links<L>& llinks)
const
235 output <<
" <LineLinks>" << std::endl;
236 for_all_links(l, llinks)
238 output <<
" <LineLink"
240 <<
"\" to=\"" << llinks(l)
241 <<
"\"/>" << std::endl;
243 output <<
" </LineLinks>" << std::endl;
// Visits an object_groups<L> object and writes an <ObjectGroups> section.
// The section carries the number of groups (`ngroups`). For each group it
// holds one <Group> element with the group's id, validity flag, pixel area
// and bounding-box corners, containing one <GroupMember> element per
// component id of that group.
249 template <
typename L>
251 full_xml_visitor::visit(
const object_groups<L>& groups)
const
253 output <<
" <ObjectGroups ngroups=\"" << groups.nelements()
254 <<
"\">" << std::endl;
256 for_all_groups(g, groups)
// The opening tag is spelled "Group" so that it matches the "</Group>"
// closing tag written below; XML element names are case-sensitive.
// NOTE(review): here pmin_x/pmax_x are fed from row() and pmin_y/pmax_y
// from col(), whereas the ComponentInfo output in this file uses col() for
// x and row() for y. Left unchanged because a reader may rely on it --
// confirm which mapping is intended.
258 output <<
" <Group id=\"" << groups(g).id()
259 <<
"\" valid=\"" << groups(g).is_valid()
260 <<
"\" pixelArea=\"" << groups(g).pixel_area()
261 <<
"\" pmin_x=\"" << groups(g).bbox().pmin().row()
262 <<
"\" pmin_y=\"" << groups(g).bbox().pmin().col()
263 <<
"\" pmax_x=\"" << groups(g).bbox().pmax().row()
264 <<
"\" pmax_y=\"" << groups(g).bbox().pmax().col()
265 <<
"\">" << std::endl;
// One <GroupMember> per component id belonging to the group.
267 for_all_elements(e, groups(g).component_ids())
268 output << " <GroupMember comp_id=\""
269 << groups(g).component_ids()(e)
270 << "\"/>" << std::endl;
272 output << " </Group>" << std::endl;
274 output << " </ObjectGroups>" << std::endl;
// Visits an object_links<L> object and writes an <ObjectLinks> section
// with one entry per link `l`; the `to` attribute is links(l).
// NOTE(review): the lines that open the per-link element and write the
// attribute preceding `to` are not visible in this excerpt -- confirm
// against the full file.
280 template <typename L>
282 full_xml_visitor::visit(const object_links<L>& links)
const
284 output <<
" <ObjectLinks>" << std::endl;
285 for_all_links(l, links)
289 <<
"\" to=\"" << links(l)
290 <<
"\"/>" << std::endl;
292 output <<
" </ObjectLinks>" << std::endl;
// Visits a component_set<L> and writes a <ComponentSet> section.
// The section carries the element count (`nelements`), one <ComponentInfo>
// element per valid component, the labeled image (saved under the name
// "LabeledImage") and, when the set has separators, the separators image
// (saved under the name "SeparatorsImage").
// NOTE(review): braces and any `else` keyword are not visible in this
// excerpt; the comments below describe the presumed structure.
298 template <
typename L>
300 full_xml_visitor::visit(
const component_set<L>& comp_set)
const
302 output <<
" <ComponentSet nelements=\"" << comp_set.nelements()
303 <<
"\">" << std::endl;
// Invalid components are skipped.
304 for_all_comps(c, comp_set)
305 if (comp_set(c).is_valid())
// Attributes of the component; x values come from col() and y values
// from row().
307 output <<
" <ComponentInfo"
308 <<
" id=\"" << comp_set(c).id()
309 <<
"\" massCenter_x=\"" << comp_set(c).mass_center().col()
310 <<
"\" massCenter_y=\"" << comp_set(c).mass_center().row()
311 <<
"\" card=\"" << comp_set(c).card()
312 <<
"\" tag=\"" << comp_set(c).tag()
313 <<
"\" type=\"" << comp_set(c).type()
314 <<
"\" pmin_x=\"" << comp_set(c).bbox().pmin().col()
315 <<
"\" pmin_y=\"" << comp_set(c).bbox().pmin().row()
316 <<
"\" pmax_x=\"" << comp_set(c).bbox().pmax().col()
317 <<
"\" pmax_y=\"" << comp_set(c).bbox().pmax().row();
// With features: close the opening tag, write a nested
// <ComponentFeatures> element, then the </ComponentInfo> closing tag.
319 if (comp_set(c).has_features())
321 output <<
"\">" << std::endl;
323 output <<
" <ComponentFeatures"
324 <<
" valid=\"" << comp_set(c).features().valid
325 <<
"\" color=\"" << comp_set(c).features().color
326 <<
"\" boldness=\"" << comp_set(c).features().boldness
327 <<
"\"/>" << std::endl;
329 output <<
" </ComponentInfo>" << std::endl;
// Presumably the no-features branch: <ComponentInfo> is self-closed.
332 output <<
"\"/>" << std::endl;
// Labeled image of the component set.
338 const L& lbl = comp_set.labeled_image();
339 save_image_to_xml(output, lbl,
"LabeledImage");
// Separators image, only when the set has one.
343 if (comp_set.has_separators())
345 const mln_ch_value(L,
bool)& seps = comp_set.separators();
346 save_image_to_xml(output, seps,
"SeparatorsImage");
349 output <<
"</ComponentSet>" << std::endl;
// Visits a single component_info<L> and writes it as a region element.
// Four region kinds appear in the visible code: WhitespaceSeparatorRegion,
// VerticalSeparatorRegion, HorizontalSeparatorRegion and ImageRegion. Each
// one gets the bounding box as x_min/y_min/x_max/y_max attributes (x from
// col(), y from row()), followed by the box coordinates printed by
// internal::print_box_coords and the closing tag.
// NOTE(review): the dispatch choosing which region is written, and the
// lines that end each opening tag, are not visible in this excerpt --
// confirm against the full file.
355 template <
typename L>
357 full_xml_visitor::visit(
const component_info<L>&
info)
const
// Whitespace separator region; its id starts with "wss".
363 output <<
" <WhitespaceSeparatorRegion id=\"wss"
366 <<
" x_min=\"" << info.bbox().pmin().col() <<
"\""
367 <<
" y_min=\"" << info.bbox().pmin().row() <<
"\""
368 <<
" x_max=\"" << info.bbox().pmax().col() <<
"\""
369 <<
" y_max=\"" << info.bbox().pmax().row() <<
"\""
372 internal::print_box_coords(output, info.bbox(),
" ");
374 output <<
" </WhitespaceSeparatorRegion>" << std::endl;
// Vertical separator region; id is "vlsr" followed by the component id.
// Orientation and colours are written as fixed literals.
380 output <<
" <VerticalSeparatorRegion id=\"vlsr" << info.id()
381 <<
"\" orientation=\"0.000000\" "
382 <<
" colour=\"Black\" "
383 <<
" bgColour=\"White\""
384 <<
" x_min=\"" << info.bbox().pmin().col() <<
"\""
385 <<
" y_min=\"" << info.bbox().pmin().row() <<
"\""
386 <<
" x_max=\"" << info.bbox().pmax().col() <<
"\""
387 <<
" y_max=\"" << info.bbox().pmax().row() <<
"\""
390 internal::print_box_coords(output, info.bbox(),
" ");
392 output <<
" </VerticalSeparatorRegion>" << std::endl;
// Horizontal separator region; id is "hlsr" followed by the component id.
// Orientation and colours are written as fixed literals.
398 output <<
" <HorizontalSeparatorRegion id=\"hlsr" << info.id()
399 <<
"\" orientation=\"0.000000\" "
400 <<
" colour=\"Black\" "
401 <<
" bgColour=\"White\""
402 <<
" x_min=\"" << info.bbox().pmin().col() <<
"\""
403 <<
" y_min=\"" << info.bbox().pmin().row() <<
"\""
404 <<
" x_max=\"" << info.bbox().pmax().col() <<
"\""
405 <<
" y_max=\"" << info.bbox().pmax().row() <<
"\""
408 internal::print_box_coords(output, info.bbox(),
" ");
410 output <<
" </HorizontalSeparatorRegion>" << std::endl;
// Image region; id is "ir" followed by the component id. Colour depth,
// orientation, embedded-text flag and background colour are fixed literals.
417 output <<
" <ImageRegion id=\"ir" << info.id()
418 <<
"\" colourDepth=\"colour\""
419 <<
" orientation=\"0.000000\" "
420 <<
" embText=\"No\" "
421 <<
" bgColour=\"White\""
422 <<
" x_min=\"" << info.bbox().pmin().col() <<
"\""
423 <<
" y_min=\"" << info.bbox().pmin().row() <<
"\""
424 <<
" x_max=\"" << info.bbox().pmax().col() <<
"\""
425 <<
" y_max=\"" << info.bbox().pmax().row() <<
"\""
428 internal::print_box_coords(output, info.bbox(),
" ");
430 output <<
" </ImageRegion>" << std::endl;
// Visits a paragraph_set<L> and writes one <TextRegion> element per valid
// paragraph. The region id is the paragraph index `p`. Most attributes are
// read from the line `fid = line_ids(0)`, while the colour attributes come
// from the paragraph itself. The paragraph bounding box is printed by
// internal::print_box_coords, then every line of the paragraph is visited.
// NOTE(review): the declaration of `line_ids`, the braces and the lines
// that end the opening tag are not visible in this excerpt -- confirm
// against the full file.
438 template <
typename L>
440 full_xml_visitor::visit(
const paragraph_set<L>& parset)
const
442 const line_set<L>& lines = parset.lines();
// Invalid paragraphs are skipped.
444 for_all_paragraphs(p, parset)
445 if (parset(p).is_valid())
// Line whose properties are used for the region attributes.
451 line_id_t fid = line_ids(0);
452 output <<
" <TextRegion id=\"" <<
p
453 <<
"\" orientation=\"" << lines(fid).orientation()
454 <<
"\" readingOrientation=\"" << lines(fid).reading_orientation()
455 <<
"\" readingDirection=\"" << lines(fid).reading_direction()
456 <<
"\" type=\"" << lines(fid).type()
457 <<
"\" reverseVideo=\"" << (lines(fid).reverse_video() ?
"true" :
"false")
458 <<
"\" indented=\"" << (lines(fid).indented() ?
"true" :
"false")
459 <<
"\" textColour=\"" << internal::compute_text_colour(parset(p).color())
460 <<
"\" kerning=\"" << lines(fid).char_space();
// Further attributes, written by a second statement into the same
// opening tag.
463 output <<
"\" baseline=\"" << lines(fid).baseline()
464 <<
"\" meanline=\"" << lines(fid).meanline()
465 <<
"\" xHeight=\"" << lines(fid).x_height()
466 <<
"\" dHeight=\"" << lines(fid).d_height()
467 <<
"\" aHeight=\"" << lines(fid).a_height()
468 <<
"\" charWidth=\"" << lines(fid).char_width()
469 <<
"\" color=\"" << scribo::util::color_to_hex(parset(p).color())
470 <<
"\" colorReliability=\"" << parset(p).color_reliability();
// Bounding box of the paragraph.
475 internal::print_box_coords(output, parset(p).
bbox(),
" ");
// Visit each line referenced by the paragraph.
479 for_all_paragraph_lines(lid, line_ids)
481 line_id_t l = line_ids(lid);
483 lines(l).accept(*
this);
486 output <<
" </TextRegion>" << std::endl;
// Visits a line_info<L> and writes it as a <Line> element. The element
// carries the line's attributes, its bounding box (printed by
// internal::print_box_coords) and a <CompidList> with one <Compid> entry
// per component id of the line.
// NOTE(review): two alternative openings of the <Line> tag are visible
// below (with and without the `text` attribute); the condition selecting
// between them is not visible in this excerpt -- confirm against the full
// file.
491 template <
typename L>
493 full_xml_visitor::visit(
const line_info<L>&
line)
const
// Opening with the text attribute, filled from line.html_text().
497 output <<
" <Line text=\"" << line.html_text() <<
"\" ";
// Opening without the text attribute.
500 output <<
" <Line " << std::endl;
// Remaining attributes and the end of the opening tag.
502 output <<
"id=\"" << line.id()
503 <<
"\" boldness=\"" << line.boldness()
504 <<
"\" boldnessReliability=\"" << line.boldness_reliability()
505 <<
"\" color=\"" << scribo::util::color_to_hex(line.color())
506 <<
"\" colorReliability=\"" << line.color_reliability()
507 <<
"\" orientation=\"" << line.orientation()
508 <<
"\" readingOrientation=\"" << line.reading_orientation()
509 <<
"\" readingDirection=\"" << line.reading_direction()
510 <<
"\" type=\"" << line.type()
511 <<
"\" reverseVideo=\"" << (line.reverse_video() ?
"true" :
"false")
512 <<
"\" indented=\"" << (line.indented() ?
"true" :
"false")
513 <<
"\" textColour=\"" << internal::compute_text_colour(line.color())
514 <<
"\" kerning=\"" << line.char_space()
515 <<
"\" baseline=\"" << line.baseline()
516 <<
"\" meanline=\"" << line.meanline()
517 <<
"\" xHeight=\"" << line.x_height()
518 <<
"\" dHeight=\"" << line.d_height()
519 <<
"\" aHeight=\"" << line.a_height()
520 <<
"\" charWidth=\"" << line.char_width()
521 <<
"\" textConfidence=\"" << line.text_confidence()
522 <<
"\">" << std::endl;
// Bounding box of the line.
524 internal::print_box_coords(output, line.bbox(),
" ");
// Ids of the components that make up the line.
526 output <<
" <CompidList>" << std::endl;
528 for_all_line_comps(c, line.component_ids())
529 output <<
" <Compid value=\""
530 << line.component_ids()(c) <<
"\" />" << std::endl;
532 output <<
" </CompidList>" << std::endl;
534 output <<
" </Line>" << std::endl;
537 #endif // MLN_INCLUDE_ONLY
548 #endif // SCRIBO_IO_XML_INTERNAL_FULL_XML_VISITOR_HH