$extrastylesheet
Olena  User documentation 2.1
An Image Processing Platform
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
full_xml_visitor.hh
1 // Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
2 //
3 // This file is part of Olena.
4 //
5 // Olena is free software: you can redistribute it and/or modify it under
6 // the terms of the GNU General Public License as published by the Free
7 // Software Foundation, version 2 of the License.
8 //
9 // Olena is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 // General Public License for more details.
13 //
14 // You should have received a copy of the GNU General Public License
15 // along with Olena. If not, see <http://www.gnu.org/licenses/>.
16 //
17 // As a special exception, you may use this file as part of a free
18 // software project without restriction. Specifically, if other files
19 // instantiate templates or use macros or inline functions from this
20 // file, or you compile this file and link it with other files to produce
21 // an executable, this file does not by itself cause the resulting
22 // executable to be covered by the GNU General Public License. This
23 // exception does not however invalidate any other reasons why the
24 // executable file might be covered by the GNU General Public License.
25 
26 #ifndef SCRIBO_IO_XML_INTERNAL_FULL_XML_VISITOR_HH
27 # define SCRIBO_IO_XML_INTERNAL_FULL_XML_VISITOR_HH
28 
32 
33 # include <iomanip>
34 # include <fstream>
35 
36 # include <QByteArray>
37 
38 # include <scribo/core/internal/doc_serializer.hh>
39 # include <scribo/core/document.hh>
40 # include <scribo/core/component_set.hh>
41 # include <scribo/core/paragraph_set.hh>
42 # include <scribo/core/object_groups.hh>
43 # include <scribo/core/object_links.hh>
44 # include <scribo/core/line_links.hh>
45 # include <scribo/core/line_info.hh>
46 
47 # include <scribo/io/xml/internal/save_image_to_xml.hh>
48 # include <scribo/io/xml/internal/print_box_coords.hh>
49 # include <scribo/io/xml/internal/print_page_preambule.hh>
50 # include <scribo/io/xml/internal/compute_text_colour.hh>
51 
52 # include <scribo/util/color_to_hex.hh>
53 
54 namespace scribo
55 {
56 
57  namespace io
58  {
59 
60  namespace xml
61  {
62 
63  namespace internal
64  {
65 
66 
67  class full_xml_visitor : public doc_serializer<full_xml_visitor>
68  {
69  public:
70  // Constructor
71  full_xml_visitor(std::ofstream& out);
72 
73  // Visit overloads
74  template <typename L>
75  void visit(const document<L>& doc) const;
76 
77  template <typename L>
78  void visit(const line_links<L>& llinks) const;
79 
80  template <typename L>
81  void visit(const object_groups<L>& groups) const;
82 
83  template <typename L>
84  void visit(const object_links<L>& links) const;
85 
86  template <typename L>
87  void visit(const component_set<L>& comp_set) const;
88 
89  template <typename L>
90  void visit(const component_info<L>& info) const;
91 
92  template <typename L>
93  void visit(const paragraph_set<L>& parset) const;
94 
95  template <typename L>
96  void visit(const line_info<L>& line) const;
97 
98  private: // Attributes
99  std::ofstream& output;
100  };
101 
102 
103 
104 # ifndef MLN_INCLUDE_ONLY
105 
106 
107  inline
108  full_xml_visitor::full_xml_visitor(std::ofstream& out)
109  : output(out)
110  {
111  output << std::setiosflags(std::ios::fixed);
112  output.precision(26);
113  }
114 
115 
116 
118  //
119  template <typename L>
120  void
121  full_xml_visitor::visit(const document<L>& doc) const
122  {
123  print_PAGE_preambule(output, doc, false);
124 
125  // Text
126  if (doc.has_text())
127  {
128  const line_set<L>& lines = doc.lines();
129 
130  // Save component/link/group information (Extension)
131  {
132  // Component set
133  lines.components().accept(*this);
134 
135  // Object link
136  lines.links().accept(*this);
137 
138  // Object group
139  lines.groups().accept(*this);
140  }
141  // End of EXTENSIONS
142 
143  const paragraph_set<L>& parset = doc.paragraphs();
144 
145  // Save paragraphs related information (Extension)
146  {
147  // General text information
148  output << " <TextData nlines=\"" << lines.nelements() << "\" "
149  << " nparagraphs=\"" << parset.nelements() << "\" />" << std::endl;
150 
151  // line_links
152  parset.links().accept(*this);
153  }
154 
155  // Paragraph and lines
156  parset.accept(*this);
157  }
158 
159 
160  // Page elements (Pictures, ...)
161  if (doc.has_elements())
162  {
163  const component_set<L>& elts = doc.elements();
164 
165  output << " <Elements>" << std::endl;
166  elts.accept(*this);
167 
168  for_all_comps(e, elts)
169  if (elts(e).is_valid())
170  elts(e).accept(*this);
171 
172  output << " </Elements>" << std::endl;
173  }
174 
175 
176  // line seraparators
177  if (doc.has_hline_seps())
178  {
179  output << " <HlinesSeparators>" << std::endl;
180  doc.hline_seps_comps().accept(*this);
181 
182  for_all_comps(c, doc.hline_seps_comps())
183  doc.hline_seps_comps()(c).accept(*this);
184 
185  save_image_to_xml(output, doc.hline_seps(),
186  "HlinesSeparatorsImage");
187 
188  output << " </HlinesSeparators>" << std::endl;
189  }
190  if (doc.has_vline_seps())
191  {
192  output << " <VlinesSeparators>" << std::endl;
193  doc.vline_seps_comps().accept(*this);
194 
195  for_all_comps(c, doc.vline_seps_comps())
196  doc.vline_seps_comps()(c).accept(*this);
197 
198  save_image_to_xml(output, doc.vline_seps(),
199  "VlinesSeparatorsImage");
200 
201  output << " </VlinesSeparators>" << std::endl;
202  }
203 
204 
205  // Whitespace seraparators
206  if (doc.has_whitespace_seps())
207  {
208  const component_set<L>&
209  whitespace_seps_comps = doc.whitespace_seps_comps();
210 
211  output << " <WhitespacesDelimitors>" << std::endl;
212  whitespace_seps_comps.accept(*this);
213 
214  for_all_comps(c, whitespace_seps_comps)
215  whitespace_seps_comps(c).accept(*this);
216 
217  save_image_to_xml(output, doc.whitespace_seps(),
218  "WhitespacesDelimitorsImage");
219 
220  output << " </WhitespacesDelimitors>" << std::endl;
221  }
222 
223  output << " </Page>" << std::endl;
224  output << "</PcGts>" << std::endl;
225 
226  }
227 
228 
230  //
231  template <typename L>
232  void
233  full_xml_visitor::visit(const line_links<L>& llinks) const
234  {
235  output << " <LineLinks>" << std::endl;
236  for_all_links(l, llinks)
237  {
238  output << " <LineLink"
239  << " from=\"" << l
240  << "\" to=\"" << llinks(l)
241  << "\"/>" << std::endl;
242  }
243  output << " </LineLinks>" << std::endl;
244  }
245 
246 
248  //
249  template <typename L>
250  void
251  full_xml_visitor::visit(const object_groups<L>& groups) const
252  {
253  output << " <ObjectGroups ngroups=\"" << groups.nelements()
254  << "\">" << std::endl;
255 
256  for_all_groups(g, groups)
257  {
258  output << " <group id=\"" << groups(g).id()
259  << "\" valid=\"" << groups(g).is_valid()
260  << "\" pixelArea=\"" << groups(g).pixel_area()
261  << "\" pmin_x=\"" << groups(g).bbox().pmin().row()
262  << "\" pmin_y=\"" << groups(g).bbox().pmin().col()
263  << "\" pmax_x=\"" << groups(g).bbox().pmax().row()
264  << "\" pmax_y=\"" << groups(g).bbox().pmax().col()
265  << "\">" << std::endl;
266 
267  for_all_elements(e, groups(g).component_ids())
268  output << " <GroupMember comp_id=\""
269  << groups(g).component_ids()(e)
270  << "\"/>" << std::endl;
271 
272  output << " </Group>" << std::endl;
273  }
274  output << " </ObjectGroups>" << std::endl;
275  }
276 
277 
279  //
280  template <typename L>
281  void
282  full_xml_visitor::visit(const object_links<L>& links) const
283  {
284  output << " <ObjectLinks>" << std::endl;
285  for_all_links(l, links)
286  {
287  output << " <Link"
288  << " from=\"" << l
289  << "\" to=\"" << links(l)
290  << "\"/>" << std::endl;
291  }
292  output << " </ObjectLinks>" << std::endl;
293  }
294 
295 
297  //
298  template <typename L>
299  void
300  full_xml_visitor::visit(const component_set<L>& comp_set) const
301  {
302  output << " <ComponentSet nelements=\"" << comp_set.nelements()
303  << "\">" << std::endl;
304  for_all_comps(c, comp_set)
305  if (comp_set(c).is_valid())
306  {
307  output << " <ComponentInfo"
308  << " id=\"" << comp_set(c).id()
309  << "\" massCenter_x=\"" << comp_set(c).mass_center().col()
310  << "\" massCenter_y=\"" << comp_set(c).mass_center().row()
311  << "\" card=\"" << comp_set(c).card()
312  << "\" tag=\"" << comp_set(c).tag()
313  << "\" type=\"" << comp_set(c).type()
314  << "\" pmin_x=\"" << comp_set(c).bbox().pmin().col()
315  << "\" pmin_y=\"" << comp_set(c).bbox().pmin().row()
316  << "\" pmax_x=\"" << comp_set(c).bbox().pmax().col()
317  << "\" pmax_y=\"" << comp_set(c).bbox().pmax().row();
318 
319  if (comp_set(c).has_features())
320  {
321  output << "\">" << std::endl;
322 
323  output << " <ComponentFeatures"
324  << " valid=\"" << comp_set(c).features().valid
325  << "\" color=\"" << comp_set(c).features().color
326  << "\" boldness=\"" << comp_set(c).features().boldness
327  << "\"/>" << std::endl;
328 
329  output << " </ComponentInfo>" << std::endl;
330  }
331  else
332  output << "\"/>" << std::endl;
333  }
334 
335 
336  // Save labeled image
337  {
338  const L& lbl = comp_set.labeled_image();
339  save_image_to_xml(output, lbl, "LabeledImage");
340  }
341 
342  // Save separators image
343  if (comp_set.has_separators())
344  {
345  const mln_ch_value(L,bool)& seps = comp_set.separators();
346  save_image_to_xml(output, seps, "SeparatorsImage");
347  }
348 
349  output << "</ComponentSet>" << std::endl;
350  }
351 
352 
354  //
355  template <typename L>
356  void
357  full_xml_visitor::visit(const component_info<L>& info) const
358  {
359  switch (info.type())
360  {
362  {
363  output << " <WhitespaceSeparatorRegion id=\"wss"
364  << info.id()
365  << "\""
366  << " x_min=\"" << info.bbox().pmin().col() << "\""
367  << " y_min=\"" << info.bbox().pmin().row() << "\""
368  << " x_max=\"" << info.bbox().pmax().col() << "\""
369  << " y_max=\"" << info.bbox().pmax().row() << "\""
370  << ">" << std::endl;
371 
372  internal::print_box_coords(output, info.bbox(), " ");
373 
374  output << " </WhitespaceSeparatorRegion>" << std::endl;
375  break;
376  }
377 
379  {
380  output << " <VerticalSeparatorRegion id=\"vlsr" << info.id()
381  << "\" orientation=\"0.000000\" "
382  << " colour=\"Black\" "
383  << " bgColour=\"White\""
384  << " x_min=\"" << info.bbox().pmin().col() << "\""
385  << " y_min=\"" << info.bbox().pmin().row() << "\""
386  << " x_max=\"" << info.bbox().pmax().col() << "\""
387  << " y_max=\"" << info.bbox().pmax().row() << "\""
388  << ">" << std::endl;
389 
390  internal::print_box_coords(output, info.bbox(), " ");
391 
392  output << " </VerticalSeparatorRegion>" << std::endl;
393  break;
394  }
395 
397  {
398  output << " <HorizontalSeparatorRegion id=\"hlsr" << info.id()
399  << "\" orientation=\"0.000000\" "
400  << " colour=\"Black\" "
401  << " bgColour=\"White\""
402  << " x_min=\"" << info.bbox().pmin().col() << "\""
403  << " y_min=\"" << info.bbox().pmin().row() << "\""
404  << " x_max=\"" << info.bbox().pmax().col() << "\""
405  << " y_max=\"" << info.bbox().pmax().row() << "\""
406  << ">" << std::endl;
407 
408  internal::print_box_coords(output, info.bbox(), " ");
409 
410  output << " </HorizontalSeparatorRegion>" << std::endl;
411  break;
412  }
413 
414  default:
415  case component::Image:
416  {
417  output << " <ImageRegion id=\"ir" << info.id()
418  << "\" colourDepth=\"colour\""
419  << " orientation=\"0.000000\" "
420  << " embText=\"No\" "
421  << " bgColour=\"White\""
422  << " x_min=\"" << info.bbox().pmin().col() << "\""
423  << " y_min=\"" << info.bbox().pmin().row() << "\""
424  << " x_max=\"" << info.bbox().pmax().col() << "\""
425  << " y_max=\"" << info.bbox().pmax().row() << "\""
426  << ">" << std::endl;
427 
428  internal::print_box_coords(output, info.bbox(), " ");
429 
430  output << " </ImageRegion>" << std::endl;
431  break;
432  }
433  }
434  }
435 
437  //
438  template <typename L>
439  void
440  full_xml_visitor::visit(const paragraph_set<L>& parset) const
441  {
442  const line_set<L>& lines = parset.lines();
443 
444  for_all_paragraphs(p, parset)
445  if (parset(p).is_valid())
446  {
447  const mln::util::array<line_id_t>& line_ids = parset(p).line_ids();
448 
449  // FIXME: compute that information on the whole paragraph
450  // and use them here.
451  line_id_t fid = line_ids(0);
452  output << " <TextRegion id=\"" << p
453  << "\" orientation=\"" << lines(fid).orientation()
454  << "\" readingOrientation=\"" << lines(fid).reading_orientation()
455  << "\" readingDirection=\"" << lines(fid).reading_direction()
456  << "\" type=\"" << lines(fid).type()
457  << "\" reverseVideo=\"" << (lines(fid).reverse_video() ? "true" : "false")
458  << "\" indented=\"" << (lines(fid).indented() ? "true" : "false")
459  << "\" textColour=\"" << internal::compute_text_colour(parset(p).color())
460  << "\" kerning=\"" << lines(fid).char_space();
461 
462  // EXTENSIONS - Not officially supported
463  output << "\" baseline=\"" << lines(fid).baseline()
464  << "\" meanline=\"" << lines(fid).meanline()
465  << "\" xHeight=\"" << lines(fid).x_height()
466  << "\" dHeight=\"" << lines(fid).d_height()
467  << "\" aHeight=\"" << lines(fid).a_height()
468  << "\" charWidth=\"" << lines(fid).char_width()
469  << "\" color=\"" << scribo::util::color_to_hex(parset(p).color())
470  << "\" colorReliability=\"" << parset(p).color_reliability();
471  // End of EXTENSIONS
472  output << "\">"
473  << std::endl;
474 
475  internal::print_box_coords(output, parset(p).bbox(), " ");
476 
477 
478  // EXTENSIONS - Not officially supported
479  for_all_paragraph_lines(lid, line_ids)
480  {
481  line_id_t l = line_ids(lid);
482 
483  lines(l).accept(*this);
484  }
485 
486  output << " </TextRegion>" << std::endl;
487  }
488  }
489 
490 
491  template <typename L>
492  void
493  full_xml_visitor::visit(const line_info<L>& line) const
494  {
495  if (line.has_text())
496  {
497  output << " <Line text=\"" << line.html_text() << "\" ";
498  }
499  else
500  output << " <Line " << std::endl;
501 
502  output << "id=\"" << line.id()
503  << "\" boldness=\"" << line.boldness()
504  << "\" boldnessReliability=\"" << line.boldness_reliability()
505  << "\" color=\"" << scribo::util::color_to_hex(line.color())
506  << "\" colorReliability=\"" << line.color_reliability()
507  << "\" orientation=\"" << line.orientation()
508  << "\" readingOrientation=\"" << line.reading_orientation()
509  << "\" readingDirection=\"" << line.reading_direction()
510  << "\" type=\"" << line.type()
511  << "\" reverseVideo=\"" << (line.reverse_video() ? "true" : "false")
512  << "\" indented=\"" << (line.indented() ? "true" : "false")
513  << "\" textColour=\"" << internal::compute_text_colour(line.color())
514  << "\" kerning=\"" << line.char_space()
515  << "\" baseline=\"" << line.baseline()
516  << "\" meanline=\"" << line.meanline()
517  << "\" xHeight=\"" << line.x_height()
518  << "\" dHeight=\"" << line.d_height()
519  << "\" aHeight=\"" << line.a_height()
520  << "\" charWidth=\"" << line.char_width()
521  << "\" textConfidence=\"" << line.text_confidence()
522  << "\">" << std::endl;
523 
524  internal::print_box_coords(output, line.bbox(), " ");
525 
526  output << " <CompidList>" << std::endl;
527 
528  for_all_line_comps(c, line.component_ids())
529  output << " <Compid value=\""
530  << line.component_ids()(c) << "\" />" << std::endl;
531 
532  output << " </CompidList>" << std::endl;
533 
534  output << " </Line>" << std::endl;
535  }
536 
537 #endif // MLN_INCLUDE_ONLY
538 
539  } // end of namespace scribo::io::xml::internal
540 
541  } // end of namespace scribo::io::xml
542 
543  } // end of namespace scribo::io
544 
545 } // end of namespace scribo
546 
547 
548 #endif // SCRIBO_IO_XML_INTERNAL_FULL_XML_VISITOR_HH