$extrastylesheet
Olena  User documentation 2.1
An Image Processing Platform
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
extended_page_xml_visitor.hh
1 // Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
2 //
3 // This file is part of Olena.
4 //
5 // Olena is free software: you can redistribute it and/or modify it under
6 // the terms of the GNU General Public License as published by the Free
7 // Software Foundation, version 2 of the License.
8 //
9 // Olena is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 // General Public License for more details.
13 //
14 // You should have received a copy of the GNU General Public License
15 // along with Olena. If not, see <http://www.gnu.org/licenses/>.
16 //
17 // As a special exception, you may use this file as part of a free
18 // software project without restriction. Specifically, if other files
19 // instantiate templates or use macros or inline functions from this
20 // file, or you compile this file and link it with other files to produce
21 // an executable, this file does not by itself cause the resulting
22 // executable to be covered by the GNU General Public License. This
23 // exception does not however invalidate any other reasons why the
24 // executable file might be covered by the GNU General Public License.
25 
26 #ifndef SCRIBO_IO_XML_INTERNAL_EXTENDED_PAGE_XML_VISITOR_HH
27 # define SCRIBO_IO_XML_INTERNAL_EXTENDED_PAGE_XML_VISITOR_HH
28 
32 
33 # include <fstream>
34 
35 # include <mln/morpho/elementary/gradient_internal.hh>
36 # include <mln/pw/all.hh>
37 # include <mln/core/image/dmorph/image_if.hh>
38 
39 # include <scribo/core/def/lbl_type.hh>
40 
41 # include <scribo/core/internal/doc_serializer.hh>
42 # include <scribo/core/document.hh>
43 # include <scribo/core/component_set.hh>
44 # include <scribo/core/paragraph_set.hh>
45 # include <scribo/core/object_groups.hh>
46 # include <scribo/core/object_links.hh>
47 # include <scribo/core/line_links.hh>
48 # include <scribo/core/line_info.hh>
49 
50 # include <scribo/io/xml/internal/print_box_coords.hh>
51 # include <scribo/io/xml/internal/print_image_coords.hh>
52 # include <scribo/io/xml/internal/print_page_preambule.hh>
53 # include <scribo/io/xml/internal/compute_text_colour.hh>
54 
55 # include <scribo/text/paragraphs_closing.hh>
56 
57 # include <scribo/util/component_precise_outline.hh>
58 # include <scribo/util/color_to_hex.hh>
59 
60 
61 namespace scribo
62 {
63 
64  namespace io
65  {
66 
67  namespace xml
68  {
69 
70  namespace internal
71  {
72 
73 
74  template <typename L>
76  : public doc_serializer<extended_page_xml_visitor<L> >
77  {
78  public:
79  // Constructor
80  extended_page_xml_visitor(std::ofstream& out);
81 
82  // Visit overloads
83  void visit(const document<L>& doc) const;
84 
85  void visit(const component_set<L>& comp_set) const;
86 
87  void visit(const component_info<L>& info) const;
88 
89  void visit(const paragraph_set<L>& parset) const;
90 
91  void visit(const line_info<L>& line) const;
92 
93  private: // Attributes
94  std::ofstream& output;
95  mutable L lbl_;
96  };
97 
98 
99 
100 # ifndef MLN_INCLUDE_ONLY
101 
102 
103  template <typename L>
105  : output(out)
106  {
107  }
108 
109 
110 
112  //
113  template <typename L>
114  void
115  extended_page_xml_visitor<L>::visit(const document<L>& doc) const
116  {
117  // Preambule
118  print_PAGE_preambule(output, doc, false);
119 
120  // Text
121  if (doc.has_text())
122  doc.paragraphs().accept(*this);
123 
124 
125  // Page elements (Pictures, ...)
126  if (doc.has_elements())
127  doc.elements().accept(*this);
128 
129  // line seraparators
130  if (doc.has_vline_seps())
131  doc.vline_seps_comps().accept(*this);
132  if (doc.has_hline_seps())
133  doc.hline_seps_comps().accept(*this);
134 
135  // Whitespace seraparators
136  if (doc.has_whitespace_seps())
137  doc.whitespace_seps_comps().accept(*this);
138 
139  output << " </Page>" << std::endl;
140  output << "</PcGts>" << std::endl;
141 
142  }
143 
145  //
146  template <typename L>
147  void
148  extended_page_xml_visitor<L>::visit(const component_set<L>& comp_set) const
149  {
150  lbl_ = comp_set.labeled_image();
151  for_all_comps(c, comp_set)
152  if (comp_set(c).is_valid())
153  comp_set(c).accept(*this);
154  }
155 
156 
158  //
// Write the region element describing one component to `output`.
//
// The outline `par` is computed from the cached label image lbl_
// (assigned in visit(const component_set<L>&)) restricted to the
// component bounding box.  A switch on info.type() then selects one
// of four elements: WhitespaceSeparatorRegion, VerticalSeparatorRegion,
// HorizontalSeparatorRegion or ImageRegion.  Every element carries the
// component id (with a per-kind prefix), the bounding box as
// x_min/y_min/x_max/y_max (columns and rows of bbox().pmin() and
// bbox().pmax()), and the outline printed by print_image_coords.
//
// NOTE(review): this listing lacks original lines 164, 166, 171, 188
// and 206.  They presumably hold the declarations of `id` and `par`
// and three `case` labels -- restore them from the original header
// before compiling.
159  template <typename L>
160  void
161  extended_page_xml_visitor<L>::visit(const component_info<L>& info) const
162  {
163  // Getting component outline
// NOTE(review): `id` is used below but its declaration is not visible
// in this listing (line 164 is missing).
165  //const L& lbl = info.holder().labeled_image();
// NOTE(review): the type of `par` is not visible (line 166 is missing).
167  par = scribo::util::component_precise_outline(lbl_ | info.bbox(), id);
168 
169  switch (info.type())
170  {
// NOTE(review): case label missing here (line 171); judging by the
// element written below it presumably selects whitespace separators
// -- confirm.
172  {
173  output << " <WhitespaceSeparatorRegion id=\"wss"
174  << info.id()
175  << "\""
176  << " x_min=\"" << info.bbox().pmin().col() << "\""
177  << " y_min=\"" << info.bbox().pmin().row() << "\""
178  << " x_max=\"" << info.bbox().pmax().col() << "\""
179  << " y_max=\"" << info.bbox().pmax().row() << "\""
180  << ">" << std::endl;
181 
182  internal::print_image_coords(output, par, " ");
183 
184  output << " </WhitespaceSeparatorRegion>" << std::endl;
185  break;
186  }
187 
// NOTE(review): case label missing here (line 188); presumably the
// vertical line separator type -- confirm.
// orientation, colour and bgColour are written as fixed literals.
189  {
190  output << " <VerticalSeparatorRegion id=\"vlsr" << info.id()
191  << "\" orientation=\"0.000000\" "
192  << " colour=\"Black\" "
193  << " bgColour=\"White\""
194  << " x_min=\"" << info.bbox().pmin().col() << "\""
195  << " y_min=\"" << info.bbox().pmin().row() << "\""
196  << " x_max=\"" << info.bbox().pmax().col() << "\""
197  << " y_max=\"" << info.bbox().pmax().row() << "\""
198  << ">" << std::endl;
199 
200  internal::print_image_coords(output, par, " ");
201 
202  output << " </VerticalSeparatorRegion>" << std::endl;
203  break;
204  }
205 
// NOTE(review): case label missing here (line 206); presumably the
// horizontal line separator type -- confirm.
// orientation, colour and bgColour are written as fixed literals.
207  {
208  output << " <HorizontalSeparatorRegion id=\"hlsr" << info.id()
209  << "\" orientation=\"0.000000\" "
210  << " colour=\"Black\" "
211  << " bgColour=\"White\""
212  << " x_min=\"" << info.bbox().pmin().col() << "\""
213  << " y_min=\"" << info.bbox().pmin().row() << "\""
214  << " x_max=\"" << info.bbox().pmax().col() << "\""
215  << " y_max=\"" << info.bbox().pmax().row() << "\""
216  << ">" << std::endl;
217 
218  internal::print_image_coords(output, par, " ");
219 
220  output << " </HorizontalSeparatorRegion>" << std::endl;
221  break;
222  }
223 
224 
// Any type not matched by an earlier label is written as an image
// region: `default` shares the component::Image branch.
// colourDepth, orientation, embText and bgColour are fixed literals.
225  default:
226  case component::Image:
227  {
228  output << " <ImageRegion id=\"ir" << info.id()
229  << "\" colourDepth=\"colour\""
230  << " orientation=\"0.000000\" "
231  << " embText=\"No\" "
232  << " bgColour=\"White\""
233  << " x_min=\"" << info.bbox().pmin().col() << "\""
234  << " y_min=\"" << info.bbox().pmin().row() << "\""
235  << " x_max=\"" << info.bbox().pmax().col() << "\""
236  << " y_max=\"" << info.bbox().pmax().row() << "\""
237  << ">" << std::endl;
238 
239  internal::print_image_coords(output, par, " ");
240 
241  output << " </ImageRegion>" << std::endl;
242  break;
243  }
244  }
245  }
246 
247 
249  //
// Write one TextRegion element per valid paragraph of `parset`.
//
// For each valid paragraph the function:
//  - computes the outline `par` from the image returned by
//    text::paragraphs_closing(parset), restricted to the paragraph
//    bounding box, using the paragraph index as the label;
//  - writes the TextRegion opening tag; most attributes come from the
//    first line of the paragraph, while textColour, color and
//    colorReliability come from the paragraph itself;
//  - prints the outline with print_image_coords;
//  - lets every line of the paragraph accept this visitor;
//  - closes the TextRegion element.
//
// NOTE(review): this listing lacks original line 263, which
// presumably declares the type of `par` -- restore it from the
// original header before compiling.
250  template <typename L>
251  void
252  extended_page_xml_visitor<L>::visit(const paragraph_set<L>& parset) const
253  {
254  const line_set<L>& lines = parset.lines();
255 
256  // Prepare paragraph outlines.
257  L par_clo = text::paragraphs_closing(parset);
258 
259  for_all_paragraphs(p, parset)
260  if (parset(p).is_valid())
261  {
262  const box2d& b = parset(p).bbox();
// NOTE(review): the type of `par` is not visible (line 263 is missing).
264  par = scribo::util::component_precise_outline(par_clo | b, p);
265 
266  const mln::util::array<line_id_t>& line_ids = parset(p).line_ids();
267 
268  // FIXME: compute that information on the whole paragraph
269  // and use them here.
// The first line id stands in for the whole paragraph.
// NOTE(review): line_ids(0) is read without any emptiness check here;
// this assumes a valid paragraph always has at least one line --
// confirm.
270  line_id_t fid = line_ids(0);
271  output << " <TextRegion id=\"" << p
272  << "\" orientation=\"" << lines(fid).orientation()
273  << "\" readingOrientation=\"" << lines(fid).reading_orientation()
274  << "\" readingDirection=\"" << lines(fid).reading_direction()
275  << "\" type=\"" << lines(fid).type()
276  << "\" reverseVideo=\"" << (lines(fid).reverse_video() ? "true" : "false")
277  << "\" indented=\"" << (lines(fid).indented() ? "true" : "false")
278  << "\" textColour=\"" << internal::compute_text_colour(parset(p).color())
279  << "\" kerning=\"" << lines(fid).char_space();
280 
// The previous statement deliberately leaves the last attribute value
// unterminated; each following chunk starts with the closing quote.
281  // EXTENSIONS - Not officially supported
282  // FIXME: add boldness?
283  output << "\" color=\"" << scribo::util::color_to_hex(parset(p).color())
284  << "\" colorReliability=\"" << parset(p).color_reliability()
285  << "\" baseline=\"" << lines(fid).baseline()
286  << "\" meanline=\"" << lines(fid).meanline()
287  << "\" xHeight=\"" << lines(fid).x_height()
288  << "\" dHeight=\"" << lines(fid).d_height()
289  << "\" aHeight=\"" << lines(fid).a_height()
290  << "\" charWidth=\"" << lines(fid).char_width();
291  // End of EXTENSIONS
// Terminate the last attribute value and the opening tag.
292  output << "\">"
293  << std::endl;
294 
295  internal::print_image_coords(output, par, " ");
296 
297  // EXTENSIONS - Not officially supported
// Each line of the paragraph accepts this visitor (presumably ending
// up in visit(const line_info<L>&) -- confirm), so its output lands
// inside the TextRegion element.
298  for_all_paragraph_lines(lid, line_ids)
299  {
300  line_id_t l = line_ids(lid);
301  lines(l).accept(*this);
302  }
303  // End of EXTENSIONS
304 
305  output << " </TextRegion>" << std::endl;
306  }
307  }
308 
309 
310  template <typename L>
311  void
312  extended_page_xml_visitor<L>::visit(const line_info<L>& line) const
313  {
314  if (line.has_text())
315  {
316  output << " <Line text=\"" << line.html_text() << "\" ";
317  }
318  else
319  output << " <Line ";
320 
321  output << "id=\"" << line.id()
322  << "\" boldness=\"" << line.boldness()
323  << "\" boldnessReliability=\"" << line.boldness_reliability()
324  << "\" color=\"" << scribo::util::color_to_hex(line.color())
325  << "\" colorReliability=\"" << line.color_reliability()
326  << "\" orientation=\"" << line.orientation()
327  << "\" readingOrientation=\"" << line.reading_orientation()
328  << "\" readingDirection=\"" << line.reading_direction()
329  << "\" type=\"" << line.type()
330  << "\" reverseVideo=\"" << (line.reverse_video() ? "true" : "false")
331  << "\" indented=\"" << (line.indented() ? "true" : "false")
332  << "\" textColour=\"" << internal::compute_text_colour(line.color())
333  << "\" kerning=\"" << line.char_space()
334  << "\" baseline=\"" << line.baseline()
335  << "\" meanline=\"" << line.meanline()
336  << "\" xHeight=\"" << line.x_height()
337  << "\" dHeight=\"" << line.d_height()
338  << "\" aHeight=\"" << line.a_height()
339  << "\" charWidth=\"" << line.char_width()
340  << "\" textConfidence=\"" << line.text_confidence()
341  << "\">" << std::endl;
342 
343  internal::print_box_coords(output, line.bbox(), " ");
344 
345  output << " </Line>" << std::endl;
346  }
347 
348 #endif // MLN_INCLUDE_ONLY
349 
350  } // end of namespace scribo::io::xml::internal
351 
352  } // end of namespace scribo::io::xml
353 
354  } // end of namespace scribo::io
355 
356 } // end of namespace scribo
357 
358 #endif // SCRIBO_IO_XML_INTERNAL_EXTENDED_PAGE_XML_VISITOR_HH