$extrastylesheet
Olena  User documentation 2.1
An Image Processing Platform
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
page_xml_visitor.hh
1 // Copyright (C) 2011, 2013 EPITA Research and Development Laboratory
2 // (LRDE)
3 //
4 // This file is part of Olena.
5 //
6 // Olena is free software: you can redistribute it and/or modify it under
7 // the terms of the GNU General Public License as published by the Free
8 // Software Foundation, version 2 of the License.
9 //
10 // Olena is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 // General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with Olena. If not, see <http://www.gnu.org/licenses/>.
17 //
18 // As a special exception, you may use this file as part of a free
19 // software project without restriction. Specifically, if other files
20 // instantiate templates or use macros or inline functions from this
21 // file, or you compile this file and link it with other files to produce
22 // an executable, this file does not by itself cause the resulting
23 // executable to be covered by the GNU General Public License. This
24 // exception does not however invalidate any other reasons why the
25 // executable file might be covered by the GNU General Public License.
26 
27 #ifndef SCRIBO_IO_XML_INTERNAL_PAGE_XML_VISITOR_HH
28 # define SCRIBO_IO_XML_INTERNAL_PAGE_XML_VISITOR_HH
29 
33 
34 # include <fstream>
35 
36 # include <scribo/core/internal/doc_serializer.hh>
37 # include <scribo/convert/to_base64.hh>
38 
39 # include <scribo/util/component_precise_outline.hh>
40 
41 # include <scribo/io/xml/internal/print_image_coords.hh>
42 # include <scribo/io/xml/internal/print_box_coords.hh>
43 # include <scribo/io/xml/internal/print_page_preambule.hh>
44 # include <scribo/io/xml/internal/compute_text_colour.hh>
45 # include <scribo/text/paragraphs_closing.hh>
46 
47 
48 namespace scribo
49 {
50 
51  namespace io
52  {
53 
54  namespace xml
55  {
56 
57  namespace internal
58  {
59 
71  template <typename L>
72  class page_xml_visitor : public doc_serializer<page_xml_visitor<L> >
73  {
74  public:
75  // Constructor
76  page_xml_visitor<L>(std::ofstream& out);
77 
78  // Visit overloads
79  void visit(const document<L>& doc) const;
80 
81  void visit(const component_set<L>& comp_set) const;
82 
83  void visit(const component_info<L>& info) const;
84 
85  void visit(const paragraph_set<L>& parset) const;
86 
87  private: // Attributes
88  std::ofstream& output;
89  mutable int base_vertical_line_id_;
90  mutable int base_text_id_;
91 
92  mutable L lbl_;
93  };
94 
95 
96 
97 # ifndef MLN_INCLUDE_ONLY
98 
99 
100  template <typename L>
101  page_xml_visitor<L>::page_xml_visitor(std::ofstream& out)
102  : output(out)
103  {
104  }
105 
106 
107 
109  //
110  template <typename L>
111  void
112  page_xml_visitor<L>::visit(const document<L>& doc) const
113  {
114  // Make sure there are no duplicate ids for line separators.
115  // Vertical and horizontal lines are indexed separately from
116  // 0, so vertical and horizontal lines with the same id
117  // exist.
118  base_vertical_line_id_ = doc.hline_seps_comps().nelements();
119  base_text_id_ = 0;
120 
121  // Preambule
122  print_PAGE_preambule(output, doc, true);
123 
124  // Text
125  if (doc.has_text())
126  {
127 
128  // FIXME: counting the number of valid lines...
129  for_all_paragraphs(p, doc.paragraphs())
130  if (doc.paragraphs()(p).is_valid())
131  ++base_text_id_;
132  --base_text_id_;
133 
134  doc.paragraphs().accept(*this);
135  }
136 
137  // Page elements (Pictures, ...)
138  if (doc.has_elements())
139  {
140  doc.elements().accept(*this);
141  }
142 
143  // line seraparators
144  if (doc.has_vline_seps())
145  doc.vline_seps_comps().accept(*this);
146  if (doc.has_hline_seps())
147  doc.hline_seps_comps().accept(*this);
148 
149  output << " </Page>" << std::endl;
150  output << "</PcGts>" << std::endl;
151  }
152 
153 
155  //
156  template <typename L>
157  void
158  page_xml_visitor<L>::visit(const component_set<L>& comp_set) const
159  {
160  lbl_ = comp_set.labeled_image();
161  for_all_comps(c, comp_set)
162  if (comp_set(c).is_valid())
163  comp_set(c).accept(*this);
164  }
165 
166 
168  //
// Write the XML region corresponding to a single component.
//
// The outline of the component is computed from the stored labeled
// image restricted to the component bounding box, then one region is
// written depending on info.type(): a SeparatorRegion, a TextRegion
// of type "drop-capital", or an ImageRegion (also used as default).
//
// NOTE(review): this listing seems to have lost several lines of the
// original header (listing lines 174, 176, 181, 193 and 205): `id` is
// used but never declared here, `par` has no visible type, and the
// first three blocks of the switch have no visible case label.
// Restore them from the original file before compiling.
169  template <typename L>
170  void
171  page_xml_visitor<L>::visit(const component_info<L>& info) const
172  {
173  // Getting component outline
175  //const L& lbl = info.holder().labeled_image();
177  par = scribo::util::component_precise_outline(lbl_ | info.bbox(), id);
178 
179  switch (info.type())
180  {
// Separator whose id is shifted by base_vertical_line_id_ to avoid
// duplicate separator ids (presumably the vertical separator case --
// TODO confirm, the case label is missing from this listing).
182  {
183  output << " <SeparatorRegion id=\"sr" << info.id() + base_vertical_line_id_
184  << "\" orientation=\"0.000000\" "
185  << " colour=\"black\">" << std::endl;
186 
187  internal::print_image_coords(output, par, " ");
188 
189  output << " </SeparatorRegion>" << std::endl;
190  break;
191  }
192 
// Separator written with its own id, without any offset (presumably
// the horizontal separator case -- TODO confirm).
194  {
195  output << " <SeparatorRegion id=\"sr" << info.id()
196  << "\" orientation=\"0.000000\" "
197  << " colour=\"black\">" << std::endl;
198 
199  internal::print_image_coords(output, par, " ");
200 
201  output << " </SeparatorRegion>" << std::endl;
202  break;
203  }
204 
// Text region of type "drop-capital"; its id is shifted by
// base_text_id_.
206  {
207  output << " <TextRegion id=\"r" << base_text_id_ + id << "\" "
208  << " type=\"drop-capital\">" // FIXME: should not be inline here!
209  << std::endl;
210 
211  internal::print_image_coords(output, par, " ");
212 
213  output << " </TextRegion>" << std::endl;
214  break;
215  }
216 
// Images, and any component type not handled above.
217  default:
218  case component::Image:
219  {
220  output << " <ImageRegion id=\"ir" << info.id()
221  << "\" colourDepth=\"colour\""
222  << " orientation=\"0.000000\" "
223  << " embText=\"false\" "
224  << " bgColour=\"white\">" << std::endl;
225 
226  internal::print_image_coords(output, par, " ");
227 
228  output << " </ImageRegion>" << std::endl;
229  break;
230  }
231  }
232  }
233 
234 
236  //
237  template <typename L>
238  void
239  page_xml_visitor<L>::visit(const paragraph_set<L>& parset) const
240  {
241  const line_set<L>& lines = parset.lines();
242 
243  // Prepare paragraph outlines.
244  L par_clo = text::paragraphs_closing(parset);
245 
246  for_all_paragraphs(p, parset)
247  if (parset(p).is_valid())
248  {
249  p_array<mln_site(L)> par = scribo::util::component_precise_outline(par_clo
250  | parset(p).bbox(), p);
251 
252  const mln::util::array<line_id_t>& line_ids = parset(p).line_ids();
253 
254  // FIXME: compute that information on the whole paragraph
255  // and use them here.
256  line_id_t fid = line_ids(0);
257  output << " <TextRegion id=\"r" << p
258  << "\" orientation=\"" << lines(fid).orientation()
259  << "\" readingOrientation=\"" << lines(fid).reading_orientation()
260  << "\" readingDirection=\"" << lines(fid).reading_direction()
261  << "\" type=\"" << ((lines(fid).type() == line::Text) ? "paragraph" : line::type2str(lines(fid).type()))
262  << "\" reverseVideo=\"" << (lines(fid).reverse_video() ? "true" : "false")
263  << "\" indented=\"" << (lines(fid).indented() ? "true" : "false")
264  << "\" kerning=\"" << lines(fid).char_space()
265  << "\" textColour=\"" << compute_text_colour(lines(fid).color())
266 // << "\" bgColour=\"" << compute_text_color(lines(fid).bgcolor())
267 // << "\" fontSize=\"" << compute_text_color(lines(fid).x_height())
268 // << "\" leading=\"" << compute_text_color(lines(fid).leading())
269  << "\">"
270  << std::endl;
271 
272  // Save coordinates.
273  internal::print_image_coords(output, par, " ");
274 
275  // Save text recognition results.
276  output << "<TextEquiv>" << std::endl
277  << "<PlainText></PlainText>" << std::endl;
278 
279  output << "<Unicode>";
280 
281  // Retrieve and merge text from paragraph lines.
282  for_all_paragraph_lines(lid, line_ids)
283  {
284  line_id_t l = line_ids(lid);
285  if (lines(l).has_text())
286  output << lines(l).html_text() << std::endl;
287  }
288 
289  output << "</Unicode>" << std::endl
290  << "</TextEquiv>" << std::endl;
291 
292  output << " </TextRegion>" << std::endl;
293  }
294  }
295 
296 
297 #endif // MLN_INCLUDE_ONLY
298 
299  } // end of namespace scribo::io::xml::internal
300 
301  } // end of namespace scribo::io::xml
302 
303  } // end of namespace scribo::io
304 
305 } // end of namespace scribo
306 
307 #endif // SCRIBO_IO_XML_INTERNAL_PAGE_XML_VISITOR_HH