$extrastylesheet
Olena  User documentation 2.1
An Image Processing Platform
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
load.hh
1 // Copyright (C) 2011, 2013 EPITA Research and Development Laboratory (LRDE)
2 //
3 // This file is part of Olena.
4 //
5 // Olena is free software: you can redistribute it and/or modify it under
6 // the terms of the GNU General Public License as published by the Free
7 // Software Foundation, version 2 of the License.
8 //
9 // Olena is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 // General Public License for more details.
13 //
14 // You should have received a copy of the GNU General Public License
15 // along with Olena. If not, see <http://www.gnu.org/licenses/>.
16 //
17 // As a special exception, you may use this file as part of a free
18 // software project without restriction. Specifically, if other files
19 // instantiate templates or use macros or inline functions from this
20 // file, or you compile this file and link it with other files to produce
21 // an executable, this file does not by itself cause the resulting
22 // executable to be covered by the GNU General Public License. This
23 // exception does not however invalidate any other reasons why the
24 // executable file might be covered by the GNU General Public License.
25 
26 #ifndef SCRIBO_IO_XML_LOAD_HH
27 # define SCRIBO_IO_XML_LOAD_HH
28 
32 
33 # include <QtXml>
34 
35 # include <libgen.h>
36 # include <fstream>
37 # include <sstream>
38 
39 # include <map>
40 
41 # include <mln/core/image/image2d.hh>
42 
43 # include <mln/data/wrap.hh>
44 # include <mln/value/int_u8.hh>
45 # include <mln/io/pgm/save.hh>
46 # include <mln/io/pbm/save.hh>
47 
48 # include <scribo/core/document.hh>
49 # include <scribo/core/component_set.hh>
50 # include <scribo/core/line_set.hh>
51 # include <scribo/core/line_info.hh>
52 
53 # include <scribo/util/hex_to_color.hh>
54 
55 namespace scribo
56 {
57 
58  namespace io
59  {
60 
61  namespace xml
62  {
63  using namespace mln;
64 
// Load the XML file named input_name into doc.
//
// L is the image type used to rebuild the labeled image stored in the
// file (see the LabeledImage element handling in this header).
// The definition in this header forwards to internal::load_extended().
// If the file cannot be opened, a debug message is printed and the
// function returns without parsing anything.
77  template <typename L>
78  void
79  load(document<L>& doc, const std::string& input_name);
80 
81 
82 # ifndef MLN_INCLUDE_ONLY
83 
84  namespace internal
85  {
86 
87 
// Parsing state associated with an XML element.
//
// One value is pushed on the handler's mode stack for every opening
// element and popped on the matching closing element. None is what
// find_mode() returns for element names missing from mode_data, so
// unknown elements fall into the default branch of every switch.
88  enum Mode
89  {
90  None,
91  ComponentSet,
92  ComponentInfo,
93  ComponentFeatures,
94  Elements,
95  LabeledImage,
96  SeparatorsImage,
97  ObjectLinks,
98  ObjectGroups,
99  Point,
100  Link,
101  Group,
102  GroupMember,
103  Line,
104  LineLinks,
105  LineLink,
106  TextData,
107  TextRegion,
108  CompIdList,
109  CompId,
110  Page,
111  WhitespacesDelimitors,
112  HLineSeparators,
113  VLineSeparators,
114  WhitespacesDelimitorsImage,
115  HLineSeparatorsImage,
116  VLineSeparatorsImage,
117  };
118 
119 
// One entry of the element-name -> Mode lookup table (mode_data).
120  struct ModeData
121  {
     // XML element name; a null pointer marks the end of the table.
122  const char *name;
     // Mode pushed on the stack when an element with that name opens.
123  Mode mode;
124  };
125 
126 
// Table mapping XML element names to parsing modes.
//
// find_mode() scans it linearly and compares names exactly, so the
// spelling here is the spelling expected in the file. Note that some
// names differ in case or form from their enumerator ("CompidList",
// "Compid", "HlinesSeparators", "VlinesSeparators", ...).
// The final { 0, None } entry is the sentinel that stops the scan.
127  static const ModeData mode_data[] = {
128  { "ComponentSet", ComponentSet },
129  { "ComponentInfo", ComponentInfo },
130  { "ComponentFeatures", ComponentFeatures },
131  { "Elements", Elements },
132  { "LabeledImage", LabeledImage },
133  { "SeparatorsImage", SeparatorsImage },
134  { "ObjectLinks", ObjectLinks },
135  { "ObjectGroups", ObjectGroups },
136  { "Point", Point },
137  { "Link", Link },
138  { "Group", Group },
139  { "GroupMember", GroupMember },
140  { "Line", Line },
141  { "LineLinks", LineLinks },
142  { "LineLink", LineLink },
143  { "TextData", TextData },
144  { "TextRegion", TextRegion },
145  { "CompidList", CompIdList },
146  { "Compid", CompId },
147  { "Page", Page },
148  { "WhitespacesDelimitors", WhitespacesDelimitors },
149  { "HlinesSeparators", HLineSeparators },
150  { "VlinesSeparators", VLineSeparators },
151  { "WhitespacesDelimitorsImage", WhitespacesDelimitorsImage },
152  { "HlinesSeparatorsImage", HLineSeparatorsImage },
153  { "VlinesSeparatorsImage", VLineSeparatorsImage },
154  { 0, None } // End-of-table sentinel.
155  };
156 
157 
158  template <typename L>
159  class xml_handler : public QXmlDefaultHandler
160  {
161 
162  typedef mln_ch_value(L,bool) B;
163 
164  public:
165  xml_handler(document<L>& doc_) : current_paragraph_id(1), doc(doc_)
166  { lines_data.append(line_info<L>()); } // line info id starts from 1.
167 
168  virtual
169  bool
170  startElement(const QString& /*namespaceURI*/, const QString& /*localName*/,
171  const QString& qName, const QXmlAttributes& atts )
172  {
173  mode.push(find_mode(qName));
174 
175  switch (mode.top())
176  {
177 
178  // Component Set
179  case ComponentSet:
180  {
181  comp_set_data = new scribo::internal::component_set_data<L>();
182  comp_set_data->soft_init(atts.value("nelements").toInt());
183  }
184  break;
185 
186 
187  // Component Info
188  case ComponentInfo:
189  {
190  component_info<L> info(atts.value("id").toInt(),
191  mln::make::box2d(atts.value("pmin_y").toInt(),
192  atts.value("pmin_x").toInt(),
193  atts.value("pmax_y").toInt(),
194  atts.value("pmax_x").toInt()),
195  mln::point2d(atts.value("massCenter_y").toInt(),
196  atts.value("massCenter_x").toInt()),
197  atts.value("card").toInt());
198 
199 
200  info.update_tag(component::str2tag(atts.value("tag").toUtf8().constData()));
201  info.update_type(component::str2type(atts.value("type").toUtf8().constData()));
202 
203  comp_set_data->infos_.append(info);
204  }
205  break;
206 
207 
208  case ComponentFeatures:
209  {
210  if (atts.value("valid").toInt())
211  {
212  component_features_data comp_features;
213  comp_features.valid = true;
214  comp_features.color = scribo::util::hex_to_color(atts.value("color").toUtf8().constData());
215  comp_features.boldness = atts.value("boldness").toFloat();
216 
217  comp_set_data->infos_.last().update_features(comp_features);
218  }
219  }
220  break;
221 
222  // Object links
223  case ObjectLinks:
224  {
225  links = object_links<L>(components);
226  }
227  break;
228 
229 
230  // Object groups
231  case ObjectGroups:
232  {
233  group_info_.reserve(atts.value("ngroups").toInt());
234  group_info_.resize(1);
235  }
236  break;
237 
238 
239  // Text data
240  case TextData:
241  {
242  // Reserve space for line data.
243  lines_data.resize(atts.value("nlines").toInt() + 1);
244  lines = line_set<L>(groups, lines_data);
245  llinks = line_links<L>(lines);
246  par_data = new scribo::internal::paragraph_set_data<L>(llinks, atts.value("nparagraphs").toInt());
247  }
248  break;
249 
250 
251  // Text Region
252  case TextRegion:
253  {
254  current_paragraph = paragraph_info<L>(llinks);
255  current_paragraph.set_color_(
256  scribo::util::hex_to_color(atts.value("color").toUtf8().constData()));
257  current_paragraph.set_color_reliability_(atts.value("colorReliability").toFloat());
258  }
259  break;
260 
261 
262  // Line link
263  case LineLink:
264  {
265  llinks(atts.value("from").toInt()) = atts.value("to").toInt();
266  }
267  break;
268 
269 
270  // Line
271  case Line:
272  {
273  current_line_id = atts.value("id").toInt();
274 
276 
277  line_data->text_ = atts.value("text").toUtf8().constData();
278  line_data->text_confidence_ = atof(atts.value("textConfidence").toUtf8().constData());
279 
280  line_data->hidden_ = false;
281  line_data->tag_ = static_cast<line::Tag>(atts.value("tag").toInt());
282 
283  line_data->baseline_ = atts.value("baseline").toInt();
284  line_data->meanline_ = atts.value("meanline").toInt();
285  line_data->x_height_ = atts.value("xHeight").toInt();
286  line_data->d_height_ = atts.value("dHeight").toInt();
287  line_data->a_height_ = atts.value("aHeight").toInt();
288  line_data->char_space_ = atts.value("kerning").toInt();
289  line_data->char_width_ = atts.value("charWidth").toInt();
290  line_data->word_space_ = 0;
291 
292  line_data->reading_direction_ = line::LeftToRight;
293  line_data->type_ = line::str2type(atts.value("type").toUtf8().constData());
294  line_data->reverse_video_ = (atts.value("reverseVideo") == "false" ? false : true);
295  line_data->orientation_ = 0;
296  line_data->reading_orientation_ = atts.value("readingOrientation").toInt();
297  line_data->indented_ = (atts.value("indented") == "false" ? false : true);
298 
299 
300  line_data->boldness_ = atts.value("boldness").toFloat();
301  line_data->boldness_reliability_ = atts.value("boldnessReliability").toFloat();
302  line_data->color_ = scribo::util::hex_to_color(
303  atts.value("color").toUtf8().constData());
304 
305  line_data->color_reliability_ = atts.value("colorReliability").toFloat();
306 
307  bbox.init();
308  }
309  break;
310 
311 
312  // CompIdList
313  case CompIdList:
314  {
315 
316  }
317  break;
318 
319 
320  // CompId
321  case CompId:
322  {
323  line_data->component_ids_.append(atts.value("value").toInt());
324  }
325  break;
326 
327 
328  // Point
329  case Point:
330  {
331  point2d p(atts.value("y").toInt(), atts.value("x").toInt());
332  bbox.take(p);
333  }
334  break;
335 
336 
337  // Labeled Image
338  case LabeledImage:
339  {
340  width = atts.value("width").toInt();
341  height = atts.value("height").toInt();
342  comp_set_data->ima_ = L(mln::make::box2d(height, width), 0); // No border
343  }
344  break;
345 
346 
347  // Separator Image
348  case SeparatorsImage:
349  {
350  width = atts.value("width").toInt();
351  height = atts.value("height").toInt();
352  comp_set_data->separators_ = B(mln::make::box2d(height, width), 0); // No border
353  }
354  break;
355 
356 
357  // Separators/delimitor images
358  case WhitespacesDelimitorsImage:
359  case HLineSeparatorsImage:
360  case VLineSeparatorsImage:
361  {
362  width = atts.value("width").toInt();
363  height = atts.value("height").toInt();
364  seps = B(mln::make::box2d(height, width), 0); // No border
365  }
366  break;
367 
368  // Link
369  case Link:
370  {
371  links.update(atts.value("from").toInt(), atts.value("to").toInt());
372  }
373  break;
374 
375 
376  // Group
377  case Group:
378  {
379  group_info_.append(group_info(atts.value("id").toInt(),
380  atts.value("pixelArea").toInt(),
381  mln::make::box2d(atts.value("pmin_x").toInt(),
382  atts.value("pmin_y").toInt(),
383  atts.value("pmax_x").toInt(),
384  atts.value("pmax_y").toInt()),
385  atts.value("valid").toInt()));
386  }
387  break;
388 
389  // GroupMember
390  case GroupMember:
391  {
392  component_ids.append(atts.value("comp_id").toInt());
393  }
394  break;
395 
396 
397  // DEFAULT
398  default:
399  ;
400  }
401 
402  return true;
403  }
404 
405 
406  virtual
407  bool
408  endElement(const QString& /*namespaceURI*/, const QString& /*localName*/, const QString& /*qName*/)
409  {
410  switch(mode.top())
411  {
412  // Component set
413  case ComponentSet:
414  {
415  components = component_set<L>(comp_set_data);
416 
417  }
418  break;
419 
420  // Line
421  case Line:
422  {
423  line_data->bbox_ = bbox;
424  lines_data(current_line_id) = line_info<L>(current_line_id, line_data);
425  lines_data(current_line_id).update_ebbox();
426 
427  // Add this line to the current paragraph.
428  current_paragraph.add_line(lines_data(current_line_id));
429  }
430  break;
431 
432  // TextRegion
433  case TextRegion:
434  {
435  par_data->pars_(current_paragraph_id++) = current_paragraph;
436  }
437  break;
438 
439  // Page
440  case Page:
441  {
442  lines.update_line_data_(lines_data);
443  parset = paragraph_set<L>(par_data);
444  doc.set_paragraphs(parset);
445  }
446  break;
447 
448  // ObjectGroups
449  case ObjectGroups:
450  {
451  groups = object_groups<L>(links, group_info_);
452  }
453  break;
454 
455  // Group
456  case Group:
457  {
458  group_info_.last().component_ids_() = component_ids;
459  component_ids.clear();
460  }
461  break;
462 
463  case Elements:
464  {
465  doc.set_elements(components);
466  }
467  break;
468 
469  case WhitespacesDelimitors:
470  {
471  doc.set_whitespace_separators(seps, components);
472  }
473  break;
474 
475  case HLineSeparators:
476  {
477  doc.set_hline_separators(seps, components);
478  }
479  break;
480 
481  case VLineSeparators:
482  {
483  doc.set_vline_separators(seps, components);
484  }
485  break;
486 
487  // DEFAULT
488  default:
489  ;
490 
491  }
492 
493  mode.pop();
494  return true;
495  }
496 
497 
498 
499  bool characters(const QString & ch)
500  {
501  switch (mode.top())
502  {
503  case LabeledImage:
504  {
505  QByteArray data = ch.toUtf8();
506  data = QByteArray::fromBase64(data);
507  data = qUncompress(data);
508  memcpy((char *) comp_set_data->ima_.buffer(), data.data(), data.size());
509  }
510  break;
511 
512  case SeparatorsImage:
513  {
514  QByteArray data = ch.toUtf8();
515  data = QByteArray::fromBase64(data);
516  data = qUncompress(data);
517  memcpy((char *) comp_set_data->separators_.buffer(), data.data(), data.size());
518  }
519  break;
520 
521  case WhitespacesDelimitorsImage:
522  case HLineSeparatorsImage:
523  case VLineSeparatorsImage:
524  {
525  QByteArray data = ch.toUtf8();
526  data = QByteArray::fromBase64(data);
527  data = qUncompress(data);
528  memcpy((char *) seps.buffer(), data.data(), data.size());
529  }
530  break;
531 
532  default:
533  ;
534  }
535 
536  return true;
537  }
538 
539 
540 // private: // Methods
541 
542  Mode find_mode(const QString& qName)
543  {
544  for (int i = 0; mode_data[i].name; ++i)
545  if (mode_data[i].name == qName)
546  return mode_data[i].mode;
547  return None;
548  }
549 
550 
551 // private: // Attributes
552 
553  QStack<Mode> mode;
554 
555  // Shape
556  accu::shape::bbox<point2d> bbox;
557 
558  unsigned width;
559  unsigned height;
560 
561  // Components
563  component_set<L> components;
564 
565  object_links<L> links;
566 
567  object_groups<L> groups;
568  mln::util::array<component_id_t> component_ids;
569  mln::util::array<group_info> group_info_;
570 
571  // Lines
572  unsigned current_line_id;
574 
575  line_links<L> llinks;
576 
577  unsigned current_paragraph_id;
578  paragraph_info<L> current_paragraph;
580  paragraph_set<L> parset;
581 
582  mln::util::array<line_info<L> > lines_data;
583  line_set<L> lines;
584 
585  // Delimitors/separators
586  B seps; // Temporary image.
587 
588  document<L>& doc;
589  };
590 
591 
592 
593 
594 
595 
596 
597 
598 
599 
600  template <typename L>
601  void
602  load_extended(document<L>& doc,
603  const std::string& output_name)
604  {
605  xml_handler<L> handler(doc);
606  QXmlSimpleReader reader;
607  reader.setContentHandler(&handler);
608 
609  QFile file(output_name.c_str());
610  if (!file.open(QFile::ReadOnly | QFile::Text))
611  {
612  qDebug() << "Cannot read file";
613  return;
614  }
615 
616  QXmlInputSource xmlInputSource(&file);
617  if (reader.parse(xmlInputSource))
618  qDebug() << "Loaded successfuly";
619  }
620 
621  } // end of namespace scribo::io::xml::internal
622 
623 
624  // FACADE
625 
626  template <typename L>
627  void
628  load(document<L>& doc,
629  const std::string& output_name)
630  {
631  internal::load_extended(doc, output_name);
632  }
633 
634 
635 # endif // ! MLN_INCLUDE_ONLY
636 
637  } // end of namespace scribo::io::xml
638 
639  } // end of namespace scribo::io
640 
641 } // end of namespace scribo
642 
643 
644 #endif // ! SCRIBO_IO_XML_LOAD_HH
645