26 #ifndef SCRIBO_IO_XML_LOAD_HH
27 # define SCRIBO_IO_XML_LOAD_HH
41 # include <mln/core/image/image2d.hh>
43 # include <mln/data/wrap.hh>
44 # include <mln/value/int_u8.hh>
45 # include <mln/io/pgm/save.hh>
46 # include <mln/io/pbm/save.hh>
48 # include <scribo/core/document.hh>
49 # include <scribo/core/component_set.hh>
50 # include <scribo/core/line_set.hh>
51 # include <scribo/core/line_info.hh>
53 # include <scribo/util/hex_to_color.hh>
// Declaration of the public loader: receives the document to work on (by
// non-const reference) and the name of the input to read.
// NOTE(review): the parameter is called `input_name` here but `output_name`
// in the definition near the end of this file — consider aligning them.
79 load(document<L>& doc,
const std::string& input_name);
82 # ifndef MLN_INCLUDE_ONLY
// Enumerators of the parsing-mode list (the beginning of the enumeration is
// not visible in this excerpt). Each of the values below is paired with an
// XML element name in mode_data[].
111 WhitespacesDelimitors,
114 WhitespacesDelimitorsImage,
115 HLineSeparatorsImage,
116 VLineSeparatorsImage,
// Lookup table pairing an XML element name (left) with the parsing mode it
// selects (right). find_mode() walks this table in order, compares each name
// with the element's qualified name, and stops on an entry whose name is
// null — so the table presumably ends with a null-named sentinel entry (not
// visible in this excerpt — TODO confirm).
// Note that the element spellings are not always identical to the enumerator
// spellings (e.g. "CompidList" -> CompIdList, "HlinesSeparators" ->
// HLineSeparators).
127 static const ModeData mode_data[] = {
128 {
"ComponentSet", ComponentSet },
129 {
"ComponentInfo", ComponentInfo },
130 {
"ComponentFeatures", ComponentFeatures },
131 {
"Elements", Elements },
132 {
"LabeledImage", LabeledImage },
133 {
"SeparatorsImage", SeparatorsImage },
134 {
"ObjectLinks", ObjectLinks },
135 {
"ObjectGroups", ObjectGroups },
139 {
"GroupMember", GroupMember },
141 {
"LineLinks", LineLinks },
142 {
"LineLink", LineLink },
143 {
"TextData", TextData },
144 {
"TextRegion", TextRegion },
145 {
"CompidList", CompIdList },
146 {
"Compid", CompId },
148 {
"WhitespacesDelimitors", WhitespacesDelimitors },
149 {
"HlinesSeparators", HLineSeparators },
150 {
"VlinesSeparators", VLineSeparators },
151 {
"WhitespacesDelimitorsImage", WhitespacesDelimitorsImage },
152 {
"HlinesSeparatorsImage", HLineSeparatorsImage },
153 {
"VlinesSeparatorsImage", VLineSeparatorsImage },
// Qt XML content handler (derives from QXmlDefaultHandler) that populates a
// document<L> from the elements, attributes and character data it receives.
158 template <
typename L>
159 class xml_handler :
public QXmlDefaultHandler
// Binds the handler to the destination document and starts paragraph
// numbering at 1. One default-constructed line_info is appended to
// lines_data up front (presumably a placeholder for index 0, since line
// data is later resized to nlines + 1 — TODO confirm).
165 xml_handler(document<L>& doc_) : current_paragraph_id(1), doc(doc_)
166 { lines_data.append(line_info<L>()); }
// Opening-tag callback. Looks up the parsing mode for the element's
// qualified name, pushes it on the mode stack, then reads the attributes
// relevant to that mode (the dispatch itself is only partly visible in this
// excerpt).
//
// qName: qualified element name, resolved through find_mode().
// atts:  attributes of the element. Numeric attributes are converted with
//        toInt()/toFloat() and the conversion status is never inspected.
//        NOTE(review): a missing or malformed attribute therefore goes
//        unnoticed — confirm this is acceptable.
170 startElement(
const QString& ,
const QString& ,
171 const QString& qName,
const QXmlAttributes& atts )
173 mode.push(find_mode(qName));
// Component set: initialised from the declared number of elements.
182 comp_set_data->soft_init(atts.value(
"nelements").toInt());
// Component info: built from the element's attributes, then appended to the
// component set's info list.
190 component_info<L>
info(atts.value(
"id").toInt(),
192 atts.value(
"pmin_x").toInt(),
193 atts.value(
"pmax_y").toInt(),
194 atts.value(
"pmax_x").toInt()),
196 atts.value(
"massCenter_x").toInt()),
197 atts.value(
"card").toInt());
203 comp_set_data->infos_.append(
info);
// Component features: only applied when "valid" converts to a non-zero
// integer; they update the most recently appended component info.
208 case ComponentFeatures:
210 if (atts.value(
"valid").toInt())
212 component_features_data comp_features;
213 comp_features.valid =
true;
214 comp_features.color = scribo::util::hex_to_color(atts.value(
"color").toUtf8().constData());
215 comp_features.boldness = atts.value(
"boldness").toFloat();
217 comp_set_data->infos_.last().update_features(comp_features);
// Group info storage: capacity reserved for "ngroups", then sized to a
// single entry (presumably a placeholder for index 0 — TODO confirm).
233 group_info_.reserve(atts.value(
"ngroups").toInt());
234 group_info_.resize(1);
// Line storage is sized to nlines + 1, then the line set and the line links
// are rebuilt on top of it.
243 lines_data.resize(atts.value(
"nlines").toInt() + 1);
244 lines = line_set<L>(groups, lines_data);
245 llinks = line_links<L>(lines);
// Start a fresh paragraph bound to the current line links and read its
// colour attributes.
254 current_paragraph = paragraph_info<L>(llinks);
255 current_paragraph.set_color_(
256 scribo::util::hex_to_color(atts.value(
"color").toUtf8().constData()));
257 current_paragraph.set_color_reliability_(atts.value(
"colorReliability").toFloat());
// Line link: records that line "from" is linked to line "to".
265 llinks(atts.value(
"from").toInt()) = atts.value(
"to").toInt();
// Line description: remember which line is being described, then fill its
// data record field by field.
273 current_line_id = atts.value(
"id").toInt();
277 line_data->text_ = atts.value(
"text").toUtf8().constData();
// NOTE(review): this is the only float parsed with atof(); the other float
// attributes use QString::toFloat().
278 line_data->text_confidence_ = atof(atts.value(
"textConfidence").toUtf8().constData());
280 line_data->hidden_ =
false;
281 line_data->tag_ =
static_cast<line::Tag
>(atts.value(
"tag").toInt());
283 line_data->baseline_ = atts.value(
"baseline").toInt();
284 line_data->meanline_ = atts.value(
"meanline").toInt();
285 line_data->x_height_ = atts.value(
"xHeight").toInt();
286 line_data->d_height_ = atts.value(
"dHeight").toInt();
287 line_data->a_height_ = atts.value(
"aHeight").toInt();
// char_space_ is read from the attribute named "kerning".
288 line_data->char_space_ = atts.value(
"kerning").toInt();
289 line_data->char_width_ = atts.value(
"charWidth").toInt();
// word_space_, reading_direction_ and orientation_ are set to fixed values
// here rather than read from attributes.
290 line_data->word_space_ = 0;
292 line_data->reading_direction_ = line::LeftToRight;
293 line_data->type_ = line::str2type(atts.value(
"type").toUtf8().constData());
// Any value other than the exact string "false" yields true.
294 line_data->reverse_video_ = (atts.value(
"reverseVideo") ==
"false" ?
false :
true);
295 line_data->orientation_ = 0;
296 line_data->reading_orientation_ = atts.value(
"readingOrientation").toInt();
// Same rule as reverse_video_: only the exact string "false" yields false.
297 line_data->indented_ = (atts.value(
"indented") ==
"false" ?
false :
true);
300 line_data->boldness_ = atts.value(
"boldness").toFloat();
301 line_data->boldness_reliability_ = atts.value(
"boldnessReliability").toFloat();
302 line_data->color_ = scribo::util::hex_to_color(
303 atts.value(
"color").toUtf8().constData());
305 line_data->color_reliability_ = atts.value(
"colorReliability").toFloat();
// Component id belonging to the line currently being described.
323 line_data->component_ids_.append(atts.value(
"value").toInt());
// The point is constructed with "y" as first argument and "x" as second.
331 point2d p(atts.value(
"y").toInt(), atts.value(
"x").toInt());
// Image dimensions announced by the element.
340 width = atts.value(
"width").toInt();
341 height = atts.value(
"height").toInt();
348 case SeparatorsImage:
350 width = atts.value(
"width").toInt();
351 height = atts.value(
"height").toInt();
// The three separator/delimitor image modes share the same handling.
358 case WhitespacesDelimitorsImage:
359 case HLineSeparatorsImage:
360 case VLineSeparatorsImage:
362 width = atts.value(
"width").toInt();
363 height = atts.value(
"height").toInt();
// Object link: records the "from" -> "to" relation in the object links.
371 links.update(atts.value(
"from").toInt(), atts.value(
"to").toInt());
// Object group: appended to the group info list.
379 group_info_.append(group_info(atts.value(
"id").toInt(),
380 atts.value(
"pixelArea").toInt(),
382 atts.value(
"pmin_y").toInt(),
383 atts.value(
"pmax_x").toInt(),
384 atts.value(
"pmax_y").toInt()),
385 atts.value(
"valid").toInt()));
// Group member: collected here and attached to the group in endElement().
392 component_ids.append(atts.value(
"comp_id").toInt());
// Closing-tag callback. None of the three name arguments is used (they are
// unnamed); the visible statements finalise whatever was accumulated while
// the element was open.
408 endElement(
const QString& ,
const QString& ,
const QString& )
// Line finished: store the accumulated bounding box, register the line under
// its id, and refresh its extended bounding box.
423 line_data->bbox_ =
bbox;
424 lines_data(current_line_id) = line_info<L>(current_line_id, line_data);
425 lines_data(current_line_id).update_ebbox();
// The finished line is also added to the paragraph being built.
428 current_paragraph.add_line(lines_data(current_line_id));
// Paragraph finished: stored at the current paragraph id, which is then
// incremented for the next paragraph.
435 par_data->pars_(current_paragraph_id++) = current_paragraph;
// All text data read: push the line data into the line set, build the
// paragraph set and hand it to the document.
442 lines.update_line_data_(lines_data);
443 parset = paragraph_set<L>(par_data);
444 doc.set_paragraphs(parset);
// Object groups are built from the links and group infos collected so far.
451 groups = object_groups<L>(links, group_info_);
// Attach the component ids collected from the member elements to the most
// recently appended group, then reset the collector for the next group.
458 group_info_.last().component_ids_() = component_ids;
459 component_ids.clear();
469 case WhitespacesDelimitors:
471 doc.set_whitespace_separators(seps,
components);
475 case HLineSeparators:
481 case VLineSeparators:
// Character-data callback. In the image modes visible here, the text is
// converted to UTF-8 bytes, base64-decoded, decompressed with qUncompress()
// and copied byte-for-byte into the corresponding image buffer.
//
// NOTE(review): in every branch below memcpy() copies data.size() bytes and
// no check against the destination buffer's size is visible in this excerpt;
// the result of qUncompress() is not inspected either. If the payload does
// not match the announced image dimensions this could write past the
// buffer — confirm that sizes are validated elsewhere.
499 bool characters(
const QString & ch)
// Payload destined for the component set's labeled image buffer.
505 QByteArray
data = ch.toUtf8();
506 data = QByteArray::fromBase64(data);
507 data = qUncompress(data);
508 memcpy((
char *) comp_set_data->ima_.buffer(), data.data(), data.size());
// Payload destined for the component set's separators image buffer.
512 case SeparatorsImage:
514 QByteArray data = ch.toUtf8();
515 data = QByteArray::fromBase64(data);
516 data = qUncompress(data);
517 memcpy((
char *) comp_set_data->separators_.buffer(), data.data(), data.size());
// The three separator/delimitor image modes all decode into `seps`.
521 case WhitespacesDelimitorsImage:
522 case HLineSeparatorsImage:
523 case VLineSeparatorsImage:
525 QByteArray data = ch.toUtf8();
526 data = QByteArray::fromBase64(data);
527 data = qUncompress(data);
528 memcpy((
char *) seps.buffer(), data.data(), data.size());
// Maps an XML element name to its parsing mode by scanning mode_data[] from
// the start. The scan stops at the first entry whose name is null and
// returns the mode of the first entry whose name equals qName.
// The value returned when no entry matches is not visible in this excerpt.
542 Mode find_mode(
const QString& qName)
544 for (
int i = 0; mode_data[i].name; ++i)
545 if (mode_data[i].name == qName)
546 return mode_data[i].mode;
// Bounding-box accumulator; its value is stored into line_data->bbox_ when
// a line is finalised in endElement().
556 accu::shape::bbox<point2d>
bbox;
// Object links, updated from "from"/"to" attributes in startElement().
565 object_links<L> links;
// Object groups, built from `links` and the collected group infos.
567 object_groups<L> groups;
// Id of the line currently being described (read from its "id" attribute).
572 unsigned current_line_id;
// Line-to-line links, rebuilt once the number of lines is known.
575 line_links<L> llinks;
// Index at which the next finished paragraph is stored; starts at 1 (see
// the constructor) and is incremented after each paragraph.
577 unsigned current_paragraph_id;
// Paragraph currently being assembled from its lines.
578 paragraph_info<L> current_paragraph;
// Paragraph set eventually handed to the document.
580 paragraph_set<L> parset;
// Parses the XML file named by `output_name` into `doc`: an xml_handler
// bound to `doc` is installed as the content handler of a QXmlSimpleReader,
// the file is opened read-only in text mode, and the reader is run on it.
//
// NOTE(review): despite its name, `output_name` is the path of the file
// that is READ; the public declaration of load() calls it `input_name`.
// NOTE(review): the success message below contains a typo ("successfuly");
// it is a runtime string and is left untouched here.
// NOTE(review): what happens when parse() fails is not visible in this
// excerpt — confirm that the failure is reported to the caller.
600 template <
typename L>
602 load_extended(document<L>& doc,
603 const std::string& output_name)
605 xml_handler<L> handler(doc);
606 QXmlSimpleReader reader;
607 reader.setContentHandler(&handler);
609 QFile
file(output_name.c_str());
// Opening failure is reported through qDebug().
610 if (!
file.open(QFile::ReadOnly | QFile::Text))
612 qDebug() <<
"Cannot read file";
616 QXmlInputSource xmlInputSource(&file);
617 if (reader.parse(xmlInputSource))
618 qDebug() <<
"Loaded successfuly";
// Public entry point: forwards both arguments unchanged to
// internal::load_extended().
// NOTE(review): `output_name` names the file to read; the declaration
// earlier in this header calls the same parameter `input_name`.
626 template <
typename L>
628 load(document<L>& doc,
629 const std::string& output_name)
631 internal::load_extended(doc, output_name);
635 # endif // ! MLN_INCLUDE_ONLY
644 #endif // ! SCRIBO_IO_XML_LOAD_HH