27 #ifndef SCRIBO_IO_XML_INTERNAL_PAGE_XML_VISITOR_HH
28 # define SCRIBO_IO_XML_INTERNAL_PAGE_XML_VISITOR_HH
36 # include <scribo/core/internal/doc_serializer.hh>
37 # include <scribo/convert/to_base64.hh>
39 # include <scribo/util/component_precise_outline.hh>
41 # include <scribo/io/xml/internal/print_image_coords.hh>
42 # include <scribo/io/xml/internal/print_box_coords.hh>
43 # include <scribo/io/xml/internal/print_page_preambule.hh>
44 # include <scribo/io/xml/internal/compute_text_colour.hh>
45 # include <scribo/text/paragraphs_closing.hh>
// Stream that every visit() overload in this file writes its XML to.
88 std::ofstream& output;
// Offset added to info.id() when building the id of one of the two
// "sr" SeparatorRegion elements. It is set in visit(const document<L>&)
// from doc.hline_seps_comps().nelements(). Declared mutable because
// that visit() overload is const yet assigns it.
89 mutable int base_vertical_line_id_;
// Offset added to `id` when building the "r" id of the drop-capital
// TextRegion. NOTE(review): its assignment is not visible in this
// excerpt -- confirm it is initialised before use.
90 mutable int base_text_id_;
97 # ifndef MLN_INCLUDE_ONLY
100 template <
typename L>
// Entry point of the visitor: writes a document to `output`.
//
// Emits the PAGE preambule, lets the paragraph set, the elements and
// the vertical/horizontal separator component sets accept this
// visitor (each only if the document reports having them, except the
// paragraph set whose guard is not visible here), then closes the
// </Page> and </PcGts> elements.
//
// \param doc  The document to serialise.
110 template <
typename L>
112 page_xml_visitor<L>::visit(
const document<L>& doc)
const
// Remember how many horizontal-separator components exist. The value
// is later added to info.id() for one kind of SeparatorRegion.
// Presumably this keeps vertical and horizontal separator ids from
// colliding -- TODO confirm against the missing case labels.
118 base_vertical_line_id_ = doc.hline_seps_comps().nelements();
// Opening part of the XML; the meaning of the trailing `true` is
// defined by print_PAGE_preambule (not visible here).
122 print_PAGE_preambule(output, doc,
true);
// Walk the valid paragraphs. NOTE(review): the statement guarded by
// this test is not visible in this excerpt.
129 for_all_paragraphs(p, doc.paragraphs())
130 if (doc.paragraphs()(
p).is_valid())
// Dispatch to visit(const paragraph_set<L>&).
134 doc.paragraphs().accept(*
this);
// Non-text elements, when the document has any.
138 if (doc.has_elements())
140 doc.elements().accept(*
this);
// Vertical, then horizontal separator components.
144 if (doc.has_vline_seps())
145 doc.vline_seps_comps().accept(*
this);
146 if (doc.has_hline_seps())
147 doc.hline_seps_comps().accept(*
this);
// Close the elements presumably opened by the preambule.
149 output <<
" </Page>" << std::endl;
150 output <<
"</PcGts>" << std::endl;
// Visits a component set: stores its labeled image in lbl_ and lets
// every valid component accept this visitor.
//
// \param comp_set  The component set to traverse.
156 template <
typename L>
158 page_xml_visitor<L>::visit(
const component_set<L>& comp_set)
const
// Kept so that visit(const component_info<L>&) can restrict the
// labeled image to a component's bounding box.
160 lbl_ = comp_set.labeled_image();
// Invalid components are skipped and produce no output.
161 for_all_comps(c, comp_set)
162 if (comp_set(c).is_valid())
163 comp_set(c).accept(*this);
// Writes one component as a PAGE region on `output`.
//
// Four region forms appear below: two SeparatorRegion variants, a
// drop-capital TextRegion and an ImageRegion. Each is followed by the
// component outline printed through internal::print_image_coords.
// NOTE(review): the logic that selects which form is emitted for a
// given component is not visible in this excerpt.
//
// \param info  The component to serialise.
169 template <typename L>
171 page_xml_visitor<L>::visit(const component_info<L>&
info)
const
// Outline of the component, computed on lbl_ restricted to the
// component bounding box. NOTE(review): the declarations of `par`
// and `id` are not visible here.
177 par = scribo::util::component_precise_outline(lbl_ | info.bbox(), id);
// Separator whose id is shifted by base_vertical_line_id_.
// Presumably the vertical-separator case -- TODO confirm.
183 output <<
" <SeparatorRegion id=\"sr" << info.id() + base_vertical_line_id_
184 <<
"\" orientation=\"0.000000\" "
185 <<
" colour=\"black\">" << std::endl;
187 internal::print_image_coords(output, par,
" ");
189 output <<
" </SeparatorRegion>" << std::endl;
// Separator whose id is the raw component id.
// Presumably the horizontal-separator case -- TODO confirm.
195 output <<
" <SeparatorRegion id=\"sr" << info.id()
196 <<
"\" orientation=\"0.000000\" "
197 <<
" colour=\"black\">" << std::endl;
199 internal::print_image_coords(output, par,
" ");
201 output <<
" </SeparatorRegion>" << std::endl;
// Drop capital, written as a TextRegion whose id is offset by
// base_text_id_.
207 output <<
" <TextRegion id=\"r" << base_text_id_ +
id <<
"\" "
208 <<
" type=\"drop-capital\">"
211 internal::print_image_coords(output, par,
" ");
213 output <<
" </TextRegion>" << std::endl;
// Image region; every attribute except the id is a fixed literal.
220 output <<
" <ImageRegion id=\"ir" << info.id()
221 <<
"\" colourDepth=\"colour\""
222 <<
" orientation=\"0.000000\" "
223 <<
" embText=\"false\" "
224 <<
" bgColour=\"white\">" << std::endl;
226 internal::print_image_coords(output, par,
" ");
228 output <<
" </ImageRegion>" << std::endl;
// Writes every valid paragraph of a paragraph set as a TextRegion.
//
// For each valid paragraph the region attributes are read from the
// paragraph's first line, the outline is printed, and the text of the
// lines that have text is written inside <TextEquiv>/<Unicode>.
//
// \param parset  The paragraph set to serialise.
237 template <
typename L>
239 page_xml_visitor<L>::visit(
const paragraph_set<L>& parset)
const
241 const line_set<L>& lines = parset.lines();
// Labeled image produced by text::paragraphs_closing for this set.
244 L par_clo = text::paragraphs_closing(parset);
// Invalid paragraphs are skipped.
246 for_all_paragraphs(p, parset)
247 if (parset(p).is_valid())
// Tail of a call whose beginning is not visible in this excerpt.
// Presumably the outline computation on par_clo restricted to the
// paragraph bounding box -- TODO confirm.
250 | parset(p).
bbox(), p);
// Id of the first line of the paragraph; all region attributes below
// are taken from that line. NOTE(review): the declaration of
// `line_ids` is not visible here.
256 line_id_t fid = line_ids(0);
// The type attribute is "paragraph" when the first line's type is
// line::Text, otherwise the string given by line::type2str.
257 output <<
" <TextRegion id=\"r" <<
p
258 <<
"\" orientation=\"" << lines(fid).orientation()
259 <<
"\" readingOrientation=\"" << lines(fid).reading_orientation()
260 <<
"\" readingDirection=\"" << lines(fid).reading_direction()
261 <<
"\" type=\"" << ((lines(fid).type() == line::Text) ?
"paragraph" : line::type2str(lines(fid).type()))
262 <<
"\" reverseVideo=\"" << (lines(fid).reverse_video() ?
"true" :
"false")
263 <<
"\" indented=\"" << (lines(fid).indented() ?
"true" :
"false")
264 <<
"\" kerning=\"" << lines(fid).char_space()
265 <<
"\" textColour=\"" << compute_text_colour(lines(fid).color())
// Paragraph outline.
273 internal::print_image_coords(output, par,
" ");
// PlainText is always written empty; the text goes into Unicode.
276 output <<
"<TextEquiv>" << std::endl
277 <<
"<PlainText></PlainText>" << std::endl;
279 output <<
"<Unicode>";
// One output line per paragraph line that has text; lines without
// text contribute nothing.
282 for_all_paragraph_lines(lid, line_ids)
284 line_id_t l = line_ids(lid);
285 if (lines(l).has_text())
286 output << lines(l).html_text() << std::endl;
289 output <<
"</Unicode>" << std::endl
290 <<
"</TextEquiv>" << std::endl;
292 output <<
" </TextRegion>" << std::endl;
297 #endif // MLN_INCLUDE_ONLY
307 #endif // SCRIBO_IO_XML_INTERNAL_PAGE_XML_VISITOR_HH