32 #include <mln/core/image/image2d.hh>
34 #include <mln/io/pbm/save.hh>
35 #include <mln/io/magick/load.hh>
37 #include <scribo/toolchain/content_in_doc.hh>
38 #include <scribo/toolchain/text_in_doc_preprocess.hh>
40 #include <scribo/core/document.hh>
42 #include <scribo/debug/usage.hh>
43 #include <scribo/debug/logger.hh>
45 #include <scribo/preprocessing/crop_without_localization.hh>
46 #include <scribo/preprocessing/crop.hh>
48 #include <scribo/io/xml/save.hh>
49 #include <scribo/io/img/save.hh>
51 #include <scribo/debug/option_parser.hh>
56 {
"input.*",
"An image." },
57 {
"out.xml",
"Result of the document analysis" },
66 {
"denoising",
"Performs a denoising. (default: enabled)",
true },
67 {
"find-delims",
"Find text alignements and whitespaces "
68 "to improve layout detection. (default: enabled)",
true },
69 {
"find-seps",
"Find separators in document (default: enabled)",
true },
70 {
"ocr",
"Performs character recognition (default: enabled)",
true },
71 {
"deskew",
"Deskew image (default: disabled)",
false},
80 {
"crop",
"Crop input image before processing it.",
81 "<pmin_row> <pmin_col> <pmax_row> <pmax_col>", 0, 4, 0 },
82 {
"debug-prefix",
"Enable debug image outputs. Prefix image name with that "
83 "given prefix.",
"<prefix>", 0, 1, 0 },
84 {
"ocr-lang",
"Set the language to be recognized by the OCR (Tesseract). "
85 "Depending on your system, you can choose between eng (default), "
86 "fra, deu, ita, nld, por, spa, vie",
87 "<lang>", scribo::debug::check_ocr_lang, 1,
"eng" },
88 {
"verbose",
"Enable verbose mode", 0, 0, 0, 0 },
89 {
"xml-format",
"Choose betwen page, page-ext and full (default: page-ext).",
"<format>",
90 scribo::debug::check_xml_format, 1,
"page-ext" },
91 {
"more-xml-format",
"Provide an additional xml output. Format can"
92 " be chosen between page, page-ext and full (default: page-ext).",
"<format>",
93 scribo::debug::check_xml_format, 1,
"none" },
94 {
"more-xml-file",
"Filename of the additional xml output.",
"<filename>",
96 {
"debug-regions",
"Save a debug image with all the regions.",
"<filename>",
102 int main(
int argc,
char* argv[])
104 using namespace scribo;
109 if (!options.parse(argc, argv))
113 if (options.is_set(
"debug-prefix"))
119 bool verbose = options.is_set(
"verbose");
127 bool enable_deskew = options.is_enabled(
"deskew");
131 0.34, enable_deskew, verbose);
134 point2d crop_shift = literal::origin;
135 if (options.is_set(
"crop"))
137 std::vector<const char *> values = options.opt_values(
"crop");
139 minr = atoi(values[0]),
140 minc = atoi(values[1]),
141 maxr = atoi(values[2]),
142 maxc = atoi(values[3]);
145 std::cout <<
"> Image cropped from (" << minr <<
"," << minc <<
")"
146 <<
" to (" << maxr <<
"," << maxc <<
")" << std::endl;
150 crop_shift =
point2d(minr, minc);
153 "input_preproc_cropped.pbm");
156 bool denoise = options.is_enabled(
"denoising");
157 std::string language = options.opt_value(
"ocr-lang");
158 bool find_line_seps = options.is_enabled(
"find-seps");
159 bool find_whitespace_seps = options.is_enabled(
"find-delims");
160 bool enable_ocr = options.is_enabled(
"ocr");
163 std::cout <<
"Running with the following options :"
164 <<
" ocr_language = " << language
165 <<
" | find_lines_seps = " << find_line_seps
166 <<
" | find_whitespace_seps = " << find_whitespace_seps
174 std::cout <<
"Analysing document..." << std::endl;
177 find_line_seps, find_whitespace_seps,
178 enable_ocr, language, verbose);
182 std::cout <<
"Saving results..." << std::endl;
185 if (options.opt_value(
"xml-format") ==
"page-ext")
187 if (options.opt_value(
"xml-format") ==
"page")
189 if (options.opt_value(
"xml-format") ==
"full")
193 if (options.opt_value(
"more-xml-format") ==
"page-ext")
195 if (options.opt_value(
"more-xml-format") ==
"page")
197 if (options.opt_value(
"more-xml-format") ==
"full")
202 if (options.opt_value(
"debug-regions") !=
"/dev/null")