31 #include <mln/core/image/image2d.hh>
32 #include <mln/core/alias/neighb2d.hh>
34 #include <mln/io/pbm/all.hh>
37 #include <scribo/toolchain/text_in_doc.hh>
39 #include <scribo/core/document.hh>
40 #include <scribo/core/line_set.hh>
42 #include <scribo/debug/option_parser.hh>
44 #include <scribo/preprocessing/crop_without_localization.hh>
46 #include <scribo/io/text_boxes/save.hh>
// Positional-argument entries: each one pairs an argument name with its
// help text. The enclosing array declaration is not visible in this excerpt.
// main() retrieves the input path by name through options.arg("input.pbm").
51 {
"input.pbm",
"A binary image." },
52 {
"output.txt",
"Text output." },
// Toggle entries: name, help text, and a boolean. The enclosing array
// declaration is not visible in this excerpt. main() queries these by name
// through options.is_enabled().
// The boolean is `true` for every entry here and each help text says
// "(default: enabled)" -- presumably it is the default state; confirm
// against scribo/debug/option_parser.hh.
61 {
"denoising",
"Performs a denoising. (default: enabled)",
true },
// NOTE(review): "alignements" in the help text below is a typo for
// "alignments"; it is user-facing text, so it is left unchanged here.
62 {
"find-delims",
"Find text alignements and whitespaces "
63 "to improve layout detection. (default: enabled)",
true },
64 {
"find-seps",
"Find separators in document (default: enabled)",
true },
// Valued-option entries. The enclosing array declaration is not visible in
// this excerpt. Judging from the entries, the fields appear to be:
// name, help text, value format string, validation callback, number of
// values, default value -- TODO confirm against
// scribo/debug/option_parser.hh.
// main() reads these through options.is_set(), options.opt_value() and
// options.opt_values().
// "crop": four values; main() reads them in the order given by the format
// string (pmin_row, pmin_col, pmax_row, pmax_col).
73 {
"crop",
"Crop input image before processing it.",
74 "<pmin_row> <pmin_col> <pmax_row> <pmax_col>", 0, 4, 0 },
// "debug-prefix": one value, used by main() as the debug filename prefix.
75 {
"debug-prefix",
"Enable debug image outputs. Prefix image name with that "
76 "given prefix.",
"<prefix>", 0, 1, 0 },
// "ocr-lang": one value, with scribo::debug::check_ocr_lang supplied in the
// callback position and "eng" in the last position (the help text names
// "eng" as the default).
77 {
"ocr-lang",
"Set the language to be recognized by the OCR (Tesseract). "
78 "According to your system, you can choose between eng (default), "
79 "fra, deu, ita, nld, por, spa, vie",
80 "<lang>", scribo::debug::check_ocr_lang, 1,
"eng" },
// "verbose": all trailing fields are 0; main() only tests it with is_set().
81 {
"verbose",
"Enable verbose mode", 0, 0, 0, 0 },
// Program entry point: parses the command line, optionally crops the input,
// reads the processing options and passes them on to the text extraction.
// NOTE: this excerpt omits several original lines (braces, the construction
// of `options`, image loading, the heads of some calls), so only the visible
// statements are described below.
87 int main(
int argc,
char* argv[])
89 using namespace scribo;
// Parse the command line; the statement run on failure is not visible here.
94 if (!options.parse(argc, argv))
// When a debug prefix is given, store it as the debug filename prefix.
98 if (options.is_set(
"debug-prefix"))
// NOTE(review): if debug_filename_prefix is a raw `const char*` and
// opt_value() returns a temporary string, this c_str() would dangle --
// confirm the declared types.
102 scribo::make::internal::debug_filename_prefix = options.opt_value(
"debug-prefix").c_str();
105 bool verbose = options.is_set(
"verbose");
// Path of the input image, looked up by its positional-argument name.
112 const char *input_name = options.arg(
"input.pbm");
// Offset of the processed image; stays at the origin unless cropping is
// requested below.
116 point2d crop_shift = literal::origin;
117 if (options.is_set(
"crop"))
// The four crop values are read in order: min row, min col, max row, max col.
119 std::vector<const char *> values = options.opt_values(
"crop");
// NOTE(review): atoi() reports no errors (non-numeric text yields 0) and the
// size of `values` is not checked in the visible lines -- presumably the
// option parser guarantees four values; confirm.
121 minr = atoi(values[0]),
122 minc = atoi(values[1]),
123 maxr = atoi(values[2]),
124 maxc = atoi(values[3]);
// Report the crop rectangle on standard output.
127 std::cout <<
"> Image cropped from (" << minr <<
"," << minc <<
")"
128 <<
" to (" << maxr <<
"," << maxc <<
")" << std::endl;
// Record the crop's top-left corner as the shift.
132 crop_shift =
point2d(minr, minc);
// Tail of a call whose beginning is not visible in this excerpt.
135 "input_cropped.pbm");
// Read the processing options by name.
138 bool denoise = options.is_enabled(
"denoising");
139 std::string language = options.opt_value(
"ocr-lang");
140 bool find_line_seps = options.is_enabled(
"find-seps");
141 bool find_whitespace_seps = options.is_enabled(
"find-delims");
// Print the effective options; the end of this statement is not visible.
144 std::cout <<
"Running with the following options :"
145 <<
" ocr language = " << language
146 <<
" | find_lines_seps = " << find_line_seps
147 <<
" | find_whitespace_seps = " << find_whitespace_seps
// Trailing arguments of a call whose head is not visible in this excerpt
// (presumably scribo::toolchain::text_in_doc, given the include -- confirm).
154 language, find_line_seps,
155 find_whitespace_seps, verbose);