local OCR
This commit is contained in:
parent
145b8b78e8
commit
770b0c1dd7
|
@ -0,0 +1,109 @@
|
||||||
|
use image::{RgbImage, DynamicImage, Rgb};
|
||||||
|
use img_hash::{image::GenericImageView, ImageHash};
|
||||||
|
|
||||||
|
use crate::image_processing;
|
||||||
|
|
||||||
|
/// Axis-aligned rectangle, in pixel coordinates, describing a region of an image.
///
/// `x`/`y` are the offsets of the region's origin inside the source image and
/// `width`/`height` are its extent; `trim_to_bounding_box` copies the pixels
/// `(x + dx, y + dy)` for `dx < width`, `dy < height`.
///
/// The type is plain data, so it is `Copy` and comparable, which lets callers
/// and tests compare whole boxes instead of individual fields.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct BoundingBox {
    /// Horizontal offset of the region's origin within the source image.
    x: u32,
    /// Vertical offset of the region's origin within the source image.
    y: u32,
    /// Number of pixel columns covered by the region.
    width: u32,
    /// Number of pixel rows covered by the region.
    height: u32,
}
|
||||||
|
|
||||||
|
fn column_has_any_dark(image: &RgbImage, x: u32) -> bool {
|
||||||
|
for y in 0..image.height() {
|
||||||
|
let [r, g, b] = image.get_pixel(x, y).0;
|
||||||
|
if r < 100 && g < 100 && b < 100 {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
|
fn row_has_any_dark(image: &RgbImage, y: u32, start_x: u32, width: u32) -> bool {
|
||||||
|
for x in start_x..(start_x + width) {
|
||||||
|
let [r, g, b] = image.get_pixel(x, y).0;
|
||||||
|
if r < 100 && g < 100 && b < 100 {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
|
fn take_while<F: Fn(u32) -> bool>(image: &RgbImage, x: &mut u32, max: u32, f: F) {
|
||||||
|
while *x < max && f(*x) {
|
||||||
|
*x = *x + 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_character_bounding_boxes(image: &RgbImage) -> Vec<BoundingBox> {
|
||||||
|
let mut x = 0;
|
||||||
|
let mut boxes = Vec::new();
|
||||||
|
while x < image.width() {
|
||||||
|
take_while(image, &mut x, image.width(), |x| !column_has_any_dark(image, x));
|
||||||
|
|
||||||
|
let start_x = x;
|
||||||
|
take_while(image, &mut x, image.width(), |x| column_has_any_dark(image, x));
|
||||||
|
let width = x - start_x;
|
||||||
|
|
||||||
|
if width > 2 {
|
||||||
|
let mut y = 0;
|
||||||
|
take_while(image, &mut y, image.height(), |y| !row_has_any_dark(image, y, start_x, width));
|
||||||
|
|
||||||
|
let start_y = y;
|
||||||
|
|
||||||
|
let mut inverse_y = 1;
|
||||||
|
take_while(image, &mut inverse_y, image.height(), |y| !row_has_any_dark(image, image.height() - y, start_x, width));
|
||||||
|
let end_y = image.height() - inverse_y - 1;
|
||||||
|
boxes.push(BoundingBox{
|
||||||
|
x,
|
||||||
|
y: start_y,
|
||||||
|
width,
|
||||||
|
height: end_y - start_y,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
boxes
|
||||||
|
}
|
||||||
|
|
||||||
|
fn trim_to_bounding_box(image: &RgbImage, bounding_box: &BoundingBox) -> RgbImage {
|
||||||
|
let mut buffer = RgbImage::new(bounding_box.width, bounding_box.height);
|
||||||
|
for y in 0..bounding_box.height {
|
||||||
|
for x in 0..bounding_box.width {
|
||||||
|
buffer.put_pixel(x, y, *image.get_pixel(bounding_box.x + x, bounding_box.y + y));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
buffer
|
||||||
|
}
|
||||||
|
|
||||||
|
fn compute_box_hashes(image: &RgbImage) -> Vec<String> {
|
||||||
|
let mut hashes = Vec::new();
|
||||||
|
|
||||||
|
let boxes = get_character_bounding_boxes(image);
|
||||||
|
for bounding_box in boxes {
|
||||||
|
let trimmed = trim_to_bounding_box(image, &bounding_box);
|
||||||
|
hashes.push(image_processing::hash_image(&trimmed))
|
||||||
|
}
|
||||||
|
hashes
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Checks that the bundled fixture image yields ten boxes and that the first
/// box has a non-zero position and a non-zero size.
#[test]
fn test_bounding_boxes() {
    let png = include_bytes!("test_data/test-image-2.png");
    let decoded = image::load_from_memory(png).unwrap().to_rgb8();

    let detected = get_character_bounding_boxes(&decoded);
    assert_eq!(detected.len(), 10);

    // Only the first box is inspected in detail.
    let first = &detected[0];
    assert_ne!(first.x, 0);
    assert_ne!(first.y, 0);
    assert_ne!(first.height, 0);
    assert_ne!(first.width, 0);
}
|
||||||
|
|
||||||
|
/// Checks that the bundled fixture image produces ten hashes, one per box.
#[test]
fn test_box_hashes() {
    let png = include_bytes!("test_data/test-image-2.png");
    let decoded = image::load_from_memory(png).unwrap().to_rgb8();

    let computed = compute_box_hashes(&decoded);
    assert_eq!(computed.len(), 10);
}
|
|
@ -7,6 +7,7 @@ mod image_processing;
|
||||||
mod ocr;
|
mod ocr;
|
||||||
mod state;
|
mod state;
|
||||||
mod stats_writer;
|
mod stats_writer;
|
||||||
|
mod local_ocr;
|
||||||
|
|
||||||
use std::{
|
use std::{
|
||||||
collections::HashMap,
|
collections::HashMap,
|
||||||
|
|
Binary file not shown.
After Width: | Height: | Size: 53 KiB |
Binary file not shown.
After Width: | Height: | Size: 51 KiB |
Loading…
Reference in New Issue