supper/src/ocr.rs

136 lines
3.7 KiB
Rust
Raw Normal View History

2022-06-03 22:11:49 +00:00
use image::{Rgb, RgbImage};
2022-06-04 02:18:46 +00:00
use img_hash::ImageHash;
2022-06-03 22:11:49 +00:00
use crate::image_processing;
2022-05-21 18:12:10 +00:00
2022-06-03 22:11:49 +00:00
/// Axis-aligned rectangle, in pixel coordinates, enclosing one detected glyph.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct BoundingBox {
    /// Column of the left-most pixel of the box.
    x: u32,
    /// Row of the top-most pixel of the box.
    y: u32,
    /// Number of pixel columns covered by the box.
    width: u32,
    /// Number of pixel rows covered by the box.
    height: u32,
}
2022-06-03 22:11:49 +00:00
fn column_has_any_dark(image: &RgbImage, x: u32) -> bool {
for y in 0..image.height() {
let [r, g, b] = image.get_pixel(x, y).0;
if r < 100 && g < 100 && b < 100 {
return true;
2022-05-22 18:03:47 +00:00
}
2022-06-03 22:11:49 +00:00
}
false
2022-05-21 18:12:10 +00:00
}
2022-06-03 22:11:49 +00:00
fn row_has_any_dark(image: &RgbImage, y: u32, start_x: u32, width: u32) -> bool {
for x in start_x..(start_x + width) {
let [r, g, b] = image.get_pixel(x, y).0;
if r < 100 && g < 100 && b < 100 {
return true;
2022-05-24 00:58:28 +00:00
}
}
2022-06-03 22:11:49 +00:00
false
}
/// Advances `*x` one step at a time for as long as it is below `max` and the
/// predicate `f` accepts the current value.
///
/// The bound is tested before the predicate, so `f` is never invoked with a
/// value that is `>= max`. On return `*x` is either the first rejected value
/// or at least `max`.
fn take_while<F>(x: &mut u32, max: u32, f: F)
where
    F: Fn(u32) -> bool,
{
    loop {
        let current = *x;
        if current >= max || !f(current) {
            break;
        }
        *x = current + 1;
    }
}
/// Locates the bounding box of each horizontally separated run of dark
/// columns in `image`, scanning left to right.
///
/// The image is split at columns that contain no dark pixel. For every run of
/// consecutive columns that do contain one, the box is then tightened
/// vertically to the first and last rows with a dark pixel inside that run.
/// Boxes are returned in left-to-right order.
fn get_character_bounding_boxes(image: &RgbImage) -> Vec<BoundingBox> {
    let image_width = image.width();
    let image_height = image.height();

    let mut boxes = Vec::new();
    let mut x = 0;
    while x < image_width {
        // Skip the blank gap in front of the next run of dark columns.
        take_while(&mut x, image_width, |col| !column_has_any_dark(image, col));
        let start_x = x;
        // Consume the run of consecutive columns that contain dark pixels.
        take_while(&mut x, image_width, |col| column_has_any_dark(image, col));
        let width = x - start_x;
        if width == 0 {
            // No dark column was found, so the skip above ran to the right
            // edge and the loop condition ends the scan.
            continue;
        }

        // Count blank rows from the top of this column span...
        let mut start_y = 0;
        take_while(&mut start_y, image_height, |row| {
            !row_has_any_dark(image, row, start_x, width)
        });
        // ...and from the bottom, indexing rows upwards from the last one.
        let mut blank_rows_below = 0;
        take_while(&mut blank_rows_below, image_height, |offset| {
            !row_has_any_dark(image, image_height - 1 - offset, start_x, width)
        });

        let end_y = image_height - blank_rows_below;
        let height = end_y - start_y;
        if height >= 1 {
            boxes.push(BoundingBox {
                x: start_x,
                y: start_y,
                width,
                height,
            });
        }
    }
    boxes
}
2022-06-03 22:11:49 +00:00
fn trim_to_bounding_box(image: &RgbImage, bounding_box: &BoundingBox) -> RgbImage {
const PADDING: u32 = 2;
let mut buffer = RgbImage::from_pixel(
bounding_box.width + 2 * PADDING,
bounding_box.height + 2 * PADDING,
Rgb([0xFF, 0xFF, 0xFF]),
);
for y in 0..bounding_box.height {
for x in 0..bounding_box.width {
buffer.put_pixel(
x + PADDING,
y + PADDING,
*image.get_pixel(bounding_box.x + x, bounding_box.y + y),
);
}
}
buffer
}
/// Detects every character bounding box in `image` and returns one padded
/// crop per box, in the order the boxes were detected.
pub fn bounding_box_images(image: &RgbImage) -> Vec<RgbImage> {
    get_character_bounding_boxes(image)
        .iter()
        .map(|bounding_box| trim_to_bounding_box(image, bounding_box))
        .collect()
}
2022-06-04 02:18:46 +00:00
pub fn compute_box_hashes(image: &RgbImage) -> Vec<ImageHash> {
2022-06-03 22:11:49 +00:00
let mut hashes = Vec::new();
let boxes = get_character_bounding_boxes(image);
for bounding_box in boxes {
let trimmed = trim_to_bounding_box(image, &bounding_box);
hashes.push(image_processing::hash_image(&trimmed))
2022-05-21 18:12:10 +00:00
}
2022-06-03 22:11:49 +00:00
hashes
}
/// Checks that the fixture image yields ten boxes and that the first one has
/// a non-zero origin and non-zero size.
#[test]
fn test_bounding_boxes() {
    let png_bytes = include_bytes!("test_data/test-image-2.png");
    let rgb = image::load_from_memory(png_bytes).unwrap().to_rgb8();

    let detected = get_character_bounding_boxes(&rgb);
    assert_eq!(detected.len(), 10);

    let first = &detected[0];
    assert_ne!(first.x, 0);
    assert_ne!(first.y, 0);
    assert_ne!(first.height, 0);
    assert_ne!(first.width, 0);
}
2022-05-21 18:12:10 +00:00
2022-06-03 22:11:49 +00:00
/// Checks that one hash is produced per detected box in the fixture image.
#[test]
fn test_box_hashes() {
    let image_bytes = include_bytes!("test_data/test-image-2.png");
    let image = image::load_from_memory(image_bytes).unwrap().to_rgb8();
    let hashes = compute_box_hashes(&image);
    assert_eq!(hashes.len(), 10);
}