2022-06-03 22:11:49 +00:00
|
|
|
use image::{Rgb, RgbImage};
|
2022-06-04 02:18:46 +00:00
|
|
|
use img_hash::ImageHash;
|
2022-06-03 22:11:49 +00:00
|
|
|
|
|
|
|
use crate::image_processing;
|
2022-05-21 18:12:10 +00:00
|
|
|
|
2022-06-03 22:11:49 +00:00
|
|
|
/// Axis-aligned rectangle, in pixel coordinates, enclosing one detected run
/// of dark pixels.
///
/// All fields are plain `u32`s, so the type is cheap to copy and can be
/// compared field-by-field.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct BoundingBox {
    /// Column of the left edge.
    x: u32,
    /// Row of the top edge.
    y: u32,
    /// Number of columns covered, starting at `x`.
    width: u32,
    /// Number of rows covered, starting at `y`.
    height: u32,
}
|
|
|
|
|
2022-06-05 18:09:35 +00:00
|
|
|
fn is_dark_pixel(image: &RgbImage, x: u32, y: u32) -> bool {
|
|
|
|
let [r, g, b] = image.get_pixel(x, y).0;
|
|
|
|
r < 100 && g < 100 && b < 100
|
|
|
|
}
|
|
|
|
|
2022-06-03 22:11:49 +00:00
|
|
|
fn column_has_any_dark(image: &RgbImage, x: u32) -> bool {
|
|
|
|
for y in 0..image.height() {
|
2022-06-05 18:09:35 +00:00
|
|
|
if is_dark_pixel(image, x, y) {
|
|
|
|
return true
|
2022-05-22 18:03:47 +00:00
|
|
|
}
|
2022-06-03 22:11:49 +00:00
|
|
|
}
|
|
|
|
false
|
2022-05-21 18:12:10 +00:00
|
|
|
}
|
|
|
|
|
2022-06-03 22:11:49 +00:00
|
|
|
fn row_has_any_dark(image: &RgbImage, y: u32, start_x: u32, width: u32) -> bool {
|
|
|
|
for x in start_x..(start_x + width) {
|
2022-06-05 18:09:35 +00:00
|
|
|
if is_dark_pixel(image, x, y) {
|
2022-06-03 22:11:49 +00:00
|
|
|
return true;
|
2022-05-24 00:58:28 +00:00
|
|
|
}
|
|
|
|
}
|
2022-06-03 22:11:49 +00:00
|
|
|
false
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Advances `*x` one step at a time for as long as it is below `max` and the
/// predicate `f` accepts the current value.
///
/// On return `*x` is either the first value rejected by `f`, or `max` if
/// every value up to it was accepted. If `*x` already is `>= max` it is left
/// untouched and `f` is never called.
fn take_while<F: Fn(u32) -> bool>(x: &mut u32, max: u32, f: F) {
    loop {
        // The bound is checked first so `f` is never invoked out of range.
        if *x >= max || !f(*x) {
            break;
        }
        *x += 1;
    }
}
|
|
|
|
|
2022-06-05 18:09:35 +00:00
|
|
|
fn find_vertical_cutoff_from_right(image: &RgbImage) -> Option<(u32, u32)> {
|
|
|
|
let mut cutoffs = Vec::new();
|
|
|
|
for y in 0..image.height() {
|
|
|
|
for x in 0..image.width() {
|
|
|
|
let x = image.width() - 1 - x;
|
|
|
|
if is_dark_pixel(image, x, y) {
|
|
|
|
cutoffs.push((x, y));
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
cutoffs.sort();
|
|
|
|
cutoffs.into_iter().skip(5).nth(0)
|
|
|
|
}
|
|
|
|
|
2022-06-03 22:11:49 +00:00
|
|
|
fn get_character_bounding_boxes(image: &RgbImage) -> Vec<BoundingBox> {
|
|
|
|
let mut x = 0;
|
|
|
|
let mut boxes = Vec::new();
|
2022-06-05 18:09:35 +00:00
|
|
|
let (cutoff_x, cutoff_y) = find_vertical_cutoff_from_right(image).unwrap_or_default();
|
2022-06-03 22:11:49 +00:00
|
|
|
while x < image.width() {
|
|
|
|
take_while(&mut x, image.width(), |x| !column_has_any_dark(image, x));
|
|
|
|
|
|
|
|
let start_x = x;
|
|
|
|
take_while(&mut x, image.width(), |x| column_has_any_dark(image, x));
|
|
|
|
let width = x - start_x;
|
|
|
|
|
|
|
|
if width >= 1 {
|
2022-06-05 18:09:35 +00:00
|
|
|
let mut y = if start_x > cutoff_x { cutoff_y } else { 0 };
|
2022-06-03 22:11:49 +00:00
|
|
|
take_while(&mut y, image.height(), |y| {
|
|
|
|
!row_has_any_dark(image, y, start_x, width)
|
|
|
|
});
|
|
|
|
|
2022-06-05 18:09:35 +00:00
|
|
|
let start_y = y.min(image.height() - 1);
|
2022-06-03 22:11:49 +00:00
|
|
|
|
|
|
|
let mut inverse_y = 0;
|
2022-06-05 18:09:35 +00:00
|
|
|
take_while(&mut inverse_y, image.height() - start_y, |y| {
|
2022-06-03 22:11:49 +00:00
|
|
|
!row_has_any_dark(image, image.height() - 1 - y, start_x, width)
|
|
|
|
});
|
|
|
|
let end_y = image.height() - inverse_y;
|
|
|
|
let height = end_y - start_y;
|
|
|
|
if height >= 1 {
|
|
|
|
boxes.push(BoundingBox {
|
|
|
|
x: start_x,
|
|
|
|
y: start_y,
|
|
|
|
width,
|
|
|
|
height,
|
|
|
|
});
|
2022-05-24 00:58:28 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2022-06-03 22:11:49 +00:00
|
|
|
boxes
|
2022-05-24 00:58:28 +00:00
|
|
|
}
|
|
|
|
|
2022-06-03 22:11:49 +00:00
|
|
|
fn trim_to_bounding_box(image: &RgbImage, bounding_box: &BoundingBox) -> RgbImage {
|
|
|
|
const PADDING: u32 = 2;
|
|
|
|
let mut buffer = RgbImage::from_pixel(
|
|
|
|
bounding_box.width + 2 * PADDING,
|
|
|
|
bounding_box.height + 2 * PADDING,
|
|
|
|
Rgb([0xFF, 0xFF, 0xFF]),
|
|
|
|
);
|
|
|
|
for y in 0..bounding_box.height {
|
|
|
|
for x in 0..bounding_box.width {
|
|
|
|
buffer.put_pixel(
|
|
|
|
x + PADDING,
|
|
|
|
y + PADDING,
|
|
|
|
*image.get_pixel(bounding_box.x + x, bounding_box.y + y),
|
|
|
|
);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
buffer
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn bounding_box_images(image: &RgbImage) -> Vec<RgbImage> {
|
|
|
|
let mut trimmed = Vec::new();
|
|
|
|
|
|
|
|
let boxes = get_character_bounding_boxes(image);
|
|
|
|
for bounding_box in boxes {
|
|
|
|
trimmed.push(trim_to_bounding_box(image, &bounding_box));
|
2022-05-21 18:12:10 +00:00
|
|
|
}
|
2022-06-03 22:11:49 +00:00
|
|
|
trimmed
|
|
|
|
}
|
|
|
|
|
2022-06-04 02:18:46 +00:00
|
|
|
pub fn compute_box_hashes(image: &RgbImage) -> Vec<ImageHash> {
|
2022-06-03 22:11:49 +00:00
|
|
|
let mut hashes = Vec::new();
|
|
|
|
|
|
|
|
let boxes = get_character_bounding_boxes(image);
|
|
|
|
for bounding_box in boxes {
|
|
|
|
let trimmed = trim_to_bounding_box(image, &bounding_box);
|
|
|
|
hashes.push(image_processing::hash_image(&trimmed))
|
2022-05-21 18:12:10 +00:00
|
|
|
}
|
2022-06-03 22:11:49 +00:00
|
|
|
hashes
|
|
|
|
}
|
|
|
|
|
2022-06-05 18:09:35 +00:00
|
|
|
#[cfg(test)]
mod test {
    use crate::ocr::{compute_box_hashes, get_character_bounding_boxes};

    /// Decodes the embedded sample PNG into an RGB8 image shared by the tests.
    fn load_sample() -> image::RgbImage {
        let image_bytes = include_bytes!("test_data/test-montserrat.png");
        image::load_from_memory(image_bytes).unwrap().to_rgb8()
    }

    #[test]
    fn test_bounding_boxes() {
        let sample = load_sample();
        let detected = get_character_bounding_boxes(&sample);
        assert_eq!(detected.len(), 10);

        // The first box must be a non-empty box away from the top-left corner.
        let first = &detected[0];
        assert_ne!(first.x, 0);
        assert_ne!(first.y, 0);
        assert_ne!(first.height, 0);
        assert_ne!(first.width, 0);
    }

    #[test]
    fn test_box_hashes() {
        let sample = load_sample();
        let digests = compute_box_hashes(&sample);
        assert_eq!(digests.len(), 10);
    }
}
|