use image::{Rgb, RgbImage}; use img_hash::ImageHash; use crate::image_processing; #[derive(Debug)] struct BoundingBox { x: u32, y: u32, width: u32, height: u32, } fn is_dark_pixel(image: &RgbImage, x: u32, y: u32) -> bool { let [r, g, b] = image.get_pixel(x, y).0; r < 100 && g < 100 && b < 100 } fn column_has_any_dark(image: &RgbImage, x: u32, start_y: u32) -> bool { for y in start_y..image.height() { if is_dark_pixel(image, x, y) { return true; } } false } fn row_has_any_dark(image: &RgbImage, y: u32, start_x: u32, width: u32) -> bool { for x in start_x..(start_x + width) { if is_dark_pixel(image, x, y) { return true; } } false } fn take_while bool>(x: &mut u32, max: u32, f: F) { while *x < max && f(*x) { *x += 1; } } fn find_vertical_cutoff_from_right(image: &RgbImage) -> Option<(u32, u32)> { let mut cutoffs = Vec::new(); for y in 0..image.height() { for x in 0..image.width() { let x = image.width() - 1 - x; if is_dark_pixel(image, x, y) { cutoffs.push((x, y)); break; } } } cutoffs.sort_unstable(); cutoffs.into_iter().nth(5) } fn get_character_bounding_boxes(image: &RgbImage) -> Vec { let mut x = 0; let mut boxes = Vec::new(); let (cutoff_x, cutoff_y) = find_vertical_cutoff_from_right(image).unwrap_or_default(); while x < image.width() { let start_y = if x > cutoff_x { cutoff_y } else { 0 }; take_while(&mut x, image.width(), |x| !column_has_any_dark(image, x, start_y)); let start_x = x; take_while(&mut x, image.width(), |x| column_has_any_dark(image, x, start_y)); let width = x - start_x; if width >= 1 { let mut y = if start_x > cutoff_x { cutoff_y } else { 0 }; take_while(&mut y, image.height(), |y| { !row_has_any_dark(image, y, start_x, width) }); let start_y = y.min(image.height() - 1); let mut inverse_y = 0; take_while(&mut inverse_y, image.height() - start_y, |y| { !row_has_any_dark(image, image.height() - 1 - y, start_x, width) }); let end_y = image.height() - inverse_y; let height = end_y - start_y; if height >= 1 { boxes.push(BoundingBox { x: start_x, y: start_y, width, height, }); } } } boxes } fn trim_to_bounding_box(image: &RgbImage, bounding_box: &BoundingBox) -> RgbImage { const PADDING: u32 = 2; let mut buffer = RgbImage::from_pixel( bounding_box.width + 2 * PADDING, bounding_box.height + 2 * PADDING, Rgb([0xFF, 0xFF, 0xFF]), ); for y in 0..bounding_box.height { for x in 0..bounding_box.width { buffer.put_pixel( x + PADDING, y + PADDING, *image.get_pixel(bounding_box.x + x, bounding_box.y + y), ); } } buffer } pub fn bounding_box_images(image: &RgbImage) -> Vec { let mut trimmed = Vec::new(); let boxes = get_character_bounding_boxes(image); for bounding_box in boxes { trimmed.push(trim_to_bounding_box(image, &bounding_box)); } trimmed } pub fn compute_box_hashes(image: &RgbImage) -> Vec { let mut hashes = Vec::new(); let boxes = get_character_bounding_boxes(image); for bounding_box in boxes { let trimmed = trim_to_bounding_box(image, &bounding_box); hashes.push(image_processing::hash_image(&trimmed)) } hashes } #[cfg(test)] mod test { use crate::ocr::{compute_box_hashes, get_character_bounding_boxes}; #[test] fn test_bounding_boxes() { let image_bytes = include_bytes!("test_data/test-montserrat.png"); let image = image::load_from_memory(image_bytes).unwrap().to_rgb8(); let boxes = get_character_bounding_boxes(&image); assert_eq!(boxes.len(), 10); assert_ne!(boxes[0].x, 0); assert_ne!(boxes[0].y, 0); assert_ne!(boxes[0].height, 0); assert_ne!(boxes[0].width, 0); } #[test] fn test_box_hashes() { let image_bytes = include_bytes!("test_data/test-montserrat.png"); let image = image::load_from_memory(image_bytes).unwrap().to_rgb8(); let hashes = compute_box_hashes(&image); assert_eq!(hashes.len(), 10); } }