1use crate::ring::RingOffsets;
3use crate::{ChessParams, ResponseMap};
4
5#[cfg(feature = "rayon")]
6use rayon::prelude::*;
7
8#[cfg(feature = "simd")]
9use core::simd::Simd;
10
11#[cfg(feature = "simd")]
12use std::simd::prelude::{SimdInt, SimdUint};
13
14#[cfg(feature = "simd")]
15const LANES: usize = 16;
16
17#[cfg(feature = "simd")]
18type U8s = Simd<u8, LANES>;
19
20#[cfg(feature = "simd")]
21type I16s = Simd<i16, LANES>;
22
23#[cfg(feature = "simd")]
24type I32s = Simd<i32, LANES>;
25
26#[cfg(feature = "tracing")]
27use tracing::instrument;
28
29#[derive(Clone, Copy, Debug)]
33pub struct Roi {
34 x0: usize,
35 y0: usize,
36 x1: usize,
37 y1: usize,
38}
39
40impl Roi {
41 pub fn new(x0: usize, y0: usize, x1: usize, y1: usize) -> Option<Self> {
43 if x0 < x1 && y0 < y1 {
44 Some(Self { x0, y0, x1, y1 })
45 } else {
46 None
47 }
48 }
49
50 #[inline]
51 pub fn x0(&self) -> usize {
52 self.x0
53 }
54 #[inline]
55 pub fn y0(&self) -> usize {
56 self.y0
57 }
58 #[inline]
59 pub fn x1(&self) -> usize {
60 self.x1
61 }
62 #[inline]
63 pub fn y1(&self) -> usize {
64 self.y1
65 }
66}
67
68#[inline]
69fn ring_from_params(params: &ChessParams) -> (RingOffsets, &'static [(i32, i32); 16]) {
70 let ring = params.ring();
71 (ring, ring.offsets())
72}
73
74#[cfg_attr(
128 feature = "tracing",
129 instrument(level = "info", skip(img, params), fields(w, h))
130)]
131pub fn chess_response_u8(img: &[u8], w: usize, h: usize, params: &ChessParams) -> ResponseMap {
132 #[cfg(feature = "rayon")]
134 {
135 compute_response_parallel(img, w, h, params)
136 }
137 #[cfg(not(feature = "rayon"))]
138 {
139 compute_response_sequential(img, w, h, params)
140 }
141}
142
143pub fn chess_response_u8_scalar(
146 img: &[u8],
147 w: usize,
148 h: usize,
149 params: &ChessParams,
150) -> ResponseMap {
151 compute_response_sequential_scalar(img, w, h, params)
152}
153
154#[cfg_attr(
167 feature = "tracing",
168 instrument(
169 level = "debug",
170 skip(img, params),
171 fields(img_w, img_h, roi_w = roi.x1 - roi.x0, roi_h = roi.y1 - roi.y0)
172 )
173)]
174pub fn chess_response_u8_patch(
175 img: &[u8],
176 img_w: usize,
177 img_h: usize,
178 params: &ChessParams,
179 roi: Roi,
180) -> ResponseMap {
181 let Roi { x0, y0, x1, y1 } = roi;
182
183 let x0 = x0.min(img_w);
185 let y0 = y0.min(img_h);
186 let x1 = x1.min(img_w);
187 let y1 = y1.min(img_h);
188
189 if x1 <= x0 || y1 <= y0 {
190 return ResponseMap {
191 w: 0,
192 h: 0,
193 data: Vec::new(),
194 };
195 }
196
197 let patch_w = x1 - x0;
198 let patch_h = y1 - y0;
199 let mut data = vec![0.0f32; patch_w * patch_h];
200
201 let (ring_kind, ring) = ring_from_params(params);
202 let r = ring_kind.radius() as i32;
203
204 let gx0 = r as usize;
206 let gy0 = r as usize;
207 let gx1 = img_w - r as usize;
208 let gy1 = img_h - r as usize;
209
210 for py in 0..patch_h {
211 let gy = y0 + py;
212 if gy < gy0 || gy >= gy1 {
213 continue;
214 }
215
216 let row_gx0 = x0.max(gx0);
218 let row_gx1 = x1.min(gx1);
219 if row_gx0 >= row_gx1 {
220 continue;
221 }
222
223 let row = &mut data[py * patch_w..(py + 1) * patch_w];
224 let rel_start = row_gx0 - x0;
225 let rel_end = row_gx1 - x0;
226 let dst_row = &mut row[rel_start..rel_end];
227
228 #[cfg(feature = "simd")]
229 {
230 compute_row_range_simd(img, img_w, gy as i32, ring, dst_row, row_gx0, row_gx1);
231 }
232
233 #[cfg(not(feature = "simd"))]
234 {
235 compute_row_range_scalar(img, img_w, gy as i32, ring, dst_row, row_gx0, row_gx1);
236 }
237 }
238
239 ResponseMap {
240 w: patch_w,
241 h: patch_h,
242 data,
243 }
244}
245
246#[cfg(not(feature = "rayon"))]
247fn compute_response_sequential(
248 img: &[u8],
249 w: usize,
250 h: usize,
251 params: &ChessParams,
252) -> ResponseMap {
253 let (ring_kind, ring) = ring_from_params(params);
254 let r = ring_kind.radius() as i32;
255
256 let mut data = vec![0.0f32; w * h];
257
258 let x0 = r as usize;
260 let y0 = r as usize;
261 let x1 = w - r as usize;
262 let y1 = h - r as usize;
263
264 for y in y0..y1 {
265 let row = &mut data[y * w..(y + 1) * w];
266 let dst_row = &mut row[x0..x1];
267
268 #[cfg(feature = "simd")]
269 {
270 compute_row_range_simd(img, w, y as i32, ring, dst_row, x0, x1);
271 }
272
273 #[cfg(not(feature = "simd"))]
274 {
275 compute_row_range_scalar(img, w, y as i32, ring, dst_row, x0, x1);
276 }
277 }
278
279 ResponseMap { w, h, data }
280}
281
282fn compute_response_sequential_scalar(
283 img: &[u8],
284 w: usize,
285 h: usize,
286 params: &ChessParams,
287) -> ResponseMap {
288 let (ring_kind, ring) = ring_from_params(params);
289 let r = ring_kind.radius() as i32;
290
291 let mut data = vec![0.0f32; w * h];
292
293 let x0 = r as usize;
295 let y0 = r as usize;
296 let x1 = w - r as usize;
297 let y1 = h - r as usize;
298
299 for y in y0..y1 {
300 let row = &mut data[y * w..(y + 1) * w];
301 let dst_row = &mut row[x0..x1];
302 compute_row_range_scalar(img, w, y as i32, ring, dst_row, x0, x1);
303 }
304
305 ResponseMap { w, h, data }
306}
307
308#[cfg(feature = "rayon")]
309fn compute_response_parallel(img: &[u8], w: usize, h: usize, params: &ChessParams) -> ResponseMap {
310 let (ring_kind, ring) = ring_from_params(params);
311 let r = ring_kind.radius() as i32;
312 let mut data = vec![0.0f32; w * h];
313
314 let x0 = r as usize;
316 let y0 = r as usize;
317 let x1 = w - r as usize;
318 let y1 = h - r as usize;
319
320 data.par_chunks_mut(w).enumerate().for_each(|(y, row)| {
323 let y_i = y as i32;
324 if y_i < y0 as i32 || y_i >= y1 as i32 {
325 return;
326 }
327
328 let dst_row = &mut row[x0..x1];
329
330 #[cfg(feature = "simd")]
331 {
332 compute_row_range_simd(img, w, y_i, ring, dst_row, x0, x1);
333 }
334
335 #[cfg(not(feature = "simd"))]
336 {
337 compute_row_range_scalar(img, w, y_i, ring, dst_row, x0, x1);
338 }
339 });
340
341 ResponseMap { w, h, data }
342}
343
344#[cfg(not(feature = "rayon"))]
346#[allow(dead_code)]
347fn compute_response_parallel(img: &[u8], w: usize, h: usize, params: &ChessParams) -> ResponseMap {
348 compute_response_sequential(img, w, h, params)
349}
350
351#[inline(always)]
365fn chess_response_at_u8(img: &[u8], w: usize, x: i32, y: i32, ring: &[(i32, i32); 16]) -> f32 {
366 let mut s = [0i32; 16];
368 for k in 0..16 {
369 let (dx, dy) = ring[k];
370 let xx = (x + dx) as usize;
371 let yy = (y + dy) as usize;
372 s[k] = img[yy * w + xx] as i32;
373 }
374
375 let mut sr = 0i32;
377 for k in 0..4 {
378 let a = s[k] + s[k + 8];
379 let b = s[k + 4] + s[k + 12];
380 sr += (a - b).abs();
381 }
382
383 let mut dr = 0i32;
385 for k in 0..8 {
386 dr += (s[k] - s[k + 8]).abs();
387 }
388
389 let sum_ring: i32 = s.iter().sum();
391 let mu_n = sum_ring as f32 / 16.0;
392
393 let c = img[(y as usize) * w + (x as usize)] as f32;
395 let n = img[((y - 1) as usize) * w + (x as usize)] as f32;
396 let s0 = img[((y + 1) as usize) * w + (x as usize)] as f32;
397 let e = img[(y as usize) * w + ((x + 1) as usize)] as f32;
398 let w0 = img[(y as usize) * w + ((x - 1) as usize)] as f32;
399 let mu_l = (c + n + s0 + e + w0) / 5.0;
400
401 let mr = (mu_n - mu_l).abs();
402
403 (sr as f32) - (dr as f32) - 16.0 * mr
404}
405
406fn compute_row_range_scalar(
407 img: &[u8],
408 w: usize,
409 y: i32,
410 ring: &[(i32, i32); 16],
411 dst_row: &mut [f32],
412 x_start: usize,
413 x_end: usize,
414) {
415 for (offset, x) in (x_start..x_end).enumerate() {
416 dst_row[offset] = chess_response_at_u8(img, w, x as i32, y, ring);
417 }
418}
419
420#[cfg(feature = "simd")]
421fn compute_row_range_simd(
422 img: &[u8],
423 w: usize,
424 y: i32,
425 ring: &[(i32, i32); 16],
426 dst_row: &mut [f32],
427 x_start: usize,
428 x_end: usize,
429) {
430 let y_usize = y as usize;
431
432 let mut ring_bases: [isize; 16] = [0; 16];
434 for k in 0..16 {
435 let (dx, dy) = ring[k];
436 let yy = (y + dy) as usize;
437 ring_bases[k] = (yy * w) as isize + dx as isize;
438 }
439
440 let mut x = x_start;
441
442 while x + LANES <= x_end {
443 let mut s: [I16s; 16] = [I16s::splat(0); 16];
445
446 for k in 0..16 {
447 let base = (ring_bases[k] + x as isize) as usize;
448
449 let v_u8 = U8s::from_slice(&img[base..base + LANES]);
451 s[k] = v_u8.cast::<i16>();
452 }
453
454 let mut sum_ring_v = I32s::splat(0);
456 for &v in &s {
457 sum_ring_v += v.cast::<i32>();
458 }
459
460 let mut sr_v = I32s::splat(0);
462 for k in 0..4 {
463 let a = s[k].cast::<i32>() + s[k + 8].cast::<i32>();
464 let b = s[k + 4].cast::<i32>() + s[k + 12].cast::<i32>();
465 sr_v += (a - b).abs();
466 }
467
468 let mut dr_v = I32s::splat(0);
470 for k in 0..8 {
471 let a = s[k].cast::<i32>();
472 let b = s[k + 8].cast::<i32>();
473 dr_v += (a - b).abs();
474 }
475
476 let sr_arr = sr_v.cast::<f32>().to_array();
478 let dr_arr = dr_v.cast::<f32>().to_array();
479 let sum_ring_arr = sum_ring_v.cast::<f32>().to_array();
480
481 for lane in 0..LANES {
483 let xx = x + lane;
484 let px = xx - x_start;
485
486 let c = img[y_usize * w + xx] as f32;
488 let n = img[(y_usize - 1) * w + xx] as f32;
489 let s0 = img[(y_usize + 1) * w + xx] as f32;
490 let e = img[y_usize * w + (xx + 1)] as f32;
491 let w0 = img[y_usize * w + (xx - 1)] as f32;
492
493 let mu_n = sum_ring_arr[lane] / 16.0;
494 let mu_l = (c + n + s0 + e + w0) / 5.0;
495 let mr = (mu_n - mu_l).abs();
496
497 dst_row[px] = sr_arr[lane] - dr_arr[lane] - 16.0 * mr;
498 }
499
500 x += LANES;
501 }
502
503 while x < x_end {
505 let px = x - x_start;
506 let resp = chess_response_at_u8(img, w, x as i32, y, ring);
507 dst_row[px] = resp;
508 x += 1;
509 }
510}