Skip to main content

chess_corners_core/detect/chess/
response.rs

1//! Dense ChESS response computation for 8-bit grayscale inputs.
2use super::ring::RingOffsets;
3use crate::{ChessParams, ResponseMap};
4
5#[cfg(feature = "rayon")]
6use rayon::prelude::*;
7
8#[cfg(feature = "simd")]
9use core::simd::Simd;
10
11#[cfg(feature = "simd")]
12use std::simd::prelude::{SimdInt, SimdUint};
13
14#[cfg(feature = "simd")]
15const LANES: usize = 16;
16
17#[cfg(feature = "simd")]
18type U8s = Simd<u8, LANES>;
19
20#[cfg(feature = "simd")]
21type I16s = Simd<i16, LANES>;
22
23#[cfg(feature = "simd")]
24type I32s = Simd<i32, LANES>;
25
26#[cfg(feature = "tracing")]
27use tracing::instrument;
28
29/// Rectangular region of interest with half-open coordinate semantics
30/// `[x0, x1) × [y0, y1)`.
31///
32/// Coordinates are in image-pixel space relative to the
33/// [`ImageView`](crate::ImageView)'s `origin` offset. The invariants
34/// `x0 < x1` and `y0 < y1` are enforced by [`Roi::new`], which returns
35/// `None` when they are violated.
36#[derive(Clone, Copy, Debug)]
37pub struct Roi {
38    x0: usize,
39    y0: usize,
40    x1: usize,
41    y1: usize,
42}
43
44impl Roi {
45    /// Create a new ROI. Returns `None` if `x0 >= x1` or `y0 >= y1`.
46    pub fn new(x0: usize, y0: usize, x1: usize, y1: usize) -> Option<Self> {
47        if x0 < x1 && y0 < y1 {
48            Some(Self { x0, y0, x1, y1 })
49        } else {
50            None
51        }
52    }
53
54    #[inline]
55    pub fn x0(&self) -> usize {
56        self.x0
57    }
58    #[inline]
59    pub fn y0(&self) -> usize {
60        self.y0
61    }
62    #[inline]
63    pub fn x1(&self) -> usize {
64        self.x1
65    }
66    #[inline]
67    pub fn y1(&self) -> usize {
68        self.y1
69    }
70}
71
72#[inline]
73fn ring_from_params(params: &ChessParams) -> (RingOffsets, &'static [(i32, i32); 16]) {
74    let ring = params.ring();
75    (ring, ring.offsets())
76}
77
78/// Compute the dense ChESS response for an 8-bit grayscale image.
79///
80/// The response at each valid pixel center is computed from a 16‑sample ring
81/// around the pixel and a 5‑pixel cross at the center. For a given center
82/// `c`, let `s[0..16)` be the ring samples in the canonical order:
83///
84/// - `SR` (sum of “square” responses) compares four opposite quadrants on
85///   the ring:
86///
87///   ```text
88///   SR = sum_{k=0..3} | (s[k] + s[k+8]) - (s[k+4] + s[k+12]) |
89///   ```
90///
91/// - `DR` (sum of “difference” responses) enforces edge‑like structure:
92///
93///   ```text
94///   DR = sum_{k=0..7} | s[k] - s[k+8] |
95///   ```
96///
97/// - `μₙ` is the mean of all 16 ring samples.
98/// - `μₗ` is the local mean of the 5‑pixel cross at the center
99///   (`c`, `north`, `south`, `east`, `west`).
100///
101/// The final ChESS response is:
102///
103/// ```text
104/// R = SR - DR - 16 * |μₙ - μₗ|
105/// ```
106///
107/// where high positive values correspond to chessboard‑like corners.
108///
109/// # Score contract
110///
111/// `R` is the **unnormalized** ChESS response defined by Bennett and
112/// Lasenby (2014). The units are 8-bit pixel sums — not normalized
113/// to `[0, 1]`, not divided by a local scale, not divided by the
114/// scene max. Every term is linear in the pixel values, so with 8-bit
115/// input `R` is bounded by `SR − DR − 16·MR ∈ [−24·255, 8·255]`
116/// (≈ `[−6120, 2040]`); in practice chessboard corners produce scores
117/// well below that upper bound.
118///
119/// The paper's acceptance criterion is simply `R > 0`. That is what
120/// [`ChessParams`]`::default()` encodes via `threshold_abs = Some(0.0)`
121/// combined with a strict comparison in
122/// [`crate::detect::detect_corners_from_response`]. The optional
123/// `threshold_rel` / `threshold_abs` fields are **adaptive policies
124/// layered on top** of the paper's score — they are a convenience for
125/// trading off sensitivity vs false positives, not part of the score
126/// definition.
127///
128/// # Implementation strategy
129///
130/// Internally the image is processed row‑by‑row, but only pixels whose
131/// full ring lies inside the image bounds are evaluated; border pixels
132/// are left at zero in the returned [`ResponseMap`].
133///
134/// - Without any features, `chess_response_u8` uses a straightforward
135///   nested `for y { for x { ... } }` scalar loop and relies on the
136///   compiler’s auto‑vectorization in release builds.
137/// - With the `rayon` feature, the work is split into independent row
138///   slices and processed in parallel using `rayon::par_chunks_mut`.
139/// - With the `simd` feature, the inner loop over `x` is rewritten to
140///   operate on `LANES` pixels at a time using portable SIMD vectors
141///   (currently 16 lanes of `u8`). The ring samples are gathered into
142///   SIMD registers, `SR`/`DR`/`μₙ` are accumulated in integer vectors,
143///   and the final response is written back per lane.
144/// - With both `rayon` and `simd` enabled, each row is processed in
145///   parallel *and* each row uses the SIMD‑accelerated inner loop.
146///
147/// All feature combinations produce the same output values (within a
148/// small tolerance for floating‑point rounding), and differ only in
149/// performance characteristics.
150#[cfg_attr(
151    feature = "tracing",
152    instrument(level = "info", skip(img, params), fields(w, h))
153)]
154pub fn chess_response_u8(img: &[u8], w: usize, h: usize, params: &ChessParams) -> ResponseMap {
155    // rayon path compiled only when feature is enabled
156    #[cfg(feature = "rayon")]
157    {
158        compute_response_parallel(img, w, h, params)
159    }
160    #[cfg(not(feature = "rayon"))]
161    {
162        compute_response_sequential(img, w, h, params)
163    }
164}
165
166/// Always uses the scalar implementation (no rayon, no SIMD),
167/// useful for reference/golden testing.
168pub fn chess_response_u8_scalar(
169    img: &[u8],
170    w: usize,
171    h: usize,
172    params: &ChessParams,
173) -> ResponseMap {
174    compute_response_sequential_scalar(img, w, h, params)
175}
176
177/// Compute the ChESS response only inside a rectangular ROI of the image.
178///
179/// The ROI is given in image coordinates [x0, x1) × [y0, y1) via [`Roi`]. The
180/// returned [`ResponseMap`] has width (x1 - x0) and height (y1 - y0), with
181/// coordinates relative to (x0, y0).
182///
183/// Pixels where the ChESS ring would go out of bounds (w.r.t. the *full*
184/// image) are left at 0.0, and will be ignored by the detector because they
185/// lie inside the border margin. Internally this reuses the same scalar,
186/// SIMD, and optional `rayon` row kernels as [`chess_response_u8`], so ROI
187/// refinement benefits from the same feature combinations as the full-frame
188/// response path.
189#[cfg_attr(
190    feature = "tracing",
191    instrument(
192        level = "debug",
193        skip(img, params),
194        fields(img_w, img_h, roi_w = roi.x1 - roi.x0, roi_h = roi.y1 - roi.y0)
195    )
196)]
197pub fn chess_response_u8_patch(
198    img: &[u8],
199    img_w: usize,
200    img_h: usize,
201    params: &ChessParams,
202    roi: Roi,
203) -> ResponseMap {
204    let Roi { x0, y0, x1, y1 } = roi;
205
206    // Clamp ROI to the image bounds
207    let x0 = x0.min(img_w);
208    let y0 = y0.min(img_h);
209    let x1 = x1.min(img_w);
210    let y1 = y1.min(img_h);
211
212    if x1 <= x0 || y1 <= y0 {
213        return ResponseMap {
214            w: 0,
215            h: 0,
216            data: Vec::new(),
217        };
218    }
219
220    let patch_w = x1 - x0;
221    let patch_h = y1 - y0;
222    let mut data = vec![0.0f32; patch_w * patch_h];
223
224    let (ring_kind, ring) = ring_from_params(params);
225    let r = ring_kind.radius() as i32;
226
227    // Safe region for ring centers in *global* image coordinates
228    let gx0 = r as usize;
229    let gy0 = r as usize;
230    let gx1 = img_w - r as usize;
231    let gy1 = img_h - r as usize;
232
233    for py in 0..patch_h {
234        let gy = y0 + py;
235        if gy < gy0 || gy >= gy1 {
236            continue;
237        }
238
239        // Global x-range where the ring is valid on this row.
240        let row_gx0 = x0.max(gx0);
241        let row_gx1 = x1.min(gx1);
242        if row_gx0 >= row_gx1 {
243            continue;
244        }
245
246        let row = &mut data[py * patch_w..(py + 1) * patch_w];
247        let rel_start = row_gx0 - x0;
248        let rel_end = row_gx1 - x0;
249        let dst_row = &mut row[rel_start..rel_end];
250
251        #[cfg(feature = "simd")]
252        {
253            compute_row_range_simd(img, img_w, gy as i32, ring, dst_row, row_gx0, row_gx1);
254        }
255
256        #[cfg(not(feature = "simd"))]
257        {
258            compute_row_range_scalar(img, img_w, gy as i32, ring, dst_row, row_gx0, row_gx1);
259        }
260    }
261
262    ResponseMap {
263        w: patch_w,
264        h: patch_h,
265        data,
266    }
267}
268
269#[cfg(not(feature = "rayon"))]
270fn compute_response_sequential(
271    img: &[u8],
272    w: usize,
273    h: usize,
274    params: &ChessParams,
275) -> ResponseMap {
276    let (ring_kind, ring) = ring_from_params(params);
277    let r = ring_kind.radius() as i32;
278
279    let mut data = vec![0.0f32; w * h];
280
281    // only evaluate where full ring fits
282    let x0 = r as usize;
283    let y0 = r as usize;
284    let x1 = w - r as usize;
285    let y1 = h - r as usize;
286
287    for y in y0..y1 {
288        let row = &mut data[y * w..(y + 1) * w];
289        let dst_row = &mut row[x0..x1];
290
291        #[cfg(feature = "simd")]
292        {
293            compute_row_range_simd(img, w, y as i32, ring, dst_row, x0, x1);
294        }
295
296        #[cfg(not(feature = "simd"))]
297        {
298            compute_row_range_scalar(img, w, y as i32, ring, dst_row, x0, x1);
299        }
300    }
301
302    ResponseMap { w, h, data }
303}
304
305fn compute_response_sequential_scalar(
306    img: &[u8],
307    w: usize,
308    h: usize,
309    params: &ChessParams,
310) -> ResponseMap {
311    let (ring_kind, ring) = ring_from_params(params);
312    let r = ring_kind.radius() as i32;
313
314    let mut data = vec![0.0f32; w * h];
315
316    // only evaluate where full ring fits
317    let x0 = r as usize;
318    let y0 = r as usize;
319    let x1 = w - r as usize;
320    let y1 = h - r as usize;
321
322    for y in y0..y1 {
323        let row = &mut data[y * w..(y + 1) * w];
324        let dst_row = &mut row[x0..x1];
325        compute_row_range_scalar(img, w, y as i32, ring, dst_row, x0, x1);
326    }
327
328    ResponseMap { w, h, data }
329}
330
331#[cfg(feature = "rayon")]
332fn compute_response_parallel(img: &[u8], w: usize, h: usize, params: &ChessParams) -> ResponseMap {
333    let (ring_kind, ring) = ring_from_params(params);
334    let r = ring_kind.radius() as i32;
335    let mut data = vec![0.0f32; w * h];
336
337    // ring margin
338    let x0 = r as usize;
339    let y0 = r as usize;
340    let x1 = w - r as usize;
341    let y1 = h - r as usize;
342
343    // Parallelize over rows. We keep the exact same logic and write
344    // each row's slice independently.
345    data.par_chunks_mut(w).enumerate().for_each(|(y, row)| {
346        let y_i = y as i32;
347        if y_i < y0 as i32 || y_i >= y1 as i32 {
348            return;
349        }
350
351        let dst_row = &mut row[x0..x1];
352
353        #[cfg(feature = "simd")]
354        {
355            compute_row_range_simd(img, w, y_i, ring, dst_row, x0, x1);
356        }
357
358        #[cfg(not(feature = "simd"))]
359        {
360            compute_row_range_scalar(img, w, y_i, ring, dst_row, x0, x1);
361        }
362    });
363
364    ResponseMap { w, h, data }
365}
366
367// Fallback stub: when the `rayon` feature is off, `chess_response_u8` takes
368// the sequential branch directly and never calls `compute_response_parallel`.
369// The stub keeps the name defined so no `#[cfg]` guards are needed at call
370// sites that are themselves already gated on `#[cfg(feature = "rayon")]`.
371#[cfg(not(feature = "rayon"))]
372#[cfg_attr(not(feature = "rayon"), allow(dead_code))]
373fn compute_response_parallel(img: &[u8], w: usize, h: usize, params: &ChessParams) -> ResponseMap {
374    compute_response_sequential(img, w, h, params)
375}
376
377/// Low-level ChESS response at a single pixel center.
378///
379/// This is the scalar reference implementation used by both the sequential
380/// and SIMD paths:
381///
382/// - gathers 16 ring samples around `(x, y)` using the offsets defined in
383///   [`crate::ring`],
384/// - computes `SR`, `DR`, the ring mean `μₙ`, and the 5‑pixel local mean
385///   `μₗ`, and
386/// - returns `R = SR - DR - 16 * |μₙ - μₗ|` as a `f32`.
387///
388/// Callers are responsible for ensuring that `(x, y)` is far enough from the
389/// image border so that all ring and 5‑pixel cross accesses are in‑bounds.
390#[inline(always)]
391fn chess_response_at_u8(img: &[u8], w: usize, x: i32, y: i32, ring: &[(i32, i32); 16]) -> f32 {
392    // gather ring samples into i32
393    let mut s = [0i32; 16];
394    for k in 0..16 {
395        let (dx, dy) = ring[k];
396        let xx = (x + dx) as usize;
397        let yy = (y + dy) as usize;
398        s[k] = img[yy * w + xx] as i32;
399    }
400
401    // SR
402    let mut sr = 0i32;
403    for k in 0..4 {
404        let a = s[k] + s[k + 8];
405        let b = s[k + 4] + s[k + 12];
406        sr += (a - b).abs();
407    }
408
409    // DR
410    let mut dr = 0i32;
411    for k in 0..8 {
412        dr += (s[k] - s[k + 8]).abs();
413    }
414
415    // neighbor mean
416    let sum_ring: i32 = s.iter().sum();
417    let mu_n = sum_ring as f32 / 16.0;
418
419    // local mean (5 px cross)
420    let c = img[(y as usize) * w + (x as usize)] as f32;
421    let n = img[((y - 1) as usize) * w + (x as usize)] as f32;
422    let s0 = img[((y + 1) as usize) * w + (x as usize)] as f32;
423    let e = img[(y as usize) * w + ((x + 1) as usize)] as f32;
424    let w0 = img[(y as usize) * w + ((x - 1) as usize)] as f32;
425    let mu_l = (c + n + s0 + e + w0) / 5.0;
426
427    let mr = (mu_n - mu_l).abs();
428
429    (sr as f32) - (dr as f32) - 16.0 * mr
430}
431
432fn compute_row_range_scalar(
433    img: &[u8],
434    w: usize,
435    y: i32,
436    ring: &[(i32, i32); 16],
437    dst_row: &mut [f32],
438    x_start: usize,
439    x_end: usize,
440) {
441    for (offset, x) in (x_start..x_end).enumerate() {
442        dst_row[offset] = chess_response_at_u8(img, w, x as i32, y, ring);
443    }
444}
445
446#[cfg(feature = "simd")]
447fn compute_row_range_simd(
448    img: &[u8],
449    w: usize,
450    y: i32,
451    ring: &[(i32, i32); 16],
452    dst_row: &mut [f32],
453    x_start: usize,
454    x_end: usize,
455) {
456    let y_usize = y as usize;
457
458    // Precompute row bases for each ring sample to avoid recomputing (y+dy)*w.
459    let mut ring_bases: [isize; 16] = [0; 16];
460    for k in 0..16 {
461        let (dx, dy) = ring[k];
462        let yy = (y + dy) as usize;
463        ring_bases[k] = (yy * w) as isize + dx as isize;
464    }
465
466    let mut x = x_start;
467
468    while x + LANES <= x_end {
469        // Gather ring samples for LANES pixels starting at x
470        let mut s: [I16s; 16] = [I16s::splat(0); 16];
471
472        for k in 0..16 {
473            let base = (ring_bases[k] + x as isize) as usize;
474
475            // SAFETY: x range + radius guarantees we stay in bounds
476            let v_u8 = U8s::from_slice(&img[base..base + LANES]);
477            s[k] = v_u8.cast::<i16>();
478        }
479
480        // Sum of ring values (for neighbor mean)
481        let mut sum_ring_v = I32s::splat(0);
482        for &v in &s {
483            sum_ring_v += v.cast::<i32>();
484        }
485
486        // SR
487        let mut sr_v = I32s::splat(0);
488        for k in 0..4 {
489            let a = s[k].cast::<i32>() + s[k + 8].cast::<i32>();
490            let b = s[k + 4].cast::<i32>() + s[k + 12].cast::<i32>();
491            sr_v += (a - b).abs();
492        }
493
494        // DR
495        let mut dr_v = I32s::splat(0);
496        for k in 0..8 {
497            let a = s[k].cast::<i32>();
498            let b = s[k + 8].cast::<i32>();
499            dr_v += (a - b).abs();
500        }
501
502        // Convert vectors to scalar arrays for the MR step
503        let sr_arr = sr_v.cast::<f32>().to_array();
504        let dr_arr = dr_v.cast::<f32>().to_array();
505        let sum_ring_arr = sum_ring_v.cast::<f32>().to_array();
506
507        // Per-lane local mean + final response
508        for lane in 0..LANES {
509            let xx = x + lane;
510            let px = xx - x_start;
511
512            // center + 4-neighborhood (scalar) at base resolution
513            let c = img[y_usize * w + xx] as f32;
514            let n = img[(y_usize - 1) * w + xx] as f32;
515            let s0 = img[(y_usize + 1) * w + xx] as f32;
516            let e = img[y_usize * w + (xx + 1)] as f32;
517            let w0 = img[y_usize * w + (xx - 1)] as f32;
518
519            let mu_n = sum_ring_arr[lane] / 16.0;
520            let mu_l = (c + n + s0 + e + w0) / 5.0;
521            let mr = (mu_n - mu_l).abs();
522
523            dst_row[px] = sr_arr[lane] - dr_arr[lane] - 16.0 * mr;
524        }
525
526        x += LANES;
527    }
528
529    // Tail: scalar for remaining pixels
530    while x < x_end {
531        let px = x - x_start;
532        let resp = chess_response_at_u8(img, w, x as i32, y, ring);
533        dst_row[px] = resp;
534        x += 1;
535    }
536}