blob: fbe2e45a6fe01cad6ab6d12ce9eff09e5749108c [file] [log] [blame] [raw]
// Compile with -C opt-level=3 -C target-cpu=native to see autovectorization
#[repr(align(64))]
pub struct Aligned<T: ?Sized>(T);
// assumes input is aligned on 64-byte boundary and that
// input's length is a multiple of 64.
pub fn sum_array(input: &Aligned<[i32]>) -> i32 {
let len = input.0.len();
let input = input.0.as_ptr();
if len & 63 != 0 {
unsafe { std::hint::unreachable_unchecked() }
}
let mut sum = 0;
for i in 0..len {
sum += unsafe { *input.add(i) };
}
sum
}