1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
use coresimd::simd::*;
// Declarations of LLVM's experimental horizontal-reduction intrinsics.
// Each one takes an entire SIMD vector and returns the bitwise AND of all
// of its lanes as a single scalar.
//
// `improper_ctypes` is allowed because SIMD vector types are not FFI-safe
// in the usual C ABI sense; these "extern" symbols are resolved by LLVM
// itself via `link_name`, not by a C linker.
//
// NOTE(review): the unsigned link names use "uN" element / "vNuM" vector
// suffixes, but LLVM IR integer types are signless (only "iN" exists) —
// verify these unsigned intrinsic names actually resolve, or point them at
// the corresponding "iN" variants (bitwise AND is sign-agnostic anyway).
#[allow(improper_ctypes)]
extern "C" {
#[link_name = "llvm.experimental.vector.reduce.and.i8.v2i8"]
fn reduce_and_i8x2(x: i8x2) -> i8;
#[link_name = "llvm.experimental.vector.reduce.and.u8.v2u8"]
fn reduce_and_u8x2(x: u8x2) -> u8;
#[link_name = "llvm.experimental.vector.reduce.and.i16.v2i16"]
fn reduce_and_i16x2(x: i16x2) -> i16;
#[link_name = "llvm.experimental.vector.reduce.and.u16.v2u16"]
fn reduce_and_u16x2(x: u16x2) -> u16;
#[link_name = "llvm.experimental.vector.reduce.and.i32.v2i32"]
fn reduce_and_i32x2(x: i32x2) -> i32;
#[link_name = "llvm.experimental.vector.reduce.and.u32.v2u32"]
fn reduce_and_u32x2(x: u32x2) -> u32;
#[link_name = "llvm.experimental.vector.reduce.and.i64.v2i64"]
fn reduce_and_i64x2(x: i64x2) -> i64;
#[link_name = "llvm.experimental.vector.reduce.and.u64.v2u64"]
fn reduce_and_u64x2(x: u64x2) -> u64;
#[link_name = "llvm.experimental.vector.reduce.and.i8.v4i8"]
fn reduce_and_i8x4(x: i8x4) -> i8;
#[link_name = "llvm.experimental.vector.reduce.and.u8.v4u8"]
fn reduce_and_u8x4(x: u8x4) -> u8;
#[link_name = "llvm.experimental.vector.reduce.and.i16.v4i16"]
fn reduce_and_i16x4(x: i16x4) -> i16;
#[link_name = "llvm.experimental.vector.reduce.and.u16.v4u16"]
fn reduce_and_u16x4(x: u16x4) -> u16;
#[link_name = "llvm.experimental.vector.reduce.and.i32.v4i32"]
fn reduce_and_i32x4(x: i32x4) -> i32;
#[link_name = "llvm.experimental.vector.reduce.and.u32.v4u32"]
fn reduce_and_u32x4(x: u32x4) -> u32;
#[link_name = "llvm.experimental.vector.reduce.and.i64.v4i64"]
fn reduce_and_i64x4(x: i64x4) -> i64;
#[link_name = "llvm.experimental.vector.reduce.and.u64.v4u64"]
fn reduce_and_u64x4(x: u64x4) -> u64;
#[link_name = "llvm.experimental.vector.reduce.and.i8.v8i8"]
fn reduce_and_i8x8(x: i8x8) -> i8;
#[link_name = "llvm.experimental.vector.reduce.and.u8.v8u8"]
fn reduce_and_u8x8(x: u8x8) -> u8;
#[link_name = "llvm.experimental.vector.reduce.and.i16.v8i16"]
fn reduce_and_i16x8(x: i16x8) -> i16;
#[link_name = "llvm.experimental.vector.reduce.and.u16.v8u16"]
fn reduce_and_u16x8(x: u16x8) -> u16;
#[link_name = "llvm.experimental.vector.reduce.and.i32.v8i32"]
fn reduce_and_i32x8(x: i32x8) -> i32;
#[link_name = "llvm.experimental.vector.reduce.and.u32.v8u32"]
fn reduce_and_u32x8(x: u32x8) -> u32;
#[link_name = "llvm.experimental.vector.reduce.and.i64.v8i64"]
fn reduce_and_i64x8(x: i64x8) -> i64;
#[link_name = "llvm.experimental.vector.reduce.and.u64.v8u64"]
fn reduce_and_u64x8(x: u64x8) -> u64;
#[link_name = "llvm.experimental.vector.reduce.and.i8.v16i8"]
fn reduce_and_i8x16(x: i8x16) -> i8;
#[link_name = "llvm.experimental.vector.reduce.and.u8.v16u8"]
fn reduce_and_u8x16(x: u8x16) -> u8;
#[link_name = "llvm.experimental.vector.reduce.and.i16.v16i16"]
fn reduce_and_i16x16(x: i16x16) -> i16;
#[link_name = "llvm.experimental.vector.reduce.and.u16.v16u16"]
fn reduce_and_u16x16(x: u16x16) -> u16;
#[link_name = "llvm.experimental.vector.reduce.and.i32.v16i32"]
fn reduce_and_i32x16(x: i32x16) -> i32;
#[link_name = "llvm.experimental.vector.reduce.and.u32.v16u32"]
fn reduce_and_u32x16(x: u32x16) -> u32;
#[link_name = "llvm.experimental.vector.reduce.and.i8.v32i8"]
fn reduce_and_i8x32(x: i8x32) -> i8;
#[link_name = "llvm.experimental.vector.reduce.and.u8.v32u8"]
fn reduce_and_u8x32(x: u8x32) -> u8;
#[link_name = "llvm.experimental.vector.reduce.and.i16.v32i16"]
fn reduce_and_i16x32(x: i16x32) -> i16;
#[link_name = "llvm.experimental.vector.reduce.and.u16.v32u16"]
fn reduce_and_u16x32(x: u16x32) -> u16;
#[link_name = "llvm.experimental.vector.reduce.and.i8.v64i8"]
fn reduce_and_i8x64(x: i8x64) -> i8;
#[link_name = "llvm.experimental.vector.reduce.and.u8.v64u8"]
fn reduce_and_u8x64(x: u8x64) -> u8;
}
/// Horizontal bitwise-AND reduction over all lanes of a SIMD vector.
///
/// Only bits that are set in *every* lane survive in the result.
// `stutter` is allowed: the trait name intentionally repeats the crate-wide
// "reduce" naming scheme shared by the other reduction traits.
#[cfg_attr(feature = "cargo-clippy", allow(stutter))]
pub trait ReduceAnd {
/// The scalar accumulator type of the reduction (the vector's element type).
type Acc;
/// Returns the bitwise AND of all lanes of `self`.
fn reduce_and(self) -> Self::Acc;
}
/// Implements `ReduceAnd` for vector type `$id` with accumulator element
/// type `$elem_ty`, delegating to the LLVM intrinsic `$llvm_intr`.
macro_rules! red_and {
($id:ident, $elem_ty:ident, $llvm_intr:ident) => {
impl ReduceAnd for $id {
type Acc = $elem_ty;
// Default path: call straight into the LLVM reduction intrinsic.
// `into_bits` reinterprets the vector's bit pattern as the intrinsic's
// parameter type (e.g. boolean vectors as their same-width integer
// vector) without changing any bits.
#[cfg(not(target_arch = "aarch64"))]
#[inline]
fn reduce_and(self) -> Self::Acc {
unsafe { $llvm_intr(self.into_bits()) }
}
// aarch64 path: fold the lanes one at a time in scalar code.
// NOTE(review): presumably the LLVM reduction intrinsics were broken or
// unsupported on aarch64 when this was written — confirm before removing
// this fallback.
#[cfg(target_arch = "aarch64")]
#[inline]
fn reduce_and(self) -> Self::Acc {
// Seed with lane 0, then AND in the remaining lanes. The `as` cast is
// a no-op for integer vectors and widens boolean lanes to `$elem_ty`.
let mut x = self.extract(0) as Self::Acc;
for i in 1..$id::lanes() {
x &= self.extract(i) as Self::Acc;
}
x
}
}
};
}
// Instantiate `ReduceAnd` for every integer vector type, wiring each to its
// matching LLVM intrinsic. The boolean vectors (`b8xN`) at the end reuse the
// signed `i8` intrinsics: the macro's `into_bits` reinterprets them as the
// same-width `i8xN` vectors before the call.
red_and!(i8x2, i8, reduce_and_i8x2);
red_and!(u8x2, u8, reduce_and_u8x2);
red_and!(i16x2, i16, reduce_and_i16x2);
red_and!(u16x2, u16, reduce_and_u16x2);
red_and!(i32x2, i32, reduce_and_i32x2);
red_and!(u32x2, u32, reduce_and_u32x2);
red_and!(i64x2, i64, reduce_and_i64x2);
red_and!(u64x2, u64, reduce_and_u64x2);
red_and!(i8x4, i8, reduce_and_i8x4);
red_and!(u8x4, u8, reduce_and_u8x4);
red_and!(i16x4, i16, reduce_and_i16x4);
red_and!(u16x4, u16, reduce_and_u16x4);
red_and!(i32x4, i32, reduce_and_i32x4);
red_and!(u32x4, u32, reduce_and_u32x4);
red_and!(i64x4, i64, reduce_and_i64x4);
red_and!(u64x4, u64, reduce_and_u64x4);
red_and!(i8x8, i8, reduce_and_i8x8);
red_and!(u8x8, u8, reduce_and_u8x8);
red_and!(i16x8, i16, reduce_and_i16x8);
red_and!(u16x8, u16, reduce_and_u16x8);
red_and!(i32x8, i32, reduce_and_i32x8);
red_and!(u32x8, u32, reduce_and_u32x8);
red_and!(i64x8, i64, reduce_and_i64x8);
red_and!(u64x8, u64, reduce_and_u64x8);
red_and!(i8x16, i8, reduce_and_i8x16);
red_and!(u8x16, u8, reduce_and_u8x16);
red_and!(i16x16, i16, reduce_and_i16x16);
red_and!(u16x16, u16, reduce_and_u16x16);
red_and!(i32x16, i32, reduce_and_i32x16);
red_and!(u32x16, u32, reduce_and_u32x16);
red_and!(i8x32, i8, reduce_and_i8x32);
red_and!(u8x32, u8, reduce_and_u8x32);
red_and!(i16x32, i16, reduce_and_i16x32);
red_and!(u16x32, u16, reduce_and_u16x32);
red_and!(i8x64, i8, reduce_and_i8x64);
red_and!(u8x64, u8, reduce_and_u8x64);
red_and!(b8x2, i8, reduce_and_i8x2);
red_and!(b8x4, i8, reduce_and_i8x4);
red_and!(b8x8, i8, reduce_and_i8x8);
red_and!(b8x16, i8, reduce_and_i8x16);
red_and!(b8x32, i8, reduce_and_i8x32);
red_and!(b8x64, i8, reduce_and_i8x64);
#[cfg(test)]
mod tests {
    use super::ReduceAnd;
    use coresimd::simd::*;

    /// Signed lanes: splatting a value is the AND identity for that value,
    /// and a single zero lane forces the whole reduction to zero.
    #[test]
    fn reduce_and_i32x4() {
        let v = i32x4::splat(1);
        assert_eq!(v.reduce_and(), 1_i32);
        let v = i32x4::new(1, 1, 0, 1);
        assert_eq!(v.reduce_and(), 0_i32);
    }

    /// Unsigned lanes: the reduction is a *bitwise* AND, not a logical one —
    /// only bits set in every lane survive (here, only bit 3).
    #[test]
    fn reduce_and_u32x4() {
        let v = u32x4::splat(u32::max_value());
        assert_eq!(v.reduce_and(), u32::max_value());
        let v = u32x4::new(0b1100, 0b1010, 0b1110, 0b1011);
        assert_eq!(v.reduce_and(), 0b1000_u32);
    }
}