mirror of
https://github.com/librempeg/librempeg
synced 2024-11-22 18:49:58 +00:00
lavu/sha512: Fully unroll the transform function loops
crypto_bench SHA-512 results using an AMD Athlon X2 7750+, mingw32-w64 GCC 4.7.3 x86_64:
Before: lavu SHA-512 size: 1048576 runs: 1024 time: 12.737 +- 0.147
After: lavu SHA-512 size: 1048576 runs: 1024 time: 11.670 +- 0.173
Signed-off-by: James Almer <jamrial@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
parent
7e4fe5162a
commit
bbcaf25d4d
@@ -150,27 +150,32 @@ static void sha512_transform(uint64_t *state, const uint8_t buffer[128])
|
||||
a = T1 + T2;
|
||||
}
|
||||
#else
|
||||
for (i = 0; i < 16 - 7;) {
|
||||
ROUND512_0_TO_15(a, b, c, d, e, f, g, h);
|
||||
ROUND512_0_TO_15(h, a, b, c, d, e, f, g);
|
||||
ROUND512_0_TO_15(g, h, a, b, c, d, e, f);
|
||||
ROUND512_0_TO_15(f, g, h, a, b, c, d, e);
|
||||
ROUND512_0_TO_15(e, f, g, h, a, b, c, d);
|
||||
ROUND512_0_TO_15(d, e, f, g, h, a, b, c);
|
||||
ROUND512_0_TO_15(c, d, e, f, g, h, a, b);
|
||||
ROUND512_0_TO_15(b, c, d, e, f, g, h, a);
|
||||
}
|
||||
|
||||
for (; i < 80 - 7;) {
|
||||
ROUND512_16_TO_80(a, b, c, d, e, f, g, h);
|
||||
ROUND512_16_TO_80(h, a, b, c, d, e, f, g);
|
||||
ROUND512_16_TO_80(g, h, a, b, c, d, e, f);
|
||||
ROUND512_16_TO_80(f, g, h, a, b, c, d, e);
|
||||
ROUND512_16_TO_80(e, f, g, h, a, b, c, d);
|
||||
ROUND512_16_TO_80(d, e, f, g, h, a, b, c);
|
||||
ROUND512_16_TO_80(c, d, e, f, g, h, a, b);
|
||||
ROUND512_16_TO_80(b, c, d, e, f, g, h, a);
|
||||
}
|
||||
#define R512_0 \
|
||||
ROUND512_0_TO_15(a, b, c, d, e, f, g, h); \
|
||||
ROUND512_0_TO_15(h, a, b, c, d, e, f, g); \
|
||||
ROUND512_0_TO_15(g, h, a, b, c, d, e, f); \
|
||||
ROUND512_0_TO_15(f, g, h, a, b, c, d, e); \
|
||||
ROUND512_0_TO_15(e, f, g, h, a, b, c, d); \
|
||||
ROUND512_0_TO_15(d, e, f, g, h, a, b, c); \
|
||||
ROUND512_0_TO_15(c, d, e, f, g, h, a, b); \
|
||||
ROUND512_0_TO_15(b, c, d, e, f, g, h, a)
|
||||
|
||||
i = 0;
|
||||
R512_0; R512_0;
|
||||
|
||||
#define R512_16 \
|
||||
ROUND512_16_TO_80(a, b, c, d, e, f, g, h); \
|
||||
ROUND512_16_TO_80(h, a, b, c, d, e, f, g); \
|
||||
ROUND512_16_TO_80(g, h, a, b, c, d, e, f); \
|
||||
ROUND512_16_TO_80(f, g, h, a, b, c, d, e); \
|
||||
ROUND512_16_TO_80(e, f, g, h, a, b, c, d); \
|
||||
ROUND512_16_TO_80(d, e, f, g, h, a, b, c); \
|
||||
ROUND512_16_TO_80(c, d, e, f, g, h, a, b); \
|
||||
ROUND512_16_TO_80(b, c, d, e, f, g, h, a)
|
||||
|
||||
R512_16; R512_16; R512_16; R512_16;
|
||||
R512_16; R512_16; R512_16; R512_16;
|
||||
#endif
|
||||
state[0] += a;
|
||||
state[1] += b;
|
||||
|
Loading…
Reference in New Issue
Block a user