Earlier it was this:
#if __GNUC__ && !__INTEL_COMPILER
const int_mmx
delta = { dpp->delta, dpp->delta },
msk0 = { 0x7fff, 0x7fff },
msk1 = { 0xffff, 0xffff },
round = { 512, 512 },
zero = { 0, 0 };
#else // NO GCC
const int_mmx
delta = _mm_set_pi32(dpp->delta, dpp->delta),
msk0 = _mm_set_pi32(0x7fff, 0x7fff),
msk1 = _mm_set_pi32(0xffff, 0xffff),
round = _mm_set_pi32(512, 512),
zero = _mm_set_pi32(0, 0);
#endif // __GNUC__
Now it is:
const int_mmx
delta = set_int_mmx(dpp->delta, dpp->delta),
fill = set_int_mmx(0x7bff, 0x7bff),
msk0 = set_int_mmx(0x7fff, 0x7fff),
msk1 = set_int_mmx(0xffff, 0xffff),
round = set_int_mmx(512, 512),
zero = set_int_mmx(0, 0);
int_mmx
sum_AB = set_int_mmx(0, 0),
weight_AB = set_int_mmx(
restore_weight (store_weight (dpp->weight_A)),
restore_weight (store_weight (dpp->weight_B))
),
left_right, sam_AB, tmp0, tmp1, samples_AB [MAX_TERM];
Each of these set_int_mmx calls takes two identical arguments (values), so switching m1 with m2 does not change anything;
the only difference is with weight_AB, where A and B are switched!
But I will try this patch.