#include <cmath>#include <bit>#include <tuple>std::tuple<unsignedlong, double, double> f(double w) {
unsignedlong n = std::bit_cast<unsignedlong>(w) & 3;
double x = std::fma(1.0, 1.0, w);
double y = std::fma(1.0, 1.0, x);
double z = std::fma(1.0, 1.0, y);
return {n, y, z};
}
doubleg(double y, unsignedlong n) {
double ret = 1 + y * (2 + y * (3 + y * (4 + y * (5 + y))));
return (n & 2) ? -ret : ret;
}
doublebench(double w) {
auto [n, y, z] = f(w);
returng(y, n);
}
int dummy;
intmain(int argc, char* argv[]) {
for(size_t i=0; i < 100000000; ++i) {
double w = std::bit_cast<double>((unsignedlong)rand() << 24) * 1e300 * 1e10;
double v = bench(w);
if (dummy & 1) { dummy += v; }
}
}
b.cpp↓
#include <cmath>#include <bit>#include <tuple>std::tuple<unsignedlong, double, double> f(double w) {
unsignedlong n = std::bit_cast<unsignedlong>(w) & 3;
double x = std::fma(1.0, 1.0, w);
double y = std::fma(1.0, 1.0, x);
double z = std::fma(1.0, 1.0, y);
return {n, y, z};
}
doubleg(double y, unsignedlong n) {
double ret;
if (n == 0) ret = 1 + y * (2 + y * (3 + y * (4 + y * (5 + y))));
elseif (n == 1) ret = 1 + y * (2 + y * (3 + y * (4 + y * (5 + y))));
else ret = 1 + y * (2 + y * (3 + y * (4 + y * (5 + y))));
return (n & 2) ? -ret : ret;
}
doublebench(double w) {
auto [n, y, z] = f(w);
returng(y, n);
}
int dummy;
intmain(int argc, char* argv[]) {
for(size_t i=0; i < 100000000; ++i) {
double w = std::bit_cast<double>((unsignedlong)rand() << 24) * 1e300 * 1e10;
double v = bench(w);
if (dummy & 1) { dummy += v; }
}
}
違いは、g関数の中身だけです。
$ diff a.cpp b.cpp
15c15,18< double ret = 1 + y * (2 + y * (3 + y * (4 + y * (5 + y))));---> double ret;> if (n == 0) ret = 1 + y * (2 + y * (3 + y * (4 + y * (5 + y))));> else if (n == 1) ret = 1 + y * (2 + y * (3 + y * (4 + y * (5 + y))));> else ret = 1 + y * (2 + y * (3 + y * (4 + y * (5 + y))));
longlongpollard(longlong N) {
if (N % 2 == 0) return2;
if (is_prime(N)) return N;
auto f = [&](longlong x) -> longlong {
return (__int128_t(x) * x + 1) % N;
};
longlong step = 0;
while (true) {
++step;
longlong x = step, y = f(x);
while (true) {
longlong p = gcd(y - x + N, N);
if (p == 0 || p == N) break;
if (p != 1) return p;
x = f(x);
y = f(f(y));
}
}
}
if (N % 2 == 0) return 2;
if (is_prime(N)) return N;
+ long long step = 0;
auto f = [&](long long x) -> long long {
- return (__int128_t(x) * x + 1) % N;+ return (__int128_t(x) * x + step) % N;
};
- long long step = 0;
while (true) {
++step;
第三に、この分割ではSRTテーブルを構成することができません。(そもそも境界線が間違っているのですが、この図の境界線を信じたとしても、)一つのマスに部分商が1でなければいけない部分と2でなければいけない部分が混在しています(具体的には、 のマスです)。それにも関わらず、この図の下の説明では「この図では,s(j)を0.125刻み,4*W(j)を0.25刻みで目盛り線を書いているが,1つのマスに"0"と"1",あるいは"1"と"2"のように異なる値が入っていることはなく,"0 or 1"あるいは,"1 or 2"とくっつければ,マスごとに選択するの値を一意に決めることができる。」と書かれています。これは明らかに間違いです。
int srt_table( int32_t rem, int32_t div ) {
int d = ( div >> 20 ) - 8;
assert( 0 <= d && d < 8 );
int th12[] = { 6, 7, 8, 9, 9, 10, 11, 12 };
int th01[] = { 2, 2, 3, 3, 3, 3, 4, 4 };
rem >>= 21;
if( rem < -th12[d] ) { return -2; }
if( rem < -th01[d] ) { return -1; }
if( rem < th01[d] ) { return 0; }
if( rem < th12[d] ) { return 1; }
return 2;
}
float srt_divider( float x, float y ) {
assert( 1.0f <= x && x < 2.0f );
assert( 1.0f <= y && y < 2.0f );
int32_t X = x * 0x1.p+23;
int32_t Y = y * 0x1.p+23;
if( x < y ) { X *= 2; }
int32_t quo = 0;
int32_t rem = X;
for( int i = 0; i < 13; ++i ) {
const int q = srt_table( rem, Y );
rem = ( rem - q * Y ) << 2;
quo = ( quo << 2 ) + q;
}
// Here, quo has <1/3ULP error.
// round nearest (never tie)
if( quo & 1 ) {
quo = rem < 0 ? quo - 1 : quo + 1;
}
float result = quo * ( x < y ? 0x1.p-25 : 0x1.p-24 );
assert( result == x / y );
return result;
}
std::pair<double, double> cordic_sincos( double a ) {
double angle = 0.0;
double x = 1.0;
double y = 0.0;
double M = 1.0;
for( int i = 0; i < 53; ++i ) {
double t = std::ldexp(1.0, -i);
double b = std::atan(t);
if( angle > a ) {
std::tie(x, y) = std::tuple(x + y * t, y - x * t);
angle += -b;
M *= std::cos(-b);
} else {
std::tie(x, y) = std::tuple(x - y * t, y + x * t);
angle += b;
M *= std::cos(b);
}
M *= std::cos(b);
}
return { y * M, x * M };
}
std::pair<double, double> cordic_sinhcosh( double a ) {
double angle = 0.0;
double x = 1.0;
double y = 0.0;
double M = 1.0;
for( int i = 1; i < 53; ++i ) {
double t = std::ldexp(1.0, -i);
double b = std::atanh(t);
if( angle > a ) {
std::tie(x, y) = std::tuple(x - y * t, y - x * t);
angle += -b;
M *= std::cosh(-b);
} else {
std::tie(x, y) = std::tuple(x + y * t, y + x * t);
angle += b;
M *= std::cosh(b);
}
if( i != 1 && i % 3 == 1 ) {
if( angle > a ) {
std::tie(x, y) = std::tuple(x - y * t, y - x * t);
angle += -b;
M *= std::cosh(-b);
} else {
std::tie(x, y) = std::tuple(x + y * t, y + x * t);
angle += b;
M *= std::cosh(b);
}
}
}
return { y * M, x * M };
}
int comp( const void* lhs, const void* rhs ) {
int L = *(int*)lhs;
int R = *(int*)rhs;
if( L < R ) { return -1; }
if( L > R ) { return 1; }
return 0;
}
しかし、これよりも以下のように書いたほうが高速です。
int comp( const void* lhs, const void* rhs ) {
int L = *(int*)lhs;
int R = *(int*)rhs;
int lt = L < R;
int gt = L > R;
return gt - lt;
}