環境原猫 80~86日目

uvec4で4バイト(32ビット)符号なし整数を4つつなげて、最大

3.4028236692093846346337460743177e+38

の整数型を作ります。

符号は別途用意します。

で、このuvec4の足し算や引き算や掛け算を作りました。(引き算について、少しでも速度出したいので2の補数を利用していません)

/* [uvec4 addition] */\
/* Adds two 128-bit unsigned values stored as uvec4. */\
/* Word order: .w is the least significant 32-bit word, .x the most significant. */\
/* a, b : operands. */\
/* res  : a + b; a carry out of the .x word is discarded. */\
void vec4_add(in uvec4 a, in uvec4 b, out uvec4 res) {\
  uint carry;\
  res.w = a.w + b.w;\
  carry = uint(UINT_MAX - a.w < b.w);\
  res.z = a.z + b.z + carry;\
  /* The second test catches the case where b.z + carry itself wraps to 0. */\
  carry = uint((UINT_MAX - a.z < b.z + carry) || (UINT_MAX - carry < b.z));\
  res.y = a.y + b.y + carry;\
  carry = uint((UINT_MAX - a.y < b.y + carry) || (UINT_MAX - carry < b.y));\
  /* Top word: no carry-out is computed because there is nowhere to store it. */\
  res.x = a.x + b.x + carry;\
}\
\
/* [uvec4 x 2 addition] */\
/* Adds two 256-bit unsigned values, each split into two uvec4 halves. */\
/* a1, b1 : low 128 bits of the operands (.w least significant word). */\
/* a2, b2 : high 128 bits of the operands. */\
/* res1   : low 128 bits of the sum. */\
/* res2   : high 128 bits of the sum; a carry out of res2.x is discarded. */\
void vec4_add2(in uvec4 a1, in uvec4 a2, in uvec4 b1, in uvec4 b2, out uvec4 res1, out uvec4 res2) {\
  uint carry;\
  /* Low half. */\
  res1.w = a1.w + b1.w;\
  carry = uint(UINT_MAX - a1.w < b1.w);\
  res1.z = a1.z + b1.z + carry;\
  /* The second test in each carry check catches b + carry wrapping to 0. */\
  carry = uint((UINT_MAX - a1.z < b1.z + carry) || (UINT_MAX - carry < b1.z));\
  res1.y = a1.y + b1.y + carry;\
  carry = uint((UINT_MAX - a1.y < b1.y + carry) || (UINT_MAX - carry < b1.y));\
  res1.x = a1.x + b1.x + carry;\
  carry = uint((UINT_MAX - a1.x < b1.x + carry) || (UINT_MAX - carry < b1.x));\
  \
  /* High half: continues the carry chain coming out of res1.x. */\
  res2.w = a2.w + b2.w + carry;\
  carry = uint((UINT_MAX - a2.w < b2.w + carry) || (UINT_MAX - carry < b2.w));\
  res2.z = a2.z + b2.z + carry;\
  carry = uint((UINT_MAX - a2.z < b2.z + carry) || (UINT_MAX - carry < b2.z));\
  res2.y = a2.y + b2.y + carry;\
  carry = uint((UINT_MAX - a2.y < b2.y + carry) || (UINT_MAX - carry < b2.y));\
  /* Top word: no carry-out is computed because there is nowhere to store it. */\
  res2.x = a2.x + b2.x + carry;\
}\
\
/* [uvec4 subtraction] */\
/* Subtracts b from a, both 128-bit unsigned values stored as uvec4 */\
/* (.w least significant word, .x most significant). */\
/* Precondition: a > b. */\
/* res : a - b. */\
void vec4_sub(in uvec4 a, in uvec4 b, out uvec4 res) {\
  uint borrow;\
  \
  res.w = a.w - b.w;\
  borrow = uint(a.w < b.w);\
  res.z = a.z - b.z - borrow;\
  /* The second test catches the case where b.z + borrow itself wraps to 0. */\
  borrow = uint((a.z < b.z + borrow) || (UINT_MAX - b.z < borrow));\
  res.y = a.y - b.y - borrow;\
  borrow = uint((a.y < b.y + borrow) || (UINT_MAX - b.y < borrow));\
  res.x = a.x - b.x - borrow;\
}\
\
/* [uvec4 x 2 subtraction] */\
/* Subtracts one 256-bit unsigned value from another, each split into two uvec4 halves. */\
/* Precondition: a > b. */\
/* a1, b1 : low 128 bits of the operands (.w least significant word). */\
/* a2, b2 : high 128 bits of the operands. */\
/* res1   : low 128 bits of a - b. */\
/* res2   : high 128 bits of a - b. */\
void vec4_sub2(in uvec4 a1, in uvec4 a2, in uvec4 b1, in uvec4 b2, out uvec4 res1, out uvec4 res2) {\
  uint borrow;\
  \
  /* Low half. */\
  res1.w = a1.w - b1.w;\
  borrow = uint(a1.w < b1.w);\
  res1.z = a1.z - b1.z - borrow;\
  /* The second test in each borrow check catches b + borrow wrapping to 0. */\
  borrow = uint((a1.z < b1.z + borrow) || (UINT_MAX - b1.z < borrow));\
  res1.y = a1.y - b1.y - borrow;\
  borrow = uint((a1.y < b1.y + borrow) || (UINT_MAX - b1.y < borrow));\
  res1.x = a1.x - b1.x - borrow;\
  borrow = uint((a1.x < b1.x + borrow) || (UINT_MAX - b1.x < borrow));\
  \
  /* High half: continues the borrow chain coming out of res1.x. */\
  res2.w = a2.w - b2.w - borrow;\
  borrow = uint((a2.w < b2.w + borrow) || (UINT_MAX - b2.w < borrow));\
  res2.z = a2.z - b2.z - borrow;\
  borrow = uint((a2.z < b2.z + borrow) || (UINT_MAX - b2.z < borrow));\
  res2.y = a2.y - b2.y - borrow;\
  borrow = uint((a2.y < b2.y + borrow) || (UINT_MAX - b2.y < borrow));\
  /* Top word: no borrow-out is computed because nothing consumes it. */\
  res2.x = a2.x - b2.x - borrow;\
}\
\
/* [uvec4 left shift] */\
/* Shifts a 128-bit value left by `shift` bits into a 256-bit result. */\
/* Precondition: shift < 128u. */\
/* a    : value to shift (.x most significant word, .w least significant). */\
/* res1 : low 128 bits of the shifted value. */\
/* res2 : high 128 bits, i.e. the bits pushed out past a.x. */\
void vec4_shift_l(in uvec4 a, in uint shift, out uvec4 res1, out uvec4 res2) {\
  uint low;  /* bit shift left over after moving whole 32-bit words */\
  uint back; /* complementary right shift that carries bits into the next word up */\
  if (shift < 32u) {\
    res1 = a << shift;\
    res2 = uvec4(0u, 0u, 0u, 0u);\
    /* Skipped for shift == 0 because a right shift by 32 is not defined. */\
    if (0u < shift) {\
      back = 32u - shift;\
      res1.xyz += a.yzw >> back;\
      res2.w += a.x >> back;\
    }\
  } else if (shift < 64u) {\
    /* One whole word plus `low` bits. */\
    low = shift - 32u;\
    res1 = uvec4(a.yzw << low, 0u);\
    res2 = uvec4(0u, 0u, 0u, a.x << low);\
    if (32u < shift) {\
      back = 64u - shift;\
      res1.xy += a.zw >> back;\
      res2.zw += a.xy >> back;\
    }\
  } else if (shift < 96u) {\
    /* Two whole words plus `low` bits. */\
    low = shift - 64u;\
    res1 = uvec4(a.zw << low, 0u, 0u);\
    res2 = uvec4(0u, 0u, a.xy << low);\
    if (64u < shift) {\
      back = 96u - shift;\
      res1.x += a.w >> back;\
      res2.yzw += a.xyz >> back;\
    }\
  } else {\
    /* Three whole words plus `low` bits. */\
    low = shift - 96u;\
    res1 = uvec4(a.w << low, 0u, 0u, 0u);\
    res2 = uvec4(0u, a.xyz << low);\
    if (96u < shift) {\
      back = 128u - shift;\
      res2 += a >> back;\
    }\
  }\
}\
\
/* [uvec4 right shift] */\
/* Shifts a 256-bit value right by `shift` bits. */\
/* Precondition: shift < 128u. */\
/* a1   : low 128 bits of the input (.x most significant word, .w least significant). */\
/* a2   : high 128 bits of the input. */\
/* res1 : low 128 bits of the result; bits shifted below a1.w are dropped. */\
/* res2 : high 128 bits of the result, zero-filled from the top. */\
void vec4_shift_r(in uvec4 a1, in uvec4 a2, in uint shift, out uvec4 res1, out uvec4 res2) {\
  uint low;  /* bit shift left over after moving whole 32-bit words */\
  uint back; /* complementary left shift that carries bits into the next word down */\
  if (shift < 32u) {\
    res2 = a2 >> shift;\
    res1 = a1 >> shift;\
    /* Skipped for shift == 0 because a left shift by 32 is not defined. */\
    if (0u < shift) {\
      back = 32u - shift;\
      res2.yzw += a2.xyz << back;\
      res1.x += a2.w << back;\
      res1.yzw += a1.xyz << back;\
    }\
  } else if (shift < 64u) {\
    /* One whole word plus `low` bits. */\
    low = shift - 32u;\
    res2 = uvec4(0u, a2.xyz >> low);\
    res1 = uvec4(a2.w >> low, a1.xyz >> low);\
    if (32u < shift) {\
      back = 64u - shift;\
      res2.zw += a2.xy << back;\
      res1.xy += a2.zw << back;\
      res1.zw += a1.xy << back;\
    }\
  } else if (shift < 96u) {\
    /* Two whole words plus `low` bits. */\
    low = shift - 64u;\
    res2 = uvec4(0u, 0u, a2.xy >> low);\
    res1 = uvec4(a2.zw >> low, a1.xy >> low);\
    if (64u < shift) {\
      back = 96u - shift;\
      res2.w += a2.x << back;\
      res1.xyz += a2.yzw << back;\
      res1.w += a1.x << back;\
    }\
  } else {\
    /* Three whole words plus `low` bits. */\
    low = shift - 96u;\
    res2 = uvec4(0u, 0u, 0u, a2.x >> low);\
    res1 = uvec4(a2.yzw >> low, a1.x >> low);\
    if (96u < shift) {\
      back = 128u - shift;\
      res1 += a2 << back;\
    }\
  }\
}\
\
/* [uvec4 multiplication] */\
/* Multiplies two 128-bit unsigned values by shift-and-add, giving a 256-bit product. */\
/* a, b : operands (.x most significant 32-bit word, .w least significant). */\
/* res1 : low 128 bits of a * b. */\
/* res2 : high 128 bits of a * b. */\
void vec4_mul(in uvec4 a, in uvec4 b, out uvec4 res1, out uvec4 res2) {\
  uvec4 checkbit = uvec4(0u, 0u, 0u, 1u);\
  uvec4 spill;\
  uvec4 lo;\
  uvec4 hi;\
  res1 = uvec4(0u, 0u, 0u, 0u);\
  res2 = uvec4(0u, 0u, 0u, 0u);\
  for (uint i = 0u; i < 128u; i++) {\
    /* checkbit has exactly one bit set (bit i), so a non-zero AND means bit i of b is 1. */\
    if (((b.x & checkbit.x) | (b.y & checkbit.y) | (b.z & checkbit.z) | (b.w & checkbit.w)) != 0u) {\
      /* Shift on demand by i (0..127): this respects vec4_shift_l's shift < 128u */\
      /* precondition and skips the shift entirely when the bit is clear. */\
      vec4_shift_l(a, i, lo, hi);\
      vec4_add2(res1, res2, lo, hi, res1, res2);\
    }\
    /* Advance the probe to the next bit; the bit pushed out after i == 127 is unused. */\
    vec4_shift_l(checkbit, 1u, checkbit, spill);\
  }\
}\
/* Three-way comparison of two 128-bit unsigned values stored as uvec4. */\
/* Words are compared from .x down to .w; the first differing word decides. */\
/* Returns 1 if a > b, -1 if a < b, 0 if all four words are equal. */\
int vec4_cmp(in uvec4 a, in uvec4 b) {\
  if (a.x != b.x) {\
    return (a.x > b.x) ? 1 : -1;\
  }\
  if (a.y != b.y) {\
    return (a.y > b.y) ? 1 : -1;\
  }\
  if (a.z != b.z) {\
    return (a.z > b.z) ? 1 : -1;\
  }\
  if (a.w != b.w) {\
    return (a.w > b.w) ? 1 : -1;\
  }\
  return 0;\
}\
/* Three-way comparison of two 256-bit unsigned values split into 128-bit halves. */\
/* a2/b2 are the high 128 bits and a1/b1 the low 128 bits, the same layout used by */\
/* vec4_add2, vec4_sub2, vec4_shift_l, vec4_shift_r and vec4_mul, so the high halves */\
/* are compared first and the low halves only break a tie. */\
/* Returns 1 if a > b, -1 if a < b, 0 if equal. */\
int vec4_cmp2(in uvec4 a1, in uvec4 a2, in uvec4 b1, in uvec4 b2) {\
  int high = vec4_cmp(a2, b2);\
  if (high != 0) {\
    return high;\
  }\
  return vec4_cmp(a1, b1);\
}\

次回はこの型を使ってマンデルブロ集合を描くことに挑戦します。

次の週

コメントを残す

メールアドレスが公開されることはありません。 ※ が付いている欄は必須項目です