[ruby-dev:25403] rand problems
From:
Tanaka Akira <akr@...17n.org>
Date:
2005-01-04 16:08:59 UTC
List:
ruby-dev #25403
In article <20050103233811.7dd92150.sheepman@tcn.zaq.ne.jp>,
sheepman <sheepman@tcn.zaq.ne.jp> writes:
>> 次のように、rand の結果があからさまに一様でない分布を示すことがあるようです。
>
> こんな感じでどうでしょうか。
そうすると、
% ./ruby -ve '20.times { printf "%9x\n", rand(0x1ffffffff) }'
ruby 1.8.2 (2005-01-04) [i686-linux]
2223ec71
28e25b5c
37326e3d
42e8cdd2
27a4a874
ed512afe
3b7a6d2d
1e493032
ad1b372f
36423f58
5d6232b4
97a3051
11926c94
aec1ef57
cbb7f06f
ba24d7a4
7c77dd16
99fd7e36
6ce15ae
6cfcfa17
というように、rand(0x1ffffffff) が常に 0xffffffff 以下を返すようになります。
それはそれとして、rand の問題はほかにもいくつかあるようです。
* rand(fixnum) も一様でない
% ./ruby -e '
s = 10000
t = 3
arr = [0] * t
(64*s).times {
arr[rand(0x30000000) % t] += 1
}
arr.each {|n| puts "*" * (n / s) }
'
************************
*******************
*******************
* rand(nil) で、仮数部の下位ビットに 0 が並ぶ
% ./ruby -ve 'srand(0); 10.times { p [rand].pack("G").unpack("B*") }'
ruby 1.8.2 (2005-01-04) [i686-linux]
["0011111111100001100011111110000101010101100000000000000000000000"]
["0011111111100010111110001001010101000101111000000000000000000000"]
["0011111111100110111000101101010011001110101000000000000000000000"]
["0011111111101011000001000011100110011000000000000000000000000000"]
["0011111111100011010010011101011001101000011000000000000000000000"]
["0011111111101011011101000100101001011111011000000000000000000000"]
["0011111111100001011011111010111011011000011000000000000000000000"]
["0011111111101011000111001010111110101100111000000000000000000000"]
["0011111111011011000111010010100100000010010000000000000000000000"]
["0011111111100011111101000011101111011010011000000000000000000000"]
* 64bit マシンで rand が返す値の下位ビットが 0 になる
% ruby -ve 'srand(0); 10.times { p rand(0x3000000000000000).to_s(16) }'
ruby 1.8.2 (2004-12-06) [x86_64-linux]
"1a57d20040000000"
"1c74dfe8d0000000"
"22543f35f0000000"
"2886566400000000"
"1ceec19c90000000"
"292e6f8f10000000"
"1a27864490000000"
"28ab078350000000"
"1455dec1b0000000"
"1dee59c790000000"
* 32bit の種しか与えられないので、brute force attack に弱い
という話を直したつもりなパッチをつけます。
なお、/dev/urandom から以前よりも長く読み込み、bignum として表現する都
合上、srand の返値が
% ./ruby -e 'srand; p srand'
44554367493460933033425899290213730169575912281951203251910081025545624407566
というかんじでそれなりに長くなっています。
また、いくつか必要な関数を mt19937ar-cok.tgz からもってきました。
あと、rb_big_rand は不都合があるので使わず、直接 RBignum の中身をいじっ
ています。そして SIZEOF_BDIGITS は 2 か 4 であることを仮定していますが、
2 の場合はテストしてません。
Index: random.c
===================================================================
RCS file: /src/ruby/random.c,v
retrieving revision 1.34
diff -u -p -r1.34 random.c
--- random.c 3 Jan 2005 05:13:10 -0000 1.34
+++ random.c 4 Jan 2005 16:04:52 -0000
@@ -92,6 +92,37 @@ init_genrand(s)
left = 1; initf = 1;
}
+/* initialize by an array with array-length */
+/* init_key is the array for initializing keys */
+/* key_length is its length */
+/* slight change for C++, 2004/2/26 */
+static void
+init_by_array(unsigned long init_key[], int key_length)
+{
+ int i, j, k;
+ init_genrand(19650218UL);
+ i=1; j=0;
+ k = (N>key_length ? N : key_length);
+ for (; k; k--) {
+ state[i] = (state[i] ^ ((state[i-1] ^ (state[i-1] >> 30)) * 1664525UL))
+ + init_key[j] + j; /* non linear */
+ state[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */
+ i++; j++;
+ if (i>=N) { state[0] = state[N-1]; i=1; }
+ if (j>=key_length) j=0;
+ }
+ for (k=N-1; k; k--) {
+ state[i] = (state[i] ^ ((state[i-1] ^ (state[i-1] >> 30)) * 1566083941UL))
+ - i; /* non linear */
+ state[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */
+ i++;
+ if (i>=N) { state[0] = state[N-1]; i=1; }
+ }
+
+ state[0] = 0x80000000UL; /* MSB is 1; assuring non-zero initial array */
+ left = 1; initf = 1;
+}
+
static void
next_state()
{
@@ -114,9 +145,9 @@ next_state()
*p = p[M-N] ^ TWIST(p[0], state[0]);
}
-/* generates a random number on [0,1)-real-interval */
-static double
-genrand_real()
+/* generates a random number on [0,0xffffffff]-interval */
+static unsigned long
+genrand_int32(void)
{
unsigned long y;
@@ -129,10 +160,18 @@ genrand_real()
y ^= (y << 15) & 0xefc60000UL;
y ^= (y >> 18);
- return (double)y * (1.0/4294967296.0);
- /* divided by 2^32 */
+ return y;
}
+/* generates a random number on [0,1) with 53-bit resolution*/
+static double
+genrand_real(void)
+{
+ unsigned long a=genrand_int32()>>5, b=genrand_int32()>>6;
+ return(a*67108864.0+b)*(1.0/9007199254740992.0);
+}
+/* These real versions are due to Isaku Wada, 2002/01/09 added */
+
#undef N
#undef M
@@ -151,36 +190,91 @@ genrand_real()
#endif
static int first = 1;
+static VALUE saved_seed = INT2FIX(0);
-static int
-rand_init(seed)
- unsigned long seed;
+static VALUE
+rand_init(vseed)
+ VALUE vseed;
{
- static unsigned long saved_seed;
- unsigned long old;
+ volatile VALUE seed;
+ VALUE old;
+ long len;
+ unsigned long *buf;
+ seed = rb_to_int(vseed);
+ switch (TYPE(seed)) {
+ case T_FIXNUM:
+ len = sizeof(VALUE);
+ break;
+ case T_BIGNUM:
+ len = RBIGNUM(seed)->len * SIZEOF_BDIGITS;
+ if (len == 0)
+ len = 4;
+ break;
+ default:
+ rb_raise(rb_eTypeError, "failed to convert %s into Integer",
+ rb_obj_classname(vseed));
+ }
+ len = ((len + 3) / 4); /* round up to 32bits */
+ buf = ALLOCA_N(long, len); /* allocate longs for init_by_array */
+ memset(buf, 0, len * sizeof(long));
+ if (FIXNUM_P(seed)) {
+ buf[0] = FIX2ULONG(seed) & 0xffffffff;
+#if SIZEOF_LONG > 4
+ buf[1] = FIX2ULONG(seed) >> 32;
+#endif
+ }
+ else {
+ int i, j;
+ for (i = RBIGNUM(seed)->len-1; 0 <= i; i--) {
+ j = i * SIZEOF_BDIGITS / 4;
+#if SIZEOF_BDIGITS < SIZEOF_LONG
+ buf[j] = buf[j] << (SIZEOF_BDIGITS * 8);
+#endif
+ buf[j] |= ((BDIGIT *)RBIGNUM(seed)->digits)[i];
+ }
+ }
+ while (1 < len && buf[len-1] == 0) {
+ len--;
+ }
+ if (len <= 1) {
+ init_genrand(buf[0]);
+ }
+ else {
+ if (buf[len-1] == 1) /* remove leading-zero-guard */
+ len--;
+ init_by_array(buf, len);
+ }
first = 0;
- init_genrand(seed);
old = saved_seed;
saved_seed = seed;
return old;
}
-static unsigned long
+static VALUE
random_seed()
{
static int n = 0;
struct timeval tv;
- unsigned long result;
int fd;
- unsigned long buf;
struct stat statbuf;
+ BDIGIT *digits;
+ unsigned long *seed;
+ NEWOBJ(big, struct RBignum);
+ OBJSETUP(big, rb_cBignum, T_BIGNUM);
+ big->sign = 1;
+ big->len = sizeof(long) * 8 / SIZEOF_BDIGITS + 1;
+ digits = big->digits = ALLOC_N(BDIGIT, big->len);
+ seed = (unsigned long *)big->digits;
- gettimeofday(&tv, 0);
- result = tv.tv_sec ^ tv.tv_usec ^ getpid() ^ n++;
+ memset(digits, 0, big->len * SIZEOF_BDIGITS);
- result += (unsigned long)&result;
+ gettimeofday(&tv, 0);
+ seed[0] = tv.tv_usec;
+ seed[1] = tv.tv_sec;
+ seed[2] = getpid() ^ (n++ << 16);
+ seed[3] = (unsigned long)&seed;
#ifdef S_ISCHR
if ((fd = open("/dev/urandom", O_RDONLY|O_NONBLOCK
@@ -192,14 +286,16 @@ random_seed()
#endif
)) >= 0) {
if (fstat(fd, &statbuf) == 0 && S_ISCHR(statbuf.st_mode)) {
- read(fd, &buf, sizeof(buf));
- result ^= buf;
+ read(fd, &seed[4], 4 * sizeof(*seed));
}
close(fd);
}
#endif
- return result;
+ /* set leading-zero-guard if need. */
+ digits[big->len-1] = digits[big->len-2] <= 1 ? 1 : 0;
+
+ return rb_big_norm((VALUE)big);
}
/*
@@ -222,19 +318,94 @@ rb_f_srand(argc, argv, obj)
VALUE *argv;
VALUE obj;
{
- VALUE sd;
- unsigned long seed, old;
+ VALUE seed, old;
rb_secure(4);
- if (rb_scan_args(argc, argv, "01", &sd) == 0) {
+ if (rb_scan_args(argc, argv, "01", &seed) == 0) {
seed = random_seed();
}
- else {
- seed = NUM2ULONG(sd);
- }
old = rand_init(seed);
- return ULONG2NUM(old);
+ return old;
+}
+
+static unsigned long
+make_mask(unsigned long x)
+{
+ x = x | x >> 1;
+ x = x | x >> 2;
+ x = x | x >> 4;
+ x = x | x >> 8;
+ x = x | x >> 16;
+#if 4 < SIZEOF_LONG
+ x = x | x >> 32;
+#endif
+ return x;
+}
+
+static unsigned long
+limited_rand(unsigned long limit)
+{
+ unsigned long mask = make_mask(limit);
+ int i;
+ unsigned long val;
+
+ retry:
+ val = 0;
+ for (i = SIZEOF_LONG/4-1; 0 <= i; i--) {
+ if (mask >> (i * 32)) {
+ val |= genrand_int32() << (i * 32);
+ val &= mask;
+ if (limit < val)
+ goto retry;
+ }
+ }
+ return val;
+}
+
+static VALUE
+limited_big_rand(struct RBignum *limit)
+{
+ unsigned long mask, lim, rnd;
+ struct RBignum *val;
+ int i, len, boundary;
+
+ len = (limit->len * SIZEOF_BDIGITS + 3) / 4;
+ val = (struct RBignum *)rb_big_clone((VALUE)limit);
+ val->sign = 1;
+#if SIZEOF_BDIGITS == 2
+# define BIG_GET32(big,i) (((BDIGIT *)(big)->digits)[(i)/2] | \
+ ((i)/2+1 < (big)->len ? (((BDIGIT *)(big)->digits)[(i)/2+1] << 16)
+ : 0))
+# define BIG_SET32(big,i,d) ((((BDIGIT *)(big)->digits)[(i)/2] = (d) & 0xffff), \
+ ((i)/2+1 < (big)->len ? (((BDIGIT *)(big)->digits)[(i)/2+1] = (d) >> 16) \
+ : 0))
+#else
+ /* SIZEOF_BDIGITS == 4 */
+# define BIG_GET32(big,i) (((BDIGIT *)(big)->digits)[i])
+# define BIG_SET32(big,i,d) (((BDIGIT *)(big)->digits)[i] = (d))
+#endif
+ retry:
+ mask = 0;
+ boundary = 1;
+ for (i = len-1; 0 <= i; i--) {
+ lim = BIG_GET32(limit, i);
+ mask = mask ? 0xffffffff : make_mask(lim);
+ if (mask) {
+ rnd = genrand_int32() & mask;
+ if (boundary) {
+ if (lim < rnd)
+ goto retry;
+ if (rnd < lim)
+ boundary = 0;
+ }
+ }
+ else {
+ rnd = 0;
+ }
+ BIG_SET32(val, i, rnd);
+ }
+ return rb_big_norm((VALUE)val);
}
/*
@@ -276,18 +447,26 @@ rb_f_rand(argc, argv, obj)
max = (long)RFLOAT(vmax)->value;
break;
}
- vmax = rb_dbl2big(RFLOAT(vmax)->value);
+ if (RFLOAT(vmax)->value < 0)
+ vmax = rb_dbl2big(-RFLOAT(vmax)->value);
+ else
+ vmax = rb_dbl2big(RFLOAT(vmax)->value);
/* fall through */
case T_BIGNUM:
bignum:
{
- long len = RBIGNUM(vmax)->len;
- double *buf = ALLOCA_N(double, len);
-
- while (len--) {
- buf[len] = genrand_real();
- }
- return rb_big_rand(vmax, buf);
+ struct RBignum *limit = (struct RBignum *)vmax;
+ if (!limit->sign) {
+ limit = (struct RBignum *)rb_big_clone(vmax);
+ limit->sign = 1;
+ }
+ limit = (struct RBignum *)rb_big_minus((VALUE)limit, INT2FIX(1));
+ if (FIXNUM_P((VALUE)limit)) {
+ if (FIX2LONG((VALUE)limit) == -1)
+ return rb_float_new(genrand_real());
+ return LONG2NUM(limited_rand(FIX2LONG((VALUE)limit)));
+ }
+ return limited_big_rand(limit);
}
case T_NIL:
max = 0;
@@ -304,8 +483,7 @@ rb_f_rand(argc, argv, obj)
return rb_float_new(genrand_real());
}
if (max < 0) max = -max;
- val = max*genrand_real();
-
+ val = limited_rand(max-1);
return LONG2NUM(val);
}
@@ -314,4 +492,5 @@ Init_Random()
{
rb_define_global_function("srand", rb_f_srand, -1);
rb_define_global_function("rand", rb_f_rand, -1);
+ rb_global_variable(&saved_seed);
}
--
[田中 哲][たなか あきら][Tanaka Akira]