Shuffle generation

Joe Keane jgk at osc.COM
Thu May 16 10:46:05 AEST 1991


In article <8c=sAju00Vp_Ihxl4r at andrew.cmu.edu> dd2i+ at andrew.cmu.edu (Dee Dee
Two Eye) writes:
>To swap N items in an array A[0..(n-1)], the following O(N) algorithm works:
>
>for (i=0; i < N; i++) {
>     swap(i, i + random(N - i));
>}
>
>Where random(r) is a function which returns a random integer in 0..(r-1)
>and swap(a, b) swaps the stuff in array A[a] with A[b].

This algorithm is much better than the naive one, which repeatedly swaps two
randomly chosen elements.  At the end of the loop, all permutations are
equally probable, assuming the random function is uniform.  The naive
algorithm only approaches this asymptotically.

Note that if you seed your random-number generator with a 32-bit integer, then
there are only 2^32 possible permutations, no matter how big the array is.  If
you worry about such things, like i do, then you should make multiple passes
over the array.

Following i've included `shuffle.c', a program i wrote a while ago which uses
the algorithm described above.  I've found that it's a useful utility, so you
may want to `clip and save' it.
--
Joe Keane, professional C++ programmer
jgk at osc.com (...!uunet!stratus!osc!jgk)
--
#include <stdio.h>
#include <sys/time.h>

#define DEBUG 0

extern char* malloc();
extern char* realloc();

struct line
{
  int length;
  char* ptr;
};


int main (argc, argv)
  int argc;
  char* argv[];
{
  struct line* master_ptr;
  int master_size;

#if DEBUG
  fputs("shuffle: reading lines...\n", stderr);
#endif
  {
    int master_capacity;
    char *buffer_ptr;
    int buffer_capacity;

    master_capacity = 256;
    master_ptr = (struct line*)malloc(master_capacity * sizeof(struct line));
    if (!master_ptr)
      goto out_of_memory;
    master_size = 0;
    buffer_capacity = 256;
    buffer_ptr = malloc(buffer_capacity);
    if (!buffer_ptr)
      goto out_of_memory;
    for (;;)
    {
      int c;
      int buffer_size;
      char* line_ptr;

      c = getchar();
      if (c == EOF)
	goto eof;
      if (master_size >= master_capacity)
      {
	master_capacity *= 2;
	master_ptr = (struct line*)realloc(master_ptr, master_capacity * sizeof (struct line));
	if (!master_ptr)
	  goto out_of_memory;
      }
      buffer_size = 0;

      while (c != '\n')
      {
	if (buffer_size >= buffer_capacity)
	{
	  buffer_capacity *= 2;
	  buffer_ptr = realloc(buffer_ptr, buffer_capacity);
	}
	buffer_ptr[buffer_size] = c;
	buffer_size++;
	c = getchar();
	if (c == EOF)
	{
	  fputs("shuffle: adding newline at end of file\n", stderr);
	  break;
	}
      }

      line_ptr = malloc(buffer_size);
      if (!line_ptr)
	goto out_of_memory;
      memcpy(line_ptr, buffer_ptr, buffer_size);
      master_ptr[master_size].length = buffer_size;
      master_ptr[master_size].ptr = line_ptr;
      master_size++;
      if (c == EOF)
	goto eof;
    }

  eof:
    free(buffer_ptr);
  }
#if DEBUG
  fprintf(stderr, "shuffle: total of %d lines read\n", master_size);
#endif

  {
    int pass;

    for (pass = 0; pass < 16; pass++)
    {
      struct timeval tv;
      int line_number;

      gettimeofday(&tv, 0);
#if DEBUG
      fprintf(stderr, "shuffle: doing pass %d, time is %d seconds, %d microseconds...\n", pass, tv.tv_sec, tv.tv_usec);
#endif
      srandom(tv.tv_sec ^ tv.tv_usec);
      for (line_number = 0; line_number < master_size; line_number++)
      {
	int other_line;
	struct line temp;

	other_line = random() % (master_size - line_number) + line_number;
	temp = master_ptr[line_number];
	master_ptr[line_number] = master_ptr[other_line];
	master_ptr[other_line] = temp;
      }
    }
  }

#if DEBUG
  fputs("shuffle: writing lines...\n", stderr);
#endif
  {
    int line_number;

    for (line_number = 0; line_number < master_size; line_number++)
    {
      char* ptr;
      char* end;

      ptr = master_ptr[line_number].ptr;
      end = ptr + master_ptr[line_number].length;
      while (ptr < end)
      {
	putchar(*ptr);
	ptr++;
      }
      putchar('\n');
    }
  }

#if DEBUG
  fputs("shuffle: all done\n", stderr);
#endif
  return 0;

 out_of_memory:
  fputs("shuffle: out of memory\n", stderr);
  return 1;
}



More information about the Comp.lang.c mailing list