Passing 2-D Arrays of unknown size ?

Chris Torek chris at mimsy.umd.edu
Thu Dec 21 12:34:34 AEST 1989


In article <3180 at uceng.UC.EDU> mfinegan at uceng.UC.EDU (michael k finegan)
writes:
>    Is there a method for passing/using multi-D arrays in subroutines,
>without specifying the column (fastest changing) dimension in advance ?
[much more deleted]

More reruns:

From: chris at mimsy.UUCP (Chris Torek)
Subject: Re: Two dimensional arrays in C
Date: 27 Jun 87 02:12:46 GMT

C does not allow variables as dimensions, not even in parameter
declarations.  Given that the caller wants to pass two differently-
shaped matrices (one [500][30], one [400][20]), the only way to
do this without creating a vector of vectors is to have the caller
pass the address of mat[0][0]:

	#define M1A ... /* and likewise for 1B, 2A, 2B */
	main(...)
	{
		static double m1[M1A][M1B];
		static double m2[M2A][M2B];
		...
		matrix_stuff(a, b, &m1[0][0], M1B);/* M1A is irrelevant */
		matrix_stuff(c, d, &m2[0][0], M2B);/* likewise */
		...
	}

	matrix_stuff(a, b, array, y)
		int a, b, y;
		double *array;
	{
		int i, j;

		for (i = 0; i < a; i++) {
			for (j = 0; j < b; j++) {
				v = array[i * y + j];
				...
				array[i * y + j] = v;
			}
		}
	}

If the `shape' of the two matrices is the same---that is, all
dimensions save the first are the same---you can do this:

	main(...) {
		static double m1[R1][C], m2[R2][C];

		...
		matrix_stuff(a, b, m1);
		matrix_stuff(c, d, m2);
		...
	}

	matrix_stuff(a, b, array)
		int a, b;
		double (*array)[C];
	{
		int i, j;

		for (i = 0; i < a; i++)
			for (j = 0; j < b; j++)
				/* work on array[i][j] here */;
	}

This generates approximately the same code, except that instead of
having `array[i * <variable> + j]', the compiler can generate
`array[i * C + j]'.  In either case a good optimiser will pull the
`i * expr' expression out of the `for j' loop, but the chance that
your compiler has a good optimiser is rather slim.

The method I prefer is vectors of vectors:

	struct matrix {
		double	**m_data;	/* vectors of (double *) */
		int	m_rows;		/* number of rows */
		int	m_cols;		/* and columns */
	};

	struct matrix *make_matrix();

	main()
	{
		struct matrix *m1, *m2;
		...
		m1 = make_matrix(rows, cols);
		m1->m_data[i][j] = value;
		m2 = make_matrix(rows2, cols2);
		...
		matrix_stuff(m1);
		matrix_stuff(m2);
		...
	}

	matrix_stuff(m)
		register struct matrix *m;
	{
		register int i, j;

		for (i = 0; i < m->m_rows; i++)
			for (j = 0; j < m->m_cols; j++)
				/* do things with m->m_data[i][j] */;
	}

	/*
	 * Allocate a matrix of nr rows by nc columns as a vector of vectors:
	 * one struct matrix, one vector of nr (double *) row pointers, and
	 * nr row vectors of nc doubles each.  The space comes from malloc(),
	 * so the doubles themselves are not initialised.  Returns the new
	 * matrix; if any allocation fails, abort_because_out_of_memory()
	 * is called instead.
	 */
	struct matrix *
	make_matrix(nr, nc)
		register int nr, nc;
	{
		register struct matrix *m;
		register double **p;
		/*
		 * alloc(v, n, t): set v to space for n objects of type t.
		 * The `if (...); else' shape makes the expansion plus the
		 * caller's semicolon one single statement, so that it can
		 * serve as the whole body of the `while' below.
		 */
	#define alloc(v, n, t) \
		if ((v = (t *) malloc(n * sizeof (t))) != NULL); \
		else abort_because_out_of_memory()
						/* Allocate: */
		alloc(m, 1, struct matrix);	/* the matrix; */
		alloc(p, nr, double *);		/* the vector of vectors; */
		/* record the shape before nr is consumed by the loop below */
		m->m_rows = nr;
		m->m_cols = nc;
		m->m_data = p;
		while (--nr >= 0)		/* and each vector */
			alloc(*p++, nc, double);/* of doubles. */
	#undef alloc
		return (m);
	}

This is `neater' in that the size of the matrix is carried about
with the matrix itself, and no multiplication is necessary to find
any matrix element.  The definition of alloc() is not suitable for
a general matrix library, since it aborts the program if it cannot
get enough memory, but this should suffice for illustration.

Note that you can dynamically allocate `flat' matrices, too:

	struct flatmat {
		double	*fm_data;
		int	fm_rows, fm_cols;
	};
	#define FM_ELT(fm, i, j) ((fm)->fm_data[(i) * (fm)->fm_cols + (j)])
	...
	struct flatmat *
	flatmat_alloc(nr, nc) {
		...
		alloc(fm, 1, struct flatmat);
		alloc(fm->fm_data, nr * nc, double);
		fm->fm_rows = nr;
		fm->fm_cols = nc;
		return (fm);
	}

but this requires all those multiplies (again, unless you have a
good optimiser), and precludes such things as upper triangular
matrices.  More complex methods of remembering the size and shape
of each matrix are appropriate in some situations.


From: chris at mimsy.UUCP (Chris Torek)
Subject: Re: arrays of pointers - NOVICE QUESTION!( Be forwarned ! )
Date: 5 Jun 89 04:05:36 GMT

>Recently someone posted a remark that these two declarations are the same:
>char *array[some size - you choose];
>and
>char **array;

They are not the same, nor do they have the same meaning, except in one
special case: as a declaration for a formal parameter.  If you write

	int
	main(argc, argv)
		int argc;
		char *argv[];
	{
		...

the compiler sees the declaration for `argv' as one saying `this is
an array of unknown size, each element of which is a pointer to zero
or more characters'---in pseudo-English,

	declare argv as array ? of pointer to char

(`?' here means `unknown size').  Since the C language definition does
not allow one to call a function with an array parameter---if you try,
e.g., with

	f() {
		char *myargv[10];
		... set up myargv[] ...
		(void) main(9, myargv);
	}

the array in that (`rvalue') position is converted to a pointer to the
array's first element---the compiler says, `Oh, you REALLY meant

	declare argv as pointer to pointer to char

or

		char **argv;

so I shall silently pretend you wrote that.'

>My understanding is that the first declaration is for an array of pointers to
>char.

Correct.

>The second one is confusing me. How is it interpreted?

It declares a single pointer, which can be set to nil (or NULL) or to
point to a pointer to a character.  Hence if we have a character:

	char c;

and a pointer to it:

	char *p = &c;

we can set a pointer to point to that pointer:

	char **q = &p;

This is not very interesting, because each pointer points to one object
only---we can use q[0] (which is an alias for p) or p[0] (which is an
alias for c) or q[0][0] (another alias for c) but not p[1] nor q[3][17].
To be more interesting, make p point at a whole slew of characters:

	char c[23];
	char *p = &c[0];
	char **q = &p;

Now we can talk about p[0] (an alias for c[0]) through p[22] (an alias
for c[22]) or q[0][0] (c[0] again) through q[0][22] (c[22]), but still
not q[1][?] or q[2][?].  To make q more interesting, make it point at
a slew of pointers:

	char c[23], d[5], e[17];
	char *p[3] = { &c[0], &d[0], &e[0] };
	char **q = &p[0];

Now we can use p[0][0] (an alias for c[0]) or p[1][0] (d[0]) or p[2][0]
(e[0]), and, similarly, q[0][?] through q[2][?].

There is still not much reason to use the pointer `q' instead of `p'.
Since I am getting tired of typing, I will just segue into a previous
posting (or perhaps slam into one :-) ).

From: chris at mimsy.UUCP (Chris Torek)
Subject: Re: char ***pointer;
Keywords: allocating space
Message-ID: <14617 at mimsy.UUCP>
Date: 18 Nov 88 07:40:26 GMT

	char *p;

declares an object p which has type `pointer to char' and no specific
value.  (If p is static or external, it is initialised to (char *)NULL;
if it is automatic, it is full of garbage.)  Similarly,

	char **p;

declares an object p which has type `pointer to pointer to char' and
no specific value.  We can keep this up for days :-) and write

	char *******p;

which declares an object p which has type `pointer to pointer ... to char'
and no specific value.  But we will stop with

	char ***pppc;

which declares `pppc' as type `pointer to pointer to pointer to char',
and leaves its value unspecified.  None of these pointers point *to*
anything, but if I say, e.g.,

	char c = '!';
	char *pc = &c;
	char **ppc = &pc;
	char ***pppc = &ppc;

then I have each pointer pointing to something.  pppc points to ppc;
ppc points to pc; pc points to c; and hence, ***pppc is the character
'!'.

Now, there is a peculiar status for pointers in C: they point not only
to the object immediately at *ptr, but also to any other objects in
an array named by *(ptr+offset).  (The latter can also be written as
ptr[offset].)  So I could say:

	int i, j, k;
	char c[NPPC][NPC][NC];
	char *pc[NPPC][NPC];
	char **ppc[NPPC];
	char ***pppc;

	pppc = ppc;
	for (i = 0; i < NPPC; i++) {
		ppc[i] = pc[i];
		for (j = 0; j < NPC; j++) {
			pc[i][j] = c[i][j];
			for (k = 0; k < NC; k++)
				c[i][j][k] = '!';
		}
	}

What this means is perhaps not immediately clear%.  There is a two-
dimensional array of pointers to characters pc[i][j], each of which
points to a number of characters, namely those in c[i][j][0] through
c[i][j][NC-1].  A one-dimensional array ppc[i] contains pointers to
pointers to characters; each ppc[i] points to a number of pointers to
characters, namely those in pc[i][0] through pc[i][NPC-1].  Finally,
pppc points to a number of pointers to pointers to characters, namely
those in ppc[0] through ppc[NPPC-1].
-----
% :-)
-----

The important thing to note is that each variable points to one or
more objects whose type is the type derived from removing one `*'
from the declaration of that variable.  (Clear? :-)  Maybe we should
try it this way:)  Since pppc is `char ***pppc', what pppc points to
(*pppc) is of type `char **'---one fewer `*'s.  pppc points to zero
or more objects of this type; here, it points to the first of NPPC
objects.

As to malloc: malloc obtains a blob of memory of unspecified shape.
The cast you put in front of malloc determines the shape of the blob.
The argument to malloc determines its size.  These should agree, or you
will get into trouble later.  So the first thing we need to do is
this:

	pointer = (char ***)malloc(N * sizeof(char **));
	if (pointer == NULL) quit("out of memory... goodbye");

Pointer will then point to N objects, each of which is a `char **'.
None of those `char **'s will have any particular value (i.e., they
do not point anywhere at all; they are garbage).  If we make them
point somewhere---to some object(s) of type `char *'---and make
those objects point somewhere, then we will have something useful.

Suppose we have done the one malloc above.  Then if we use:

	pointer[0] = (char **)malloc(N1 * sizeof(char *));
	if (pointer[0] == NULL) quit("out of memory");

we will have a value to which pointer[0] points, which can point to
N1 objects, each of type `char *'.  So we can then say, e.g.,

	i = 0;
	while (i < N1 && fgets(buf, sizeof(buf), input) != NULL)
		pointer[0][i++] = strdup(buf);

(strdup is a function that calls malloc to allocate space for a copy
of its string argument, and then copies the string to that space and
returns the new pointer.  If malloc fails, strdup() returns NULL.)
We could write instead

	i = 0;
	while (i < N1 && fgets(buf, sizeof(buf), input) != NULL)
		*(*pointer)++ = strdup(buf);

Note that

		**pointer++ = strdup(buf);

sets **pointer (equivalently, pointer[0][0]), then increments the
value in `pointer', not that in pointer[0].  But using *(*pointer)++
means that we will later have to write

	pointer[0] -= i;

to adjust pointer[0] backwards by the number of strings read in and
strdup()ed, or else use negative subscripts to locate the strings.

Probably all of this will be somewhat clearer with a more realistic
example.  The following code creates an array of arrays of lines.

/* begin code (untested) */
/* this assumes prototypes are available */

#include <errno.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Message handed to quit() when malloc() or realloc() returns NULL. */
static char nomem[] = "out of memory, exiting";

/*
 * Print msg, followed by a newline, on stderr and terminate the
 * program with exit status 1.  This function does not return.
 */
void quit(const char *msg) {
	(void) fprintf(stderr, "%s\n", msg);
	exit(1);
	/* NOTREACHED */
}

/*
 * Read one input line, of any length, from a file.
 *
 * Returns a pointer to a NUL-terminated copy of the line, without its
 * trailing newline, in space obtained from realloc(); the caller owns
 * that space.  A final line that lacks a newline is returned as is.
 * Returns NULL if nothing at all could be read.  Calls quit() if
 * memory runs out.
 */
char *readstr(FILE *f) {
	char *s = NULL, *nl;
	size_t curlen = 0, l;	/* bytes already in s; bytes in this chunk */
	char inbuf[BUFSIZ];

	/*
	 * Append one fgets() chunk per trip until a chunk ends in a
	 * newline.  This is not terribly efficient if you have many
	 * long input lines.
	 */
	while (fgets(inbuf, sizeof(inbuf), f) != NULL) {
		nl = strchr(inbuf, '\n');
		if (nl != NULL) {	/* got it all */
			*nl = '\0';
			l = (size_t)(nl - inbuf);
		} else
			l = strlen(inbuf);

		/*
		 * N.B. realloc((void *)NULL, n) => malloc(n) in standard C;
		 * if your realloc does not work that way, you will
		 * have to fix this.
		 */
		s = realloc(s, curlen + l + 1);
		if (s == NULL)
			quit(nomem);
		/* length is known, so copy the text and its NUL directly */
		memcpy(s + curlen, inbuf, l + 1);
		if (nl != NULL)		/* done; stop */
			break;
		curlen += l;
	}
	/* should check for input error (ferror(f)), actually */
	return (s);
}

/*
 * Read the remaining lines of a file into a vector of strings.
 *
 * Returns a pointer to dynamically allocated space holding one
 * (char *) per line, each as returned by readstr(), followed by a
 * terminating NULL: for n lines there are n+1 used elements.  A file
 * with no lines yields a vector holding only the NULL.  Calls quit()
 * if memory runs out.
 */
char **readfile(FILE *f) {
	char **vec, *s;
	size_t veclen = 0;	/* lines stored so far */
	size_t cap = 1;		/* elements allocated in vec */

	/*
	 * malloc below is implicitly converted to (char **), but this
	 * depends on it returning (void *); old compilers need the
	 * cast, since malloc() returns (char *).  The same applies
	 * to realloc() below.
	 */
	vec = malloc(cap * sizeof(*vec));
	if (vec == NULL)
		quit(nomem);
	while ((s = readstr(f)) != NULL) {
		/*
		 * Need room for this line plus the final NULL.  Double
		 * the vector when it is full, rather than growing it by
		 * one slot for every line.
		 */
		if (veclen + 2 > cap) {
			if (cap > (size_t)-1 / (2 * sizeof(*vec)))
				quit(nomem);	/* size would overflow */
			cap *= 2;
			vec = realloc(vec, cap * sizeof(*vec));
			if (vec == NULL)
				quit(nomem);
		}
		vec[veclen++] = s;
	}
	vec[veclen] = NULL;
	return (vec);
}

/*
 * Read a list of files specified in an argv: names[] holds file
 * names and ends with a NULL pointer.
 *
 * Returns a pointer p to dynamically allocated space.  Each file
 * that could be opened has its list of lines, as built by readfile(),
 * stored as a vector at p[i], in the order the names were given.
 * The end of the list of files is indicated by p[i] being NULL.
 * A file that cannot be opened is reported on stderr and skipped,
 * so p may have fewer entries than names.  Calls quit() if memory
 * runs out.
 *
 * It would probably be more useful, if less appropriate
 * for this example, to return a list of (filename, contents) pairs.
 */
char ***readlots(register char **names) {
	char ***p;
	size_t nread = 0;	/* files read so far */
	FILE *f;
	char **vp;
	int err;

	p = malloc(sizeof(*p));
	if (p == NULL)
		quit(nomem);
	for (; *names != NULL; names++) {
		f = fopen(*names, "r");
		if (f == NULL) {
			/* errno comes from <errno.h>; save it before more stdio */
			err = errno;
			(void) fprintf(stderr, "ThisProg: cannot read %s: %s\n",
				*names, strerror(err));
			continue;
		}
		vp = readfile(f);
		(void) fclose(f);
		/* room for this file's vector plus the final NULL */
		p = realloc(p, (nread + 2) * sizeof(*p));
		if (p == NULL)
			quit(nomem);
		p[nread++] = vp;
	}
	p[nread] = NULL;
	return (p);
}

/* e.g., instead:
struct file_data {
	char	*fd_name;
	char	**fd_text;
};
struct file_data *readlots(register char **names) {
	register struct file_data *p;
	register int nread;
	register FILE *f;
	char **vp;
	extern int errno;

	p = malloc(sizeof(*p));
	if (p == NULL)
		quit(nomem);
	for (nread = 0; *names != NULL; names++) {
		<...same file-reading code as above...>
		p = realloc(p, (nread + 2) * sizeof(*p));
		if (p == NULL)
			quit(nomem);
		p[nread].fd_name = *names;
		p[nread].fd_text = vp;
		nread++;
	}
	p[nread].fd_name = NULL;
	p[nread].fd_text = NULL;
	return (p);
}
*/
/* end of code */
-- 
In-Real-Life: Chris Torek, Univ of MD Comp Sci Dept (+1 301 454 7163)
Domain:	chris at mimsy.umd.edu	Path:	uunet!mimsy!chris

-- 
In-Real-Life: Chris Torek, Univ of MD Comp Sci Dept (+1 301 454 7163)
Domain:	chris at cs.umd.edu	Path:	uunet!mimsy!chris



More information about the Comp.lang.c mailing list