--- sort.c.orig	2003-09-05 13:50:32.000000000 -0700
+++ sort.c.dg	2004-06-28 23:06:56.000000000 -0700
@@ -1956,18 +1956,27 @@ merge (char **files, int nfiles, int max
   while (max_merge < nfiles)
     {
       FILE *tfp;
       int i, t = 0;
       char *temp;
-      for (i = 0; i < nfiles / NMERGE; ++i)
+      /* Split the inputs into the fewest groups of at most NMERGE files
+	 and spread any remainder over the leading groups, so group sizes
+	 differ by at most one.  This avoids merging NMERGE-sized groups
+	 alongside one tiny leftover group, never merges an empty group,
+	 and produces no more temporary files than necessary.  */
+      int steps = (nfiles + NMERGE - 1) / NMERGE;
+      int stepsize = nfiles / steps;
+      int extra = nfiles % steps;
+      int next = 0;
+
+      for (i = 0; i < steps; ++i)
 	{
+	  int count = stepsize + (i < extra);
 	  temp = create_temp_file (&tfp);
-	  mergefps (&files[i * NMERGE], NMERGE, tfp, temp);
+	  mergefps (&files[next], count, tfp, temp);
 	  files[t++] = temp;
+	  next += count;
 	}
-      temp = create_temp_file (&tfp);
-      mergefps (&files[i * NMERGE], nfiles % NMERGE, tfp, temp);
-      files[t++] = temp;
       nfiles = t;
       if (nfiles == 1)
 	break;
     }