/* file compare Fcom compares two files in either a line-by-line mode or in a strict byte-by-byte mode. The byte-by-byte mode is simple; merely read both files and print the offsets where they differ and the contents. The line compare mode attempts to isolate differences in ranges of lines. Two buffers of lines are read and compared. No hashing of lines needs to be done; hashing only speedily tells you when things are different, not the same. Most files run through this are expected to be largely the same. Thus, hashing buys nothing. *********************************************************************** The algorithm that immediately follows does not work. There is an error somewhere in the range of lines 11 on. An alternative explanation follows. KGS ************************************************************************ [0] Fill buffers [1] If both buffers are empty then [1.1] Done [2] Adjust buffers so 1st differing lines are at top. [3] If buffers are empty then [3.1] Goto [0] This is the difficult part. We assume that there is a sequence of inserts, deletes and replacements that will bring the buffers back into alignment. [4] xd = yd = FALSE [5] xc = yc = 1 [6] xp = yp = 1 [7] If buffer1[xc] and buffer2[yp] begin a "sync" range then [7.1] Output lines 1 through xc-1 in buffer 1 [7.2] Output lines 1 through yp-1 in buffer 2 [7.3] Adjust buffer 1 so line xc is at beginning [7.4] Adjust buffer 2 so line yp is at beginning [7.5] Goto [0] [8] If buffer1[xp] and buffer2[yc] begin a "sync" range then [8.1] Output lines 1 through xp-1 in buffer 1 [8.2] Output lines 1 through yc-1 in buffer 2 [8.3] Adjust buffer 1 so line xp is at beginning [8.4] Adjust buffer 2 so line yc is at beginning [8.5] Goto [0] [9] xp = xp + 1 [10] if xp > xc then [10.1] xp = 1 [10.2] xc = xc + 1 [10.3] if xc > number of lines in buffer 1 then [10.4] xc = number of lines [10.5] xd = TRUE [11] if yp > yc then [11.1] yp = 1 [11.2] yc = yc + 1 [11.3] if yc > number of lines in buffer 2 then [11.4] yc = number of lines [11.5] yd = TRUE [12] if not xd or not yd then [12.1] goto [6] At this point there is no possible match between the buffers. For simplicity, we punt. [13] Display error message. EXPLANATION 2 This is a variation of the Largest Common Subsequence problem. A detailed explanation of this can be found on p 189 of Data Structures and Algorithms by Aho Hopcroft and Ulman. FC maintains two buffers within which it tries to find the Largest Common Subsequence (The largest common subsequence is simply the pattern in buffer1 that yields the most matches with the pattern in buffer2, or the pattern in buffer2 that yields the most matches with the pattern in buffer1) FC makes a simplifying assumption that the contents of one buffer can be converted to the contents of the other buffer by deleting the lines that are different between the two buffers. Two indices into each buffer are maintained: xc, yc == point to the last line that has been scanned up to now xp, yp == point to the first line that has not been exhaustively compared to lines 0 - #c in the other buffer. FC now makes a second simplifying assumption: It is unnecessary to do any calculations on lines that are equal. Hence FC scans File1 and File two line by line until a difference is encountered. When a difference is encountered the two buffers are filled such that the line containing the first difference heads the buffer. The following exhaustive search algorithm is applied to find the first "sync" occurance. (The below is simplified to use == for comparison. In practice more than one line needs to match for a "sync" to be established). FOR xc,yc = 1; xc,yx <= sizeof( BUFFERS ); xc++, yc++ FOR xp,yp = 1; xp,yp <= xc,yc; xp++, yp++ IF ( BUFFER1[xp] == BUFFER2[yc] ) Then the range of lines BUFFER1[ 1 ... xp ] and BUFFER2[ 1 ... yc ] need to be deleted for the two files to be equal. Therefore DISPLAY these ranges, and begin scanning both files starting at the matching lines. FI IF ( BUFFER1[yp] == BUFFER2[xc] ) Then the range of lines BUFFER2[ 1 ... yp ] and BUFFER1[ 1 ... xc ] need to be deleted for the two files to be equal. Therefore DISPLAY these ranges, and begin scanning both files starting at the matching lines. FI FOREND FOREND If a match is not found within the buffers, the message "RESYNC FAILED" is issued and further comparison is aborted since there is no valid way to find further matching lines. END EXPLANATION 2 Certain flags may be set to modify the behavior of the comparison: -a abbreviated output. Rather than displaying all of the modified ranges, just display the beginning, ... and the ending difference -b compare the files in binary (or byte-by-byte) mode. This mode is default on .EXE, .OBJ, .LIB, .COM, .BIN, and .SYS files -c ignore case on compare (cmp = strcmpi instead of strcmp) -l compare files in line-by-line mode -lb n set the size of the internal line buffer to n lines from default of 100 -w ignore blank lines and white space (ignore len 0, use strcmps) -t do not untabify (use fgets instead of fgetl) -n output the line number also -NNNN set the number of lines to resynchronize to n which defaults to 2. Failure to have this value set correctly can result in odd output: file1: file2: abcdefg abcdefg aaaaaaa aaaaaab aaaaaaa aaaaaaa aaaaaaa aaaaaaa abcdefg abcdefg with default sync of 2 yields: with sync => 3 yields: *****f1 *****f1 abcdefg abcdefg aaaaaaa aaaaaaa *****f2 aaaaaaa abcdefg *****f2 aaaaaab abcdefg aaaaaaa aaaaaab aaaaaaa *****f1 aaaaaaa aaaaaaa abcdefg *****f2 aaaaaaa abcdefg WARNING: This program makes use of GOTO's and hence is not as straightforward as it could be! CAVEAT PROGRAMMER. */ #include "tools.h" #include "fc.h" /* #define DEBUG FALSE */ extern int fgetl(), strcmp (); extern byte toupper (); int (*funcRead) (), /* function to use to read lines */ (*fCmp) (); /* function to use to compare lines */ extern byte BadSw[], Bad_ver[], BadOpn[], LngFil[], NoDif[], NoMem[], UseMes[], ReSyncMes[]; int ctSync = -1, /* number of lines required to sync */ cLine = -1; /* number of lines in internal buffs */ flagType fAbbrev = FALSE, /* abbreviated output */ fBinary = FALSE, /* binary comparison */ fLine = FALSE, /* line comparison */ fNumb = FALSE, /* display line numbers */ fCase = TRUE, /* case is significant */ fIgnore = FALSE; /* ignore spaces and blank lines */ #ifdef DEBUG flagType fDebug = FALSE; #endif struct lineType *buffer1, *buffer2; byte line[MAXARG]; /* single line buffer */ byte *extBin[] = { ".EXE", ".OBJ", ".LIB", ".COM", ".BIN", ".SYS", NULL }; main (c, v) int c; byte *v[]; { int i; int j; int fileargs; char *strpbrk(), *slash; char n[2][80]; char temp; extern byte _osmajor, _osminor; word version; /* _osmajor._osminor, used for */ /* version binding checks. */ /* Issue error message if DOS version is not within valid range. */ version = ((word)_osmajor << 8) + (word)_osminor; if (( LOWVERSION > version) || (version > HIGHVERSION)) { usage (Bad_ver, 1); } funcRead = (int (*) ())FNADDR(fgetl); fileargs=0; for (i=1; i < c ; i++) { /** * If argument doesn't begin with a /, parse a filename off of it * then examine the argument for following switches. * **/ if (*v[i] != '/') { slash= strpbrk( v[i],"/" ); if ( slash ) { temp = *slash; *slash='\0' ; strcpy(n[fileargs++],v[i]); *slash =temp ; } else strcpy(n[fileargs++],v[i]); } for ( j=0 ; j < strlen( v[i] ) ; j++) { if(*(v[i]+j)=='/') { switch(toupper( *(v[i]+j+1))) { case 'A' : fAbbrev = TRUE; break; case 'B' : fBinary = TRUE; break; case 'C' : fCase = FALSE; break; #ifdef DEBUG case 'D' : fDebug = TRUE; break; #endif case 'W' : fIgnore = TRUE; break; case 'L' : if (toupper(*(v[i]+j+2))=='B') { cLine = ntoi ((v[i]+j+3),10); break; } else fLine = TRUE; break; case 'N' : fNumb = TRUE; break; case 'T' : funcRead =(int (*) ())FNADDR(fgets); break; default: if (*strbskip((v[i]+j+1),"0123456789") == 0) { ctSync = ntoi ((v[i]+j+1), 10); } else { usage (NULL, 1); } } /* end switch */ } /* end if */ } /* end parse of argument for '/' */ } /* End ARGUMENT Search */ if (fileargs != 2) usage (NULL, 1); if (ctSync != -1) fLine = TRUE; else ctSync = 2; if (cLine == -1) cLine = 100; if (!fBinary && !fLine) { extention (n[0], line); for (i = 0; extBin[i]; i++) if (!strcmpi (extBin[i], line)) fBinary = TRUE; if (!fBinary) fLine = TRUE; } if (fBinary && (fLine || fNumb)) usage (BadSw, 1); if (fIgnore) { if (fCase) fCmp = FNADDR(strcmps); else fCmp = FNADDR(strcmpis); } else { if (fCase) fCmp = FNADDR(strcmp); else fCmp = FNADDR(strcmpi); } if (fBinary) BinaryCompare (n[0], n[1]); else LineCompare (n[0], n[1]); } usage (p, erc) unsigned char *p; { if (p) printf ("fc: %s\n", p); else printf (UseMes); exit (erc); } BinaryCompare (f1, f2) unsigned char *f1, *f2; { register int c1, c2; long pos; FILE *fh1, *fh2; flagType fSame; fSame = TRUE; if ((fh1 = fopen (f1, "rb")) == NULL) { sprintf (line, BadOpn, f1, error ()); usage (line, 1); } if ((fh2 = fopen (f2, "rb")) == NULL) { sprintf (line, BadOpn, f2, error ()); usage (line, 1); } pos = 0L; while (TRUE) { if ((c1 = getc (fh1)) != EOF) { if ((c2 = getc (fh2)) != EOF) { if (c1 == c2) ; else { fSame = FALSE; printf ("%08lX: %02X %02X\n", pos, c1, c2); } } else { sprintf (line, LngFil, f1, f2); usage (line, 1); } } else { if ((c2 = getc (fh2)) == EOF) { if (fSame) usage (NoDif, 0); else exit (1); } else { sprintf (line, LngFil, f2, f1); usage (line, 1); } } pos++; } } /* compare a range of lines */ flagType compare (l1, s1, l2, s2, ct) int l1, l2, ct; register int s1, s2; { #ifdef DEBUG if (fDebug) printf ("compare (%d, %d, %d, %d, %d)\n", l1, s1, l2, s2, ct); #endif if (ct == 0 || s1+ct > l1 || s2+ct > l2) return FALSE; while (ct--) { #ifdef DEBUG if (fDebug) printf ("'%s' == '%s'? ", buffer1[s1].text, buffer2[s2].text); #endif if ((*fCmp)(buffer1[s1++].text, buffer2[s2++].text)) { #ifdef DEBUG if (fDebug) printf ("No\n"); #endif return FALSE; } } #ifdef DEBUG if (fDebug) printf ("Yes\n"); #endif return TRUE; } LineCompare (f1, f2) unsigned char *f1, *f2; { FILE *fh1, *fh2; int l1, l2, i, xp, yp, xc, yc; flagType xd, yd, fSame; int line1, line2; fSame = TRUE; if ((fh1 = fopen (f1, "rb")) == NULL) { sprintf (line, BadOpn, f1, error ()); usage (line, 1); } if ((fh2 = fopen (f2, "rb")) == NULL) { sprintf (line, BadOpn, f2, error ()); usage (line, 1); } if ((buffer1 = (struct lineType *)malloc (cLine * (sizeof *buffer1))) == NULL || (buffer2 = (struct lineType *)malloc (cLine * (sizeof *buffer1))) == NULL) usage (NoMem); l1 = l2 = 0; line1 = line2 = 0; l0: #ifdef DEBUG if (fDebug) printf ("At scan beginning\n"); #endif l1 += xfill (buffer1+l1, fh1, cLine-l1, &line1); l2 += xfill (buffer2+l2, fh2, cLine-l2, &line2); if (l1 == 0 && l2 == 0) { if (fSame) usage (NoDif, 0); return; } xc = min (l1, l2); for (i=0; i < xc; i++) { if (!compare (l1, i, l2, i, 1)) break; } if (i != xc) i = max (i-1, 0); l1 = adjust (buffer1, l1, i); l2 = adjust (buffer2, l2, i); /* KLUDGE ALERT!! GOTO USED */ if (l1 == 0 && l2 == 0) goto l0; l1 += xfill (buffer1+l1, fh1, cLine-l1, &line1); l2 += xfill (buffer2+l2, fh2, cLine-l2, &line2); #ifdef DEBUG if (fDebug) printf ("buffers are adjusted, %d, %d remain\n", l1, l2); #endif xd = yd = FALSE; xc = yc = 1; xp = yp = 1; l6: #ifdef DEBUG if (fDebug) printf ("Trying resync %d,%d %d,%d\n", xc, xp, yc, yp); #endif i = min (l1-xc,l2-yp); i = min (i, ctSync); if (compare (l1, xc, l2, yp, i)) { fSame = FALSE; printf ("***** %s\n", f1); dump (buffer1, 0, xc); printf ("***** %s\n", f2); dump (buffer2, 0, yp); printf ("*****\n\n"); l1 = adjust (buffer1, l1, xc); l2 = adjust (buffer2, l2, yp); /* KLUDGE ALERT!! GOTO USED */ goto l0; } i = min (l1-xp, l2-yc); i = min (i, ctSync); if (compare (l1, xp, l2, yc, i)) { fSame = FALSE; printf ("***** %s\n", f1); dump (buffer1, 0, xp); printf ("***** %s\n", f2); dump (buffer2, 0, yc); printf ("*****\n\n"); l1 = adjust (buffer1, l1, xp); l2 = adjust (buffer2, l2, yc); /* KLUDGE ALERT!! GOTO USED */ goto l0; } if (++xp > xc) { xp = 1; if (++xc >= l1) { xc = l1; xd = TRUE; } } if (++yp > yc) { yp = 1; if (++yc >= l2) { yc = l1; yd = TRUE; } } if (!xd || !yd) goto l6; fSame = FALSE; if (l1 >= cLine || l2 >= cLine) printf ("%s", ReSyncMes); printf ("***** %s\n", f1); dump (buffer1, 0, l1-1); printf ("***** %s\n", f2); dump (buffer2, 0, l2-1); printf ("*****\n\n"); exit (1); } /* return number of lines read in */ xfill (pl, fh, ct, plnum) struct lineType *pl; FILE *fh; int ct; int *plnum; { int i; #ifdef DEBUG if (fDebug) printf ("xfill (%04x, %04x)\n", pl, fh); #endif i = 0; while (ct-- && (*funcRead) (pl->text, MAXARG, fh) != NULL) { if (funcRead == (int (*) ())FNADDR(fgets)) pl->text[strlen(pl->text)-1] = 0; if (fIgnore && !strcmps (pl->text, "")) pl->text[0] = 0; if (strlen (pl->text) != 0 || !fIgnore) { pl->line = ++*plnum; pl++; i++; } } #ifdef DEBUG if (fDebug) printf ("xfill returns %d\n", i); #endif return i; } /* adjust returns number of lines in buffer */ adjust (pl, ml, lt) struct lineType *pl; int ml; int lt; { #ifdef DEBUG if (fDebug) printf ("adjust (%04x, %d, %d) = ", pl, ml, lt); if (fDebug) printf ("%d\n", ml-lt); #endif if (ml <= lt) return 0; #ifdef DEBUG if (fDebug) printf ("move (%04x, %04x, %04x)\n", &pl[lt], &pl[0], sizeof (*pl)*(ml-lt)); #endif Move ((unsigned char far *)&pl[lt], (char far *)&pl[0], sizeof (*pl)*(ml-lt)); return ml-lt; } /* dump * dump outputs a range of lines. * * INPUTS * pl pointer to current lineType structure * start starting line number * end ending line number * * CALLS * pline, printf * */ dump (pl, start, end) struct lineType *pl; int start, end; { if (fAbbrev && end-start > 2) { pline (pl+start); printf ("...\n"); pline (pl+end); } else while (start <= end) pline (pl+start++); } /* PrintLINE * pline prints a single line of output. If the /n flag * has been specified, the line number of the printed text is added. * * Inputs * pl pointer to current lineType structure * fNumb TRUE if /n specified * */ pline (pl) struct lineType *pl; { if (fNumb) printf ("%5d: ", pl->line); printf ("%s\n", pl->text); } /* * strcmpi will compare two string lexically and return one of * the following: * - 0 if the strings are equal * - 1 if first > the second * - (-1) if first < the second * * This was written to replace the run time library version of * strcmpi which does not correctly compare the european character set. * This version relies on a version of toupper which uses IToupper. */ int strcmpi(str1, str2) unsigned char *str1, *str2; { unsigned char c1, c2; while ((c1 = toupper(*str1++)) == (c2 = toupper(*str2++))) { if (c1 == '\0') return(0); } if (c1 > c2) return(1); else return(-1); } /* compare two strings, ignoring white space, case is significant, return * 0 if identical, <>0 otherwise */ strcmps (p1, p2) unsigned char *p1, *p2; { while (TRUE) { while (ISSPACE(*p1)) p1++; while (ISSPACE(*p2)) p2++; if (*p1 == *p2) if (*p1++ == 0) return 0; else p2++; else return *p1-*p2; } } /* compare two strings, ignoring white space, case is not significant, return * 0 if identical, <>0 otherwise */ int strcmpis (p1, p2) unsigned char *p1, *p2; { while (TRUE) { while (ISSPACE(*p1)) p1++; while (ISSPACE(*p2)) p2++; if (toupper (*p1) == toupper (*p2)) if (*p1++ == 0) return 0; else p2++; else return *p1-*p2; } }