/* Use the dictionary to find anagrams. This program is copyright Tanglewood Algoritms Limited, and is distributed under the GNU General Public Licence */ #include #include #define Letters ( 'Z' - 'A' + 1 ) static char * HelpText = "\ anag [-ddictionary] [-r] [-p] [-o] words...\n\ \n\ This is anag v1.03, it finds words and phrases which are anagrams of the\n\ words given on the command line.\n\ \n\ -ddictionary Set the dictionary path, default is /usr/dict/words\n\ -r Find solutions containing repeated words\n\ -p Permit the words on the command line to appear\n\ in the solution\n\ -o Check the ordering of the dictionary\n\ \n\ " ; /* Switch values */ char * DicPath ; int Repeat, Permit, Order ; /* One instance of this structure is created for every word in the dictionary. The map is a bitmap of letters in the word, and is used for quick elimination of "impossible" words that contain any letters not in the anagram we are hunting. */ struct DictEntry { unsigned long Map ; char * Word ; } ; /* Bit set in the map to inhibit words which contain punctuation, are duplicates or exist in the original anagram */ #define Punctuation 31 #define Duplicate 30 #define Source 29 /* The actual dictionary structure, the text area, and the number of words in the dictionary. */ struct DictEntry * Dictionary ; char * Text ; long Words ; /* This structure contains the letter counts for a word or phrase */ struct LetterCount { unsigned char L [ Letters ] ; } ; struct LetterCount Original ; /* This structure points to the text of a word which may be a partial match */ struct Match { struct Match * Next ; char * Word ; } ; /* This is the root of a linked list of struct Match which holds the result */ struct Match * Root ; /* This procedure counts the letters in a word or phrase and either adds them to an existing letter count, or zeros the count first */ static void Count ( char * String, int Continue, struct LetterCount * Counter ) { int N, C ; /* Conditionally zero the count */ if ( ! Continue ) for ( N = 0; N < Letters; N++ ) Counter -> L [ N ] = 0 ; for ( N = 0; String [ N ]; N++ ) { int K ; C = String [ N ] ; K = ( C >= 'A' && C <= 'Z' ) ? ( C - 'A' ) : ( ( C >= 'a' && C <= 'z' ) ? ( C - 'a' ) : -1 ) ; if ( K >= 0 ) { Counter -> L [ K ] ++ ; /* Test for counter overflow */ if ( ! Counter -> L [ K ] ) { printf ( "The phrase %s is too long to be handled\n", String ) ; exit ( 1 ) ; } } } } /* This procedure hunts for words which can deplete the original letter counts. If it finds a word that can be made with the available letters, then it calls itself recursively to hunt for a word that can be made with what remains. When it finds that it hass called itself with a null list, it stops. It creates a list od struct Match as it goes, so that the list of words found can be printed. The point in the dictionary at which to start is passed as a parameter. */ static void Hunt ( struct Match ** Prev, long Start ) { int N ; unsigned long Map ; long Word ; struct Match Result ; /* Convert the letter counts to a map of letters present in the word */ for ( N = Map = 0; N < Letters; N++ ) if ( Original . L [ N ] ) Map |= 1 << N ; /* If we end up with no bits set, then we have finished, and report success. */ if ( !Map ) { struct Match * Report ; for ( Report = Root; Report; Report = Report -> Next ) printf ( "%s ", Report -> Word ) ; printf ( "\n" ) ; return ; } /* Link our match structure on to the end of the chain */ *Prev = &Result ; /* Now invert the map, so that it lists all the letters we don't want */ Map = ~Map ; /* Now run down the dictionary looking for possible words */ for ( Word = Start; Word < Words; Word++ ) if ( ! ( Dictionary [ Word ] . Map & Map ) ) { char * P ; int K ; /* We now have a possible word that contains only letters in the current letter count. The next step is to find out if there are enough of each letter to match the word. To do this we run down the word, depleting the letter counts as we go. */ for ( P = Dictionary [ Word ] . Word; *P; P++ ) { K = *P - 'a' ; if ( Original . L [ K ] ) Original . L [ K ] -- ; else break ; } /* If we have used up the entire word, then recurse to find the next one, either starting one word further down thi dictionary, or with the same one if the repeat flag is set. */ if ( !*P ) { Result . Next = 0 ; Result . Word = Dictionary [ Word ] . Word ; Hunt ( &Result . Next, Word + 1 - Repeat ) ; Result . Next = 0 ; } /* Now put back either the whole word, or just the letters that we used failing to get a match. */ for (;;) { P-- ; if ( !*P ) break ; K = *P - 'a' ; Original . L [ K ] ++ ; } } } /* This procedure finds a word if it exists in the dictionary, and sets the source bit in the map to prevent its being found by Hunt. It is used to inhibit words on the command line */ static void Inhibit ( char * Word ) { int LowLim, HighLim, N, C ; /* LowLim and HighLim are words in the dictionary which have been tested against the word we are looking for, and are definitely before and after it, so that if the target word exists, it lies in this range. They start with out of range values. */ LowLim = -1 ; HighLim = Words ; /* Iterate, splitting the range every time */ for (;;) { /* N is the word we will look at next */ N = ( LowLim + HighLim ) / 2 ; /* Do a comparison */ C = strcasecmp ( Word, Dictionary [ N ] . Word ) ; /* Have we found the word ? */ if ( !C ) { Dictionary [ N ] . Map |= 1L << Source ; return ; } /* Depending on which way the comparison came out, shift one or other limit */ if ( C < 0 ) HighLim = N ; else LowLim = N ; /* If the limits meet, then fail. Note that if the limits do not meet there is always at least one valid word between them, to become the new N, so we don't have to do anything about end conditions. */ if ( HighLim == LowLim + 1 ) return ; } } main ( int Ac, char ** Av ) { FILE *Dic ; int C, Cl ; long Length, Longest, LetterCount ; long WordNum, Pos ; int Bit, Arg ; /* Help text */ if ( Ac < 2 ) { puts ( HelpText ) ; exit ( 0 ) ; } /* Default dictionary */ DicPath = "/usr/dict/words" ; /* Go through the arguments looking for words */ for ( Arg = 1; Arg < Ac; Arg++ ) { if ( Av [ Arg ] [ 0 ] == '-' ) { switch ( Av [ Arg ] [ 1 ] ) { case 'd' : DicPath = & Av [ Arg ] [ 2 ] ; break ; case 'r' : Repeat = 1 ; break ; case 'p' : Permit = 1 ; break ; case 'o' : Order = 1 ; break ; default: printf ( "I don't understand \"%s\"\n", Av [ Arg ] ) ; printf ( "Run the program with no arguments for help text\n" ) ; exit ( 1 ) ; } } else Count ( Av [ Arg ], Arg > 1, &Original ) ; } /* Open the dictionary */ Dic = fopen ( DicPath, "r" ) ; if ( !Dic ) { printf ( "I can't find a dictionary at %s\n", DicPath ) ; exit ( 1 ) ; } /* Pass 1 over dictionary, count letters and newlines */ Length = LetterCount = Longest = Words = 0 ; while ( ( C = getc ( Dic ) ) != EOF ) { if ( C >= 'a' && C <= 'z' || C >= 'A' && C <= 'Z' ) { Length ++ ; LetterCount ++ ; } else if ( C == '\n' ) { Words++ ; if ( Length > Longest ) Longest = Length ; Length = 0 ; } } /* Now allocate enough space to hold the dictionary and the actual words */ Dictionary = ( struct DictEntry * ) malloc ( Words * sizeof ( struct DictEntry ) ) ; Text = ( char * ) malloc ( Words + LetterCount + 1 ) ; /* Pass 2 through the dictionary builds the structs and puts the letters in the text area, discarding punctuation */ rewind ( Dic ) ; Pos = 1 ; Text [ 0 ] = 0 ; for ( WordNum = 0; WordNum < Words; WordNum++ ) { Dictionary [ WordNum ] . Word = Text + Pos ; Dictionary [ WordNum ] . Map = 0 ; for ( ; ; ) { C = getc ( Dic ) ; if ( C == '\n' || C == EOF ) break ; if ( C >= 'A' && C <= 'Z' ) { Cl = C + 'a' - 'A' ; Bit = C - 'A' ; } else if ( C >= 'a' && C <= 'z' ) { Cl = C ; Bit = C - 'a' ; } else { Bit = Punctuation ; Cl = 0 ; } Dictionary [ WordNum ] . Map |= 1L << Bit ; if ( Cl ) Text [ Pos ++ ] = Cl ; } Text [ Pos++ ] = 0 ; } /* Do a pass down the dictionary checking for duplicates and flagging them. If the order check flag is set, report order errors as well */ for ( WordNum = 0; WordNum < Words - 1; WordNum++ ) { int C ; /* Compare words */ C = strcasecmp ( Dictionary [ WordNum ] . Word, Dictionary [ WordNum + 1 ] . Word ) ; /* For duplicates (This usually just means variation of case in the dictionary) flag one as a duplicate */ if ( !C ) Dictionary [ WordNum ] . Map |= 1L << Duplicate ; /* Report ordering errors and stop if requested. However, if the dictionary is unsorted, the algorithm will still produce useful output, so keep going. */ if ( Order && C > 0 ) { printf ( "Dictionary error, giving up now,\n" ) ; printf ( "Word %d (%s) is before word %d (%s)\n", WordNum, Dictionary [ WordNum ] . Word, WordNum + 1, Dictionary [ WordNum + 1 ] . Word ) ; exit ( 1 ) ; } } /* If the permit flag isn't set, inhibit words on the command line */ if ( !Permit ) for ( Arg = 1; Arg < Ac; Arg++ ) { if ( Av [ Arg ] [ 0 ] != '-' ) Inhibit ( Av [ Arg ] ) ; } /* Now hunt recursively for anagrams */ Hunt ( &Root, 0 ) ; }