1. A word search problem
You may have seen the classic word search problem where the goal is to change one word to another one letter at a time, so that all the intermediate states are all words. For example, we can change cat to dog in three steps as follows:
cat cot cog dog
For this assignment you are to consider a variant of this game in which you are also allowed to add or delete a single letter as a step. This allows, for example, changing dog to fish:
dog dig fig fit fist fish
For this assignment, you are to write a program that takes the initial word as argv[1], the target word as argv[2], and a dictionary from stdin, and finds a shortest sequence of intermediate words from the dictionary that gets from argv[1] to argv[2], changing, deleting, or inserting one letter at each step in the sequence. The output of your program should consist of some such sequence, one word per line, with argv[1] as the first line and argv[2] as the last.
2. Details
- The format of the dictionary is one word per line, with each line terminated by a newline.
You may assume that dictionary will consist only of words made from one or more lowercase letters in the range 'a' through 'z'.
You should not assume that argv[1] and argv[2] appear in the dictionary. Only the intermediate words need to appear in the dictionary for a path to work.
- If there is more than one shortest sequence, you may output whichever one you like.
If there is no path from argv[1] to argv[2], your program should print the single line NO PATH.
3. Submitting your assignment
Submit your assignment as usual using /c/cs223/bin/submit 10. You may either submit a single file wordpath.c, or multiple .c and .h files together with a Makefile that generates a program wordpath when called with make wordpath.
4. Testing your assignment
There are several dictionaries in /c/cs223/Hwk10 that you can use to test your assignment, including
empty.dict |
An empty file |
small.dict |
A very small dictionary |
medium.dict |
All lowercase words in /usr/share/dict/words of 5 letters or less |
big.dict |
All lowercase words in /usr/share/dict/words |
long.dict |
A specialized dictionary used to generate very long paths |
The script /c/cs223/Hwk10/test.public will run various search problems against these dictionaries. See the comment near the top of the script to find out how to read the descriptions of these problems. Because the same search problem may have multiple solutions of the same length, we cannot supply sample output; instead, the command /c/cs223/Hwk10/verify can be used to check the output of wordpath against a hashed version of the dictionary. Typical usage would be
./wordpath cat dog < /c/cs223/Hwk10/medium.dict | /c/cs223/Hwk10/verify cat dog 4 /c/cs223/Hwk10/medium.hash
The arguments to verify are the starting and ending words, the maximum length of the path, and the hashed dictionary to use. If the output of wordpath is acceptable, verify will exit with code 0 and print nothing; otherwise, it will exit with code 1 after giving a brief explanation of the problem.
Once you have submitted your assignment, you can run the public test script on your submitted files with /c/cs223/bin/testit 10 public.
5. Sample solution
The sample solution uses a lot of tools from previous assignments and lectures:
The ranktable data structure based on ternary search trees from CS223/Assignments/HW09: ranktable.h, ranktable.c.
The graph data structure and graph search routines from C/Graphs: graph.h, graph.c, search.h, search.c.
The ranktable and graph structures are both used to build a dictionary in which words are assigned numbers (using the ranktable) and then organized into a graph with nodes corresponding to the words and edges corresponding to legal moves. The use of the ranktable structure is overkill; a simple sorted array would do as well. Here is the dictionary implementation.
1 /* dictionary data structure for word search problems */
2
3 typedef struct dict *Dict;
4
5 /* create a new empty dictionary */
6 Dict dict_create(void);
7
8 /* add a word to a dictionary */
9 /* this must consist only of the letters a-z */
10 #define MIN_CHAR ('a')
11 #define MAX_CHAR ('z')
12 void dict_insert(Dict, const char *);
13
14 /* free a dictionary */
15 void dict_destroy(Dict);
16
17 /* load a dictionary one line at a time from a FILE * */
18 void dict_load(Dict, FILE *);
19
20 /* return a graph corresponding to the adjacency structure of Dict */
21 /* where two words are adjacent if one can be made from the other by
22 * a one-letter insertion, deletion, or replacement */
23 /* The graph will be destroyed when the dict is */
24 /* Do not change the dict after asking for this graph. */
25 Graph dict_graph(Dict);
26
27 /* translate a vertex number back to a word */
28 const char *dict_word(Dict, int vertex);
29
30 /* translate a word to a vertex number */
31 /* returns DICT_NO_MATCH if there is no match */
32 #define DICT_NO_MATCH (-1)
33
34 int dict_vertex(Dict, const char *);
35
36 /* test if two words are adjacent (used for verification) */
37 int dict_adjacent(Dict, const char *, const char *);
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <assert.h>
4 #include <string.h>
5
6 #include "graph.h"
7 #include "ranktable.h"
8 #include "getline.h"
9
10 #include "dict.h"
11
12
13 /* dictionary data structure for word search problems */
14 struct dict {
15 Table table; /* word list */
16 Graph graph; /* graph structure */
17 };
18
19 /* create a new empty dictionary */
20 Dict
21 dict_create(void)
22 {
23 Dict d;
24
25 d = malloc(sizeof(*d));
26 assert(d);
27
28 d->table = EMPTY_TABLE;
29 d->graph = 0;
30
31 return d;
32 }
33
34 /* add a word to a dictionary */
35 void
36 dict_insert(Dict d, const char *s)
37 {
38 #ifndef NDEBUG
39 const char *p;
40
41 for(p = s; *p; p++) assert(MIN_CHAR <= *p && *p <= MAX_CHAR);
42 #endif
43
44 /* no inserts once the graph is built */
45 assert(d->graph == 0);
46
47 d->table = table_insert(d->table, s);
48 }
49
50 /* free a dictionary */
51 void
52 dict_destroy(Dict d)
53 {
54 table_destroy(d->table);
55 graph_destroy(d->graph);
56 free(d);
57 }
58
59 /* load a dictionary one line at a time from a FILE * */
60 void
61 dict_load(Dict d, FILE *f)
62 {
63 char *line;
64
65 while((line = getline('\n')) != 0) {
66 dict_insert(d, line);
67 free(line);
68 }
69 }
70
71 /* return a graph corresponding to the adjacency structure of Dict */
72 /* where two words are adjacent if one can be made from the other by
73 * a one-letter insertion, deletion, or replacement */
74 /* It is the caller's responsibility to free this graph. */
75 Graph
76 dict_graph(Dict d)
77 {
78 int n; /* size of dictionary */
79 int i; /* dictionary index */
80 const char *u; /* word at position i */
81 char *v; /* modified word */
82 int j; /* sweeps through character positions in u */
83 char c; /* choice of characters */
84 int vv; /* vertex for v if any */
85
86 if(d->graph == 0) {
87 /* have to build it */
88 n = table_size(d->table);
89
90 d->graph = graph_create(n);
91
92 for(i = 0; i < n; i++) {
93
94 u = dict_word(d, i);
95
96 /* now try all the variations */
97 v = malloc(strlen(u) + 1);
98
99 /* replacements */
100 strcpy(v, u);
101 for(j = 0; v[j]; j++) {
102 for(c = MIN_CHAR; c <= MAX_CHAR; c++) {
103 v[j] = c;
104
105 vv = dict_vertex(d, v);
106 if(vv != DICT_NO_MATCH) {
107 /* got one */
108 /* add only forward edge
109 * since vv will get the reverse edge */
110 graph_add_edge(d->graph, i, vv);
111 }
112 }
113
114 /* restore original character */
115 v[j] = u[j];
116 }
117
118 /* deletions */
119 for(j = 0; u[j]; j++) {
120 /* this does a lot of extra copying but it works */
121 strcpy(v, u);
122 strcpy(v + j, u + j + 1);
123
124 vv = dict_vertex(d, v);
125 if(vv != DICT_NO_MATCH) {
126 /* add edge and reverse edge */
127 /* (reverse edge will not be added later) */
128 graph_add_edge(d->graph, i, vv);
129 graph_add_edge(d->graph, vv, i);
130 }
131 }
132
133 free(v);
134 }
135 }
136
137 /* now d->graph exists, return it */
138 return d->graph;
139 }
140
141 /* translate a vertex number back to a word */
142 const char *
143 dict_word(Dict d, int vertex)
144 {
145 return table_find_by_rank(d->table, vertex);
146 }
147
148 /* translate a word to a vertex number */
149 /* returns DICT_NO_MATCH if there is no match */
150 int
151 dict_vertex(Dict d, const char *s)
152 {
153 unsigned long rank;
154
155 rank = table_rank(d->table, s);
156
157 if(rank == TABLE_NOT_FOUND) {
158 return DICT_NO_MATCH;
159 } else {
160 return rank;
161 }
162 }
163
164 /* test if two words are adjacent (used for verification) */
165 int
166 dict_adjacent(Dict d, const char *s1, const char *s2)
167 {
168 return graph_has_edge(dict_graph(d), dict_vertex(d, s1), dict_vertex(d, s2));
169 }
And here is the main program.
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <assert.h>
4
5 #include "graph.h"
6 #include "search.h"
7 #include "dict.h"
8
9 int
10 main(int argc, char **argv)
11 {
12 Dict d;
13 struct search_info *s;
14 int v1;
15 int v2;
16
17 d = dict_create();
18 dict_load(d, stdin);
19
20 dict_insert(d, argv[1]);
21 dict_insert(d, argv[2]);
22
23 v1 = dict_vertex(d, argv[1]);
24 assert(v1 != DICT_NO_MATCH);
25
26 v2 = dict_vertex(d, argv[2]);
27 assert(v2 != DICT_NO_MATCH);
28
29 /* we will search for a path from v2 to v1 */
30 /* this will avoid having to reverse the parent pointers */
31 /* to print it out */
32 s = search_info_create(dict_graph(d));
33 bfs(s, v2);
34
35 if(s->parent[v1] == SEARCH_INFO_NULL) {
36 puts("NO PATH");
37 } else {
38 puts(dict_word(d, v1));
39 while(s->parent[v1] != v1) {
40 v1 = s->parent[v1];
41 puts(dict_word(d, v1));
42 }
43 }
44
45 search_info_destroy(s);
46 dict_destroy(d);
47
48 return 0;
49 }
And finally a Makefile to put everything together.