From e4ebdbec894a0edf237c76737218a309db967794 Mon Sep 17 00:00:00 2001 From: Francois Fleuret Date: Tue, 17 Mar 2009 17:26:37 +0100 Subject: [PATCH] Changed the removal of duplicates so that the most recent one is kept instead of the oldest one. --- selector.cc | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/selector.cc b/selector.cc index 2725c2b..7422fd6 100644 --- a/selector.cc +++ b/selector.cc @@ -114,13 +114,17 @@ int test_and_add(char *new_string, int new_index, code = code % hash_table_size; while(hash_table[code] >= 0) { - if(strcmp(new_string, strings[hash_table[code]]) == 0) return 1; + if(strcmp(new_string, strings[hash_table[code]]) == 0) { + int result = hash_table[code]; + hash_table[code] = new_index; + return result; + } code = (code + 1) % hash_table_size; } hash_table[code] = new_index; - return 0; + return -1; } ////////////////////////////////////////////////////////////////////// @@ -546,16 +550,36 @@ int main(int argc, char **argv) { while(*s == ' ' || (*s >= '0' && *s <= '9')) s++; } - if(!hash_table || !test_and_add(s, nb_lines, lines, hash_table, hash_table_size)) { + int dup; + + if(hash_table) { + dup = test_and_add(s, nb_lines, lines, hash_table, hash_table_size); + } else { + dup = -1; + } + + if(dup < 0) { lines[nb_lines] = new char[strlen(s) + 1]; strcpy(lines[nb_lines], s); - nb_lines++; + } else { + lines[nb_lines] = lines[dup]; + lines[dup] = 0; } + + nb_lines++; } } delete[] hash_table; + int n = 0; + for(int k = 0; k < nb_lines; k++) { + if(lines[k]) { + lines[n++] = lines[k]; + } + } + nb_lines = n; + if(inverse_order) { for(int i = 0; i < nb_lines/2; i++) { char *s = lines[nb_lines - 1 - i]; -- 2.39.5