/* This file implements an integer set using a hashtable. Copyright (C) 2004 The University of Texas at Austin This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. Email: usman@cs.njit.edu Mail: Usman Roshan, Department of Computer Sciences, NJIT-CCS, Computer Science Department, GITC 4400, University Heights, Newark, NJ 07102 */ #include #include #include #include #include #include #include static short EMPTY_ENTRY = -1; static short REMOVED_ENTRY = -2; int global_probelength = 0; void get_entries_int_set(int_set ht, int *entries) { int i, j; j = 0; assert(ht && entries); for (i = 0; i < ht->size_; i++) if (ht->table_[i] != EMPTY_ENTRY && ht->table_[i] != REMOVED_ENTRY) entries[j++] = ht->table_[i]; } void assign_int_set(int_set ht1, int_set ht2) { int i; assert(ht1 && ht2); ht1->nelements = ht2->nelements; ht1->size_ = ht2->size_; ht1->a1_ = ht2->a1_; ht1->a2_ = ht2->a2_; ht1->b1_ = ht2->b1_; ht1->b2_ = ht2->b2_; ht1->probelength_ = ht2->probelength_; ht1->h_ = ht2->h_; assert(ht1->table_); free(ht1->table_); ht1->table_ = (int *) malloc(ht1->size_ * sizeof(int)); for (i = 0; i < ht1->size_; i++) ht1->table_[i] = ht2->table_[i]; } short equal_int_set(int_set ht1, int_set ht2) { int i; int *ht1_entries; int *ht2_entries; assert(ht1 && ht2); if (ht1->nelements != ht2->nelements) return 0; ht1_entries = (int *) malloc(ht1->nelements * sizeof(int)); get_entries_int_set(ht1, ht1_entries); for (i = 0; i < ht1->nelements; i++) if (lookup_int_set(ht2, ht1_entries[i]) == EMPTY_ENTRY) { free(ht1_entries); return 0; /* printf("probelength=%d\n", ht2->probelength_); */ } free(ht1_entries); ht2_entries = (int *) malloc(ht2->nelements * sizeof(int)); get_entries_int_set(ht2, ht2_entries); for (i = 0; i < ht2->nelements; i++) if (lookup_int_set(ht1, ht2_entries[i]) == EMPTY_ENTRY) { free(ht2_entries); return 0; /* printf("probelength=%d\n", ht1->probelength_); */ } free(ht2_entries); return 1; } int_set diff_int_set(int_set ht1, int_set ht2) { int i; int *ht1_entries; int_set result; assert(ht1 && ht2); global_probelength = 0; result = new_int_set(8); if (ht1->nelements == 0) return result; if (ht2->nelements == 0) { assign_int_set(result, ht1); return result; } ht1_entries = (int *) malloc(ht1->nelements * sizeof(int)); get_entries_int_set(ht1, ht1_entries); for (i = 0; i < ht1->nelements; i++) if (lookup_int_set(ht2, ht1_entries[i]) == EMPTY_ENTRY) { global_probelength += ht2->probelength_; insert_int_set(result, ht1_entries[i]); global_probelength += result->probelength_; } else global_probelength += ht2->probelength_; free(ht1_entries); return result; } int_set intersect_int_set(int_set ht1, int_set ht2) { int i; int *ht1_entries; int *ht2_entries; int_set result; assert(ht1 && ht2); result = new_int_set(8); if (ht1->nelements == 0 || ht2->nelements == 0) return result; ht1_entries = (int *) malloc(ht1->nelements * sizeof(int)); get_entries_int_set(ht1, ht1_entries); for (i = 0; i < ht1->nelements; i++) if (lookup_int_set(ht2, ht1_entries[i]) != EMPTY_ENTRY) insert_int_set(result, ht1_entries[i]); free(ht1_entries); ht2_entries = (int *) malloc(ht2->nelements * sizeof(int)); get_entries_int_set(ht2, ht2_entries); for (i = 0; i < ht2->nelements; i++) if (lookup_int_set(ht1, ht2_entries[i]) != EMPTY_ENTRY) insert_int_set(result, ht2_entries[i]); free(ht2_entries); return result; } int_set union_int_set(int_set ht1, int_set ht2) { int i; int *ht1_entries; int *ht2_entries; int_set result; assert(ht1 && ht2); global_probelength = 0; result = new_int_set(8); if (ht1->nelements == 0) { assign_int_set(result, ht2); return result; } else if (ht2->nelements == 0) { assign_int_set(result, ht1); return result; } ht1_entries = (int *) malloc(ht1->nelements * sizeof(int)); get_entries_int_set(ht1, ht1_entries); for (i = 0; i < ht1->nelements; i++) { insert_int_set(result, ht1_entries[i]); global_probelength += result->probelength_; } free(ht1_entries); ht2_entries = (int *) malloc(ht2->nelements * sizeof(int)); get_entries_int_set(ht2, ht2_entries); for (i = 0; i < ht2->nelements; i++) { insert_int_set(result, ht2_entries[i]); global_probelength += result->probelength_; } free(ht2_entries); return result; } short subseteq_int_set(int_set ht1, int_set ht2) { int i; int *ht1_entries; assert(ht1 && ht2); if (ht1->nelements > ht2->nelements) return 0; ht1_entries = (int *) malloc(ht1->nelements * sizeof(int)); get_entries_int_set(ht1, ht1_entries); for (i = 0; i < ht1->nelements; i++) if (lookup_int_set(ht2, ht1_entries[i]) == EMPTY_ENTRY) { free(ht1_entries); return 0; /* printf("probelength=%d\n", ht2->probelength_); */ } free(ht1_entries); return 1; } void print_int_set(int_set ht) { int i = 0; for (i = 0; i < ht->size_; i++) if (ht->table_[i] != EMPTY_ENTRY) printf(" %d", ht->table_[i]); } int probelength_int_set(int_set ht) { assert(ht); return ht->probelength_; } void del_int_set(int_set ht) { assert(ht); free(ht->table_); free(ht); } int_set new_int_set(long sz) { int i; double x; double lgsz, lg2; int_set ht; ht = (int_set) malloc(sizeof(struct int_set_struct)); /* setting int_set to be size of power of 2 */ /* this is required so that proper open addressing can occur, */ /* i.e. eventually every entry in the table is considered */ lgsz = log(sz); lg2 = log(2); if (floor(lgsz / lg2) != ceil(lgsz / lg2)) { /* which means that sz is not a power of 2 */ x = ceil(lgsz / lg2); sz = pow(2, x + 1); } ht->size_ = sz; ht->nelements = 0; /* allocate memory for table */ ht->table_ = (int *) malloc(ht->size_ * sizeof(int)); /* initialize to -1, because 0 is a possible member of the set */ for (i = 0; i < ht->size_; i++) ht->table_[i] = EMPTY_ENTRY; ht->P_ = 2147483647; /* initialize P (large prime) */ ht->probelength_ = 0; /* initialize hash1 and hash2 variables */ srand(time(0)); ht->a1_ = rand() * (ht->P_ - 1) + 1; ht->a2_ = rand() * (ht->P_ - 1) + 1; ht->b1_ = rand() * ht->P_; ht->b2_ = rand() * ht->P_; return ht; } /* * This is called before hash1 and hash2. It sets the value of h_ to be used * by hash1 and hash2 and saves excessive calls to the hash function. */ unsigned long int_set_hash(int_set ht, int key) { assert(ht); assert(key >= 0); key += (key << 12); key ^= (key >> 22); key += (key << 4); key ^= (key >> 9); key += (key << 10); key ^= (key >> 2); key += (key << 7); key ^= (key >> 12); ht->h_ = key; return key; } unsigned long int_set_hash1(int_set ht) { return ((((ht->a1_ * ht->h_) + ht->b1_) % ht->P_) % ht->size_); } unsigned long int_set_hash2(int_set ht) { return ((((ht->a2_ * ht->h_ + ht->b2_) % ht->P_) % (ht->size_ - 3)) | 1); } void insert_int_set(int_set ht, int key) { unsigned long i; int h1, h2; assert(ht); assert(key >= 0); int_set_hash(ht, key); h1 = int_set_hash1(ht); h2 = int_set_hash2(ht); ht->probelength_ = 0; for (i = h1; ht->table_[i] != EMPTY_ENTRY && ht->table_[i] != REMOVED_ENTRY; i = (i + h2) % ht->size_) { ht->probelength_++; if (ht->table_[i] == key) return; } ht->table_[i] = key; ht->nelements++; if (2 * ht->nelements > ht->size_) { resize_int_set(ht, 2 * ht->size_); /* verifying */ /* for(i=0; i < ht->size_; i++) if(ht->table_[i] != EMPTY_ENTRY) assert(lookup_int_set(ht, ht->table_[i])!=EMPTY_ENTRY); printf("new size=%d, new elements=%d\n", ht->size_,ht->nelements); */ /**********/ } return; } /* assert: key must be present in table */ void remove_int_set(int_set ht, int key) { unsigned long i; int h1, h2; assert(ht); assert(key >= 0); assert(lookup_int_set(ht, key) != EMPTY_ENTRY); int_set_hash(ht, key); h1 = int_set_hash1(ht); h2 = int_set_hash2(ht); ht->probelength_ = 0; for (i = h1; ht->table_[i] != EMPTY_ENTRY; i = (i + h2) % ht->size_) { /* find the key first */ ht->probelength_++; if (ht->table_[i] == key) { /* if found mark this entry as removed and not empty */ ht->nelements--; ht->table_[i] = REMOVED_ENTRY; if (ht->nelements < ht->size_ / 4) { resize_int_set(ht, ht->size_ / 2); } return; } } assert(0); return; } void resize_int_set(int_set ht, int newsize) { int *newtable; unsigned long i, j; int h1, h2, key, oldsize; assert(ht); oldsize = ht->size_; newtable = (int *) malloc(newsize * sizeof(int)); for (i = 0; i < newsize; i++) newtable[i] = EMPTY_ENTRY; ht->size_ = newsize; for (i = 0; i < oldsize; i++) /* for each defined key */ if (ht->table_[i] != EMPTY_ENTRY && ht->table_[i] != REMOVED_ENTRY) { /* hash it */ key = ht->table_[i]; int_set_hash(ht, key); h1 = int_set_hash1(ht); h2 = int_set_hash2(ht); /* and then insert it into newtable */ for (j = h1; newtable[j] != EMPTY_ENTRY && newtable[j] != REMOVED_ENTRY; j = (j + h2) % ht->size_) { ht->probelength_++; if (newtable[j] == key) break; } newtable[j] = key; } /* printf("total resize problength=%d\n", ht->probelength_); */ free(ht->table_); ht->table_ = newtable; return; } int choose_int_set(int_set ht) { int i; long double min; min = HUGE_VAL; for (i = 0; i < ht->size_; i++) if (ht->table_[i] != EMPTY_ENTRY && ht->table_[i] != REMOVED_ENTRY && ht->table_[i] < min) min = ht->table_[i]; return min; } int size_int_set(int_set ht) { return ht->nelements; } int member_int_set(int_set ht, int key) { if (lookup_int_set(ht, key) == EMPTY_ENTRY) return 0; else return 1; } int lookup_int_set(int_set ht, int key) { unsigned long i; int h1, h2; assert(ht); assert(key >= 0); int_set_hash(ht, key); h1 = int_set_hash1(ht); h2 = int_set_hash2(ht); ht->probelength_ = 0; for (i = h1; ht->table_[i] != EMPTY_ENTRY; i = (i + h2) % ht->size_) { ht->probelength_++; if (ht->table_[i] == key) return key; } return EMPTY_ENTRY; } /* verifying */ /* printf("new table size =%d\n", ht->size_); for(i=0; i < ht->size_; i++) if(ht->table_[i] != EMPTY_ENTRY){ printf("looking for %d\n", ht->table_[i]); assert(lookup_int_set(ht, ht->table_[i])!=EMPTY_ENTRY); } printf("new size=%d, new elements=%d\n", ht->size_,ht->nelements); */ /**********/