/* This file implements an integer set using a hashtable. Copyright (C) 2004 The University of Texas at Austin This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. Email: usman@cs.njit.edu Mail: Usman Roshan, Department of Computer Sciences, NJIT-CCS, Computer Science Department, GITC 4400, University Heights, Newark, NJ 07102 */ #include #include #include #include #include #include #include static short EMPTY_ENTRY = -1; static short REMOVED_ENTRY = -2; int global_probelength = 0; void get_entries_int_set(int_set ht, int* entries){ int i, j; j = 0; assert(ht && entries); for(i=0;isize_;i++) if(ht->table_[i]!=EMPTY_ENTRY && ht->table_[i]!=REMOVED_ENTRY) entries[j++] = ht->table_[i]; } void assign_int_set(int_set ht1, int_set ht2){ int i; assert(ht1 && ht2); ht1->nelements = ht2->nelements; ht1->size_ = ht2->size_; ht1->a1_ = ht2->a1_; ht1->a2_ = ht2->a2_; ht1->b1_ = ht2->b1_; ht1->b2_ = ht2->b2_; ht1->probelength_ = ht2->probelength_; ht1->h_ = ht2->h_; assert(ht1->table_); free(ht1->table_); ht1->table_ = (int*) malloc(ht1->size_ * sizeof(int)); for(i=0;isize_;i++) ht1->table_[i] = ht2->table_[i]; } short equal_int_set(int_set ht1, int_set ht2){ int i; int* ht1_entries; int* ht2_entries; assert(ht1 && ht2); if(ht1->nelements != ht2->nelements) return 0; ht1_entries = (int*) malloc(ht1->nelements * sizeof(int)); get_entries_int_set(ht1, ht1_entries); for(i=0; i < ht1->nelements; i++) if(lookup_int_set(ht2, ht1_entries[i]) == EMPTY_ENTRY){ free(ht1_entries); return 0; /*printf("probelength=%d\n", ht2->probelength_);*/ } free(ht1_entries); ht2_entries = (int*) malloc(ht2->nelements * sizeof(int)); get_entries_int_set(ht2, ht2_entries); for(i=0; i < ht2->nelements; i++) if(lookup_int_set(ht1, ht2_entries[i]) == EMPTY_ENTRY){ free(ht2_entries); return 0; /*printf("probelength=%d\n", ht1->probelength_);*/ } free(ht2_entries); return 1; } int_set diff_int_set(int_set ht1, int_set ht2){ int i; int* ht1_entries; int_set result; assert(ht1 && ht2); global_probelength = 0; result=new_int_set(8); if(ht1->nelements == 0) return result; if(ht2->nelements == 0) { assign_int_set(result, ht1); return result; } ht1_entries = (int*) malloc(ht1->nelements * sizeof(int)); get_entries_int_set(ht1, ht1_entries); for(i=0; i < ht1->nelements; i++) if(lookup_int_set(ht2, ht1_entries[i]) == EMPTY_ENTRY){ global_probelength += ht2->probelength_; insert_int_set(result, ht1_entries[i]); global_probelength += result->probelength_; } else global_probelength += ht2->probelength_; free(ht1_entries); return result; } int_set intersect_int_set(int_set ht1, int_set ht2){ int i; int* ht1_entries; int* ht2_entries; int_set result; assert(ht1 && ht2); result=new_int_set(8); if(ht1->nelements == 0 || ht2->nelements == 0) return result; ht1_entries = (int*) malloc(ht1->nelements * sizeof(int)); get_entries_int_set(ht1, ht1_entries); for(i=0; i < ht1->nelements; i++) if(lookup_int_set(ht2, ht1_entries[i]) != EMPTY_ENTRY) insert_int_set(result, ht1_entries[i]); free(ht1_entries); ht2_entries = (int*) malloc(ht2->nelements * sizeof(int)); get_entries_int_set(ht2, ht2_entries); for(i=0; i < ht2->nelements; i++) if(lookup_int_set(ht1, ht2_entries[i]) != EMPTY_ENTRY) insert_int_set(result, ht2_entries[i]); free(ht2_entries); return result; } int_set union_int_set(int_set ht1, int_set ht2){ int i; int* ht1_entries; int* ht2_entries; int_set result; assert(ht1 && ht2); global_probelength = 0; result=new_int_set(8); if(ht1->nelements == 0){ assign_int_set(result, ht2); return result; } else if(ht2->nelements == 0){ assign_int_set(result, ht1); return result; } ht1_entries = (int*) malloc(ht1->nelements * sizeof(int)); get_entries_int_set(ht1, ht1_entries); for(i=0; i < ht1->nelements; i++){ insert_int_set(result, ht1_entries[i]); global_probelength += result->probelength_; } free(ht1_entries); ht2_entries = (int*) malloc(ht2->nelements * sizeof(int)); get_entries_int_set(ht2, ht2_entries); for(i=0; i < ht2->nelements; i++){ insert_int_set(result, ht2_entries[i]); global_probelength += result->probelength_; } free(ht2_entries); return result; } short subseteq_int_set(int_set ht1, int_set ht2){ int i; int* ht1_entries; assert(ht1 && ht2); if(ht1->nelements > ht2->nelements) return 0; ht1_entries = (int*) malloc(ht1->nelements * sizeof(int)); get_entries_int_set(ht1, ht1_entries); for(i=0; i < ht1->nelements; i++) if(lookup_int_set(ht2, ht1_entries[i]) == EMPTY_ENTRY){ free(ht1_entries); return 0; /*printf("probelength=%d\n", ht2->probelength_);*/ } free(ht1_entries); return 1; } void print_int_set(int_set ht){ int i = 0; for(i=0; i < ht->size_; i++) if(ht->table_[i]!=EMPTY_ENTRY) printf(" %d", ht->table_[i]); } int probelength_int_set(int_set ht){ assert(ht); return ht->probelength_; } void del_int_set(int_set ht){ assert(ht); free(ht->table_); free(ht); } int_set new_int_set(long sz){ int i; double x; double lgsz, lg2; int_set ht; ht = (int_set) malloc(sizeof(struct int_set_struct)); /*setting int_set to be size of power of 2*/ /*this is required so that proper open addressing can occur,*/ /*i.e. eventually every entry in the table is considered*/ lgsz = log(sz); lg2 = log(2); if(floor(lgsz/lg2) != ceil(lgsz/lg2)){ /* which means that sz is not a power of 2*/ x = ceil(lgsz/lg2); sz = pow(2, x+1); } ht->size_ = sz; ht->nelements = 0; /* allocate memory for table*/ ht->table_ = (int*) malloc(ht->size_*sizeof(int)); /* initialize to -1, because 0 is a possible member of the set*/ for(i = 0; i < ht->size_; i++) ht->table_[i] = EMPTY_ENTRY; ht->P_ = 2147483647; /* initialize P (large prime)*/ ht->probelength_ = 0; /*initialize hash1 and hash2 variables*/ srand(time(0)); ht->a1_ = rand() * (ht->P_ - 1) + 1; ht->a2_ = rand() * (ht->P_ - 1) + 1; ht->b1_ = rand() * ht->P_; ht->b2_ = rand() * ht->P_; return ht; } /* This is called before hash1 and hash2. It sets the value of h_ to be used by hash1 and hash2 and saves excessive calls to the hash function.*/ unsigned long int_set_hash(int_set ht, int key) { assert(ht); assert(key >= 0); key += (key << 12); key ^= (key >> 22); key += (key << 4); key ^= (key >> 9); key += (key << 10); key ^= (key >> 2); key += (key << 7); key ^= (key >> 12); ht->h_ = key; return key; } unsigned long int_set_hash1(int_set ht) { return ((((ht->a1_ * ht->h_) + ht->b1_) % ht->P_) % ht->size_) ; } unsigned long int_set_hash2(int_set ht) { return ((((ht->a2_ * ht->h_ + ht->b2_) % ht->P_) % (ht->size_ - 3)) | 1); } void insert_int_set(int_set ht, int key) { unsigned long i; int h1, h2; assert(ht); assert(key >= 0); int_set_hash(ht, key); h1 = int_set_hash1(ht); h2 = int_set_hash2(ht); ht->probelength_ = 0; for(i = h1; ht->table_[i] != EMPTY_ENTRY && ht->table_[i] != REMOVED_ENTRY; i = (i + h2) % ht->size_){ ht->probelength_++; if(ht->table_[i] == key) return; } ht->table_[i] = key; ht->nelements++; if(2*ht->nelements > ht->size_) { resize_int_set(ht, 2*ht->size_); /*verifying*/ /* for(i=0; i < ht->size_; i++) if(ht->table_[i] != EMPTY_ENTRY) assert(lookup_int_set(ht, ht->table_[i])!=EMPTY_ENTRY); printf("new size=%d, new elements=%d\n", ht->size_,ht->nelements); */ /**********/ } return; } /* assert: key must be present in table*/ void remove_int_set(int_set ht, int key) { unsigned long i; int h1, h2; assert(ht); assert(key >= 0); assert(lookup_int_set(ht, key) != EMPTY_ENTRY); int_set_hash(ht, key); h1 = int_set_hash1(ht); h2 = int_set_hash2(ht); ht->probelength_ = 0; for(i = h1; ht->table_[i] != EMPTY_ENTRY; i = (i + h2) % ht->size_){ /* find the key first */ ht->probelength_++; if(ht->table_[i] == key) { /* if found mark this entry as removed and not empty */ ht->nelements--; ht->table_[i] = REMOVED_ENTRY; if(ht->nelements < ht->size_/4) { resize_int_set(ht, ht->size_/2); } return; } } assert(0); return; } void resize_int_set(int_set ht, int newsize){ int* newtable; unsigned long i, j; int h1, h2, key, oldsize; assert(ht); oldsize = ht->size_; newtable=(int*)malloc(newsize*sizeof(int)); for(i=0; i < newsize; i++) newtable[i]=EMPTY_ENTRY; ht->size_ = newsize; for(i=0; i < oldsize; i++) /*for each defined key*/ if(ht->table_[i] != EMPTY_ENTRY && ht->table_[i] != REMOVED_ENTRY){ /*hash it*/ key = ht->table_[i]; int_set_hash(ht, key); h1=int_set_hash1(ht); h2=int_set_hash2(ht); /*and then insert it into newtable*/ for(j = h1; newtable[j] != EMPTY_ENTRY && newtable[j] != REMOVED_ENTRY; j = (j + h2) % ht->size_){ ht->probelength_++; if(newtable[j] == key) break; } newtable[j] = key; } /*printf("total resize problength=%d\n", ht->probelength_);*/ free(ht->table_); ht->table_ = newtable; return; } int choose_int_set(int_set ht){ int i; long double min; min = HUGE_VAL; for(i = 0; i < ht->size_; i++) if(ht->table_[i] != EMPTY_ENTRY && ht->table_[i] != REMOVED_ENTRY && ht->table_[i] < min) min = ht->table_[i]; return min; } int size_int_set(int_set ht){ return ht->nelements; } int member_int_set(int_set ht, int key){ if(lookup_int_set(ht, key) == EMPTY_ENTRY) return 0; else return 1; } int lookup_int_set(int_set ht, int key) { unsigned long i; int h1, h2; assert(ht); assert(key >= 0); int_set_hash(ht, key); h1 = int_set_hash1(ht); h2 = int_set_hash2(ht); ht->probelength_ = 0; for(i = h1; ht->table_[i] != EMPTY_ENTRY; i = (i + h2) % ht->size_) { ht->probelength_++; if(ht->table_[i] == key) return key; } return EMPTY_ENTRY; } /*verifying*/ /* printf("new table size =%d\n", ht->size_); for(i=0; i < ht->size_; i++) if(ht->table_[i] != EMPTY_ENTRY){ printf("looking for %d\n", ht->table_[i]); assert(lookup_int_set(ht, ht->table_[i])!=EMPTY_ENTRY); } printf("new size=%d, new elements=%d\n", ht->size_,ht->nelements); */ /**********/