-static inline unsigned int rehash_token(unsigned int hash, unsigned char data)
-{
- return ((hash * 16777619) ^ data);
-}
-
-static unsigned int hash_token(unsigned char *data, int len)
-{
- unsigned int hash=HASH_BASE_OFFSET;
- int i;
-
- for (i = 0; i < len; i++)
- hash = rehash_token(hash, data[i]);
-
- return HASH_FOLD(hash);
-}
-
-/* find a token given its data and hash value */
-static struct token *find_token_hash(unsigned char *data, int len, unsigned int hash)
-{
- struct token *ptr;
-
- ptr = hash_table[hash];
-
- while (ptr) {
- if ((ptr->len == len) && (memcmp(ptr->data, data, len) == 0))
- return ptr;
- ptr=ptr->next;
- }
-
- return NULL;
-}
-
-static inline void insert_token_in_group(struct token *head, struct token *ptr)
-{
- ptr->right = head->right;
- ptr->right->left = ptr;
- head->right = ptr;
- ptr->left = head;
-}
-
-static inline void remove_token_from_group(struct token *ptr)
-{
- ptr->left->right = ptr->right;
- ptr->right->left = ptr->left;
-}
-
-
-/* build the counts for all the tokens that start with "data", and have lenghts
- * from 2 to "len" */
-static void learn_token(unsigned char *data, int len)
-{
- struct token *ptr,*last_ptr;
- int i, newprofit;
- unsigned int hash = HASH_BASE_OFFSET;
- unsigned int hashes[MAX_TOK_SIZE + 1];
-
- if (len > MAX_TOK_SIZE)
- len = MAX_TOK_SIZE;
-
- /* calculate and store the hash values for all the sub-tokens */
- hash = rehash_token(hash, data[0]);
- for (i = 2; i <= len; i++) {
- hash = rehash_token(hash, data[i-1]);
- hashes[i] = HASH_FOLD(hash);
- }
-
- last_ptr = NULL;
- ptr = NULL;
-
- for (i = len; i >= 2; i--) {
- hash = hashes[i];
-
- if (!ptr) ptr = find_token_hash(data, i, hash);
-
- if (!ptr) {
- /* create a new token entry */
- ptr = (struct token *) malloc(sizeof(*ptr));
-
- memcpy(ptr->data, data, i);
- ptr->len = i;
-
- /* when we create an entry, it's profit is 0 because
- * we also take into account the size of the token on
- * the compressed table. We then subtract GOOD_BAD_THRESHOLD
- * so that the test to see if this token belongs to
- * the good or bad list, is a comparison to zero */
- ptr->profit = -GOOD_BAD_THRESHOLD;
-
- ptr->next = hash_table[hash];
- hash_table[hash] = ptr;
-
- insert_token_in_group(&bad_head, ptr);
-
- ptr->smaller = NULL;
- } else {
- newprofit = ptr->profit + (ptr->len - 1);
- /* check to see if this token needs to be moved to a
- * different list */
- if((ptr->profit < 0) && (newprofit >= 0)) {
- remove_token_from_group(ptr);
- insert_token_in_group(&good_head,ptr);
- }
- ptr->profit = newprofit;
- }
-
- if (last_ptr) last_ptr->smaller = ptr;
- last_ptr = ptr;
-
- ptr = ptr->smaller;
- }
-}
-
-/* decrease the counts for all the tokens that start with "data", and have lenghts
- * from 2 to "len". This function is much simpler than learn_token because we have
- * more guarantees (tho tokens exist, the ->smaller pointer is set, etc.)
- * The two separate functions exist only because of compression performance */
-static void forget_token(unsigned char *data, int len)
-{
- struct token *ptr;
- int i, newprofit;
- unsigned int hash=0;
-
- if (len > MAX_TOK_SIZE) len = MAX_TOK_SIZE;
-
- hash = hash_token(data, len);
- ptr = find_token_hash(data, len, hash);
-
- for (i = len; i >= 2; i--) {
-
- newprofit = ptr->profit - (ptr->len - 1);
- if ((ptr->profit >= 0) && (newprofit < 0)) {
- remove_token_from_group(ptr);
- insert_token_in_group(&bad_head, ptr);
- }
- ptr->profit=newprofit;
-
- ptr=ptr->smaller;
- }
-}
-