Welcome to ShenZhenJia Knowledge Sharing Community for programmer and developer-Open, Learning and Share
menu search
person
Welcome To Ask or Share your Answers For Others

Categories

Hello can someone help me find what is causing the problem? For some reason the find_hash function is giving me problems. It should be failing the if(table -> buckets_array[i] != NULL){ and if(table -> buckets_array[i] != ''){ but it is not and it going to the next check which gives me a segmentation fault. What can be causing the first 2 if's statement to pass since I initially set it to table -> buckets_array[i] = NULL?

Edit: Thanks for wildplasser I came up with a much better solution for my find_hash function.

if(table->buckets_array[table->hash_func(key)] == NULL){
    return NULL;
  }else{
    return table -> buckets_array[table->hash_func(key)] -> data;
  }

Thanks you everyone for the help.

/*hash.h*/
#include<stdio.h>
#include<stdlib.h>

typedef struct data_{
  char *key;
  void *data;
  struct data_ *next;
}data_el;

typedef struct hash_table_ {
  void **order;
  int *number_next_calls;
  int *number_buckets;
  int *buckets_size;
  int *worst;
  int *total;
  float *average;
  int (*hash_func)(char *);
  int (*comp_func)(void*, void*);
  data_el **buckets_array;
} hash_table, *Phash_table;

Phash_table new_hash(int size, int (*hash_func)(char *), int (*cmp_func)(void *, void *));
void free_hash(Phash_table table);
void insert_hash(Phash_table table, char *key, void *data);
void *find_hash(Phash_table table, char *key);
void stat_hash(Phash_table table, int *total, int *largest, float *average);
void *start_hash_walk(Phash_table table);
void *next_hash_walk(Phash_table table);

static void lower_case_word(char *w);

/*hash.c*/
#include"hash.h"

Phash_table new_hash(int size, int (*hash_func)(char *), int (*cmp_func)(void *, void *)){
  int i;

  Phash_table table_p;
  hash_table hash_table;

  table_p = (Phash_table)malloc(sizeof(hash_table));

  /*Setting function pointers*/
  table_p->hash_func = hash_func;
  table_p->comp_func = cmp_func;

  /*Create buckets array*/
  table_p->buckets_array = (data_el **)malloc(sizeof(data_el *)*(size+1));
  table_p->buckets_size = (int *)malloc(sizeof(int)*(size+1));

  /*Setting order array*/
  table_p->order = NULL;

  /*Setting inital condictions*/
  table_p->worst = (int *)malloc(sizeof(int));
  table_p->total = (int *)malloc(sizeof(int));
  table_p->average = (float *)malloc(sizeof(float));
  table_p->number_buckets = (int *)malloc(sizeof(int));

  *(table_p->number_buckets) = size;

  for(i = 0; i < size; i++){
    table_p->buckets_array[i] = NULL;
  }
  return table_p;
}

void free_hash(Phash_table table){

  int i;
  i = 0;
  data_el *cur;
  data_el *prev;

  /*Free order array*/
  if(table->order != NULL){
    free(table->order);
  }

  /*Free Buckets array and buckets_size array*/
  if(table ->buckets_array != NULL){
    for(i=0; i < *(table->number_buckets); i++){
      if(table->buckets_array[i] != NULL){

    /*Travers the linked list and free it*/
    cur = table->buckets_array[i];
    prev = cur;

    while((cur = cur->next) != NULL){
      free(prev);
      prev = cur;
    }
    /*Free the last node*/
    free(cur);
    }
    }
  }
  free(table);
}

void insert_hash(Phash_table table, char *key, void *data){
  int index;
  data_el *p, *cur;

  index = table->hash_func(key);

  /*Head insertion*/
  if(table->buckets_array[index] == NULL){
    table->buckets_array[index] = (data_el *)malloc(sizeof(data_el));
    table->buckets_array[index]->data = data;
    table->buckets_array[index]->next =  NULL;
    table->buckets_array[index]->key = key;
  }else{
    cur = table->buckets_array[index];
    p = (data_el *)malloc(sizeof(data_el));
    p->key = key;
    p->data = data;
    p->next = cur;
    cur = p;
    table->buckets_array[index] = cur;
  }

  /*Update Total*/
  *table->total += 1;

  /*Update Bucket Size*/
  (table->buckets_size[index]) +=1;

  /*Updates Worst Case*/
  if((table->buckets_size[index]) > *(table->worst)){
    *(table->worst) = (table->buckets_size[index]);
  }else{
    *(table->worst) +=1;
  }

  /*Update Average*/
  int temp_total,temp_buckets;

  temp_total = *(table->total);
  temp_buckets = *(table->number_buckets);
  *(table->average)= (float)temp_total/(float)temp_buckets;
}

void *find_hash(Phash_table table, char *key){
  int i;
  i = 0;

  while(1){
    if(table->buckets_array[i] == ''){
      printf("End of Array");
      break;
    }
    if(table->buckets_array[i] != NULL){
      printf("%Checking");
      if(table->buckets_array[i] != ''){
    if(table->buckets_array[i]->key != NULL){
      if( strcmp(table->buckets_array[i]->key,key) == 0 ){
        printf("Found it
");
        break;
      }
    }
      }
    }
    i++;
  }

  if(table->buckets_array[i] != NULL){
    printf("new asdasd %d", *((int *)table->buckets_array[i]->data));
    return table->buckets_array[i]->data;
  }else{
    return NULL;
  }
}

void stat_hash(Phash_table table, int *total, int *largest, float *average){
  total =  (table->total);
  largest = (table->worst);
  average =  (table->average);
}

void *start_hash_walk(Phash_table table){
  int i, max_buckets,step,next_calls;
  step = 0;
  data_el *cur;
  next_calls = 0;

  max_buckets = *(table ->number_buckets);
  table->order = (void**)malloc(sizeof(void *)*(*(table->total)));

  /*Set number_next_calls to 0*/
  table->number_next_calls = &next_calls;
  *(table->number_next_calls) = 0;

  /*Traverse the ADT and put all data into order array*/
  for(i = 0; i < max_buckets; i++){
    if(table->buckets_array[i] != NULL){
      cur = table->buckets_array[i];
      while(cur){
    table->order[step] = cur->data;
    step ++;
    cur = cur->next;
      }
    }
  }

  /*Bubble Short*/
  int j,k;

  for(j = 0; j < (step - 1);j++){
    for(k = 0;k < (step -(j+1));k ++){
      if((table->comp_func)(table->order[j],table->order[j+1]) < 5){
    void *temp;

    temp = table->order[j];
    table->order[j] = table->order[j+1];
    table->order[j+1] =  temp;
    printf("Swaping %s with %s
",table->order[j],table->order[j+1]);
      }
    }
  }
  return table->order;
}

void *next_hash_walk(Phash_table table){

  /*
    Check the amount of times next_has_walk has been
    if higher than total, it will return null
  */

  if(*(table->number_next_calls) >= *(table->total)){
    return NULL;
  }else{
    *(table->number_next_calls) = *(table->number_next_calls) + 1;
    return table->order[*(table->number_next_calls)];
  }
}

/*project4.c*/

#include"hash.h"

#define WORD_SIZE 40
#define DICTIONARY_SIZE 1000
#define TRUE 1
#define FALSE 0

int hash_function(char *word){

  int sum,i;
  i = 0;
  sum = 0;
  while(word[i] != ''){
    sum = sum + word[i];
    i++;
  }
  return sum%1000;
}

int main(void){

  /*Creating buckets array*/
  Phash_table dictionary;
  void *test;

  dictionary = new_hash(DICTIONARY_SIZE,hash_function,comp);

  int i;
  i = 0;
  void *frequency[DICTIONARY_SIZE];
  int char_index = 0, dictionary_size = 0, num_words = 0;
  char c, word[WORD_SIZE];

  printf("Parsing input ...
");

  while ((c = getchar()) != EOF) {
    if ((c == ' ') || (c == ',') || (c == '.') || (c == '!') || (c == '"') ||
    (c == ':') || (c == '
')) {

      /* End of a word */
      if (char_index) {
    /* Word is not empty */
    word[char_index] = '';
    lower_case_word(word);
    if(!find_hash(dictionary,word) ){
      insert_hash(dictionary,word,frequency[hash_function(word)]);
    }
    frequency[hash_function(word)]++;
    char_index = 0;
    num_words++;
      }
    }else{
      word[char_index++] = c;
    }
  }

  printf("There were %d words; %d unique words.
", num_words,dictionary_size);
}

void lower_case_word(char *w){
  int i = 0;

  while (w[i] != '') {
    w[i] = tolower(w[i]);
    i++;
  }
}
See Question&Answers more detail:os

与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
thumb_up_alt 0 like thumb_down_alt 0 dislike
119 views
Welcome To Ask or Share your Answers For Others

1 Answer

Your find_hash() function does not return a value.

You omitted some functions:

Phash_table new_hash(int size, int (*hash)(char *), int (*comp)(void *, void *));
void lower_case_word(char *word);
int hash_function(char *);
int comp_function(void *, void *);

You've not specified DICTIONARY_SIZE or WORD_SIZE.

You've not declared c in main().

You've not defined int main(void) (or int main(int argc, char **argv)). The return type is int, and it takes 0 or 2 arguments.

You've neither defined nor initialized char_index. You didn't define word. You didn't define or initialize num_words.

Generally, you should submit compilable code to SO. The missing functions and standard headers are one thing; it is legitimate (but a mild nuisance) if you omit their definitions.

You might want to consider whether the comparison function should be taking void const * (or const void *) arguments instead of just void *; it will make it more generally usable, for example with qsort() or bsearch().

I'm not clear whether the hash function would benefit from taking a void * instead of a char *; it should probably be a char const * or void const * rather than an unqualified pointer.

Stylistically, do not put spaces around either -> or . operators. They bind tighter than anything else. Generally, put spaces around binary operators such as /. Generally, put spaces after commas (but be consistent even more).


hash.h

Do not (usually — this is a usual case) declare static functions in headers. A static function is only needed in one file; there is no point in putting it in the header. A header is for sharing information between source files. If something isn't going to be used by other files, there's no point in declaring it.

Do protect headers from reinclusion:

#ifndef HASH_H_INCLUDED
#define HASH_H_INCLUDED

...rest of header here...

#endif /* HASH_H_INCLUDED */

Do not include in the header anything that is not needed to use the header. None of the material in "hash.h" requires either <stdlib.h> or <stdio.h> so it probably shouldn't be there.

The C standard uses a space between #include and the header name; what's good enough for them should be good enough for you.

project4.c

You aren't paying enough attention to compiler warnings, or don't have enough compiler warnings set. If you're using GCC, compile with -Wall at least. When I was test compiling, I used, for example:

gcc -O3 -g -std=c99 -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -c project4.c

I expect my code to compile cleanly under that set of options.

You need #include <ctype.h> to declare tolower().

Function comp() is not defined anywhere. I added:

static int comp(void *v1, void *v2)
{
    char const *s1 = v1; 
    char const *s2 = v2; 
    return strcmp(s1, s2);
}

I also put static in front of hash_function() to avoid needing an external definition.

One time when I compiled, I got the warning:

project4.c:51: warning: comparison is always false due to limited range of data type

That shows a problem. You declared c as char, but then wrote:

while ((c = getchar()) != EOF)

This is a no-no. The function getchar() returns an int (yes, the name is not good). The set of values it can return includes every character plus a distinct value EOF. You must use an int to get reliable behaviour. One of two problems occurs. Either you never get EOF because the assignment is to an (implicitly) unsigned type, and -1 gets mapped to 0xFF, and when 0xFF is promoted to an int, it is positive and therefore not equal to -1; or you misinterpret a valid character (often U+00FF, ?, SMALL LATIN LETTER Y WITH DIAERESIS, colloquially y-umlaut) as EOF.

You should consider using something like !isalnum() instead of the condition:

 if ((c == ' ') || (c == ',') || (c == '.') || (c == '!') || (c == '"') ||
     (c == ':') || (c == '
'))

hash.c

The code omitted #include <string.h>.

The compiler says:

hash.c: In function ‘find_hash’:
hash.c:142: warning: too few arguments for format
hash.c: In function ‘start_hash_walk’:
hash.c:219: warning: format ‘%s’ expects type ‘char *’, but argument 2 has type ‘void *’
hash.c:219: warning: format ‘%s’ expects type ‘char *’, but argument 3 has type ‘void *’

The line numbers should be treated with a pinch of salt; I use Allman style layout and therefore added a number of lines to your code when I formatted it. But if your compiler isn't giving you this sort of advice, you should be looking for a compiler that does.

Line 142 contains:

printf("%Checking");

The % is unneeded. However, you may never see the output until something prints a newline, or you use fflush(stdout); to force the data out.

printf("Checking
");

As a general rule, especially during debugging, end printed messages with a newline.

Line 219 is:

 printf("Swaping %s with %s
",table->order[j],table->order[j+1]);

It should probably read:

 printf("Swapping %s with %s
", (char *)table->order[j], (char *)table->order[j+1]);

because order is a void **. You might be better off with it being a char **, of course.


Operations

With those (minor) issues fixed, the code compiles and links. When run on some text, the output looks like:

Parsing input ...
End of ArrayEnd of ArrayEnd of ArrayEnd of ArrayEnd of ArrayEnd of Array

Output newlines! It is probably also worth printing out what string it is you've just processed, and whether you added it or found it already in the data.

One problem in main(): you do not intialize the frequency array before you use it. Further, it isn't entirely clear why the array exists; why isn't the frequency information being kept in the hash table proper? And why is the frequency information an array of void * rather than, say, int?

On one lot of sample data, the program summarized the input as:

There were 68 words; 0 unique words.

The number of unique words is initially worrisome, but it turns out that you initialize dictionary_size to 0 and never change it and then print it, so that much of the output is correct.


I think you need to write yourself a structure dumper. That's a function that when given a (pointer to the) structure, prints sensible diagnostic information about the data in the structure. I use the general form:

void dump_xyzstructure(FILE *fp, char const *tag, xyzstructure const *data);

Then I can write:

dump_xyzstructure(stdout, "After initialization", &xyz);

dump_xyzstructure(logfp, "At this point", &xyz);

Etc.


Your hash table is pretty complex. However, you have code outside the hash table calling the hash function, and I'm not sure that's a good idea. Mostly, it shouldn't need to. Since your hash table is needed to support frequency counting, the hash table should be recording the frequencies for you; you should not be doing that in the auxilliary code in the main program.

You can simplify life by streamlining the structure. You don't have to dynamically allocate everything in the structure. On a 64-bit machine in particular, that is a huge waste of space. (OK: it is a trivial waste of space, but it a waste of effort too.)

 /*Setting inital condictions*/
 table_p->worst = (int *)malloc(sizeof(int));
 table_p->total = (int *)malloc(sizeof(int));
 table_p->average = (float *)malloc(sizeof(float));
 table_p->number_buckets = (int *)malloc(sizeof(int));

It would be more compact to have a data structure without those pointers, using just an element in the structure. It will simplify the deallocation code, too (and it simplifies the printing code because it doesn't have to worry about whether the pointers have been allocated or not):

typedef struct hash_table
{
  void    **order;
  int       number_next_calls;
  int       number_buckets;
  int      *buckets_size;        // Number of entries in each bucket
  int       worst;
  int       total;
  float     average;
  int     (*hash_func)(char *); 
  int     (*comp_func)(void*, void*);
  data_el **buckets_array;
} hash_table, *Phash_table;

The average can be computed on demand; it doesn't warrant a place in the structure, really. Losing all those pointers simplifies the code considerably.

For example, this fragment:

if (*(table->number_next_calls) >= *(table->total))
{
    return NULL;
}
else
{
    *(table->number_next_calls) = *(table->number_next_calls) + 1;
    return table->order[*(table->number_next_calls)];
}

becomes:

if (table->number_next_calls >= table->total)
{
    return NULL;
}
else
{
    table->number_next_calls = table->number_next_calls + 1;
    return table->order[table->number_next_calls];
}

or even:

if (table->number_next_calls >= table->total)
    return NULL;
else
    return table->order[++table->number_next_calls];

Sample text

This is something I use quite a lot in SO comments:

Welcome to StackOverflow. Please note that the preferred way of saying 'thanks' around here is by up-voting good questions and helpful answers (once you have enough reputation to do so), and by accepting the most helpful answer to any question you ask (which also gives you a small boost to your reputation). Please see the [FAQ](http://stackoverflow.com/faq) and especially [How do I ask questions here?](http://stackoverflow.com/faq#howtoask)

Sample Output

Most of the data shown comes from the dump_hash_table() function; I needed to see what was working and what was not. Most of it was actually working — I didn't have to fix up much of the algorithm (the worst case calculation was faulty and is fixed). I've left out the trace code while the input was being parsed.

Hash Table: 0x10E700900 At end of input
 Order = 0x00000000
 Buckets = 0x7FD9A3800000
 Hash = 0x10E607A40
 Comp = 0x10E607AA0
 Next calls: 0
 Buckets: 1000
 Worst: 4
 Total: 74
 Average: 0.074000
   Bucket   3: 2
   Bucket  67: 1
   Bucket  97: 1
   Bucket  99: 2
   Bucket 105: 1
   Bucket 211: 2
   Bucket 213: 1
   Bucket 219: 2
   Bucket 220: 1
   Bucket 226: 1
   Bucket 227: 4
   Bucket 229: 1
   Bucke

与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
thumb_up_alt 0 like thumb_down_alt 0 dislike
Welcome to ShenZhenJia Knowledge Sharing Community for programmer and developer-Open, Learning and Share
...