#include #include #include #include #define BUFFER_LEN 2000 #define NODI_NUMER 600 using namespace std; struct nodo { int value; long frq; struct nodo *r; struct nodo *l; int nodochiuso; }; // Huffman in C/C++ By Ugo Cirmignani char conversione[256][BUFFER_LEN] = {0}; char stringa[BUFFER_LEN] = {'\0'}; int sindex = 0; int piano = 0; int nodoH_uno; int nodoH_due; int nodoH_uno_index; int nodoH_due_index; int nodoH_somma; int radice; int simboli_trattati = 0; int nodo_corrente = 0; int index = 0; int total_s = 0; int numero_nodi = 0; long file_size = 0; //Dichiarazione dei nodi di Huffman nodo nodiH[NODI_NUMER]; //Elenco dei caratteri di Huffman int list_of_chars[256]; //Elenco dei caratteri di Huffman long frq_of_chars[256]; long char_scanning[256]; //Elenco ordinato dei caratteri di Huffman int list_of_chars_o[256]; long frq_of_chars_o[256]; void HUbuild_tree(); void reset_data() { sindex = 0; piano = 0; radice = 0; simboli_trattati = 0; nodo_corrente = 0; index = 0; total_s = 0; numero_nodi = 0; file_size = 0; } //Get bit 0-7 flag return 1 or 0, if any error return -1 int GetBitFlag( unsigned char MyByte, int BitN ) { unsigned char B; switch (BitN) { case 0: B =(MyByte & 0x01); return B; break; case 1: B = (MyByte & 0x02); B = (B >> 1); return B; break; case 2: B = (MyByte & 0x04); B = (B >> 2); return B; break; case 3: B = (MyByte & 0x08); B = (B >> 3); return B; break; case 4: B = (MyByte & 0x10); B = (B >> 4); return B; break; case 5: B = (MyByte & 0x20); B = (B >> 5); return B; break; case 6: B = (MyByte & 0x40); B = (B >> 6); return B; break; case 7: B = (MyByte & 0x80); B = (B >> 7); return B; break; default: return -1; break; } return -1; } //Set bit 0-7 flag, return a setted Byte.. if any error return the original byte unsigned char SetBitFlag(unsigned char MyByte, int BitN, int value) { // If any errors return the original Byte! if (((BitN < 0) && (BitN > 7)) || ((value < 0) && (value > 1))) return MyByte; //Setting the bit… switch (BitN) { case 0: if (value == 0) { // Is the bit 0 == 0? if (GetBitFlag(MyByte, 0) == 0) return MyByte; // It’s Already setted to 0! else //it’s setted to 1 { MyByte = ( MyByte ^ 0x01); // Need to set it to 0 return MyByte; } } if (value == 1) // Is the bit 0 == 1? if (GetBitFlag(MyByte, 0) == 1) return MyByte; // It’s Already setted to 1! else //it’s setted to 0 { MyByte = (MyByte ^ 0x01);// Need to set it to 1 return MyByte; } break; case 1: if (value == 0) { // Is the bit 0 == 0? if (GetBitFlag(MyByte, 1) == 0) return MyByte; // It’s Already setted to 0! else //it’s setted to 1 { MyByte = (MyByte ^ 0x02); // Need to set it to 0 return MyByte; } } if (value == 1) // Is the bit 0 == 1? if (GetBitFlag(MyByte, 1) == 1) return MyByte; // It’s Already setted to 1! else //it’s setted to 0 { MyByte = (MyByte ^ 0x02);// Need to set it to 1 return MyByte; } break; case 2: if (value == 0) { // Is the bit 0 == 0? if (GetBitFlag(MyByte, 2) == 0) return MyByte; // It’s Already setted to 0! else //it’s setted to 1 { MyByte = (MyByte ^ 0x04); // Need to set it to 0 return MyByte; } } if (value == 1) // Is the bit 0 == 1? if (GetBitFlag(MyByte, 2) == 1) return MyByte; // It’s Already setted to 1! else //it’s setted to 0 { MyByte = (MyByte ^ 0x04);// Need to set it to 1 return MyByte; } break; case 3: if (value == 0) { // Is the bit 0 == 0? if (GetBitFlag(MyByte, 3) == 0) return MyByte; // It’s Already setted to 0! else //it’s setted to 1 { MyByte = (MyByte ^ 0x08); // Need to set it to 0 return MyByte; } } if (value == 1) // Is the bit 0 == 1? if (GetBitFlag(MyByte, 3) == 1) return MyByte; // It’s Already setted to 1! else //it’s setted to 0 { MyByte = (MyByte ^ 0x08);// Need to set it to 1 return MyByte; } break; case 4: if (value == 0) { // Is the bit 0 == 0? if (GetBitFlag(MyByte, 4) == 0) return MyByte; // It’s Already setted to 0! else //it’s setted to 1 { MyByte = (MyByte ^ 0x10); // Need to set it to 0 return MyByte; } } if (value == 1) // Is the bit 0 == 1? if (GetBitFlag(MyByte, 4) == 1) return MyByte; // It’s Already setted to 1! else //it’s setted to 0 { MyByte = (MyByte ^ 0x10);// Need to set it to 1 return MyByte; } break; case 5: if (value == 0) { // Is the bit 0 == 0? if (GetBitFlag(MyByte, 5) == 0) return MyByte; // It’s Already setted to 0! else //it’s setted to 1 { MyByte = (MyByte ^ 0x20); // Need to set it to 0 return MyByte; } } if (value == 1) // Is the bit 0 == 1? if (GetBitFlag(MyByte, 5) == 1) return MyByte; // It’s Already setted to 1! else //it’s setted to 0 { MyByte = (MyByte ^ 0x20);// Need to set it to 1 return MyByte; } break; case 6: if (value == 0) { // Is the bit 0 == 0? if (GetBitFlag(MyByte, 6) == 0) return MyByte; // It’s Already setted to 0! else //it’s setted to 1 { MyByte = (MyByte ^ 0x40); // Need to set it to 0 return MyByte; } } if (value == 1) // Is the bit 0 == 1? if (GetBitFlag(MyByte, 6) == 1) return MyByte; // It’s Already setted to 1! else //it’s setted to 0 { MyByte = (MyByte ^ 0x40);// Need to set it to 1 return MyByte; } break; case 7: if (value == 0) { // Is the bit 0 == 0? if (GetBitFlag(MyByte, 7) == 0) return MyByte; // It’s Already setted to 0! else //it’s setted to 1 { MyByte = (MyByte ^ 0x80); // Need to set it to 0 return MyByte; } } if (value == 1) // Is the bit 0 == 1? if (GetBitFlag(MyByte, 7) == 1) return MyByte; // It’s Already setted to 1! else //it’s setted to 0 { MyByte = (MyByte ^ 0x80);// Need to set it to 1 return MyByte; } break; case 8: if (value == 0) { // Is the bit 0 == 0? if (GetBitFlag(MyByte, 8) == 0) return MyByte; // It’s Already setted to 0! else //it’s setted to 1 { MyByte = (MyByte ^ 0x01); // Need to set it to 0 return MyByte; } } if (value == 1) // Is the bit 0 == 1? if (GetBitFlag(MyByte, 8) == 1) return MyByte; // It’s Already setted to 1! else //it’s setted to 0 { MyByte = (MyByte ^ 0x01);// Need to set it to 1 return MyByte; } break; default: return MyByte; break; } return MyByte; } long get_file_size(char *filename) { long size = 0; ifstream file (filename, ios::in|ios::binary|ios::ate); if (file.is_open()) { size = file.tellg(); file.close(); } return size; } bool is_nodo_empty(nodo ilnodo) { if (ilnodo.value != -1 ) return true; else return false; } int get_file_data(char *filename) { file_size = get_file_size(filename); total_s = 0; //Azzeramento dei dati.. int x = 0; for ( x = 0; x < 256; x++) { char_scanning[x] = 0; list_of_chars[index] = -1; } FILE *fp = fopen( filename, "rb"); if (fp == NULL) { return 0; } int rc; while ( (rc = fgetc(fp)) != EOF ) { char_scanning[rc]++; } fclose(fp); for ( x = 0; x < 256; x++) { if (char_scanning[x] != 0) { //printf("\nFound symbol: %c Qty: %d",x , char_scanning[x] ); frq_of_chars[total_s] = char_scanning[x]; list_of_chars[total_s] = x; total_s++; } } //printf("\nTotal of symbols: %d\n\n", total_s ); return 1; } int HUget_file_data(char *filename) { file_size = get_file_size(filename); total_s = 0; //Azzeramento dei dati.. int x = 0; for ( x = 0; x < 256; x++) { char_scanning[x] = 0; list_of_chars[index] = -1; } FILE *fp = fopen( filename, "rb"); if (fp == NULL) { return 0; } int rc; while ( (rc = fgetc(fp)) != EOF ) { char_scanning[rc]++; } fclose(fp); for ( x = 0; x < 256; x++) { if (char_scanning[x] != 0) { //printf("\nFound symbol: %c Qty: %d",x , char_scanning[x] ); frq_of_chars[total_s] = char_scanning[x]; list_of_chars[total_s] = x; frq_of_chars_o[total_s] = char_scanning[x]; list_of_chars_o[total_s] = x; total_s++; } } int a,b; long hold = 0; int shold = 0; for ( a = 0; a < total_s - 1; a++) { for ( b = 0; b < total_s - 1; b++) { if( frq_of_chars_o[b] > frq_of_chars_o[ b + 1 ] ) { hold = frq_of_chars_o[b]; shold = list_of_chars_o[b]; frq_of_chars_o[b] = frq_of_chars_o[ b + 1 ]; frq_of_chars_o[ b + 1 ] = hold; list_of_chars_o[b] = list_of_chars_o[b+1]; list_of_chars_o[ b + 1 ] = shold; } } } //Assegno delle pseudofrequenze.. for ( b = 0; b < total_s; b++) { frq_of_chars_o[b] = b+1; } //printf("\nTotal of symbols: %d\n\n", total_s ); return 1; } int compress_print_test(char *filename) { FILE *fp = fopen( filename, "rb"); if (fp == NULL) { return 0; } int rc; while ( (rc = fgetc(fp)) != EOF ) { //printf("%s",conversione[rc]); } fclose(fp); return 1; } // // Header file compresso // // Original File size 4 bytes 32bit integer // Number of symbols 2 bytes 16bit integer // Symbol 1byte, Symbol recurrency 1byte // // int HUcompress(char *infilename, char *filename) { int sb = 0; int bs = 0; int convlen; char buf; int bufbit = 0; reset_data(); HUget_file_data(infilename); HUbuild_tree(); //Apro lo stream per scrivere il file ofstream outfile; outfile.open(filename,ios::out | ios::binary ); //Writing the original FILE SIZE! outfile.put(file_size / 16777216); outfile.put(file_size / 65536); outfile.put(file_size / 256); outfile.put(file_size % 256); //Writing the number of symbols; outfile.put(total_s / 256); outfile.put(total_s % 256); //Write the huffman table for(sb = 0; sb < total_s; sb++ ) { //Write the simbol outfile.put(list_of_chars_o[sb]); //printf("\nWrote the simbol %c %d in the table", list_of_chars_o[sb],list_of_chars_o[sb]); //Write the simbol position (NOT NEEDED ANYMORE!) //outfile.put(( frq_of_chars_o[sb] -1) ); } //Iniziamo la vera compressione del file.. FILE *fp = fopen( infilename, "rb"); if (fp == NULL) { return 0; } int rc; bufbit = 0; buf = 0; while ( (rc = fgetc(fp)) != EOF ) { //printf("%s",conversione[rc]); //for ( bs = strlen(conversione[rc]) -1; bs>=0; bs-- ) for ( bs = 0; bs=0; bs-- ) for ( bs = 0; bs=0; bs-- ) for ( bs = 0; bsr; } else { nodopt = nodopt->l; } if ( nodopt->value != -1 ) { outfile.put(nodopt->value); nodopt = &nodiH[radice]; current_size++; } //searc_ind++; indicatore++; /* continue; for ( s = 0; s < 256; s++ ) { if ( strcmp(search, conversione[s]) == 0 ) { outfile.put(s); current_size++; for ( searc_ind = 0; searc_ind < BUFFER_LEN; searc_ind++ ) search[searc_ind] = 0; searc_ind = 0; break; } } */ } fclose(fp); outfile.close(); //printf("\n\nOriginal FZ: %d", original_size); //printf("\nNumero simboli: %d", total_s); return 1; } // // Header file compresso // // Original File size 4 bytes // Number of symbols 1 byte // Symbol 1byte, Symbol bites len 1byte, Symbol conversion x bytes! // // int decompress(char *infilename, char *filename) { int longs = 0; int simbol = 0; long original_size = 0; long current_size = 0; int readen_s = 0; int sb = 0; int bs = 0; int convlen; char buf; int bufbit = 0; bufbit = 0; buf = 0; int indicatore; //Azzeramento dei dati for (index = 0; index < NODI_NUMER; index++) { nodiH[index].value = -1; nodiH[index].frq = 0; nodiH[index].l = NULL; nodiH[index].r = NULL; nodiH[index].nodochiuso = 0; } //Apro lo stream per scrivere il file ofstream outfile; outfile.open(filename,ios::out | ios::binary ); //Azzeriamo tutti i dati.. for (sb = 0; sb < 256; sb++ ) { char_scanning[sb] = 0; list_of_chars[sb] = -1; frq_of_chars[sb] = 0; } //Azzeriamo le stringhe di conversione dati! for (sb = 0; sb < 256; sb++ ) for ( bs = 0; bs< BUFFER_LEN; bs++) conversione[sb][bs] = '\0'; FILE *fp = fopen( infilename, "rb"); if (fp == NULL) { return 0; } int rc; rc = fgetc(fp); original_size = original_size + ( rc * 16777216 ); rc = fgetc(fp); original_size = original_size + ( rc * 65536 ); rc = fgetc(fp); original_size = original_size + ( rc * 256 ); rc = fgetc(fp); original_size = original_size + rc; rc = fgetc(fp); total_s = rc * 256; rc = fgetc(fp); total_s += rc; //Lettura di tutti i simboli.. while ( readen_s < total_s ) { //Primo simbolo rc = fgetc(fp); simbol = rc; //printf("\n\nSimbolo: %c (%d)", simbol, simbol); //Lunghezza simbolo rc = fgetc(fp); int primol = rc; //Lunghezza simbolo rc = fgetc(fp); longs = (primol *256) + rc; //printf("\n\nLen simbolo: %d", longs); //printf("%s",conversione[rc]); int simbolo_counter = 0; indicatore = 8; while ( simbolo_counter < longs ) { if ( indicatore == 8 ) { rc = fgetc(fp); indicatore = 0; } if ( GetBitFlag( rc, indicatore ) == 1 ) { conversione[simbol][simbolo_counter] = '1'; } else { conversione[simbol][simbolo_counter] = '0'; } simbolo_counter++; indicatore++; } //printf("\nThe Symbol %c %d conversion is: %s\n",simbol, simbol, conversione[simbol]); readen_s++; } indicatore = 8; char search[BUFFER_LEN] = {0}; int searc_ind = 0; while ( current_size < original_size ) { int s = 0; //Nel caso ci sia da leggere qualcosa.. if ( indicatore == 8 ) { if ( (rc = fgetc(fp)) != EOF ) indicatore = 0; else break; } if ( GetBitFlag( rc, indicatore ) == 1 ) { search[searc_ind] = '1'; } else { search[searc_ind] = '0'; } searc_ind++; indicatore++; for ( s = 0; s < 256; s++ ) { if ( strcmp(search, conversione[s]) == 0 ) { outfile.put(s); current_size++; for ( searc_ind = 0; searc_ind < BUFFER_LEN; searc_ind++ ) search[searc_ind] = 0; searc_ind = 0; break; } } } fclose(fp); outfile.close(); //printf("\n\nOriginal FZ: %d", original_size); //printf("\nNumero simboli: %d", total_s); return 1; } void print_albero(struct nodo nd) { if (nd.value != -1){ //printf("%s Val: %c (%d)\n",stringa, nd.value,nd.value); strcpy(conversione[nd.value],stringa); } if (nd.l != NULL) { stringa[piano]= '0'; piano++; print_albero(*nd.l); } if (nd.r != NULL) { stringa[piano]= '1'; piano++; print_albero(*nd.r); } stringa[piano]='\0'; piano--; } void HUbuild_tree() { //Azzeramento dei dati for (index = 0; index < NODI_NUMER; index++) { nodiH[index].value = -1; nodiH[index].frq = 0; nodiH[index].l = NULL; nodiH[index].r = NULL; nodiH[index].nodochiuso = 0; } nodo_corrente = total_s; int inprocess = 0; for (index = 0; index < total_s; index++) { nodiH[index].value = list_of_chars_o[index]; nodiH[index].frq = frq_of_chars_o[index]; nodiH[index].l = NULL; nodiH[index].r = NULL; nodiH[index].nodochiuso = 0; } numero_nodi = total_s; //Finche non vengono trattati tutti i simboli ... while ( simboli_trattati < total_s ) { nodoH_uno = 0; nodoH_due = 0; //Determino il primo simbolo con ricorrenza minore.. for (index = 0; index < numero_nodi; index++) { //Se il nodo non è chiuso.. if ( nodiH[index].nodochiuso==0 ) { if ( nodoH_uno == 0 || nodiH[index].frq < nodoH_uno ) { nodoH_uno = nodiH[index].frq; nodoH_uno_index = index; } } } //Determino il secondo simbolo con ricorrenza minore.. for (index = 0; index < numero_nodi; index++) { //Skip del primo dono meno ricorrente.. if ( index == nodoH_uno_index) continue; //Se il nodo non è chiuso.. if ( nodiH[index].nodochiuso==0 ) { if ( nodoH_due == 0 || nodiH[index].frq < nodoH_due ) { nodoH_due = nodiH[index].frq; nodoH_due_index = index; } } } if (nodoH_due == 0 ) { break; } nodoH_somma = nodoH_uno + nodoH_due; //Chiudo i due nodi nodiH[nodoH_uno_index].nodochiuso = 1; nodiH[nodoH_due_index].nodochiuso = 1; //Creo il nuovo nodo somma che conterrà i due nodi.. nodiH[nodo_corrente].frq = nodoH_somma; radice = nodo_corrente; if(nodoH_uno < nodoH_due) { nodiH[nodo_corrente].l = &nodiH[nodoH_uno_index]; nodiH[nodo_corrente].r = &nodiH[nodoH_due_index]; } else { nodiH[nodo_corrente].r = &nodiH[nodoH_uno_index]; nodiH[nodo_corrente].l = &nodiH[nodoH_due_index]; } nodo_corrente++; numero_nodi++; //printf("\nNodo Uno meno ricorrente index: %d valore: %d\nNodo due meno ricorrente index: %d valore: %d\nSomma dei nodi: %d", nodoH_uno_index, nodoH_uno , nodoH_due_index,nodoH_due, nodoH_somma); // break; } //printf("\n\n\n"); print_albero(nodiH[radice]); } void build_tree() { //Azzeramento dei dati for (index = 0; index < NODI_NUMER; index++) { nodiH[index].value = -1; nodiH[index].frq = 0; nodiH[index].l = NULL; nodiH[index].r = NULL; nodiH[index].nodochiuso = 0; } nodo_corrente = total_s; int inprocess = 0; for (index = 0; index < total_s; index++) { nodiH[index].value = list_of_chars[index]; nodiH[index].frq = frq_of_chars[index]; nodiH[index].l = NULL; nodiH[index].r = NULL; nodiH[index].nodochiuso = 0; } numero_nodi = total_s; //Finche non vengono trattati tutti i simboli ... while ( simboli_trattati < total_s ) { nodoH_uno = 0; nodoH_due = 0; //Determino il primo simbolo con ricorrenza minore.. for (index = 0; index < numero_nodi; index++) { //Se il nodo non è chiuso.. if ( nodiH[index].nodochiuso==0 ) { if ( nodoH_uno == 0 || nodiH[index].frq < nodoH_uno ) { nodoH_uno = nodiH[index].frq; nodoH_uno_index = index; } } } //Determino il secondo simbolo con ricorrenza minore.. for (index = 0; index < numero_nodi; index++) { //Skip del primo dono meno ricorrente.. if ( index == nodoH_uno_index) continue; //Se il nodo non è chiuso.. if ( nodiH[index].nodochiuso==0 ) { if ( nodoH_due == 0 || nodiH[index].frq < nodoH_due ) { nodoH_due = nodiH[index].frq; nodoH_due_index = index; } } } if (nodoH_due == 0 ) { break; } nodoH_somma = nodoH_uno + nodoH_due; //Chiudo i due nodi nodiH[nodoH_uno_index].nodochiuso = 1; nodiH[nodoH_due_index].nodochiuso = 1; //Creo il nuovo nodo somma che conterrà i due nodi.. nodiH[nodo_corrente].frq = nodoH_somma; radice = nodo_corrente; if(nodoH_uno < nodoH_due) { nodiH[nodo_corrente].l = &nodiH[nodoH_uno_index]; nodiH[nodo_corrente].r = &nodiH[nodoH_due_index]; } else { nodiH[nodo_corrente].r = &nodiH[nodoH_uno_index]; nodiH[nodo_corrente].l = &nodiH[nodoH_due_index]; } nodo_corrente++; numero_nodi++; // printf("\nNodo Uno meno ricorrente index: %d valore: %d\nNodo due meno ricorrente index: %d valore: %d\nSomma dei nodi: %d", nodoH_uno_index, nodoH_uno , nodoH_due_index,nodoH_due, nodoH_somma); // break; } //printf("\n\n\n"); print_albero(nodiH[radice]); } int main(int argc, char *argv[]) { if (argc < 3) { printf("Usage:\nTo compress a file-> -c input output\nTo decompress a file-> -x input output"); system("PAUSE"); return EXIT_SUCCESS; } if ( argv[1][1] == 'c' ) { HUcompress(argv[2], argv[3]); printf("\nCompression Done!!!"); } if ( argv[1][1] == 'x' ) { reset_data(); HUdecompress(argv[2], argv[3]); printf("\nDecompress done!!!"); } system("PAUSE"); return EXIT_SUCCESS; }