#include #include #include "string.h" #include "fstream" /* Implementazione by Ugo Cirmignani Analista programmatore (Roma) Il sorgente puù essere compilato con Dev-cpp (windows), GCC (unix) e altri. --Last update 1/1/2012 -Fixed a reset dictionary issue */ //262143 == 20 bit == 1gb circa //#define dictionary_size 1000000 //262143 == 19 bit == 500 MB circa //#define dictionary_size 524287 //262143 == 18 bit == 255 MB circa //#define dictionary_size 262143 //17 bit //#define dictionary_size 131071 //16 bit #define dictionary_size 65535 //14 bit //#define dictionary_size 16383 //9 bit //#define dictionary_size 5000 #define reset_dictionary 1 //Reset the dictionary if it become too big #define bit_writing 1 //Use the bit writing mode, improve the data compression using namespace std; struct lzwn { //The byte value! unsigned char value; //The index/prefix value unsigned int index; //The value code === node index unsigned int code; //Pointer struct lzwn *p[256]; }; struct lzwn dictionary[dictionary_size+100]; //The writer buffer! unsigned char write_buffer[5] = {0}; int wb_index; int wb_rest; int wb_bit_size; int wb_processed; int wb_operation_minus; int wb_old_bit_size; unsigned char given_rest_char; int operation = 0; char reversing_buffer[dictionary_size+1000]; int rb_index; unsigned int full_after; unsigned int after_full_after_successes; unsigned long next_code = 0; unsigned long primo, secondo, vecchio, givecode; void dic_reset(); void dic_init(); int GetBitFlag( unsigned char, int); unsigned char SetBitFlag(unsigned char, int, int); unsigned int get_end_bit() { return ((1 << (wb_bit_size)) - 1); } void print_buffered(unsigned int value) { if (wb_rest != 0) { write_buffer[0] = (write_buffer[wb_index-1]) << 8-wb_rest; write_buffer[0] = write_buffer[wb_index]; } wb_index = 0; for ( int i = wb_bit_size; i > 0; i-- ) { unsigned char ilbit = ( ( ( value<<(32 - i) ) >> (31) ) ); write_buffer[wb_index] = SetBitFlag(write_buffer[wb_index], 7 - wb_rest, (int)ilbit ); wb_rest++; if (wb_rest == 8 ) { wb_index++; wb_rest = 0; } } } int give_code(unsigned char ingresso) { if ( wb_bit_size == 8 ) { givecode = ingresso; given_rest_char = 0; wb_processed = 0; wb_rest =0; return ingresso; } else { int bit_to_process = wb_bit_size - wb_processed; if ( bit_to_process == wb_bit_size ) { givecode = 0; } if (wb_rest > 0) { if ( (bit_to_process - wb_rest) >= 0 ) { givecode += (unsigned int)( given_rest_char << (bit_to_process - wb_rest)) ; wb_processed = wb_processed + wb_rest; bit_to_process = bit_to_process - wb_rest; wb_rest = 0; if ( bit_to_process == 0 ) { wb_processed = 0; return givecode; } } else { ; } } if ( (bit_to_process - 8) >= 0 ) { givecode += (unsigned int)( ingresso << bit_to_process - 8); wb_rest = 0; wb_processed = wb_processed + 8; bit_to_process = bit_to_process -8; if ( bit_to_process == 0 ) { wb_processed = 0; return givecode; } else { return -1; } } else { wb_rest = 8-bit_to_process; given_rest_char = ( ( ingresso<<(bit_to_process) ) ); given_rest_char = given_rest_char >> bit_to_process; givecode += (unsigned int)( ingresso>>(8-bit_to_process) ); wb_processed = 0; return givecode; } } return -1; } //Get bit 0-7 flag return 1 or 0, if any error return -1 int GetBitFlag( unsigned char MyByte, int BitN ) { unsigned char B; switch (BitN) { case 0: B =(MyByte & 0x01); return B; break; case 1: B = (MyByte & 0x02); B = (B >> 1); return B; break; case 2: B = (MyByte & 0x04); B = (B >> 2); return B; break; case 3: B = (MyByte & 0x08); B = (B >> 3); return B; break; case 4: B = (MyByte & 0x10); B = (B >> 4); return B; break; case 5: B = (MyByte & 0x20); B = (B >> 5); return B; break; case 6: B = (MyByte & 0x40); B = (B >> 6); return B; break; case 7: B = (MyByte & 0x80); B = (B >> 7); return B; break; default: return -1; break; } return -1; } unsigned char SetBitFlag(unsigned char MyByte, int BitN, int value) { if (((BitN < 0) && (BitN > 7)) || ((value < 0) && (value > 1))) return MyByte; switch (BitN) { case 0: if (value == 0) { if (GetBitFlag(MyByte, 0) == 0) return MyByte; else { MyByte = ( MyByte ^ 0x01); return MyByte; } } if (value == 1) if (GetBitFlag(MyByte, 0) == 1) return MyByte; else { MyByte = (MyByte ^ 0x01); return MyByte; } break; case 1: if (value == 0) { if (GetBitFlag(MyByte, 1) == 0) return MyByte; else { MyByte = (MyByte ^ 0x02); return MyByte; } } if (value == 1) if (GetBitFlag(MyByte, 1) == 1) return MyByte; else { MyByte = (MyByte ^ 0x02); return MyByte; } break; case 2: if (value == 0) { if (GetBitFlag(MyByte, 2) == 0) return MyByte; else { MyByte = (MyByte ^ 0x04); return MyByte; } } if (value == 1) if (GetBitFlag(MyByte, 2) == 1) return MyByte; else { MyByte = (MyByte ^ 0x04); return MyByte; } break; case 3: if (value == 0) { if (GetBitFlag(MyByte, 3) == 0) return MyByte; else { MyByte = (MyByte ^ 0x08); return MyByte; } } if (value == 1) if (GetBitFlag(MyByte, 3) == 1) return MyByte; else { MyByte = (MyByte ^ 0x08); return MyByte; } break; case 4: if (value == 0) { if (GetBitFlag(MyByte, 4) == 0) return MyByte; else { MyByte = (MyByte ^ 0x10); return MyByte; } } if (value == 1) if (GetBitFlag(MyByte, 4) == 1) return MyByte; else { MyByte = (MyByte ^ 0x10); return MyByte; } break; case 5: if (value == 0) { if (GetBitFlag(MyByte, 5) == 0) return MyByte; else { MyByte = (MyByte ^ 0x20); // Need to set it to 0 return MyByte; } } if (value == 1) if (GetBitFlag(MyByte, 5) == 1) return MyByte; else { MyByte = (MyByte ^ 0x20); return MyByte; } break; case 6: if (value == 0) { if (GetBitFlag(MyByte, 6) == 0) return MyByte; else { MyByte = (MyByte ^ 0x40); // Need to set it to 0 return MyByte; } } if (value == 1) if (GetBitFlag(MyByte, 6) == 1) return MyByte; else { MyByte = (MyByte ^ 0x40); return MyByte; } break; case 7: if (value == 0) { if (GetBitFlag(MyByte, 7) == 0) return MyByte; else { MyByte = (MyByte ^ 0x80); return MyByte; } } if (value == 1) if (GetBitFlag(MyByte, 7) == 1) return MyByte; else { MyByte = (MyByte ^ 0x80); return MyByte; } break; case 8: if (value == 0) { if (GetBitFlag(MyByte, 8) == 0) return MyByte; else { MyByte = (MyByte ^ 0x01); return MyByte; } } if (value == 1) if (GetBitFlag(MyByte, 8) == 1) return MyByte; else { MyByte = (MyByte ^ 0x01); return MyByte; } break; default: return MyByte; break; } return MyByte; } long find(long index, unsigned char value) { if ( dictionary[index].p[value] == NULL ) return -1; else return dictionary[index].p[value]->code; } void put_to_table(unsigned long index, unsigned char value) { if ( bit_writing == 1 && operation == 0 ) { if ( next_code == get_end_bit()-2 ) { wb_bit_size++; } } if ( bit_writing == 1 && operation == 1 ) { if ( next_code == get_end_bit()-4 ) { wb_bit_size++; } } dictionary[index].p[value] = &dictionary[next_code]; dictionary[next_code].value = value; dictionary[next_code].index = index; dictionary[next_code].code = next_code; next_code++; } int compress_file(char *input, char *output) { int do_reset = 0; after_full_after_successes = 0; full_after = 0; operation = 0; FILE *fp = fopen(input, "rb"); if (fp == NULL) { return 0; } int rc; if ( (rc = fgetc(fp)) == EOF ) { return 0; } else primo = rc; ofstream outfile; outfile.open(output,ios::out | ios::binary ); restart_compression: while ( (secondo = getc(fp)) != EOF ) { long find_result = -1; find_result = find(primo, secondo); if ( find_result == -1 ) { if ( next_code < dictionary_size - 1 ) { put_to_table(primo, secondo); if ( bit_writing == 0 ) { outfile.put(primo / 256); outfile.put(primo % 256); } else { print_buffered(primo); for ( int x = 0; x < wb_index; x++ ) outfile.put(write_buffer[x]); } primo = secondo; } else { if (reset_dictionary == 1 ) { if ( bit_writing == 1 ) { do_reset = 1; } dic_reset(); } if ( bit_writing == 0 ) { outfile.put(primo / 256); outfile.put(primo % 256); } else { print_buffered(primo); for ( int x = 0; x < wb_index; x++ ) outfile.put(write_buffer[x]); if ( do_reset == 1 ) { print_buffered(dictionary_size); for ( int x = 0; x < wb_index; x++ ) outfile.put(write_buffer[x]); wb_bit_size=9; } } primo = secondo; goto restart_compression; } } else { primo = find_result; } } fclose(fp); if ( bit_writing == 0 ) { outfile.put(primo / 256); outfile.put(primo % 256); } else { print_buffered(primo); for ( int x = 0; x < wb_index; x++ ) outfile.put(write_buffer[x]); } if ( wb_rest != 0 ) { print_buffered(0); for ( int x = 0; x < wb_index; x++ ) outfile.put(write_buffer[x]); } outfile.close(); return 1; } void decoding_printf(unsigned long pcodice) { rb_index = 0; if ( pcodice < 256 ){ reversing_buffer[rb_index] = pcodice; rb_index++; } else { while (pcodice >= 256) { if ( full_after > 0 ) after_full_after_successes++; reversing_buffer[rb_index] = dictionary[pcodice].value; rb_index++; pcodice = dictionary[pcodice].index; } reversing_buffer[rb_index] = pcodice; rb_index++; } } long decompress_file(char *lfile, char *ofilen) { full_after = 0; after_full_after_successes = 0; int print_last_flag = 0; operation = 1; unsigned char fc; ofstream ofile; ofile.open(ofilen,ios::out | ios::binary ); FILE *fp = fopen(lfile, "rb"); if (fp == NULL) { printf("\nErrore nell'aprire il file %s", lfile); return 0; } int rch; int rcl; decompress_begin: if ( bit_writing == 1 ) { rch = fgetc(fp); while ( (give_code(rch)) == -1 ) { if ( (rch = fgetc(fp)) == EOF ) { return 0; } } primo = givecode; } else { if ( ((rch = fgetc(fp)) == EOF) || ((rcl = fgetc(fp)) == EOF) ) { printf("\nIl file in ingresso e' vuoto!"); return 0; } else primo = (rch*256)+rcl; } decoding_printf(primo); for ( int i = rb_index -1; i >= 0; i-- ) ofile.put(reversing_buffer[i]); fc = primo; vecchio = fc; while(( (rch = fgetc(fp)) != EOF) ) { unsigned int next_old = 0; if ( bit_writing == 1 ) { while ( (give_code(rch)) == -1 ) { if ( (rch = fgetc(fp)) == EOF ) { return 0; } } if (reset_dictionary == 1 && bit_writing == 1 ) { if ( givecode == dictionary_size ) { dic_reset(); wb_bit_size = 9; goto decompress_begin; if ( ( rch = fgetc(fp)) == EOF ) { return 0; } while ( (give_code(rch)) == -1 ) { if ( (rch = fgetc(fp)) == EOF ) { return 0; } } secondo = givecode; } else secondo = givecode; } else secondo = givecode; } else { rcl = fgetc(fp); secondo = (rch*256)+rcl; } if ( secondo == next_code ) { print_last_flag = 0; decoding_printf(primo); for ( int i = rb_index -1; i >= 0; i-- ) { ofile.put(reversing_buffer[i]); } fc = reversing_buffer[rb_index -1]; print_last_flag = 0; decoding_printf(vecchio); for ( int i = rb_index -1; i >= 0; i-- ) { ofile.put(reversing_buffer[i]); } } else { print_last_flag = 0; decoding_printf(secondo); for ( int i = rb_index -1; i >= 0; i-- ) { ofile.put(reversing_buffer[i]); } fc = reversing_buffer[rb_index -1]; } vecchio = fc; put_to_table(primo, vecchio); print_last_flag = 1; primo = secondo; } return 0; } void dic_init() { for ( int i = 0; i < dictionary_size; i++) { for ( int z = 0; z < 256; z++) dictionary[i].p[z] = NULL; } for ( int i = 0; i < 256; i++){ dictionary[i].index -1; dictionary[i].value = i; dictionary[i].code = i; } next_code = 256; wb_bit_size = 9; wb_rest = 0; wb_processed = 0; } void dic_reset() { for ( int i = 0; i < dictionary_size; i++) { for ( int z = 0; z < 256; z++) dictionary[i].p[z] = NULL; } for ( int i = 0; i < 256; i++){ dictionary[i].index -1; dictionary[i].value = i; dictionary[i].code = i; } next_code = 256; } int main(int argc, char *argv[]) { dic_init(); if ( argc < 3 ) { printf("\n--- LZW File compression utility ---\n\nUsage: \n\tuclzw -c source destination (file compress)\n\tuclzw -x source destination (file compress)\n\n"); return 0; } if ( argv[1][1] == 'c' ) { printf("\nThe file: %s will be compressed and saved as: %s\n\n", argv[2], argv[3]); compress_file(argv[2],argv[3]); } if ( argv[1][1] == 'x' ) { printf("\nThe file: %s will be extract and saved as: %s\n\n", argv[2], argv[3]); decompress_file(argv[2],argv[3]); } return EXIT_SUCCESS; }