+ All Categories
Home > Documents > Simple Huffman

Simple Huffman

Date post: 01-Dec-2015
Category:
Upload: lexie-go-cedenio
View: 33 times
Download: 0 times
Share this document with a friend
Description:
Simple Huffman Code (Comp Engg)
31
1. /* 2. 3. * first pass using compression algorithm 4. 5. 6. 7. 8. */ 9. 10. 11. 12. /* huffman tree */ 13. 14. 15. 16. #include <stdio.h> 17. 18. #include <stdlib.h> 19. 20. #include <string.h> 21. 22. 23. 24. #include "sxhuffman.h" 25. 26. 27. 28. typedef struct _sy_huff_node syhuffnode; 29. 30. typedef struct _sy_huff_print syhuffprint; 31. 32. 33. 34. struct _sy_huff_node 35. 36. { 37. 38. uint32_t weight; /* number of occurance of c */ 39. 40. uint32_t idx; /* index of this instance in the array */ 41. 42. uint32_t pidx; /* parent-index of this child */ 43. 44. uint32_t fidx; /* fake index */ 45. 46. uint32_t lx; /* left child */ 47. 48. uint32_t rx; /* rightt child */ 49. 50. int32_t c; /* original value */ 51. 52. uint32_t cc; /* huffman code */ 53. 54. uint32_t cl; /* huffman code len */ 55. 56. unsigned bit:1;
Transcript
Page 1: Simple Huffman

1. /*2.3. * first pass using compression algorithm4.5.6.7.8. */9.10.11.12. /* huffman tree */13.14.15.16. #include <stdio.h>17.18. #include <stdlib.h>19.20. #include <string.h>21.22.23.24. #include "sxhuffman.h"25.26.27.28. typedef struct _sy_huff_node syhuffnode;29.30. typedef struct _sy_huff_print syhuffprint;31.32.33.34. struct _sy_huff_node35.36. {37.38. uint32_t weight; /* number of occurance of c */39.40. uint32_t idx; /* index of this instance in the array */41.42. uint32_t pidx; /* parent-index of this child */43.44. uint32_t fidx; /* fake index */45.46. uint32_t lx; /* left child */47.48. uint32_t rx; /* rightt child */49.50. int32_t c; /* original value */51.52. uint32_t cc; /* huffman code */53.54. uint32_t cl; /* huffman code len */55.56. unsigned bit:1;57.58. };59.60. struct _sy_huff_print61.62. {63.64. int32_t count; /* number of occurence of this carracter */65.66. uint32_t idx; /* index of this carracter in the huffman table */67.68. int32_t v; /* carracter value */

Page 2: Simple Huffman

69.70. };71.72.73.74. #define HUFF_NB_BYTES(BITS) ( (BITS >> 3) + (BITS & 7 ? 1 : 0) )75.76. 77.78. #define BYTESWAP(X,Y,Z){\79.80. register unsigned char *s = (unsigned char *)X;\81.82. register unsigned char *d = (unsigned char *)Y;\83.84. size_t ZLong = Z; unsigned long c; \85.86. for(;;){\87.88. if(!ZLong) break; c = *s ; *s = *d; *d = c; *s++; *d++; --ZLong;\89.90. if(!ZLong) break; c = *s ; *s = *d; *d = c; *s++; *d++; --ZLong;\91.92. if(!ZLong) break; c = *s ; *s = *d; *d = c; *s++; *d++; --ZLong;\93.94. if(!ZLong) break; c = *s ; *s = *d; *d = c; *s++; *d++; --ZLong;\95.96. }\97.98. }99.100. #define PARENT(x) ( (x - 1) >> 1 )101.102. #define LEFTCLD(x) ( (x << 1) + 1 )103.104. #define RIGHTCLD(x) ( (x + 1) << 1 )105.106.107.108. #define SIFT_DOWN(TAB,N,ES,MX,CMP,BO){\109.110. size_t L = LEFTCLD(N);\111.112. if( L >= MX ){\113.114. BO = FALSE;\115.116. }else{\117.118. size_t M = (L + 1) < MX && CMP(&TAB[(L+1)*ES],&TAB[L*ES],ES) > 0 ? L + 1 : L ;\119.120. BO = CMP(&TAB[M*ES],&TAB[N*ES],ES) > 0 ? TRUE : FALSE ;\121.122. if( BO == TRUE ){ BYTESWAP(&TAB[M*ES],&TAB[N*ES],ES); N = M; }\123.124. }\125.126. }127.128.129.130. #define HUFF_SIFT_DOWN(TAB,IDX,MAX,SWAP){\131.132. uint32_t L = (IDX << 1) + 1;\133.134. if( L >= MAX ){\135.136. SWAP = FALSE;\137.138. }else{\

Page 3: Simple Huffman

139.140. uint32_t MIN = L + 1 < MAX && TAB[L+1].weight < TAB[L].weight ? L + 1 : L;\141.142. SWAP = TAB[IDX].weight > TAB[MIN].weight ? TRUE :FALSE;\143.144. if( SWAP == TRUE ){\145.146. L = TAB[IDX].idx;\147.148. TAB[IDX].idx = TAB[MIN].idx;\149.150. TAB[MIN].idx = L;\151.152. BYTESWAP(&TAB[IDX],&TAB[MIN],sizeof(syhuffnode));\153.154. IDX = MIN;\155.156. }\157.158. }\159.160. }161.162.163.164. #define HUFF_SORT_BYLEAF(TAB,IDX,MAX,SWAP){\165.166. uint32_t L = (IDX << 1) + 1;\167.168. if( L >= MAX ){\169.170. SWAP = FALSE;\171.172. }else{\173.174. uint32_t LE = L + 1 < MAX && TAB[L+1].c < TAB[L].c ? L + 1 : L;\175.176. SWAP = TAB[IDX].c > TAB[LE].c ? TRUE : FALSE ;\177.178. if( SWAP == TRUE ){\179.180. HUFF_BYTE_SWAP(TAB,IDX,LE);\181.182. IDX = LE;\183.184. }\185.186. }\187.188. }189.190. #define HUFF_BYTE_SWAP(TAB,X,Y){\191.192. uint32_t t = TAB[X].idx;\193.194. TAB[X].idx = TAB[Y].idx;\195.196. TAB[Y].idx = t;\197.198. BYTESWAP(&TAB[X],&TAB[Y],sizeof(syhuffnode));\199.200. }201.202.203.204. /* ========================================== Utility ======================================================== */205.206. #ifndef _WIN32207.208. /* UNIX is assumed */

Page 4: Simple Huffman

209.210. #include <unistd.h>211.212. #include <sys/uio.h>213.214. #include <fcntl.h>215.216. #include <errno.h>217.218. #include <sys/stat.h>219.220. #endif221.222. #include <limits.h>223.224. #include <string.h>225.226.227.228. int BigEndian16Pack(unsigned char *zBuf,size_t nLen,uint16_t nB)229.230. {231.232.233.234. if( zBuf == NULL || nLen < sizeof(uint16_t) ){235.236. return -1;237.238. }239.240. zBuf[1] = nB & 0xFF; nB >>= 8;241.242. zBuf[0] = nB;243.244. return 0;245.246. }247.248. int BigEndian16Unpack(const unsigned char *zBuf,size_t nLen,uint16_t *pNumber)249.250. {251.252. if( zBuf == NULL || nLen <= 0 || pNumber == NULL ){253.254. return -1;255.256. }257.258. *pNumber = zBuf[0] << 8 | zBuf[1];259.260. return 0;261.262. }263.264. static ssize_t __sysread(sxfd FD,void *buf,size_t toRead)265.266. {267.268. #ifdef _WIN32269.270. DWORD rc;271.272. if( FALSE == ReadFile(FD,buf,toRead,&rc,NULL) ){273.274. return 0;275.276. }277.278. return rc;279.

Page 5: Simple Huffman

280. #else281.282. ssize_t rc;283.284. do{285.286. rc = read(FD,buf,toRead);287.288. }while( rc < 0 && errno == EINTR );289.290.291.292. #endif293.294. return rc;295.296. }297.298.299.300. static ssize_t __syswrite(sxfd FD,void *buf,size_t toWrite)301.302. {303.304. #ifdef _WIN32305.306. DWORD rc;307.308. WriteFile(FD,buf,toWrite,&rc,NULL);309.310. return rc;311.312. #else313.314. ssize_t rc;315.316. do{317.318. rc = write(FD,buf,toWrite);319.320. }while( rc < 0 && errno == EINTR );321.322. #endif323.324. return rc;325.326. }327.328. static sxfd __sysopen(const char *file,uint8_t create)329.330. {331.332. sxfd fd;333.334. #ifdef _WIN32335.336. DWORD wCr = create ? CREATE_NEW : OPEN_EXISTING;337.338. DWORD wAcc = GENERIC_READ | GENERIC_WRITE;339.340. #endif341.342. if( file == NULL || !*file ){343.344. return INVALID_FD;345.346. }347.348. #ifdef _WIN32349.350. if( !create ){

Page 6: Simple Huffman

351.352. wAcc &= ~GENERIC_WRITE;353.354. }355.356. fd = CreateFileA(file,wAcc,0,0,wCr,FILE_ATTRIBUTE_NORMAL|FILE_FLAG_RANDOM_ACCESS,0);357.358. #else359.360. if( create ){361.362.363.364. fd = open(file,O_CREAT|O_BINARY|O_RDWR|O_TRUNC,0744);365.366.367.368. }else{369.370. /* open read only */371.372. fd = open(file,O_RDONLY|O_BINARY);373.374. }375.376. #endif377.378. return fd;379.380. }381.382. static void __sysclose(sxfd FD)383.384. {385.386.387.388. #ifdef _WIN32389.390. CloseHandle(FD);391.392. #else393.394. close(FD);395.396. #endif397.398.399.400. }401.402. static void * __sysmalloc(size_t nBytes)403.404. {405.406. char *p;407.408. #ifdef _WIN32409.410. p = HeapAlloc(GetProcessHeap(),0,nBytes);411.412. #else413.414. p = malloc(nBytes);415.416. #endif417.418. if( p == NULL ){419.420. puts("Out of Memory");421.

Page 7: Simple Huffman

422. exit(0);423.424. }425.426. return (void *)p;427.428. }429.430. static void __sysfree(void *pAddr)431.432. {433.434. #ifdef _WIN32435.436. HeapFree(GetProcessHeap(),0,pAddr);437.438. #else439.440. free(pAddr);441.442. #endif443.444. }445.446. static int __sysfilesize(sxfd FD,size_t *pSize)447.448. {449.450. if( pSize == NULL || FD == INVALID_FD ){451.452. return -1;453.454. }455.456. #ifdef _WIN32457.458. *pSize = GetFileSize(FD,NULL);459.460. #else461.462. struct stat st;463.464. if( fstat(FD,&st) < 0 ){465.466. return -1;467.468. }469.470. *pSize = st.st_size;471.472. #endif473.474. return 0;475.476. }477.478.479.480. /* ========================================== Compress Passs ================================================= */481.482. static int __MakeNodes(syhuffprint *Tab,syhuffnode **pList,uint32_t *nb,ProcAlloc pAlloc)483.484. {485.486. uint32_t i,j,Leaf;487.488. syhuffnode *pNode;489.490.491.

Page 8: Simple Huffman

492.493.494. *nb = ( (*nb) << 1 ) - 1 ;495.496. *pList = (syhuffnode *)pAlloc( (*nb) * sizeof(syhuffnode));497.498. if( *pList == NULL ){499.500. return -1;501.502. }503.504. for( j = 0 ; j < *nb ; ++j ){505.506. (*pList)[j].fidx = (*pList)[j].idx = j + 1;507.508. (*pList)[j].pidx = 0;509.510. (*pList)[j].rx = (*pList)[j].lx = 0;511.512. (*pList)[j].weight = 0;513.514. (*pList)[j].c = -1;515.516. (*pList)[j].cc = 0;517.518. (*pList)[j].cl = 0;519.520.521.522. }523.524. Leaf = ( *nb + 1 ) >> 1;525.526.527.528. for( i = j = 0 ; j < Leaf ; ++j ){529.530. pNode = &(*pList)[j];531.532. while( ! Tab[i].count ){533.534. ++i;535.536. }537.538. pNode->weight = Tab[i].count;539.540. pNode->c = Tab[i].v;541.542. i++;543.544. } 545.546.547.548. (*pList)[*nb - 1].c = -2; /* mark the last item as root */549.550. return 0;551.552. }553.554. static void __Heapify(syhuffnode *pList,uint32_t nb)555.556. {557.558. uint32_t i,idx;559.560. uint8_t swap;561.562.

Page 9: Simple Huffman

563.564. swap = TRUE; i = ( nb - 1 ) >> 1;565.566. for (;;){567.568. idx = i;569.570. for(;;){571.572. if (!swap) break; HUFF_SIFT_DOWN(pList,idx,nb,swap);573.574. if (!swap) break; HUFF_SIFT_DOWN(pList,idx,nb,swap); 575.576. if (!swap) break; HUFF_SIFT_DOWN(pList,idx,nb,swap); 577.578. if (!swap) break; HUFF_SIFT_DOWN(pList,idx,nb,swap); 579.580. }581.582. if( i == 0 ) break; i--; swap = TRUE;583.584. }585.586.587.588. }589.590. static int __GetByteWeight(const void *In,size_t Len,syhuffprint *pTab,size_t TLen,uint32_t *nbi)591.592. {593.594. uint32_t i;595.596. syhuffprint *pPrint;597.598. register unsigned char *s = (unsigned char *)In;599.600.601.602. for(i = 0 ; i < TLen ; ++i ){603.604. pTab[i].count = pTab[i].idx = 0;605.606. pTab[i].v = 0;607.608. }609.610. *nbi = 0;611.612. for(;;){613.614. if(!Len) break; pPrint = &pTab[*s & (TLen-1) ]; if( !pPrint->count ){ pPrint->v = (int)*s; (*nbi)++; } pPrint->count++; *s++; --Len;615.616. if(!Len) break; pPrint = &pTab[*s & (TLen-1) ]; if( !pPrint->count ){ pPrint->v = (int)*s; (*nbi)++; } pPrint->count++; *s++; --Len;617.618. if(!Len) break; pPrint = &pTab[*s & (TLen-1) ]; if( !pPrint->count ){ pPrint->v = (int)*s; (*nbi)++; } pPrint->count++; *s++; --Len;619.620. if(!Len) break; pPrint = &pTab[*s & (TLen-1) ]; if( !pPrint->count ){ pPrint->v = (int)*s; (*nbi)++; } pPrint->count++; *s++; --Len;621.622.623.624. }625.626. return 0;627.628. }

Page 10: Simple Huffman

629.630. static int __BuildHuffTree(syhuffnode *pList,size_t nb)631.632. {633.634. uint32_t i;635.636. uint32_t fi;637.638. uint32_t tmp;639.640. syhuffnode *pNode,*pParent;641.642. uint8_t SWAP;643.644. /* get the first parent */645.646. fi = ( nb + 1 ) >> 1;647.648. i = fi - 1;649.650. for(;;){651.652. if( fi >= nb || i == 0 ) break;653.654.655.656. pParent = &pList[fi];657.658.659.660. pNode = &pList[0];661.662. pParent->weight = pNode->weight;663.664. pNode->pidx = pParent->fidx;665.666. pNode->bit = 0;667.668.669.670.671.672. HUFF_BYTE_SWAP(pList,0,i); 673.674. SWAP = TRUE; tmp = 0;675.676. for(;;){677.678. HUFF_SIFT_DOWN(pList,tmp,i,SWAP); if( SWAP == FALSE ) break;679.680. }681.682.683.684. pNode = &pList[0];685.686. pParent->weight += pNode->weight;687.688. pNode->bit = 1;689.690. pNode->pidx = pParent->fidx;691.692.693.694.695.696. HUFF_BYTE_SWAP(pList,0,fi); 697.698. SWAP = TRUE; tmp = 0;699.

Page 11: Simple Huffman

700. for(;;){701.702. HUFF_SIFT_DOWN(pList,tmp,i,SWAP); if( SWAP == FALSE ) break;703.704. }705.706. i--; fi++;707.708.709.710. }711.712.713.714. /* sort list by leaf first 715.716. * for that we do heapsort so the root will be in the last index since its c = -1717.718. */719.720. i = (nb - 1 ) >> 1; SWAP = TRUE;721.722. for(;;){723.724. tmp = i;725.726. for(;;){727.728. if(!SWAP) break; HUFF_SORT_BYLEAF(pList,tmp,nb,SWAP);729.730. }731.732. if( i == 0 ) break; SWAP = TRUE; i--;733.734. }735.736. i = nb -1;737.738. for(;;){739.740. HUFF_BYTE_SWAP(pList,0,i);741.742. SWAP = TRUE; tmp = 0;while( SWAP == TRUE ){ HUFF_SORT_BYLEAF(pList,tmp,i,SWAP);}743.744. if(!i) break; i--;745.746. }747.748. /* remove faked parent index and build real index */749.750. fi = (nb + 1) >> 1;751.752. for( i = 0 ; i < nb ; i++ ){753.754. if( pList[i].pidx == 0 ){755.756. continue;757.758. }759.760. pNode = &pList[i];761.762. for(tmp = fi ; tmp < nb ; ++tmp ){763.764. if( pList[tmp].fidx == pNode->pidx ){765.766. pNode->pidx = pList[tmp].idx;767.768. break;769.

Page 12: Simple Huffman

770. }771.772. }773.774. } 775.776.777.778.779.780. return 0;781.782. }783.784. static int __BuildHuffCode(syhuffnode *pList,uint32_t nItem,syhuffprint *pPrint,size_t pSize,uint32_t *nBits)785.786. {787.788. uint32_t i,l;789.790. uint32_t *pCLD = NULL;791.792. syhuffnode *pNode = NULL;793.794. syhuffnode *pParent = NULL;795.796.797.798. const uint32_t mLeaf = (nItem + 1) >> 1;799.800.801.802. *nBits = 0;803.804. for( i = 0 ; i < mLeaf ; ++i ){805.806. pNode = &pList[i]; pNode->cc = 0; pNode->cl = 1;807.808. pNode->cc |= pNode->bit; 809.810. if( pNode->pidx != 0 ){811.812. pParent = &pList[pNode->pidx - 1];813.814. pCLD = pNode->bit == 0 ? &pParent->lx : &pParent->rx;815.816. *pCLD = pNode->idx;817.818. while(pParent->pidx != 0){819.820. pNode->cc <<= 1; pNode->cc |= pParent->bit; pNode->cl++; 821.822. pCLD = &pParent->idx; l = pParent->bit == 0 ? 1 : 0;823.824. pParent = &pList[pParent->pidx-1];825.826. if( l ){827.828. pParent->lx = *pCLD;829.830. }else{831.832. pParent->rx = *pCLD;833.834. }835.836. }837.838. }839.

Page 13: Simple Huffman

840.841.842. *nBits += pNode->cl * pNode->weight;843.844.845.846. pPrint[pNode->c & (pSize-1)].idx = pNode->idx; 847.848. }849.850.851.852. return 0;853.854. }855.856. static int __PrintHuffmanHeader(size_t nbItem,void *OUTFD,size_t OLen,uint32_t *offset)857.858. {859.860. uint32_t rc;861.862. register unsigned char *s = (unsigned char *)(OUTFD);863.864. if( OLen < 2 * sizeof(char) + sizeof(unsigned short int) ){865.866. return -1;867.868. }869.870.871.872. /* Magic */873.874. s[0] = 'H'; s[1] = 'F'; 875.876. rc = 2 * sizeof(char); OLen -= rc;877.878.879.880. /* number of item */881.882. BigEndian16Pack(&s[rc],OLen,(uint16_t)nbItem);883.884. rc += sizeof(unsigned short int); OLen -= sizeof(unsigned short int);885.886.887.888. *offset += rc;889.890. return 0;891.892. }893.894.895.896. static int __PrintHuffTable(syhuffnode *pTable,size_t nb,void *pOut,size_t OLen,uint32_t *offt)897.898. {899.900. register unsigned char *buf = (unsigned char *)(pOut);901.902. syhuffnode *pNode;903.904. uint32_t i;905.906. uint32_t c ;907.908. uint32_t Leaf;909.

Page 14: Simple Huffman

910.911.912.913.914. /* print the whole table */915.916. Leaf = (nb + 1 ) >> 1;917.918.919.920. if( OLen < sizeof(unsigned char) * Leaf + ( sizeof(unsigned short int) * 2 * ( nb - Leaf) ) ){921.922. return -2;923.924. }925.926.927.928. i = c = 0 ;929.930. for( ;; ){931.932. if( i >= nb ) break; pNode = &pTable[i];933.934.935.936. if( i < Leaf ){937.938. buf[c] = (uint8_t)pNode->c ;939.940. c += sizeof(unsigned char) ; OLen -= sizeof(unsigned char);941.942. i++; continue;943.944.945.946. }947.948. if( 0 != BigEndian16Pack(&buf[c],OLen,(uint16_t)pNode->rx) ){949.950. return -1;951.952. }953.954. c += sizeof(unsigned short int) ; OLen -= sizeof(unsigned short int);955.956. if( 0 != BigEndian16Pack(&buf[c],OLen,(uint16_t)pNode->lx) ){957.958. return -1;959.960. }961.962. c += sizeof(unsigned short int) ; OLen -= sizeof(unsigned short int);963.964.965.966. i++;967.968.969.970. }971.972.973.974. *offt += c;975.976. return 0;977.978. }979.

Page 15: Simple Huffman

980.981.982.983.984. static int __CompressBuffer(const void *In,size_t ILen,void *pOut,size_t OLen,syhuffprint *pPrint,size_t PLen,syhuffnode *pList)985.986. {987.988. register unsigned char *src = (unsigned char *)In;989.990. register unsigned char *Bstream = (unsigned char *)pOut;991.992. uint32_t c,L,idx;993.994. uint32_t mask,val;995.996. syhuffnode *pNode;997.998.999.1000. mask = 0; 1001.1002. for(;;){1003.1004. if( !ILen ) break;1005.1006. c = *src; idx = pPrint[c & (PLen - 1)].idx - 1; pNode = &pList[idx];1007.1008. val = pNode->cc;1009.1010. if( pNode->cl + mask < 8 ){1011.1012. val <<= mask; *Bstream |= val; mask += pNode->cl;1013.1014. }else{1015.1016. L = pNode->cl;1017.1018. for(;;){1019.1020. while( mask < 8 && L ){1021.1022. *Bstream |= ( val & 0x01 ) << mask;1023.1024. mask++; L--; val >>= 1;1025.1026. }1027.1028. if( !L ) break; mask = 0; *Bstream++;1029.1030. }1031.1032. }1033.1034. if( mask >= 8 ){1035.1036. *Bstream++; mask = 0;1037.1038. }1039.1040. *src++; --ILen; 1041.1042. }1043.1044.1045.1046. return (OLen = 0);1047.1048. }1049.

Page 16: Simple Huffman

1050. /* ========================================== Decompress Pass ========================================= */1051.1052. static int __IsValidHeader(const void *In,uint32_t ILen,size_t *pSize,uint32_t *rootIndex)1053.1054. {1055.1056. uint32_t c;1057.1058. register unsigned char *buf = (unsigned char *)In;1059.1060. if( ILen < 2 * sizeof(unsigned char) + sizeof(unsigned short int) ){1061.1062. return 0;1063.1064. }1065.1066. if( buf[0] != 'H' || buf[1] != 'F' ){1067.1068. return 0;1069.1070. }1071.1072. c = 2 * sizeof(char);1073.1074. /* get total number of item */1075.1076. if( 0 != BigEndian16Unpack(&buf[c],ILen - c,(uint16_t *)&(*pSize)) ){1077.1078. return 0;1079.1080. }1081.1082. c += sizeof(unsigned short int);1083.1084. if( *pSize <= 0 ){1085.1086. return 0;1087.1088. }1089.1090. *rootIndex = *pSize - 1; 1091.1092.1093.1094. return 1; /* valid huffman table */1095.1096. }1097.1098.1099.1100. static int __ReadHuffmanTable(const void *In,size_t ILen,syhuffnode *pTable,size_t nb)1101.1102. {1103.1104. register unsigned char *table = (unsigned char *)In;1105.1106. uint32_t i,c,rc;1107.1108. syhuffnode *pNode;1109.1110. uint32_t Leaf = ( nb + 1 ) >> 1;1111.1112.1113.1114. if( ILen < Leaf * sizeof(unsigned char) + ( 2 * sizeof(unsigned short int) * ( nb - Leaf) ) ){1115.1116. return -1;1117.1118. }

Page 17: Simple Huffman

1119.1120. rc = 0; i = c = 0 ;1121.1122. for( ;; ){1123.1124. if( i >= nb ) break; pNode = &pTable[i];1125.1126.1127.1128. pNode->idx = i + 1;1129.1130. pNode->cc = pNode->cl = pNode->weight = 0;1131.1132. pNode->fidx = pNode->pidx = 0;1133.1134. pNode->rx = pNode->lx = 0;1135.1136.1137.1138. if( i < Leaf ){1139.1140. pNode->c = (int)table[c];1141.1142. c += sizeof(unsigned char); ILen -= sizeof(unsigned char);1143.1144. i++; continue;1145.1146. }1147.1148.1149.1150. if( 0 != BigEndian16Unpack(&table[c],ILen,(uint16_t *)&pNode->rx) ) {1151.1152. return -1; 1153.1154. }1155.1156. c += sizeof(unsigned short int); ILen -= sizeof(unsigned short int);1157.1158.1159.1160. if( 0 != BigEndian16Unpack(&table[c],ILen,(uint16_t *)&pNode->lx)) {1161.1162. return -1; 1163.1164. }1165.1166. c += sizeof(unsigned short int); ILen -= sizeof(unsigned short int);1167.1168.1169.1170. i++;1171.1172. }1173.1174. return 0;1175.1176. }1177.1178.1179.1180. #define BLOCK_SIZE 5121181.1182. static int __DecompressBufferToFD(const void *In,size_t ILen,sxfd OUTFD,syhuffnode *pTable,uint32_t rootIdx)1183.1184. {1185.1186. unsigned char buffer[BLOCK_SIZE];1187.1188. register unsigned char *src = ( unsigned char * )In;

Page 18: Simple Huffman

1189.1190. syhuffnode *pNode;1191.1192. int c;1193.1194. uint32_t bLen,i;1195.1196. bLen = i = 0;1197.1198. pNode = &pTable[rootIdx];1199.1200. for(;;){1201.1202. if( !ILen ) break; c = (int)*src; i = 8;1203.1204. while( i ){1205.1206. pNode = &pTable[c & 0x01 ? pNode->rx - 1 : pNode->lx - 1];1207.1208. if( pNode->rx == 0 && pNode->lx == 0 ){1209.1210. /* found one */1211.1212.1213.1214. if( bLen >= BLOCK_SIZE ){1215.1216. if( __syswrite(OUTFD,buffer,bLen) <= 0 ){1217.1218. return -1;1219.1220. }1221.1222. bLen = 0;1223.1224. }1225.1226. buffer[bLen++] = pNode->c ;1227.1228. pNode = &pTable[rootIdx];1229.1230. }1231.1232. c >>= 1; i--;1233.1234. }1235.1236. *src++; --ILen; 1237.1238. }1239.1240.1241.1242. if( bLen > 0 ){1243.1244. return __syswrite(OUTFD,buffer,bLen) > 0 ? 0 : -1;1245.1246. }1247.1248. return 0 ;1249.1250.1251.1252. }1253.1254.1255.1256.1257.1258.

Page 19: Simple Huffman

1259.1260. /* ================================== File Compression/Decompression ========================================== */1261.1262.1263.1264. static int __CompressFD(sxfd FD,const void *In,size_t ILen,syhuffstats *pStats,ProcAlloc pAlloc,ProcFree pFree)1265.1266. {1267.1268. syhuffnode *pTable = NULL;1269.1270. syhuffprint Print[UCHAR_MAX+1];1271.1272. unsigned char *pOut = NULL;1273.1274. uint32_t nbItem;1275.1276. uint32_t nBits,nBytes;1277.1278. uint32_t OLen;1279.1280. uint32_t Offt;1281.1282. uint32_t nHeaders;1283.1284. uint32_t nLeaf;1285.1286.1287.1288. if( pStats ){1289.1290. pStats->InputLen = ILen;1291.1292. pStats->cKBytes = pStats->cBytes = 0;1293.1294. pStats->ratio = 0;1295.1296. } 1297.1298.1299.1300. /* get carracter frequencies */1301.1302. if( 0 != __GetByteWeight(In,ILen,Print,sizeof(Print)/sizeof(Print[0]),&nbItem) ){1303.1304. return -1;1305.1306. }1307.1308. /* create node */1309.1310. if( 0 != __MakeNodes(Print,&pTable,&nbItem,pAlloc) ){1311.1312. return -1;1313.1314. }1315.1316. /* make a min-heap */1317.1318. __Heapify(pTable,(nbItem + 1 ) >> 1);1319.1320. /* build the tree */1321.1322. __BuildHuffTree(pTable,nbItem);1323.1324. /* build huffman code */1325.1326. __BuildHuffCode(pTable,nbItem,Print,sizeof(Print)/sizeof(Print[0]),&nBits);1327.

Page 20: Simple Huffman

1328.1329.1330. /* Total number of bytes needed for compression */1331.1332. nBytes = HUFF_NB_BYTES(nBits);1333.1334.1335.1336. /* allocate space for data + table */1337.1338. nLeaf = ( nbItem + 1 ) >> 1;1339.1340. nHeaders = 2 * sizeof(unsigned char) + sizeof(unsigned short int);1341.1342. nHeaders += nLeaf * sizeof(unsigned char) + ( 2 * sizeof(unsigned short int) * (nbItem - nLeaf));1343.1344. OLen = nHeaders + nBytes ;1345.1346.1347.1348. pOut = (unsigned char *)pAlloc(sizeof(unsigned char) * OLen);1349.1350. if( pOut == NULL ){1351.1352. pFree(pTable);1353.1354. return -1;1355.1356. }1357.1358.1359.1360. Offt = 0; 1361.1362. /* print the headers */1363.1364. if( 0 != __PrintHuffmanHeader(nbItem,pOut,OLen,&Offt) ){1365.1366. pFree(pTable);1367.1368. pFree(pOut);1369.1370. return -1;1371.1372. }1373.1374. /* print table */1375.1376. if( 0 != __PrintHuffTable(pTable,nbItem,&pOut[Offt],OLen - Offt,&Offt) ){1377.1378. pFree(pTable);1379.1380. pFree(pOut);1381.1382. return -1;1383.1384. }1385.1386. /* finally compress the data */1387.1388. __CompressBuffer(In,ILen,&pOut[Offt],OLen-Offt,Print,sizeof(Print)/sizeof(Print[0]),pTable) ;1389.1390.1391.1392.1393.1394. if( pStats ){1395.1396. pStats->cKBytes = nBytes >> 10;

Page 21: Simple Huffman

1397.1398. pStats->cBytes = pStats->cKBytes > 0 ? nBytes & 0x3FF : nBytes;1399.1400. pStats->Table = nHeaders;1401.1402.1403.1404. } 1405.1406.1407.1408. /* write data on FD */1409.1410. __syswrite(FD,pOut,OLen);1411.1412.1413.1414. pFree(pTable);1415.1416. pFree(pOut);1417.1418. return 0; 1419.1420.1421.1422. }1423.1424.1425.1426. /* ==================================== High Level Interface ==================================== */1427.1428. int SyCompresFile(const char *FileName,syhuffstats *pStats)1429.1430. {1431.1432. sxfd INFD;1433.1434. sxfd OUTFD;1435.1436. size_t nSize;1437.1438. char *buf;1439.1440. int rc = -1;1441.1442. char fout[MAX_PATH + 50];1443.1444. ssize_t rd;1445.1446.1447.1448.1449.1450. INFD = __sysopen(FileName,0);1451.1452. if( INFD == INVALID_FD ){1453.1454. return -1;1455.1456. }1457.1458. if( 0 != __sysfilesize(INFD,&nSize) ){1459.1460. return -1;1461.1462. }1463.1464.1465.1466. strncpy(fout,FileName,sizeof(fout));

Page 22: Simple Huffman

1467.1468. strncat(fout,".huff",5);1469.1470.1471.1472. OUTFD = __sysopen(fout,1);1473.1474. if( OUTFD == INVALID_FD ){1475.1476. __sysclose(INFD); return -1;1477.1478. }1479.1480.1481.1482. buf = __sysmalloc(nSize);1483.1484.1485.1486. rd = __sysread(INFD,buf,nSize);1487.1488. if( rd != (ssize_t)nSize ){1489.1490. return -1;1491.1492. }1493.1494. rc = __CompressFD(OUTFD,buf,nSize,pStats,__sysmalloc,__sysfree);1495.1496.1497.1498. __sysfree(buf);1499.1500. __sysclose(INFD);1501.1502. __sysclose(OUTFD);1503.1504.1505.1506. return rc;1507.1508. }1509.1510. int SyDecompressFile(const char *File)1511.1512. {1513.1514. sxfd INFD,OUTFD;1515.1516. uint32_t nbItem;1517.1518. char header[16];1519.1520. ssize_t rd;1521.1522. uint32_t nHeaders;1523.1524. syhuffnode *pTable;1525.1526. size_t Tsize;1527.1528. uint32_t Ridx;1529.1530. unsigned char *BinTable;1531.1532. unsigned char *Bindata;1533.1534. char oname[MAX_PATH + 50];1535.1536. size_t Fsize; /* file size */1537.

Page 23: Simple Huffman

1538. int rc;1539.1540.1541.1542. if( (INFD = __sysopen(File,0)) == INVALID_FD ){1543.1544. return -1;1545.1546. }1547.1548. if( __sysfilesize(INFD,&Fsize) != 0 ){1549.1550. __sysclose(INFD); return -1;1551.1552. }1553.1554.1555.1556.1557.1558. nHeaders = 2 * sizeof(unsigned char) + sizeof(unsigned short int);1559.1560.1561.1562. rd = __sysread(INFD,header,nHeaders);1563.1564. if( rd != (ssize_t)nHeaders ){1565.1566. __sysclose(INFD);1567.1568. return -1;1569.1570. }1571.1572.1573.1574. /* check for a valid header */1575.1576. if( ! __IsValidHeader(header,nHeaders,&nbItem,&Ridx) ){1577.1578. __sysclose(INFD);1579.1580. return -1;1581.1582. }1583.1584.1585.1586. Tsize = (sizeof(unsigned char) * (( nbItem + 1 ) >> 1) ) + ( 2 * sizeof(unsigned short int) * ( nbItem - ( ( nbItem + 1 ) >> 1 ) ) );1587.1588.1589.1590. BinTable = __sysmalloc(Tsize );1591.1592.1593.1594.1595.1596.1597.1598. /* read the binary table first */1599.1600. rd = __sysread(INFD,BinTable,Tsize);1601.1602. if( rd != (ssize_t)Tsize ){1603.1604.1605.1606. __sysfree(BinTable);1607.

Page 24: Simple Huffman

1608. __sysclose(INFD);1609.1610. return -1;1611.1612. }1613.1614. pTable = (syhuffnode *)__sysmalloc(nbItem * sizeof(syhuffnode));1615.1616. /* make it an in mem table */1617.1618. if( 0 != __ReadHuffmanTable(BinTable,Tsize,pTable,nbItem) ){1619.1620. __sysfree(pTable);1621.1622. __sysfree(BinTable);1623.1624. __sysclose(INFD);1625.1626. return -1;1627.1628. }1629.1630.1631.1632. __sysfree(BinTable);1633.1634. /* open the ouput */1635.1636.1637.1638. strncpy(oname,File,strlen(File) - 5);1639.1640. if( (OUTFD = __sysopen(oname,1)) < 0 ){1641.1642. __sysfree(pTable);1643.1644. __sysclose(INFD);1645.1646. return -1;1647.1648. }1649.1650. /* get the size of the compressed data */1651.1652.1653.1654. Fsize -= (Tsize + nHeaders);1655.1656. /* allocate a whole block of memory */1657.1658. Bindata = __sysmalloc(Fsize);1659.1660.1661.1662. /* read the whole block */1663.1664. if( (rd = __sysread(INFD,Bindata,Fsize)) != (ssize_t)Fsize ){1665.1666. __sysfree(pTable);1667.1668. __sysfree(Bindata);1669.1670. __sysclose(INFD);1671.1672. __sysclose(OUTFD);1673.1674. return -1;1675.1676. }1677.1678. /* decompress */

Page 25: Simple Huffman

1679.1680. rc = __DecompressBufferToFD(Bindata,Fsize,OUTFD,pTable,Ridx);1681.1682.1683.1684. //__SysTruncate(OUTFD,Fsize);1685.1686. __sysfree(pTable);1687.1688. __sysfree(Bindata);1689.1690. __sysclose(INFD);1691.1692. __sysclose(OUTFD);1693.1694.1695.1696. return rc;1697.1698. }1699.1700. /* ========================================= DEBUG ============================================= */1701.1702. int main(int argc,char *argv[])1703.1704. {1705.1706. syhuffstats sStats;1707.1708. int rc;1709.1710. char *ptr;1711.1712.1713.1714. if( argc < 3 ){1715.1716. puts("At Least 2 argumements:");1717.1718. puts(" -c FileName ==> Compression");1719.1720. puts(" -d [Filename].huff ==> Decompression");1721.1722. return 0;1723.1724. }1725.1726. ptr = argv[1];1727.1728. if( ptr[0] != '-' ){1729.1730. puts("Options first -(c|d)");1731.1732. return 0;1733.1734. }1735.1736. if( ptr[1] == 'c' ){1737.1738. if( 0 !=(rc = SyCompresFile(argv[2],&sStats))){1739.1740. puts("Failed To Compress File");1741.1742. }else{1743.1744. printf("\tCompress %s ==> %s.huff \n\n",argv[2],argv[2]);1745.1746. printf("\t Input Size %d,%d KB\t Output Size %d,%d KB + Table Size : %d \n",sStats.InputLen >> 10,sStats.InputLen > 0 ? sStats.InputLen & 0x3F : sStats.InputLen , sStats.cKBytes,sStats.cBytes,sStats.Table);

Page 26: Simple Huffman

1747.1748. }1749.1750.1751.1752. }else{1753.1754. char *s = argv[2];1755.1756. if( strlen(s) < 5 || strncmp(&s[strlen(s) - 5],".huff",5) != 0 ){1757.1758. printf("Invlaid Extensions: cannot decompres\n");1759.1760. return 0;1761.1762. }1763.1764. if( (rc = SyDecompressFile(s)) != 0 ){1765.1766. puts("Decompression Failed");1767.1768. }else{1769.1770. printf("%s ==> %.*s := Success \n",argv[2],strlen(s) -5,s);1771.1772. }1773.1774. }1775.1776.1777.1778. return 0;1779.1780. }

\


Recommended