aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGao Xiang <hsiangkao@linux.alibaba.com>2023-07-10 19:02:48 +0800
committerGao Xiang <hsiangkao@linux.alibaba.com>2023-07-20 17:06:15 +0800
commit861037f4fc157d50415618e902f5c37cf1467119 (patch)
tree39a9b7562af5f72551807c760b3026488574b87e
parentfcda6240148c8c89ca2800a5b60d9913f27e1284 (diff)
downloaderofs-utils-861037f4fc157d50415618e902f5c37cf1467119.tar.gz
erofs-utils: add a built-in DEFLATE compressor
As Apple documentation written "If you require interoperability with non-Apple devices, use COMPRESSION_ZLIB. [1]", DEFLATE is a popular generic-purpose compression algorithm for a quite long time (many advanced formats like zlib, gzip, zip, png are all based on that), which is made of LZ77 as well as Huffman coding, fully documented as RFC1951 [2] and quite easy to understand, implement. There are several hardware on-market DEFLATE accelerators as well, such as (s390) DFLTCC, (Intel) IAA/QAT, (HiSilicon) ZIP accelerator, etc. Therefore, it's useful to support DEFLATE compression in order to use these for async I/Os and get benefits from these. Since there is _no fixed-sized output DEFLATE compression appoach_ available in public (fitblk is somewhat ineffective) and the original zlib is quite slowly developping, let's work out one for our use cases. Fortunately, it's only less than 1.5kLOC with lazy matching to just match the full zlib abilities. Besides, near-optimal block splitting (based on price function) doesn't support since it's no rush to us. In the future, there might be more built-in optimizers landed to fulfill our needs even further (especially for other popular algorithms without native fixed-sized output support). In addition, I'd be quite happy to see more popular encoders to support native fixed-sized output compression too. [1] https://developer.apple.com/documentation/compression/compression_algorithm [2] https://datatracker.ietf.org/doc/html/rfc1951 Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com> Link: https://lore.kernel.org/r/20230710110251.89464-2-hsiangkao@linux.alibaba.com
-rw-r--r--lib/Makefile.am2
-rw-r--r--lib/kite_deflate.c1270
2 files changed, 1272 insertions, 0 deletions
diff --git a/lib/Makefile.am b/lib/Makefile.am
index e243c1c..b729098 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -43,3 +43,5 @@ if ENABLE_LIBLZMA
liberofs_la_CFLAGS += ${liblzma_CFLAGS}
liberofs_la_SOURCES += compressor_liblzma.c
endif
+
+liberofs_la_SOURCES += kite_deflate.c
diff --git a/lib/kite_deflate.c b/lib/kite_deflate.c
new file mode 100644
index 0000000..f5bb2fd
--- /dev/null
+++ b/lib/kite_deflate.c
@@ -0,0 +1,1270 @@
+// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
+/*
+ * erofs-utils/lib/kite_deflate.c
+ *
+ * Copyright (C) 2023, Alibaba Cloud
+ * Copyright (C) 2023, Gao Xiang <xiang@kernel.org>
+ */
+#include "erofs/defs.h"
+#include "erofs/print.h"
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdio.h>
+
+unsigned long erofs_memcmp2(const u8 *s1, const u8 *s2,
+ unsigned long sz);
+
+#ifdef TEST
+#define kite_dbg(x, ...) fprintf(stderr, x "\n", ##__VA_ARGS__)
+#else
+#define kite_dbg(x, ...)
+#endif
+
+#define kHistorySize32 (1U << 15)
+
+#define kNumLenSymbols32 256
+#define kNumLenSymbolsMax kNumLenSymbols32
+
+#define kSymbolEndOfBlock 256
+#define kSymbolMatch (kSymbolEndOfBlock + 1)
+#define kNumLenSlots 29
+#define kMainTableSize (kSymbolMatch + kNumLenSlots)
+
+#define kFixedLenTableSize (kSymbolMatch + 31)
+#define FixedDistTableSize 32
+
+#define kMainTableSize (kSymbolMatch + kNumLenSlots)
+#define kDistTableSize32 30
+
+#define kNumLitLenCodesMin 257
+#define kNumDistCodesMin 1
+
+#define kNumLensCodesMin 4
+#define kLensTableSize 19
+
+#define kMatchMinLen 3
+#define kMatchMaxLen32 kNumLenSymbols32 + kMatchMinLen - 1
+
+static u32 kstaticHuff_mainCodes[kFixedLenTableSize];
+static const u8 kstaticHuff_litLenLevels[kFixedLenTableSize] = {
+ [0 ... 143] = 8, [144 ... 255] = 9,
+ [256 ... 279] = 7, [280 ... 287] = 8,
+};
+static u32 kstaticHuff_distCodes[kFixedLenTableSize];
+
+const u8 kLenStart32[kNumLenSlots] =
+ {0,1,2,3,4,5,6,7,8,10,12,14,16,20,24,28,32,40,48,56,64,80,96,112,128,160,192,224, 255};
+
+const u8 kLenExtraBits32[kNumLenSlots] =
+ {0,0,0,0,0,0,0,0,1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5,
+ 5, 5, 5, 0};
+
+/* First normalized distance for each code (0 = distance of 1) */
+const u32 kDistStart[kDistTableSize32] =
+ {0,1,2,3,4,6,8,12,16,24,32,48,64,96,128,192,256,384,512,768,
+ 1024,1536,2048,3072,4096,6144,8192,12288,16384,24576};
+
+/* extra bits for each distance code */
+const u8 kDistExtraBits[kDistTableSize32] =
+ {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
+
+const u8 kCodeLengthAlphabetOrder[kLensTableSize] =
+ {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
+
+const u8 kLevelExtraBits[3] = {2, 3, 7};
+
+const unsigned int kTableDirectLevels = 16;
+const unsigned int kBitLensRepNumber_3_6 = kTableDirectLevels;
+const unsigned int kBitLens0Number_3_10 = kBitLensRepNumber_3_6 + 1;
+const unsigned int kBitLens0Number_11_138 = kBitLens0Number_3_10 + 1;
+
+#define kStored 0
+#define kFixedHuffman 1
+#define kDynamicHuffman 2
+
+struct kite_deflate_symbol {
+ u16 len, dist;
+};
+
+struct kite_deflate_table {
+ u32 mainCodes[kMainTableSize];
+ u8 litLenLevels[kMainTableSize];
+ u32 distCodes[kDistTableSize32];
+ u8 distLevels[kDistTableSize32];
+ u32 levelCodes[kLensTableSize];
+ u8 levelLens[kLensTableSize];
+
+ u8 numdistlens, numblcodes;
+ u16 numlitlens;
+};
+
+struct kite_deflate {
+ struct kite_deflate_table *tab;
+ const u8 *in;
+ u8 *out;
+
+ u32 inlen, outlen;
+ u32 pos_in, pos_out;
+ u32 inflightbits;
+ u8 bitpos;
+ u8 numHuffBits;
+ u32 symbols;
+
+ u32 costbits, startpos;
+ u8 encode_mode;
+ bool freq_changed, lastblock;
+
+ /* Previous match for lazy matching */
+ bool prev_valid;
+ u16 prev_longest;
+
+ u32 mainFreqs[kMainTableSize];
+ u32 distFreqs[kDistTableSize32];
+ struct kite_deflate_table tables[2];
+
+ /* don't reset the following fields */
+ struct kite_matchfinder *mf;
+ struct kite_deflate_symbol *sym;
+ u32 max_symbols;
+ bool lazy_search;
+};
+
+#define ZLIB_DISTANCE_TOO_FAR 4096
+
+static u8 g_LenSlots[kNumLenSymbolsMax];
+
+#define kNumLogBits 9 // do not change it
+static u8 g_FastPos[1 << kNumLogBits];
+
+static void writebits(struct kite_deflate *s, unsigned int v, u8 bits)
+{
+ unsigned int rem = sizeof(s->inflightbits) * 8 - s->bitpos;
+
+ s->inflightbits |= (v << s->bitpos) & (!rem - 1);
+ if (bits > rem) {
+ u8 *out = s->out + s->pos_out;
+
+ out[0] = s->inflightbits & 0xff;
+ out[1] = (s->inflightbits >> 8) & 0xff;
+ out[2] = (s->inflightbits >> 16) & 0xff;
+ out[3] = (s->inflightbits >> 24) & 0xff;
+ s->pos_out += 4;
+ DBG_BUGON(s->pos_out > s->outlen);
+ s->inflightbits = v >> rem;
+ s->bitpos = bits - rem;
+ return;
+ }
+ s->bitpos += bits;
+}
+
+static void flushbits(struct kite_deflate *s)
+{
+ u8 *out = s->out + s->pos_out;
+
+ if (!s->bitpos)
+ return;
+ out[0] = s->inflightbits & 0xff;
+ if (s->bitpos >= 8) {
+ out[1] = (s->inflightbits >> 8) & 0xff;
+ if (s->bitpos >= 16) {
+ out[2] = (s->inflightbits >> 16) & 0xff;
+ if (s->bitpos >= 24)
+ out[3] = (s->inflightbits >> 24) & 0xff;
+ }
+ }
+ s->pos_out += round_up(s->bitpos, 8) >> 3;
+ DBG_BUGON(s->pos_out > s->outlen);
+ s->bitpos = 0;
+ s->inflightbits = 0;
+}
+
+#define kMaxLen 16
+
+static void deflate_genhuffcodes(const u8 *lens, u32 *p, unsigned int nr_codes,
+ const u32 *bl_count)
+{
+ u32 nextCodes[kMaxLen + 1]; /* next code value for each bit length */
+ unsigned int code = 0; /* running code value */
+ unsigned int bits, k;
+
+ for (bits = 1; bits <= kMaxLen; ++bits) {
+ code = (code + bl_count[bits - 1]) << 1;
+ nextCodes[bits] = code;
+ }
+
+ DBG_BUGON(code + bl_count[kMaxLen] != 1 << kMaxLen);
+
+ for (k = 0; k < nr_codes; ++k)
+ p[k] = nextCodes[lens[k]]++;
+}
+
+static u32 deflate_reversebits_one(u32 code, u8 bits)
+{
+ unsigned int x = code;
+
+ x = ((x & 0x5555) << 1) | ((x & 0xAAAA) >> 1);
+ x = ((x & 0x3333) << 2) | ((x & 0xCCCC) >> 2);
+ x = ((x & 0x0F0F) << 4) | ((x & 0xF0F0) >> 4);
+
+ return (((x & 0x00FF) << 8) | ((x & 0xFF00) >> 8)) >> (16 - bits);
+}
+
+static void Huffman_ReverseBits(u32 *codes, const u8 *lens, unsigned int n)
+{
+ while (n) {
+ u32 code = *codes;
+
+ *codes++ = deflate_reversebits_one(code, *lens++);
+ --n;
+ }
+}
+
+static void kite_deflate_init_once(void)
+{
+ static const u32 static_bl_count[kMaxLen + 1] = {
+ [7] = 279 - 256 + 1,
+ [8] = (143 + 1) + (287 - 280 + 1),
+ [9] = 255 - 144 + 1,
+ };
+ unsigned int i, c, j, k;
+
+ if (kstaticHuff_distCodes[31])
+ return;
+ deflate_genhuffcodes(kstaticHuff_litLenLevels, kstaticHuff_mainCodes,
+ kFixedLenTableSize, static_bl_count);
+ Huffman_ReverseBits(kstaticHuff_mainCodes, kstaticHuff_litLenLevels,
+ kFixedLenTableSize);
+
+ for (i = 0; i < ARRAY_SIZE(kstaticHuff_distCodes); ++i)
+ kstaticHuff_distCodes[i] = deflate_reversebits_one(i, 5);
+
+ for (i = 0; i < kNumLenSlots; i++) {
+ c = kLenStart32[i];
+ j = 1 << kLenExtraBits32[i];
+
+ for (k = 0; k < j; k++, c++)
+ g_LenSlots[c] = (u8)i;
+ }
+
+ c = 0;
+ for (i = 0; i < /*kFastSlots*/ kNumLogBits * 2; i++) {
+ k = 1 << kDistExtraBits[i];
+ for (j = 0; j < k; j++)
+ g_FastPos[c++] = i;
+ }
+}
+
+static void kite_deflate_scanlens(unsigned int numlens, u8 *lens, u32 *freqs)
+{
+ int n; /* iterates over all tree elements */
+ int prevlen = -1; /* last emitted length */
+ int curlen; /* length of current code */
+ int nextlen = lens[0]; /* length of next code */
+ int count = 0; /* repeat count of the current code */
+ int max_count = 7; /* max repeat count */
+ int min_count = 4; /* min repeat count */
+
+ if (!nextlen)
+ max_count = 138, min_count = 3;
+
+ for (n = 0; n < numlens; n++) {
+ curlen = nextlen;
+ nextlen = n + 1 < numlens ? lens[n + 1] : -1;
+ ++count;
+
+ if (count < max_count && curlen == nextlen)
+ continue;
+ if (count < min_count) {
+ freqs[curlen] += count;
+ } else if (curlen != 0) {
+ if (curlen != prevlen)
+ freqs[curlen]++;
+ freqs[kBitLensRepNumber_3_6]++;
+ } else if (count <= 10) {
+ freqs[kBitLens0Number_3_10]++;
+ } else {
+ freqs[kBitLens0Number_11_138]++;
+ }
+
+ count = 0;
+ prevlen = curlen;
+ if (!nextlen)
+ max_count = 138, min_count = 3;
+ else if (curlen == nextlen)
+ max_count = 6, min_count = 3;
+ else
+ max_count = 7, min_count = 4;
+ }
+}
+
+static void kite_deflate_sendtree(struct kite_deflate *s, const u8 *lens,
+ unsigned int numlens)
+{
+ int n; /* iterates over all tree elements */
+ int prevlen = -1; /* last emitted length */
+ int curlen; /* length of current code */
+ int nextlen = lens[0]; /* length of next code */
+ int count = 0; /* repeat count of the current code */
+ int max_count = 7; /* max repeat count */
+ int min_count = 4; /* min repeat count */
+ const u8 *bl_lens = s->tab->levelLens;
+ const u32 *bl_codes = s->tab->levelCodes;
+
+ if (!nextlen)
+ max_count = 138, min_count = 3;
+
+ for (n = 0; n < numlens; n++) {
+ curlen = nextlen;
+ nextlen = n + 1 < numlens ? lens[n + 1] : -1;
+ ++count;
+
+ if (count < max_count && curlen == nextlen)
+ continue;
+ if (count < min_count) {
+ do {
+ writebits(s, bl_codes[curlen], bl_lens[curlen]);
+ } while (--count);
+ } else if (curlen) {
+ if (curlen != prevlen) {
+ writebits(s, bl_codes[curlen], bl_lens[curlen]);
+ count--;
+ }
+ writebits(s, bl_codes[kBitLensRepNumber_3_6],
+ bl_lens[kBitLensRepNumber_3_6]);
+ writebits(s, count - 3, 2);
+ } else if (count <= 10) {
+ writebits(s, bl_codes[kBitLens0Number_3_10],
+ bl_lens[kBitLens0Number_3_10]);
+ writebits(s, count - 3, 3);
+ } else {
+ writebits(s, bl_codes[kBitLens0Number_11_138],
+ bl_lens[kBitLens0Number_11_138]);
+ writebits(s, count - 11, 7);
+ }
+
+ count = 0;
+ prevlen = curlen;
+ if (!nextlen)
+ max_count = 138, min_count = 3;
+ else if (curlen == nextlen)
+ max_count = 6, min_count = 3;
+ else
+ max_count = 7, min_count = 4;
+ }
+}
+
+static void kite_deflate_setfixedtrees(struct kite_deflate *s)
+{
+ writebits(s, (kFixedHuffman << 1) + s->lastblock, 3);
+}
+
+static void kite_deflate_sendtrees(struct kite_deflate *s)
+{
+ struct kite_deflate_table *t = s->tab;
+ unsigned int i;
+
+ writebits(s, (kDynamicHuffman << 1) + s->lastblock, 3);
+ writebits(s, t->numlitlens - kNumLitLenCodesMin, 5);
+ writebits(s, t->numdistlens - kNumDistCodesMin, 5);
+ writebits(s, t->numblcodes - kNumLensCodesMin, 4);
+
+ for (i = 0; i < t->numblcodes; i++)
+ writebits(s, t->levelLens[kCodeLengthAlphabetOrder[i]], 3);
+
+ Huffman_ReverseBits(t->levelCodes, t->levelLens, kLensTableSize);
+ kite_deflate_sendtree(s, t->litLenLevels, t->numlitlens);
+ kite_deflate_sendtree(s, t->distLevels, t->numdistlens);
+}
+
+static inline unsigned int deflateDistSlot(unsigned int pos)
+{
+ const unsigned int zz = (kNumLogBits - 1) &
+ ((((1U << kNumLogBits) - 1) - pos) >> (31 - 3));
+
+ return g_FastPos[pos >> zz] + (zz * 2);
+}
+
+static void kite_deflate_writeblock(struct kite_deflate *s, bool fixed)
+{
+ int i;
+ u32 *mainCodes, *distCodes;
+ const u8 *litLenLevels, *distLevels;
+
+ if (!fixed) {
+ struct kite_deflate_table *t = s->tab;
+
+ mainCodes = t->mainCodes; distCodes = t->distCodes;
+ litLenLevels = t->litLenLevels; distLevels = t->distLevels;
+
+ Huffman_ReverseBits(mainCodes, litLenLevels, kMainTableSize);
+ Huffman_ReverseBits(distCodes, distLevels, kDistTableSize32);
+ } else {
+ mainCodes = kstaticHuff_mainCodes;
+ distCodes = kstaticHuff_distCodes;
+
+ litLenLevels = kstaticHuff_litLenLevels;
+ }
+
+ for (i = 0; i < s->symbols; ++i) {
+ struct kite_deflate_symbol *sym = &s->sym[i];
+
+ if (sym->len < kMatchMinLen) { /* literal */
+ writebits(s, mainCodes[sym->dist],
+ litLenLevels[sym->dist]);
+ } else {
+ unsigned int lenSlot, distSlot;
+ unsigned int lc = sym->len - kMatchMinLen;
+
+ lenSlot = g_LenSlots[lc];
+ writebits(s, mainCodes[kSymbolMatch + lenSlot],
+ litLenLevels[kSymbolMatch + lenSlot]);
+ writebits(s, lc - kLenStart32[lenSlot],
+ kLenExtraBits32[lenSlot]);
+
+ distSlot = deflateDistSlot(sym->dist - 1);
+ writebits(s, distCodes[distSlot],
+ fixed ? 5 : distLevels[distSlot]);
+ writebits(s, sym->dist - 1 - kDistStart[distSlot],
+ kDistExtraBits[distSlot]);
+ }
+ }
+ writebits(s, mainCodes[kSymbolEndOfBlock],
+ litLenLevels[kSymbolEndOfBlock]);
+}
+
+static u32 Huffman_GetPrice(const u32 *freqs, const u8 *lens, u32 num)
+{
+ u32 price = 0;
+
+ while (num) {
+ price += (*lens++) * (*freqs++);
+ --num;
+ }
+ return price;
+}
+
+static u32 Huffman_GetPriceEx(const u32 *freqs, const u8 *lens, u32 num,
+ const u8 *extraBits, u32 extraBase)
+{
+ return Huffman_GetPrice(freqs, lens, num) +
+ Huffman_GetPrice(freqs + extraBase, extraBits, num - extraBase);
+}
+
+/* Adapted from C/HuffEnc.c (7zip) for now */
+#define HeapSortDown(p, k, size, temp) \
+ { for (;;) { \
+ size_t s = (k << 1); \
+ if (s > size) break; \
+ if (s < size && p[s + 1] > p[s]) s++; \
+ if (temp >= p[s]) break; \
+ p[k] = p[s]; k = s; \
+ } p[k] = temp; }
+
+static void HeapSort(u32 *p, size_t size)
+{
+ if (size <= 1)
+ return;
+ p--;
+ {
+ size_t i = size / 2;
+ do
+ {
+ u32 temp = p[i];
+ size_t k = i;
+ HeapSortDown(p, k, size, temp)
+ }
+ while (--i != 0);
+ }
+ /*
+ do
+ {
+ size_t k = 1;
+ UInt32 temp = p[size];
+ p[size--] = p[1];
+ HeapSortDown(p, k, size, temp)
+ }
+ while (size > 1);
+ */
+ while (size > 3)
+ {
+ u32 temp = p[size];
+ size_t k = (p[3] > p[2]) ? 3 : 2;
+ p[size--] = p[1];
+ p[1] = p[k];
+ HeapSortDown(p, k, size, temp)
+ }
+ {
+ u32 temp = p[size];
+ p[size] = p[1];
+ if (size > 2 && p[2] < temp)
+ {
+ p[1] = p[2];
+ p[2] = temp;
+ }
+ else
+ p[1] = temp;
+ }
+}
+
+#define NUM_BITS 10
+#define MASK (((unsigned)1 << NUM_BITS) - 1)
+
+static void Huffman_Generate(const u32 *freqs, u32 *p, u8 *lens,
+ unsigned int numSymbols, unsigned int maxLen)
+{
+ u32 num, i;
+
+ num = 0;
+ /* if (maxLen > 10) maxLen = 10; */
+
+ for (i = 0; i < numSymbols; i++) {
+ u32 freq = freqs[i];
+
+ if (!freq)
+ lens[i] = 0;
+ else
+ p[num++] = i | (freq << NUM_BITS);
+ }
+ HeapSort(p, num);
+
+ if (num < 2) {
+ unsigned int minCode = 0, maxCode = 1;
+
+ if (num == 1) {
+ maxCode = (unsigned int)p[0] & MASK;
+ if (!maxCode)
+ maxCode++;
+ }
+ p[minCode] = 0;
+ p[maxCode] = 1;
+ lens[minCode] = lens[maxCode] = 1;
+ return;
+ }
+
+ {
+ u32 b, e, i;
+
+ i = b = e = 0;
+ do {
+ u32 n, m, freq;
+
+ n = (i != num && (b == e || (p[i] >> NUM_BITS) <= (p[b] >> NUM_BITS))) ? i++ : b++;
+ freq = (p[n] & ~MASK);
+ p[n] = (p[n] & MASK) | (e << NUM_BITS);
+ m = (i != num && (b == e || (p[i] >> NUM_BITS) <= (p[b] >> NUM_BITS))) ? i++ : b++;
+ freq += (p[m] & ~MASK);
+ p[m] = (p[m] & MASK) | (e << NUM_BITS);
+ p[e] = (p[e] & MASK) | freq;
+ e++;
+ } while (num - e > 1);
+
+ {
+ u32 lenCounters[kMaxLen + 1];
+
+ for (i = 0; i <= kMaxLen; i++)
+ lenCounters[i] = 0;
+
+ p[--e] &= MASK;
+ lenCounters[1] = 2;
+ while (e > 0) {
+ u32 len = (p[p[--e] >> NUM_BITS] >> NUM_BITS) + 1;
+
+ p[e] = (p[e] & MASK) | (len << NUM_BITS);
+ if (len >= maxLen)
+ for (len = maxLen - 1; lenCounters[len] == 0; len--);
+ lenCounters[len]--;
+ lenCounters[(size_t)len + 1] += 2;
+ }
+
+ {
+ u32 len;
+
+ i = 0;
+ for (len = maxLen; len != 0; len--) {
+ u32 k;
+ for (k = lenCounters[len]; k != 0; k--)
+ lens[p[i++] & MASK] = (u8)len;
+ }
+ }
+ deflate_genhuffcodes(lens, p, numSymbols, lenCounters);
+ }
+ }
+}
+
+static void kite_deflate_fixdynblock(struct kite_deflate *s)
+{
+ struct kite_deflate_table *t = s->tab;
+ unsigned int numlitlens, numdistlens, numblcodes;
+ u32 levelFreqs[kLensTableSize] = {0};
+ u32 opt_mainlen;
+
+ if (!s->freq_changed)
+ return;
+
+ /* in order to match zlib */
+ s->numHuffBits = kMaxLen;
+// s->numHuffBits = (s->symbols > 18000 ? 12 :
+// (s->symbols > 7000 ? 11 : (s->symbols > 2000 ? 10 : 9)));
+
+ Huffman_Generate(s->mainFreqs, t->mainCodes, t->litLenLevels,
+ kMainTableSize, s->numHuffBits);
+ Huffman_Generate(s->distFreqs, t->distCodes, t->distLevels,
+ kDistTableSize32, s->numHuffBits);
+
+ /* code lengths for the literal/length alphabet */
+ numlitlens = kMainTableSize;
+ while (numlitlens > kNumLitLenCodesMin &&
+ !t->litLenLevels[numlitlens - 1])
+ --numlitlens;
+
+ /* code lengths for the distance alphabet */
+ numdistlens = kDistTableSize32;
+ while (numdistlens > kNumDistCodesMin &&
+ !t->distLevels[numdistlens - 1])
+ --numdistlens;
+
+ kite_deflate_scanlens(numlitlens, t->litLenLevels, levelFreqs);
+ kite_deflate_scanlens(numdistlens, t->distLevels, levelFreqs);
+ Huffman_Generate(levelFreqs, t->levelCodes, t->levelLens,
+ kLensTableSize, 7);
+ numblcodes = kLensTableSize;
+ while (numblcodes > kNumLensCodesMin &&
+ !t->levelLens[kCodeLengthAlphabetOrder[numblcodes - 1]])
+ --numblcodes;
+
+ t->numlitlens = numlitlens;
+ t->numdistlens = numdistlens;
+ t->numblcodes = numblcodes;
+
+ opt_mainlen = Huffman_GetPriceEx(s->mainFreqs, t->litLenLevels,
+ kMainTableSize, kLenExtraBits32, kSymbolMatch) +
+ Huffman_GetPriceEx(s->distFreqs, t->distLevels,
+ kDistTableSize32, kDistExtraBits, 0);
+ s->costbits = 3 + 5 + 5 + 4 + 3 * numblcodes +
+ Huffman_GetPriceEx(levelFreqs, t->levelLens,
+ kLensTableSize, kLevelExtraBits, kTableDirectLevels) +
+ opt_mainlen;
+ s->freq_changed = false;
+}
+
+
+/*
+ * an array used used by the LZ-based encoder to hold the length-distance pairs
+ * found by LZ matchfinder.
+ */
+struct kite_match {
+ unsigned int len;
+ unsigned int dist;
+};
+
+struct kite_matchfinder {
+ /* pointer to buffer with data to be compressed */
+ const u8 *buffer;
+
+ /* indicate the first byte that doesn't contain valid input data */
+ const u8 *end;
+
+ /* LZ matchfinder hash chain representation */
+ u32 *hash, *chain;
+
+ u32 base;
+
+ /* indicate the next byte to run through the match finder */
+ u32 offset;
+
+ u32 cyclic_pos;
+
+ /* maximum length of a match that the matchfinder will try to find. */
+ u16 nice_len;
+
+ /* the total sliding window size */
+ u16 wsiz;
+
+ /* how many rounds a matchfinder searches on a hash chain for */
+ u16 depth;
+
+ /* do not perform lazy search no less than this match length */
+ u16 max_lazy;
+
+ /* reduce lazy search no less than this match length */
+ u8 good_len;
+
+ /* current match for lazy matching */
+ struct kite_match *matches;
+ struct kite_match matches_matrix[2][4];
+};
+
+/*
+ * This mysterious table is just the CRC of each possible byte. It can be
+ * computed using the standard bit-at-a-time methods. The polynomial can
+ * be seen in entry 128, 0x8408. This corresponds to x^0 + x^5 + x^12.
+ * Add the implicit x^16, and you have the standard CRC-CCITT.
+ */
+u16 const crc_ccitt_table[256] __attribute__((__aligned__(128))) = {
+ 0x0000, 0x1189, 0x2312, 0x329b, 0x4624, 0x57ad, 0x6536, 0x74bf,
+ 0x8c48, 0x9dc1, 0xaf5a, 0xbed3, 0xca6c, 0xdbe5, 0xe97e, 0xf8f7,
+ 0x1081, 0x0108, 0x3393, 0x221a, 0x56a5, 0x472c, 0x75b7, 0x643e,
+ 0x9cc9, 0x8d40, 0xbfdb, 0xae52, 0xdaed, 0xcb64, 0xf9ff, 0xe876,
+ 0x2102, 0x308b, 0x0210, 0x1399, 0x6726, 0x76af, 0x4434, 0x55bd,
+ 0xad4a, 0xbcc3, 0x8e58, 0x9fd1, 0xeb6e, 0xfae7, 0xc87c, 0xd9f5,
+ 0x3183, 0x200a, 0x1291, 0x0318, 0x77a7, 0x662e, 0x54b5, 0x453c,
+ 0xbdcb, 0xac42, 0x9ed9, 0x8f50, 0xfbef, 0xea66, 0xd8fd, 0xc974,
+ 0x4204, 0x538d, 0x6116, 0x709f, 0x0420, 0x15a9, 0x2732, 0x36bb,
+ 0xce4c, 0xdfc5, 0xed5e, 0xfcd7, 0x8868, 0x99e1, 0xab7a, 0xbaf3,
+ 0x5285, 0x430c, 0x7197, 0x601e, 0x14a1, 0x0528, 0x37b3, 0x263a,
+ 0xdecd, 0xcf44, 0xfddf, 0xec56, 0x98e9, 0x8960, 0xbbfb, 0xaa72,
+ 0x6306, 0x728f, 0x4014, 0x519d, 0x2522, 0x34ab, 0x0630, 0x17b9,
+ 0xef4e, 0xfec7, 0xcc5c, 0xddd5, 0xa96a, 0xb8e3, 0x8a78, 0x9bf1,
+ 0x7387, 0x620e, 0x5095, 0x411c, 0x35a3, 0x242a, 0x16b1, 0x0738,
+ 0xffcf, 0xee46, 0xdcdd, 0xcd54, 0xb9eb, 0xa862, 0x9af9, 0x8b70,
+ 0x8408, 0x9581, 0xa71a, 0xb693, 0xc22c, 0xd3a5, 0xe13e, 0xf0b7,
+ 0x0840, 0x19c9, 0x2b52, 0x3adb, 0x4e64, 0x5fed, 0x6d76, 0x7cff,
+ 0x9489, 0x8500, 0xb79b, 0xa612, 0xd2ad, 0xc324, 0xf1bf, 0xe036,
+ 0x18c1, 0x0948, 0x3bd3, 0x2a5a, 0x5ee5, 0x4f6c, 0x7df7, 0x6c7e,
+ 0xa50a, 0xb483, 0x8618, 0x9791, 0xe32e, 0xf2a7, 0xc03c, 0xd1b5,
+ 0x2942, 0x38cb, 0x0a50, 0x1bd9, 0x6f66, 0x7eef, 0x4c74, 0x5dfd,
+ 0xb58b, 0xa402, 0x9699, 0x8710, 0xf3af, 0xe226, 0xd0bd, 0xc134,
+ 0x39c3, 0x284a, 0x1ad1, 0x0b58, 0x7fe7, 0x6e6e, 0x5cf5, 0x4d7c,
+ 0xc60c, 0xd785, 0xe51e, 0xf497, 0x8028, 0x91a1, 0xa33a, 0xb2b3,
+ 0x4a44, 0x5bcd, 0x6956, 0x78df, 0x0c60, 0x1de9, 0x2f72, 0x3efb,
+ 0xd68d, 0xc704, 0xf59f, 0xe416, 0x90a9, 0x8120, 0xb3bb, 0xa232,
+ 0x5ac5, 0x4b4c, 0x79d7, 0x685e, 0x1ce1, 0x0d68, 0x3ff3, 0x2e7a,
+ 0xe70e, 0xf687, 0xc41c, 0xd595, 0xa12a, 0xb0a3, 0x8238, 0x93b1,
+ 0x6b46, 0x7acf, 0x4854, 0x59dd, 0x2d62, 0x3ceb, 0x0e70, 0x1ff9,
+ 0xf78f, 0xe606, 0xd49d, 0xc514, 0xb1ab, 0xa022, 0x92b9, 0x8330,
+ 0x7bc7, 0x6a4e, 0x58d5, 0x495c, 0x3de3, 0x2c6a, 0x1ef1, 0x0f78
+};
+
+int kite_mf_getmatches_hc3(struct kite_matchfinder *mf, u16 depth, u16 bestlen)
+{
+ const u8 *cur = mf->buffer + mf->offset;
+ const u8 *qbase = mf->buffer - mf->base;
+ u32 curMatch;
+ unsigned int v, hv, i, k, p, wsiz;
+
+ if (mf->end - cur < bestlen + 1)
+ return 0;
+
+ v = get_unaligned((u16 *)cur);
+ hv = v ^ crc_ccitt_table[cur[2]];
+ curMatch = mf->hash[hv];
+ p = mf->base + mf->offset;
+ mf->hash[hv] = p;
+ mf->chain[mf->cyclic_pos] = curMatch;
+ wsiz = mf->wsiz;
+ k = 1;
+
+ if (depth) {
+ unsigned int wpos = wsiz + mf->cyclic_pos;
+
+ hv = min_t(unsigned int, mf->nice_len, mf->end - cur);
+ DBG_BUGON(hv > kMatchMaxLen32);
+ do {
+ unsigned int diff = p - curMatch;
+ const u8 *q;
+
+ if (diff >= wsiz)
+ break;
+
+ q = qbase + curMatch;
+ curMatch = mf->chain[(wpos - diff) & (wsiz - 1)];
+ if (v == get_unaligned((u16 *)q) && (bestlen < 3 || (
+ get_unaligned((u16 *)(cur + bestlen - 1)) ==
+ get_unaligned((u16 *)(q + bestlen - 1)) &&
+ !memcmp(cur + 3, q + 3, bestlen - 3)))) {
+ DBG_BUGON(cur[2] != q[2]);
+ i = erofs_memcmp2(cur + bestlen + 1,
+ q + bestlen + 1, hv - bestlen - 1);
+ bestlen += 1 + i;
+
+ k -= (k >= ARRAY_SIZE(mf->matches_matrix[0]));
+ mf->matches[k++] = (struct kite_match) {
+ .len = bestlen,
+ .dist = diff,
+ };
+ if (bestlen >= hv)
+ break;
+ }
+ } while (--depth);
+ }
+ mf->offset++;
+ mf->cyclic_pos = (mf->cyclic_pos + 1) & (wsiz - 1);
+ return k - 1;
+}
+
+/* let's align with zlib */
+static const struct kite_matchfinder_cfg {
+ u16 good_length; /* reduce lazy search above this match length */
+ u16 max_lazy; /* do not perform lazy search above this match length */
+ u16 nice_length; /* quit search above this match length */
+ u16 depth;
+ bool lazy_search;
+} kite_mfcfg[10] = {
+/* good lazy nice depth */
+/* 0 */ {0, 0, 0, 0, false}, /* store only [unsupported] */
+/* 1 */ {4, 4, 8, 4, false}, /* maximum speed, no lazy matches */
+/* 2 */ {4, 5, 16, 8, false},
+/* 3 */ {4, 6, 32, 32, false},
+
+/* 4 */ {4, 4, 16, 16, true}, /* lazy matches */
+/* 5 */ {8, 16, 32, 32, true},
+/* 6 */ {8, 16, 128, 128, true},
+/* 7 */ {8, 32, 128, 256, true},
+/* 8 */ {32, 128, 258, 1024, true},
+/* 9 */ {32, 258, 258, 4096, true}, /* maximum compression */
+};
+
+static int kite_mf_init(struct kite_matchfinder *mf, int wsiz, int level)
+{
+ const struct kite_matchfinder_cfg *cfg;
+
+ if (!level || level >= ARRAY_SIZE(kite_mfcfg))
+ return -EINVAL;
+ cfg = &kite_mfcfg[level];
+
+ if (wsiz > kHistorySize32 || (1 << ilog2(wsiz)) != wsiz)
+ return -EINVAL;
+
+ mf->hash = calloc(0x10000, sizeof(mf->hash[0]));
+ if (!mf->hash)
+ return -ENOMEM;
+
+ mf->chain = malloc(sizeof(mf->chain[0]) * wsiz);
+ if (!mf->chain) {
+ free(mf->hash);
+ mf->hash = NULL;
+ return -ENOMEM;
+ }
+ mf->wsiz = wsiz;
+
+ mf->good_len = cfg->good_length;
+ mf->nice_len = cfg->nice_length;
+ mf->depth = cfg->depth;
+ mf->max_lazy = cfg->max_lazy;
+ return cfg->lazy_search;
+}
+
+static void kite_mf_reset(struct kite_matchfinder *mf,
+ const void *buffer, const void *end)
+{
+ mf->buffer = buffer;
+ mf->end = end;
+
+ /*
+ * Set the initial value as max_distance + 1. This would avoid hash
+ * zero initialization.
+ */
+ mf->base += mf->offset + kHistorySize32 + 1;
+
+ mf->offset = 0;
+ mf->cyclic_pos = 0;
+
+ mf->matches = mf->matches_matrix[0];
+ mf->matches_matrix[0][0].len =
+ mf->matches_matrix[1][0].len = kMatchMinLen - 1;
+}
+
+static bool deflate_count_code(struct kite_deflate *s, bool literal,
+ unsigned int lenSlot, unsigned int distSlot)
+{
+ struct kite_deflate_table *t = s->tab;
+ unsigned int lenbase = (literal ? 0 : kSymbolMatch);
+ u64 rem = (s->outlen - s->pos_out) * 8 - s->bitpos;
+ bool recalc = false;
+ unsigned int bits;
+
+ s->freq_changed = true;
+ ++s->mainFreqs[lenbase + lenSlot];
+ if (!literal)
+ ++s->distFreqs[distSlot];
+
+ if (s->encode_mode == 1) {
+ if (literal) {
+ bits = kstaticHuff_litLenLevels[lenSlot];
+ goto out;
+ }
+ bits = kstaticHuff_litLenLevels[kSymbolMatch + lenSlot] +
+ kLenExtraBits32[lenSlot] + 5 + kDistExtraBits[distSlot];
+ goto out;
+ }
+
+ /* XXX: more ideas to be done later */
+ recalc |= (!literal && !t->distLevels[distSlot]);
+ recalc |= !t->litLenLevels[lenbase + lenSlot];
+ if (recalc) {
+ kite_dbg("recalc %c lS %u dS %u", literal ? 'l' : 'm',
+ lenSlot, distSlot);
+ s->tab = s->tables + (s->tab == s->tables);
+ kite_deflate_fixdynblock(s);
+ bits = 0;
+ goto out;
+ }
+
+ if (literal) {
+ bits = t->litLenLevels[lenSlot];
+ goto out;
+ }
+
+ bits = t->distLevels[distSlot] + kDistExtraBits[distSlot] +
+ t->litLenLevels[kSymbolMatch + lenSlot] +
+ kLenExtraBits32[lenSlot];
+out:
+ if (rem < s->costbits + bits) {
+ --s->mainFreqs[lenbase + lenSlot];
+ if (!literal)
+ --s->distFreqs[distSlot];
+ if (recalc)
+ s->tab = s->tables + (s->tab == s->tables);
+ return false;
+ }
+ s->costbits += bits;
+ return true;
+}
+
+static bool kite_deflate_tally(struct kite_deflate *s,
+ struct kite_match *match)
+{
+ struct kite_deflate_symbol *sym = s->sym + s->symbols;
+ u32 fixedcost = ~0;
+ bool hassp;
+
+ *sym = (struct kite_deflate_symbol) {
+ .len = match->len,
+ .dist = match->dist,
+ };
+
+retry:
+ if (sym->len < kMatchMinLen) {
+ hassp = deflate_count_code(s, true, sym->dist, 0);
+ } else {
+ unsigned int lc = sym->len - kMatchMinLen;
+ unsigned int lenSlot = g_LenSlots[lc];
+ unsigned int distSlot = deflateDistSlot(sym->dist - 1);
+
+ hassp = deflate_count_code(s, false, lenSlot, distSlot);
+ }
+
+ if (!hassp) {
+ if (s->encode_mode == 1) {
+ fixedcost = s->costbits;
+ s->encode_mode = 2;
+ goto retry;
+ }
+ s->lastblock = true;
+ if (fixedcost <= s->costbits)
+ s->encode_mode = 1;
+ return true;
+ }
+ ++s->symbols;
+ return false;
+}
+
+static void kite_deflate_writestore(struct kite_deflate *s)
+{
+ bool fb = !s->startpos && !s->bitpos;
+ unsigned int totalsiz = s->pos_in - s->prev_valid - s->startpos;
+
+ do {
+ unsigned int len = min_t(unsigned int, totalsiz, 65535);
+
+ totalsiz -= len;
+ writebits(s, (fb << 3) | (kStored << 1) |
+ (s->lastblock && !totalsiz), 3 + fb);
+ flushbits(s);
+ writebits(s, len, 16);
+ writebits(s, len ^ 0xffff, 16);
+ flushbits(s);
+ memcpy(s->out + s->pos_out, s->in + s->startpos, len);
+ s->pos_out += len;
+ s->startpos += len;
+ } while (totalsiz);
+}
+
+static void kite_deflate_endblock(struct kite_deflate *s)
+{
+ if (s->encode_mode == 1) {
+ u32 fixedcost = s->costbits;
+ unsigned int storelen, storeblocks, storecost;
+
+ kite_deflate_fixdynblock(s);
+ if (fixedcost > s->costbits)
+ s->encode_mode = 2;
+ else
+ s->costbits = fixedcost;
+
+ storelen = s->pos_in - s->prev_valid - s->startpos;
+ storeblocks = max(DIV_ROUND_UP(storelen, 65535), 1U);
+ storecost = (8 - s->bitpos) + storeblocks - 1 +
+ storeblocks * 32 + storelen * 8;
+ if (s->costbits > storecost) {
+ s->costbits = storecost;
+ s->encode_mode = 0;
+ }
+ }
+
+ s->lastblock |= (s->costbits + s->bitpos >=
+ (s->outlen - s->pos_out) * 8);
+}
+
+static void kite_deflate_startblock(struct kite_deflate *s)
+{
+ memset(s->mainFreqs, 0, sizeof(s->mainFreqs));
+ memset(s->distFreqs, 0, sizeof(s->distFreqs));
+ memset(s->tables, 0, sizeof(s->tables[0]));
+ s->symbols = 0;
+ s->mainFreqs[kSymbolEndOfBlock]++;
+ s->encode_mode = 1;
+ s->tab = s->tables;
+ s->costbits = 3 + kstaticHuff_litLenLevels[kSymbolEndOfBlock];
+}
+
+static bool kite_deflate_commitblock(struct kite_deflate *s)
+{
+ if (s->encode_mode == 1) {
+ kite_deflate_setfixedtrees(s);
+ kite_deflate_writeblock(s, true);
+ } else if (s->encode_mode == 2) {
+ kite_deflate_sendtrees(s);
+ kite_deflate_writeblock(s, false);
+ } else {
+ kite_deflate_writestore(s);
+ }
+ s->startpos = s->pos_in - s->prev_valid;
+ return s->lastblock;
+}
+
+static bool kite_deflate_fast(struct kite_deflate *s)
+{
+ struct kite_matchfinder *mf = s->mf;
+
+ kite_deflate_startblock(s);
+ while (1) {
+ int matches = kite_mf_getmatches_hc3(mf, mf->depth,
+ kMatchMinLen - 1);
+
+ if (matches) {
+ unsigned int len = mf->matches[matches].len;
+ unsigned int dist = mf->matches[matches].dist;
+
+ if (len == kMatchMinLen && dist > ZLIB_DISTANCE_TOO_FAR)
+ goto nomatch;
+
+ kite_dbg("%u matches found: longest [%u,%u] of distance %u",
+ matches, s->pos_in, s->pos_in + len - 1, dist);
+
+ if (kite_deflate_tally(s, mf->matches + matches))
+ break;
+ s->pos_in += len;
+ /* skip the rest bytes */
+ while (--len)
+ (void)kite_mf_getmatches_hc3(mf, 0, 0);
+ } else {
+nomatch:
+ mf->matches[0].dist = s->in[s->pos_in];
+ if (isprint(s->in[s->pos_in]))
+ kite_dbg("literal %c pos_in %u", s->in[s->pos_in], s->pos_in);
+ else
+ kite_dbg("literal %x pos_in %u", s->in[s->pos_in], s->pos_in);
+
+ if (kite_deflate_tally(s, mf->matches))
+ break;
+ ++s->pos_in;
+ }
+
+ s->lastblock |= (s->pos_in >= s->inlen);
+ if (s->pos_in >= s->inlen || s->symbols >= s->max_symbols) {
+ kite_deflate_endblock(s);
+ break;
+ }
+ }
+ return kite_deflate_commitblock(s);
+}
+
+static bool kite_deflate_slow(struct kite_deflate *s)
+{
+ struct kite_matchfinder *mf = s->mf;
+ bool flush = false;
+
+ kite_deflate_startblock(s);
+ while (1) {
+ struct kite_match *prev_matches = mf->matches;
+ unsigned int len = kMatchMinLen - 1;
+ int matches;
+ unsigned int len0;
+
+ mf->matches = mf->matches_matrix[
+ mf->matches == mf->matches_matrix[0]];
+ mf->matches[0].dist = s->in[s->pos_in];
+
+ len0 = prev_matches[s->prev_longest].len;
+ if (len0 < mf->max_lazy) {
+ matches = kite_mf_getmatches_hc3(mf, mf->depth >>
+ (len0 >= mf->good_len), len0);
+ if (matches) {
+ len = mf->matches[matches].len;
+ if (len == kMatchMinLen &&
+ mf->matches[matches].dist > ZLIB_DISTANCE_TOO_FAR) {
+ matches = 0;
+ len = kMatchMinLen - 1;
+ }
+ }
+ } else {
+ matches = 0;
+ (void)kite_mf_getmatches_hc3(mf, 0, 0);
+ }
+
+ if (len < len0) {
+ if (kite_deflate_tally(s,
+ prev_matches + s->prev_longest))
+ break;
+
+ s->pos_in += --len0;
+ /* skip the rest bytes */
+ while (--len0)
+ (void)kite_mf_getmatches_hc3(mf, 0, 0);
+ s->prev_valid = false;
+ s->prev_longest = 0;
+ } else {
+ if (!s->prev_valid)
+ s->prev_valid = true;
+ else if (kite_deflate_tally(s, prev_matches))
+ break;
+ ++s->pos_in;
+ s->prev_longest = matches;
+ }
+
+ s->lastblock |= (s->pos_in >= s->inlen);
+ if (s->pos_in >= s->inlen) {
+ flush = true;
+ break;
+ }
+ if (s->symbols >= s->max_symbols) {
+ kite_deflate_endblock(s);
+ break;
+ }
+ }
+
+ if (flush && s->prev_valid) {
+ (void)kite_deflate_tally(s, mf->matches + s->prev_longest);
+ s->prev_valid = false;
+ }
+ return kite_deflate_commitblock(s);
+}
+
+void kite_deflate_end(struct kite_deflate *s)
+{
+ if (s->mf) {
+ if (s->mf->hash)
+ free(s->mf->hash);
+ if (s->mf->chain)
+ free(s->mf->chain);
+ free(s->mf);
+ }
+ if (s->sym)
+ free(s->sym);
+ free(s);
+}
+
+struct kite_deflate *kite_deflate_init(int level, unsigned int dict_size)
+{
+ struct kite_deflate *s;
+ int err;
+
+ kite_deflate_init_once();
+ s = calloc(1, sizeof(*s));
+ if (!s)
+ return ERR_PTR(-ENOMEM);
+
+ s->max_symbols = 16384;
+ s->sym = malloc(sizeof(s->sym[0]) * s->max_symbols);
+ if (!s->sym) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ s->mf = malloc(sizeof(*s->mf));
+ if (!s->mf) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ if (!dict_size)
+ dict_size = kHistorySize32;
+
+ err = kite_mf_init(s->mf, dict_size, level);
+ if (err < 0)
+ goto err_out;
+
+ s->lazy_search = err;
+ return s;
+err_out:
+ if (s->mf)
+ free(s->mf);
+ if (s->sym)
+ free(s->sym);
+ free(s);
+ return ERR_PTR(err);
+}
+
+int kite_deflate_destsize(struct kite_deflate *s, const u8 *in, u8 *out,
+ unsigned int *srcsize, unsigned int target_dstsize)
+{
+ memset(s, 0, offsetof(struct kite_deflate, mainFreqs));
+ s->in = in;
+ s->inlen = *srcsize;
+ s->out = out;
+ s->outlen = target_dstsize;
+ kite_mf_reset(s->mf, in, in + s->inlen);
+
+ if (s->lazy_search)
+ while (!kite_deflate_slow(s));
+ else
+ while (!kite_deflate_fast(s));
+ flushbits(s);
+
+ *srcsize = s->startpos;
+ return s->pos_out;
+}
+
+#if TEST
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+
+int main(int argc, char *argv[])
+{
+ int fd;
+ u64 filelength;
+ u8 out[1048576], *buf;
+ int dstsize = 4096;
+ unsigned int srcsize, outsize;
+ struct kite_deflate *s;
+
+ fd = open(argv[1], O_RDONLY);
+ if (fd < 0)
+ return -errno;
+ if (argc > 2)
+ dstsize = atoi(argv[2]);
+ filelength = lseek(fd, 0, SEEK_END);
+
+ s = kite_deflate_init(9, 0);
+ if (IS_ERR(s))
+ return PTR_ERR(s);
+
+ filelength = lseek(fd, 0, SEEK_END);
+ buf = mmap(NULL, filelength, PROT_READ, MAP_SHARED, fd, 0);
+ if (buf == MAP_FAILED)
+ return -errno;
+ close(fd);
+
+ srcsize = filelength;
+ outsize = kite_deflate_destsize(s, buf, out, &srcsize, dstsize);
+ fd = open("out.txt", O_WRONLY | O_CREAT | O_TRUNC, 0644);
+ write(fd, out, outsize);
+ close(fd);
+ kite_deflate_end(s);
+ return 0;
+}
+#endif