diff --git a/config.py b/config.py index 6bb421e..54e40a5 100644 --- a/config.py +++ b/config.py @@ -23,6 +23,8 @@ else: # [step 3]>> 以下配置可以优化体验,但大部分场合下并不需要修改 +# 对话窗的高度 +CHATBOT_HEIGHT = 1117 # 发送请求到OpenAI后,等待多久判定为超时 TIMEOUT_SECONDS = 25 diff --git a/crazy_functions/test_project/cpp/longcode/jpgd.cpp b/crazy_functions/test_project/cpp/longcode/jpgd.cpp new file mode 100644 index 0000000..36d06c8 --- /dev/null +++ b/crazy_functions/test_project/cpp/longcode/jpgd.cpp @@ -0,0 +1,3276 @@ +// jpgd.cpp - C++ class for JPEG decompression. +// Public domain, Rich Geldreich +// Last updated Apr. 16, 2011 +// Alex Evans: Linear memory allocator (taken from jpge.h). +// +// Supports progressive and baseline sequential JPEG image files, and the most common chroma subsampling factors: Y, H1V1, H2V1, H1V2, and H2V2. +// +// Chroma upsampling quality: H2V2 is upsampled in the frequency domain, H2V1 and H1V2 are upsampled using point sampling. +// Chroma upsampling reference: "Fast Scheme for Image Size Change in the Compressed Domain" +// http://vision.ai.uiuc.edu/~dugad/research/dct/index.html + +#include "jpgd.h" +#include + +#include +// BEGIN EPIC MOD +#define JPGD_ASSERT(x) { assert(x); CA_ASSUME(x); } (void)0 +// END EPIC MOD + +#ifdef _MSC_VER +#pragma warning (disable : 4611) // warning C4611: interaction between '_setjmp' and C++ object destruction is non-portable +#endif + +// Set to 1 to enable freq. domain chroma upsampling on images using H2V2 subsampling (0=faster nearest neighbor sampling). +// This is slower, but results in higher quality on images with highly saturated colors. +#define JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING 1 + +#define JPGD_TRUE (1) +#define JPGD_FALSE (0) + +#define JPGD_MAX(a,b) (((a)>(b)) ? (a) : (b)) +#define JPGD_MIN(a,b) (((a)<(b)) ? 
(a) : (b)) + +namespace jpgd { + + static inline void *jpgd_malloc(size_t nSize) { return FMemory::Malloc(nSize); } + static inline void jpgd_free(void *p) { FMemory::Free(p); } + +// BEGIN EPIC MOD +//@UE3 - use UE3 BGRA encoding instead of assuming RGBA + // stolen from IImageWrapper.h + enum ERGBFormatJPG + { + Invalid = -1, + RGBA = 0, + BGRA = 1, + Gray = 2, + }; + static ERGBFormatJPG jpg_format; +// END EPIC MOD + + // DCT coefficients are stored in this sequence. + static int g_ZAG[64] = { 0,1,8,16,9,2,3,10,17,24,32,25,18,11,4,5,12,19,26,33,40,48,41,34,27,20,13,6,7,14,21,28,35,42,49,56,57,50,43,36,29,22,15,23,30,37,44,51,58,59,52,45,38,31,39,46,53,60,61,54,47,55,62,63 }; + + enum JPEG_MARKER + { + M_SOF0 = 0xC0, M_SOF1 = 0xC1, M_SOF2 = 0xC2, M_SOF3 = 0xC3, M_SOF5 = 0xC5, M_SOF6 = 0xC6, M_SOF7 = 0xC7, M_JPG = 0xC8, + M_SOF9 = 0xC9, M_SOF10 = 0xCA, M_SOF11 = 0xCB, M_SOF13 = 0xCD, M_SOF14 = 0xCE, M_SOF15 = 0xCF, M_DHT = 0xC4, M_DAC = 0xCC, + M_RST0 = 0xD0, M_RST1 = 0xD1, M_RST2 = 0xD2, M_RST3 = 0xD3, M_RST4 = 0xD4, M_RST5 = 0xD5, M_RST6 = 0xD6, M_RST7 = 0xD7, + M_SOI = 0xD8, M_EOI = 0xD9, M_SOS = 0xDA, M_DQT = 0xDB, M_DNL = 0xDC, M_DRI = 0xDD, M_DHP = 0xDE, M_EXP = 0xDF, + M_APP0 = 0xE0, M_APP15 = 0xEF, M_JPG0 = 0xF0, M_JPG13 = 0xFD, M_COM = 0xFE, M_TEM = 0x01, M_ERROR = 0x100, RST0 = 0xD0 + }; + + enum JPEG_SUBSAMPLING { JPGD_GRAYSCALE = 0, JPGD_YH1V1, JPGD_YH2V1, JPGD_YH1V2, JPGD_YH2V2 }; + +#define CONST_BITS 13 +#define PASS1_BITS 2 +#define SCALEDONE ((int32)1) + +#define FIX_0_298631336 ((int32)2446) /* FIX(0.298631336) */ +#define FIX_0_390180644 ((int32)3196) /* FIX(0.390180644) */ +#define FIX_0_541196100 ((int32)4433) /* FIX(0.541196100) */ +#define FIX_0_765366865 ((int32)6270) /* FIX(0.765366865) */ +#define FIX_0_899976223 ((int32)7373) /* FIX(0.899976223) */ +#define FIX_1_175875602 ((int32)9633) /* FIX(1.175875602) */ +#define FIX_1_501321110 ((int32)12299) /* FIX(1.501321110) */ +#define FIX_1_847759065 ((int32)15137) /* FIX(1.847759065) */ 
+#define FIX_1_961570560 ((int32)16069) /* FIX(1.961570560) */ +#define FIX_2_053119869 ((int32)16819) /* FIX(2.053119869) */ +#define FIX_2_562915447 ((int32)20995) /* FIX(2.562915447) */ +#define FIX_3_072711026 ((int32)25172) /* FIX(3.072711026) */ + +#define DESCALE(x,n) (((x) + (SCALEDONE << ((n)-1))) >> (n)) +#define DESCALE_ZEROSHIFT(x,n) (((x) + (128 << (n)) + (SCALEDONE << ((n)-1))) >> (n)) + +#define MULTIPLY(var, cnst) ((var) * (cnst)) + +#define CLAMP(i) ((static_cast(i) > 255) ? (((~i) >> 31) & 0xFF) : (i)) + + // Compiler creates a fast path 1D IDCT for X non-zero columns + template + struct Row + { + static void idct(int* pTemp, const jpgd_block_t* pSrc) + { + // ACCESS_COL() will be optimized at compile time to either an array access, or 0. +#define ACCESS_COL(x) (((x) < NONZERO_COLS) ? (int)pSrc[x] : 0) + + const int z2 = ACCESS_COL(2), z3 = ACCESS_COL(6); + + const int z1 = MULTIPLY(z2 + z3, FIX_0_541196100); + const int tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065); + const int tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); + + const int tmp0 = (ACCESS_COL(0) + ACCESS_COL(4)) << CONST_BITS; + const int tmp1 = (ACCESS_COL(0) - ACCESS_COL(4)) << CONST_BITS; + + const int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2; + + const int atmp0 = ACCESS_COL(7), atmp1 = ACCESS_COL(5), atmp2 = ACCESS_COL(3), atmp3 = ACCESS_COL(1); + + const int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3; + const int bz5 = MULTIPLY(bz3 + bz4, FIX_1_175875602); + + const int az1 = MULTIPLY(bz1, - FIX_0_899976223); + const int az2 = MULTIPLY(bz2, - FIX_2_562915447); + const int az3 = MULTIPLY(bz3, - FIX_1_961570560) + bz5; + const int az4 = MULTIPLY(bz4, - FIX_0_390180644) + bz5; + + const int btmp0 = MULTIPLY(atmp0, FIX_0_298631336) + az1 + az3; + const int btmp1 = MULTIPLY(atmp1, FIX_2_053119869) + az2 + az4; + const int btmp2 = MULTIPLY(atmp2, FIX_3_072711026) + az2 + az3; + const int btmp3 = 
MULTIPLY(atmp3, FIX_1_501321110) + az1 + az4; + + pTemp[0] = DESCALE(tmp10 + btmp3, CONST_BITS-PASS1_BITS); + pTemp[7] = DESCALE(tmp10 - btmp3, CONST_BITS-PASS1_BITS); + pTemp[1] = DESCALE(tmp11 + btmp2, CONST_BITS-PASS1_BITS); + pTemp[6] = DESCALE(tmp11 - btmp2, CONST_BITS-PASS1_BITS); + pTemp[2] = DESCALE(tmp12 + btmp1, CONST_BITS-PASS1_BITS); + pTemp[5] = DESCALE(tmp12 - btmp1, CONST_BITS-PASS1_BITS); + pTemp[3] = DESCALE(tmp13 + btmp0, CONST_BITS-PASS1_BITS); + pTemp[4] = DESCALE(tmp13 - btmp0, CONST_BITS-PASS1_BITS); + } + }; + + template <> + struct Row<0> + { + static void idct(int* pTemp, const jpgd_block_t* pSrc) + { +#ifdef _MSC_VER + pTemp; pSrc; +#endif + } + }; + + template <> + struct Row<1> + { + static void idct(int* pTemp, const jpgd_block_t* pSrc) + { + const int dcval = (pSrc[0] << PASS1_BITS); + + pTemp[0] = dcval; + pTemp[1] = dcval; + pTemp[2] = dcval; + pTemp[3] = dcval; + pTemp[4] = dcval; + pTemp[5] = dcval; + pTemp[6] = dcval; + pTemp[7] = dcval; + } + }; + + // Compiler creates a fast path 1D IDCT for X non-zero rows + template + struct Col + { + static void idct(uint8* pDst_ptr, const int* pTemp) + { + // ACCESS_ROW() will be optimized at compile time to either an array access, or 0. +#define ACCESS_ROW(x) (((x) < NONZERO_ROWS) ? 
pTemp[x * 8] : 0) + + const int z2 = ACCESS_ROW(2); + const int z3 = ACCESS_ROW(6); + + const int z1 = MULTIPLY(z2 + z3, FIX_0_541196100); + const int tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065); + const int tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); + + const int tmp0 = (ACCESS_ROW(0) + ACCESS_ROW(4)) << CONST_BITS; + const int tmp1 = (ACCESS_ROW(0) - ACCESS_ROW(4)) << CONST_BITS; + + const int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2; + + const int atmp0 = ACCESS_ROW(7), atmp1 = ACCESS_ROW(5), atmp2 = ACCESS_ROW(3), atmp3 = ACCESS_ROW(1); + + const int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3; + const int bz5 = MULTIPLY(bz3 + bz4, FIX_1_175875602); + + const int az1 = MULTIPLY(bz1, - FIX_0_899976223); + const int az2 = MULTIPLY(bz2, - FIX_2_562915447); + const int az3 = MULTIPLY(bz3, - FIX_1_961570560) + bz5; + const int az4 = MULTIPLY(bz4, - FIX_0_390180644) + bz5; + + const int btmp0 = MULTIPLY(atmp0, FIX_0_298631336) + az1 + az3; + const int btmp1 = MULTIPLY(atmp1, FIX_2_053119869) + az2 + az4; + const int btmp2 = MULTIPLY(atmp2, FIX_3_072711026) + az2 + az3; + const int btmp3 = MULTIPLY(atmp3, FIX_1_501321110) + az1 + az4; + + int i = DESCALE_ZEROSHIFT(tmp10 + btmp3, CONST_BITS+PASS1_BITS+3); + pDst_ptr[8*0] = (uint8)CLAMP(i); + + i = DESCALE_ZEROSHIFT(tmp10 - btmp3, CONST_BITS+PASS1_BITS+3); + pDst_ptr[8*7] = (uint8)CLAMP(i); + + i = DESCALE_ZEROSHIFT(tmp11 + btmp2, CONST_BITS+PASS1_BITS+3); + pDst_ptr[8*1] = (uint8)CLAMP(i); + + i = DESCALE_ZEROSHIFT(tmp11 - btmp2, CONST_BITS+PASS1_BITS+3); + pDst_ptr[8*6] = (uint8)CLAMP(i); + + i = DESCALE_ZEROSHIFT(tmp12 + btmp1, CONST_BITS+PASS1_BITS+3); + pDst_ptr[8*2] = (uint8)CLAMP(i); + + i = DESCALE_ZEROSHIFT(tmp12 - btmp1, CONST_BITS+PASS1_BITS+3); + pDst_ptr[8*5] = (uint8)CLAMP(i); + + i = DESCALE_ZEROSHIFT(tmp13 + btmp0, CONST_BITS+PASS1_BITS+3); + pDst_ptr[8*3] = (uint8)CLAMP(i); + + i = DESCALE_ZEROSHIFT(tmp13 - btmp0, 
CONST_BITS+PASS1_BITS+3); + pDst_ptr[8*4] = (uint8)CLAMP(i); + } + }; + + template <> + struct Col<1> + { + static void idct(uint8* pDst_ptr, const int* pTemp) + { + int dcval = DESCALE_ZEROSHIFT(pTemp[0], PASS1_BITS+3); + const uint8 dcval_clamped = (uint8)CLAMP(dcval); + pDst_ptr[0*8] = dcval_clamped; + pDst_ptr[1*8] = dcval_clamped; + pDst_ptr[2*8] = dcval_clamped; + pDst_ptr[3*8] = dcval_clamped; + pDst_ptr[4*8] = dcval_clamped; + pDst_ptr[5*8] = dcval_clamped; + pDst_ptr[6*8] = dcval_clamped; + pDst_ptr[7*8] = dcval_clamped; + } + }; + + static const uint8 s_idct_row_table[] = + { + 1,0,0,0,0,0,0,0, 2,0,0,0,0,0,0,0, 2,1,0,0,0,0,0,0, 2,1,1,0,0,0,0,0, 2,2,1,0,0,0,0,0, 3,2,1,0,0,0,0,0, 4,2,1,0,0,0,0,0, 4,3,1,0,0,0,0,0, + 4,3,2,0,0,0,0,0, 4,3,2,1,0,0,0,0, 4,3,2,1,1,0,0,0, 4,3,2,2,1,0,0,0, 4,3,3,2,1,0,0,0, 4,4,3,2,1,0,0,0, 5,4,3,2,1,0,0,0, 6,4,3,2,1,0,0,0, + 6,5,3,2,1,0,0,0, 6,5,4,2,1,0,0,0, 6,5,4,3,1,0,0,0, 6,5,4,3,2,0,0,0, 6,5,4,3,2,1,0,0, 6,5,4,3,2,1,1,0, 6,5,4,3,2,2,1,0, 6,5,4,3,3,2,1,0, + 6,5,4,4,3,2,1,0, 6,5,5,4,3,2,1,0, 6,6,5,4,3,2,1,0, 7,6,5,4,3,2,1,0, 8,6,5,4,3,2,1,0, 8,7,5,4,3,2,1,0, 8,7,6,4,3,2,1,0, 8,7,6,5,3,2,1,0, + 8,7,6,5,4,2,1,0, 8,7,6,5,4,3,1,0, 8,7,6,5,4,3,2,0, 8,7,6,5,4,3,2,1, 8,7,6,5,4,3,2,2, 8,7,6,5,4,3,3,2, 8,7,6,5,4,4,3,2, 8,7,6,5,5,4,3,2, + 8,7,6,6,5,4,3,2, 8,7,7,6,5,4,3,2, 8,8,7,6,5,4,3,2, 8,8,8,6,5,4,3,2, 8,8,8,7,5,4,3,2, 8,8,8,7,6,4,3,2, 8,8,8,7,6,5,3,2, 8,8,8,7,6,5,4,2, + 8,8,8,7,6,5,4,3, 8,8,8,7,6,5,4,4, 8,8,8,7,6,5,5,4, 8,8,8,7,6,6,5,4, 8,8,8,7,7,6,5,4, 8,8,8,8,7,6,5,4, 8,8,8,8,8,6,5,4, 8,8,8,8,8,7,5,4, + 8,8,8,8,8,7,6,4, 8,8,8,8,8,7,6,5, 8,8,8,8,8,7,6,6, 8,8,8,8,8,7,7,6, 8,8,8,8,8,8,7,6, 8,8,8,8,8,8,8,6, 8,8,8,8,8,8,8,7, 8,8,8,8,8,8,8,8, + }; + + static const uint8 s_idct_col_table[] = { 1, 1, 2, 3, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 }; + + void idct(const jpgd_block_t* pSrc_ptr, uint8* 
pDst_ptr, int block_max_zag) + { + JPGD_ASSERT(block_max_zag >= 1); + JPGD_ASSERT(block_max_zag <= 64); + + if (block_max_zag == 1) + { + int k = ((pSrc_ptr[0] + 4) >> 3) + 128; + k = CLAMP(k); + k = k | (k<<8); + k = k | (k<<16); + + for (int i = 8; i > 0; i--) + { + *(int*)&pDst_ptr[0] = k; + *(int*)&pDst_ptr[4] = k; + pDst_ptr += 8; + } + return; + } + + int temp[64]; + + const jpgd_block_t* pSrc = pSrc_ptr; + int* pTemp = temp; + + const uint8* pRow_tab = &s_idct_row_table[(block_max_zag - 1) * 8]; + int i; + for (i = 8; i > 0; i--, pRow_tab++) + { + switch (*pRow_tab) + { + case 0: Row<0>::idct(pTemp, pSrc); break; + case 1: Row<1>::idct(pTemp, pSrc); break; + case 2: Row<2>::idct(pTemp, pSrc); break; + case 3: Row<3>::idct(pTemp, pSrc); break; + case 4: Row<4>::idct(pTemp, pSrc); break; + case 5: Row<5>::idct(pTemp, pSrc); break; + case 6: Row<6>::idct(pTemp, pSrc); break; + case 7: Row<7>::idct(pTemp, pSrc); break; + case 8: Row<8>::idct(pTemp, pSrc); break; + } + + pSrc += 8; + pTemp += 8; + } + + pTemp = temp; + + const int nonzero_rows = s_idct_col_table[block_max_zag - 1]; + for (i = 8; i > 0; i--) + { + switch (nonzero_rows) + { + case 1: Col<1>::idct(pDst_ptr, pTemp); break; + case 2: Col<2>::idct(pDst_ptr, pTemp); break; + case 3: Col<3>::idct(pDst_ptr, pTemp); break; + case 4: Col<4>::idct(pDst_ptr, pTemp); break; + case 5: Col<5>::idct(pDst_ptr, pTemp); break; + case 6: Col<6>::idct(pDst_ptr, pTemp); break; + case 7: Col<7>::idct(pDst_ptr, pTemp); break; + case 8: Col<8>::idct(pDst_ptr, pTemp); break; + } + + pTemp++; + pDst_ptr++; + } + } + + void idct_4x4(const jpgd_block_t* pSrc_ptr, uint8* pDst_ptr) + { + int temp[64]; + int* pTemp = temp; + const jpgd_block_t* pSrc = pSrc_ptr; + + for (int i = 4; i > 0; i--) + { + Row<4>::idct(pTemp, pSrc); + pSrc += 8; + pTemp += 8; + } + + pTemp = temp; + for (int i = 8; i > 0; i--) + { + Col<4>::idct(pDst_ptr, pTemp); + pTemp++; + pDst_ptr++; + } + } + + // Retrieve one character from the input stream. 
+ inline uint jpeg_decoder::get_char() + { + // Any bytes remaining in buffer? + if (!m_in_buf_left) + { + // Try to get more bytes. + prep_in_buffer(); + // Still nothing to get? + if (!m_in_buf_left) + { + // Pad the end of the stream with 0xFF 0xD9 (EOI marker) + int t = m_tem_flag; + m_tem_flag ^= 1; + if (t) + return 0xD9; + else + return 0xFF; + } + } + + uint c = *m_pIn_buf_ofs++; + m_in_buf_left--; + + return c; + } + + // Same as previous method, except can indicate if the character is a pad character or not. + inline uint jpeg_decoder::get_char(bool *pPadding_flag) + { + if (!m_in_buf_left) + { + prep_in_buffer(); + if (!m_in_buf_left) + { + *pPadding_flag = true; + int t = m_tem_flag; + m_tem_flag ^= 1; + if (t) + return 0xD9; + else + return 0xFF; + } + } + + *pPadding_flag = false; + + uint c = *m_pIn_buf_ofs++; + m_in_buf_left--; + + return c; + } + + // Inserts a previously retrieved character back into the input buffer. + inline void jpeg_decoder::stuff_char(uint8 q) + { + *(--m_pIn_buf_ofs) = q; + m_in_buf_left++; + } + + // Retrieves one character from the input stream, but does not read past markers. Will continue to return 0xFF when a marker is encountered. + inline uint8 jpeg_decoder::get_octet() + { + bool padding_flag; + int c = get_char(&padding_flag); + + if (c == 0xFF) + { + if (padding_flag) + return 0xFF; + + c = get_char(&padding_flag); + if (padding_flag) + { + stuff_char(0xFF); + return 0xFF; + } + + if (c == 0x00) + return 0xFF; + else + { + stuff_char(static_cast(c)); + stuff_char(0xFF); + return 0xFF; + } + } + + return static_cast(c); + } + + // Retrieves a variable number of bits from the input stream. Does not recognize markers. 
+ inline uint jpeg_decoder::get_bits(int num_bits) + { + if (!num_bits) + return 0; + + uint i = m_bit_buf >> (32 - num_bits); + + if ((m_bits_left -= num_bits) <= 0) + { + m_bit_buf <<= (num_bits += m_bits_left); + + uint c1 = get_char(); + uint c2 = get_char(); + m_bit_buf = (m_bit_buf & 0xFFFF0000) | (c1 << 8) | c2; + + m_bit_buf <<= -m_bits_left; + + m_bits_left += 16; + + JPGD_ASSERT(m_bits_left >= 0); + } + else + m_bit_buf <<= num_bits; + + return i; + } + + // Retrieves a variable number of bits from the input stream. Markers will not be read into the input bit buffer. Instead, an infinite number of all 1's will be returned when a marker is encountered. + inline uint jpeg_decoder::get_bits_no_markers(int num_bits) + { + if (!num_bits) + return 0; + + uint i = m_bit_buf >> (32 - num_bits); + + if ((m_bits_left -= num_bits) <= 0) + { + m_bit_buf <<= (num_bits += m_bits_left); + + if ((m_in_buf_left < 2) || (m_pIn_buf_ofs[0] == 0xFF) || (m_pIn_buf_ofs[1] == 0xFF)) + { + uint c1 = get_octet(); + uint c2 = get_octet(); + m_bit_buf |= (c1 << 8) | c2; + } + else + { + m_bit_buf |= ((uint)m_pIn_buf_ofs[0] << 8) | m_pIn_buf_ofs[1]; + m_in_buf_left -= 2; + m_pIn_buf_ofs += 2; + } + + m_bit_buf <<= -m_bits_left; + + m_bits_left += 16; + + JPGD_ASSERT(m_bits_left >= 0); + } + else + m_bit_buf <<= num_bits; + + return i; + } + + // Decodes a Huffman encoded symbol. + inline int jpeg_decoder::huff_decode(huff_tables *pH) + { + int symbol; + + // Check first 8-bits: do we have a complete symbol? + if ((symbol = pH->look_up[m_bit_buf >> 24]) < 0) + { + // Decode more bits, use a tree traversal to find symbol. + int ofs = 23; + do + { + symbol = pH->tree[-(int)(symbol + ((m_bit_buf >> ofs) & 1))]; + ofs--; + } while (symbol < 0); + + get_bits_no_markers(8 + (23 - ofs)); + } + else + get_bits_no_markers(pH->code_size[symbol]); + + return symbol; + } + + // Decodes a Huffman encoded symbol. 
+ inline int jpeg_decoder::huff_decode(huff_tables *pH, int& extra_bits) + { + int symbol; + + // Check first 8-bits: do we have a complete symbol? + if ((symbol = pH->look_up2[m_bit_buf >> 24]) < 0) + { + // Use a tree traversal to find symbol. + int ofs = 23; + do + { + symbol = pH->tree[-(int)(symbol + ((m_bit_buf >> ofs) & 1))]; + ofs--; + } while (symbol < 0); + + get_bits_no_markers(8 + (23 - ofs)); + + extra_bits = get_bits_no_markers(symbol & 0xF); + } + else + { + JPGD_ASSERT(((symbol >> 8) & 31) == pH->code_size[symbol & 255] + ((symbol & 0x8000) ? (symbol & 15) : 0)); + + if (symbol & 0x8000) + { + get_bits_no_markers((symbol >> 8) & 31); + extra_bits = symbol >> 16; + } + else + { + int code_size = (symbol >> 8) & 31; + int num_extra_bits = symbol & 0xF; + int bits = code_size + num_extra_bits; + if (bits <= (m_bits_left + 16)) + extra_bits = get_bits_no_markers(bits) & ((1 << num_extra_bits) - 1); + else + { + get_bits_no_markers(code_size); + extra_bits = get_bits_no_markers(num_extra_bits); + } + } + + symbol &= 0xFF; + } + + return symbol; + } + + // Tables and macro used to fully decode the DPCM differences. + static const int s_extend_test[16] = { 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 }; + static const int s_extend_offset[16] = { 0, -1, -3, -7, -15, -31, -63, -127, -255, -511, -1023, -2047, -4095, -8191, -16383, -32767 }; + static const int s_extend_mask[] = { 0, (1<<0), (1<<1), (1<<2), (1<<3), (1<<4), (1<<5), (1<<6), (1<<7), (1<<8), (1<<9), (1<<10), (1<<11), (1<<12), (1<<13), (1<<14), (1<<15), (1<<16) }; +#define HUFF_EXTEND(x,s) ((x) < s_extend_test[s] ? (x) + s_extend_offset[s] : (x)) + + // Clamps a value between 0-255. 
+ inline uint8 jpeg_decoder::clamp(int i) + { + if (static_cast(i) > 255) + i = (((~i) >> 31) & 0xFF); + + return static_cast(i); + } + + namespace DCT_Upsample + { + struct Matrix44 + { + typedef int Element_Type; + enum { NUM_ROWS = 4, NUM_COLS = 4 }; + + Element_Type v[NUM_ROWS][NUM_COLS]; + + inline int rows() const { return NUM_ROWS; } + inline int cols() const { return NUM_COLS; } + + inline const Element_Type & at(int r, int c) const { return v[r][c]; } + inline Element_Type & at(int r, int c) { return v[r][c]; } + + inline Matrix44() { } + + inline Matrix44& operator += (const Matrix44& a) + { + for (int r = 0; r < NUM_ROWS; r++) + { + at(r, 0) += a.at(r, 0); + at(r, 1) += a.at(r, 1); + at(r, 2) += a.at(r, 2); + at(r, 3) += a.at(r, 3); + } + return *this; + } + + inline Matrix44& operator -= (const Matrix44& a) + { + for (int r = 0; r < NUM_ROWS; r++) + { + at(r, 0) -= a.at(r, 0); + at(r, 1) -= a.at(r, 1); + at(r, 2) -= a.at(r, 2); + at(r, 3) -= a.at(r, 3); + } + return *this; + } + + friend inline Matrix44 operator + (const Matrix44& a, const Matrix44& b) + { + Matrix44 ret; + for (int r = 0; r < NUM_ROWS; r++) + { + ret.at(r, 0) = a.at(r, 0) + b.at(r, 0); + ret.at(r, 1) = a.at(r, 1) + b.at(r, 1); + ret.at(r, 2) = a.at(r, 2) + b.at(r, 2); + ret.at(r, 3) = a.at(r, 3) + b.at(r, 3); + } + return ret; + } + + friend inline Matrix44 operator - (const Matrix44& a, const Matrix44& b) + { + Matrix44 ret; + for (int r = 0; r < NUM_ROWS; r++) + { + ret.at(r, 0) = a.at(r, 0) - b.at(r, 0); + ret.at(r, 1) = a.at(r, 1) - b.at(r, 1); + ret.at(r, 2) = a.at(r, 2) - b.at(r, 2); + ret.at(r, 3) = a.at(r, 3) - b.at(r, 3); + } + return ret; + } + + static inline void add_and_store(jpgd_block_t* pDst, const Matrix44& a, const Matrix44& b) + { + for (int r = 0; r < 4; r++) + { + pDst[0*8 + r] = static_cast(a.at(r, 0) + b.at(r, 0)); + pDst[1*8 + r] = static_cast(a.at(r, 1) + b.at(r, 1)); + pDst[2*8 + r] = static_cast(a.at(r, 2) + b.at(r, 2)); + pDst[3*8 + r] = static_cast(a.at(r, 
3) + b.at(r, 3)); + } + } + + static inline void sub_and_store(jpgd_block_t* pDst, const Matrix44& a, const Matrix44& b) + { + for (int r = 0; r < 4; r++) + { + pDst[0*8 + r] = static_cast(a.at(r, 0) - b.at(r, 0)); + pDst[1*8 + r] = static_cast(a.at(r, 1) - b.at(r, 1)); + pDst[2*8 + r] = static_cast(a.at(r, 2) - b.at(r, 2)); + pDst[3*8 + r] = static_cast(a.at(r, 3) - b.at(r, 3)); + } + } + }; + + const int FRACT_BITS = 10; + const int SCALE = 1 << FRACT_BITS; + + typedef int Temp_Type; +#define D(i) (((i) + (SCALE >> 1)) >> FRACT_BITS) +#define F(i) ((int)((i) * SCALE + .5f)) + + // Any decent C++ compiler will optimize this at compile time to a 0, or an array access. +#define AT(c, r) ((((c)>=NUM_COLS)||((r)>=NUM_ROWS)) ? 0 : pSrc[(c)+(r)*8]) + + // NUM_ROWS/NUM_COLS = # of non-zero rows/cols in input matrix + template + struct P_Q + { + static void calc(Matrix44& P, Matrix44& Q, const jpgd_block_t* pSrc) + { + // 4x8 = 4x8 times 8x8, matrix 0 is constant + const Temp_Type X000 = AT(0, 0); + const Temp_Type X001 = AT(0, 1); + const Temp_Type X002 = AT(0, 2); + const Temp_Type X003 = AT(0, 3); + const Temp_Type X004 = AT(0, 4); + const Temp_Type X005 = AT(0, 5); + const Temp_Type X006 = AT(0, 6); + const Temp_Type X007 = AT(0, 7); + const Temp_Type X010 = D(F(0.415735f) * AT(1, 0) + F(0.791065f) * AT(3, 0) + F(-0.352443f) * AT(5, 0) + F(0.277785f) * AT(7, 0)); + const Temp_Type X011 = D(F(0.415735f) * AT(1, 1) + F(0.791065f) * AT(3, 1) + F(-0.352443f) * AT(5, 1) + F(0.277785f) * AT(7, 1)); + const Temp_Type X012 = D(F(0.415735f) * AT(1, 2) + F(0.791065f) * AT(3, 2) + F(-0.352443f) * AT(5, 2) + F(0.277785f) * AT(7, 2)); + const Temp_Type X013 = D(F(0.415735f) * AT(1, 3) + F(0.791065f) * AT(3, 3) + F(-0.352443f) * AT(5, 3) + F(0.277785f) * AT(7, 3)); + const Temp_Type X014 = D(F(0.415735f) * AT(1, 4) + F(0.791065f) * AT(3, 4) + F(-0.352443f) * AT(5, 4) + F(0.277785f) * AT(7, 4)); + const Temp_Type X015 = D(F(0.415735f) * AT(1, 5) + F(0.791065f) * AT(3, 5) + 
F(-0.352443f) * AT(5, 5) + F(0.277785f) * AT(7, 5)); + const Temp_Type X016 = D(F(0.415735f) * AT(1, 6) + F(0.791065f) * AT(3, 6) + F(-0.352443f) * AT(5, 6) + F(0.277785f) * AT(7, 6)); + const Temp_Type X017 = D(F(0.415735f) * AT(1, 7) + F(0.791065f) * AT(3, 7) + F(-0.352443f) * AT(5, 7) + F(0.277785f) * AT(7, 7)); + const Temp_Type X020 = AT(4, 0); + const Temp_Type X021 = AT(4, 1); + const Temp_Type X022 = AT(4, 2); + const Temp_Type X023 = AT(4, 3); + const Temp_Type X024 = AT(4, 4); + const Temp_Type X025 = AT(4, 5); + const Temp_Type X026 = AT(4, 6); + const Temp_Type X027 = AT(4, 7); + const Temp_Type X030 = D(F(0.022887f) * AT(1, 0) + F(-0.097545f) * AT(3, 0) + F(0.490393f) * AT(5, 0) + F(0.865723f) * AT(7, 0)); + const Temp_Type X031 = D(F(0.022887f) * AT(1, 1) + F(-0.097545f) * AT(3, 1) + F(0.490393f) * AT(5, 1) + F(0.865723f) * AT(7, 1)); + const Temp_Type X032 = D(F(0.022887f) * AT(1, 2) + F(-0.097545f) * AT(3, 2) + F(0.490393f) * AT(5, 2) + F(0.865723f) * AT(7, 2)); + const Temp_Type X033 = D(F(0.022887f) * AT(1, 3) + F(-0.097545f) * AT(3, 3) + F(0.490393f) * AT(5, 3) + F(0.865723f) * AT(7, 3)); + const Temp_Type X034 = D(F(0.022887f) * AT(1, 4) + F(-0.097545f) * AT(3, 4) + F(0.490393f) * AT(5, 4) + F(0.865723f) * AT(7, 4)); + const Temp_Type X035 = D(F(0.022887f) * AT(1, 5) + F(-0.097545f) * AT(3, 5) + F(0.490393f) * AT(5, 5) + F(0.865723f) * AT(7, 5)); + const Temp_Type X036 = D(F(0.022887f) * AT(1, 6) + F(-0.097545f) * AT(3, 6) + F(0.490393f) * AT(5, 6) + F(0.865723f) * AT(7, 6)); + const Temp_Type X037 = D(F(0.022887f) * AT(1, 7) + F(-0.097545f) * AT(3, 7) + F(0.490393f) * AT(5, 7) + F(0.865723f) * AT(7, 7)); + + // 4x4 = 4x8 times 8x4, matrix 1 is constant + P.at(0, 0) = X000; + P.at(0, 1) = D(X001 * F(0.415735f) + X003 * F(0.791065f) + X005 * F(-0.352443f) + X007 * F(0.277785f)); + P.at(0, 2) = X004; + P.at(0, 3) = D(X001 * F(0.022887f) + X003 * F(-0.097545f) + X005 * F(0.490393f) + X007 * F(0.865723f)); + P.at(1, 0) = X010; + P.at(1, 1) = D(X011 
* F(0.415735f) + X013 * F(0.791065f) + X015 * F(-0.352443f) + X017 * F(0.277785f)); + P.at(1, 2) = X014; + P.at(1, 3) = D(X011 * F(0.022887f) + X013 * F(-0.097545f) + X015 * F(0.490393f) + X017 * F(0.865723f)); + P.at(2, 0) = X020; + P.at(2, 1) = D(X021 * F(0.415735f) + X023 * F(0.791065f) + X025 * F(-0.352443f) + X027 * F(0.277785f)); + P.at(2, 2) = X024; + P.at(2, 3) = D(X021 * F(0.022887f) + X023 * F(-0.097545f) + X025 * F(0.490393f) + X027 * F(0.865723f)); + P.at(3, 0) = X030; + P.at(3, 1) = D(X031 * F(0.415735f) + X033 * F(0.791065f) + X035 * F(-0.352443f) + X037 * F(0.277785f)); + P.at(3, 2) = X034; + P.at(3, 3) = D(X031 * F(0.022887f) + X033 * F(-0.097545f) + X035 * F(0.490393f) + X037 * F(0.865723f)); + // 40 muls 24 adds + + // 4x4 = 4x8 times 8x4, matrix 1 is constant + Q.at(0, 0) = D(X001 * F(0.906127f) + X003 * F(-0.318190f) + X005 * F(0.212608f) + X007 * F(-0.180240f)); + Q.at(0, 1) = X002; + Q.at(0, 2) = D(X001 * F(-0.074658f) + X003 * F(0.513280f) + X005 * F(0.768178f) + X007 * F(-0.375330f)); + Q.at(0, 3) = X006; + Q.at(1, 0) = D(X011 * F(0.906127f) + X013 * F(-0.318190f) + X015 * F(0.212608f) + X017 * F(-0.180240f)); + Q.at(1, 1) = X012; + Q.at(1, 2) = D(X011 * F(-0.074658f) + X013 * F(0.513280f) + X015 * F(0.768178f) + X017 * F(-0.375330f)); + Q.at(1, 3) = X016; + Q.at(2, 0) = D(X021 * F(0.906127f) + X023 * F(-0.318190f) + X025 * F(0.212608f) + X027 * F(-0.180240f)); + Q.at(2, 1) = X022; + Q.at(2, 2) = D(X021 * F(-0.074658f) + X023 * F(0.513280f) + X025 * F(0.768178f) + X027 * F(-0.375330f)); + Q.at(2, 3) = X026; + Q.at(3, 0) = D(X031 * F(0.906127f) + X033 * F(-0.318190f) + X035 * F(0.212608f) + X037 * F(-0.180240f)); + Q.at(3, 1) = X032; + Q.at(3, 2) = D(X031 * F(-0.074658f) + X033 * F(0.513280f) + X035 * F(0.768178f) + X037 * F(-0.375330f)); + Q.at(3, 3) = X036; + // 40 muls 24 adds + } + }; + + template + struct R_S + { + static void calc(Matrix44& R, Matrix44& S, const jpgd_block_t* pSrc) + { + // 4x8 = 4x8 times 8x8, matrix 0 is constant + 
const Temp_Type X100 = D(F(0.906127f) * AT(1, 0) + F(-0.318190f) * AT(3, 0) + F(0.212608f) * AT(5, 0) + F(-0.180240f) * AT(7, 0)); + const Temp_Type X101 = D(F(0.906127f) * AT(1, 1) + F(-0.318190f) * AT(3, 1) + F(0.212608f) * AT(5, 1) + F(-0.180240f) * AT(7, 1)); + const Temp_Type X102 = D(F(0.906127f) * AT(1, 2) + F(-0.318190f) * AT(3, 2) + F(0.212608f) * AT(5, 2) + F(-0.180240f) * AT(7, 2)); + const Temp_Type X103 = D(F(0.906127f) * AT(1, 3) + F(-0.318190f) * AT(3, 3) + F(0.212608f) * AT(5, 3) + F(-0.180240f) * AT(7, 3)); + const Temp_Type X104 = D(F(0.906127f) * AT(1, 4) + F(-0.318190f) * AT(3, 4) + F(0.212608f) * AT(5, 4) + F(-0.180240f) * AT(7, 4)); + const Temp_Type X105 = D(F(0.906127f) * AT(1, 5) + F(-0.318190f) * AT(3, 5) + F(0.212608f) * AT(5, 5) + F(-0.180240f) * AT(7, 5)); + const Temp_Type X106 = D(F(0.906127f) * AT(1, 6) + F(-0.318190f) * AT(3, 6) + F(0.212608f) * AT(5, 6) + F(-0.180240f) * AT(7, 6)); + const Temp_Type X107 = D(F(0.906127f) * AT(1, 7) + F(-0.318190f) * AT(3, 7) + F(0.212608f) * AT(5, 7) + F(-0.180240f) * AT(7, 7)); + const Temp_Type X110 = AT(2, 0); + const Temp_Type X111 = AT(2, 1); + const Temp_Type X112 = AT(2, 2); + const Temp_Type X113 = AT(2, 3); + const Temp_Type X114 = AT(2, 4); + const Temp_Type X115 = AT(2, 5); + const Temp_Type X116 = AT(2, 6); + const Temp_Type X117 = AT(2, 7); + const Temp_Type X120 = D(F(-0.074658f) * AT(1, 0) + F(0.513280f) * AT(3, 0) + F(0.768178f) * AT(5, 0) + F(-0.375330f) * AT(7, 0)); + const Temp_Type X121 = D(F(-0.074658f) * AT(1, 1) + F(0.513280f) * AT(3, 1) + F(0.768178f) * AT(5, 1) + F(-0.375330f) * AT(7, 1)); + const Temp_Type X122 = D(F(-0.074658f) * AT(1, 2) + F(0.513280f) * AT(3, 2) + F(0.768178f) * AT(5, 2) + F(-0.375330f) * AT(7, 2)); + const Temp_Type X123 = D(F(-0.074658f) * AT(1, 3) + F(0.513280f) * AT(3, 3) + F(0.768178f) * AT(5, 3) + F(-0.375330f) * AT(7, 3)); + const Temp_Type X124 = D(F(-0.074658f) * AT(1, 4) + F(0.513280f) * AT(3, 4) + F(0.768178f) * AT(5, 4) + F(-0.375330f) * 
AT(7, 4)); + const Temp_Type X125 = D(F(-0.074658f) * AT(1, 5) + F(0.513280f) * AT(3, 5) + F(0.768178f) * AT(5, 5) + F(-0.375330f) * AT(7, 5)); + const Temp_Type X126 = D(F(-0.074658f) * AT(1, 6) + F(0.513280f) * AT(3, 6) + F(0.768178f) * AT(5, 6) + F(-0.375330f) * AT(7, 6)); + const Temp_Type X127 = D(F(-0.074658f) * AT(1, 7) + F(0.513280f) * AT(3, 7) + F(0.768178f) * AT(5, 7) + F(-0.375330f) * AT(7, 7)); + const Temp_Type X130 = AT(6, 0); + const Temp_Type X131 = AT(6, 1); + const Temp_Type X132 = AT(6, 2); + const Temp_Type X133 = AT(6, 3); + const Temp_Type X134 = AT(6, 4); + const Temp_Type X135 = AT(6, 5); + const Temp_Type X136 = AT(6, 6); + const Temp_Type X137 = AT(6, 7); + // 80 muls 48 adds + + // 4x4 = 4x8 times 8x4, matrix 1 is constant + R.at(0, 0) = X100; + R.at(0, 1) = D(X101 * F(0.415735f) + X103 * F(0.791065f) + X105 * F(-0.352443f) + X107 * F(0.277785f)); + R.at(0, 2) = X104; + R.at(0, 3) = D(X101 * F(0.022887f) + X103 * F(-0.097545f) + X105 * F(0.490393f) + X107 * F(0.865723f)); + R.at(1, 0) = X110; + R.at(1, 1) = D(X111 * F(0.415735f) + X113 * F(0.791065f) + X115 * F(-0.352443f) + X117 * F(0.277785f)); + R.at(1, 2) = X114; + R.at(1, 3) = D(X111 * F(0.022887f) + X113 * F(-0.097545f) + X115 * F(0.490393f) + X117 * F(0.865723f)); + R.at(2, 0) = X120; + R.at(2, 1) = D(X121 * F(0.415735f) + X123 * F(0.791065f) + X125 * F(-0.352443f) + X127 * F(0.277785f)); + R.at(2, 2) = X124; + R.at(2, 3) = D(X121 * F(0.022887f) + X123 * F(-0.097545f) + X125 * F(0.490393f) + X127 * F(0.865723f)); + R.at(3, 0) = X130; + R.at(3, 1) = D(X131 * F(0.415735f) + X133 * F(0.791065f) + X135 * F(-0.352443f) + X137 * F(0.277785f)); + R.at(3, 2) = X134; + R.at(3, 3) = D(X131 * F(0.022887f) + X133 * F(-0.097545f) + X135 * F(0.490393f) + X137 * F(0.865723f)); + // 40 muls 24 adds + // 4x4 = 4x8 times 8x4, matrix 1 is constant + S.at(0, 0) = D(X101 * F(0.906127f) + X103 * F(-0.318190f) + X105 * F(0.212608f) + X107 * F(-0.180240f)); + S.at(0, 1) = X102; + S.at(0, 2) = D(X101 * 
F(-0.074658f) + X103 * F(0.513280f) + X105 * F(0.768178f) + X107 * F(-0.375330f)); + S.at(0, 3) = X106; + S.at(1, 0) = D(X111 * F(0.906127f) + X113 * F(-0.318190f) + X115 * F(0.212608f) + X117 * F(-0.180240f)); + S.at(1, 1) = X112; + S.at(1, 2) = D(X111 * F(-0.074658f) + X113 * F(0.513280f) + X115 * F(0.768178f) + X117 * F(-0.375330f)); + S.at(1, 3) = X116; + S.at(2, 0) = D(X121 * F(0.906127f) + X123 * F(-0.318190f) + X125 * F(0.212608f) + X127 * F(-0.180240f)); + S.at(2, 1) = X122; + S.at(2, 2) = D(X121 * F(-0.074658f) + X123 * F(0.513280f) + X125 * F(0.768178f) + X127 * F(-0.375330f)); + S.at(2, 3) = X126; + S.at(3, 0) = D(X131 * F(0.906127f) + X133 * F(-0.318190f) + X135 * F(0.212608f) + X137 * F(-0.180240f)); + S.at(3, 1) = X132; + S.at(3, 2) = D(X131 * F(-0.074658f) + X133 * F(0.513280f) + X135 * F(0.768178f) + X137 * F(-0.375330f)); + S.at(3, 3) = X136; + // 40 muls 24 adds + } + }; + } // end namespace DCT_Upsample + + // Unconditionally frees all allocated m_blocks. + void jpeg_decoder::free_all_blocks() + { + m_pStream = NULL; + for (mem_block *b = m_pMem_blocks; b; ) + { + mem_block *n = b->m_pNext; + jpgd_free(b); + b = n; + } + m_pMem_blocks = NULL; + } + + // This method handles all errors. + // It could easily be changed to use C++ exceptions. 
+ void jpeg_decoder::stop_decoding(jpgd_status status) + { + m_error_code = status; + free_all_blocks(); + longjmp(m_jmp_state, status); + + // we shouldn't get here as longjmp shouldn't return, but we put it here to make it explicit + // that this function doesn't return, otherwise we get this error: + // + // error : function declared 'noreturn' should not return + exit(1); + } + + void *jpeg_decoder::alloc(size_t nSize, bool zero) + { + nSize = (JPGD_MAX(nSize, 1) + 3) & ~3; + char *rv = NULL; + for (mem_block *b = m_pMem_blocks; b; b = b->m_pNext) + { + if ((b->m_used_count + nSize) <= b->m_size) + { + rv = b->m_data + b->m_used_count; + b->m_used_count += nSize; + break; + } + } + if (!rv) + { + int capacity = JPGD_MAX(32768 - 256, (nSize + 2047) & ~2047); + mem_block *b = (mem_block*)jpgd_malloc(sizeof(mem_block) + capacity); + if (!b) stop_decoding(JPGD_NOTENOUGHMEM); + b->m_pNext = m_pMem_blocks; m_pMem_blocks = b; + b->m_used_count = nSize; + b->m_size = capacity; + rv = b->m_data; + } + if (zero) memset(rv, 0, nSize); + return rv; + } + + void jpeg_decoder::word_clear(void *p, uint16 c, uint n) + { + uint8 *pD = (uint8*)p; + const uint8 l = c & 0xFF, h = (c >> 8) & 0xFF; + while (n) + { + pD[0] = l; pD[1] = h; pD += 2; + n--; + } + } + + // Refill the input buffer. + // This method will sit in a loop until (A) the buffer is full or (B) + // the stream's read() method reports and end of file condition. + void jpeg_decoder::prep_in_buffer() + { + m_in_buf_left = 0; + m_pIn_buf_ofs = m_in_buf; + + if (m_eof_flag) + return; + + do + { + int bytes_read = m_pStream->read(m_in_buf + m_in_buf_left, JPGD_IN_BUF_SIZE - m_in_buf_left, &m_eof_flag); + if (bytes_read == -1) + stop_decoding(JPGD_STREAM_READ); + + m_in_buf_left += bytes_read; + } while ((m_in_buf_left < JPGD_IN_BUF_SIZE) && (!m_eof_flag)); + + m_total_bytes_read += m_in_buf_left; + + // Pad the end of the block with M_EOI (prevents the decompressor from going off the rails if the stream is invalid). 
+ // (This dates way back to when this decompressor was written in C/asm, and the all-asm Huffman decoder did some fancy things to increase perf.) + word_clear(m_pIn_buf_ofs + m_in_buf_left, 0xD9FF, 64); + } + + // Read a Huffman code table. + void jpeg_decoder::read_dht_marker() + { + int i, index, count; + uint8 huff_num[17]; + uint8 huff_val[256]; + + uint num_left = get_bits(16); + + if (num_left < 2) + stop_decoding(JPGD_BAD_DHT_MARKER); + + num_left -= 2; + + while (num_left) + { + index = get_bits(8); + + huff_num[0] = 0; + + count = 0; + + for (i = 1; i <= 16; i++) + { + huff_num[i] = static_cast(get_bits(8)); + count += huff_num[i]; + } + + if (count > 255) + stop_decoding(JPGD_BAD_DHT_COUNTS); + + for (i = 0; i < count; i++) + huff_val[i] = static_cast(get_bits(8)); + + i = 1 + 16 + count; + + if (num_left < (uint)i) + stop_decoding(JPGD_BAD_DHT_MARKER); + + num_left -= i; + + if ((index & 0x10) > 0x10) + stop_decoding(JPGD_BAD_DHT_INDEX); + + index = (index & 0x0F) + ((index & 0x10) >> 4) * (JPGD_MAX_HUFF_TABLES >> 1); + + if (index >= JPGD_MAX_HUFF_TABLES) + stop_decoding(JPGD_BAD_DHT_INDEX); + + if (!m_huff_num[index]) + m_huff_num[index] = (uint8 *)alloc(17); + + if (!m_huff_val[index]) + m_huff_val[index] = (uint8 *)alloc(256); + + m_huff_ac[index] = (index & 0x10) != 0; + memcpy(m_huff_num[index], huff_num, 17); + memcpy(m_huff_val[index], huff_val, 256); + } + } + + // Read a quantization table. 
+ void jpeg_decoder::read_dqt_marker() + { + int n, i, prec; + uint num_left; + uint temp; + + num_left = get_bits(16); + + if (num_left < 2) + stop_decoding(JPGD_BAD_DQT_MARKER); + + num_left -= 2; + + while (num_left) + { + n = get_bits(8); + prec = n >> 4; + n &= 0x0F; + + if (n >= JPGD_MAX_QUANT_TABLES) + stop_decoding(JPGD_BAD_DQT_TABLE); + + if (!m_quant[n]) + m_quant[n] = (jpgd_quant_t *)alloc(64 * sizeof(jpgd_quant_t)); + + // read quantization entries, in zag order + for (i = 0; i < 64; i++) + { + temp = get_bits(8); + + if (prec) + temp = (temp << 8) + get_bits(8); + + m_quant[n][i] = static_cast(temp); + } + + i = 64 + 1; + + if (prec) + i += 64; + + if (num_left < (uint)i) + stop_decoding(JPGD_BAD_DQT_LENGTH); + + num_left -= i; + } + } + + // Read the start of frame (SOF) marker. + void jpeg_decoder::read_sof_marker() + { + int i; + uint num_left; + + num_left = get_bits(16); + + if (get_bits(8) != 8) /* precision: sorry, only 8-bit precision is supported right now */ + stop_decoding(JPGD_BAD_PRECISION); + + m_image_y_size = get_bits(16); + + if ((m_image_y_size < 1) || (m_image_y_size > JPGD_MAX_HEIGHT)) + stop_decoding(JPGD_BAD_HEIGHT); + + m_image_x_size = get_bits(16); + + if ((m_image_x_size < 1) || (m_image_x_size > JPGD_MAX_WIDTH)) + stop_decoding(JPGD_BAD_WIDTH); + + m_comps_in_frame = get_bits(8); + + if (m_comps_in_frame > JPGD_MAX_COMPONENTS) + stop_decoding(JPGD_TOO_MANY_COMPONENTS); + + if (num_left != (uint)(m_comps_in_frame * 3 + 8)) + stop_decoding(JPGD_BAD_SOF_LENGTH); + + for (i = 0; i < m_comps_in_frame; i++) + { + m_comp_ident[i] = get_bits(8); + m_comp_h_samp[i] = get_bits(4); + m_comp_v_samp[i] = get_bits(4); + m_comp_quant[i] = get_bits(8); + } + } + + // Used to skip unrecognized markers. 
+ void jpeg_decoder::skip_variable_marker() + { + uint num_left; + + num_left = get_bits(16); + + if (num_left < 2) + stop_decoding(JPGD_BAD_VARIABLE_MARKER); + + num_left -= 2; + + while (num_left) + { + get_bits(8); + num_left--; + } + } + + // Read a define restart interval (DRI) marker. + void jpeg_decoder::read_dri_marker() + { + if (get_bits(16) != 4) + stop_decoding(JPGD_BAD_DRI_LENGTH); + + m_restart_interval = get_bits(16); + } + + // Read a start of scan (SOS) marker. + void jpeg_decoder::read_sos_marker() + { + uint num_left; + int i, ci, n, c, cc; + + num_left = get_bits(16); + + n = get_bits(8); + + m_comps_in_scan = n; + + num_left -= 3; + + if ( (num_left != (uint)(n * 2 + 3)) || (n < 1) || (n > JPGD_MAX_COMPS_IN_SCAN) ) + stop_decoding(JPGD_BAD_SOS_LENGTH); + + for (i = 0; i < n; i++) + { + cc = get_bits(8); + c = get_bits(8); + num_left -= 2; + + for (ci = 0; ci < m_comps_in_frame; ci++) + if (cc == m_comp_ident[ci]) + break; + + if (ci >= m_comps_in_frame) + stop_decoding(JPGD_BAD_SOS_COMP_ID); + + m_comp_list[i] = ci; + m_comp_dc_tab[ci] = (c >> 4) & 15; + m_comp_ac_tab[ci] = (c & 15) + (JPGD_MAX_HUFF_TABLES >> 1); + } + + m_spectral_start = get_bits(8); + m_spectral_end = get_bits(8); + m_successive_high = get_bits(4); + m_successive_low = get_bits(4); + + if (!m_progressive_flag) + { + m_spectral_start = 0; + m_spectral_end = 63; + } + + num_left -= 3; + + while (num_left) /* read past whatever is num_left */ + { + get_bits(8); + num_left--; + } + } + + // Finds the next marker. + int jpeg_decoder::next_marker() + { + uint c, bytes; + + bytes = 0; + + do + { + do + { + bytes++; + c = get_bits(8); + } while (c != 0xFF); + + do + { + c = get_bits(8); + } while (c == 0xFF); + + } while (c == 0); + + // If bytes > 0 here, there where extra bytes before the marker (not good). + + return c; + } + + // Process markers. Returns when an SOFx, SOI, EOI, or SOS marker is + // encountered. 
+ int jpeg_decoder::process_markers() + { + int c; + + for ( ; ; ) + { + c = next_marker(); + + switch (c) + { + case M_SOF0: + case M_SOF1: + case M_SOF2: + case M_SOF3: + case M_SOF5: + case M_SOF6: + case M_SOF7: + // case M_JPG: + case M_SOF9: + case M_SOF10: + case M_SOF11: + case M_SOF13: + case M_SOF14: + case M_SOF15: + case M_SOI: + case M_EOI: + case M_SOS: + { + return c; + } + case M_DHT: + { + read_dht_marker(); + break; + } + // No arithmitic support - dumb patents! + case M_DAC: + { + stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT); + break; + } + case M_DQT: + { + read_dqt_marker(); + break; + } + case M_DRI: + { + read_dri_marker(); + break; + } + //case M_APP0: /* no need to read the JFIF marker */ + + case M_JPG: + case M_RST0: /* no parameters */ + case M_RST1: + case M_RST2: + case M_RST3: + case M_RST4: + case M_RST5: + case M_RST6: + case M_RST7: + case M_TEM: + { + stop_decoding(JPGD_UNEXPECTED_MARKER); + break; + } + default: /* must be DNL, DHP, EXP, APPn, JPGn, COM, or RESn or APP0 */ + { + skip_variable_marker(); + break; + } + } + } + } + + // Finds the start of image (SOI) marker. + // This code is rather defensive: it only checks the first 512 bytes to avoid + // false positives. + void jpeg_decoder::locate_soi_marker() + { + uint lastchar, thischar; + uint bytesleft; + + lastchar = get_bits(8); + + thischar = get_bits(8); + + /* ok if it's a normal JPEG file without a special header */ + + if ((lastchar == 0xFF) && (thischar == M_SOI)) + return; + + bytesleft = 4096; //512; + + for ( ; ; ) + { + if (--bytesleft == 0) + stop_decoding(JPGD_NOT_JPEG); + + lastchar = thischar; + + thischar = get_bits(8); + + if (lastchar == 0xFF) + { + if (thischar == M_SOI) + break; + else if (thischar == M_EOI) // get_bits will keep returning M_EOI if we read past the end + stop_decoding(JPGD_NOT_JPEG); + } + } + + // Check the next character after marker: if it's not 0xFF, it can't be the start of the next marker, so the file is bad. 
+ thischar = (m_bit_buf >> 24) & 0xFF; + + if (thischar != 0xFF) + stop_decoding(JPGD_NOT_JPEG); + } + + // Find a start of frame (SOF) marker. + void jpeg_decoder::locate_sof_marker() + { + locate_soi_marker(); + + int c = process_markers(); + + switch (c) + { + case M_SOF2: + m_progressive_flag = JPGD_TRUE; + case M_SOF0: /* baseline DCT */ + case M_SOF1: /* extended sequential DCT */ + { + read_sof_marker(); + break; + } + case M_SOF9: /* Arithmitic coding */ + { + stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT); + break; + } + default: + { + stop_decoding(JPGD_UNSUPPORTED_MARKER); + break; + } + } + } + + // Find a start of scan (SOS) marker. + int jpeg_decoder::locate_sos_marker() + { + int c; + + c = process_markers(); + + if (c == M_EOI) + return JPGD_FALSE; + else if (c != M_SOS) + stop_decoding(JPGD_UNEXPECTED_MARKER); + + read_sos_marker(); + + return JPGD_TRUE; + } + + // Reset everything to default/uninitialized state. + void jpeg_decoder::init(jpeg_decoder_stream *pStream) + { + m_pMem_blocks = NULL; + m_error_code = JPGD_SUCCESS; + m_ready_flag = false; + m_image_x_size = m_image_y_size = 0; + m_pStream = pStream; + m_progressive_flag = JPGD_FALSE; + + memset(m_huff_ac, 0, sizeof(m_huff_ac)); + memset(m_huff_num, 0, sizeof(m_huff_num)); + memset(m_huff_val, 0, sizeof(m_huff_val)); + memset(m_quant, 0, sizeof(m_quant)); + + m_scan_type = 0; + m_comps_in_frame = 0; + + memset(m_comp_h_samp, 0, sizeof(m_comp_h_samp)); + memset(m_comp_v_samp, 0, sizeof(m_comp_v_samp)); + memset(m_comp_quant, 0, sizeof(m_comp_quant)); + memset(m_comp_ident, 0, sizeof(m_comp_ident)); + memset(m_comp_h_blocks, 0, sizeof(m_comp_h_blocks)); + memset(m_comp_v_blocks, 0, sizeof(m_comp_v_blocks)); + + m_comps_in_scan = 0; + memset(m_comp_list, 0, sizeof(m_comp_list)); + memset(m_comp_dc_tab, 0, sizeof(m_comp_dc_tab)); + memset(m_comp_ac_tab, 0, sizeof(m_comp_ac_tab)); + + m_spectral_start = 0; + m_spectral_end = 0; + m_successive_low = 0; + m_successive_high = 0; + m_max_mcu_x_size = 
0; + m_max_mcu_y_size = 0; + m_blocks_per_mcu = 0; + m_max_blocks_per_row = 0; + m_mcus_per_row = 0; + m_mcus_per_col = 0; + m_expanded_blocks_per_component = 0; + m_expanded_blocks_per_mcu = 0; + m_expanded_blocks_per_row = 0; + m_freq_domain_chroma_upsample = false; + + memset(m_mcu_org, 0, sizeof(m_mcu_org)); + + m_total_lines_left = 0; + m_mcu_lines_left = 0; + m_real_dest_bytes_per_scan_line = 0; + m_dest_bytes_per_scan_line = 0; + m_dest_bytes_per_pixel = 0; + + memset(m_pHuff_tabs, 0, sizeof(m_pHuff_tabs)); + + memset(m_dc_coeffs, 0, sizeof(m_dc_coeffs)); + memset(m_ac_coeffs, 0, sizeof(m_ac_coeffs)); + memset(m_block_y_mcu, 0, sizeof(m_block_y_mcu)); + + m_eob_run = 0; + + memset(m_block_y_mcu, 0, sizeof(m_block_y_mcu)); + + m_pIn_buf_ofs = m_in_buf; + m_in_buf_left = 0; + m_eof_flag = false; + m_tem_flag = 0; + + memset(m_in_buf_pad_start, 0, sizeof(m_in_buf_pad_start)); + memset(m_in_buf, 0, sizeof(m_in_buf)); + memset(m_in_buf_pad_end, 0, sizeof(m_in_buf_pad_end)); + + m_restart_interval = 0; + m_restarts_left = 0; + m_next_restart_num = 0; + + m_max_mcus_per_row = 0; + m_max_blocks_per_mcu = 0; + m_max_mcus_per_col = 0; + + memset(m_last_dc_val, 0, sizeof(m_last_dc_val)); + m_pMCU_coefficients = NULL; + m_pSample_buf = NULL; + + m_total_bytes_read = 0; + + m_pScan_line_0 = NULL; + m_pScan_line_1 = NULL; + + // Ready the input buffer. + prep_in_buffer(); + + // Prime the bit buffer. + m_bits_left = 16; + m_bit_buf = 0; + + get_bits(16); + get_bits(16); + + for (int i = 0; i < JPGD_MAX_BLOCKS_PER_MCU; i++) + m_mcu_block_max_zag[i] = 64; + } + +#define SCALEBITS 16 +#define ONE_HALF ((int) 1 << (SCALEBITS-1)) +#define FIX(x) ((int) ((x) * (1L<> SCALEBITS; + m_cbb[i] = ( FIX(1.77200f) * k + ONE_HALF) >> SCALEBITS; + m_crg[i] = (-FIX(0.71414f)) * k; + m_cbg[i] = (-FIX(0.34414f)) * k + ONE_HALF; + } + } + + // This method throws back into the stream any bytes that where read + // into the bit buffer during initial marker scanning. 
+ void jpeg_decoder::fix_in_buffer() + { + // In case any 0xFF's where pulled into the buffer during marker scanning. + JPGD_ASSERT((m_bits_left & 7) == 0); + + if (m_bits_left == 16) + stuff_char( (uint8)(m_bit_buf & 0xFF)); + + if (m_bits_left >= 8) + stuff_char( (uint8)((m_bit_buf >> 8) & 0xFF)); + + stuff_char((uint8)((m_bit_buf >> 16) & 0xFF)); + stuff_char((uint8)((m_bit_buf >> 24) & 0xFF)); + + m_bits_left = 16; + get_bits_no_markers(16); + get_bits_no_markers(16); + } + + void jpeg_decoder::transform_mcu(int mcu_row) + { + jpgd_block_t* pSrc_ptr = m_pMCU_coefficients; + uint8* pDst_ptr = m_pSample_buf + mcu_row * m_blocks_per_mcu * 64; + + for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++) + { + idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag[mcu_block]); + pSrc_ptr += 64; + pDst_ptr += 64; + } + } + + static const uint8 s_max_rc[64] = + { + 17, 18, 34, 50, 50, 51, 52, 52, 52, 68, 84, 84, 84, 84, 85, 86, 86, 86, 86, 86, + 102, 118, 118, 118, 118, 118, 118, 119, 120, 120, 120, 120, 120, 120, 120, 136, + 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, + 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136 + }; + + void jpeg_decoder::transform_mcu_expand(int mcu_row) + { + jpgd_block_t* pSrc_ptr = m_pMCU_coefficients; + uint8* pDst_ptr = m_pSample_buf + mcu_row * m_expanded_blocks_per_mcu * 64; + + // Y IDCT + int mcu_block; + for (mcu_block = 0; mcu_block < m_expanded_blocks_per_component; mcu_block++) + { + idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag[mcu_block]); + pSrc_ptr += 64; + pDst_ptr += 64; + } + + // Chroma IDCT, with upsampling + jpgd_block_t temp_block[64]; + + for (int i = 0; i < 2; i++) + { + DCT_Upsample::Matrix44 P, Q, R, S; + + JPGD_ASSERT(m_mcu_block_max_zag[mcu_block] >= 1); + JPGD_ASSERT(m_mcu_block_max_zag[mcu_block] <= 64); + + switch (s_max_rc[m_mcu_block_max_zag[mcu_block++] - 1]) + { + case 1*16+1: + DCT_Upsample::P_Q<1, 1>::calc(P, Q, pSrc_ptr); + DCT_Upsample::R_S<1, 1>::calc(R, S, 
pSrc_ptr); + break; + case 1*16+2: + DCT_Upsample::P_Q<1, 2>::calc(P, Q, pSrc_ptr); + DCT_Upsample::R_S<1, 2>::calc(R, S, pSrc_ptr); + break; + case 2*16+2: + DCT_Upsample::P_Q<2, 2>::calc(P, Q, pSrc_ptr); + DCT_Upsample::R_S<2, 2>::calc(R, S, pSrc_ptr); + break; + case 3*16+2: + DCT_Upsample::P_Q<3, 2>::calc(P, Q, pSrc_ptr); + DCT_Upsample::R_S<3, 2>::calc(R, S, pSrc_ptr); + break; + case 3*16+3: + DCT_Upsample::P_Q<3, 3>::calc(P, Q, pSrc_ptr); + DCT_Upsample::R_S<3, 3>::calc(R, S, pSrc_ptr); + break; + case 3*16+4: + DCT_Upsample::P_Q<3, 4>::calc(P, Q, pSrc_ptr); + DCT_Upsample::R_S<3, 4>::calc(R, S, pSrc_ptr); + break; + case 4*16+4: + DCT_Upsample::P_Q<4, 4>::calc(P, Q, pSrc_ptr); + DCT_Upsample::R_S<4, 4>::calc(R, S, pSrc_ptr); + break; + case 5*16+4: + DCT_Upsample::P_Q<5, 4>::calc(P, Q, pSrc_ptr); + DCT_Upsample::R_S<5, 4>::calc(R, S, pSrc_ptr); + break; + case 5*16+5: + DCT_Upsample::P_Q<5, 5>::calc(P, Q, pSrc_ptr); + DCT_Upsample::R_S<5, 5>::calc(R, S, pSrc_ptr); + break; + case 5*16+6: + DCT_Upsample::P_Q<5, 6>::calc(P, Q, pSrc_ptr); + DCT_Upsample::R_S<5, 6>::calc(R, S, pSrc_ptr); + break; + case 6*16+6: + DCT_Upsample::P_Q<6, 6>::calc(P, Q, pSrc_ptr); + DCT_Upsample::R_S<6, 6>::calc(R, S, pSrc_ptr); + break; + case 7*16+6: + DCT_Upsample::P_Q<7, 6>::calc(P, Q, pSrc_ptr); + DCT_Upsample::R_S<7, 6>::calc(R, S, pSrc_ptr); + break; + case 7*16+7: + DCT_Upsample::P_Q<7, 7>::calc(P, Q, pSrc_ptr); + DCT_Upsample::R_S<7, 7>::calc(R, S, pSrc_ptr); + break; + case 7*16+8: + DCT_Upsample::P_Q<7, 8>::calc(P, Q, pSrc_ptr); + DCT_Upsample::R_S<7, 8>::calc(R, S, pSrc_ptr); + break; + case 8*16+8: + DCT_Upsample::P_Q<8, 8>::calc(P, Q, pSrc_ptr); + DCT_Upsample::R_S<8, 8>::calc(R, S, pSrc_ptr); + break; + default: + JPGD_ASSERT(false); + } + + DCT_Upsample::Matrix44 a(P + Q); P -= Q; + DCT_Upsample::Matrix44& b = P; + DCT_Upsample::Matrix44 c(R + S); R -= S; + DCT_Upsample::Matrix44& d = R; + + DCT_Upsample::Matrix44::add_and_store(temp_block, a, c); + 
idct_4x4(temp_block, pDst_ptr); + pDst_ptr += 64; + + DCT_Upsample::Matrix44::sub_and_store(temp_block, a, c); + idct_4x4(temp_block, pDst_ptr); + pDst_ptr += 64; + + DCT_Upsample::Matrix44::add_and_store(temp_block, b, d); + idct_4x4(temp_block, pDst_ptr); + pDst_ptr += 64; + + DCT_Upsample::Matrix44::sub_and_store(temp_block, b, d); + idct_4x4(temp_block, pDst_ptr); + pDst_ptr += 64; + + pSrc_ptr += 64; + } + } + + // Loads and dequantizes the next row of (already decoded) coefficients. + // Progressive images only. + void jpeg_decoder::load_next_row() + { + int i; + jpgd_block_t *p; + jpgd_quant_t *q; + int mcu_row, mcu_block, row_block = 0; + int component_num, component_id; + int block_x_mcu[JPGD_MAX_COMPONENTS]; + + memset(block_x_mcu, 0, JPGD_MAX_COMPONENTS * sizeof(int)); + + for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++) + { + int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0; + + for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++) + { + component_id = m_mcu_org[mcu_block]; + q = m_quant[m_comp_quant[component_id]]; + + p = m_pMCU_coefficients + 64 * mcu_block; + + jpgd_block_t* pAC = coeff_buf_getp(m_ac_coeffs[component_id], block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs); + jpgd_block_t* pDC = coeff_buf_getp(m_dc_coeffs[component_id], block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs); + p[0] = pDC[0]; + memcpy(&p[1], &pAC[1], 63 * sizeof(jpgd_block_t)); + + for (i = 63; i > 0; i--) + if (p[g_ZAG[i]]) + break; + + m_mcu_block_max_zag[mcu_block] = i + 1; + + for ( ; i >= 0; i--) + if (p[g_ZAG[i]]) + p[g_ZAG[i]] = static_cast(p[g_ZAG[i]] * q[i]); + + row_block++; + + if (m_comps_in_scan == 1) + block_x_mcu[component_id]++; + else + { + if (++block_x_mcu_ofs == m_comp_h_samp[component_id]) + { + block_x_mcu_ofs = 0; + + if (++block_y_mcu_ofs == m_comp_v_samp[component_id]) + { + block_y_mcu_ofs = 0; + + block_x_mcu[component_id] += m_comp_h_samp[component_id]; 
+ } + } + } + } + + if (m_freq_domain_chroma_upsample) + transform_mcu_expand(mcu_row); + else + transform_mcu(mcu_row); + } + + if (m_comps_in_scan == 1) + m_block_y_mcu[m_comp_list[0]]++; + else + { + for (component_num = 0; component_num < m_comps_in_scan; component_num++) + { + component_id = m_comp_list[component_num]; + + m_block_y_mcu[component_id] += m_comp_v_samp[component_id]; + } + } + } + + // Restart interval processing. + void jpeg_decoder::process_restart() + { + int i; + int c = 0; + + // Align to a byte boundry + // FIXME: Is this really necessary? get_bits_no_markers() never reads in markers! + //get_bits_no_markers(m_bits_left & 7); + + // Let's scan a little bit to find the marker, but not _too_ far. + // 1536 is a "fudge factor" that determines how much to scan. + for (i = 1536; i > 0; i--) + if (get_char() == 0xFF) + break; + + if (i == 0) + stop_decoding(JPGD_BAD_RESTART_MARKER); + + for ( ; i > 0; i--) + if ((c = get_char()) != 0xFF) + break; + + if (i == 0) + stop_decoding(JPGD_BAD_RESTART_MARKER); + + // Is it the expected marker? If not, something bad happened. + if (c != (m_next_restart_num + M_RST0)) + stop_decoding(JPGD_BAD_RESTART_MARKER); + + // Reset each component's DC prediction values. + memset(&m_last_dc_val, 0, m_comps_in_frame * sizeof(uint)); + + m_eob_run = 0; + + m_restarts_left = m_restart_interval; + + m_next_restart_num = (m_next_restart_num + 1) & 7; + + // Get the bit buffer going again... + + m_bits_left = 16; + get_bits_no_markers(16); + get_bits_no_markers(16); + } + + static inline int dequantize_ac(int c, int q) { c *= q; return c; } + + // Decodes and dequantizes the next row of coefficients. 
+ void jpeg_decoder::decode_next_row() + { + int row_block = 0; + + for (int mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++) + { + if ((m_restart_interval) && (m_restarts_left == 0)) + process_restart(); + + jpgd_block_t* p = m_pMCU_coefficients; + for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++, p += 64) + { + int component_id = m_mcu_org[mcu_block]; + jpgd_quant_t* q = m_quant[m_comp_quant[component_id]]; + + int r, s; + s = huff_decode(m_pHuff_tabs[m_comp_dc_tab[component_id]], r); + s = HUFF_EXTEND(r, s); + + m_last_dc_val[component_id] = (s += m_last_dc_val[component_id]); + + p[0] = static_cast(s * q[0]); + + int prev_num_set = m_mcu_block_max_zag[mcu_block]; + + huff_tables *pH = m_pHuff_tabs[m_comp_ac_tab[component_id]]; + + int k; + for (k = 1; k < 64; k++) + { + int extra_bits; + s = huff_decode(pH, extra_bits); + + r = s >> 4; + s &= 15; + + if (s) + { + if (r) + { + if ((k + r) > 63) + stop_decoding(JPGD_DECODE_ERROR); + + if (k < prev_num_set) + { + int n = JPGD_MIN(r, prev_num_set - k); + int kt = k; + while (n--) + p[g_ZAG[kt++]] = 0; + } + + k += r; + } + + s = HUFF_EXTEND(extra_bits, s); + + JPGD_ASSERT(k < 64); + + p[g_ZAG[k]] = static_cast(dequantize_ac(s, q[k])); //s * q[k]; + } + else + { + if (r == 15) + { + if ((k + 16) > 64) + stop_decoding(JPGD_DECODE_ERROR); + + if (k < prev_num_set) + { + int n = JPGD_MIN(16, prev_num_set - k); + int kt = k; + while (n--) + { + JPGD_ASSERT(kt <= 63); + p[g_ZAG[kt++]] = 0; + } + } + + k += 16 - 1; // - 1 because the loop counter is k + // BEGIN EPIC MOD + JPGD_ASSERT(k < 64 && p[g_ZAG[k]] == 0); + // END EPIC MOD + } + else + break; + } + } + + if (k < prev_num_set) + { + int kt = k; + while (kt < prev_num_set) + p[g_ZAG[kt++]] = 0; + } + + m_mcu_block_max_zag[mcu_block] = k; + + row_block++; + } + + if (m_freq_domain_chroma_upsample) + transform_mcu_expand(mcu_row); + else + transform_mcu(mcu_row); + + m_restarts_left--; + } + } + + // YCbCr H1V1 (1x1:1:1, 3 m_blocks per MCU) to RGB + 
void jpeg_decoder::H1V1Convert() + { + int row = m_max_mcu_y_size - m_mcu_lines_left; + uint8 *d = m_pScan_line_0; + uint8 *s = m_pSample_buf + row * 8; + + for (int i = m_max_mcus_per_row; i > 0; i--) + { + for (int j = 0; j < 8; j++) + { + int y = s[j]; + int cb = s[64+j]; + int cr = s[128+j]; + + if (jpg_format == ERGBFormatJPG::BGRA) + { + d[0] = clamp(y + m_cbb[cb]); + d[1] = clamp(y + ((m_crg[cr] + m_cbg[cb]) >> 16)); + d[2] = clamp(y + m_crr[cr]); + d[3] = 255; + } + else + { + d[0] = clamp(y + m_crr[cr]); + d[1] = clamp(y + ((m_crg[cr] + m_cbg[cb]) >> 16)); + d[2] = clamp(y + m_cbb[cb]); + d[3] = 255; + } + d += 4; + } + + s += 64*3; + } + } + + // YCbCr H2V1 (2x1:1:1, 4 m_blocks per MCU) to RGB + void jpeg_decoder::H2V1Convert() + { + int row = m_max_mcu_y_size - m_mcu_lines_left; + uint8 *d0 = m_pScan_line_0; + uint8 *y = m_pSample_buf + row * 8; + uint8 *c = m_pSample_buf + 2*64 + row * 8; + + for (int i = m_max_mcus_per_row; i > 0; i--) + { + for (int l = 0; l < 2; l++) + { + for (int j = 0; j < 4; j++) + { + int cb = c[0]; + int cr = c[64]; + + int rc = m_crr[cr]; + int gc = ((m_crg[cr] + m_cbg[cb]) >> 16); + int bc = m_cbb[cb]; + + int yy = y[j<<1]; + if (jpg_format == ERGBFormatJPG::BGRA) + { + d0[0] = clamp(yy+bc); + d0[1] = clamp(yy+gc); + d0[2] = clamp(yy+rc); + d0[3] = 255; + yy = y[(j<<1)+1]; + d0[4] = clamp(yy+bc); + d0[5] = clamp(yy+gc); + d0[6] = clamp(yy+rc); + d0[7] = 255; + } + else + { + d0[0] = clamp(yy+rc); + d0[1] = clamp(yy+gc); + d0[2] = clamp(yy+bc); + d0[3] = 255; + yy = y[(j<<1)+1]; + d0[4] = clamp(yy+rc); + d0[5] = clamp(yy+gc); + d0[6] = clamp(yy+bc); + d0[7] = 255; + } + + d0 += 8; + + c++; + } + y += 64; + } + + y += 64*4 - 64*2; + c += 64*4 - 8; + } + } + + // YCbCr H2V1 (1x2:1:1, 4 m_blocks per MCU) to RGB + void jpeg_decoder::H1V2Convert() + { + int row = m_max_mcu_y_size - m_mcu_lines_left; + uint8 *d0 = m_pScan_line_0; + uint8 *d1 = m_pScan_line_1; + uint8 *y; + uint8 *c; + + if (row < 8) + y = m_pSample_buf + row * 8; + 
else + y = m_pSample_buf + 64*1 + (row & 7) * 8; + + c = m_pSample_buf + 64*2 + (row >> 1) * 8; + + for (int i = m_max_mcus_per_row; i > 0; i--) + { + for (int j = 0; j < 8; j++) + { + int cb = c[0+j]; + int cr = c[64+j]; + + int rc = m_crr[cr]; + int gc = ((m_crg[cr] + m_cbg[cb]) >> 16); + int bc = m_cbb[cb]; + + int yy = y[j]; + if (jpg_format == ERGBFormatJPG::BGRA) + { + d0[0] = clamp(yy+bc); + d0[1] = clamp(yy+gc); + d0[2] = clamp(yy+rc); + d0[3] = 255; + yy = y[8+j]; + d1[0] = clamp(yy+bc); + d1[1] = clamp(yy+gc); + d1[2] = clamp(yy+rc); + d1[3] = 255; + } + else + { + d0[0] = clamp(yy+rc); + d0[1] = clamp(yy+gc); + d0[2] = clamp(yy+bc); + d0[3] = 255; + yy = y[8+j]; + d1[0] = clamp(yy+rc); + d1[1] = clamp(yy+gc); + d1[2] = clamp(yy+bc); + d1[3] = 255; + } + + d0 += 4; + d1 += 4; + } + + y += 64*4; + c += 64*4; + } + } + + // YCbCr H2V2 (2x2:1:1, 6 m_blocks per MCU) to RGB + void jpeg_decoder::H2V2Convert() + { + int row = m_max_mcu_y_size - m_mcu_lines_left; + uint8 *d0 = m_pScan_line_0; + uint8 *d1 = m_pScan_line_1; + uint8 *y; + uint8 *c; + + if (row < 8) + y = m_pSample_buf + row * 8; + else + y = m_pSample_buf + 64*2 + (row & 7) * 8; + + c = m_pSample_buf + 64*4 + (row >> 1) * 8; + + for (int i = m_max_mcus_per_row; i > 0; i--) + { + for (int l = 0; l < 2; l++) + { + for (int j = 0; j < 8; j += 2) + { + int cb = c[0]; + int cr = c[64]; + + int rc = m_crr[cr]; + int gc = ((m_crg[cr] + m_cbg[cb]) >> 16); + int bc = m_cbb[cb]; + + int yy = y[j]; + if (jpg_format == ERGBFormatJPG::BGRA) + { + d0[0] = clamp(yy+bc); + d0[1] = clamp(yy+gc); + d0[2] = clamp(yy+rc); + d0[3] = 255; + yy = y[j+1]; + d0[4] = clamp(yy+bc); + d0[5] = clamp(yy+gc); + d0[6] = clamp(yy+rc); + d0[7] = 255; + yy = y[j+8]; + d1[0] = clamp(yy+bc); + d1[1] = clamp(yy+gc); + d1[2] = clamp(yy+rc); + d1[3] = 255; + yy = y[j+8+1]; + d1[4] = clamp(yy+bc); + d1[5] = clamp(yy+gc); + d1[6] = clamp(yy+rc); + d1[7] = 255; + } + else + { + d0[0] = clamp(yy+rc); + d0[1] = clamp(yy+gc); + d0[2] = 
clamp(yy+bc); + d0[3] = 255; + yy = y[j+1]; + d0[4] = clamp(yy+rc); + d0[5] = clamp(yy+gc); + d0[6] = clamp(yy+bc); + d0[7] = 255; + yy = y[j+8]; + d1[0] = clamp(yy+rc); + d1[1] = clamp(yy+gc); + d1[2] = clamp(yy+bc); + d1[3] = 255; + yy = y[j+8+1]; + d1[4] = clamp(yy+rc); + d1[5] = clamp(yy+gc); + d1[6] = clamp(yy+bc); + d1[7] = 255; + } + + d0 += 8; + d1 += 8; + + c++; + } + y += 64; + } + + y += 64*6 - 64*2; + c += 64*6 - 8; + } + } + + // Y (1 block per MCU) to 8-bit grayscale + void jpeg_decoder::gray_convert() + { + int row = m_max_mcu_y_size - m_mcu_lines_left; + uint8 *d = m_pScan_line_0; + uint8 *s = m_pSample_buf + row * 8; + + for (int i = m_max_mcus_per_row; i > 0; i--) + { + *(uint *)d = *(uint *)s; + *(uint *)(&d[4]) = *(uint *)(&s[4]); + + s += 64; + d += 8; + } + } + + void jpeg_decoder::expanded_convert() + { + int row = m_max_mcu_y_size - m_mcu_lines_left; + + uint8* Py = m_pSample_buf + (row / 8) * 64 * m_comp_h_samp[0] + (row & 7) * 8; + + uint8* d = m_pScan_line_0; + + for (int i = m_max_mcus_per_row; i > 0; i--) + { + for (int k = 0; k < m_max_mcu_x_size; k += 8) + { + const int Y_ofs = k * 8; + const int Cb_ofs = Y_ofs + 64 * m_expanded_blocks_per_component; + const int Cr_ofs = Y_ofs + 64 * m_expanded_blocks_per_component * 2; + for (int j = 0; j < 8; j++) + { + int y = Py[Y_ofs + j]; + int cb = Py[Cb_ofs + j]; + int cr = Py[Cr_ofs + j]; + + if (jpg_format == ERGBFormatJPG::BGRA) + { + d[0] = clamp(y + m_cbb[cb]); + d[1] = clamp(y + ((m_crg[cr] + m_cbg[cb]) >> 16)); + d[2] = clamp(y + m_crr[cr]); + d[3] = 255; + } + else + { + d[0] = clamp(y + m_crr[cr]); + d[1] = clamp(y + ((m_crg[cr] + m_cbg[cb]) >> 16)); + d[2] = clamp(y + m_cbb[cb]); + d[3] = 255; + } + + d += 4; + } + } + + Py += 64 * m_expanded_blocks_per_mcu; + } + } + + // Find end of image (EOI) marker, so we can return to the user the exact size of the input stream. + void jpeg_decoder::find_eoi() + { + if (!m_progressive_flag) + { + // Attempt to read the EOI marker. 
+ //get_bits_no_markers(m_bits_left & 7); + + // Prime the bit buffer + m_bits_left = 16; + get_bits(16); + get_bits(16); + + // The next marker _should_ be EOI + process_markers(); + } + + m_total_bytes_read -= m_in_buf_left; + } + + int jpeg_decoder::decode(const void** pScan_line, uint* pScan_line_len) + { + if ((m_error_code) || (!m_ready_flag)) + return JPGD_FAILED; + + if (m_total_lines_left == 0) + return JPGD_DONE; + + if (m_mcu_lines_left == 0) + { + if (setjmp(m_jmp_state)) + return JPGD_FAILED; + + if (m_progressive_flag) + load_next_row(); + else + decode_next_row(); + + // Find the EOI marker if that was the last row. + if (m_total_lines_left <= m_max_mcu_y_size) + find_eoi(); + + m_mcu_lines_left = m_max_mcu_y_size; + } + + if (m_freq_domain_chroma_upsample) + { + expanded_convert(); + *pScan_line = m_pScan_line_0; + } + else + { + switch (m_scan_type) + { + case JPGD_YH2V2: + { + if ((m_mcu_lines_left & 1) == 0) + { + H2V2Convert(); + *pScan_line = m_pScan_line_0; + } + else + *pScan_line = m_pScan_line_1; + + break; + } + case JPGD_YH2V1: + { + H2V1Convert(); + *pScan_line = m_pScan_line_0; + break; + } + case JPGD_YH1V2: + { + if ((m_mcu_lines_left & 1) == 0) + { + H1V2Convert(); + *pScan_line = m_pScan_line_0; + } + else + *pScan_line = m_pScan_line_1; + + break; + } + case JPGD_YH1V1: + { + H1V1Convert(); + *pScan_line = m_pScan_line_0; + break; + } + case JPGD_GRAYSCALE: + { + gray_convert(); + *pScan_line = m_pScan_line_0; + + break; + } + } + } + + *pScan_line_len = m_real_dest_bytes_per_scan_line; + + m_mcu_lines_left--; + m_total_lines_left--; + + return JPGD_SUCCESS; + } + + // Creates the tables needed for efficient Huffman decoding. 
+ void jpeg_decoder::make_huff_table(int index, huff_tables *pH) + { + int p, i, l, si; + uint8 huffsize[257]; + uint huffcode[257]; + uint code; + uint subtree; + int code_size; + int lastp; + int nextfreeentry; + int currententry; + + pH->ac_table = m_huff_ac[index] != 0; + + p = 0; + + for (l = 1; l <= 16; l++) + { + for (i = 1; i <= m_huff_num[index][l]; i++) + huffsize[p++] = static_cast(l); + } + + huffsize[p] = 0; + + lastp = p; + + code = 0; + si = huffsize[0]; + p = 0; + + while (huffsize[p]) + { + while (huffsize[p] == si) + { + huffcode[p++] = code; + code++; + } + + code <<= 1; + si++; + } + + memset(pH->look_up, 0, sizeof(pH->look_up)); + memset(pH->look_up2, 0, sizeof(pH->look_up2)); + memset(pH->tree, 0, sizeof(pH->tree)); + memset(pH->code_size, 0, sizeof(pH->code_size)); + + nextfreeentry = -1; + + p = 0; + + while (p < lastp) + { + i = m_huff_val[index][p]; + code = huffcode[p]; + code_size = huffsize[p]; + + pH->code_size[i] = static_cast(code_size); + + if (code_size <= 8) + { + code <<= (8 - code_size); + + for (l = 1 << (8 - code_size); l > 0; l--) + { + JPGD_ASSERT(i < 256); + + pH->look_up[code] = i; + + bool has_extrabits = false; + int extra_bits = 0; + int num_extra_bits = i & 15; + + int bits_to_fetch = code_size; + if (num_extra_bits) + { + int total_codesize = code_size + num_extra_bits; + if (total_codesize <= 8) + { + has_extrabits = true; + extra_bits = ((1 << num_extra_bits) - 1) & (code >> (8 - total_codesize)); + JPGD_ASSERT(extra_bits <= 0x7FFF); + bits_to_fetch += num_extra_bits; + } + } + + if (!has_extrabits) + pH->look_up2[code] = i | (bits_to_fetch << 8); + else + pH->look_up2[code] = i | 0x8000 | (extra_bits << 16) | (bits_to_fetch << 8); + + code++; + } + } + else + { + subtree = (code >> (code_size - 8)) & 0xFF; + + currententry = pH->look_up[subtree]; + + if (currententry == 0) + { + pH->look_up[subtree] = currententry = nextfreeentry; + pH->look_up2[subtree] = currententry = nextfreeentry; + + nextfreeentry -= 2; + } + 
+ code <<= (16 - (code_size - 8)); + + for (l = code_size; l > 9; l--) + { + if ((code & 0x8000) == 0) + currententry--; + + if (pH->tree[-currententry - 1] == 0) + { + pH->tree[-currententry - 1] = nextfreeentry; + + currententry = nextfreeentry; + + nextfreeentry -= 2; + } + else + currententry = pH->tree[-currententry - 1]; + + code <<= 1; + } + + if ((code & 0x8000) == 0) + currententry--; + + pH->tree[-currententry - 1] = i; + } + + p++; + } + } + + // Verifies the quantization tables needed for this scan are available. + void jpeg_decoder::check_quant_tables() + { + for (int i = 0; i < m_comps_in_scan; i++) + if (m_quant[m_comp_quant[m_comp_list[i]]] == NULL) + stop_decoding(JPGD_UNDEFINED_QUANT_TABLE); + } + + // Verifies that all the Huffman tables needed for this scan are available. + void jpeg_decoder::check_huff_tables() + { + for (int i = 0; i < m_comps_in_scan; i++) + { + if ((m_spectral_start == 0) && (m_huff_num[m_comp_dc_tab[m_comp_list[i]]] == NULL)) + stop_decoding(JPGD_UNDEFINED_HUFF_TABLE); + + if ((m_spectral_end > 0) && (m_huff_num[m_comp_ac_tab[m_comp_list[i]]] == NULL)) + stop_decoding(JPGD_UNDEFINED_HUFF_TABLE); + } + + for (int i = 0; i < JPGD_MAX_HUFF_TABLES; i++) + if (m_huff_num[i]) + { + if (!m_pHuff_tabs[i]) + m_pHuff_tabs[i] = (huff_tables *)alloc(sizeof(huff_tables)); + + make_huff_table(i, m_pHuff_tabs[i]); + } + } + + // Determines the component order inside each MCU. + // Also calcs how many MCU's are on each row, etc. 
+ void jpeg_decoder::calc_mcu_block_order() + { + int component_num, component_id; + int max_h_samp = 0, max_v_samp = 0; + + for (component_id = 0; component_id < m_comps_in_frame; component_id++) + { + if (m_comp_h_samp[component_id] > max_h_samp) + max_h_samp = m_comp_h_samp[component_id]; + + if (m_comp_v_samp[component_id] > max_v_samp) + max_v_samp = m_comp_v_samp[component_id]; + } + + for (component_id = 0; component_id < m_comps_in_frame; component_id++) + { + m_comp_h_blocks[component_id] = ((((m_image_x_size * m_comp_h_samp[component_id]) + (max_h_samp - 1)) / max_h_samp) + 7) / 8; + m_comp_v_blocks[component_id] = ((((m_image_y_size * m_comp_v_samp[component_id]) + (max_v_samp - 1)) / max_v_samp) + 7) / 8; + } + + if (m_comps_in_scan == 1) + { + m_mcus_per_row = m_comp_h_blocks[m_comp_list[0]]; + m_mcus_per_col = m_comp_v_blocks[m_comp_list[0]]; + } + else + { + m_mcus_per_row = (((m_image_x_size + 7) / 8) + (max_h_samp - 1)) / max_h_samp; + m_mcus_per_col = (((m_image_y_size + 7) / 8) + (max_v_samp - 1)) / max_v_samp; + } + + if (m_comps_in_scan == 1) + { + m_mcu_org[0] = m_comp_list[0]; + + m_blocks_per_mcu = 1; + } + else + { + m_blocks_per_mcu = 0; + + for (component_num = 0; component_num < m_comps_in_scan; component_num++) + { + int num_blocks; + + component_id = m_comp_list[component_num]; + + num_blocks = m_comp_h_samp[component_id] * m_comp_v_samp[component_id]; + + while (num_blocks--) + m_mcu_org[m_blocks_per_mcu++] = component_id; + } + } + } + + // Starts a new scan. + int jpeg_decoder::init_scan() + { + if (!locate_sos_marker()) + return JPGD_FALSE; + + calc_mcu_block_order(); + + check_huff_tables(); + + check_quant_tables(); + + memset(m_last_dc_val, 0, m_comps_in_frame * sizeof(uint)); + + m_eob_run = 0; + + if (m_restart_interval) + { + m_restarts_left = m_restart_interval; + m_next_restart_num = 0; + } + + fix_in_buffer(); + + return JPGD_TRUE; + } + + // Starts a frame. 
Determines if the number of components or sampling factors
+ // are supported.
+ // Classifies the frame into one of the JPGD_* scan types, derives the MCU
+ // geometry and destination scan-line sizes, and allocates the scan-line,
+ // coefficient and sample buffers used while decoding.
+ void jpeg_decoder::init_frame()
+ {
+   int i;
+
+   if (m_comps_in_frame == 1)
+   {
+     // Grayscale: only 1x1 sampling is accepted.
+     if ((m_comp_h_samp[0] != 1) || (m_comp_v_samp[0] != 1))
+       stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
+
+     m_scan_type = JPGD_GRAYSCALE;
+     m_max_blocks_per_mcu = 1;
+     m_max_mcu_x_size = 8;
+     m_max_mcu_y_size = 8;
+   }
+   else if (m_comps_in_frame == 3)
+   {
+     // Components 1 and 2 must be 1x1; only component 0 may be subsampled against.
+     if ( ((m_comp_h_samp[1] != 1) || (m_comp_v_samp[1] != 1)) ||
+          ((m_comp_h_samp[2] != 1) || (m_comp_v_samp[2] != 1)) )
+       stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
+
+     // The sampling factors of component 0 select the scan type and MCU size.
+     if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 1))
+     {
+       m_scan_type = JPGD_YH1V1;
+
+       m_max_blocks_per_mcu = 3;
+       m_max_mcu_x_size = 8;
+       m_max_mcu_y_size = 8;
+     }
+     else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 1))
+     {
+       m_scan_type = JPGD_YH2V1;
+       m_max_blocks_per_mcu = 4;
+       m_max_mcu_x_size = 16;
+       m_max_mcu_y_size = 8;
+     }
+     else if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 2))
+     {
+       m_scan_type = JPGD_YH1V2;
+       m_max_blocks_per_mcu = 4;
+       m_max_mcu_x_size = 8;
+       m_max_mcu_y_size = 16;
+     }
+     else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 2))
+     {
+       m_scan_type = JPGD_YH2V2;
+       m_max_blocks_per_mcu = 6;
+       m_max_mcu_x_size = 16;
+       m_max_mcu_y_size = 16;
+     }
+     else
+       stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
+   }
+   else
+     stop_decoding(JPGD_UNSUPPORTED_COLORSPACE);
+
+   // Number of MCUs needed to cover the image, rounding up.
+   m_max_mcus_per_row = (m_image_x_size + (m_max_mcu_x_size - 1)) / m_max_mcu_x_size;
+   m_max_mcus_per_col = (m_image_y_size + (m_max_mcu_y_size - 1)) / m_max_mcu_y_size;
+
+   // These values are for the *destination* pixels: after conversion.
+   if (m_scan_type == JPGD_GRAYSCALE)
+     m_dest_bytes_per_pixel = 1;
+   else
+     m_dest_bytes_per_pixel = 4;
+
+   // Buffer width is the image width rounded up to a multiple of 16 pixels.
+   // NOTE(review): the 0xFFF0 mask also discards bits above bit 15, so the
+   // rounded width wraps once m_image_x_size + 15 reaches 65536; presumably
+   // the width is bounded elsewhere - confirm.
+   m_dest_bytes_per_scan_line = ((m_image_x_size + 15) & 0xFFF0) * m_dest_bytes_per_pixel;
+
+   m_real_dest_bytes_per_scan_line = (m_image_x_size * m_dest_bytes_per_pixel);
+
+   // Initialize two scan line buffers.
+   // The second buffer is only allocated for the vertically subsampled types.
+   m_pScan_line_0 = (uint8 *)alloc(m_dest_bytes_per_scan_line, true);
+   if ((m_scan_type == JPGD_YH1V2) || (m_scan_type == JPGD_YH2V2))
+     m_pScan_line_1 = (uint8 *)alloc(m_dest_bytes_per_scan_line, true);
+
+   m_max_blocks_per_row = m_max_mcus_per_row * m_max_blocks_per_mcu;
+
+   // Should never happen
+   if (m_max_blocks_per_row > JPGD_MAX_BLOCKS_PER_ROW)
+     stop_decoding(JPGD_ASSERTION_ERROR);
+
+   // Allocate the coefficient buffer, enough for one MCU
+   m_pMCU_coefficients = (jpgd_block_t*)alloc(m_max_blocks_per_mcu * 64 * sizeof(jpgd_block_t));
+
+   for (i = 0; i < m_max_blocks_per_mcu; i++)
+     m_mcu_block_max_zag[i] = 64;
+
+   // "Expanded" counts assume every component has as many blocks per MCU as component 0.
+   m_expanded_blocks_per_component = m_comp_h_samp[0] * m_comp_v_samp[0];
+   m_expanded_blocks_per_mcu = m_expanded_blocks_per_component * m_comps_in_frame;
+   m_expanded_blocks_per_row = m_max_mcus_per_row * m_expanded_blocks_per_mcu;
+   // Freq. domain chroma upsampling is only supported for H2V2 subsampling factor.
+// BEGIN EPIC MOD
+#if JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING
+   // 4*3 expanded blocks per MCU: 2x2 sampling on component 0 with three components.
+   m_freq_domain_chroma_upsample = (m_expanded_blocks_per_mcu == 4*3);
+#else
+   m_freq_domain_chroma_upsample = 0;
+#endif
+// END EPIC MOD
+
+   // One row of 64-byte sample blocks; sized from the expanded count when upsampling in the frequency domain.
+   if (m_freq_domain_chroma_upsample)
+     m_pSample_buf = (uint8 *)alloc(m_expanded_blocks_per_row * 64);
+   else
+     m_pSample_buf = (uint8 *)alloc(m_max_blocks_per_row * 64);
+
+   m_total_lines_left = m_image_y_size;
+
+   m_mcu_lines_left = 0;
+
+   create_look_ups();
+ }
+
+ // The coeff_buf series of methods originally stored the coefficients
+ // into a "virtual" file which was located in EMS, XMS, or a disk file. A cache
+ // was used to make this process more efficient. Now, we can store the entire
+ // thing in RAM.
+ jpeg_decoder::coeff_buf* jpeg_decoder::coeff_buf_open(int block_num_x, int block_num_y, int block_len_x, int block_len_y)
+ {
+   // Creates a coefficient buffer holding block_num_x * block_num_y blocks of
+   // block_len_x * block_len_y coefficients each. Both the descriptor and the
+   // data come from the decoder's alloc().
+   // NOTE(review): the second alloc() argument presumably requests zeroed memory - confirm.
+   coeff_buf* cb = (coeff_buf*)alloc(sizeof(coeff_buf));
+
+   cb->block_num_x = block_num_x;
+   cb->block_num_y = block_num_y;
+   cb->block_len_x = block_len_x;
+   cb->block_len_y = block_len_y;
+   cb->block_size = (block_len_x * block_len_y) * sizeof(jpgd_block_t);
+   cb->pData = (uint8 *)alloc(cb->block_size * block_num_x * block_num_y, true);
+   return cb;
+ }
+
+ // Returns a pointer to the first coefficient of block (block_x, block_y).
+ // Blocks are stored row by row, block_num_x blocks per row.
+ inline jpgd_block_t *jpeg_decoder::coeff_buf_getp(coeff_buf *cb, int block_x, int block_y)
+ {
+   JPGD_ASSERT((block_x < cb->block_num_x) && (block_y < cb->block_num_y));
+   return (jpgd_block_t *)(cb->pData + block_x * cb->block_size + block_y * (cb->block_size * cb->block_num_x));
+ }
+
+ // The following methods decode the various types of m_blocks encountered
+ // in progressively encoded images.
+
+ // First DC scan: decodes the DC difference, adds it to the component's
+ // running DC value and stores the result scaled by the successive
+ // approximation shift.
+ void jpeg_decoder::decode_block_dc_first(jpeg_decoder *pD, int component_id, int block_x, int block_y)
+ {
+   int s, r;
+   jpgd_block_t *p = pD->coeff_buf_getp(pD->m_dc_coeffs[component_id], block_x, block_y);
+
+   if ((s = pD->huff_decode(pD->m_pHuff_tabs[pD->m_comp_dc_tab[component_id]])) != 0)
+   {
+     r = pD->get_bits_no_markers(s);
+     s = HUFF_EXTEND(r, s);
+   }
+
+   pD->m_last_dc_val[component_id] = (s += pD->m_last_dc_val[component_id]);
+
+   // s may be negative; left-shifting a negative int is undefined before
+   // C++20, so shift the unsigned representation instead (same bit pattern).
+   p[0] = static_cast<jpgd_block_t>(static_cast<unsigned int>(s) << pD->m_successive_low);
+ }
+
+ // DC refinement scan: one bit per block; when set, it is OR-ed into the
+ // stored DC coefficient at bit position m_successive_low.
+ void jpeg_decoder::decode_block_dc_refine(jpeg_decoder *pD, int component_id, int block_x, int block_y)
+ {
+   if (pD->get_bits_no_markers(1))
+   {
+     jpgd_block_t *p = pD->coeff_buf_getp(pD->m_dc_coeffs[component_id], block_x, block_y);
+
+     p[0] |= (1 << pD->m_successive_low);
+   }
+ }
+
+ // First AC scan: decodes the coefficients of the band
+ // [m_spectral_start, m_spectral_end] for one block, honouring end-of-band runs.
+ void jpeg_decoder::decode_block_ac_first(jpeg_decoder *pD, int component_id, int block_x, int block_y)
+ {
+   int k, s, r;
+
+   // Inside an end-of-band run this block has no coefficients in the band.
+   if (pD->m_eob_run)
+   {
+     pD->m_eob_run--;
+     return;
+   }
+
+   jpgd_block_t *p = pD->coeff_buf_getp(pD->m_ac_coeffs[component_id], block_x, block_y);
+
+   for (k = pD->m_spectral_start; k <= pD->m_spectral_end; k++)
+   {
+     s = pD->huff_decode(pD->m_pHuff_tabs[pD->m_comp_ac_tab[component_id]]);
+
+     // High nibble: zero run length, low nibble: coefficient bit size.
+     r = s >> 4;
+     s &= 15;
+
+     if (s)
+     {
+       if ((k += r) > 63)
+         pD->stop_decoding(JPGD_DECODE_ERROR);
+
+       r = pD->get_bits_no_markers(s);
+       s = HUFF_EXTEND(r, s);
+
+       // Shift via unsigned: s may be negative (see decode_block_dc_first).
+       p[g_ZAG[k]] = static_cast<jpgd_block_t>(static_cast<unsigned int>(s) << pD->m_successive_low);
+     }
+     else
+     {
+       if (r == 15)
+       {
+         // Skip 15 coefficients here; the loop increment skips the 16th.
+         if ((k += 15) > 63)
+           pD->stop_decoding(JPGD_DECODE_ERROR);
+       }
+       else
+       {
+         // End-of-band run of (1 << r) + extra bits blocks, this block included.
+         pD->m_eob_run = 1 << r;
+
+         if (r)
+           pD->m_eob_run += pD->get_bits_no_markers(r);
+
+         pD->m_eob_run--;
+
+         break;
+       }
+     }
+   }
+ }
+
+ // AC refinement scan: adds one more bit of precision to coefficients that
+ // are already nonzero and places newly nonzero coefficients (+/- p1) in
+ // the band [m_spectral_start, m_spectral_end].
+ void jpeg_decoder::decode_block_ac_refine(jpeg_decoder *pD, int component_id, int block_x, int block_y)
+ {
+   int s, k, r;
+   int p1 = 1 << pD->m_successive_low;
+   // Same value the original obtained from (-1) << m_successive_low, computed
+   // without left-shifting a negative operand (undefined before C++20).
+   int m1 = -p1;
+   jpgd_block_t *p = pD->coeff_buf_getp(pD->m_ac_coeffs[component_id], block_x, block_y);
+
+   k = pD->m_spectral_start;
+
+   if (pD->m_eob_run == 0)
+   {
+     for ( ; k <= pD->m_spectral_end; k++)
+     {
+       s = pD->huff_decode(pD->m_pHuff_tabs[pD->m_comp_ac_tab[component_id]]);
+
+       r = s >> 4;
+       s &= 15;
+
+       if (s)
+       {
+         // A refinement scan can only introduce coefficients of magnitude 1.
+         if (s != 1)
+           pD->stop_decoding(JPGD_DECODE_ERROR);
+
+         if (pD->get_bits_no_markers(1))
+           s = p1;
+         else
+           s = m1;
+       }
+       else
+       {
+         if (r != 15)
+         {
+           pD->m_eob_run = 1 << r;
+
+           if (r)
+             pD->m_eob_run += pD->get_bits_no_markers(r);
+
+           break;
+         }
+       }
+
+       // Advance over r still-zero coefficients; every nonzero coefficient
+       // passed on the way consumes one correction bit.
+       do
+       {
+         // BEGIN EPIC MOD
+         JPGD_ASSERT(k < 64);
+         // END EPIC MOD
+
+         jpgd_block_t *this_coef = p + g_ZAG[k];
+
+         if (*this_coef != 0)
+         {
+           if (pD->get_bits_no_markers(1))
+           {
+             if ((*this_coef & p1) == 0)
+             {
+               if (*this_coef >= 0)
+                 *this_coef = static_cast<jpgd_block_t>(*this_coef + p1);
+               else
+                 *this_coef = static_cast<jpgd_block_t>(*this_coef + m1);
+             }
+           }
+         }
+         else
+         {
+           if (--r < 0)
+             break;
+         }
+
+         k++;
+
+       } while (k <= pD->m_spectral_end);
+
+       if ((s) && (k < 64))
+       {
+         p[g_ZAG[k]] = static_cast<jpgd_block_t>(s);
+       }
+     }
+   }
+
+   // Within an end-of-band run only correction bits for already nonzero
+   // coefficients remain in the band.
+   if (pD->m_eob_run > 0)
+   {
+     for ( ; k <= pD->m_spectral_end; k++)
+     {
+       // BEGIN EPIC MOD
+       JPGD_ASSERT(k < 64);
+       // END EPIC MOD
+
+       jpgd_block_t *this_coef = p + g_ZAG[k];
+
+       if (*this_coef != 0)
+       {
+         if (pD->get_bits_no_markers(1))
+         {
+           if ((*this_coef & p1) == 0)
+           {
+             if (*this_coef >= 0)
+               *this_coef = static_cast<jpgd_block_t>(*this_coef + p1);
+             else
+               *this_coef = static_cast<jpgd_block_t>(*this_coef + m1);
+           }
+         }
+       }
+     }
+
+     pD->m_eob_run--;
+   }
+ }
+
+ // Decode a scan in a progressively encoded image.
+ // Walks every MCU of the scan and calls decode_block_func once per block,
+ // passing the block's coordinates within its component.
+ void jpeg_decoder::decode_scan(pDecode_block_func decode_block_func)
+ {
+   int mcu_row, mcu_col, mcu_block;
+   // Per-component block coordinates of the current MCU's top-left block.
+   int block_x_mcu[JPGD_MAX_COMPONENTS], m_block_y_mcu[JPGD_MAX_COMPONENTS];
+
+   memset(m_block_y_mcu, 0, sizeof(m_block_y_mcu));
+
+   // Note the naming: mcu_col counts MCU rows (outer loop) and mcu_row
+   // counts MCUs within a row (inner loop).
+   for (mcu_col = 0; mcu_col < m_mcus_per_col; mcu_col++)
+   {
+     int component_num, component_id;
+
+     memset(block_x_mcu, 0, sizeof(block_x_mcu));
+
+     for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
+     {
+       int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0;
+
+       if ((m_restart_interval) && (m_restarts_left == 0))
+         process_restart();
+
+       for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
+       {
+         component_id = m_mcu_org[mcu_block];
+
+         decode_block_func(this, component_id, block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs);
+
+         if (m_comps_in_scan == 1)
+           block_x_mcu[component_id]++;
+         else
+         {
+           // Step through the component's h_samp x v_samp blocks of this
+           // MCU in raster order, then move on to the next MCU column.
+           if (++block_x_mcu_ofs == m_comp_h_samp[component_id])
+           {
+             block_x_mcu_ofs = 0;
+
+             if (++block_y_mcu_ofs == m_comp_v_samp[component_id])
+             {
+               block_y_mcu_ofs = 0;
+               block_x_mcu[component_id] += m_comp_h_samp[component_id];
+             }
+           }
+         }
+       }
+
+       m_restarts_left--;
+     }
+
+     // Advance each scan component to its next row of blocks.
+     if (m_comps_in_scan == 1)
+       m_block_y_mcu[m_comp_list[0]]++;
+     else
+     {
+       for (component_num = 0; component_num < m_comps_in_scan; component_num++)
+       {
+         component_id = m_comp_list[component_num];
+         m_block_y_mcu[component_id] += m_comp_v_samp[component_id];
+       }
+     }
+   }
+ }
+
+ // Decode a progressively encoded image.
+ void jpeg_decoder::init_progressive()
+ {
+   // Four-component frames are rejected for progressive images.
+   if (m_comps_in_frame == 4)
+     stop_decoding(JPGD_UNSUPPORTED_COLORSPACE);
+
+   // One DC buffer (1x1 coefficient per block) and one AC buffer (8x8
+   // coefficients per block) per component, sized for the whole image.
+   for (int c = 0; c < m_comps_in_frame; c++)
+   {
+     m_dc_coeffs[c] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp[c], m_max_mcus_per_col * m_comp_v_samp[c], 1, 1);
+     m_ac_coeffs[c] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp[c], m_max_mcus_per_col * m_comp_v_samp[c], 8, 8);
+   }
+
+   // Decode every scan in the stream into the coefficient buffers.
+   while (init_scan())
+   {
+     const int is_dc_scan = (m_spectral_start == 0);
+     const int is_refinement = (m_successive_high != 0);
+
+     // Validate the spectral selection of the scan header.
+     if ((m_spectral_end > 63) || (m_spectral_start > m_spectral_end))
+       stop_decoding(JPGD_BAD_SOS_SPECTRAL);
+
+     if (is_dc_scan)
+     {
+       // A DC scan must cover coefficient 0 only.
+       if (m_spectral_end != 0)
+         stop_decoding(JPGD_BAD_SOS_SPECTRAL);
+     }
+     else if (m_comps_in_scan != 1) /* AC scans can only contain one component */
+       stop_decoding(JPGD_BAD_SOS_SPECTRAL);
+
+     // A refinement scan must add exactly one bit of precision.
+     if (is_refinement && (m_successive_low != m_successive_high - 1))
+       stop_decoding(JPGD_BAD_SOS_SUCCESSIVE);
+
+     // Pick the block decoder matching the scan kind.
+     pDecode_block_func block_func;
+
+     if (is_refinement)
+     {
+       if (is_dc_scan)
+         block_func = decode_block_dc_refine;
+       else
+         block_func = decode_block_ac_refine;
+     }
+     else
+     {
+       if (is_dc_scan)
+         block_func = decode_block_dc_first;
+       else
+         block_func = decode_block_ac_first;
+     }
+
+     decode_scan(block_func);
+
+     // Reset the bit reader before looking for the next marker.
+     m_bits_left = 16;
+     get_bits(16);
+     get_bits(16);
+   }
+
+   // From here on the image is treated as one scan containing every
+   // frame component.
+   m_comps_in_scan = m_comps_in_frame;
+
+   for (int c = 0; c < m_comps_in_frame; c++)
+     m_comp_list[c] = c;
+
+   calc_mcu_block_order();
+ }
+
+ // Sequential images: prepare the first scan; failing to find one is an error.
+ void jpeg_decoder::init_sequential()
+ {
+   if (init_scan() == JPGD_FALSE)
+     stop_decoding(JPGD_UNEXPECTED_MARKER);
+ }
+
+ // Sets up the frame, then the progressive or sequential scan state.
+ void jpeg_decoder::decode_start()
+ {
+   init_frame();
+
+   if (!m_progressive_flag)
+     init_sequential();
+   else
+     init_progressive();
+ }
+
+ // Binds the decoder to pStream and reads up to the start-of-frame marker.
+ void jpeg_decoder::decode_init(jpeg_decoder_stream *pStream)
+ {
+   init(pStream);
+   locate_sof_marker();
+ }
+
+ 
jpeg_decoder::jpeg_decoder(jpeg_decoder_stream *pStream)
+ {
+   // setjmp() returns nonzero when control comes back through m_jmp_state;
+   // the constructor then simply returns and callers inspect the error code.
+   if (setjmp(m_jmp_state))
+     return;
+   decode_init(pStream);
+ }
+
+ // Prepares the decoder for decode() calls.
+ // Returns JPGD_SUCCESS when ready (also on repeated calls), JPGD_FAILED
+ // when an error is already recorded or occurs during start-up.
+ int jpeg_decoder::begin_decoding()
+ {
+   if (m_ready_flag)
+     return JPGD_SUCCESS;
+
+   if (m_error_code)
+     return JPGD_FAILED;
+
+   if (setjmp(m_jmp_state))
+     return JPGD_FAILED;
+
+   decode_start();
+
+   m_ready_flag = true;
+
+   return JPGD_SUCCESS;
+ }
+
+ // Releases everything obtained through the decoder's allocator.
+ jpeg_decoder::~jpeg_decoder()
+ {
+   free_all_blocks();
+ }
+
+ // Starts with no file attached and both status flags cleared.
+ jpeg_decoder_file_stream::jpeg_decoder_file_stream()
+ {
+   m_pFile = NULL;
+   m_eof_flag = false;
+   m_error_flag = false;
+ }
+
+ // Closes the file if one is open and resets the status flags.
+ void jpeg_decoder_file_stream::close()
+ {
+   if (m_pFile)
+   {
+     fclose(m_pFile);
+     m_pFile = NULL;
+   }
+
+   m_eof_flag = false;
+   m_error_flag = false;
+ }
+
+ jpeg_decoder_file_stream::~jpeg_decoder_file_stream()
+ {
+   close();
+ }
+
+ // Opens Pfilename for binary reading, closing any previously open file.
+ // Returns true on success.
+ bool jpeg_decoder_file_stream::open(const char *Pfilename)
+ {
+   close();
+
+   m_eof_flag = false;
+   m_error_flag = false;
+
+#if defined(_MSC_VER)
+   m_pFile = NULL;
+   fopen_s(&m_pFile, Pfilename, "rb");
+#else
+   m_pFile = fopen(Pfilename, "rb");
+#endif
+   return m_pFile != NULL;
+ }
+
+ // Reads up to max_bytes_to_read bytes into pBuf.
+ // Returns the number of bytes read, or -1 when no file is open or a read
+ // error has occurred. *pEOF_flag is set to true once the end of the file
+ // is reached; this function never writes false to it.
+ int jpeg_decoder_file_stream::read(uint8 *pBuf, int max_bytes_to_read, bool *pEOF_flag)
+ {
+   if (!m_pFile)
+     return -1;
+
+   if (m_eof_flag)
+   {
+     *pEOF_flag = true;
+     return 0;
+   }
+
+   if (m_error_flag)
+     return -1;
+
+   // NOTE(review): this static_cast has no target type in this text (angle-bracket
+   // content appears to have been stripped); presumably int - confirm against upstream.
+   int bytes_read = static_cast(fread(pBuf, 1, max_bytes_to_read, m_pFile));
+   if (bytes_read < max_bytes_to_read)
+   {
+     // A short read is either an I/O error or the end of the file.
+     if (ferror(m_pFile))
+     {
+       m_error_flag = true;
+       return -1;
+     }
+
+     m_eof_flag = true;
+     *pEOF_flag = true;
+   }
+
+   return bytes_read;
+ }
+
+ // Attaches the stream to an in-memory buffer of `size` bytes (not copied).
+ bool jpeg_decoder_mem_stream::open(const uint8 *pSrc_data, uint size)
+ {
+   close();
+   m_pSrc_data = pSrc_data;
+   m_ofs = 0;
+   m_size = size;
+   return true;
+ }
+
+ // Copies up to max_bytes_to_read bytes from the current offset into pBuf.
+ // Returns the number of bytes copied, or -1 when no buffer is attached.
+ // *pEOF_flag is true when fewer bytes remained than were requested.
+ int jpeg_decoder_mem_stream::read(uint8 *pBuf, int max_bytes_to_read, bool *pEOF_flag)
+ {
+   *pEOF_flag = false;
+
+   if (!m_pSrc_data)
+     return -1;
+
+   uint bytes_remaining = m_size - m_ofs;
+   if ((uint)max_bytes_to_read > bytes_remaining)
+   {
+     max_bytes_to_read = bytes_remaining;
+     *pEOF_flag = true;
+   }
+
+   memcpy(pBuf, m_pSrc_data + m_ofs, max_bytes_to_read);
+   m_ofs += max_bytes_to_read;
+
+   return max_bytes_to_read;
+ }
+
+ // Decodes a whole image from pStream into a newly allocated buffer of
+ // width * height * req_comps bytes (req_comps must be 1, 3 or 4).
+ // On success returns the buffer, which the caller releases, and fills
+ // *width, *height and *actual_comps. Returns NULL on bad arguments,
+ // decode failure or allocation failure.
+ unsigned char *decompress_jpeg_image_from_stream(jpeg_decoder_stream *pStream, int *width, int *height, int *actual_comps, int req_comps)
+ {
+   if (!actual_comps)
+     return NULL;
+   *actual_comps = 0;
+
+   if ((!pStream) || (!width) || (!height) || (!req_comps))
+     return NULL;
+
+   if ((req_comps != 1) && (req_comps != 3) && (req_comps != 4))
+     return NULL;
+
+   jpeg_decoder decoder(pStream);
+   if (decoder.get_error_code() != JPGD_SUCCESS)
+     return NULL;
+
+   // The output parameters are written before decoding starts, so they are
+   // set even when a later step fails.
+   const int image_width = decoder.get_width(), image_height = decoder.get_height();
+   *width = image_width;
+   *height = image_height;
+   *actual_comps = decoder.get_num_components();
+
+   if (decoder.begin_decoding() != JPGD_SUCCESS)
+     return NULL;
+
+   const int dst_bpl = image_width * req_comps;
+
+   uint8 *pImage_data = (uint8*)jpgd_malloc(dst_bpl * image_height);
+   if (!pImage_data)
+     return NULL;
+
+   for (int y = 0; y < image_height; y++)
+   {
+     const uint8* pScan_line = 0;
+     uint scan_line_len;
+     if (decoder.decode((const void**)&pScan_line, &scan_line_len) != JPGD_SUCCESS)
+     {
+       jpgd_free(pImage_data);
+       return NULL;
+     }
+
+     uint8 *pDst = pImage_data + y * dst_bpl;
+
+     // Decoded scan lines are read as 1 byte per pixel for 1-component
+     // images and 4 bytes per pixel for 3-component images, so these two
+     // combinations need no conversion.
+     if (((req_comps == 4) && (decoder.get_num_components() == 3)) ||
+         ((req_comps == 1) && (decoder.get_num_components() == 1)))
+     {
+       memcpy(pDst, pScan_line, dst_bpl);
+     }
+     else if (decoder.get_num_components() == 1)
+     {
+       // Grayscale source: replicate luma into each colour channel.
+       if (req_comps == 3)
+       {
+         for (int x = 0; x < image_width; x++)
+         {
+           uint8 luma = pScan_line[x];
+           pDst[0] = luma;
+           pDst[1] = luma;
+           pDst[2] = luma;
+           pDst += 3;
+         }
+       }
+       else
+       {
+         // req_comps == 4: the fourth byte is set to 255.
+         for (int x = 0; x < image_width; x++)
+         {
+           uint8 luma = pScan_line[x];
+           pDst[0] = luma;
+           pDst[1] = luma;
+           pDst[2] = luma;
+           pDst[3] = 255;
+           pDst += 4;
+         }
+       }
+     }
+     else if (decoder.get_num_components() == 3)
+     {
+       if (req_comps == 1)
+       {
+         // Weighted sum of the three channels in 16.16 fixed point (weights sum to 65536).
+         const int YR = 19595, YG = 38470, YB = 7471;
+         for (int x = 0; x < image_width; x++)
+         {
+           int r = pScan_line[x*4+0];
+           int g = pScan_line[x*4+1];
+           int b = pScan_line[x*4+2];
+           // NOTE(review): static_cast target type missing in this text; presumably uint8 - confirm.
+           *pDst++ = static_cast((r * YR + g * YG + b * YB + 32768) >> 16);
+         }
+       }
+       else
+       {
+         // req_comps == 3: drop the fourth byte of each source pixel.
+         for (int x = 0; x < image_width; x++)
+         {
+           pDst[0] = pScan_line[x*4+0];
+           pDst[1] = pScan_line[x*4+1];
+           pDst[2] = pScan_line[x*4+2];
+           pDst += 3;
+         }
+       }
+     }
+   }
+
+   return pImage_data;
+ }
+
+// BEGIN EPIC MOD
+ // Decodes a JPEG held in memory. `format` is stored in the file-level
+ // jpg_format variable before decoding starts.
+ unsigned char *decompress_jpeg_image_from_memory(const unsigned char *pSrc_data, int src_data_size, int *width, int *height, int *actual_comps, int req_comps, int format)
+ {
+   jpg_format = (ERGBFormatJPG)format;
+// END EPIC MOD
+   jpgd::jpeg_decoder_mem_stream mem_stream(pSrc_data, src_data_size);
+   return decompress_jpeg_image_from_stream(&mem_stream, width, height, actual_comps, req_comps);
+ }
+
+ // Decodes a JPEG file; returns NULL when the file cannot be opened.
+ unsigned char *decompress_jpeg_image_from_file(const char *pSrc_filename, int *width, int *height, int *actual_comps, int req_comps)
+ {
+   jpgd::jpeg_decoder_file_stream file_stream;
+   if (!file_stream.open(pSrc_filename))
+     return NULL;
+   return decompress_jpeg_image_from_stream(&file_stream, width, height, actual_comps, req_comps);
+ }
+
+} // namespace jpgd
diff --git a/crazy_functions/test_project/cpp/longcode/jpge.cpp b/crazy_functions/test_project/cpp/longcode/jpge.cpp
new file mode 100644
index 0000000..2e26b71
--- /dev/null
+++ b/crazy_functions/test_project/cpp/longcode/jpge.cpp
@@ -0,0 +1,1049 @@
+// jpge.cpp - C++ class for JPEG compression.
+// Public domain, Rich Geldreich
+// v1.01, Dec. 18, 2010 - Initial release
+// v1.02, Apr. 6, 2011 - Removed 2x2 ordered dither in H2V1 chroma subsampling method load_block_16_8_8(). (The rounding factor was 2, when it should have been 1. Either way, it wasn't helping.)
+// v1.03, Apr. 16, 2011 - Added support for optimized Huffman code tables, optimized dynamic memory allocation down to only 1 alloc.
+// Also from Alex Evans: Added RGBA support, linear memory allocator (no longer needed in v1.03).
+// v1.04, May. 19, 2012: Forgot to set m_pFile ptr to NULL in cfile_stream::close(). Thanks to Owen Kaluza for reporting this bug.
+// Code tweaks to fix VS2008 static code analysis warnings (all looked harmless).
+// Code review revealed method load_block_16_8_8() (used for the non-default H2V1 sampling mode to downsample chroma) somehow didn't get the rounding factor fix from v1.02.
+
+#include "jpge.h"
+
+#include
+#include
+#if PLATFORM_WINDOWS
+#include
+#endif
+
+#define JPGE_MAX(a,b) (((a)>(b))?(a):(b))
+#define JPGE_MIN(a,b) (((a)<(b))?(a):(b))
+
+namespace jpge {
+
+// Allocation is routed through FMemory.
+static inline void *jpge_malloc(size_t nSize) { return FMemory::Malloc(nSize); }
+static inline void jpge_free(void *p) { FMemory::Free(p);; }
+
+// Various JPEG enums and tables.
+enum { M_SOF0 = 0xC0, M_DHT = 0xC4, M_SOI = 0xD8, M_EOI = 0xD9, M_SOS = 0xDA, M_DQT = 0xDB, M_APP0 = 0xE0 };
+enum { DC_LUM_CODES = 12, AC_LUM_CODES = 256, DC_CHROMA_CODES = 12, AC_CHROMA_CODES = 256, MAX_HUFF_SYMBOLS = 257, MAX_HUFF_CODESIZE = 32 };
+
+// Zig-zag index table (the same sequence as g_ZAG in jpgd.cpp).
+static uint8 s_zag[64] = { 0,1,8,16,9,2,3,10,17,24,32,25,18,11,4,5,12,19,26,33,40,48,41,34,27,20,13,6,7,14,21,28,35,42,49,56,57,50,43,36,29,22,15,23,30,37,44,51,58,59,52,45,38,31,39,46,53,60,61,54,47,55,62,63 };
+static int16 s_std_lum_quant[64] = { 16,11,12,14,12,10,16,14,13,14,18,17,16,19,24,40,26,24,22,22,24,49,35,37,29,40,58,51,61,60,57,51,56,55,64,72,92,78,64,68,87,69,55,56,80,109,81,87,95,98,103,104,103,62,77,113,121,112,100,120,92,101,103,99 };
+static int16 s_std_croma_quant[64] = { 17,18,18,24,21,24,47,26,26,47,99,66,56,66,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99 };
+// Huffman table definitions: each *_bits[i] is the number of codes of length i
+// (index 0 unused) and each *_val lists the symbols.
+static uint8 s_dc_lum_bits[17] = { 0,0,1,5,1,1,1,1,1,1,0,0,0,0,0,0,0 };
+static uint8 s_dc_lum_val[DC_LUM_CODES] = { 0,1,2,3,4,5,6,7,8,9,10,11 };
+static uint8 s_ac_lum_bits[17] = { 0,0,2,1,3,3,2,4,3,5,5,4,4,0,0,1,0x7d };
+static uint8 s_ac_lum_val[AC_LUM_CODES] =
+{
+  0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,0x13,0x51,0x61,0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xa1,0x08,0x23,0x42,0xb1,0xc1,0x15,0x52,0xd1,0xf0,
+  0x24,0x33,0x62,0x72,0x82,0x09,0x0a,0x16,0x17,0x18,0x19,0x1a,0x25,0x26,0x27,0x28,0x29,0x2a,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,
+  0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x83,0x84,0x85,0x86,0x87,0x88,0x89,
+  0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,
+  0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,
+  0xf9,0xfa
+};
+static uint8 s_dc_chroma_bits[17] = { 0,0,3,1,1,1,1,1,1,1,1,1,0,0,0,0,0 };
+static uint8 s_dc_chroma_val[DC_CHROMA_CODES] = { 0,1,2,3,4,5,6,7,8,9,10,11 };
+static uint8 s_ac_chroma_bits[17] = { 0,0,2,1,2,4,4,3,4,7,5,4,4,0,1,2,0x77 };
+static uint8 s_ac_chroma_val[AC_CHROMA_CODES] =
+{
+  0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,0x51,0x07,0x61,0x71,0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91,0xa1,0xb1,0xc1,0x09,0x23,0x33,0x52,0xf0,
+  0x15,0x62,0x72,0xd1,0x0a,0x16,0x24,0x34,0xe1,0x25,0xf1,0x17,0x18,0x19,0x1a,0x26,0x27,0x28,0x29,0x2a,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,
+  0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x82,0x83,0x84,0x85,0x86,0x87,
+  0x88,0x89,0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3,
+  0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,
+  0xf9,0xfa
+};
+
+// Low-level helper functions.
+// NOTE(review): the template parameter list and the static_cast target types
+// below are missing in this text (angle-bracket content appears to have been
+// stripped); restore them from the upstream source before compiling.
+// clear_obj() zero-fills an object with memset.
+template inline void clear_obj(T &obj) { memset(&obj, 0, sizeof(obj)); }
+
+// Colour conversion weights in 16.16 fixed point.
+const int YR = 19595, YG = 38470, YB = 7471, CB_R = -11059, CB_G = -21709, CB_B = 32768, CR_R = 32768, CR_G = -27439, CR_B = -5329;
+// Clamps i to [0, 255]; the outer comparison is a single range test that skips the branches for in-range values.
+static inline uint8 clamp(int i) { if (static_cast(i) > 255U) { if (i < 0) i = 0; else if (i > 255) i = 255; } return static_cast(i); }
+
+// Converts num_pixels 3-byte RGB pixels to 3-byte YCbCr (chroma offset by 128).
+static void RGB_to_YCC(uint8* pDst, const uint8 *pSrc, int num_pixels)
+{
+  for ( ; num_pixels; pDst += 3, pSrc += 3, num_pixels--)
+  {
+    const int r = pSrc[0], g = pSrc[1], b = pSrc[2];
+    pDst[0] = static_cast((r * YR + g * YG + b * YB + 32768) >> 16);
+    pDst[1] = clamp(128 + ((r * CB_R + g * CB_G + b * CB_B + 32768) >> 16));
+    pDst[2] = clamp(128 + ((r * CR_R + g * CR_G + b * CR_B + 32768) >> 16));
+  }
+}
+
+// Converts num_pixels 3-byte RGB pixels to 1-byte luma.
+static void RGB_to_Y(uint8* pDst, const uint8 *pSrc, int num_pixels)
+{
+  for ( ; num_pixels; pDst++, pSrc += 3, num_pixels--)
+    pDst[0] = static_cast((pSrc[0] * YR + pSrc[1] * YG + pSrc[2] * YB + 32768) >> 16);
+}
+
+// Same as RGB_to_YCC for 4-byte source pixels; the fourth byte is ignored.
+static void RGBA_to_YCC(uint8* pDst, const uint8 *pSrc, int num_pixels)
+{
+  for ( ; num_pixels; pDst += 3, pSrc += 4, num_pixels--)
+  {
+    const int r = pSrc[0], g = pSrc[1], b = pSrc[2];
+    pDst[0] = static_cast((r * YR + g * YG + b * YB + 32768) >> 16);
+    pDst[1] = clamp(128 + ((r * CB_R + g * CB_G + b * CB_B + 32768) >> 16));
+    pDst[2] = clamp(128 + ((r * CR_R + g * CR_G + b * CR_B + 32768) >> 16));
+  }
+}
+
+// Same as RGB_to_Y for 4-byte source pixels; the fourth byte is ignored.
+static void RGBA_to_Y(uint8* pDst, const uint8 *pSrc, int num_pixels)
+{
+  for ( ; num_pixels; pDst++, pSrc += 4, num_pixels--)
+    pDst[0] = static_cast((pSrc[0] * YR + pSrc[1] * YG + pSrc[2] * YB + 32768) >> 16);
+}
+
+// Expands 1-byte luma to 3-byte YCbCr with both chroma bytes set to 128.
+static void Y_to_YCC(uint8* pDst, const uint8* pSrc, int num_pixels)
+{
+  for( ; num_pixels; pDst += 3, pSrc++, num_pixels--) { pDst[0] = pSrc[0]; pDst[1] = 128; pDst[2] = 128; }
+}
+
+// Forward DCT - DCT derived from jfdctint.
+// DCT_DESCALE divides by 2^n with rounding. DCT1D performs one 8-point
+// pass in place on s0..s7 and declares its temporaries as locals, so it
+// can be expanded only once per scope.
+#define CONST_BITS 13
+#define ROW_BITS 2
+#define DCT_DESCALE(x, n) (((x) + (((int32)1) << ((n) - 1))) >> (n))
+#define DCT_MUL(var, c) (static_cast(var) * static_cast(c))
+#define DCT1D(s0, s1, s2, s3, s4, s5, s6, s7) \
+  int32 t0 = s0 + s7, t7 = s0 - s7, t1 = s1 + s6, t6 = s1 - s6, t2 = s2 + s5, t5 = s2 - s5, t3 = s3 + s4, t4 = s3 - s4; \
+  int32 t10 = t0 + t3, t13 = t0 - t3, t11 = t1 + t2, t12 = t1 - t2; \
+  int32 u1 = DCT_MUL(t12 + t13, 4433); \
+  s2 = u1 + DCT_MUL(t13, 6270); \
+  s6 = u1 + DCT_MUL(t12, -15137); \
+  u1 = t4 + t7; \
+  int32 u2 = t5 + t6, u3 = t4 + t6, u4 = t5 + t7; \
+  int32 z5 = DCT_MUL(u3 + u4, 9633); \
+  t4 = DCT_MUL(t4, 2446); t5 = DCT_MUL(t5, 16819); \
+  t6 = DCT_MUL(t6, 25172); t7 = DCT_MUL(t7, 12299); \
+  u1 = DCT_MUL(u1, -7373); u2 = DCT_MUL(u2, -20995); \
+  u3 = DCT_MUL(u3, -16069); u4 = DCT_MUL(u4, -3196); \
+  u3 += z5; u4 += z5; \
+  s0 = t10 + t11; s1 = t7 + u1 + u4; s3 = t6 + u2 + u3; s4 = t10 - t11; s5 = t5 + u2 + u4; s7 = t4 + u1 + u3;
+
+// In-place 8x8 forward DCT on 64 int32 values: a pass over the 8 rows
+// followed by a pass over the 8 columns.
+static void DCT2D(int32 *p)
+{
+  int32 c, *q = p;
+  // Row pass: results are left scaled up by 2^ROW_BITS.
+  for (c = 7; c >= 0; c--, q += 8)
+  {
+    int32 s0 = q[0], s1 = q[1], s2 = q[2], s3 = q[3], s4 = q[4], s5 = q[5], s6 = q[6], s7 = q[7];
+    DCT1D(s0, s1, s2, s3, s4, s5, s6, s7);
+    q[0] = s0 << ROW_BITS; q[1] = DCT_DESCALE(s1, CONST_BITS-ROW_BITS); q[2] = DCT_DESCALE(s2, CONST_BITS-ROW_BITS); q[3] = DCT_DESCALE(s3, CONST_BITS-ROW_BITS);
+    q[4] = s4 << ROW_BITS; q[5] = DCT_DESCALE(s5, CONST_BITS-ROW_BITS); q[6] = DCT_DESCALE(s6, CONST_BITS-ROW_BITS); q[7] = DCT_DESCALE(s7, CONST_BITS-ROW_BITS);
+  }
+  // Column pass: removes the row-pass scaling plus a further factor of 8.
+  for (q = p, c = 7; c >= 0; c--, q++)
+  {
+    int32 s0 = q[0*8], s1 = q[1*8], s2 = q[2*8], s3 = q[3*8], s4 = q[4*8], s5 = q[5*8], s6 = q[6*8], s7 = q[7*8];
+    DCT1D(s0, s1, s2, s3, s4, s5, s6, s7);
+    q[0*8] = DCT_DESCALE(s0, ROW_BITS+3); q[1*8] = DCT_DESCALE(s1, CONST_BITS+ROW_BITS+3); q[2*8] = DCT_DESCALE(s2, CONST_BITS+ROW_BITS+3); q[3*8] = DCT_DESCALE(s3, CONST_BITS+ROW_BITS+3);
+    q[4*8] = DCT_DESCALE(s4, ROW_BITS+3); q[5*8] = DCT_DESCALE(s5, CONST_BITS+ROW_BITS+3); q[6*8] = DCT_DESCALE(s6, CONST_BITS+ROW_BITS+3); q[7*8] = DCT_DESCALE(s7, CONST_BITS+ROW_BITS+3);
+  }
+}
+
+struct sym_freq { uint m_key, m_sym_index; };
+
+// Radix sorts sym_freq[] array by 32-bit key m_key. Returns ptr to sorted values.
+// pSyms0 holds the input and pSyms1 is scratch space of the same size; the
+// returned pointer is whichever of the two holds the final pass's output.
+static inline sym_freq* radix_sort_syms(uint num_syms, sym_freq* pSyms0, sym_freq* pSyms1)
+{
+  const uint cMaxPasses = 4;
+  // One 256-bin histogram per key byte, all filled in a single sweep.
+  uint32 hist[256 * cMaxPasses]; clear_obj(hist);
+  for (uint i = 0; i < num_syms; i++) { uint freq = pSyms0[i].m_key; hist[freq & 0xFF]++; hist[256 + ((freq >> 8) & 0xFF)]++; hist[256*2 + ((freq >> 16) & 0xFF)]++; hist[256*3 + ((freq >> 24) & 0xFF)]++; }
+  sym_freq* pCur_syms = pSyms0, *pNew_syms = pSyms1;
+  // Drop high-order passes in which every key byte is zero.
+  uint total_passes = cMaxPasses; while ((total_passes > 1) && (num_syms == hist[(total_passes - 1) * 256])) total_passes--;
+  for (uint pass_shift = 0, pass = 0; pass < total_passes; pass++, pass_shift += 8)
+  {
+    const uint32* pHist = &hist[pass << 8];
+    // Prefix sums give each bucket's start offset; scattering in input order keeps each pass stable.
+    uint offsets[256], cur_ofs = 0;
+    for (uint i = 0; i < 256; i++) { offsets[i] = cur_ofs; cur_ofs += pHist[i]; }
+    for (uint i = 0; i < num_syms; i++)
+      pNew_syms[offsets[(pCur_syms[i].m_key >> pass_shift) & 0xFF]++] = pCur_syms[i];
+    sym_freq* t = pCur_syms; pCur_syms = pNew_syms; pNew_syms = t;
+  }
+  return pCur_syms;
+}
+
+// calculate_minimum_redundancy() originally written by: Alistair Moffat, alistair@cs.mu.oz.au, Jyrki Katajainen, jyrki@diku.dk, November 1996.
+static void calculate_minimum_redundancy(sym_freq *A, int n)
+{
+  // NOTE(review): part of this function body is missing in this text - the
+  // first loop below is cut off mid-expression (everything between a '<' and
+  // a later '>' appears to have been stripped). Restore it from the upstream
+  // source; the statements are kept here exactly as found.
+  int root, leaf, next, avbl, used, dpth;
+  if (n==0) return; else if (n==1) { A[0].m_key = 1; return; }
+  A[0].m_key += A[1].m_key; root = 0; leaf = 2;
+  for (next=1; next < n-1; next++)
+  {
+    if (leaf>=n || A[root].m_key=n || (root=0; next--) A[next].m_key = A[A[next].m_key].m_key+1;
+  avbl = 1; used = dpth = 0; root = n-2; next = n-1;
+  while (avbl>0)
+  {
+    while (root>=0 && (int)A[root].m_key==dpth) { used++; root--; }
+    while (avbl>used) { A[next--].m_key = dpth; avbl--; }
+    avbl = 2*used; dpth++; used = 0;
+  }
+}
+
+// Limits canonical Huffman code table's max code size to max_code_size.
+// pNum_codes[i] is the number of codes of length i and is adjusted in place.
+static void huffman_enforce_max_code_size(int *pNum_codes, int code_list_len, int max_code_size)
+{
+  if (code_list_len <= 1) return;
+
+  // Fold every over-long code into the longest permitted length.
+  for (int i = max_code_size + 1; i <= MAX_HUFF_CODESIZE; i++) pNum_codes[max_code_size] += pNum_codes[i];
+
+  // Total code space used, in units of one max_code_size-bit code.
+  uint32 total = 0;
+  for (int i = max_code_size; i > 0; i--)
+    total += (((uint32)pNum_codes[i]) << (max_code_size - i));
+
+  // Each step frees one unit: a longest code is removed and the deepest
+  // shorter code is replaced by two codes one bit longer.
+  while (total != (1UL << max_code_size))
+  {
+    pNum_codes[max_code_size]--;
+    for (int i = max_code_size - 1; i > 0; i--)
+    {
+      if (pNum_codes[i]) { pNum_codes[i]--; pNum_codes[i + 1] += 2; break; }
+    }
+    total--;
+  }
+}
+
+// Generates an optimized Huffman table.
+// Builds m_huff_bits[table_num] and m_huff_val[table_num] from the symbol
+// counts in m_huff_count[table_num][0 .. table_len-1].
+void jpeg_encoder::optimize_huffman_table(int table_num, int table_len)
+{
+  sym_freq syms0[MAX_HUFF_SYMBOLS], syms1[MAX_HUFF_SYMBOLS];
+  syms0[0].m_key = 1; syms0[0].m_sym_index = 0; // dummy symbol, assures that no valid code contains all 1's
+  int num_used_syms = 1;
+  const uint32 *pSym_count = &m_huff_count[table_num][0];
+  // Real symbols are stored with index + 1; index 0 is the dummy symbol.
+  for (int i = 0; i < table_len; i++)
+    if (pSym_count[i]) { syms0[num_used_syms].m_key = pSym_count[i]; syms0[num_used_syms++].m_sym_index = i + 1; }
+  sym_freq* pSyms = radix_sort_syms(num_used_syms, syms0, syms1);
+  // After this call m_key is read as a code length rather than a frequency.
+  calculate_minimum_redundancy(pSyms, num_used_syms);
+
+  // Count the # of symbols of each code size.
+  int num_codes[1 + MAX_HUFF_CODESIZE]; clear_obj(num_codes);
+  for (int i = 0; i < num_used_syms; i++)
+    num_codes[pSyms[i].m_key]++;
+
+  const uint JPGE_CODE_SIZE_LIMIT = 16; // the maximum possible size of a JPEG Huffman code (valid range is [9,16] - 9 vs. 8 because of the dummy symbol)
+  huffman_enforce_max_code_size(num_codes, num_used_syms, JPGE_CODE_SIZE_LIMIT);
+
+  // Compute m_huff_bits array, which contains the # of symbols per code size.
+  // NOTE(review): static_cast target type missing in this text; presumably uint8 - confirm.
+  clear_obj(m_huff_bits[table_num]);
+  for (int i = 1; i <= (int)JPGE_CODE_SIZE_LIMIT; i++)
+    m_huff_bits[table_num][i] = static_cast(num_codes[i]);
+
+  // Remove the dummy symbol added above, which must be in largest bucket.
+  for (int i = JPGE_CODE_SIZE_LIMIT; i >= 1; i--)
+  {
+    if (m_huff_bits[table_num][i]) { m_huff_bits[table_num][i]--; break; }
+  }
+
+  // Compute the m_huff_val array, which contains the symbol indices sorted by code size (smallest to largest).
+  // pSyms[0] (the dummy symbol) is skipped; the stored index is converted back to 0-based.
+  for (int i = num_used_syms - 1; i >= 1; i--)
+    m_huff_val[table_num][num_used_syms - 1 - i] = static_cast(pSyms[i].m_sym_index - 1);
+}
+
+// JPEG marker generation.
+// Writes one byte to the output stream. A failed write clears
+// m_all_stream_writes_succeeded, after which no further writes are attempted.
+void jpeg_encoder::emit_byte(uint8 i)
+{
+  m_all_stream_writes_succeeded = m_all_stream_writes_succeeded && m_pStream->put_obj(i);
+}
+
+// Writes the low 16 bits of i, high byte first.
+void jpeg_encoder::emit_word(uint i)
+{
+  emit_byte(uint8(i >> 8)); emit_byte(uint8(i & 0xFF));
+}
+
+// Writes 0xFF followed by the marker code.
+void jpeg_encoder::emit_marker(int marker)
+{
+  emit_byte(uint8(0xFF)); emit_byte(uint8(marker));
+}
+
+// Emit JFIF marker
+void jpeg_encoder::emit_jfif_app0()
+{
+  emit_marker(M_APP0);
+  // Segment length: the 2-byte length field plus the 14 payload bytes below.
+  emit_word(2 + 4 + 1 + 2 + 1 + 2 + 2 + 1 + 1);
+  emit_byte(0x4A); emit_byte(0x46); emit_byte(0x49); emit_byte(0x46); /* Identifier: ASCII "JFIF" */
+  emit_byte(0);
+  emit_byte(1); /* Major version */
+  emit_byte(1); /* Minor version */
+  emit_byte(0); /* Density unit */
+  emit_word(1);
+  emit_word(1);
+  emit_byte(0); /* No thumbnail image */
+  emit_byte(0);
+}
+
+// Emit quantization tables
+// Two tables are written for 3-component images, one otherwise.
+void jpeg_encoder::emit_dqt()
+{
+  for (int i = 0; i < ((m_num_components == 3) ? 2 : 1); i++)
+  {
+    emit_marker(M_DQT);
+    emit_word(64 + 1 + 2);
+    // NOTE(review): static_cast target types missing in this text; presumably uint8 - confirm.
+    emit_byte(static_cast(i));
+    for (int j = 0; j < 64; j++)
+      emit_byte(static_cast(m_quantization_tables[i][j]));
+  }
+}
+
+// Emit start of frame marker
+void jpeg_encoder::emit_sof()
+{
+  emit_marker(M_SOF0); /* baseline */
+  emit_word(3 * m_num_components + 2 + 5 + 1);
+  emit_byte(8); /* precision */
+  emit_word(m_image_y);
+  emit_word(m_image_x);
+  emit_byte(m_num_components);
+  for (int i = 0; i < m_num_components; i++)
+  {
+    emit_byte(static_cast(i + 1)); /* component ID */
+    emit_byte((m_comp_h_samp[i] << 4) + m_comp_v_samp[i]); /* h and v sampling */
+    emit_byte(i > 0); /* quant. table num */
+  }
+}
+
+// Emit Huffman table.
+// bits[1..16] are the code counts per length, val the symbols; ac_flag
+// selects the table class written in the high nibble of the table byte.
+void jpeg_encoder::emit_dht(uint8 *bits, uint8 *val, int index, bool ac_flag)
+{
+  emit_marker(M_DHT);
+
+  // The number of symbols is the sum of the per-length counts.
+  int length = 0;
+  for (int i = 1; i <= 16; i++)
+    length += bits[i];
+
+  emit_word(length + 2 + 1 + 16);
+  emit_byte(static_cast(index + (ac_flag << 4)));
+
+  for (int i = 1; i <= 16; i++)
+    emit_byte(bits[i]);
+
+  for (int i = 0; i < length; i++)
+    emit_byte(val[i]);
+}
+
+// Emit all Huffman tables.
+// Table slots 0 and 1 are emitted as DC tables, slots 2 and 3 as AC tables;
+// the second table of each pair is only written for 3-component images.
+void jpeg_encoder::emit_dhts()
+{
+  emit_dht(m_huff_bits[0+0], m_huff_val[0+0], 0, false);
+  emit_dht(m_huff_bits[2+0], m_huff_val[2+0], 0, true);
+  if (m_num_components == 3)
+  {
+    emit_dht(m_huff_bits[0+1], m_huff_val[0+1], 1, false);
+    emit_dht(m_huff_bits[2+1], m_huff_val[2+1], 1, true);
+  }
+}
+
+// emit start of scan
+void jpeg_encoder::emit_sos()
+{
+  emit_marker(M_SOS);
+  emit_word(2 * m_num_components + 2 + 1 + 3);
+  emit_byte(m_num_components);
+  for (int i = 0; i < m_num_components; i++)
+  {
+    emit_byte(static_cast(i + 1));
+    // Component 0 uses table pair 0, the other components table pair 1.
+    if (i == 0)
+      emit_byte((0 << 4) + 0);
+    else
+      emit_byte((1 << 4) + 1);
+  }
+  emit_byte(0); /* spectral selection */
+  emit_byte(63);
+  emit_byte(0);
+}
+
+// Emit all markers at beginning of image file.
+void jpeg_encoder::emit_markers() +{ + emit_marker(M_SOI); + emit_jfif_app0(); + emit_dqt(); + emit_sof(); + emit_dhts(); + emit_sos(); +} + +// Compute the actual canonical Huffman codes/code sizes given the JPEG huff bits and val arrays. +void jpeg_encoder::compute_huffman_table(uint *codes, uint8 *code_sizes, uint8 *bits, uint8 *val) +{ + int i, l, last_p, si; + uint8 huff_size[257]; + uint huff_code[257]; + uint code; + + int p = 0; + for (l = 1; l <= 16; l++) + for (i = 1; i <= bits[l]; i++) + huff_size[p++] = (char)l; + + huff_size[p] = 0; last_p = p; // write sentinel + + code = 0; si = huff_size[0]; p = 0; + + while (huff_size[p]) + { + while (huff_size[p] == si) + huff_code[p++] = code++; + code <<= 1; + si++; + } + + memset(codes, 0, sizeof(codes[0])*256); + memset(code_sizes, 0, sizeof(code_sizes[0])*256); + for (p = 0; p < last_p; p++) + { + codes[val[p]] = huff_code[p]; + code_sizes[val[p]] = huff_size[p]; + } +} + +// Quantization table generation. +void jpeg_encoder::compute_quant_table(int32 *pDst, int16 *pSrc) +{ + int32 q; + if (m_params.m_quality < 50) + q = 5000 / m_params.m_quality; + else + q = 200 - m_params.m_quality * 2; + for (int i = 0; i < 64; i++) + { + int32 j = *pSrc++; j = (j * q + 50L) / 100L; + *pDst++ = JPGE_MIN(JPGE_MAX(j, 1), 255); + } +} + +// Higher-level methods. 
+void jpeg_encoder::first_pass_init() +{ + m_bit_buffer = 0; m_bits_in = 0; + memset(m_last_dc_val, 0, 3 * sizeof(m_last_dc_val[0])); + m_mcu_y_ofs = 0; + m_pass_num = 1; +} + +bool jpeg_encoder::second_pass_init() +{ + compute_huffman_table(&m_huff_codes[0+0][0], &m_huff_code_sizes[0+0][0], m_huff_bits[0+0], m_huff_val[0+0]); + compute_huffman_table(&m_huff_codes[2+0][0], &m_huff_code_sizes[2+0][0], m_huff_bits[2+0], m_huff_val[2+0]); + if (m_num_components > 1) + { + compute_huffman_table(&m_huff_codes[0+1][0], &m_huff_code_sizes[0+1][0], m_huff_bits[0+1], m_huff_val[0+1]); + compute_huffman_table(&m_huff_codes[2+1][0], &m_huff_code_sizes[2+1][0], m_huff_bits[2+1], m_huff_val[2+1]); + } + first_pass_init(); + emit_markers(); + m_pass_num = 2; + return true; +} + +bool jpeg_encoder::jpg_open(int p_x_res, int p_y_res, int src_channels) +{ + m_num_components = 3; + switch (m_params.m_subsampling) + { + case Y_ONLY: + { + m_num_components = 1; + m_comp_h_samp[0] = 1; m_comp_v_samp[0] = 1; + m_mcu_x = 8; m_mcu_y = 8; + break; + } + case H1V1: + { + m_comp_h_samp[0] = 1; m_comp_v_samp[0] = 1; + m_comp_h_samp[1] = 1; m_comp_v_samp[1] = 1; + m_comp_h_samp[2] = 1; m_comp_v_samp[2] = 1; + m_mcu_x = 8; m_mcu_y = 8; + break; + } + case H2V1: + { + m_comp_h_samp[0] = 2; m_comp_v_samp[0] = 1; + m_comp_h_samp[1] = 1; m_comp_v_samp[1] = 1; + m_comp_h_samp[2] = 1; m_comp_v_samp[2] = 1; + m_mcu_x = 16; m_mcu_y = 8; + break; + } + case H2V2: + { + m_comp_h_samp[0] = 2; m_comp_v_samp[0] = 2; + m_comp_h_samp[1] = 1; m_comp_v_samp[1] = 1; + m_comp_h_samp[2] = 1; m_comp_v_samp[2] = 1; + m_mcu_x = 16; m_mcu_y = 16; + } + } + + m_image_x = p_x_res; m_image_y = p_y_res; + m_image_bpp = src_channels; + m_image_bpl = m_image_x * src_channels; + m_image_x_mcu = (m_image_x + m_mcu_x - 1) & (~(m_mcu_x - 1)); + m_image_y_mcu = (m_image_y + m_mcu_y - 1) & (~(m_mcu_y - 1)); + m_image_bpl_xlt = m_image_x * m_num_components; + m_image_bpl_mcu = m_image_x_mcu * m_num_components; + m_mcus_per_row = 
m_image_x_mcu / m_mcu_x; + + if ((m_mcu_lines[0] = static_cast(jpge_malloc(m_image_bpl_mcu * m_mcu_y))) == NULL) return false; + for (int i = 1; i < m_mcu_y; i++) + m_mcu_lines[i] = m_mcu_lines[i-1] + m_image_bpl_mcu; + + compute_quant_table(m_quantization_tables[0], s_std_lum_quant); + compute_quant_table(m_quantization_tables[1], m_params.m_no_chroma_discrim_flag ? s_std_lum_quant : s_std_croma_quant); + + m_out_buf_left = JPGE_OUT_BUF_SIZE; + m_pOut_buf = m_out_buf; + + if (m_params.m_two_pass_flag) + { + clear_obj(m_huff_count); + first_pass_init(); + } + else + { + memcpy(m_huff_bits[0+0], s_dc_lum_bits, 17); memcpy(m_huff_val [0+0], s_dc_lum_val, DC_LUM_CODES); + memcpy(m_huff_bits[2+0], s_ac_lum_bits, 17); memcpy(m_huff_val [2+0], s_ac_lum_val, AC_LUM_CODES); + memcpy(m_huff_bits[0+1], s_dc_chroma_bits, 17); memcpy(m_huff_val [0+1], s_dc_chroma_val, DC_CHROMA_CODES); + memcpy(m_huff_bits[2+1], s_ac_chroma_bits, 17); memcpy(m_huff_val [2+1], s_ac_chroma_val, AC_CHROMA_CODES); + if (!second_pass_init()) return false; // in effect, skip over the first pass + } + return m_all_stream_writes_succeeded; +} + +void jpeg_encoder::load_block_8_8_grey(int x) +{ + uint8 *pSrc; + sample_array_t *pDst = m_sample_array; + x <<= 3; + for (int i = 0; i < 8; i++, pDst += 8) + { + pSrc = m_mcu_lines[i] + x; + pDst[0] = pSrc[0] - 128; pDst[1] = pSrc[1] - 128; pDst[2] = pSrc[2] - 128; pDst[3] = pSrc[3] - 128; + pDst[4] = pSrc[4] - 128; pDst[5] = pSrc[5] - 128; pDst[6] = pSrc[6] - 128; pDst[7] = pSrc[7] - 128; + } +} + +void jpeg_encoder::load_block_8_8(int x, int y, int c) +{ + uint8 *pSrc; + sample_array_t *pDst = m_sample_array; + x = (x * (8 * 3)) + c; + y <<= 3; + for (int i = 0; i < 8; i++, pDst += 8) + { + pSrc = m_mcu_lines[y + i] + x; + pDst[0] = pSrc[0 * 3] - 128; pDst[1] = pSrc[1 * 3] - 128; pDst[2] = pSrc[2 * 3] - 128; pDst[3] = pSrc[3 * 3] - 128; + pDst[4] = pSrc[4 * 3] - 128; pDst[5] = pSrc[5 * 3] - 128; pDst[6] = pSrc[6 * 3] - 128; pDst[7] = pSrc[7 * 3] - 128; + } 
+} + +void jpeg_encoder::load_block_16_8(int x, int c) +{ + uint8 *pSrc1, *pSrc2; + sample_array_t *pDst = m_sample_array; + x = (x * (16 * 3)) + c; + int a = 0, b = 2; + for (int i = 0; i < 16; i += 2, pDst += 8) + { + pSrc1 = m_mcu_lines[i + 0] + x; + pSrc2 = m_mcu_lines[i + 1] + x; + pDst[0] = ((pSrc1[ 0 * 3] + pSrc1[ 1 * 3] + pSrc2[ 0 * 3] + pSrc2[ 1 * 3] + a) >> 2) - 128; pDst[1] = ((pSrc1[ 2 * 3] + pSrc1[ 3 * 3] + pSrc2[ 2 * 3] + pSrc2[ 3 * 3] + b) >> 2) - 128; + pDst[2] = ((pSrc1[ 4 * 3] + pSrc1[ 5 * 3] + pSrc2[ 4 * 3] + pSrc2[ 5 * 3] + a) >> 2) - 128; pDst[3] = ((pSrc1[ 6 * 3] + pSrc1[ 7 * 3] + pSrc2[ 6 * 3] + pSrc2[ 7 * 3] + b) >> 2) - 128; + pDst[4] = ((pSrc1[ 8 * 3] + pSrc1[ 9 * 3] + pSrc2[ 8 * 3] + pSrc2[ 9 * 3] + a) >> 2) - 128; pDst[5] = ((pSrc1[10 * 3] + pSrc1[11 * 3] + pSrc2[10 * 3] + pSrc2[11 * 3] + b) >> 2) - 128; + pDst[6] = ((pSrc1[12 * 3] + pSrc1[13 * 3] + pSrc2[12 * 3] + pSrc2[13 * 3] + a) >> 2) - 128; pDst[7] = ((pSrc1[14 * 3] + pSrc1[15 * 3] + pSrc2[14 * 3] + pSrc2[15 * 3] + b) >> 2) - 128; + int temp = a; a = b; b = temp; + } +} + +void jpeg_encoder::load_block_16_8_8(int x, int c) +{ + uint8 *pSrc1; + sample_array_t *pDst = m_sample_array; + x = (x * (16 * 3)) + c; + for (int i = 0; i < 8; i++, pDst += 8) + { + pSrc1 = m_mcu_lines[i + 0] + x; + pDst[0] = ((pSrc1[ 0 * 3] + pSrc1[ 1 * 3]) >> 1) - 128; pDst[1] = ((pSrc1[ 2 * 3] + pSrc1[ 3 * 3]) >> 1) - 128; + pDst[2] = ((pSrc1[ 4 * 3] + pSrc1[ 5 * 3]) >> 1) - 128; pDst[3] = ((pSrc1[ 6 * 3] + pSrc1[ 7 * 3]) >> 1) - 128; + pDst[4] = ((pSrc1[ 8 * 3] + pSrc1[ 9 * 3]) >> 1) - 128; pDst[5] = ((pSrc1[10 * 3] + pSrc1[11 * 3]) >> 1) - 128; + pDst[6] = ((pSrc1[12 * 3] + pSrc1[13 * 3]) >> 1) - 128; pDst[7] = ((pSrc1[14 * 3] + pSrc1[15 * 3]) >> 1) - 128; + } +} + +void jpeg_encoder::load_quantized_coefficients(int component_num) +{ + int32 *q = m_quantization_tables[component_num > 0]; + int16 *pDst = m_coefficient_array; + for (int i = 0; i < 64; i++) + { + sample_array_t j = m_sample_array[s_zag[i]]; + 
if (j < 0) + { + if ((j = -j + (*q >> 1)) < *q) + *pDst++ = 0; + else + *pDst++ = static_cast(-(j / *q)); + } + else + { + if ((j = j + (*q >> 1)) < *q) + *pDst++ = 0; + else + *pDst++ = static_cast((j / *q)); + } + q++; + } +} + +void jpeg_encoder::flush_output_buffer() +{ + if (m_out_buf_left != JPGE_OUT_BUF_SIZE) + m_all_stream_writes_succeeded = m_all_stream_writes_succeeded && m_pStream->put_buf(m_out_buf, JPGE_OUT_BUF_SIZE - m_out_buf_left); + m_pOut_buf = m_out_buf; + m_out_buf_left = JPGE_OUT_BUF_SIZE; +} + +void jpeg_encoder::put_bits(uint bits, uint len) +{ + m_bit_buffer |= ((uint32)bits << (24 - (m_bits_in += len))); + while (m_bits_in >= 8) + { + uint8 c; + #define JPGE_PUT_BYTE(c) { *m_pOut_buf++ = (c); if (--m_out_buf_left == 0) flush_output_buffer(); } + JPGE_PUT_BYTE(c = (uint8)((m_bit_buffer >> 16) & 0xFF)); + if (c == 0xFF) JPGE_PUT_BYTE(0); + m_bit_buffer <<= 8; + m_bits_in -= 8; + } +} + +void jpeg_encoder::code_coefficients_pass_one(int component_num) +{ + if (component_num >= 3) return; // just to shut up static analysis + int i, run_len, nbits, temp1; + int16 *src = m_coefficient_array; + uint32 *dc_count = component_num ? m_huff_count[0 + 1] : m_huff_count[0 + 0], *ac_count = component_num ? 
m_huff_count[2 + 1] : m_huff_count[2 + 0]; + + temp1 = src[0] - m_last_dc_val[component_num]; + m_last_dc_val[component_num] = src[0]; + if (temp1 < 0) temp1 = -temp1; + + nbits = 0; + while (temp1) + { + nbits++; temp1 >>= 1; + } + + dc_count[nbits]++; + for (run_len = 0, i = 1; i < 64; i++) + { + if ((temp1 = m_coefficient_array[i]) == 0) + run_len++; + else + { + while (run_len >= 16) + { + ac_count[0xF0]++; + run_len -= 16; + } + if (temp1 < 0) temp1 = -temp1; + nbits = 1; + while (temp1 >>= 1) nbits++; + ac_count[(run_len << 4) + nbits]++; + run_len = 0; + } + } + if (run_len) ac_count[0]++; +} + +void jpeg_encoder::code_coefficients_pass_two(int component_num) +{ + int i, j, run_len, nbits, temp1, temp2; + int16 *pSrc = m_coefficient_array; + uint *codes[2]; + uint8 *code_sizes[2]; + + if (component_num == 0) + { + codes[0] = m_huff_codes[0 + 0]; codes[1] = m_huff_codes[2 + 0]; + code_sizes[0] = m_huff_code_sizes[0 + 0]; code_sizes[1] = m_huff_code_sizes[2 + 0]; + } + else + { + codes[0] = m_huff_codes[0 + 1]; codes[1] = m_huff_codes[2 + 1]; + code_sizes[0] = m_huff_code_sizes[0 + 1]; code_sizes[1] = m_huff_code_sizes[2 + 1]; + } + + temp1 = temp2 = pSrc[0] - m_last_dc_val[component_num]; + m_last_dc_val[component_num] = pSrc[0]; + + if (temp1 < 0) + { + temp1 = -temp1; temp2--; + } + + nbits = 0; + while (temp1) + { + nbits++; temp1 >>= 1; + } + + put_bits(codes[0][nbits], code_sizes[0][nbits]); + if (nbits) put_bits(temp2 & ((1 << nbits) - 1), nbits); + + for (run_len = 0, i = 1; i < 64; i++) + { + if ((temp1 = m_coefficient_array[i]) == 0) + run_len++; + else + { + while (run_len >= 16) + { + put_bits(codes[1][0xF0], code_sizes[1][0xF0]); + run_len -= 16; + } + if ((temp2 = temp1) < 0) + { + temp1 = -temp1; + temp2--; + } + nbits = 1; + while (temp1 >>= 1) + nbits++; + j = (run_len << 4) + nbits; + put_bits(codes[1][j], code_sizes[1][j]); + put_bits(temp2 & ((1 << nbits) - 1), nbits); + run_len = 0; + } + } + if (run_len) + put_bits(codes[1][0], 
code_sizes[1][0]); +} + +void jpeg_encoder::code_block(int component_num) +{ + DCT2D(m_sample_array); + load_quantized_coefficients(component_num); + if (m_pass_num == 1) + code_coefficients_pass_one(component_num); + else + code_coefficients_pass_two(component_num); +} + +void jpeg_encoder::process_mcu_row() +{ + if (m_num_components == 1) + { + for (int i = 0; i < m_mcus_per_row; i++) + { + load_block_8_8_grey(i); code_block(0); + } + } + else if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 1)) + { + for (int i = 0; i < m_mcus_per_row; i++) + { + load_block_8_8(i, 0, 0); code_block(0); load_block_8_8(i, 0, 1); code_block(1); load_block_8_8(i, 0, 2); code_block(2); + } + } + else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 1)) + { + for (int i = 0; i < m_mcus_per_row; i++) + { + load_block_8_8(i * 2 + 0, 0, 0); code_block(0); load_block_8_8(i * 2 + 1, 0, 0); code_block(0); + load_block_16_8_8(i, 1); code_block(1); load_block_16_8_8(i, 2); code_block(2); + } + } + else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 2)) + { + for (int i = 0; i < m_mcus_per_row; i++) + { + load_block_8_8(i * 2 + 0, 0, 0); code_block(0); load_block_8_8(i * 2 + 1, 0, 0); code_block(0); + load_block_8_8(i * 2 + 0, 1, 0); code_block(0); load_block_8_8(i * 2 + 1, 1, 0); code_block(0); + load_block_16_8(i, 1); code_block(1); load_block_16_8(i, 2); code_block(2); + } + } +} + +bool jpeg_encoder::terminate_pass_one() +{ + optimize_huffman_table(0+0, DC_LUM_CODES); optimize_huffman_table(2+0, AC_LUM_CODES); + if (m_num_components > 1) + { + optimize_huffman_table(0+1, DC_CHROMA_CODES); optimize_huffman_table(2+1, AC_CHROMA_CODES); + } + return second_pass_init(); +} + +bool jpeg_encoder::terminate_pass_two() +{ + put_bits(0x7F, 7); + flush_output_buffer(); + emit_marker(M_EOI); + m_pass_num++; // purposely bump up m_pass_num, for debugging + return true; +} + +bool jpeg_encoder::process_end_of_image() +{ + if (m_mcu_y_ofs) + { + if (m_mcu_y_ofs < 16) // check here just to 
shut up static analysis + { + for (int i = m_mcu_y_ofs; i < m_mcu_y; i++) + memcpy(m_mcu_lines[i], m_mcu_lines[m_mcu_y_ofs - 1], m_image_bpl_mcu); + } + + process_mcu_row(); + } + + if (m_pass_num == 1) + return terminate_pass_one(); + else + return terminate_pass_two(); +} + +void jpeg_encoder::load_mcu(const void *pSrc) +{ + const uint8* Psrc = reinterpret_cast(pSrc); + + uint8* pDst = m_mcu_lines[m_mcu_y_ofs]; // OK to write up to m_image_bpl_xlt bytes to pDst + + if (m_num_components == 1) + { + if (m_image_bpp == 4) + RGBA_to_Y(pDst, Psrc, m_image_x); + else if (m_image_bpp == 3) + RGB_to_Y(pDst, Psrc, m_image_x); + else + memcpy(pDst, Psrc, m_image_x); + } + else + { + if (m_image_bpp == 4) + RGBA_to_YCC(pDst, Psrc, m_image_x); + else if (m_image_bpp == 3) + RGB_to_YCC(pDst, Psrc, m_image_x); + else + Y_to_YCC(pDst, Psrc, m_image_x); + } + + // Possibly duplicate pixels at end of scanline if not a multiple of 8 or 16 + if (m_num_components == 1) + memset(m_mcu_lines[m_mcu_y_ofs] + m_image_bpl_xlt, pDst[m_image_bpl_xlt - 1], m_image_x_mcu - m_image_x); + else + { + const uint8 y = pDst[m_image_bpl_xlt - 3 + 0], cb = pDst[m_image_bpl_xlt - 3 + 1], cr = pDst[m_image_bpl_xlt - 3 + 2]; + uint8 *q = m_mcu_lines[m_mcu_y_ofs] + m_image_bpl_xlt; + for (int i = m_image_x; i < m_image_x_mcu; i++) + { + *q++ = y; *q++ = cb; *q++ = cr; + } + } + + if (++m_mcu_y_ofs == m_mcu_y) + { + process_mcu_row(); + m_mcu_y_ofs = 0; + } +} + +void jpeg_encoder::clear() +{ + m_mcu_lines[0] = NULL; + m_pass_num = 0; + m_all_stream_writes_succeeded = true; +} + +jpeg_encoder::jpeg_encoder() +{ + clear(); +} + +jpeg_encoder::~jpeg_encoder() +{ + deinit(); +} + +bool jpeg_encoder::init(output_stream *pStream, int64_t width, int64_t height, int64_t src_channels, const params &comp_params) +{ + deinit(); + if (((!pStream) || (width < 1) || (height < 1)) || ((src_channels != 1) && (src_channels != 3) && (src_channels != 4)) || (!comp_params.check_valid())) return false; + m_pStream = pStream; 
+ m_params = comp_params; + return jpg_open(width, height, src_channels); +} + +void jpeg_encoder::deinit() +{ + jpge_free(m_mcu_lines[0]); + clear(); +} + +bool jpeg_encoder::process_scanline(const void* pScanline) +{ + if ((m_pass_num < 1) || (m_pass_num > 2)) return false; + if (m_all_stream_writes_succeeded) + { + if (!pScanline) + { + if (!process_end_of_image()) return false; + } + else + { + load_mcu(pScanline); + } + } + return m_all_stream_writes_succeeded; +} + +// Higher level wrappers/examples (optional). +#include + +class cfile_stream : public output_stream +{ + cfile_stream(const cfile_stream &); + cfile_stream &operator= (const cfile_stream &); + + FILE* m_pFile; + bool m_bStatus; + +public: + cfile_stream() : m_pFile(NULL), m_bStatus(false) { } + + virtual ~cfile_stream() + { + close(); + } + + bool open(const char *pFilename) + { + close(); +#if defined(_MSC_VER) + if (fopen_s(&m_pFile, pFilename, "wb") != 0) + { + return false; + } +#else + m_pFile = fopen(pFilename, "wb"); +#endif + m_bStatus = (m_pFile != NULL); + return m_bStatus; + } + + bool close() + { + if (m_pFile) + { + if (fclose(m_pFile) == EOF) + { + m_bStatus = false; + } + m_pFile = NULL; + } + return m_bStatus; + } + + virtual bool put_buf(const void* pBuf, int64_t len) + { + m_bStatus = m_bStatus && (fwrite(pBuf, len, 1, m_pFile) == 1); + return m_bStatus; + } + + uint get_size() const + { + return m_pFile ? ftell(m_pFile) : 0; + } +}; + +// Writes JPEG image to file. 
+bool compress_image_to_jpeg_file(const char *pFilename, int64_t width, int64_t height, int64_t num_channels, const uint8 *pImage_data, const params &comp_params) +{ + cfile_stream dst_stream; + if (!dst_stream.open(pFilename)) + return false; + + jpge::jpeg_encoder dst_image; + if (!dst_image.init(&dst_stream, width, height, num_channels, comp_params)) + return false; + + for (uint pass_index = 0; pass_index < dst_image.get_total_passes(); pass_index++) + { + for (int64_t i = 0; i < height; i++) + { + // i, width, and num_channels are all 64bit + const uint8* pBuf = pImage_data + i * width * num_channels; + if (!dst_image.process_scanline(pBuf)) + return false; + } + if (!dst_image.process_scanline(NULL)) + return false; + } + + dst_image.deinit(); + + return dst_stream.close(); +} + +class memory_stream : public output_stream +{ + memory_stream(const memory_stream &); + memory_stream &operator= (const memory_stream &); + + uint8 *m_pBuf; + uint64_t m_buf_size, m_buf_ofs; + +public: + memory_stream(void *pBuf, uint64_t buf_size) : m_pBuf(static_cast(pBuf)), m_buf_size(buf_size), m_buf_ofs(0) { } + + virtual ~memory_stream() { } + + virtual bool put_buf(const void* pBuf, int64_t len) + { + uint64_t buf_remaining = m_buf_size - m_buf_ofs; + if ((uint64_t)len > buf_remaining) + return false; + memcpy(m_pBuf + m_buf_ofs, pBuf, len); + m_buf_ofs += len; + return true; + } + + uint64_t get_size() const + { + return m_buf_ofs; + } +}; + +bool compress_image_to_jpeg_file_in_memory(void *pDstBuf, int64_t &buf_size, int64_t width, int64_t height, int64_t num_channels, const uint8 *pImage_data, const params &comp_params) +{ + if ((!pDstBuf) || (!buf_size)) + return false; + + memory_stream dst_stream(pDstBuf, buf_size); + + buf_size = 0; + + jpge::jpeg_encoder dst_image; + if (!dst_image.init(&dst_stream, width, height, num_channels, comp_params)) + return false; + + for (uint pass_index = 0; pass_index < dst_image.get_total_passes(); pass_index++) + { + for (int64_t i = 0; 
i < height; i++) + { + const uint8* pScanline = pImage_data + i * width * num_channels; + if (!dst_image.process_scanline(pScanline)) + return false; + } + if (!dst_image.process_scanline(NULL)) + return false; + } + + dst_image.deinit(); + + buf_size = dst_stream.get_size(); + return true; +} + +} // namespace jpge \ No newline at end of file diff --git a/crazy_functions/test_project/cpp/longcode/prod_cons.h b/crazy_functions/test_project/cpp/longcode/prod_cons.h new file mode 100644 index 0000000..28d99bd --- /dev/null +++ b/crazy_functions/test_project/cpp/longcode/prod_cons.h @@ -0,0 +1,433 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include "libipc/def.h" + +#include "libipc/platform/detail.h" +#include "libipc/circ/elem_def.h" +#include "libipc/utility/log.h" +#include "libipc/utility/utility.h" + +namespace ipc { + +//////////////////////////////////////////////////////////////// +/// producer-consumer implementation +//////////////////////////////////////////////////////////////// + +template +struct prod_cons_impl; + +template <> +struct prod_cons_impl> { + + template + struct elem_t { + std::aligned_storage_t data_ {}; + }; + + alignas(cache_line_size) std::atomic rd_; // read index + alignas(cache_line_size) std::atomic wt_; // write index + + constexpr circ::u2_t cursor() const noexcept { + return 0; + } + + template + bool push(W* /*wrapper*/, F&& f, E* elems) { + auto cur_wt = circ::index_of(wt_.load(std::memory_order_relaxed)); + if (cur_wt == circ::index_of(rd_.load(std::memory_order_acquire) - 1)) { + return false; // full + } + std::forward(f)(&(elems[cur_wt].data_)); + wt_.fetch_add(1, std::memory_order_release); + return true; + } + + /** + * In single-single-unicast, 'force_push' means 'no reader' or 'the only one reader is dead'. + * So we could just disconnect all connections of receiver, and return false. 
+ */ + template + bool force_push(W* wrapper, F&&, E*) { + wrapper->elems()->disconnect_receiver(~static_cast(0u)); + return false; + } + + template + bool pop(W* /*wrapper*/, circ::u2_t& /*cur*/, F&& f, R&& out, E* elems) { + auto cur_rd = circ::index_of(rd_.load(std::memory_order_relaxed)); + if (cur_rd == circ::index_of(wt_.load(std::memory_order_acquire))) { + return false; // empty + } + std::forward(f)(&(elems[cur_rd].data_)); + std::forward(out)(true); + rd_.fetch_add(1, std::memory_order_release); + return true; + } +}; + +template <> +struct prod_cons_impl> + : prod_cons_impl> { + + template + bool force_push(W* wrapper, F&&, E*) { + wrapper->elems()->disconnect_receiver(1); + return false; + } + + template class E, std::size_t DS, std::size_t AS> + bool pop(W* /*wrapper*/, circ::u2_t& /*cur*/, F&& f, R&& out, E* elems) { + byte_t buff[DS]; + for (unsigned k = 0;;) { + auto cur_rd = rd_.load(std::memory_order_relaxed); + if (circ::index_of(cur_rd) == + circ::index_of(wt_.load(std::memory_order_acquire))) { + return false; // empty + } + std::memcpy(buff, &(elems[circ::index_of(cur_rd)].data_), sizeof(buff)); + if (rd_.compare_exchange_weak(cur_rd, cur_rd + 1, std::memory_order_release)) { + std::forward(f)(buff); + std::forward(out)(true); + return true; + } + ipc::yield(k); + } + } +}; + +template <> +struct prod_cons_impl> + : prod_cons_impl> { + + using flag_t = std::uint64_t; + + template + struct elem_t { + std::aligned_storage_t data_ {}; + std::atomic f_ct_ { 0 }; // commit flag + }; + + alignas(cache_line_size) std::atomic ct_; // commit index + + template + bool push(W* /*wrapper*/, F&& f, E* elems) { + circ::u2_t cur_ct, nxt_ct; + for (unsigned k = 0;;) { + cur_ct = ct_.load(std::memory_order_relaxed); + if (circ::index_of(nxt_ct = cur_ct + 1) == + circ::index_of(rd_.load(std::memory_order_acquire))) { + return false; // full + } + if (ct_.compare_exchange_weak(cur_ct, nxt_ct, std::memory_order_acq_rel)) { + break; + } + ipc::yield(k); + } + 
auto* el = elems + circ::index_of(cur_ct); + std::forward(f)(&(el->data_)); + // set flag & try update wt + el->f_ct_.store(~static_cast(cur_ct), std::memory_order_release); + while (1) { + auto cac_ct = el->f_ct_.load(std::memory_order_acquire); + if (cur_ct != wt_.load(std::memory_order_relaxed)) { + return true; + } + if ((~cac_ct) != cur_ct) { + return true; + } + if (!el->f_ct_.compare_exchange_strong(cac_ct, 0, std::memory_order_relaxed)) { + return true; + } + wt_.store(nxt_ct, std::memory_order_release); + cur_ct = nxt_ct; + nxt_ct = cur_ct + 1; + el = elems + circ::index_of(cur_ct); + } + return true; + } + + template + bool force_push(W* wrapper, F&&, E*) { + wrapper->elems()->disconnect_receiver(1); + return false; + } + + template class E, std::size_t DS, std::size_t AS> + bool pop(W* /*wrapper*/, circ::u2_t& /*cur*/, F&& f, R&& out, E* elems) { + byte_t buff[DS]; + for (unsigned k = 0;;) { + auto cur_rd = rd_.load(std::memory_order_relaxed); + auto cur_wt = wt_.load(std::memory_order_acquire); + auto id_rd = circ::index_of(cur_rd); + auto id_wt = circ::index_of(cur_wt); + if (id_rd == id_wt) { + auto* el = elems + id_wt; + auto cac_ct = el->f_ct_.load(std::memory_order_acquire); + if ((~cac_ct) != cur_wt) { + return false; // empty + } + if (el->f_ct_.compare_exchange_weak(cac_ct, 0, std::memory_order_relaxed)) { + wt_.store(cur_wt + 1, std::memory_order_release); + } + k = 0; + } + else { + std::memcpy(buff, &(elems[circ::index_of(cur_rd)].data_), sizeof(buff)); + if (rd_.compare_exchange_weak(cur_rd, cur_rd + 1, std::memory_order_release)) { + std::forward(f)(buff); + std::forward(out)(true); + return true; + } + ipc::yield(k); + } + } + } +}; + +template <> +struct prod_cons_impl> { + + using rc_t = std::uint64_t; + + enum : rc_t { + ep_mask = 0x00000000ffffffffull, + ep_incr = 0x0000000100000000ull + }; + + template + struct elem_t { + std::aligned_storage_t data_ {}; + std::atomic rc_ { 0 }; // read-counter + }; + + alignas(cache_line_size) 
std::atomic wt_; // write index + alignas(cache_line_size) rc_t epoch_ { 0 }; // only one writer + + circ::u2_t cursor() const noexcept { + return wt_.load(std::memory_order_acquire); + } + + template + bool push(W* wrapper, F&& f, E* elems) { + E* el; + for (unsigned k = 0;;) { + circ::cc_t cc = wrapper->elems()->connections(std::memory_order_relaxed); + if (cc == 0) return false; // no reader + el = elems + circ::index_of(wt_.load(std::memory_order_relaxed)); + // check all consumers have finished reading this element + auto cur_rc = el->rc_.load(std::memory_order_acquire); + circ::cc_t rem_cc = cur_rc & ep_mask; + if ((cc & rem_cc) && ((cur_rc & ~ep_mask) == epoch_)) { + return false; // has not finished yet + } + // consider rem_cc to be 0 here + if (el->rc_.compare_exchange_weak( + cur_rc, epoch_ | static_cast(cc), std::memory_order_release)) { + break; + } + ipc::yield(k); + } + std::forward(f)(&(el->data_)); + wt_.fetch_add(1, std::memory_order_release); + return true; + } + + template + bool force_push(W* wrapper, F&& f, E* elems) { + E* el; + epoch_ += ep_incr; + for (unsigned k = 0;;) { + circ::cc_t cc = wrapper->elems()->connections(std::memory_order_relaxed); + if (cc == 0) return false; // no reader + el = elems + circ::index_of(wt_.load(std::memory_order_relaxed)); + // check all consumers have finished reading this element + auto cur_rc = el->rc_.load(std::memory_order_acquire); + circ::cc_t rem_cc = cur_rc & ep_mask; + if (cc & rem_cc) { + ipc::log("force_push: k = %u, cc = %u, rem_cc = %u\n", k, cc, rem_cc); + cc = wrapper->elems()->disconnect_receiver(rem_cc); // disconnect all invalid readers + if (cc == 0) return false; // no reader + } + // just compare & exchange + if (el->rc_.compare_exchange_weak( + cur_rc, epoch_ | static_cast(cc), std::memory_order_release)) { + break; + } + ipc::yield(k); + } + std::forward(f)(&(el->data_)); + wt_.fetch_add(1, std::memory_order_release); + return true; + } + + template + bool pop(W* wrapper, circ::u2_t& 
cur, F&& f, R&& out, E* elems) { + if (cur == cursor()) return false; // acquire + auto* el = elems + circ::index_of(cur++); + std::forward(f)(&(el->data_)); + for (unsigned k = 0;;) { + auto cur_rc = el->rc_.load(std::memory_order_acquire); + if ((cur_rc & ep_mask) == 0) { + std::forward(out)(true); + return true; + } + auto nxt_rc = cur_rc & ~static_cast(wrapper->connected_id()); + if (el->rc_.compare_exchange_weak(cur_rc, nxt_rc, std::memory_order_release)) { + std::forward(out)((nxt_rc & ep_mask) == 0); + return true; + } + ipc::yield(k); + } + } +}; + +template <> +struct prod_cons_impl> { + + using rc_t = std::uint64_t; + using flag_t = std::uint64_t; + + enum : rc_t { + rc_mask = 0x00000000ffffffffull, + ep_mask = 0x00ffffffffffffffull, + ep_incr = 0x0100000000000000ull, + ic_mask = 0xff000000ffffffffull, + ic_incr = 0x0000000100000000ull + }; + + template + struct elem_t { + std::aligned_storage_t data_ {}; + std::atomic rc_ { 0 }; // read-counter + std::atomic f_ct_ { 0 }; // commit flag + }; + + alignas(cache_line_size) std::atomic ct_; // commit index + alignas(cache_line_size) std::atomic epoch_ { 0 }; + + circ::u2_t cursor() const noexcept { + return ct_.load(std::memory_order_acquire); + } + + constexpr static rc_t inc_rc(rc_t rc) noexcept { + return (rc & ic_mask) | ((rc + ic_incr) & ~ic_mask); + } + + constexpr static rc_t inc_mask(rc_t rc) noexcept { + return inc_rc(rc) & ~rc_mask; + } + + template + bool push(W* wrapper, F&& f, E* elems) { + E* el; + circ::u2_t cur_ct; + rc_t epoch = epoch_.load(std::memory_order_acquire); + for (unsigned k = 0;;) { + circ::cc_t cc = wrapper->elems()->connections(std::memory_order_relaxed); + if (cc == 0) return false; // no reader + el = elems + circ::index_of(cur_ct = ct_.load(std::memory_order_relaxed)); + // check all consumers have finished reading this element + auto cur_rc = el->rc_.load(std::memory_order_relaxed); + circ::cc_t rem_cc = cur_rc & rc_mask; + if ((cc & rem_cc) && ((cur_rc & ~ep_mask) == 
epoch)) { + return false; // has not finished yet + } + else if (!rem_cc) { + auto cur_fl = el->f_ct_.load(std::memory_order_acquire); + if ((cur_fl != cur_ct) && cur_fl) { + return false; // full + } + } + // consider rem_cc to be 0 here + if (el->rc_.compare_exchange_weak( + cur_rc, inc_mask(epoch | (cur_rc & ep_mask)) | static_cast(cc), std::memory_order_relaxed) && + epoch_.compare_exchange_weak(epoch, epoch, std::memory_order_acq_rel)) { + break; + } + ipc::yield(k); + } + // only one thread/process would touch here at one time + ct_.store(cur_ct + 1, std::memory_order_release); + std::forward(f)(&(el->data_)); + // set flag & try update wt + el->f_ct_.store(~static_cast(cur_ct), std::memory_order_release); + return true; + } + + template + bool force_push(W* wrapper, F&& f, E* elems) { + E* el; + circ::u2_t cur_ct; + rc_t epoch = epoch_.fetch_add(ep_incr, std::memory_order_release) + ep_incr; + for (unsigned k = 0;;) { + circ::cc_t cc = wrapper->elems()->connections(std::memory_order_relaxed); + if (cc == 0) return false; // no reader + el = elems + circ::index_of(cur_ct = ct_.load(std::memory_order_relaxed)); + // check all consumers have finished reading this element + auto cur_rc = el->rc_.load(std::memory_order_acquire); + circ::cc_t rem_cc = cur_rc & rc_mask; + if (cc & rem_cc) { + ipc::log("force_push: k = %u, cc = %u, rem_cc = %u\n", k, cc, rem_cc); + cc = wrapper->elems()->disconnect_receiver(rem_cc); // disconnect all invalid readers + if (cc == 0) return false; // no reader + } + // just compare & exchange + if (el->rc_.compare_exchange_weak( + cur_rc, inc_mask(epoch | (cur_rc & ep_mask)) | static_cast(cc), std::memory_order_relaxed)) { + if (epoch == epoch_.load(std::memory_order_acquire)) { + break; + } + else if (push(wrapper, std::forward(f), elems)) { + return true; + } + epoch = epoch_.fetch_add(ep_incr, std::memory_order_release) + ep_incr; + } + ipc::yield(k); + } + // only one thread/process would touch here at one time + ct_.store(cur_ct + 
1, std::memory_order_release); + std::forward(f)(&(el->data_)); + // set flag & try update wt + el->f_ct_.store(~static_cast(cur_ct), std::memory_order_release); + return true; + } + + template + bool pop(W* wrapper, circ::u2_t& cur, F&& f, R&& out, E(& elems)[N]) { + auto* el = elems + circ::index_of(cur); + auto cur_fl = el->f_ct_.load(std::memory_order_acquire); + if (cur_fl != ~static_cast(cur)) { + return false; // empty + } + ++cur; + std::forward(f)(&(el->data_)); + for (unsigned k = 0;;) { + auto cur_rc = el->rc_.load(std::memory_order_acquire); + if ((cur_rc & rc_mask) == 0) { + std::forward(out)(true); + el->f_ct_.store(cur + N - 1, std::memory_order_release); + return true; + } + auto nxt_rc = inc_rc(cur_rc) & ~static_cast(wrapper->connected_id()); + bool last_one = false; + if ((last_one = (nxt_rc & rc_mask) == 0)) { + el->f_ct_.store(cur + N - 1, std::memory_order_release); + } + if (el->rc_.compare_exchange_weak(cur_rc, nxt_rc, std::memory_order_release)) { + std::forward(out)(last_one); + return true; + } + ipc::yield(k); + } + } +}; + +} // namespace ipc diff --git a/crazy_functions/高级功能函数模板.py b/crazy_functions/高级功能函数模板.py index ec14e74..b5c84b9 100644 --- a/crazy_functions/高级功能函数模板.py +++ b/crazy_functions/高级功能函数模板.py @@ -11,7 +11,7 @@ def 高阶功能模板函数(txt, top_p, temperature, chatbot, history, systemPr for i in range(5): currentMonth = (datetime.date.today() + datetime.timedelta(days=i)).month currentDay = (datetime.date.today() + datetime.timedelta(days=i)).day - i_say = f'历史中哪些事件发生在{currentMonth}月{currentDay}日?列举两条并发送相关图片。发送图片时,请使用Markdown,将Unsplash API中的PUT_YOUR_QUERY_HERE替换成描述改事件的三个最重要的单词。' + i_say = f'历史中哪些事件发生在{currentMonth}月{currentDay}日?列举两条并发送相关图片。发送图片时,请使用Markdown,将Unsplash API中的PUT_YOUR_QUERY_HERE替换成描述该事件的一个最重要的单词。' chatbot.append((i_say, "[Local Message] waiting gpt response.")) yield chatbot, history, '正常' # 由于请求gpt需要一段时间,我们先及时地做一次状态显示 diff --git a/functional_crazy.py b/functional_crazy.py index af4c83e..2f91a32 100644 --- 
a/functional_crazy.py +++ b/functional_crazy.py @@ -19,10 +19,10 @@ def get_crazy_functionals(): function_plugins = { "请解析并解构此项目本身": { - # HotReload 的意思是热更新,修改函数插件后,不需要重启程序,代码直接生效 + "AsButton": False, # 加入下拉菜单中 "Function": 解析项目本身 }, - "解析整个py项目": { + "解析整个Py项目": { "Color": "stop", # 按钮颜色 "Function": 解析一个Python项目 }, @@ -32,9 +32,10 @@ def get_crazy_functionals(): }, "解析整个C++项目": { "Color": "stop", # 按钮颜色 + "AsButton": False, # 加入下拉菜单中 "Function": 解析一个C项目 }, - "读tex论文写摘要": { + "读Tex论文写摘要": { "Color": "stop", # 按钮颜色 "Function": 读文章写摘要 }, @@ -52,7 +53,7 @@ def get_crazy_functionals(): }, } - # VisibleLevel=1 经过测试,但功能未达到理想状态 + # VisibleLevel=1 经过测试,但功能上距离达到完美状态还差一点点 if UserVisibleLevel >= 1: from crazy_functions.批量总结PDF文档 import 批量总结PDF文档 from crazy_functions.批量总结PDF文档pdfminer import 批量总结PDF文档pdfminer @@ -60,11 +61,11 @@ def get_crazy_functionals(): function_plugins.update({ "[仅供开发调试] 批量总结PDF文档": { "Color": "stop", - # HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效 - "Function": HotReload(批量总结PDF文档) + "Function": HotReload(批量总结PDF文档) # HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效 }, "[仅供开发调试] 批量总结PDF文档pdfminer": { "Color": "stop", + "AsButton": False, # 加入下拉菜单中 "Function": HotReload(批量总结PDF文档pdfminer) }, "[仅供开发调试] 批量总结Word文档": { diff --git a/main.py b/main.py index 10bbddc..cdcfaa4 100644 --- a/main.py +++ b/main.py @@ -4,9 +4,8 @@ from predict import predict from toolbox import format_io, find_free_port, on_file_uploaded, on_report_generated, get_conf # 建议您复制一个config_private.py放自己的秘密, 如API和代理网址, 避免不小心传github被别人看到 -proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION = \ - get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION') - +proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT = \ + get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION', 'CHATBOT_HEIGHT') # 如果WEB_PORT是-1, 则随机选取WEB端口 PORT = find_free_port() if WEB_PORT <= 0 else WEB_PORT @@ -17,18 +16,18 @@ title_html = """

ChatGPT 学术优化

""" # 问询记录, python 版本建议3.9+(越新越好) import logging -os.makedirs('gpt_log', exist_ok=True) -try:logging.basicConfig(filename='gpt_log/chat_secrets.log', level=logging.INFO, encoding='utf-8') -except:logging.basicConfig(filename='gpt_log/chat_secrets.log', level=logging.INFO) -print('所有问询记录将自动保存在本地目录./gpt_log/chat_secrets.log, 请注意自我隐私保护哦!') +os.makedirs("gpt_log", exist_ok=True) +try:logging.basicConfig(filename="gpt_log/chat_secrets.log", level=logging.INFO, encoding="utf-8") +except:logging.basicConfig(filename="gpt_log/chat_secrets.log", level=logging.INFO) +print("所有问询记录将自动保存在本地目录./gpt_log/chat_secrets.log, 请注意自我隐私保护哦!") # 一些普通功能模块 from functional import get_functionals functional = get_functionals() -# 对一些丧心病狂的实验性功能模块进行测试 +# 高级函数插件 from functional_crazy import get_crazy_functionals -crazy_functional = get_crazy_functionals() +crazy_fns = get_crazy_functionals() # 处理markdown文本格式的转变 gr.Chatbot.postprocess = format_io @@ -40,11 +39,10 @@ set_theme = adjust_theme() cancel_handles = [] with gr.Blocks(theme=set_theme, analytics_enabled=False) as demo: gr.HTML(title_html) - with gr.Row(): + with gr.Row().style(equal_height=True): with gr.Column(scale=2): chatbot = gr.Chatbot() - chatbot.style(height=1150) - chatbot.style() + chatbot.style(height=CHATBOT_HEIGHT) history = gr.State([]) with gr.Column(scale=1): with gr.Row(): @@ -66,49 +64,70 @@ with gr.Blocks(theme=set_theme, analytics_enabled=False) as demo: with gr.Row(): gr.Markdown("注意:以下“红颜色”标识的函数插件需从input区读取路径作为参数.") with gr.Row(): - for k in crazy_functional: - variant = crazy_functional[k]["Color"] if "Color" in crazy_functional[k] else "secondary" - crazy_functional[k]["Button"] = gr.Button(k, variant=variant) + for k in crazy_fns: + if not crazy_fns[k].get("AsButton", True): continue + variant = crazy_fns[k]["Color"] if "Color" in crazy_fns[k] else "secondary" + crazy_fns[k]["Button"] = gr.Button(k, variant=variant) with gr.Row(): - with gr.Accordion("展开“文件上传区”。上传本地文件供“红颜色”的函数插件调用。", open=False): - file_upload = 
gr.Files(label='任何文件, 但推荐上传压缩文件(zip, tar)', file_count="multiple") + with gr.Accordion("更多函数插件", open=True): + dropdown_fn_list = [k for k in crazy_fns.keys() if not crazy_fns[k].get("AsButton", True)] + with gr.Column(scale=1): + dropdown = gr.Dropdown(dropdown_fn_list, value=r"打开插件列表", label="").style(container=False) + with gr.Column(scale=1): + switchy_bt = gr.Button(r"请先从插件列表中选择", variant="secondary") + with gr.Row(): + with gr.Accordion("点击展开“文件上传区”。上传本地文件可供红色函数插件调用。", open=False) as area_file_up: + file_upload = gr.Files(label="任何文件, 但推荐上传压缩文件(zip, tar)", file_count="multiple") with gr.Accordion("展开SysPrompt & GPT参数 & 交互界面布局", open=False): system_prompt = gr.Textbox(show_label=True, placeholder=f"System Prompt", label="System prompt", value=initial_prompt) top_p = gr.Slider(minimum=-0, maximum=1.0, value=1.0, step=0.01,interactive=True, label="Top-p (nucleus sampling)",) temperature = gr.Slider(minimum=-0, maximum=2.0, value=1.0, step=0.01, interactive=True, label="Temperature",) - checkboxes = gr.CheckboxGroup(["基础功能区", "函数插件区"], - value=["基础功能区", "函数插件区"], label="显示哪些功能区") + checkboxes = gr.CheckboxGroup(["基础功能区", "函数插件区"], value=["基础功能区", "函数插件区"], label="显示/隐藏功能区") - def what_is_this(a): + # 功能区显示开关与功能区的互动 + def fn_area_visibility(a): ret = {} - # if area_basic_fn.visible != ("基础功能区" in a): - ret.update({area_basic_fn: gr.update(visible=("基础功能区" in a))}) - # if area_crazy_fn.visible != ("函数插件区" in a): - ret.update({area_crazy_fn: gr.update(visible=("函数插件区" in a))}) + ret.update({area_basic_fn: gr.update(visible=("基础功能区" in a))}) + ret.update({area_crazy_fn: gr.update(visible=("函数插件区" in a))}) return ret - - checkboxes.select(what_is_this, [checkboxes], [area_basic_fn, area_crazy_fn] ) - - predict_args = dict(fn=predict, inputs=[txt, top_p, temperature, chatbot, history, system_prompt], outputs=[chatbot, history, statusDisplay], show_progress=True) + checkboxes.select(fn_area_visibility, [checkboxes], [area_basic_fn, area_crazy_fn] ) + # 整理反复出现的控件句柄组合 + 
input_combo = [txt, top_p, temperature, chatbot, history, system_prompt] + output_combo = [chatbot, history, statusDisplay] + predict_args = dict(fn=predict, inputs=input_combo, outputs=output_combo, show_progress=True) empty_txt_args = dict(fn=lambda: "", inputs=[], outputs=[txt]) # 用于在提交后清空输入栏 - - cancel_handles.append(txt.submit(**predict_args)) - # txt.submit(**empty_txt_args) 在提交后清空输入栏 - cancel_handles.append(submitBtn.click(**predict_args)) - # submitBtn.click(**empty_txt_args) 在提交后清空输入栏 - resetBtn.click(lambda: ([], [], "已重置"), None, [chatbot, history, statusDisplay]) + # 提交按钮、重置按钮 + cancel_handles.append(txt.submit(**predict_args)) #; txt.submit(**empty_txt_args) 在提交后清空输入栏 + cancel_handles.append(submitBtn.click(**predict_args)) #; submitBtn.click(**empty_txt_args) 在提交后清空输入栏 + resetBtn.click(lambda: ([], [], "已重置"), None, output_combo) + # 基础功能区的回调函数注册 for k in functional: - click_handle = functional[k]["Button"].click(predict, - [txt, top_p, temperature, chatbot, history, system_prompt, gr.State(True), gr.State(k)], [chatbot, history, statusDisplay], show_progress=True) + click_handle = functional[k]["Button"].click(predict, [*input_combo, gr.State(True), gr.State(k)], output_combo, show_progress=True) cancel_handles.append(click_handle) + # 文件上传区,接收文件后与chatbot的互动 file_upload.upload(on_file_uploaded, [file_upload, chatbot, txt], [chatbot, txt]) - for k in crazy_functional: - click_handle = crazy_functional[k]["Button"].click(crazy_functional[k]["Function"], - [txt, top_p, temperature, chatbot, history, system_prompt, gr.State(PORT)], [chatbot, history, statusDisplay] - ) - try: click_handle.then(on_report_generated, [file_upload, chatbot], [file_upload, chatbot]) - except: pass + # 函数插件-固定按钮区 + for k in crazy_fns: + if not crazy_fns[k].get("AsButton", True): continue + click_handle = crazy_fns[k]["Button"].click(crazy_fns[k]["Function"], [*input_combo, gr.State(PORT)], output_combo) + click_handle.then(on_report_generated, [file_upload, chatbot], 
[file_upload, chatbot]) cancel_handles.append(click_handle) + # 函数插件-下拉菜单与随变按钮的互动 + def on_dropdown_changed(k): + variant = crazy_fns[k]["Color"] if "Color" in crazy_fns[k] else "secondary" + return {switchy_bt: gr.update(value=k, variant=variant)} + dropdown.select(on_dropdown_changed, [dropdown], [switchy_bt] ) + # 随变按钮的回调函数注册 + def route(k, *args, **kwargs): + if k in [r"打开插件列表", r"先从插件列表中选择"]: return + yield from crazy_fns[k]["Function"](*args, **kwargs) + click_handle = switchy_bt.click(route,[switchy_bt, *input_combo, gr.State(PORT)], output_combo) + click_handle.then(on_report_generated, [file_upload, chatbot], [file_upload, chatbot]) + def expand_file_area(file_upload, area_file_up): + if len(file_upload)>0: return {area_file_up: gr.update(open=True)} + click_handle.then(expand_file_area, [file_upload, area_file_up], [area_file_up]) + cancel_handles.append(click_handle) + # 终止按钮的回调函数注册 stopBtn.click(fn=None, inputs=None, outputs=None, cancels=cancel_handles) # gradio的inbrowser触发不太稳定,回滚代码到原始的浏览器打开函数 @@ -117,7 +136,7 @@ def auto_opentab_delay(): print(f"如果浏览器没有自动打开,请复制并转到以下URL: http://localhost:{PORT}") def open(): time.sleep(2) - webbrowser.open_new_tab(f'http://localhost:{PORT}') + webbrowser.open_new_tab(f"http://localhost:{PORT}") threading.Thread(target=open, name="open-browser", daemon=True).start() auto_opentab_delay() diff --git a/predict.py b/predict.py index 84036bc..31a5861 100644 --- a/predict.py +++ b/predict.py @@ -96,13 +96,19 @@ def predict_no_ui_long_connection(inputs, top_p, temperature, history=[], sys_pr except StopIteration: break if len(chunk)==0: continue if not chunk.startswith('data:'): - chunk = get_full_error(chunk.encode('utf8'), stream_response) - raise ConnectionAbortedError("OpenAI拒绝了请求:" + chunk.decode()) - delta = json.loads(chunk.lstrip('data:'))['choices'][0]["delta"] + error_msg = get_full_error(chunk.encode('utf8'), stream_response).decode() + if "reduce the length" in error_msg: + raise 
ConnectionAbortedError("OpenAI拒绝了请求:" + error_msg) + else: + raise RuntimeError("OpenAI拒绝了请求:" + error_msg) + json_data = json.loads(chunk.lstrip('data:'))['choices'][0] + delta = json_data["delta"] if len(delta) == 0: break if "role" in delta: continue if "content" in delta: result += delta["content"]; print(delta["content"], end='') else: raise RuntimeError("意外Json结构:"+delta) + if json_data['finish_reason'] == 'length': + raise ConnectionAbortedError("正常结束,但显示Token不足。") return result diff --git a/toolbox.py b/toolbox.py index b78a513..bf88760 100644 --- a/toolbox.py +++ b/toolbox.py @@ -2,21 +2,21 @@ import markdown, mdtex2html, threading, importlib, traceback, importlib, inspect from show_math import convert as convert_math from functools import wraps -def get_reduce_token_percent(e): +def get_reduce_token_percent(text): try: # text = "maximum context length is 4097 tokens. However, your messages resulted in 4870 tokens" pattern = r"(\d+)\s+tokens\b" match = re.findall(pattern, text) - eps = 50 # 稍微留一点余地, 确保下次别再超过token - max_limit = float(match[0]) - eps + EXCEED_ALLO = 500 # 稍微留一点余地,否则在回复时会因余量太少出问题 + max_limit = float(match[0]) - EXCEED_ALLO current_tokens = float(match[1]) ratio = max_limit/current_tokens assert ratio > 0 and ratio < 1 - return ratio + return ratio, str(int(current_tokens-max_limit)) except: - return 0.5 + return 0.5, '不详' -def predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temperature, history=[], sys_prompt='', long_connection=False): +def predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temperature, history=[], sys_prompt='', long_connection=True): """ 调用简单的predict_no_ui接口,但是依然保留了些许界面心跳功能,当对话太长时,会自动采用二分法截断 i_say: 当前输入 @@ -45,19 +45,18 @@ def predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temp break except ConnectionAbortedError as token_exceeded_error: # 尝试计算比例,尽可能多地保留文本 - p_ratio = get_reduce_token_percent(str(token_exceeded_error)) + p_ratio, n_exceed = 
get_reduce_token_percent(str(token_exceeded_error)) if len(history) > 0: history = [his[ int(len(his) *p_ratio): ] for his in history if his is not None] - mutable[1] = 'Warning! History conversation is too long, cut into half. ' else: i_say = i_say[: int(len(i_say) *p_ratio) ] - mutable[1] = 'Warning! Input file is too long, cut into half. ' + mutable[1] = f'警告,文本过长将进行截断,Token溢出数:{n_exceed},截断比例:{(1-p_ratio):.0%}。' except TimeoutError as e: - mutable[0] = '[Local Message] Failed with timeout.' + mutable[0] = '[Local Message] 请求超时。' raise TimeoutError except Exception as e: - mutable[0] = f'[Local Message] Failed with {str(e)}.' - raise RuntimeError(f'[Local Message] Failed with {str(e)}.') + mutable[0] = f'[Local Message] 异常:{str(e)}.' + raise RuntimeError(f'[Local Message] 异常:{str(e)}.') # 创建新线程发出http请求 thread_name = threading.Thread(target=mt, args=(i_say, history)); thread_name.start() # 原来的线程则负责持续更新UI,实现一个超时倒计时,并等待新线程的任务完成 @@ -286,7 +285,7 @@ def on_report_generated(files, chatbot): report_files = find_recent_files('gpt_log') if len(report_files) == 0: return report_files, chatbot # files.extend(report_files) - chatbot.append(['汇总报告如何远程获取?', '汇总报告已经添加到右侧文件上传区,请查收。']) + chatbot.append(['汇总报告如何远程获取?', '汇总报告已经添加到右侧“文件上传区”(可能处于折叠状态),请查收。']) return report_files, chatbot def get_conf(*args):