diff --git a/src/Makefile.housekeeping b/src/Makefile.housekeeping index bb2b0cb4..274945ae 100644 --- a/src/Makefile.housekeeping +++ b/src/Makefile.housekeeping @@ -1220,15 +1220,12 @@ endif # defined(BIN) # # The compression utilities # -$(NRV2B) : util/nrv2b.c $(MAKEDEPS) - $(QM)$(ECHO) " [HOSTCC] $@" - $(Q)$(HOST_CC) $(HOST_CFLAGS) -DENCODE -DDECODE -DMAIN -DVERBOSE \ - -DNDEBUG -DBITSIZE=32 -DENDIAN=0 -o $@ $< -CLEANUP += $(NRV2B) -$(ZBIN) : util/zbin.c util/nrv2b.c $(MAKEDEPS) +ZBIN_LDFLAGS := -llzma + +$(ZBIN) : util/zbin.c $(MAKEDEPS) $(QM)$(ECHO) " [HOSTCC] $@" - $(Q)$(HOST_CC) $(HOST_CFLAGS) -o $@ $< + $(Q)$(HOST_CC) $(HOST_CFLAGS) $< $(ZBIN_LDFLAGS) -o $@ CLEANUP += $(ZBIN) ############################################################################### diff --git a/src/arch/i386/prefix/unlzma.S b/src/arch/i386/prefix/unlzma.S new file mode 100644 index 00000000..f6c74286 --- /dev/null +++ b/src/arch/i386/prefix/unlzma.S @@ -0,0 +1,905 @@ +/* + * Copyright (C) 2015 Michael Brown . + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +/**************************************************************************** + * + * This file provides the decompress() and decompress16() functions + * which can be called in order to decompress an LZMA-compressed + * image. The code is modelled on the public-domain "XZ Embedded" + * implementation as used by the Linux kernel. Symbol names are + * chosen to match the XZ Embedded implementation where possible, for + * ease of reference. + * + * This code is optimised for size rather than speed, since the amount + * of data to be decompressed is trivially small by modern standards. + * + * The same basic assembly code is used to compile both decompress() + * and decompress16(). + * + * Note that these functions require large amounts of stack space. + * + **************************************************************************** + */ + + .text + .arch i586 + .section ".prefix.lib", "ax", @progbits + +#ifdef CODE16 +#define ADDR16 +#define ADDR32 addr32 +#define decompress decompress16 + .code16 +#else /* CODE16 */ +#define ADDR16 addr16 +#define ADDR32 + .code32 +#endif /* CODE16 */ + +/**************************************************************************** + * Debugging (via 0xe9 debug port) + **************************************************************************** + */ + +#define DEBUG 0 + +#if DEBUG + .macro print_character, char + pushw %ax + movb $\char, %al + outb %al, $0xe9 + popw %ax + .endm + + .macro print_hex_nibble + cmpb $10, %al + sbb $0x69, %al + das + outb %al, $0xe9 + .endm + + .macro print_hex_byte, reg + pushfl + pushw %ax + movb \reg, %al + pushw %ax + shrb $4, %al + print_hex_nibble + popw %ax + andb $0x0f, %al + print_hex_nibble + popw %ax + popfl + .endm + + .macro print_hex_word, reg + pushw %ax + movw \reg, %ax + print_hex_byte %ah + print_hex_byte %al + popw %ax + .endm + + .macro print_hex_dword, reg + pushl %eax + movl \reg, %eax + rorl $16, %eax + print_hex_word %ax + rorl $16, %eax + print_hex_word %ax + popl %eax + .endm +#else + .macro print_character, char + .endm + .macro print_hex_byte, reg + .endm + .macro print_hex_word, reg + .endm + .macro print_hex_dword, reg + .endm +#endif + +/**************************************************************************** + * LZMA parameters and data structures + **************************************************************************** + */ + +/* LZMA decompressor states (as used in XZ Embedded) */ +#define STATE_LIT_LIT 0x00 +#define STATE_MATCH_LIT_LIT 0x01 +#define STATE_REP_LIT_LIT 0x02 +#define STATE_SHORTREP_LIT_LIT 0x03 +#define STATE_MATCH_LIT 0x04 +#define STATE_REP_LIT 0x05 +#define STATE_SHORTREP_LIT 0x06 +#define STATE_LIT_MATCH 0x07 +#define STATE_LIT_LONGREP 0x08 +#define STATE_LIT_SHORTREP 0x09 +#define STATE_NONLIT_MATCH 0x0a +#define STATE_NONLIT_REP 0x0b + +/* LZMA maximum decompressor state in which most recent symbol was a literal */ +#define STATE_LIT_MAX 0x06 + +/* LZMA number of literal context bits ("lc=" parameter) */ +#define LZMA_LC 2 + + .struct 0 +lzma_len_dec: +choice: .word 0 +choice2: .word 0 +low: .rept ( 1 << 3 ) + .word 0 + .endr +mid: .rept ( 1 << 3 ) + .word 0 + .endr +high: .rept ( 1 << 8 ) + .word 0 + .endr + .equ sizeof__lzma_len_dec, . - lzma_len_dec + .previous + + .struct 0 +lzma_dec: +in_start: .long 0 +out_start: .long 0 +rc_code: .long 0 +rc_range: .long 0 +len: .word 0 +reps: +rep0: .long 0 +rep1: .long 0 +rep2: .long 0 +rep3: .long 0 +probs: +is_match: .word 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +is_rep: .word 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +is_rep0: .word 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +is_rep1: .word 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +is_rep2: .word 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +is_rep0_long: .word 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +dist_slot: .rept ( 4 * ( 1 << 6 ) ) + .word 0 + .endr +dist_special: .rept ( ( 1 << ( 14 / 2 ) ) - 14 ) + .word 0 + .endr +dist_align: .rept ( 1 << 4 ) + .word 0 + .endr +match_len_dec: .space sizeof__lzma_len_dec +rep_len_dec: .space sizeof__lzma_len_dec +literal: .rept ( ( 1 << LZMA_LC ) * 0x300 ) + .word 0 + .endr + .align 4 + .equ sizeof__lzma_dec, . - lzma_dec + .previous + +/***************************************************************************** + * Normalise range encoder + * + * Parameters: + * %ss:%ebp : LZMA parameter block + * %ds:%esi : compressed input data pointer + * Returns: + * %ds:%esi : compressed input data pointer (possibly updated) + * %eax : current range + * Corrupts: + * %eax + ***************************************************************************** + */ +rc_normalise: + /* Check if rc_range is less than 1<<24 */ + testb $0xff, (rc_range+3)(%ebp) + jnz 1f + /* If it is, shift in a new byte from the compressed input data */ + shll $8, rc_range(%ebp) + shll $8, rc_code(%ebp) + ADDR32 lodsb + movb %al, (rc_code+0)(%ebp) +1: /* Return current range */ + movl rc_range(%ebp), %eax + ret + .size rc_normalise, . - rc_normalise + +/***************************************************************************** + * Decode single range-encoded bit using a probability estimate + * + * Parameters: + * %ss:%ebp : LZMA parameter block + * %ds:%esi : compressed input data pointer + * %ebx : probability estimate pointer (offset from %ebp) + * Returns: + * %ds:%esi : compressed input data pointer (possibly updated) + * CF : decoded bit + * ZF : inverse of decoded bit + * Corrupts: + * none + ***************************************************************************** + */ +rc_bit: + /* Preserve registers */ + pushl %eax + pushl %edx + /* Perform normalisation */ + call rc_normalise + /* Calculate bound in %eax and probability estimate in %dx */ + shrl $11, %eax + movzwl (%ebp,%ebx), %edx + mul %edx /* will zero %edx */ + movw (%ebp,%ebx), %dx + /* Compare code against bound */ + cmpl %eax, rc_code(%ebp) + jae 2f +1: /* Code is less than bound */ + movl %eax, rc_range(%ebp) + negw %dx + addw $(1<<11), %dx + shrw $5, %dx + addw %dx, (%ebp,%ebx) + xorw %ax, %ax /* Clear CF, set ZF */ + jmp 99f +2: /* Code is greater than or equal to bound */ + subl %eax, rc_range(%ebp) + subl %eax, rc_code(%ebp) + shrw $5, %dx + subw %dx, (%ebp,%ebx) + incw %dx /* Clear ZF (%dx is 11-bit; can never wrap) */ + stc /* Set CF */ +99: /* Restore registers and return */ + popl %edx + popl %eax + ret + .size rc_bit, . - rc_bit + +/***************************************************************************** + * Decode MSB-first bittree + * + * Parameters: + * %ss:%ebp : LZMA parameter block + * %ds:%esi : compressed input data pointer + * %ebx : probability estimate set pointer (offset from %ebp) + * %cx : number of bits to decode + * Returns: + * %ds:%esi : compressed input data pointer (possibly updated) + * %eax : decoded bittree + * Corrupts: + * none + ***************************************************************************** + */ +rc_bittree: + /* Preserve registers */ + pushl %edi + pushw %cx + movl %ebx, %edi + /* Initialise registers */ + movl $1, %eax +1: /* Decode bit */ + leaw (%edi,%eax,2), %bx /* high word always zero anyway */ + call rc_bit + rclw %ax + ADDR16 loop 1b + /* Restore registers, clear unwanted high bit of result, and return */ + movl %edi, %ebx + popw %cx + popl %edi + btrw %cx, %ax + ret + .size rc_bittree, . - rc_bittree + +/***************************************************************************** + * Decode LSB-first bittree + * + * Parameters: + * %ss:%ebp : LZMA parameter block + * %ds:%esi : compressed input data pointer + * %ebx : probability estimate set pointer (offset from %ebp) + * %cx : number of bits to decode + * Returns: + * %ds:%esi : compressed input data pointer (possibly updated) + * %eax : decoded bittree + * Corrupts: + * none + ***************************************************************************** + */ +rc_bittree_reverse: + /* Preserve registers */ + pushw %cx + /* Decode bittree */ + call rc_bittree +1: /* Reverse result */ + rcrb %al + rclb %ah + ADDR16 loop 1b + shrw $8, %ax + /* Restore registers and return */ + popw %cx + ret + .size rc_bittree_reverse, . - rc_bittree_reverse + +/***************************************************************************** + * Decode MSB-first bittree with optional match byte + * + * Parameters: + * %ss:%ebp : LZMA parameter block + * %ds:%esi : compressed input data pointer + * %ebx : probability estimate set pointer (offset from %ebp) + * %cl : match byte + * %ch : 1 to use match byte, 0 to ignore match byte + * Returns: + * %ds:%esi : compressed input data pointer (possibly updated) + * %eax : decoded bittree + * Corrupts: + * none + ***************************************************************************** + */ +rc_bittree_match: + /* Preserve registers */ + pushl %edi + pushw %cx + pushw %dx + movl %ebx, %edi + /* Initialise registers */ + movl $1, %eax +1: /* Decode bit */ + rolb $1, %cl + movw %cx, %dx + andb %dh, %dl /* match_bit in %dl */ + movw %dx, %bx + addb %bl, %bh + xorb %bl, %bl + addw %ax, %bx /* offset + match_bit + symbol */ + leaw (%edi,%ebx,2), %bx /* high word always zero anyway */ + call rc_bit + rclw %ax + movb %al, %dh + notb %dh + xorb %dh, %dl + andb %dl, %ch /* offset &= ( match_bit ^ bit ) */ + testb %ah, %ah + jz 1b + /* Restore registers, clear unwanted high bit of result, and return */ + movl %edi, %ebx + popw %dx + popw %cx + popl %edi + xorb %ah, %ah + ret + .size rc_bittree_match, . - rc_bittree_match + +/***************************************************************************** + * Decode direct bits (no probability estimates) + * + * Parameters: + * %ss:%ebp : LZMA parameter block + * %ds:%esi : compressed input data pointer + * %cx : number of bits to decode + * Returns: + * %ds:%esi : compressed input data pointer (possibly updated) + * %eax : decoded bits + * Corrupts: + * none + ***************************************************************************** + */ +rc_direct: + /* Preserve registers */ + pushl %ebx + pushw %cx + pushl %edx + /* Initialise registers */ + xorl %edx, %edx +1: /* Perform normalisation */ + call rc_normalise + /* Decode bit */ + shrl $1, %eax + movl %eax, rc_range(%ebp) + movl rc_code(%ebp), %ebx + subl %eax, %ebx + js 2f + movl %ebx, rc_code(%ebp) +2: rcll %ebx + rcll %edx + xorb $1, %dl + ADDR16 loop 1b + /* Restore registers and return */ + movl %edx, %eax + popl %edx + popw %cx + popl %ebx + ret + .size rc_direct, . - rc_direct + +/***************************************************************************** + * Decode an LZMA literal + * + * Parameters: + * %ss:%ebp : LZMA parameter block + * %ds:%esi : compressed input data pointer + * %es:%edi : uncompressed output data pointer + * %edx : LZMA state + * Returns: + * %ds:%esi : compressed input data pointer (possibly updated) + * %es:%edi : uncompressed output data pointer (updated) + * %edx : LZMA state + * CF : end of payload marker found (always zero) + * Corrupts: + * %eax + * %ebx + * %ecx + ***************************************************************************** + * + * Literals are coded as an eight-bit tree, using a match byte if the + * previous symbol was not a literal. + * + */ +lzma_literal: + /* Get most recent output byte, if available */ + xorl %ebx, %ebx + cmpl %esi, in_start(%ebp) + je 1f + movb %es:-1(%edi), %bh +1: /* Locate probability estimate set */ + shrb $( 8 - LZMA_LC ), %bh + shlb $1, %bh + leaw literal(%ebx,%ebx,2), %bx + /* Get match byte, if applicable */ + xorw %cx, %cx + cmpb $STATE_LIT_MAX, %dl + jbe 1f + movl rep0(%ebp), %eax + notl %eax + movb %es:(%edi,%eax), %cl + movb $1, %ch +1: /* Decode bittree */ + call rc_bittree_match + /* Store output byte */ + ADDR32 stosb + print_hex_byte %al + print_character ' ' + /* Update LZMA state */ + subb $3, %dl + jns 1f + xorb %dl, %dl +1: cmpb $7, %dl + jb 1f + subb $3, %dl +1: /* Clear CF and return */ + clc + ret + .size lzma_literal, . - lzma_literal + +/***************************************************************************** + * Decode an LZMA length + * + * Parameters: + * %ss:%ebp : LZMA parameter block + * %ds:%esi : compressed input data pointer + * %ebx : length parameter pointer (offset from %ebp) + * Returns: + * %ds:%esi : compressed input data pointer (possibly updated) + * Corrupts: + * %ebx + ***************************************************************************** + * + * Lengths are encoded as: + * + * "0" + 3 bits : lengths 2-9 ("low") + * "10" + 3 bits : lengths 10-17 ("mid") + * "11" + 8 bits : lengths 18-273 ("high") + */ +lzma_len: + /* Preserve registers */ + pushl %eax + pushl %ecx + pushl %edi + movl %ebx, %edi + /* Start by assuming three bits and a base length of 2 */ + movw $3, %cx + movw $2, len(%ebp) + /* Check low-length choice bit */ + leal choice(%edi), %ebx + call rc_bit + leal low(%edi), %ebx + jz 1f + /* Check high-length choice bit */ + leal choice2(%edi), %ebx + call rc_bit + leal mid(%edi), %ebx + movb $10, len(%ebp) + jz 1f + leal high(%edi), %ebx + movb $8, %cl + movb $18, len(%ebp) +1: /* Get encoded length */ + call rc_bittree + addw %ax, len(%ebp) + /* Restore registers and return */ + movl %edi, %ebx + popl %edi + popl %ecx + popl %eax + ret + .size lzma_len, . - lzma_len + +/***************************************************************************** + * Copy (possibly repeated) matched data + * + * Parameters: + * %ss:%ebp : LZMA parameter block + * %ds:%esi : compressed input data pointer + * %es:%edi : uncompressed output data pointer + * %cl : repeated match distance index (for repeated matches) + * %eax : match distance (for non-repeated matches) + * Returns: + * %ds:%esi : compressed input data pointer (possibly updated) + * %es:%edi : uncompressed output data pointer + * CF : match distance is out of range + * Corrupts: + * %eax + * %ebx + * %ecx + ***************************************************************************** + */ +match: /* Update repeated match list */ + print_character '[' + movl $3, %ecx + jmp 1f +match_rep: + print_character '[' + print_character 'R' + print_hex_byte %cl + print_character '=' + movzbl %cl, %ecx + movl reps(%ebp,%ecx,4), %eax + jcxz 2f +1: movl (reps-4)(%ebp,%ecx,4), %ebx + movl %ebx, reps(%ebp,%ecx,4) + loop 1b + movl %eax, rep0(%ebp) +2: /* Preserve registers */ + pushl %esi + /* Get stored match length */ + movzwl len(%ebp), %ecx + print_hex_dword %eax + print_character '+' + print_hex_word %cx + print_character ']' + print_character ' ' + /* Abort with CF set if match distance is out of range */ + movl out_start(%ebp), %esi + negl %esi + leal -1(%edi,%esi), %esi + cmpl %eax, %esi + jc 99f + /* Perform copy */ + notl %eax + leal (%edi,%eax), %esi + ADDR32 es rep movsb +99: /* Restore registers and return */ + popl %esi + ret + .size match, . - match + +/***************************************************************************** + * Decode an LZMA match + * + * Parameters: + * %ss:%ebp : LZMA parameter block + * %ds:%esi : compressed input data pointer + * %es:%edi : uncompressed output data pointer + * %edx : LZMA state + * Returns: + * %ds:%esi : compressed input data pointer (possibly updated) + * %es:%edi : uncompressed output data pointer + * %edx : LZMA state + * CF : end of payload marker found + * Corrupts: + * %eax + * %ebx + * %ecx + ***************************************************************************** + * + * Matches are encoded as an LZMA length followed by a 6-bit "distance + * slot" code, 0-26 fixed-probability bits, and 0-5 context encoded + * bits. + */ +lzma_match: + /* Preserve registers */ + pushl %edi + /* Update LZMA state */ + cmpb $STATE_LIT_MAX, %dl + movb $STATE_LIT_MATCH, %dl + jbe 1f + movb $STATE_NONLIT_MATCH, %dl +1: /* Decode length */ + movl $match_len_dec, %ebx + call lzma_len + /* Decode distance slot */ + movw len(%ebp), %bx + subw $2, %bx + cmpw $4, %bx + jb 1f + movw $3, %bx +1: shlw $7, %bx + addw $dist_slot, %bx + movw $6, %cx + call rc_bittree + /* Distance slots 0-3 are literal distances */ + cmpb $4, %al + jb 99f + /* Determine initial bits: 10/11 for even/odd distance codes */ + movl %eax, %edi + andw $1, %di + orw $2, %di + /* Determine number of context-encoded bits */ + movw %ax, %cx + shrb $1, %cl + decb %cl + /* Select context to be used in absence of fixed-probability bits */ + movl %edi, %ebx + shlw %cl, %bx + subw %ax, %bx + leaw (dist_special-2)(%ebx,%ebx), %bx + /* Decode fixed-probability bits, if any */ + cmpb $6, %cl + jb 1f + subb $4, %cl + shll %cl, %edi + call rc_direct + orl %eax, %edi + /* Select context to be used in presence of fixed-probability bits */ + movb $4, %cl + movl $dist_align, %ebx +1: /* Decode context-encoded bits */ + shll %cl, %edi + call rc_bittree_reverse + orl %edi, %eax +99: /* Restore registers and tail-call */ + popl %edi + jmp match + .size lzma_match, . - lzma_match + +/***************************************************************************** + * Decode an LZMA repeated match + * + * Parameters: + * %ss:%ebp : LZMA parameter block + * %ds:%esi : compressed input data pointer + * %es:%edi : uncompressed output data pointer + * %edx : LZMA state + * Returns: + * %ds:%esi : compressed input data pointer (possibly updated) + * %es:%edi : uncompressed output data pointer + * %edx : LZMA state + * CF : end of payload marker found + * Corrupts: + * %eax + * %ebx + * %ecx + ***************************************************************************** + * + * Repeated matches are encoded as: + * + * "00" : shortrep0 (implicit length 1) + * "01" + len : longrep0 + * "10" + len : longrep1 + * "110" + len : longrep2 + * "111" + len : longrep3 + */ +lzma_rep_match: + /* Initially assume longrep0 */ + movw $(STATE_LIT_LONGREP << 8), %cx + /* Get is_rep0 bit */ + leal is_rep0(,%edx,2), %ebx + call rc_bit + jnz 1f + /* Get is_rep0_long bit */ + leal is_rep0_long(,%edx,2), %ebx + call rc_bit + jnz 98f + movw $1, len(%ebp) + movb $STATE_LIT_SHORTREP, %ch + jmp 99f +1: /* Get is_rep1 bit */ + incb %cl + leal is_rep1(,%edx,2), %ebx + call rc_bit + jz 98f + /* Get is_rep2 bit */ + incb %cl + leal is_rep2(,%edx,2), %ebx + call rc_bit + adcb $0, %cl +98: /* Decode length */ + movl $rep_len_dec, %ebx + call lzma_len +99: /* Update LZMA state */ + cmpb $STATE_LIT_MAX, %dl + movb %ch, %dl + jbe 1f + movb $STATE_NONLIT_REP, %dl +1: /* Tail call */ + jmp match_rep + .size lzma_match, . - lzma_match + +/***************************************************************************** + * Decode one LZMA symbol + * + * Parameters: + * %ss:%ebp : LZMA parameter block + * %ds:%esi : compressed input data pointer + * %es:%edi : uncompressed output data pointer + * %edx : LZMA state + * Returns: + * %ds:%esi : compressed input data pointer (possibly updated) + * %es:%edi : uncompressed output data pointer (updated) + * %edx : LZMA state + * CF : end of payload marker found + * Corrupts: + * %eax + * %ebx + * %ecx + ***************************************************************************** + */ +lzma_decode: + /* Get is_match bit */ + leal is_match(,%edx,2), %ebx + call rc_bit + jz lzma_literal + /* Get is_rep bit */ + leal is_rep(,%edx,2), %ebx + call rc_bit + jz lzma_match + jmp lzma_rep_match + .size lzma_decode, . - lzma_decode + +/**************************************************************************** + * Undo effect of branch-call-jump (BCJ) filter + * + * Parameters: + * %es:%esi : start of uncompressed output data (note %es) + * %es:%edi : end of uncompressed output data + * Returns: + * Corrupts: + * %eax + * %ebx + * %ecx + * %edx + * %esi + ***************************************************************************** + */ +bcj_filter: + /* Store (negative) start of data in %edx */ + movl %esi, %edx + negl %edx + /* Calculate limit in %ecx */ + leal -5(%edi,%edx), %ecx +1: /* Calculate offset in %ebx */ + leal (%esi,%edx), %ebx + /* Check for end of data */ + cmpl %ecx, %ebx + ja 99f + /* Check for an opcode which would be followed by a rel32 address */ + ADDR32 es lodsb + andb $0xfe, %al + cmpb $0xe8, %al + jne 1b + /* Get current jump target value in %eax */ + ADDR32 es lodsl + /* Convert absolute addresses in the range [0,limit) back to + * relative addresses in the range [-offset,limit-offset). + */ + cmpl %ecx, %eax + jae 2f + subl %ebx,%es:-4(%esi) +2: /* Convert negative numbers in the range [-offset,0) back to + * positive numbers in the range [limit-offset,limit). + */ + notl %eax /* Range is now [0,offset) */ + cmpl %ebx, %eax + jae 1b + addl %ecx,%es:-4(%esi) + jmp 1b +99: /* Return */ + ret + .size bcj_filter, . - bcj_filter + +/**************************************************************************** + * decompress (real-mode or 16/32-bit protected-mode near call) + * + * Decompress data + * + * Parameters (passed via registers): + * %ds:%esi : Start of compressed input data + * %es:%edi : Start of output buffer + * Returns: + * %ds:%esi - End of compressed input data + * %es:%edi - End of decompressed output data + * All other registers are preserved + * + * NOTE: It would be possible to build a smaller version of the + * decompression code for -DKEEP_IT_REAL by using 16-bit registers + * where possible. + **************************************************************************** + */ + .globl decompress +decompress: + /* Preserve registers */ + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + pushl %ebp + /* Allocate parameter block */ + subl $sizeof__lzma_dec, %esp + movl %esp, %ebp + /* Zero parameter block and set all probabilities to 0.5 */ + pushl %edi + pushw %es + pushw %ss + popw %es + movl %ebp, %edi + xorl %eax, %eax + movl $( sizeof__lzma_dec / 4 ), %ecx + ADDR32 rep stosl + leal probs(%ebp), %edi + movw $( ( 1 << 11 ) / 2 ), %ax + movl $( ( sizeof__lzma_dec - probs ) / 2 ), %ecx + ADDR32 rep stosw + popw %es + popl %edi + /* Initialise remaining parameters */ + movl %esi, in_start(%ebp) + movl %edi, out_start(%ebp) + print_character '\n' + ADDR32 lodsb /* discard initial byte */ + print_hex_byte %al + ADDR32 lodsl + bswapl %eax + print_hex_dword %eax + print_character '\n' + movl %eax, rc_code(%ebp) + decl rc_range(%ebp) + movl $STATE_LIT_LIT, %edx +1: /* Decompress until we reach end of buffer */ + call lzma_decode + jnc 1b + print_character '\n' + /* Undo BCJ filter */ + pushl %esi + movl out_start(%ebp), %esi + call bcj_filter + popl %esi + /* Restore registers and return */ + addl $sizeof__lzma_dec, %esp + popl %ebp + popl %edx + popl %ecx + popl %ebx + popl %eax + ret + + /* Specify minimum amount of stack space required */ + .globl _min_decompress_stack + .equ _min_decompress_stack, ( sizeof__lzma_dec + 512 /* margin */ ) diff --git a/src/arch/i386/prefix/unlzma16.S b/src/arch/i386/prefix/unlzma16.S new file mode 100644 index 00000000..f58b0f73 --- /dev/null +++ b/src/arch/i386/prefix/unlzma16.S @@ -0,0 +1,9 @@ +/* + * 16-bit version of the decompressor + * + */ + +FILE_LICENCE ( GPL2_OR_LATER ) + +#define CODE16 +#include "unlzma.S" diff --git a/src/util/zbin.c b/src/util/zbin.c index 3b7cf95b..1862a382 100644 --- a/src/util/zbin.c +++ b/src/util/zbin.c @@ -1,13 +1,21 @@ +#include #include +#include +#include +#include #include - -#define ENCODE -#define VERBOSE -#include "nrv2b.c" -FILE *infile, *outfile; +#include #define DEBUG 0 +/* LZMA filter choices. Must match those used by unlzma.S */ +#define LZMA_LC 2 +#define LZMA_LP 0 +#define LZMA_PB 0 + +/* LZMA preset choice. This is a policy decision */ +#define LZMA_PRESET ( LZMA_PRESET_DEFAULT | LZMA_PRESET_EXTREME ) + struct input_file { void *buf; size_t len; @@ -177,13 +185,75 @@ static int process_zinfo_copy ( struct input_file *input, return 0; } +#define OPCODE_CALL 0xe8 +#define OPCODE_JMP 0xe9 + +static void bcj_filter ( void *data, size_t len ) { + struct { + uint8_t opcode; + int32_t target; + } __attribute__ (( packed )) *jump; + ssize_t limit = ( len - sizeof ( *jump ) ); + ssize_t offset; + + /* liblzma does include an x86 BCJ filter, but it's hideously + * convoluted and undocumented. This BCJ filter is + * substantially simpler and achieves the same compression (at + * the cost of requiring the decompressor to know the size of + * the decompressed data, which we already have in iPXE). + */ + for ( offset = 0 ; offset <= limit ; offset++ ) { + jump = ( data + offset ); + + /* Skip instructions that are not followed by a rel32 address */ + if ( ( jump->opcode != OPCODE_CALL ) && + ( jump->opcode != OPCODE_JMP ) ) + continue; + + /* Convert rel32 address to an absolute address. To + * avoid false positives (which damage the compression + * ratio), we should check that the jump target is + * within the range [0,limit). + * + * Some output values would then end up being mapped + * from two distinct input values, making the + * transformation irreversible. To solve this, we + * transform such values back into the part of the + * range which would otherwise correspond to no input + * values. + */ + if ( ( jump->target >= -offset ) && + ( jump->target < ( limit - offset ) ) ) { + /* Convert relative addresses in the range + * [-offset,limit-offset) to absolute + * addresses in the range [0,limit). + */ + jump->target += offset; + } else if ( ( jump->target >= ( limit - offset ) ) && + ( jump->target < limit ) ) { + /* Convert positive numbers in the range + * [limit-offset,limit) to negative numbers in + * the range [-offset,0). + */ + jump->target -= limit; + } + offset += sizeof ( jump->target ); + }; +} + static int process_zinfo_pack ( struct input_file *input, struct output_file *output, union zinfo_record *zinfo ) { struct zinfo_pack *pack = &zinfo->pack; size_t offset = pack->offset; size_t len = pack->len; - unsigned long packed_len; + size_t packed_len = 0; + size_t remaining = ( output->max_len - output->len ); + lzma_options_lzma options; + const lzma_filter filters[] = { + { .id = LZMA_FILTER_LZMA1, .options = &options }, + { .id = LZMA_VLI_UNKNOWN } + }; if ( ( offset + len ) > input->len ) { fprintf ( stderr, "Input buffer overrun on pack\n" ); @@ -196,9 +266,15 @@ static int process_zinfo_pack ( struct input_file *input, return -1; } - if ( ucl_nrv2b_99_compress ( ( input->buf + offset ), len, - ( output->buf + output->len ), - &packed_len, 0 ) != UCL_E_OK ) { + bcj_filter ( ( input->buf + offset ), len ); + + lzma_lzma_preset ( &options, LZMA_PRESET ); + options.lc = LZMA_LC; + options.lp = LZMA_LP; + options.pb = LZMA_PB; + if ( lzma_raw_buffer_encode ( filters, NULL, ( input->buf + offset ), + len, ( output->buf + output->len ), + &packed_len, remaining ) != LZMA_OK ) { fprintf ( stderr, "Compression failure\n" ); return -1; } @@ -206,7 +282,7 @@ static int process_zinfo_pack ( struct input_file *input, if ( DEBUG ) { fprintf ( stderr, "PACK [%#zx,%#zx) to [%#zx,%#zx)\n", offset, ( offset + len ), output->len, - ( size_t )( output->len + packed_len ) ); + ( output->len + packed_len ) ); } output->len += packed_len;