david/ipxe
david
/
ipxe
Archived
1
0
Fork 0

[zbin] Use LZMA compression

LZMA provides significantly better compression (by ~15%) than the
current NRV2B algorithm.

We use a raw LZMA stream (aka LZMA1) to avoid the need for code to
parse the LZMA2 block headers.  We use parameters {lc=2,lp=0,pb=0} to
reduce the stack space required by the decompressor to acceptable
levels (around 8kB).  Using lc=3 or pb=2 would give marginally better
compression, but at the cost of substantially increasing the required
stack space.

The build process now requires the liblzma headers to be present on
the build system, since we do not include a copy of an LZMA compressor
within the iPXE source tree.  The decompressor is written from scratch
(based on XZ Embedded) and is entirely self-contained within the
iPXE source.

The branch-call-jump (BCJ) filter used to improve the compressibility
is specific to iPXE.  We choose not to use liblzma's built-in BCJ
filter since the algorithm is complex and undocumented.  Our BCJ
filter achieves approximately the same results (on typical iPXE
binaries) with a substantially simpler algorithm.

Signed-off-by: Michael Brown <mcb30@ipxe.org>
This commit is contained in:
Michael Brown 2015-02-21 14:29:53 +00:00
parent 3e04f0791e
commit 5350b65a3c
4 changed files with 1004 additions and 17 deletions

View File

@ -1220,15 +1220,12 @@ endif # defined(BIN)
#
# The compression utilities
#
$(NRV2B) : util/nrv2b.c $(MAKEDEPS)
$(QM)$(ECHO) " [HOSTCC] $@"
$(Q)$(HOST_CC) $(HOST_CFLAGS) -DENCODE -DDECODE -DMAIN -DVERBOSE \
-DNDEBUG -DBITSIZE=32 -DENDIAN=0 -o $@ $<
CLEANUP += $(NRV2B)
$(ZBIN) : util/zbin.c util/nrv2b.c $(MAKEDEPS)
ZBIN_LDFLAGS := -llzma
$(ZBIN) : util/zbin.c $(MAKEDEPS)
$(QM)$(ECHO) " [HOSTCC] $@"
$(Q)$(HOST_CC) $(HOST_CFLAGS) -o $@ $<
$(Q)$(HOST_CC) $(HOST_CFLAGS) $< $(ZBIN_LDFLAGS) -o $@
CLEANUP += $(ZBIN)
###############################################################################

View File

@ -0,0 +1,905 @@
/*
* Copyright (C) 2015 Michael Brown <mbrown@fensystems.co.uk>.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
FILE_LICENCE ( GPL2_OR_LATER );
/****************************************************************************
*
* This file provides the decompress() and decompress16() functions
* which can be called in order to decompress an LZMA-compressed
* image. The code is modelled on the public-domain "XZ Embedded"
* implementation as used by the Linux kernel. Symbol names are
* chosen to match the XZ Embedded implementation where possible, for
* ease of reference.
*
* This code is optimised for size rather than speed, since the amount
* of data to be decompressed is trivially small by modern standards.
*
* The same basic assembly code is used to compile both decompress()
* and decompress16().
*
* Note that these functions require large amounts of stack space.
*
****************************************************************************
*/
.text
.arch i586
.section ".prefix.lib", "ax", @progbits
#ifdef CODE16
#define ADDR16
#define ADDR32 addr32
#define decompress decompress16
.code16
#else /* CODE16 */
#define ADDR16 addr16
#define ADDR32
.code32
#endif /* CODE16 */
/****************************************************************************
* Debugging (via 0xe9 debug port)
****************************************************************************
*/
#define DEBUG 0
#if DEBUG
.macro print_character, char
pushw %ax
movb $\char, %al
outb %al, $0xe9
popw %ax
.endm
.macro print_hex_nibble
cmpb $10, %al
sbb $0x69, %al
das
outb %al, $0xe9
.endm
.macro print_hex_byte, reg
pushfl
pushw %ax
movb \reg, %al
pushw %ax
shrb $4, %al
print_hex_nibble
popw %ax
andb $0x0f, %al
print_hex_nibble
popw %ax
popfl
.endm
.macro print_hex_word, reg
pushw %ax
movw \reg, %ax
print_hex_byte %ah
print_hex_byte %al
popw %ax
.endm
.macro print_hex_dword, reg
pushl %eax
movl \reg, %eax
rorl $16, %eax
print_hex_word %ax
rorl $16, %eax
print_hex_word %ax
popl %eax
.endm
#else
.macro print_character, char
.endm
.macro print_hex_byte, reg
.endm
.macro print_hex_word, reg
.endm
.macro print_hex_dword, reg
.endm
#endif
/****************************************************************************
* LZMA parameters and data structures
****************************************************************************
*/
/* LZMA decompressor states (as used in XZ Embedded) */
#define STATE_LIT_LIT 0x00
#define STATE_MATCH_LIT_LIT 0x01
#define STATE_REP_LIT_LIT 0x02
#define STATE_SHORTREP_LIT_LIT 0x03
#define STATE_MATCH_LIT 0x04
#define STATE_REP_LIT 0x05
#define STATE_SHORTREP_LIT 0x06
#define STATE_LIT_MATCH 0x07
#define STATE_LIT_LONGREP 0x08
#define STATE_LIT_SHORTREP 0x09
#define STATE_NONLIT_MATCH 0x0a
#define STATE_NONLIT_REP 0x0b
/* LZMA maximum decompressor state in which most recent symbol was a literal */
#define STATE_LIT_MAX 0x06
/* LZMA number of literal context bits ("lc=" parameter) */
#define LZMA_LC 2
.struct 0
lzma_len_dec:
choice: .word 0
choice2: .word 0
low: .rept ( 1 << 3 )
.word 0
.endr
mid: .rept ( 1 << 3 )
.word 0
.endr
high: .rept ( 1 << 8 )
.word 0
.endr
.equ sizeof__lzma_len_dec, . - lzma_len_dec
.previous
.struct 0
lzma_dec:
in_start: .long 0
out_start: .long 0
rc_code: .long 0
rc_range: .long 0
len: .word 0
reps:
rep0: .long 0
rep1: .long 0
rep2: .long 0
rep3: .long 0
probs:
is_match: .word 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
is_rep: .word 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
is_rep0: .word 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
is_rep1: .word 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
is_rep2: .word 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
is_rep0_long: .word 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
dist_slot: .rept ( 4 * ( 1 << 6 ) )
.word 0
.endr
dist_special: .rept ( ( 1 << ( 14 / 2 ) ) - 14 )
.word 0
.endr
dist_align: .rept ( 1 << 4 )
.word 0
.endr
match_len_dec: .space sizeof__lzma_len_dec
rep_len_dec: .space sizeof__lzma_len_dec
literal: .rept ( ( 1 << LZMA_LC ) * 0x300 )
.word 0
.endr
.align 4
.equ sizeof__lzma_dec, . - lzma_dec
.previous
/*****************************************************************************
* Normalise range encoder
*
* Parameters:
* %ss:%ebp : LZMA parameter block
* %ds:%esi : compressed input data pointer
* Returns:
* %ds:%esi : compressed input data pointer (possibly updated)
* %eax : current range
* Corrupts:
* %eax
*****************************************************************************
*/
rc_normalise:
/* Check if rc_range is less than 1<<24 */
testb $0xff, (rc_range+3)(%ebp)
jnz 1f
/* If it is, shift in a new byte from the compressed input data */
shll $8, rc_range(%ebp)
shll $8, rc_code(%ebp)
ADDR32 lodsb
movb %al, (rc_code+0)(%ebp)
1: /* Return current range */
movl rc_range(%ebp), %eax
ret
.size rc_normalise, . - rc_normalise
/*****************************************************************************
* Decode single range-encoded bit using a probability estimate
*
* Parameters:
* %ss:%ebp : LZMA parameter block
* %ds:%esi : compressed input data pointer
* %ebx : probability estimate pointer (offset from %ebp)
* Returns:
* %ds:%esi : compressed input data pointer (possibly updated)
* CF : decoded bit
* ZF : inverse of decoded bit
* Corrupts:
* none
*****************************************************************************
*/
rc_bit:
/* Preserve registers */
pushl %eax
pushl %edx
/* Perform normalisation */
call rc_normalise
/* Calculate bound in %eax and probability estimate in %dx */
shrl $11, %eax
movzwl (%ebp,%ebx), %edx
mul %edx /* will zero %edx */
movw (%ebp,%ebx), %dx
/* Compare code against bound */
cmpl %eax, rc_code(%ebp)
jae 2f
1: /* Code is less than bound */
movl %eax, rc_range(%ebp)
negw %dx
addw $(1<<11), %dx
shrw $5, %dx
addw %dx, (%ebp,%ebx)
xorw %ax, %ax /* Clear CF, set ZF */
jmp 99f
2: /* Code is greater than or equal to bound */
subl %eax, rc_range(%ebp)
subl %eax, rc_code(%ebp)
shrw $5, %dx
subw %dx, (%ebp,%ebx)
incw %dx /* Clear ZF (%dx is 11-bit; can never wrap) */
stc /* Set CF */
99: /* Restore registers and return */
popl %edx
popl %eax
ret
.size rc_bit, . - rc_bit
/*****************************************************************************
* Decode MSB-first bittree
*
* Parameters:
* %ss:%ebp : LZMA parameter block
* %ds:%esi : compressed input data pointer
* %ebx : probability estimate set pointer (offset from %ebp)
* %cx : number of bits to decode
* Returns:
* %ds:%esi : compressed input data pointer (possibly updated)
* %eax : decoded bittree
* Corrupts:
* none
*****************************************************************************
*/
rc_bittree:
/* Preserve registers */
pushl %edi
pushw %cx
movl %ebx, %edi
/* Initialise registers */
movl $1, %eax
1: /* Decode bit */
leaw (%edi,%eax,2), %bx /* high word always zero anyway */
call rc_bit
rclw %ax
ADDR16 loop 1b
/* Restore registers, clear unwanted high bit of result, and return */
movl %edi, %ebx
popw %cx
popl %edi
btrw %cx, %ax
ret
.size rc_bittree, . - rc_bittree
/*****************************************************************************
* Decode LSB-first bittree
*
* Parameters:
* %ss:%ebp : LZMA parameter block
* %ds:%esi : compressed input data pointer
* %ebx : probability estimate set pointer (offset from %ebp)
* %cx : number of bits to decode
* Returns:
* %ds:%esi : compressed input data pointer (possibly updated)
* %eax : decoded bittree
* Corrupts:
* none
*****************************************************************************
*/
rc_bittree_reverse:
/* Preserve registers */
pushw %cx
/* Decode bittree */
call rc_bittree
1: /* Reverse result */
rcrb %al
rclb %ah
ADDR16 loop 1b
shrw $8, %ax
/* Restore registers and return */
popw %cx
ret
.size rc_bittree_reverse, . - rc_bittree_reverse
/*****************************************************************************
* Decode MSB-first bittree with optional match byte
*
* Parameters:
* %ss:%ebp : LZMA parameter block
* %ds:%esi : compressed input data pointer
* %ebx : probability estimate set pointer (offset from %ebp)
* %cl : match byte
* %ch : 1 to use match byte, 0 to ignore match byte
* Returns:
* %ds:%esi : compressed input data pointer (possibly updated)
* %eax : decoded bittree
* Corrupts:
* none
*****************************************************************************
*/
rc_bittree_match:
/* Preserve registers */
pushl %edi
pushw %cx
pushw %dx
movl %ebx, %edi
/* Initialise registers */
movl $1, %eax
1: /* Decode bit */
rolb $1, %cl
movw %cx, %dx
andb %dh, %dl /* match_bit in %dl */
movw %dx, %bx
addb %bl, %bh
xorb %bl, %bl
addw %ax, %bx /* offset + match_bit + symbol */
leaw (%edi,%ebx,2), %bx /* high word always zero anyway */
call rc_bit
rclw %ax
movb %al, %dh
notb %dh
xorb %dh, %dl
andb %dl, %ch /* offset &= ( match_bit ^ bit ) */
testb %ah, %ah
jz 1b
/* Restore registers, clear unwanted high bit of result, and return */
movl %edi, %ebx
popw %dx
popw %cx
popl %edi
xorb %ah, %ah
ret
.size rc_bittree_match, . - rc_bittree_match
/*****************************************************************************
* Decode direct bits (no probability estimates)
*
* Parameters:
* %ss:%ebp : LZMA parameter block
* %ds:%esi : compressed input data pointer
* %cx : number of bits to decode
* Returns:
* %ds:%esi : compressed input data pointer (possibly updated)
* %eax : decoded bits
* Corrupts:
* none
*****************************************************************************
*/
rc_direct:
/* Preserve registers */
pushl %ebx
pushw %cx
pushl %edx
/* Initialise registers */
xorl %edx, %edx
1: /* Perform normalisation */
call rc_normalise
/* Decode bit */
shrl $1, %eax
movl %eax, rc_range(%ebp)
movl rc_code(%ebp), %ebx
subl %eax, %ebx
js 2f
movl %ebx, rc_code(%ebp)
2: rcll %ebx
rcll %edx
xorb $1, %dl
ADDR16 loop 1b
/* Restore registers and return */
movl %edx, %eax
popl %edx
popw %cx
popl %ebx
ret
.size rc_direct, . - rc_direct
/*****************************************************************************
* Decode an LZMA literal
*
* Parameters:
* %ss:%ebp : LZMA parameter block
* %ds:%esi : compressed input data pointer
* %es:%edi : uncompressed output data pointer
* %edx : LZMA state
* Returns:
* %ds:%esi : compressed input data pointer (possibly updated)
* %es:%edi : uncompressed output data pointer (updated)
* %edx : LZMA state
* CF : end of payload marker found (always zero)
* Corrupts:
* %eax
* %ebx
* %ecx
*****************************************************************************
*
* Literals are coded as an eight-bit tree, using a match byte if the
* previous symbol was not a literal.
*
*/
lzma_literal:
/* Get most recent output byte, if available */
xorl %ebx, %ebx
cmpl %esi, in_start(%ebp)
je 1f
movb %es:-1(%edi), %bh
1: /* Locate probability estimate set */
shrb $( 8 - LZMA_LC ), %bh
shlb $1, %bh
leaw literal(%ebx,%ebx,2), %bx
/* Get match byte, if applicable */
xorw %cx, %cx
cmpb $STATE_LIT_MAX, %dl
jbe 1f
movl rep0(%ebp), %eax
notl %eax
movb %es:(%edi,%eax), %cl
movb $1, %ch
1: /* Decode bittree */
call rc_bittree_match
/* Store output byte */
ADDR32 stosb
print_hex_byte %al
print_character ' '
/* Update LZMA state */
subb $3, %dl
jns 1f
xorb %dl, %dl
1: cmpb $7, %dl
jb 1f
subb $3, %dl
1: /* Clear CF and return */
clc
ret
.size lzma_literal, . - lzma_literal
/*****************************************************************************
* Decode an LZMA length
*
* Parameters:
* %ss:%ebp : LZMA parameter block
* %ds:%esi : compressed input data pointer
* %ebx : length parameter pointer (offset from %ebp)
* Returns:
* %ds:%esi : compressed input data pointer (possibly updated)
* Corrupts:
* %ebx
*****************************************************************************
*
* Lengths are encoded as:
*
* "0" + 3 bits : lengths 2-9 ("low")
* "10" + 3 bits : lengths 10-17 ("mid")
* "11" + 8 bits : lengths 18-273 ("high")
*/
lzma_len:
/* Preserve registers */
pushl %eax
pushl %ecx
pushl %edi
movl %ebx, %edi
/* Start by assuming three bits and a base length of 2 */
movw $3, %cx
movw $2, len(%ebp)
/* Check low-length choice bit */
leal choice(%edi), %ebx
call rc_bit
leal low(%edi), %ebx
jz 1f
/* Check high-length choice bit */
leal choice2(%edi), %ebx
call rc_bit
leal mid(%edi), %ebx
movb $10, len(%ebp)
jz 1f
leal high(%edi), %ebx
movb $8, %cl
movb $18, len(%ebp)
1: /* Get encoded length */
call rc_bittree
addw %ax, len(%ebp)
/* Restore registers and return */
movl %edi, %ebx
popl %edi
popl %ecx
popl %eax
ret
.size lzma_len, . - lzma_len
/*****************************************************************************
* Copy (possibly repeated) matched data
*
* Parameters:
* %ss:%ebp : LZMA parameter block
* %ds:%esi : compressed input data pointer
* %es:%edi : uncompressed output data pointer
* %cl : repeated match distance index (for repeated matches)
* %eax : match distance (for non-repeated matches)
* Returns:
* %ds:%esi : compressed input data pointer (possibly updated)
* %es:%edi : uncompressed output data pointer
* CF : match distance is out of range
* Corrupts:
* %eax
* %ebx
* %ecx
*****************************************************************************
*/
match: /* Update repeated match list */
print_character '['
movl $3, %ecx
jmp 1f
match_rep:
print_character '['
print_character 'R'
print_hex_byte %cl
print_character '='
movzbl %cl, %ecx
movl reps(%ebp,%ecx,4), %eax
jcxz 2f
1: movl (reps-4)(%ebp,%ecx,4), %ebx
movl %ebx, reps(%ebp,%ecx,4)
loop 1b
movl %eax, rep0(%ebp)
2: /* Preserve registers */
pushl %esi
/* Get stored match length */
movzwl len(%ebp), %ecx
print_hex_dword %eax
print_character '+'
print_hex_word %cx
print_character ']'
print_character ' '
/* Abort with CF set if match distance is out of range */
movl out_start(%ebp), %esi
negl %esi
leal -1(%edi,%esi), %esi
cmpl %eax, %esi
jc 99f
/* Perform copy */
notl %eax
leal (%edi,%eax), %esi
ADDR32 es rep movsb
99: /* Restore registers and return */
popl %esi
ret
.size match, . - match
/*****************************************************************************
* Decode an LZMA match
*
* Parameters:
* %ss:%ebp : LZMA parameter block
* %ds:%esi : compressed input data pointer
* %es:%edi : uncompressed output data pointer
* %edx : LZMA state
* Returns:
* %ds:%esi : compressed input data pointer (possibly updated)
* %es:%edi : uncompressed output data pointer
* %edx : LZMA state
* CF : end of payload marker found
* Corrupts:
* %eax
* %ebx
* %ecx
*****************************************************************************
*
* Matches are encoded as an LZMA length followed by a 6-bit "distance
* slot" code, 0-26 fixed-probability bits, and 0-5 context encoded
* bits.
*/
lzma_match:
/* Preserve registers */
pushl %edi
/* Update LZMA state */
cmpb $STATE_LIT_MAX, %dl
movb $STATE_LIT_MATCH, %dl
jbe 1f
movb $STATE_NONLIT_MATCH, %dl
1: /* Decode length */
movl $match_len_dec, %ebx
call lzma_len
/* Decode distance slot */
movw len(%ebp), %bx
subw $2, %bx
cmpw $4, %bx
jb 1f
movw $3, %bx
1: shlw $7, %bx
addw $dist_slot, %bx
movw $6, %cx
call rc_bittree
/* Distance slots 0-3 are literal distances */
cmpb $4, %al
jb 99f
/* Determine initial bits: 10/11 for even/odd distance codes */
movl %eax, %edi
andw $1, %di
orw $2, %di
/* Determine number of context-encoded bits */
movw %ax, %cx
shrb $1, %cl
decb %cl
/* Select context to be used in absence of fixed-probability bits */
movl %edi, %ebx
shlw %cl, %bx
subw %ax, %bx
leaw (dist_special-2)(%ebx,%ebx), %bx
/* Decode fixed-probability bits, if any */
cmpb $6, %cl
jb 1f
subb $4, %cl
shll %cl, %edi
call rc_direct
orl %eax, %edi
/* Select context to be used in presence of fixed-probability bits */
movb $4, %cl
movl $dist_align, %ebx
1: /* Decode context-encoded bits */
shll %cl, %edi
call rc_bittree_reverse
orl %edi, %eax
99: /* Restore registers and tail-call */
popl %edi
jmp match
.size lzma_match, . - lzma_match
/*****************************************************************************
* Decode an LZMA repeated match
*
* Parameters:
* %ss:%ebp : LZMA parameter block
* %ds:%esi : compressed input data pointer
* %es:%edi : uncompressed output data pointer
* %edx : LZMA state
* Returns:
* %ds:%esi : compressed input data pointer (possibly updated)
* %es:%edi : uncompressed output data pointer
* %edx : LZMA state
* CF : end of payload marker found
* Corrupts:
* %eax
* %ebx
* %ecx
*****************************************************************************
*
* Repeated matches are encoded as:
*
* "00" : shortrep0 (implicit length 1)
* "01" + len : longrep0
* "10" + len : longrep1
* "110" + len : longrep2
* "111" + len : longrep3
*/
lzma_rep_match:
/* Initially assume longrep0 */
movw $(STATE_LIT_LONGREP << 8), %cx
/* Get is_rep0 bit */
leal is_rep0(,%edx,2), %ebx
call rc_bit
jnz 1f
/* Get is_rep0_long bit */
leal is_rep0_long(,%edx,2), %ebx
call rc_bit
jnz 98f
movw $1, len(%ebp)
movb $STATE_LIT_SHORTREP, %ch
jmp 99f
1: /* Get is_rep1 bit */
incb %cl
leal is_rep1(,%edx,2), %ebx
call rc_bit
jz 98f
/* Get is_rep2 bit */
incb %cl
leal is_rep2(,%edx,2), %ebx
call rc_bit
adcb $0, %cl
98: /* Decode length */
movl $rep_len_dec, %ebx
call lzma_len
99: /* Update LZMA state */
cmpb $STATE_LIT_MAX, %dl
movb %ch, %dl
jbe 1f
movb $STATE_NONLIT_REP, %dl
1: /* Tail call */
jmp match_rep
.size lzma_match, . - lzma_match
/*****************************************************************************
* Decode one LZMA symbol
*
* Parameters:
* %ss:%ebp : LZMA parameter block
* %ds:%esi : compressed input data pointer
* %es:%edi : uncompressed output data pointer
* %edx : LZMA state
* Returns:
* %ds:%esi : compressed input data pointer (possibly updated)
* %es:%edi : uncompressed output data pointer (updated)
* %edx : LZMA state
* CF : end of payload marker found
* Corrupts:
* %eax
* %ebx
* %ecx
*****************************************************************************
*/
lzma_decode:
/* Get is_match bit */
leal is_match(,%edx,2), %ebx
call rc_bit
jz lzma_literal
/* Get is_rep bit */
leal is_rep(,%edx,2), %ebx
call rc_bit
jz lzma_match
jmp lzma_rep_match
.size lzma_decode, . - lzma_decode
/****************************************************************************
* Undo effect of branch-call-jump (BCJ) filter
*
* Parameters:
* %es:%esi : start of uncompressed output data (note %es)
* %es:%edi : end of uncompressed output data
* Returns:
* Corrupts:
* %eax
* %ebx
* %ecx
* %edx
* %esi
*****************************************************************************
*/
bcj_filter:
/* Store (negative) start of data in %edx */
movl %esi, %edx
negl %edx
/* Calculate limit in %ecx */
leal -5(%edi,%edx), %ecx
1: /* Calculate offset in %ebx */
leal (%esi,%edx), %ebx
/* Check for end of data */
cmpl %ecx, %ebx
ja 99f
/* Check for an opcode which would be followed by a rel32 address */
ADDR32 es lodsb
andb $0xfe, %al
cmpb $0xe8, %al
jne 1b
/* Get current jump target value in %eax */
ADDR32 es lodsl
/* Convert absolute addresses in the range [0,limit) back to
* relative addresses in the range [-offset,limit-offset).
*/
cmpl %ecx, %eax
jae 2f
subl %ebx,%es:-4(%esi)
2: /* Convert negative numbers in the range [-offset,0) back to
* positive numbers in the range [limit-offset,limit).
*/
notl %eax /* Range is now [0,offset) */
cmpl %ebx, %eax
jae 1b
addl %ecx,%es:-4(%esi)
jmp 1b
99: /* Return */
ret
.size bcj_filter, . - bcj_filter
/****************************************************************************
* decompress (real-mode or 16/32-bit protected-mode near call)
*
* Decompress data
*
* Parameters (passed via registers):
* %ds:%esi : Start of compressed input data
* %es:%edi : Start of output buffer
* Returns:
* %ds:%esi - End of compressed input data
* %es:%edi - End of decompressed output data
* All other registers are preserved
*
* NOTE: It would be possible to build a smaller version of the
* decompression code for -DKEEP_IT_REAL by using 16-bit registers
* where possible.
****************************************************************************
*/
.globl decompress
decompress:
/* Preserve registers */
pushl %eax
pushl %ebx
pushl %ecx
pushl %edx
pushl %ebp
/* Allocate parameter block */
subl $sizeof__lzma_dec, %esp
movl %esp, %ebp
/* Zero parameter block and set all probabilities to 0.5 */
pushl %edi
pushw %es
pushw %ss
popw %es
movl %ebp, %edi
xorl %eax, %eax
movl $( sizeof__lzma_dec / 4 ), %ecx
ADDR32 rep stosl
leal probs(%ebp), %edi
movw $( ( 1 << 11 ) / 2 ), %ax
movl $( ( sizeof__lzma_dec - probs ) / 2 ), %ecx
ADDR32 rep stosw
popw %es
popl %edi
/* Initialise remaining parameters */
movl %esi, in_start(%ebp)
movl %edi, out_start(%ebp)
print_character '\n'
ADDR32 lodsb /* discard initial byte */
print_hex_byte %al
ADDR32 lodsl
bswapl %eax
print_hex_dword %eax
print_character '\n'
movl %eax, rc_code(%ebp)
decl rc_range(%ebp)
movl $STATE_LIT_LIT, %edx
1: /* Decompress until we reach end of buffer */
call lzma_decode
jnc 1b
print_character '\n'
/* Undo BCJ filter */
pushl %esi
movl out_start(%ebp), %esi
call bcj_filter
popl %esi
/* Restore registers and return */
addl $sizeof__lzma_dec, %esp
popl %ebp
popl %edx
popl %ecx
popl %ebx
popl %eax
ret
/* Specify minimum amount of stack space required */
.globl _min_decompress_stack
.equ _min_decompress_stack, ( sizeof__lzma_dec + 512 /* margin */ )

View File

@ -0,0 +1,9 @@
/*
* 16-bit version of the decompressor
*
*/
FILE_LICENCE ( GPL2_OR_LATER )
#define CODE16
#include "unlzma.S"

View File

@ -1,13 +1,21 @@
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/stat.h>
#define ENCODE
#define VERBOSE
#include "nrv2b.c"
FILE *infile, *outfile;
#include <lzma.h>
#define DEBUG 0
/* LZMA filter choices. Must match those used by unlzma.S */
#define LZMA_LC 2
#define LZMA_LP 0
#define LZMA_PB 0
/* LZMA preset choice. This is a policy decision */
#define LZMA_PRESET ( LZMA_PRESET_DEFAULT | LZMA_PRESET_EXTREME )
struct input_file {
void *buf;
size_t len;
@ -177,13 +185,75 @@ static int process_zinfo_copy ( struct input_file *input,
return 0;
}
#define OPCODE_CALL 0xe8
#define OPCODE_JMP 0xe9
static void bcj_filter ( void *data, size_t len ) {
struct {
uint8_t opcode;
int32_t target;
} __attribute__ (( packed )) *jump;
ssize_t limit = ( len - sizeof ( *jump ) );
ssize_t offset;
/* liblzma does include an x86 BCJ filter, but it's hideously
* convoluted and undocumented. This BCJ filter is
* substantially simpler and achieves the same compression (at
* the cost of requiring the decompressor to know the size of
* the decompressed data, which we already have in iPXE).
*/
for ( offset = 0 ; offset <= limit ; offset++ ) {
jump = ( data + offset );
/* Skip instructions that are not followed by a rel32 address */
if ( ( jump->opcode != OPCODE_CALL ) &&
( jump->opcode != OPCODE_JMP ) )
continue;
/* Convert rel32 address to an absolute address. To
* avoid false positives (which damage the compression
* ratio), we should check that the jump target is
* within the range [0,limit).
*
* Some output values would then end up being mapped
* from two distinct input values, making the
* transformation irreversible. To solve this, we
* transform such values back into the part of the
* range which would otherwise correspond to no input
* values.
*/
if ( ( jump->target >= -offset ) &&
( jump->target < ( limit - offset ) ) ) {
/* Convert relative addresses in the range
* [-offset,limit-offset) to absolute
* addresses in the range [0,limit).
*/
jump->target += offset;
} else if ( ( jump->target >= ( limit - offset ) ) &&
( jump->target < limit ) ) {
/* Convert positive numbers in the range
* [limit-offset,limit) to negative numbers in
* the range [-offset,0).
*/
jump->target -= limit;
}
offset += sizeof ( jump->target );
};
}
static int process_zinfo_pack ( struct input_file *input,
struct output_file *output,
union zinfo_record *zinfo ) {
struct zinfo_pack *pack = &zinfo->pack;
size_t offset = pack->offset;
size_t len = pack->len;
unsigned long packed_len;
size_t packed_len = 0;
size_t remaining = ( output->max_len - output->len );
lzma_options_lzma options;
const lzma_filter filters[] = {
{ .id = LZMA_FILTER_LZMA1, .options = &options },
{ .id = LZMA_VLI_UNKNOWN }
};
if ( ( offset + len ) > input->len ) {
fprintf ( stderr, "Input buffer overrun on pack\n" );
@ -196,9 +266,15 @@ static int process_zinfo_pack ( struct input_file *input,
return -1;
}
if ( ucl_nrv2b_99_compress ( ( input->buf + offset ), len,
( output->buf + output->len ),
&packed_len, 0 ) != UCL_E_OK ) {
bcj_filter ( ( input->buf + offset ), len );
lzma_lzma_preset ( &options, LZMA_PRESET );
options.lc = LZMA_LC;
options.lp = LZMA_LP;
options.pb = LZMA_PB;
if ( lzma_raw_buffer_encode ( filters, NULL, ( input->buf + offset ),
len, ( output->buf + output->len ),
&packed_len, remaining ) != LZMA_OK ) {
fprintf ( stderr, "Compression failure\n" );
return -1;
}
@ -206,7 +282,7 @@ static int process_zinfo_pack ( struct input_file *input,
if ( DEBUG ) {
fprintf ( stderr, "PACK [%#zx,%#zx) to [%#zx,%#zx)\n",
offset, ( offset + len ), output->len,
( size_t )( output->len + packed_len ) );
( output->len + packed_len ) );
}
output->len += packed_len;