--- /dev/null
+++ b/arch/x86/boot/compressed/LzmaDecode.c
@@ -0,0 +1,586 @@
+/*
+  LzmaDecode.c
+  LZMA Decoder (optimized for Speed version)
+  
+  LZMA SDK 4.17 Copyright (c) 1999-2005 Igor Pavlov (2005-04-05)
+  http://www.7-zip.org/
+
+  LZMA SDK is licensed under two licenses:
+  1) GNU Lesser General Public License (GNU LGPL)
+  2) Common Public License (CPL)
+  It means that you can select one of these two licenses and 
+  follow rules of that license.
+
+  SPECIAL EXCEPTION:
+  Igor Pavlov, as the author of this Code, expressly permits you to 
+  statically or dynamically link your Code (or bind by name) to the 
+  interfaces of this file without subjecting your linked Code to the 
+  terms of the CPL or GNU LGPL. Any modifications or additions 
+  to this file, however, are subject to the LGPL or CPL terms.
+*/
+
+#include "LzmaDecode.h"
+
+#ifndef Byte
+#define Byte unsigned char
+#endif
+
+#define kNumTopBits 24
+#define kTopValue ((UInt32)1 << kNumTopBits)
+
+#define kNumBitModelTotalBits 11
+#define kBitModelTotal (1 << kNumBitModelTotalBits)
+#define kNumMoveBits 5
+
+#define RC_READ_BYTE (*Buffer++)
+
+#define RC_INIT2 Code = 0; Range = 0xFFFFFFFF; \
+  { int i; for(i = 0; i < 5; i++) { RC_TEST; Code = (Code << 8) | RC_READ_BYTE; }}
+
+#ifdef _LZMA_IN_CB
+
+#define RC_TEST { if (Buffer == BufferLim) \
+  { UInt32 size; int result = InCallback->Read(InCallback, &Buffer, &size); if (result != LZMA_RESULT_OK) return result; \
+  BufferLim = Buffer + size; if (size == 0) return LZMA_RESULT_DATA_ERROR; }}
+
+#define RC_INIT Buffer = BufferLim = 0; RC_INIT2
+
+#else
+
+#define RC_TEST { if (Buffer == BufferLim) return LZMA_RESULT_DATA_ERROR; }
+
+#define RC_INIT(buffer, bufferSize) Buffer = buffer; BufferLim = buffer + bufferSize; RC_INIT2
+ 
+#endif
+
+#define RC_NORMALIZE if (Range < kTopValue) { RC_TEST; Range <<= 8; Code = (Code << 8) | RC_READ_BYTE; }
+
+#define IfBit0(p) RC_NORMALIZE; bound = (Range >> kNumBitModelTotalBits) * *(p); if (Code < bound)
+#define UpdateBit0(p) Range = bound; *(p) += (kBitModelTotal - *(p)) >> kNumMoveBits;
+#define UpdateBit1(p) Range -= bound; Code -= bound; *(p) -= (*(p)) >> kNumMoveBits;
+
+#define RC_GET_BIT2(p, mi, A0, A1) IfBit0(p) \
+  { UpdateBit0(p); mi <<= 1; A0; } else \
+  { UpdateBit1(p); mi = (mi + mi) + 1; A1; } 
+  
+#define RC_GET_BIT(p, mi) RC_GET_BIT2(p, mi, ; , ;)               
+
+#define RangeDecoderBitTreeDecode(probs, numLevels, res) \
+  { int i = numLevels; res = 1; \
+  do { CProb *p = probs + res; RC_GET_BIT(p, res) } while(--i != 0); \
+  res -= (1 << numLevels); }
+
+
+#define kNumPosBitsMax 4
+#define kNumPosStatesMax (1 << kNumPosBitsMax)
+
+#define kLenNumLowBits 3
+#define kLenNumLowSymbols (1 << kLenNumLowBits)
+#define kLenNumMidBits 3
+#define kLenNumMidSymbols (1 << kLenNumMidBits)
+#define kLenNumHighBits 8
+#define kLenNumHighSymbols (1 << kLenNumHighBits)
+
+#define LenChoice 0
+#define LenChoice2 (LenChoice + 1)
+#define LenLow (LenChoice2 + 1)
+#define LenMid (LenLow + (kNumPosStatesMax << kLenNumLowBits))
+#define LenHigh (LenMid + (kNumPosStatesMax << kLenNumMidBits))
+#define kNumLenProbs (LenHigh + kLenNumHighSymbols) 
+
+
+#define kNumStates 12
+#define kNumLitStates 7
+
+#define kStartPosModelIndex 4
+#define kEndPosModelIndex 14
+#define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
+
+#define kNumPosSlotBits 6
+#define kNumLenToPosStates 4
+
+#define kNumAlignBits 4
+#define kAlignTableSize (1 << kNumAlignBits)
+
+#define kMatchMinLen 2
+
+#define IsMatch 0
+#define IsRep (IsMatch + (kNumStates << kNumPosBitsMax))
+#define IsRepG0 (IsRep + kNumStates)
+#define IsRepG1 (IsRepG0 + kNumStates)
+#define IsRepG2 (IsRepG1 + kNumStates)
+#define IsRep0Long (IsRepG2 + kNumStates)
+#define PosSlot (IsRep0Long + (kNumStates << kNumPosBitsMax))
+#define SpecPos (PosSlot + (kNumLenToPosStates << kNumPosSlotBits))
+#define Align (SpecPos + kNumFullDistances - kEndPosModelIndex)
+#define LenCoder (Align + kAlignTableSize)
+#define RepLenCoder (LenCoder + kNumLenProbs)
+#define Literal (RepLenCoder + kNumLenProbs)
+
+#if Literal != LZMA_BASE_SIZE
+StopCompilingDueBUG
+#endif
+
+#ifdef _LZMA_OUT_READ
+
+typedef struct _LzmaVarState
+{
+  Byte *Buffer;
+  Byte *BufferLim;
+  UInt32 Range;
+  UInt32 Code;
+  #ifdef _LZMA_IN_CB
+  ILzmaInCallback *InCallback;
+  #endif
+  Byte *Dictionary;
+  UInt32 DictionarySize;
+  UInt32 DictionaryPos;
+  UInt32 GlobalPos;
+  UInt32 Reps[4];
+  int lc;
+  int lp;
+  int pb;
+  int State;
+  int RemainLen;
+  Byte TempDictionary[4];
+} LzmaVarState;
+
+int LzmaDecoderInit(
+    unsigned char *buffer, UInt32 bufferSize,
+    int lc, int lp, int pb,
+    unsigned char *dictionary, UInt32 dictionarySize,
+    #ifdef _LZMA_IN_CB
+    ILzmaInCallback *InCallback
+    #else
+    unsigned char *inStream, UInt32 inSize
+    #endif
+    )
+{
+  Byte *Buffer;
+  Byte *BufferLim;
+  UInt32 Range;
+  UInt32 Code;
+  LzmaVarState *vs = (LzmaVarState *)buffer;
+  CProb *p = (CProb *)(buffer + sizeof(LzmaVarState));
+  UInt32 numProbs = Literal + ((UInt32)LZMA_LIT_SIZE << (lc + lp));
+  UInt32 i;
+  if (bufferSize < numProbs * sizeof(CProb) + sizeof(LzmaVarState))
+    return LZMA_RESULT_NOT_ENOUGH_MEM;
+  vs->Dictionary = dictionary;
+  vs->DictionarySize = dictionarySize;
+  vs->DictionaryPos = 0;
+  vs->GlobalPos = 0;
+  vs->Reps[0] = vs->Reps[1] = vs->Reps[2] = vs->Reps[3] = 1;
+  vs->lc = lc;
+  vs->lp = lp;
+  vs->pb = pb;
+  vs->State = 0;
+  vs->RemainLen = 0;
+  dictionary[dictionarySize - 1] = 0;
+  for (i = 0; i < numProbs; i++)
+    p[i] = kBitModelTotal >> 1; 
+
+  #ifdef _LZMA_IN_CB
+  RC_INIT;
+  #else
+  RC_INIT(inStream, inSize);
+  #endif
+  vs->Buffer = Buffer;
+  vs->BufferLim = BufferLim;
+  vs->Range = Range;
+  vs->Code = Code;
+  #ifdef _LZMA_IN_CB
+  vs->InCallback = InCallback;
+  #endif
+
+  return LZMA_RESULT_OK;
+}
+
+int LzmaDecode(unsigned char *buffer, 
+    unsigned char *outStream, UInt32 outSize,
+    UInt32 *outSizeProcessed)
+{
+  LzmaVarState *vs = (LzmaVarState *)buffer;
+  Byte *Buffer = vs->Buffer;
+  Byte *BufferLim = vs->BufferLim;
+  UInt32 Range = vs->Range;
+  UInt32 Code = vs->Code;
+  #ifdef _LZMA_IN_CB
+  ILzmaInCallback *InCallback = vs->InCallback;
+  #endif
+  CProb *p = (CProb *)(buffer + sizeof(LzmaVarState));
+  int state = vs->State;
+  Byte previousByte;
+  UInt32 rep0 = vs->Reps[0], rep1 = vs->Reps[1], rep2 = vs->Reps[2], rep3 = vs->Reps[3];
+  UInt32 nowPos = 0;
+  UInt32 posStateMask = (1 << (vs->pb)) - 1;
+  UInt32 literalPosMask = (1 << (vs->lp)) - 1;
+  int lc = vs->lc;
+  int len = vs->RemainLen;
+  UInt32 globalPos = vs->GlobalPos;
+
+  Byte *dictionary = vs->Dictionary;
+  UInt32 dictionarySize = vs->DictionarySize;
+  UInt32 dictionaryPos = vs->DictionaryPos;
+
+  Byte tempDictionary[4];
+  if (dictionarySize == 0)
+  {
+    dictionary = tempDictionary;
+    dictionarySize = 1;
+    tempDictionary[0] = vs->TempDictionary[0];
+  }
+
+  if (len == -1)
+  {
+    *outSizeProcessed = 0;
+    return LZMA_RESULT_OK;
+  }
+
+  while(len != 0 && nowPos < outSize)
+  {
+    UInt32 pos = dictionaryPos - rep0;
+    if (pos >= dictionarySize)
+      pos += dictionarySize;
+    outStream[nowPos++] = dictionary[dictionaryPos] = dictionary[pos];
+    if (++dictionaryPos == dictionarySize)
+      dictionaryPos = 0;
+    len--;
+  }
+  if (dictionaryPos == 0)
+    previousByte = dictionary[dictionarySize - 1];
+  else
+    previousByte = dictionary[dictionaryPos - 1];
+#else
+
+int LzmaDecode(
+    Byte *buffer, UInt32 bufferSize,
+    int lc, int lp, int pb,
+    #ifdef _LZMA_IN_CB
+    ILzmaInCallback *InCallback,
+    #else
+    unsigned char *inStream, UInt32 inSize,
+    #endif
+    unsigned char *outStream, UInt32 outSize,
+    UInt32 *outSizeProcessed)
+{
+  UInt32 numProbs = Literal + ((UInt32)LZMA_LIT_SIZE << (lc + lp));
+  CProb *p = (CProb *)buffer;
+
+  UInt32 i;
+  int state = 0;
+  Byte previousByte = 0;
+  UInt32 rep0 = 1, rep1 = 1, rep2 = 1, rep3 = 1;
+  UInt32 nowPos = 0;
+  UInt32 posStateMask = (1 << pb) - 1;
+  UInt32 literalPosMask = (1 << lp) - 1;
+  int len = 0;
+  
+  Byte *Buffer;
+  Byte *BufferLim;
+  UInt32 Range;
+  UInt32 Code;
+  
+  if (bufferSize < numProbs * sizeof(CProb))
+    return LZMA_RESULT_NOT_ENOUGH_MEM;
+  for (i = 0; i < numProbs; i++)
+    p[i] = kBitModelTotal >> 1;
+  
+
+  #ifdef _LZMA_IN_CB
+  RC_INIT;
+  #else
+  RC_INIT(inStream, inSize);
+  #endif
+#endif
+
+  *outSizeProcessed = 0;
+  while(nowPos < outSize)
+  {
+    CProb *prob;
+    UInt32 bound;
+    int posState = (int)(
+        (nowPos 
+        #ifdef _LZMA_OUT_READ
+        + globalPos
+        #endif
+        )
+        & posStateMask);
+
+    prob = p + IsMatch + (state << kNumPosBitsMax) + posState;
+    IfBit0(prob)
+    {
+      int symbol = 1;
+      UpdateBit0(prob)
+      prob = p + Literal + (LZMA_LIT_SIZE * 
+        (((
+        (nowPos 
+        #ifdef _LZMA_OUT_READ
+        + globalPos
+        #endif
+        )
+        & literalPosMask) << lc) + (previousByte >> (8 - lc))));
+
+      if (state >= kNumLitStates)
+      {
+        int matchByte;
+        #ifdef _LZMA_OUT_READ
+        UInt32 pos = dictionaryPos - rep0;
+        if (pos >= dictionarySize)
+          pos += dictionarySize;
+        matchByte = dictionary[pos];
+        #else
+        matchByte = outStream[nowPos - rep0];
+        #endif
+        do
+        {
+          int bit;
+          CProb *probLit;
+          matchByte <<= 1;
+          bit = (matchByte & 0x100);
+          probLit = prob + 0x100 + bit + symbol;
+          RC_GET_BIT2(probLit, symbol, if (bit != 0) break, if (bit == 0) break)
+        }
+        while (symbol < 0x100);
+      }
+      while (symbol < 0x100)
+      {
+        CProb *probLit = prob + symbol;
+        RC_GET_BIT(probLit, symbol)
+      }
+      previousByte = (Byte)symbol;
+
+      outStream[nowPos++] = previousByte;
+      #ifdef _LZMA_OUT_READ
+      dictionary[dictionaryPos] = previousByte;
+      if (++dictionaryPos == dictionarySize)
+        dictionaryPos = 0;
+      #endif
+      if (state < 4) state = 0;
+      else if (state < 10) state -= 3;
+      else state -= 6;
+    }
+    else             
+    {
+      UpdateBit1(prob);
+      prob = p + IsRep + state;
+      IfBit0(prob)
+      {
+        UpdateBit0(prob);
+        rep3 = rep2;
+        rep2 = rep1;
+        rep1 = rep0;
+        state = state < kNumLitStates ? 0 : 3;
+        prob = p + LenCoder;
+      }
+      else
+      {
+        UpdateBit1(prob);
+        prob = p + IsRepG0 + state;
+        IfBit0(prob)
+        {
+          UpdateBit0(prob);
+          prob = p + IsRep0Long + (state << kNumPosBitsMax) + posState;
+          IfBit0(prob)
+          {
+            #ifdef _LZMA_OUT_READ
+            UInt32 pos;
+            #endif
+            UpdateBit0(prob);
+            if (nowPos 
+                #ifdef _LZMA_OUT_READ
+                + globalPos
+                #endif
+                == 0)
+              return LZMA_RESULT_DATA_ERROR;
+            state = state < kNumLitStates ? 9 : 11;
+            #ifdef _LZMA_OUT_READ
+            pos = dictionaryPos - rep0;
+            if (pos >= dictionarySize)
+              pos += dictionarySize;
+            previousByte = dictionary[pos];
+            dictionary[dictionaryPos] = previousByte;
+            if (++dictionaryPos == dictionarySize)
+              dictionaryPos = 0;
+            #else
+            previousByte = outStream[nowPos - rep0];
+            #endif
+            outStream[nowPos++] = previousByte;
+            continue;
+          }
+          else
+          {
+            UpdateBit1(prob);
+          }
+        }
+        else
+        {
+          UInt32 distance;
+          UpdateBit1(prob);
+          prob = p + IsRepG1 + state;
+          IfBit0(prob)
+          {
+            UpdateBit0(prob);
+            distance = rep1;
+          }
+          else 
+          {
+            UpdateBit1(prob);
+            prob = p + IsRepG2 + state;
+            IfBit0(prob)
+            {
+              UpdateBit0(prob);
+              distance = rep2;
+            }
+            else
+            {
+              UpdateBit1(prob);
+              distance = rep3;
+              rep3 = rep2;
+            }
+            rep2 = rep1;
+          }
+          rep1 = rep0;
+          rep0 = distance;
+        }
+        state = state < kNumLitStates ? 8 : 11;
+        prob = p + RepLenCoder;
+      }
+      {
+        int numBits, offset;
+        CProb *probLen = prob + LenChoice;
+        IfBit0(probLen)
+        {
+          UpdateBit0(probLen);
+          probLen = prob + LenLow + (posState << kLenNumLowBits);
+          offset = 0;
+          numBits = kLenNumLowBits;
+        }
+        else
+        {
+          UpdateBit1(probLen);
+          probLen = prob + LenChoice2;
+          IfBit0(probLen)
+          {
+            UpdateBit0(probLen);
+            probLen = prob + LenMid + (posState << kLenNumMidBits);
+            offset = kLenNumLowSymbols;
+            numBits = kLenNumMidBits;
+          }
+          else
+          {
+            UpdateBit1(probLen);
+            probLen = prob + LenHigh;
+            offset = kLenNumLowSymbols + kLenNumMidSymbols;
+            numBits = kLenNumHighBits;
+          }
+        }
+        RangeDecoderBitTreeDecode(probLen, numBits, len);
+        len += offset;
+      }
+
+      if (state < 4)
+      {
+        int posSlot;
+        state += kNumLitStates;
+        prob = p + PosSlot +
+            ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << 
+            kNumPosSlotBits);
+        RangeDecoderBitTreeDecode(prob, kNumPosSlotBits, posSlot);
+        if (posSlot >= kStartPosModelIndex)
+        {
+          int numDirectBits = ((posSlot >> 1) - 1);
+          rep0 = (2 | ((UInt32)posSlot & 1));
+          if (posSlot < kEndPosModelIndex)
+          {
+            rep0 <<= numDirectBits;
+            prob = p + SpecPos + rep0 - posSlot - 1;
+          }
+          else
+          {
+            numDirectBits -= kNumAlignBits;
+            do
+            {
+              RC_NORMALIZE
+              Range >>= 1;
+              rep0 <<= 1;
+              if (Code >= Range)
+              {
+                Code -= Range;
+                rep0 |= 1;
+              }
+            }
+            while (--numDirectBits != 0);
+            prob = p + Align;
+            rep0 <<= kNumAlignBits;
+            numDirectBits = kNumAlignBits;
+          }
+          {
+            int i = 1;
+            int mi = 1;
+            do
+            {
+              CProb *prob3 = prob + mi;
+              RC_GET_BIT2(prob3, mi, ; , rep0 |= i);
+              i <<= 1;
+            }
+            while(--numDirectBits != 0);
+          }
+        }
+        else
+          rep0 = posSlot;
+        if (++rep0 == (UInt32)(0))
+        {
+          /* it's for stream version */
+          len = -1;
+          break;
+        }
+      }
+
+      len += kMatchMinLen;
+      if (rep0 > nowPos 
+        #ifdef _LZMA_OUT_READ
+        + globalPos || rep0 > dictionarySize
+        #endif
+        ) 
+        return LZMA_RESULT_DATA_ERROR;
+      do
+      {
+        #ifdef _LZMA_OUT_READ
+        UInt32 pos = dictionaryPos - rep0;
+        if (pos >= dictionarySize)
+          pos += dictionarySize;
+        previousByte = dictionary[pos];
+        dictionary[dictionaryPos] = previousByte;
+        if (++dictionaryPos == dictionarySize)
+          dictionaryPos = 0;
+        #else
+        previousByte = outStream[nowPos - rep0];
+        #endif
+        len--;
+        outStream[nowPos++] = previousByte;
+      }
+      while(len != 0 && nowPos < outSize);
+    }
+  }
+  RC_NORMALIZE;
+
+  #ifdef _LZMA_OUT_READ
+  vs->Buffer = Buffer;
+  vs->BufferLim = BufferLim;
+  vs->Range = Range;
+  vs->Code = Code;
+  vs->DictionaryPos = dictionaryPos;
+  vs->GlobalPos = globalPos + nowPos;
+  vs->Reps[0] = rep0;
+  vs->Reps[1] = rep1;
+  vs->Reps[2] = rep2;
+  vs->Reps[3] = rep3;
+  vs->State = state;
+  vs->RemainLen = len;
+  vs->TempDictionary[0] = tempDictionary[0];
+  #endif
+
+  *outSizeProcessed = nowPos;
+  return LZMA_RESULT_OK;
+}
--- /dev/null
+++ b/arch/x86/boot/compressed/LzmaDecode.h
@@ -0,0 +1,100 @@
+/* 
+  LzmaDecode.h
+  LZMA Decoder interface
+
+  LZMA SDK 4.16 Copyright (c) 1999-2005 Igor Pavlov (2005-03-18)
+  http://www.7-zip.org/
+
+  LZMA SDK is licensed under two licenses:
+  1) GNU Lesser General Public License (GNU LGPL)
+  2) Common Public License (CPL)
+  It means that you can select one of these two licenses and 
+  follow rules of that license.
+
+  SPECIAL EXCEPTION:
+  Igor Pavlov, as the author of this code, expressly permits you to 
+  statically or dynamically link your code (or bind by name) to the 
+  interfaces of this file without subjecting your linked code to the 
+  terms of the CPL or GNU LGPL. Any modifications or additions 
+  to this file, however, are subject to the LGPL or CPL terms.
+*/
+
+#ifndef __LZMADECODE_H
+#define __LZMADECODE_H
+
+/* #define _LZMA_IN_CB */
+/* Use callback for input data */
+
+/* #define _LZMA_OUT_READ */
+/* Use read function for output data */
+
+/* #define _LZMA_PROB32 */
+/* It can increase speed on some 32-bit CPUs, 
+   but memory usage will be doubled in that case */
+
+/* #define _LZMA_LOC_OPT */
+/* Enable local speed optimizations inside code */
+
+#ifndef UInt32
+#ifdef _LZMA_UINT32_IS_ULONG
+#define UInt32 unsigned long
+#else
+#define UInt32 unsigned int
+#endif
+#endif
+
+#ifdef _LZMA_PROB32
+#define CProb UInt32
+#else
+#define CProb unsigned short
+#endif
+
+#define LZMA_RESULT_OK 0
+#define LZMA_RESULT_DATA_ERROR 1
+#define LZMA_RESULT_NOT_ENOUGH_MEM 2
+
+#ifdef _LZMA_IN_CB
+typedef struct _ILzmaInCallback
+{
+  int (*Read)(void *object, unsigned char **buffer, UInt32 *bufferSize);
+} ILzmaInCallback;
+#endif
+
+#define LZMA_BASE_SIZE 1846
+#define LZMA_LIT_SIZE 768
+
+/* 
+bufferSize = (LZMA_BASE_SIZE + (LZMA_LIT_SIZE << (lc + lp)))* sizeof(CProb)
+bufferSize += 100 in case of _LZMA_OUT_READ
+by default CProb is unsigned short, 
+but if specify _LZMA_PROB_32, CProb will be UInt32(unsigned int)
+*/
+
+#ifdef _LZMA_OUT_READ
+int LzmaDecoderInit(
+    unsigned char *buffer, UInt32 bufferSize,
+    int lc, int lp, int pb,
+    unsigned char *dictionary, UInt32 dictionarySize,
+  #ifdef _LZMA_IN_CB
+    ILzmaInCallback *inCallback
+  #else
+    unsigned char *inStream, UInt32 inSize
+  #endif
+);
+#endif
+
+int LzmaDecode(
+    unsigned char *buffer, 
+  #ifndef _LZMA_OUT_READ
+    UInt32 bufferSize,
+    int lc, int lp, int pb,
+  #ifdef _LZMA_IN_CB
+    ILzmaInCallback *inCallback,
+  #else
+    unsigned char *inStream, UInt32 inSize,
+  #endif
+  #endif
+    unsigned char *outStream, UInt32 outSize,
+    UInt32 *outSizeProcessed);
+
+#endif
--- /dev/null
+++ b/arch/x86/boot/compressed/lzma_misc.c
@@ -0,0 +1,281 @@
+/*
+ * lzma_misc.c
+ * 
+ * malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994
+ * puts by Nick Holloway 1993, better puts by Martin Mares 1995
+ * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
+ * 
+ * Decompress LZMA compressed vmlinuz 
+ * Version 0.9 Copyright (c) Ming-Ching Tiew mctiew@yahoo.com
+ * Program adapted from misc.c for 2.6 kernel
+ * Forward ported to latest 2.6 version of misc.c by
+ * Felix Fietkau <nbd@openwrt.org>
+ */
+
+#undef CONFIG_PARAVIRT
+#include <linux/linkage.h>
+#include <linux/vmalloc.h>
+#include <linux/screen_info.h>
+#include <asm/io.h>
+#include <asm/page.h>
+#include <asm/boot.h>
+
+/* WARNING!!
+ * This code is compiled with -fPIC and it is relocated dynamically
+ * at run time, but no relocation processing is performed.
+ * This means that it is not safe to place pointers in static structures.
+ */
+
+/*
+ * Getting to provable safe in place decompression is hard.
+ * Worst case behaviours need to be analized.
+ * Background information:
+ *
+ * The file layout is:
+ *    magic[2]
+ *    method[1]
+ *    flags[1]
+ *    timestamp[4]
+ *    extraflags[1]
+ *    os[1]
+ *    compressed data blocks[N]
+ *    crc[4] orig_len[4]
+ *
+ * resulting in 18 bytes of non compressed data overhead.
+ *
+ * Files divided into blocks
+ * 1 bit (last block flag)
+ * 2 bits (block type)
+ *
+ * 1 block occurs every 32K -1 bytes or when there 50% compression has been achieved.
+ * The smallest block type encoding is always used.
+ *
+ * stored:
+ *    32 bits length in bytes.
+ *
+ * fixed:
+ *    magic fixed tree.
+ *    symbols.
+ *
+ * dynamic:
+ *    dynamic tree encoding.
+ *    symbols.
+ *
+ *
+ * The buffer for decompression in place is the length of the
+ * uncompressed data, plus a small amount extra to keep the algorithm safe.
+ * The compressed data is placed at the end of the buffer.  The output
+ * pointer is placed at the start of the buffer and the input pointer
+ * is placed where the compressed data starts.  Problems will occur
+ * when the output pointer overruns the input pointer.
+ *
+ * The output pointer can only overrun the input pointer if the input
+ * pointer is moving faster than the output pointer.  A condition only
+ * triggered by data whose compressed form is larger than the uncompressed
+ * form.
+ *
+ * The worst case at the block level is a growth of the compressed data
+ * of 5 bytes per 32767 bytes.
+ *
+ * The worst case internal to a compressed block is very hard to figure.
+ * The worst case can at least be boundined by having one bit that represents
+ * 32764 bytes and then all of the rest of the bytes representing the very
+ * very last byte.
+ *
+ * All of which is enough to compute an amount of extra data that is required
+ * to be safe.  To avoid problems at the block level allocating 5 extra bytes
+ * per 32767 bytes of data is sufficient.  To avoind problems internal to a block
+ * adding an extra 32767 bytes (the worst case uncompressed block size) is
+ * sufficient, to ensure that in the worst case the decompressed data for
+ * block will stop the byte before the compressed data for a block begins.
+ * To avoid problems with the compressed data's meta information an extra 18
+ * bytes are needed.  Leading to the formula:
+ *
+ * extra_bytes = (uncompressed_size >> 12) + 32768 + 18 + decompressor_size.
+ *
+ * Adding 8 bytes per 32K is a bit excessive but much easier to calculate.
+ * Adding 32768 instead of 32767 just makes for round numbers.
+ * Adding the decompressor_size is necessary as it musht live after all
+ * of the data as well.  Last I measured the decompressor is about 14K.
+ * 10K of actuall data and 4K of bss.
+ *
+ */
+
+/*
+ * gzip declarations
+ */
+
+#define OF(args)  args
+#define STATIC static
+
+#undef memcpy
+
+typedef unsigned char  uch;
+typedef unsigned short ush;
+typedef unsigned long  ulg;
+
+#define WSIZE 0x80000000	/* Window size must be at least 32k,
+				 * and a power of two
+				 * We don't actually have a window just
+				 * a huge output buffer so I report
+				 * a 2G windows size, as that should
+				 * always be larger than our output buffer.
+				 */
+
+static uch *inbuf;	/* input buffer */
+static uch *window;	/* Sliding window buffer, (and final output buffer) */
+
+static unsigned insize;  /* valid bytes in inbuf */
+static unsigned inptr;   /* index of next byte to be processed in inbuf */
+static unsigned long workspace;
+
+#define get_byte()  (inptr < insize ? inbuf[inptr++] : fill_inbuf())
+		
+/* Diagnostic functions */
+#ifdef DEBUG
+#  define Assert(cond,msg) {if(!(cond)) error(msg);}
+#  define Trace(x) fprintf x
+#  define Tracev(x) {if (verbose) fprintf x ;}
+#  define Tracevv(x) {if (verbose>1) fprintf x ;}
+#  define Tracec(c,x) {if (verbose && (c)) fprintf x ;}
+#  define Tracecv(c,x) {if (verbose>1 && (c)) fprintf x ;}
+#else
+#  define Assert(cond,msg)
+#  define Trace(x)
+#  define Tracev(x)
+#  define Tracevv(x)
+#  define Tracec(c,x)
+#  define Tracecv(c,x)
+#endif
+
+static int  fill_inbuf(void);
+  
+/*
+ * This is set up by the setup-routine at boot-time
+ */
+static unsigned char *real_mode; /* Pointer to real-mode data */
+extern unsigned char input_data[];
+extern int input_len;
+
+static void error(char *x);
+static void *memcpy(void *dest, const void *src, unsigned n);
+
+#ifdef CONFIG_X86_NUMAQ
+void *xquad_portio;
+#endif
+
+static void* memcpy(void* dest, const void* src, unsigned n)
+{
+	int i;
+	char *d = (char *)dest, *s = (char *)src;
+
+	for (i=0;i<n;i++) d[i] = s[i];
+	return dest;
+}
+
+/* ===========================================================================
+ * Fill the input buffer. This is called only when the buffer is empty
+ * and at least one byte is really needed.
+ */
+static int fill_inbuf(void)
+{
+	error("ran out of input data");
+	return 0;
+}
+
+
+// When using LZMA in callback, the compressed length is not needed.
+// Otherwise you need a special version of lzma compression program
+// which will pad the compressed length in the header.
+#define _LZMA_IN_CB
+#include "LzmaDecode.h"
+#include "LzmaDecode.c"
+
+static int read_byte(void *object, unsigned char **buffer, UInt32 *bufferSize);
+
+
+/*
+ * Do the lzma decompression
+ * When using LZMA in callback, the end of input stream is automatically determined
+ */
+static int lzma_unzip(void)
+{
+
+	unsigned int i;  /* temp value */
+	unsigned int lc; /* literal context bits */
+	unsigned int lp; /* literal pos state bits */
+	unsigned int pb; /* pos state bits */
+	unsigned int uncompressedSize = 0;
+	unsigned char* p;
+	
+	ILzmaInCallback callback;
+	callback.Read = read_byte;
+
+	/* lzma args */
+	i = get_byte();
+	lc = i % 9, i = i / 9;
+	lp = i % 5, pb = i / 5;
+	
+	/* skip dictionary size */
+	for (i = 0; i < 4; i++) 
+		get_byte();
+	// get uncompressedSize 	
+	p= (char*)&uncompressedSize;	
+	for (i = 0; i < 4; i++) 
+	    *p++ = get_byte();
+	    
+	//get compressedSize 
+	for (i = 0; i < 4; i++) 
+		get_byte();
+	
+	// point it beyond uncompresedSize
+	//workspace = window + uncompressedSize;
+	
+	/* decompress kernel */
+	if (LzmaDecode((unsigned char*)workspace, ~0, lc, lp, pb, &callback,
+		(unsigned char*)window, uncompressedSize, &i) == LZMA_RESULT_OK)
+		return 0;
+	else
+		return 1;
+}
+
+
+#ifdef  _LZMA_IN_CB
+static int read_byte(void *object, unsigned char **buffer, UInt32 *bufferSize)
+{
+	static unsigned int i = 0;
+	static unsigned char val;
+	*bufferSize = 1;
+	val = get_byte();
+	*buffer = &val;
+	return LZMA_RESULT_OK;
+}	
+#endif
+
+static void error(char *x)
+{
+	while(1);	/* Halt */
+}
+
+asmlinkage void decompress_kernel(void *rmode, unsigned long end,
+			uch *input_data, unsigned long input_len, uch *output)
+{
+	real_mode = rmode;
+
+	window = output;
+	inbuf  = input_data;	/* Input buffer */
+	insize = input_len;
+	inptr  = 0;
+
+	if ((u32)output & (CONFIG_PHYSICAL_ALIGN -1))
+		error("Destination address not CONFIG_PHYSICAL_ALIGN aligned");
+	if ((workspace = end) > ((-__PAGE_OFFSET-(512 <<20)-1) & 0x7fffffff))
+		error("Destination address too large");
+#ifndef CONFIG_RELOCATABLE
+	if ((u32)output != LOAD_PHYSICAL_ADDR)
+		error("Wrong destination address");
+#endif
+
+	lzma_unzip();
+	return;
+}
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -172,4 +172,9 @@
 quiet_cmd_gzip = GZIP    $@
 cmd_gzip = gzip -f -9 < $< > $@
 
-
+# LZMA
+#
+quiet_cmd_lzma = LZMA $@
+cmd_lzma = bash -e scripts/lzma_kern $< $@ -lc7 -lp0 -pb0
+# to use lzmacomp,
+# cmd_lzma = lzmacomp $< 700 > $@
--- /dev/null
+++ b/scripts/lzma_kern
@@ -0,0 +1,4 @@
+get-size() { echo "$5" ;}
+printf -v len '%.8x' "$(get-size $(ls -l "$1"))"
+lzma e "$@"
+echo -ne "\x$(echo $len | cut -c 7,8)\x$(echo $len | cut -c 5,6)\x$(echo $len | cut -c 3,4)\x$(echo $len | cut -c 1,2)" >> "$2"
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -4,7 +4,7 @@
 # create a compressed vmlinux image from the original vmlinux
 #
 
-targets := vmlinux vmlinux.bin vmlinux.bin.gz head_$(BITS).o misc.o piggy.o
+targets := vmlinux vmlinux.bin vmlinux.bin.lzma head_$(BITS).o lzma_misc.o piggy.o
 
 KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2
 KBUILD_CFLAGS += -fno-strict-aliasing -fPIC
@@ -18,7 +18,7 @@
 LDFLAGS := -m elf_$(UTS_MACHINE)
 LDFLAGS_vmlinux := -T
 
-$(obj)/vmlinux: $(src)/vmlinux_$(BITS).lds $(obj)/head_$(BITS).o $(obj)/misc.o $(obj)/piggy.o FORCE
+$(obj)/vmlinux: $(src)/vmlinux_$(BITS).lds $(obj)/head_$(BITS).o $(obj)/lzma_misc.o $(obj)/piggy.o FORCE
 	$(call if_changed,ld)
 	@:
 
@@ -44,11 +44,11 @@
 	$(call if_changed,relocbin)
 
 ifdef CONFIG_RELOCATABLE
-$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin.all FORCE
-	$(call if_changed,gzip)
+$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin.all FORCE
+	$(call if_changed,lzma)
 else
-$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
-	$(call if_changed,gzip)
+$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin FORCE
+	$(call if_changed,lzma)
 endif
 LDFLAGS_piggy.o := -r --format binary --oformat elf32-i386 -T
 
@@ -60,5 +60,5 @@
 endif
 
 
-$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.gz FORCE
+$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.lzma FORCE
 	$(call if_changed,ld)