梆梆加固分析 - 学习 | Y&Yのblog = 保持好奇心

# 梆梆加固分析

解压打开 lib 文件，发现这四个 so 文件较新

而且 libDexHelper.so 从名字上看就和壳相关，从这里入手

打开文件后从 JNI_OnLoad 开始看起

上面是一些 x86 检测，壳配置读取，native 函数注册，直到函数末尾会调用脱壳函数

此函数混杂了检测，脱壳等操作，前半部分检测较多，尤其是这里

创建新线程进行检测，常见的模拟器检测，hook 检测，root 检测等，里面一些字符串被加密

字符串解密

示例数据

[+] Dump 0xDA613 - 0xDA623 (16 bytes) :
unsigned char xmmword_DA613[16] = {
    0x62, 0x00, 0x75, 0x00, 0x76, 0x00, 0x3B, 0x00, 0x3D, 0x00, 0x45, 0x00, 0x6D, 0x00, 0x64, 0x00
};

[+] Dump 0xDA623 - 0xDA633 (16 bytes) :
unsigned char xmmword_DA623[16] = {
    0x75, 0x00, 0x74, 0x00, 0x4F, 0x00, 0x6B, 0x00, 0x6F, 0x00, 0x6E, 0x00, 0x67, 0x00, 0x73, 0x00
};

[+] Dump 0xD5F70 - 0xD5F80 (16 bytes) :
unsigned char xmmword_D5F70[16] = {
    0xFD, 0xFE, 0xFF, 0xFD, 0xFE, 0xFF, 0xFD, 0xFE, 0xFF, 0xFD, 0xFE, 0xFF, 0xFD, 0xFE, 0xFF, 0xFD
};

最终解密脚本为

#include <cstdio>
#include <iostream>
#include <stdint.h>

// Decrypts one obfuscated string: each ciphertext byte is added to the key
// byte at the same index and the sum is truncated to 8 bits.
int main()
{
    // Non-zero bytes of the xmmword_DA613 / xmmword_DA623 dumps, starting at
    // the second dumped value (0x75); the first dumped value (0x62) is not
    // part of this array.
    const uint8_t data[] = {0x75, 0x76, 0x3B, 0x3D, 0x45, 0x6D, 0x64, 0x75, 0x74, 0x4F, 0x6B, 0x6F, 0x6E, 0x67, 0x73};
    // Key bytes taken from the xmmword_D5F70 dump.
    const uint8_t key[] = {0xFD, 0xFE, 0xFF, 0xFD, 0xFE, 0xFF, 0xFD, 0xFE, 0xFF, 0xFD, 0xFE, 0xFF, 0xFD, 0xFE, 0xFF, 0xFD};
    for (std::size_t i = 0; i < sizeof(data); i++)
    {
        // data[i] + key[i] is promoted to int; the cast makes the intended
        // modulo-256 wrap-around explicit instead of relying on %c.
        std::printf("%c", static_cast<uint8_t>(data[i] + key[i])); // rt::ClassLinker
    }
    return 0;
}

这样解密后就可正常分析检测了

继续往下读还能看到防止程序自己 ptrace 自己，防止调试器附加

不过通过观察可以发现，所有的检测函数失败后都会走同一个分支

此函数会根据传入的参数 a1 判断是什么类型的检测并上报，随后利用 a2、a3 制造 crash 退出

往下程序拿到 dex 文件后找 dexdata0 字符串，走到解密 key 的地方

将原始字节与包名异或后存到 dec_key 里

随后开始 dex 解密

可以在这里下断获得原始 dex

此时 x22 为 dex meta，x22 + 8 为 dexPtr，frida hook 脚本如下

'use strict';

// Name of the module whose code is hooked; matched against loaded modules
// and against the paths passed to the dlopen-style loader exports.
const TARGET = 'libDexHelper.so';
// Offsets below are added to the module base of TARGET.
// Function replaced by replaceNotPass() with a callback that only logs.
const NOT_PASS_PATH_OFF = 0x31c64;
// Location attached by hookDexReady(); the dex is dumped from here.
const DEX_READY_OFF = 0x3d914;
// Location attached by hookDone(); only logged.
const DEX_LOOP_DONE_OFF = 0x3db64;
// Directory on the device that receives the dumped dex files.
const OUT_DIR = '/data/data/com.chinamworld.main/files/dexdump';
// Upper bound (128 MiB) for a size that hookDexReady() accepts as plausible.
const MAX_DEX_SIZE = 0x8000000;

// Set to 0 for a clean full dump. Increase it only when resuming a partial dump.
const MIN_DUMP_INDEX = 0;
// Only printed in the startup log line; not used for any check in this script.
const EXPECTED_DEX_COUNT = 22;

// Set to true by install() once TARGET was found, so hooks are installed once.
let installed = false;
// Keys of the form "<idx>_<size in hex>" for dumps hookDexReady() already handled.
const dumped = {};
// Holds references to created NativeCallback objects
// (presumably so they are not garbage-collected while installed).
const keepAlive = [];

/**
 * Prints a message to the console, tagged with the script's prefix.
 *
 * @param {*} msg - Value appended to the prefix with `+`.
 */
function log(msg) {
  const line = '[dexhelper-min] ' + msg;
  console.log(line);
}

/**
 * Best-effort read of a C string from a pointer-like argument.
 *
 * @param {*} arg - Object exposing `isNull()` and `readCString()`; may be falsy.
 * @returns {*} Whatever `readCString()` returns, or '' when `arg` is falsy,
 *   reports itself as null, or any of the calls throws.
 */
function readPath(arg) {
  let text = '';
  try {
    if (!arg || arg.isNull()) {
      return text;
    }
    text = arg.readCString();
  } catch (_) {
    // Deliberately ignored: an unreadable argument is reported as ''.
  }
  return text;
}

/**
 * Creates OUT_DIR by calling the process's own `mkdir` export with mode 0777.
 *
 * Best effort: the return value of `mkdir` is ignored, a missing export makes
 * the function return silently, and any exception is logged, not rethrown.
 */
function ensureDir() {
  try {
    // Frida 17 dropped the static Module.findExportByName(null, name) in
    // favour of Module.findGlobalExportByName(name); support both.
    const mkdirPtr = typeof Module.findGlobalExportByName === 'function'
      ? Module.findGlobalExportByName('mkdir')
      : Module.findExportByName(null, 'mkdir');
    if (!mkdirPtr) {
      return;
    }
    const mkdirFn = new NativeFunction(mkdirPtr, 'int', ['pointer', 'int']);
    // 0x1ff === 0o777
    mkdirFn(Memory.allocUtf8String(OUT_DIR), 0x1ff);
  } catch (e) {
    log('ensureDir failed: ' + e);
  }
}

/**
 * Formats the first bytes at `ptr` as space-separated two-digit hex.
 *
 * @param {*} ptr - Object exposing `readByteArray(n)`.
 * @param {number} size - Number of available bytes; at most 16 are read.
 * @returns {string} The hex preview, or 'read-failed:<error>' if reading throws.
 */
function hexPreview(ptr, size) {
  try {
    const count = Math.min(size, 16);
    const raw = new Uint8Array(ptr.readByteArray(count));
    return Array.from(raw, (value) => value.toString(16).padStart(2, '0')).join(' ');
  } catch (e) {
    return 'read-failed:' + e;
  }
}

/**
 * Checks whether the memory at `ptr` starts with the bytes "dex\n".
 *
 * @param {*} ptr - Object exposing `readByteArray(n)`; 8 bytes are requested.
 * @returns {boolean} true when the first four bytes are 64 65 78 0a;
 *   false otherwise or when reading throws.
 */
function hasDexMagic(ptr) {
  const magic = [0x64, 0x65, 0x78, 0x0a];
  try {
    const head = new Uint8Array(ptr.readByteArray(8));
    return magic.every((expected, i) => head[i] === expected);
  } catch (_) {
    return false;
  }
}

/**
 * Writes `data` to `path` in binary mode, replacing any existing file.
 *
 * The handle is closed even when writing or flushing throws, so a failed
 * dump does not leak the open file; the error still reaches the caller.
 *
 * @param {string} path - Destination file path.
 * @param {*} data - Payload handed to `File#write`.
 */
function writeFile(path, data) {
  const f = new File(path, 'wb');
  try {
    f.write(data);
    f.flush();
  } finally {
    f.close();
  }
}

/**
 * Replaces the function at `base + NOT_PASS_PATH_OFF` with a callback that
 * only logs its three arguments and returns.
 *
 * @param {*} base - Module base address exposing `add(offset)`.
 */
function replaceNotPass(base) {
  const target = base.add(NOT_PASS_PATH_OFF);
  const asHex = (value) => '0x' + value.toString(16);

  const stub = new NativeCallback(function (reason, seed, mask) {
    // seed/mask are forced to unsigned 32-bit before formatting.
    const parts = [
      'skip not_pass_path reason=' + asHex(reason),
      ' seed=' + asHex(seed >>> 0),
      ' mask=' + asHex(mask >>> 0)
    ];
    log(parts.join(''));
  }, 'void', ['int64', 'int', 'int']);

  keepAlive.push(stub);
  Interceptor.replace(target, stub);
  log('replace not_pass_path @ ' + target);
}

/**
 * Attaches to `base + DEX_READY_OFF` and dumps the dex visible at that point.
 *
 * On every hit the callback reads the index from x25, the size from x8 and
 * the dex pointer from `[x22 + 8]`, validates size and magic, then writes the
 * bytes to `OUT_DIR/dex_<idx>_<size hex>.dex`. Each idx/size pair is written
 * at most once; a pair is only marked as done after the write succeeded, so a
 * failed write is retried on the next hit. All errors are logged, never
 * thrown into the hooked code.
 *
 * @param {*} base - Module base address exposing `add(offset)`.
 */
function hookDexReady(base) {
  Interceptor.attach(base.add(DEX_READY_OFF), {
    onEnter() {
      try {
        const idx = this.context.x25.toUInt32();
        const size = this.context.x8.toUInt32();
        const meta = this.context.x22;
        const dexPtr = meta.add(8).readPointer();

        if (idx < MIN_DUMP_INDEX) {
          return;
        }

        const sizeHex = size.toString(16);
        // hexPreview() already limits itself to 16 bytes.
        log(
          'dex ready idx=' + idx +
          ' ptr=' + dexPtr +
          ' size=0x' + sizeHex +
          ' head=' + hexPreview(dexPtr, size)
        );

        // 0x70 is the size of a dex header; anything not larger cannot be a
        // usable dex.
        if (size <= 0x70 || size > MAX_DEX_SIZE) {
          log('skip invalid size idx=' + idx + ' size=0x' + sizeHex);
          return;
        }
        if (!hasDexMagic(dexPtr)) {
          log('skip non-dex idx=' + idx);
          return;
        }

        const key = idx + '_' + sizeHex;
        if (dumped[key]) {
          return;
        }

        ensureDir();
        const outPath = OUT_DIR + '/dex_' + key + '.dex';
        writeFile(outPath, dexPtr.readByteArray(size));
        // Mark as done only now, so a failed write above is retried later.
        dumped[key] = true;
        log('dex dump ok path=' + outPath);
      } catch (e) {
        log('dex ready dump failed: ' + e);
      }
    }
  });
  log('attach dex ready @ ' + base.add(DEX_READY_OFF));
}

/**
 * Attaches to `base + DEX_LOOP_DONE_OFF` and logs registers x25 and x20
 * (printed as "idx" and "count") each time that location is reached.
 *
 * @param {*} base - Module base address exposing `add(offset)`.
 */
function hookDone(base) {
  const target = base.add(DEX_LOOP_DONE_OFF);
  Interceptor.attach(target, {
    onEnter() {
      const { x25, x20 } = this.context;
      log('dex loop done idx=' + x25 + ' count=' + x20);
    }
  });
  log('attach dex loop done @ ' + target);
}

/**
 * Installs all hooks once TARGET is loaded.
 *
 * Does nothing if the hooks are already installed or the module is not
 * loaded yet, so it is safe to call repeatedly. `installed` is set before the
 * hooks are attached, so a failure while attaching is not retried.
 */
function install() {
  if (installed) {
    return;
  }

  const found = Process.findModuleByName(TARGET);
  if (found) {
    installed = true;
    ensureDir();

    const { base } = found;
    log('install hooks at base ' + base);
    replaceNotPass(base);
    hookDexReady(base);
    hookDone(base);
  }
}

/**
 * Module-observer callback: triggers install() when the module that was just
 * added is TARGET.
 *
 * @param {*} module - Object with a `name` property.
 */
function installFromModule(module) {
  const isTarget = module.name === TARGET;
  if (!isTarget) {
    return;
  }
  install();
}

/**
 * Hooks a dlopen-style export so the hooks get installed as soon as the
 * loader has finished loading a library.
 *
 * The export is looked up with `Module.findGlobalExportByName` when that API
 * exists (Frida 17+) and with the legacy `Module.findExportByName(null, …)`
 * otherwise. A missing export is silently skipped.
 *
 * @param {string} name - Export to hook, e.g. 'dlopen'.
 */
function hookLoaderExport(name) {
  const addr = typeof Module.findGlobalExportByName === 'function'
    ? Module.findGlobalExportByName(name)
    : Module.findExportByName(null, name);
  if (!addr) {
    return;
  }
  Interceptor.attach(addr, {
    onEnter(args) {
      // First argument is read as the library path; only logged for TARGET.
      this.path = readPath(args[0]);
      if (this.path.indexOf(TARGET) !== -1) {
        log(name + ' -> ' + this.path);
      }
    },
    onLeave() {
      // install() is idempotent and checks for the module itself, so it is
      // called after every load regardless of the path.
      install();
    }
  });
}

// Script entry point: install the hooks now if TARGET is already loaded,
// otherwise wait for it through every mechanism that is available.

// Module observers only exist on newer Frida versions, hence the check.
if (Process.attachModuleObserver) {
  Process.attachModuleObserver({ onAdded: installFromModule });
}
hookLoaderExport('android_dlopen_ext');
hookLoaderExport('dlopen');
install();
// Polling fallback in case neither the observer nor the loader hooks fire.
// The timer is cancelled once the hooks are in place instead of ticking for
// the rest of the process lifetime.
if (!installed) {
  const pollTimer = setInterval(function () {
    install();
    if (installed) {
      clearInterval(pollTimer);
    }
  }, 500);
}
log('script ready, min_idx=' + MIN_DUMP_INDEX + ' expected=' + EXPECTED_DEX_COUNT);

但是现在解密出来的是抽空的 dex，接下来还需要还原方法

想还原就要知道原始 dex 的指令是怎么保存的

重点在接下来的这段代码

while ( *v896 != 'cgd.bbBB' )
{
  LODWORD(v894) = v894 - 1;
  v895 -= 0x100000000LL;
  v896 = (_QWORD *)((char *)v896 - 1);
  if ( v893 >= (unsigned int)v894 )
    goto LABEL_923;
}
v897 = v892 + (v895 >> 32);
v898 = *(unsigned __int8 *)(v897 - 1);
v899 = (*(unsigned __int8 *)(v897 - 4) << 24) & 0xFFFFFFFFFF0000FFLL
     | ((unsigned __int64)*(unsigned __int8 *)(v897 - 3) << 16)
     | ((unsigned __int64)*(unsigned __int8 *)(v897 - 2) << 8);
off = (__int128 *)(v897 - 0x20);
v901 = v897 - 0x20 - *(_QWORD *)(v883 + 8);
off1 = v899 | v898;
code_map_start = (header *)((char *)off - (v899 | v898));
v1272 = off;
v1278 = *(_QWORD *)(v822 + 8 * v877);
*(_DWORD *)(v883 + 28) = v899 | v898;
*(_QWORD *)(v883 + 16) = code_map_start;
*(_QWORD *)(v883 + 32) = v901;
v904 = (_QWORD *)operator new(0x18u);
v904[1] = 0;
v904[2] = 0;
*v904 = 0;
*(_QWORD *)v883 = v904;
if ( off1 )
{
  itemdata_len = bswap32(code_map_start->item_data_size);
  if ( itemdata_len >= 0x18 )
  {
    v906 = v904;
    item_idx = 0;
    item_count = itemdata_len / 0x18;
    v909 = (char *)code_map_start
         + (((unsigned __int64)LOBYTE(code_map_start->data_offset) << 24)
          | ((unsigned __int64)BYTE1(code_map_start->data_offset) << 16)
          | ((unsigned __int64)BYTE2(code_map_start->data_offset) << 8)
          | HIBYTE(code_map_start->data_offset));
    BBitem = (BBbbItem *)((char *)code_map_start
                        + (((unsigned __int64)BYTE1(code_map_start->item_size) << 16)
                         & 0xFFFFFFFF00FF00FFLL
                         | ((unsigned __int64)LOBYTE(code_map_start->item_size) << 24)
                         | ((unsigned __int64)BYTE2(code_map_start->item_size) << 8)
                         | HIBYTE(code_map_start->item_size)));
    do
    {
      v911 = (LOBYTE(BBitem->rel_code_off) << 24) & 0xFFFFFFFFFF0000FFLL
           | ((unsigned __int64)BYTE1(BBitem->rel_code_off) << 16)
           | ((unsigned __int64)BYTE2(BBitem->rel_code_off) << 8)
           | HIBYTE(BBitem->rel_code_off);
      v912 = bswap32(BBitem->code_size_raw);
      v913 = bswap32(BBitem->flags_or_zero);
      v914 = bswap32(BBitem->code_off_field_pos);
      v915 = malloc(0x60u);
      v916 = (_QWORD *)v906[1];
      v917 = (_QWORD *)v906[2];
      s[0].n128_u64[0] = (unsigned __int64)v915;
      *(_DWORD *)v915 = v913;
      v915[1] = &v909[v911];
      v915[2] = 0;
      *((_DWORD *)v915 + 6) = v912;
      *((_DWORD *)v915 + 7) = 0;
      *((_DWORD *)v915 + 8) = v914;
      if ( v916 == v917 )
      {
        std::vector<CommonMethod *>::__push_back_slow_path<CommonMethod * const&>(v906, s);
      }
      else
      {
        *v916 = v915;
        v906[1] = v916 + 1;
      }
      ++item_idx;
      ++BBitem;
    }
    while ( item_idx < item_count );
  }

这个结合实际 dex 文件观察一下

根据代码计算起始地址：code_map_start = 0x150330 - 0xBCE4 - 0x20 = 0x14462C

这里的 0x20 是尾部还有 0x20 的无关数据

跳到 0x14462C 进行观察

code 数据头部结构为

[code_size + const_num + item_len + item_data_len + data_offset + data_len]

后面跟着的就是每个 item 结构

这里只看三个地方，第一个是离 data 区域的偏移，data 区域是存放真正 insn 指令的地方，从头部结构可以看到，data 偏移为 0x1860，那么实际地址为 0x14462C + 0x1860 = 0x145E8C。当前 item 在 data 区域的 insn 指令地址还要再加上离 data 起始处的偏移，图中的例子就是再加 0x0

下一个是实际 insn 指令，第三个是 dex 中被抽取的 insn 地址，也就是需要回填的地址，图中例子是 0x4BC5A，所以应该把刚刚算出来偏移的指令复制到这个地址中，这样就完成回填了。

写一个脚本将指令回填即可

#!/usr/bin/env python3
from typing import List
import argparse
import pathlib
import hashlib
import struct
import sys
import zipfile
import zlib

try:
    import zstandard
except ImportError as exc:
    raise SystemExit("missing dependency: zstandard") from exc

try:
    from gmssl.sm4 import CryptSM4, SM4_DECRYPT
except ImportError as exc:
    raise SystemExit("missing dependency: gmssl") from exc


# Marker that introduces the packed dex payload; also the name of the fdex
# table entry that carries it.
DEXDATA_MAGIC = b"dexdata0"
# Four-byte trailer expected at the end (header file_size) of a dex that
# carries an fdex table.
FDEX_MAGIC = b"fdex"
# Marker searched from the end of a decrypted dex to locate the code map.
CODEMAP_MAGIC = b"BBbb.dgc"
# 16-byte base SM4 key; derive_key() XORs the package name into it.
DEFAULT_KEY = bytes([
    0x66, 0x97, 0x6C, 0xE8, 0x6D, 0x46, 0x38, 0xB0,
    0x09, 0x5A, 0xA5, 0xD7, 0x0F, 0xCB, 0x9A, 0xA0,
])


def sm4_decrypt_ecb_nopad(key: bytes, ciphertext: bytes) -> bytes:
    """Decrypt ``ciphertext`` with SM4, one 16-byte block at a time.

    Every block is decrypted independently (ECB) and no padding is removed.
    Callers are expected to pass a length that is a multiple of 16.

    Args:
        key: SM4 key handed to ``CryptSM4.set_key``.
        ciphertext: Data to decrypt.

    Returns:
        The concatenated decrypted blocks.
    """
    cipher = CryptSM4()
    cipher.set_key(key, SM4_DECRYPT)
    plain = bytearray()
    for start in range(0, len(ciphertext), 16):
        chunk = list(ciphertext[start:start + 16])
        plain.extend(cipher.one_round(cipher.sk, chunk))
    return bytes(plain)


def read_be32(buf: bytes, off: int) -> int:
    """Return the unsigned big-endian 32-bit integer stored at ``buf[off:off + 4]``.

    Raises:
        struct.error: If fewer than four bytes are available at ``off``.
    """
    (value,) = struct.unpack_from(">I", buf, off)
    return value


def write_uleb128(buf: bytearray, off: int, value: int) -> int:
    """Write ``value`` into ``buf`` at ``off`` as ULEB128, using at most 5 bytes.

    After four continuation bytes the fifth byte is always written without a
    continuation bit, so any bits above the 35th are dropped.

    Returns:
        The number of bytes written (1 to 5).
    """
    written = 0
    # Emit continuation bytes while more than 7 bits remain, up to four.
    while written < 4 and value >> 7:
        buf[off + written] = 0x80 | (value & 0x7F)
        value >>= 7
        written += 1
    # Final byte carries the remaining low 7 bits without a continuation bit.
    buf[off + written] = value & 0x7F
    return written + 1


def derive_key(package_name: str) -> bytes:
    """Derive the SM4 key by XOR-ing the package name into ``DEFAULT_KEY``.

    Only the first 16 UTF-8 bytes of ``package_name`` are used; key bytes
    beyond the length of the name stay unchanged.
    """
    salt = package_name.encode("utf-8")[:0x10]
    mixed = bytes(k ^ s for k, s in zip(DEFAULT_KEY, salt))
    return mixed + bytes(DEFAULT_KEY[len(mixed):])


def load_input_blob(path: pathlib.Path) -> bytes:
    """Load the raw input, unwrapping ``classes.dex`` from a zip/jar if needed.

    Returns:
        The file contents, or the ``classes.dex`` member when ``path`` is a
        zip archive.

    Raises:
        SystemExit: If ``path`` is a zip archive without ``classes.dex``.
    """
    raw = path.read_bytes()
    if not zipfile.is_zipfile(path):
        return raw

    with zipfile.ZipFile(path, "r") as archive:
        if "classes.dex" in archive.namelist():
            return archive.read("classes.dex")
    raise SystemExit(f"{path} is zip/jar but has no classes.dex")


def extract_dexdata_container(raw: bytes) -> bytes:
    """Locate the first ``dexdata0`` marker in ``raw`` and expand what follows.

    The stage-1 payload is taken to start 0x0C bytes after the beginning of
    the marker.

    Raises:
        SystemExit: If the marker does not occur in ``raw``.
    """
    marker_at = raw.find(DEXDATA_MAGIC)
    if marker_at >= 0:
        return expand_stage1_payload(raw[marker_at + 0x0C:])
    raise SystemExit("dexdata0 not found")


def expand_stage1_payload(payload: bytes) -> bytes:
    """Rebuild the stage-1 container from its stored and compressed parts.

    Layout read from ``payload`` (all fields big-endian 32-bit):
    ``copy_len`` at 0x00, ``compress_len`` at 0x04, ``compress_out_len`` at
    0x08, then ``copy_len`` stored bytes followed by ``compress_len`` bytes of
    zstd data.

    Returns:
        The stored bytes followed by the decompressed bytes.

    Raises:
        SystemExit: If the compressed region would extend past ``payload``.
    """
    copy_len, compress_len, compress_out_len = (
        read_be32(payload, field_off) for field_off in (0x00, 0x04, 0x08)
    )
    body_start = 0x0C
    comp_off = body_start + copy_len
    comp_end = comp_off + compress_len
    if comp_end > len(payload):
        raise SystemExit(
            f"invalid stage1 sizes: copy={copy_len:#x} comp={compress_len:#x} "
            f"out={compress_out_len:#x} payload={len(payload):#x}"
        )

    inflated = zstandard.ZstdDecompressor().decompress(
        payload[comp_off:comp_end],
        max_output_size=compress_out_len,
    )
    stored = payload[body_start:comp_off]
    return stored + inflated


def extract_dexdata_container_from_fdex(raw: bytes) -> bytes:
    """Extract and expand the ``dexdata0`` entry from a dex with an fdex table.

    The dex must end (at its header ``file_size``) with a little-endian table
    offset followed by the ``fdex`` trailer. The table holds an entry count and
    then entries of the form ``entry_size, name_len, name, payload_size (BE),
    payload``.

    Raises:
        ValueError: If ``raw`` is not a dex with a usable fdex table, or the
            table has no ``dexdata0`` entry.
        SystemExit: If the ``dexdata0`` payload is shorter than declared.
    """
    def le32(pos: int) -> int:
        return struct.unpack_from("<I", raw, pos)[0]

    if len(raw) < 0x28 or not raw.startswith(b"dex\n"):
        raise ValueError("not a dex file")

    file_size = le32(0x20)
    if not 0x28 <= file_size <= len(raw):
        raise ValueError("invalid dex file_size")
    if raw[file_size - 4:file_size] != FDEX_MAGIC:
        raise ValueError("fdex trailer not found")

    table_off = le32(file_size - 8)
    if table_off >= file_size - 8:
        raise ValueError("invalid fdex table offset")

    remaining = le32(table_off)
    entry = table_off + 4
    # Walk the entries; stop early on anything that does not fit the file.
    while remaining > 0 and entry + 8 <= file_size:
        entry_size = le32(entry)
        name_len = le32(entry + 4)
        name_off = entry + 8
        name_end = name_off + name_len
        if entry_size < 8 + name_len + 4 or entry + entry_size > file_size:
            break

        if raw[name_off:name_end] == DEXDATA_MAGIC:
            payload_size = struct.unpack_from(">I", raw, name_end)[0]
            payload_off = name_end + 4
            payload = raw[payload_off:payload_off + payload_size]
            if len(payload) != payload_size:
                raise SystemExit("dexdata0 payload truncated")
            return expand_stage1_payload(payload)

        entry += entry_size
        remaining -= 1

    raise ValueError("dexdata0 fdex entry not found")


def load_stage1_container(raw: bytes) -> bytes:
    """Return the expanded stage-1 container for ``raw``.

    Tries the fdex table first, then a plain ``dexdata0`` marker search, and
    finally falls back to returning ``raw`` unchanged.
    """
    try:
        container = extract_dexdata_container_from_fdex(raw)
    except ValueError:
        container = None

    if container is not None:
        return container
    return extract_dexdata_container(raw) if DEXDATA_MAGIC in raw else raw


def relocate_codeitems(dex: bytes) -> bytes:
    """Point every extracted method back at its code and refresh the header.

    The last ``BBbb.dgc`` marker is located; the big-endian 32-bit value right
    before it (``off1``) is the distance from the code map start to the
    0x20-byte trailer that precedes the marker. The code map header supplies
    ``item_size`` (+0x08), ``itemdata_len`` (+0x0C) and ``data_off`` (+0x10);
    items start 0x18 bytes after the code map start. For each item the
    ULEB128 field at the item's +0x10 value is overwritten with
    ``code_map_start + data_off + <item +0x00 value>``.

    Finally the SHA-1 signature (0x0C..0x20) and the Adler-32 checksum
    (0x08..0x0C) in the dex header are recomputed.

    Raises:
        SystemExit: If the marker is missing or the code map header is
            inconsistent (start outside the file, zero item size).
    """
    off = dex.rfind(CODEMAP_MAGIC)
    if off < 4:
        raise SystemExit("BBbb.dgc not found")

    out = bytearray(dex)
    off1 = read_be32(dex, off - 4)
    # A zero length means there is no code map to apply; only the header is
    # refreshed in that case.
    if off1:
        code_map_start = off - 0x20 - off1
        # A negative start would silently index from the end of the buffer.
        if code_map_start < 0 or code_map_start + 0x18 > len(dex):
            raise SystemExit(f"invalid code map start: {code_map_start:#x}")

        item_size = read_be32(dex, code_map_start + 0x08)
        itemdata_len = read_be32(dex, code_map_start + 0x0C)
        data_off = read_be32(dex, code_map_start + 0x10)
        if item_size == 0:
            raise SystemExit("invalid code map: item_size is 0")

        items_start = code_map_start + 0x18
        for i in range(itemdata_len // item_size):
            item_off = items_start + item_size * i
            code_off = code_map_start + data_off + read_be32(out, item_off)
            field_pos = read_be32(out, item_off + 0x10)
            write_uleb128(out, field_pos, code_off)

    # The signature covers everything after itself; the checksum covers
    # everything after itself including the new signature, so order matters.
    out[0x0C:0x20] = hashlib.sha1(out[0x20:]).digest()
    checksum = zlib.adler32(out[0x0C:]) & 0xFFFFFFFF
    out[0x08:0x0C] = struct.pack("<I", checksum)
    return bytes(out)


def _dump_sort_key(path: pathlib.Path):
    """Sort key for dumped dex files: numeric index first, then by name.

    Files named ``dex_<number>[_...].dex`` sort by that number; any other
    ``dex_*.dex`` name sorts after them, alphabetically. Every key is a tuple
    of the same shape, so differently named files can be compared.
    """
    parts = path.stem.split("_")
    try:
        return (0, int(parts[1]), path.name)
    except (IndexError, ValueError):
        return (1, 0, path.name)


def repair_dumped_dex_dir(in_dir: pathlib.Path, out_dir: pathlib.Path) -> None:
    """Repair every ``dex_*.dex`` in ``in_dir`` and write the result to ``out_dir``.

    Files containing the code-map marker are passed through
    ``relocate_codeitems``; all others are copied unchanged. Output files keep
    the input file name. ``out_dir`` is created if it does not exist.

    Raises:
        SystemExit: If ``in_dir`` contains no ``dex_*.dex`` file.
    """
    files = sorted(in_dir.glob("dex_*.dex"), key=_dump_sort_key)
    if not files:
        raise SystemExit(f"no dex_*.dex found in {in_dir}")

    out_dir.mkdir(parents=True, exist_ok=True)

    repaired = 0
    copied = 0
    for path in files:
        raw = path.read_bytes()
        out_path = out_dir / path.name
        if CODEMAP_MAGIC in raw:
            fixed = relocate_codeitems(raw)
            out_path.write_bytes(fixed)
            repaired += 1
            print(f"[+] repaired {path.name} -> {out_path}")
        else:
            out_path.write_bytes(raw)
            copied += 1
            print(f"[=] copied   {path.name} -> {out_path}")

    print(f"[+] total={len(files)} repaired={repaired} copied={copied}")


def split_and_decrypt_dexes(deccom: bytes, dex_count: int, key: bytes, decrypt_size: int) -> List[bytes]:
    """Cut the embedded dex files out of the container and decrypt their heads.

    A table of ``dex_count`` 0x10-byte entries ends at offset 0x1000; each
    entry starts with a big-endian offset (relative to 0x1000) and size.
    Entries with size 0 are skipped. Only the first ``decrypt_size`` bytes of
    each dex (rounded down to a multiple of 16) are SM4-decrypted; the rest is
    kept as is.

    Raises:
        SystemExit: If a dex extends past the container or has no full
            16-byte block to decrypt.
    """
    data_base = 0x1000
    entry_len = 0x10
    table_off = data_base - dex_count * entry_len

    results = []
    for index in range(dex_count):
        entry = table_off + entry_len * index
        rel_off = read_be32(deccom, entry)
        size = read_be32(deccom, entry + 4)
        if size == 0:
            continue

        start = data_base + rel_off
        blob = deccom[start:start + size]
        if len(blob) != size:
            raise SystemExit(f"dex[{index}] truncated: expect {size:#x}, got {len(blob):#x}")

        # Round the encrypted prefix down to whole SM4 blocks.
        head_size = min(decrypt_size, len(blob))
        head_size -= head_size % 16
        if head_size <= 0:
            raise SystemExit(f"dex[{index}] too small to decrypt")

        head = sm4_decrypt_ecb_nopad(key, blob[:head_size])
        results.append(head + blob[head_size:])
    return results


def main() -> int:
    """Command-line entry point.

    Two modes:

    * ``--dumped-dir DIR``: repair dex files that were already dumped at
      runtime (see ``repair_dumped_dex_dir``).
    * ``input``: unpack the container from a shell dex or jar, decrypt every
      embedded dex and write ``dex_<i>.dex`` (decrypted) and
      ``dex_out_<i>.dex`` (code offsets restored) to the output directory.
      A dex without the code-map marker is copied to ``dex_out_<i>.dex``
      unchanged instead of aborting the run, matching what
      ``repair_dumped_dex_dir`` does for such files.

    Returns:
        0 on success; errors terminate through ``SystemExit``.
    """
    parser = argparse.ArgumentParser(description="Recover dexes from Bangbang DexHelper container")
    parser.add_argument("input", nargs="?", help="shell dex or dumped .cache/classes.jar")
    parser.add_argument("--dumped-dir", help="directory containing already dumped dex_*.dex files")
    parser.add_argument("-p", "--package", default="com.chinamworld.main", help="package name used to derive SM4 key")
    parser.add_argument("-n", "--dex-count", type=int, default=10, help="number of embedded dex files")
    parser.add_argument("-o", "--out-dir", default="dexhelper_out", help="output directory")
    # int(x, 0) accepts decimal as well as 0x-prefixed values.
    parser.add_argument("--decrypt-size", type=lambda x: int(x, 0), default=0x20000, help="SM4 decrypt size per dex")
    args = parser.parse_args()

    out_dir = pathlib.Path(args.out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    if args.dumped_dir:
        repair_dumped_dex_dir(pathlib.Path(args.dumped_dir), out_dir)
        return 0

    if not args.input:
        parser.error("input is required unless --dumped-dir is used")

    in_path = pathlib.Path(args.input)
    raw = load_input_blob(in_path)
    deccom = load_stage1_container(raw)
    # Keep the expanded container for inspection.
    (out_dir / "deccom.bin").write_bytes(deccom)

    key = derive_key(args.package)
    dexes = split_and_decrypt_dexes(deccom, args.dex_count, key, args.decrypt_size)
    if not dexes:
        raise SystemExit("no dex extracted")

    for i, dex in enumerate(dexes):
        raw_path = out_dir / f"dex_{i}.dex"
        fixed_path = out_dir / f"dex_out_{i}.dex"
        raw_path.write_bytes(dex)
        print(f"[+] wrote {raw_path}")
        if CODEMAP_MAGIC in dex:
            fixed_path.write_bytes(relocate_codeitems(dex))
            print(f"[+] wrote {fixed_path}")
        else:
            # Nothing to relocate: keep the remaining dexes going.
            fixed_path.write_bytes(dex)
            print(f"[=] copied {raw_path.name} -> {fixed_path} (no code map)")

    return 0


# Run main() only when executed as a script; its return value becomes the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())

最终 dex 还原成功，能够正常看到 java 层逻辑

参考：[原创] 某企业壳逆向分析 —— 从过检测到 dex 代码抽取还原 - Android 安全 - 看雪安全社区｜专业技术交流与安全研究论坛

Reverse

# 梆梆加固分析

分析开源项目DPT抽取壳源码实现