# 分析开源项目 DPT 抽取壳源码实现
为了自实现抽取壳,对开源项目 dpt 进行分析
开始先创建一个代理类
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 package com.luoyesiqiu.shell;import android.app.Application;import android.content.Context;import android.content.pm.ApplicationInfo;import android.content.pm.PackageManager;import android.text.TextUtils;import android.util.Log;import com.luoyesiqiu.shell.util.FileUtils;public class ProxyApplication extends Application { private static final String TAG = ProxyApplication.class.getSimpleName(); private String realApplicationName = "" ; private Application realApplication = null ; private void replaceApplication () { if (Global.sNeedCalledApplication && !TextUtils.isEmpty(realApplicationName)) { realApplication = (Application) JniBridge.ra(realApplicationName); Log.d(TAG, "applicationExchange: " + realApplicationName + ", realApplication: " + realApplication.getClass().getName()); JniBridge.craa(getApplicationContext(), realApplicationName); JniBridge.craoc(realApplicationName); Global.sNeedCalledApplication = false ; } } @Override public void onCreate () { super .onCreate(); Log.d(TAG, "dpt onCreate" ); replaceApplication(); } @Override public Context createPackageContext (String packageName, int flags) throws PackageManager.NameNotFoundException { Log.d(TAG, "createPackageContext: " + realApplicationName); if (!TextUtils.isEmpty(realApplicationName)){ replaceApplication(); return realApplication; } return super .createPackageContext(packageName, flags); } @Override public String getPackageName () { if (!TextUtils.isEmpty(realApplicationName)){ return "" ; } return super .getPackageName(); } @Override protected void attachBaseContext (Context base) { super .attachBaseContext(base); Log.d(TAG,"dpt attachBaseContext classloader = " + base.getClassLoader()); if (!Global.sIsReplacedClassLoader) { ApplicationInfo 
applicationInfo = base.getApplicationInfo(); if (applicationInfo == null ) { throw new NullPointerException ("application info is null" ); } FileUtils.unzipLibs(applicationInfo.sourceDir, applicationInfo.dataDir); JniBridge.loadShellLibs(applicationInfo.dataDir); Log.d(TAG,"ProxyApplication init" ); JniBridge.ia(); ClassLoader targetClassLoader = base.getClassLoader(); JniBridge.cbde(targetClassLoader); Global.sIsReplacedClassLoader = true ; } realApplicationName = JniBridge.rapn(); } }
先从 attachBaseContext 方法看起
在 Global.java 中定义了全局变量来控制事件,这个方法做了以下几件事:
1. 解压并加载程序 so 文件
1 2 FileUtils.unzipLibs(applicationInfo.sourceDir, applicationInfo.dataDir); JniBridge.loadShellLibs(applicationInfo.dataDir);
2. 将原始的 code 指令保存在自定义 Map 中,这里在 native 方法中实现
3. 替换 pathList 的 Elements,在 native 方法中实现
1 JniBridge.cbde(targetClassLoader);
4. 获取真实的 application 类名,准备在 onCreate 方法中替换,同样是在 native 方法中实现
1 realApplicationName = JniBridge.rapn();
接下来是 onCreate 方法,对 application 做了替换,并调用真实 application 的 attachBaseContext 方法和 onCreate 方法,具体实现在 native 方法中
接下来看具体的 native 实现
# init_app
// Cached (pointer, size) of the code-item entry read from the package zip.
// Filled on the first successful init_app() call and reused afterwards.
std::optional<std::tuple<uint8_t *, size_t>> g_codeItemFileData;

/**
 * JNI entry point: loads the package, reads the extracted code-item entry
 * from it (once), parses it via readCodeItem(), and extracts the dex files
 * if needed.
 *
 * If the code-item entry cannot be read, parsing is skipped instead of
 * calling std::optional::value() on an empty optional; the remaining steps
 * (dex extraction, package unload) still run so the package is always
 * released.
 *
 * @param env      JNI environment of the calling thread.
 * @param __unused Calling class (not used).
 */
DPT_ENCRYPT void init_app(JNIEnv *env, jclass __unused) {
    DLOGD("called!");

    clock_t start = clock();

    void *package_addr = nullptr;
    size_t package_size = 0;

    load_package(env, &package_addr, &package_size);

    if (!g_codeItemFileData.has_value()) {
        auto entry_data = read_zip_file_entry(package_addr, package_size, AY_OBFUSCATE(CODE_ITEM_NAME_IN_ZIP));
        if (entry_data.has_value()) {
            g_codeItemFileData = std::move(entry_data);
        }
        printTime("read codeitem data took =", start);
    } else {
        DLOGD("no need read codeitem from zip");
    }

    if (g_codeItemFileData.has_value()) {
        auto [entry_data, entry_size] = g_codeItemFileData.value();
        readCodeItem((uint8_t *)entry_data, entry_size);
    } else {
        // Previously this path hit value() on an empty optional.
        DLOGE("cannot read codeitem data from package");
    }

    // Serialize dex extraction with other writers of the dex files.
    pthread_mutex_lock(&g_write_dexes_mutex);
    extractDexesInNeeded(env, package_addr, package_size);
    pthread_mutex_unlock(&g_write_dexes_mutex);

    unload_package(package_addr, package_size);
    printTime("read package data took =", start);
}
在 apk 中拿到自己的 code 指令后,将其解析并保存,主要在 readCodeItem 方法中实现
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 std::unordered_map<int ,std::vector<data::CodeItem *> *> dexMap; DPT_ENCRYPT void readCodeItem (uint8_t *data,size_t data_len) { if (data != nullptr && data_len >= 0 ) { data::MultiDexCode *dexCode = data::MultiDexCode::getInst (); dexCode->init (data, data_len); DLOGI ("version = %d, dexCount = %d" , dexCode->readVersion (), dexCode->readDexCount ()); int indexCount = 0 ; uint32_t *dexCodeIndex = dexCode->readDexCodeIndex (&indexCount); dexMap.reserve (indexCount); for (int i = 0 ; i < indexCount; i++) { DLOGI ("dexCodeIndex[%d] = %d" , i, *(dexCodeIndex + i)); uint32_t dexCodeOffset = *(dexCodeIndex + i); uint16_t methodCount = dexCode->readUInt16 (dexCodeOffset); DLOGD ("dexCodeOffset[%d] = %d, methodCount[%d] = %d" , i, dexCodeOffset, i, methodCount); auto codeItemVec = new std::vector <data::CodeItem *>(65536 ); uint32_t codeItemIndex = dexCodeOffset + 2 ; for (int k = 0 ; k < methodCount; k++) { data::CodeItem *codeItem = dexCode->nextCodeItem (&codeItemIndex); uint32_t methodIdx = codeItem->getMethodIdx (); codeItemVec->at (methodIdx) = codeItem; } dexMap.emplace (i, codeItemVec); } DLOGD ("map size = %lu" , (unsigned long )dexMap.size ()); } }
单看这个方法可能有点不明白,实际应该结合作者自定义的一个结构来分析
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 #include "MultiDexCode.h" dpt::data::MultiDexCode* dpt::data::MultiDexCode::getInst (){ static auto *m_inst = new MultiDexCode (); return m_inst; } void dpt::data::MultiDexCode::init (uint8_t * buffer, size_t size){ this ->m_buffer = buffer; this ->m_size = size; } uint16_t dpt::data::MultiDexCode::readVersion (){ return readUInt16 (0 ); } uint16_t dpt::data::MultiDexCode::readDexCount (){ return readUInt16 (2 ); } uint32_t * dpt::data::MultiDexCode::readDexCodeIndex (int * count){ uint16_t dexCount = readDexCount (); *count = dexCount; return (uint32_t *)(m_buffer + 4 ); } dpt::data::CodeItem* dpt::data::MultiDexCode::nextCodeItem (uint32_t * offset) { uint32_t methodIdx = readUInt32 (*offset); uint32_t insnsSize = readUInt32 (*offset + 4 ); auto * insns = (uint8_t *)(m_buffer + *offset + 8 ); *offset = (*offset + 8 + insnsSize); auto * codeItem = new CodeItem (methodIdx, insnsSize, insns); return codeItem; } uint8_t dpt::data::MultiDexCode::readUInt8 (uint32_t offset){ uint8_t t = 0 ; memcpy (&t, m_buffer + offset, sizeof (uint8_t )); return t; } uint16_t dpt::data::MultiDexCode::readUInt16 (uint32_t offset){ uint16_t t = 0 ; memcpy (&t, m_buffer + offset, sizeof (uint16_t )); return t; } uint32_t dpt::data::MultiDexCode::readUInt32 (uint32_t offset){ uint32_t t = 0 ; memcpy (&t, m_buffer + offset, sizeof (uint32_t )); return t; }
根据 readCodeItem 方法中的 log
1 2 DLOGI ("version = %d, dexCount = %d" , dexCode->readVersion (), dexCode->readDexCount ());
得知原始文件头为版本号 (2 字节) + dex 数量 (2 字节)
接下来按 dex 数量进行循环
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 for (int i = 0 ; i < indexCount; i++) { DLOGI ("dexCodeIndex[%d] = %d" , i, *(dexCodeIndex + i)); uint32_t dexCodeOffset = *(dexCodeIndex + i); uint16_t methodCount = dexCode->readUInt16 (dexCodeOffset); DLOGD ("dexCodeOffset[%d] = %d, methodCount[%d] = %d" , i, dexCodeOffset, i, methodCount); auto codeItemVec = new std::vector <data::CodeItem *>(65536 ); uint32_t codeItemIndex = dexCodeOffset + 2 ; for (int k = 0 ; k < methodCount; k++) { data::CodeItem *codeItem = dexCode->nextCodeItem (&codeItemIndex); uint32_t methodIdx = codeItem->getMethodIdx (); codeItemVec->at (methodIdx) = codeItem; } dexMap.emplace (i, codeItemVec);
文件头之后是每个 dex 在文件中的偏移,偏移之后是 dexcode
根据代码可以得到每个 dexCode 的结构:
抽空方法数(2 字节)+ codeItem
现在 init_app 函数功能就分析完了:读取 apk 中抽出的指令,并将其保存到 dexMap 中
# combineDexElements
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 DPT_ENCRYPT void combineDexElement (JNIEnv* env, jclass __unused, jobject targetClassLoader, const char * pathChs) { jobjectArray extraDexElements = makePathElements (env,pathChs); dalvik_system_BaseDexClassLoader targetBaseDexClassLoader (env,targetClassLoader) ; jobject originDexPathListObj = targetBaseDexClassLoader.getPathList (); dalvik_system_DexPathList targetDexPathList (env,originDexPathListObj) ; jobjectArray originDexElements = targetDexPathList.getDexElements (); jsize extraSize = env->GetArrayLength (extraDexElements); jsize originSize = env->GetArrayLength (originDexElements); dalvik_system_DexPathList::Element element (env, nullptr ) ; jclass ElementClass = element.getClass (); jobjectArray newDexElements = env->NewObjectArray (originSize + extraSize,ElementClass, nullptr ); for (int i = 0 ;i < originSize;i++) { jobject elementObj = env->GetObjectArrayElement (originDexElements, i); env->SetObjectArrayElement (newDexElements,i,elementObj); } for (int i = originSize;i < originSize + extraSize;i++) { jobject elementObj = env->GetObjectArrayElement (extraDexElements, i - originSize); env->SetObjectArrayElement (newDexElements,i,elementObj); } targetDexPathList.setDexElements (newDexElements); DLOGD ("success" ); } DPT_ENCRYPT void combineDexElements (JNIEnv* env, jclass klass, jobject targetClassLoader) { char compressedDexesPathChs[256 ] = {0 }; getCompressedDexesPath (env,compressedDexesPathChs, ARRAY_LENGTH (compressedDexesPathChs)); combineDexElement (env, klass, targetClassLoader, compressedDexesPathChs); #ifndef DEBUG junkCodeDexProtect (env); #endif DLOGD ("success" ); } void getCompressedDexesPath (JNIEnv *env, char *outDexZipPath, size_t max_len) { std::string dataDir = getDataDir (env); snprintf (outDexZipPath,max_len, "%s/%s/%s" , dataDir.c_str (), CACHE_DIR, DEXES_ZIP_NAME); }
这个是壳里非常经典的操作了,PathList 字段里存在两个成员变量,其中 dexElements 用来存放 dex 和资源列表,所以要把原始 dex 放到当前 dexElements 中,不然 classLoader 找不到这个 dex
被抽空的指令已经保存至 map 中,那么什么时候还原呢
1 2 3 4 #define INIT_ARRAY_SECTION __attribute__ ((constructor)) INIT_ARRAY_SECTION void init_dpt () ;
在 so 文件被加载时,就会调用 init_dpt 这个函数
# init_dpt
1 2 3 4 5 6 7 8 9 10 11 void init_dpt () {#ifdef DECRYPT_BITCODE decrypt_bitcode (); #endif DLOGI ("call!" ); dpt_hook (); createAntiRiskProcess (); }
其中 dpt_hook 为还原指令过程
# dpt_hook
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 void dpt_hook () { bytehook_init (BYTEHOOK_MODE_AUTOMATIC,false ); g_sdkLevel = android_get_device_api_level (); hook_execve (); hook_mmap (); hook_write (); bool hookSuccess = hook_DefineClass (); if (!hookSuccess) { hook_LoadClass (); } }
先看是如何 hook defineClass 的
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 DPT_ENCRYPT bool hook_DefineClass () { char sym[256 ] = {0 }; find_symbol_in_elf_file (GetClassLinkerDefineClassLibPath (), sym, ARRAY_LENGTH (sym), 2 , "ClassLinker" , "DefineClass" ); if (strlen (sym) == 0 ) { DLOGW ("cannot find symbol: DefineClass" ); return false ; } void * defineClassAddress = DobbySymbolResolver (GetClassLinkerDefineClassLibPath (), sym); if (defineClassAddress == nullptr ) { DLOGE ("defineClass address is null, sym: %s" , sym); return false ; } int hookResult; if (g_sdkLevel >= __ANDROID_API_L_MR1__) { hookResult = DobbyHook (defineClassAddress, (dobby_dummy_func_t ) DefineClassV22, (dobby_dummy_func_t *) &g_originDefineClassV22); } else { hookResult = DobbyHook (defineClassAddress, (dobby_dummy_func_t ) DefineClassV21, (dobby_dummy_func_t *) &g_originDefineClassV21); } if (hookResult == 0 ) { DLOGD ("hook success." ); return true ; } else { DLOGE ("hook fail!" ); return false ; } } DPT_ENCRYPT void *DefineClassV22 (void * thiz,void * self, const char * descriptor, size_t hash, void * class_loader, const void * dex_file, const void * dex_class_def) { if (LIKELY (g_originDefineClassV22 != nullptr )) { patchClass (descriptor,dex_file,dex_class_def); return g_originDefineClassV22 ( thiz,self,descriptor,hash,class_loader, dex_file, dex_class_def); } return nullptr ; }
可以看到,在调用 defineClass 之前先将方法回填,
为什么是选择 defineClass 来 hook 呢,作者是这样说的:
在 Hook DefineClass 函数之前,我们需要了解 DefineClass 函数流程。为什么是 DefineClass 函数,其他函数是否可行?
当一个类被加载的时候,它的调用顺序是这样的 (部分流程已省略):
ClassLoader.java::loadClass
DexFile.java::defineClass
class_linker.cc::DefineClass
class_linker.cc::LoadClass
class_linker.cc::LoadClassMembers
class_linker.cc::LoadMethod
也就是说,当一个类被加载,它是会去调用 DefineClass 函数的,我们看一下它的函数原型:
1 2 3 4 5 6 mirror::Class* ClassLinker::DefineClass (Thread* self, const char * descriptor, size_t hash, Handle<mirror::ClassLoader> class_loader, const DexFile& dex_file, const DexFile::ClassDef& dex_class_def) ;
DefineClass 函数的参数很巧,有 DexFile 结构,还有 ClassDef 结构,我们通过 Hook 这个函数就知道以下信息:
加载的类来自哪个 dex 文件
加载类的数据的偏移
第一条可以帮助我们大致定位到存储的 CodeItem 的位置;第二条可以帮助我们找到 CodeItem 具体存储的位置以及填充到的位置。
来看一下 ClassDef 的定义:
1 2 3 4 5 6 7 8 9 10 struct ClassDef { uint32_t class_idx_; uint32_t access_flags_; uint32_t superclass_idx_; uint32_t interfaces_off_; uint32_t source_file_idx_; uint32_t annotations_off_; uint32_t class_data_off_; uint32_t static_values_off_; };
其中最重要的字段就是 class_data_off_ 它的值是当前加载的类的具体数据在 dex 文件中的偏移,通过这个字段就可以顺藤摸瓜定位到当前加载类的所有函数的在内存中 CodeItem 的具体位置。
ClassDef 这个结构还有一个特点,它是 dex 文件的结构,也就是说 dex 文件格式不变,它一般就不会变。
还有,DefineClass 函数的参数会改变吗?目前来看从 Android M 到现在没有变过。
所以使用它不用太担心随着 Android 版本的升级而导致字段偏移的变化,也就是兼容性较强。
这就是为什么用 DefineClass 作为 Hook 点。
总结一下就是 DefineClass 函数稳定且方便,在调用时还原也可以起到动态还原的效果
接下来具体分析是如何将指令回填的
# patchClass
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 DPT_ENCRYPT void patchClass (__unused const char * descriptor, const void * dex_file, const void * dex_class_def) { const char *junkClassName = AY_OBFUSCATE (JUNK_CLASS_FULL_NAME); if (descriptor != nullptr && UNLIKELY (dpt_strstr (descriptor, junkClassName) != nullptr )) { size_t descriptorLength = dpt_strlen (descriptor); char ch = descriptor[descriptorLength - 2 ]; DLOGD ("Attempt patch junk class %s ,char is '%c'" ,descriptor,ch); if (isdigit (ch)) { DLOGE ("Find illegal call, desc: %s!" , descriptor); dpt_crash (); return ; } } if (LIKELY (dex_file != nullptr )){ std::string location; uint8_t *begin = nullptr ; uint64_t dexSize = 0 ; if (g_sdkLevel >= 35 ) { auto * dexFileV35 = (V35::DexFile *)dex_file; location = dexFileV35->location_; begin = (uint8_t *)dexFileV35->begin_; dexSize = dexFileV35->header_->file_size_; } else if (g_sdkLevel >= __ANDROID_API_P__){ auto * dexFileV28 = (V28::DexFile *)dex_file; location = dexFileV28->location_; begin = (uint8_t *)dexFileV28->begin_; dexSize = dexFileV28->size_ == 0 ? dexFileV28->header_->file_size_ : dexFileV28->size_; } else { auto * dexFileV21 = (V21::DexFile *)dex_file; location = dexFileV21->location_; begin = (uint8_t *)dexFileV21->begin_; dexSize = dexFileV21->size_ == 0 ? 
dexFileV21->header_->file_size_ : dexFileV21->size_; } if (location.rfind (DEXES_ZIP_NAME) != std::string::npos && dex_class_def){ int dexIndex = parse_dex_number (location); auto * class_def = (dex::ClassDef *)dex_class_def; NLOG ("class_desc = '%s', class_idx_ = 0x%x, class data off = 0x%x" ,descriptor,class_def->class_idx_,class_def->class_data_off_); if (LIKELY (class_def->class_data_off_ != 0 )) { size_t read = 0 ; auto *class_data = (uint8_t *) ((uint8_t *) begin + class_def->class_data_off_); uint64_t static_fields_size = 0 ; read += DexFileUtils::readUleb128 (class_data, &static_fields_size); uint64_t instance_fields_size = 0 ; read += DexFileUtils::readUleb128 (class_data + read, &instance_fields_size); uint64_t direct_methods_size = 0 ; read += DexFileUtils::readUleb128 (class_data + read, &direct_methods_size); uint64_t virtual_methods_size = 0 ; read += DexFileUtils::readUleb128 (class_data + read, &virtual_methods_size); read += DexFileUtils::getFieldsSize (class_data + read, static_fields_size); read += DexFileUtils::getFieldsSize (class_data + read, instance_fields_size); auto *directMethods = new dex::ClassDataMethod[direct_methods_size]; read += DexFileUtils::readMethods (class_data + read, directMethods, direct_methods_size); auto *virtualMethods = new dex::ClassDataMethod[virtual_methods_size]; read += DexFileUtils::readMethods (class_data + read, virtualMethods, virtual_methods_size); for (uint64_t i = 0 ; i < direct_methods_size; i++) { auto method = directMethods[i]; patchMethod (begin, location.c_str (), dexSize, dexIndex, method.method_idx_delta_, method.code_off_); } for (uint64_t i = 0 ; i < virtual_methods_size; i++) { auto method = virtualMethods[i]; patchMethod (begin, location.c_str (), dexSize, dexIndex, method.method_idx_delta_, method.code_off_); } delete [] directMethods; delete [] virtualMethods; } else { NLOG ("class_def->class_data_off_ is zero" ); } } } }
在 Dex 格式中,为了极致压缩体积,Google 使用了 ULEB128 (Unsigned Little Endian Base 128) 变长编码来存储数字,一个数字可能占 1 个字节,也可能占 5 个字节。所以在上面代码中,添加了 read 这个游标来确定占用字节。
dex class_data 格式如下,上面代码解析了 dex 格式,并找到对应方法的地址,遍历方法将指令回填
这段代码作者使用了很多自定义函数或结构,大多在 dex_file.h / dex_file.cpp 里。为了方便理解,对它们也进行分析
首先是这段对于 dexFile 的解析,因为我的真机是 Android13,所以用这个版本的代码做示例
1 2 3 4 5 6 else if (g_sdkLevel >= __ANDROID_API_P__){ auto * dexFileV28 = (V28::DexFile *)dex_file; location = dexFileV28->location_; begin = (uint8_t *)dexFileV28->begin_; dexSize = dexFileV28->size_ == 0 ? dexFileV28->header_->file_size_ : dexFileV28->size_; }
当在 defineClass 拦截到这个操作时,会传过来一个 dexFile 的指针,因为没有直接解析它的函数,所以作者自定义了一个
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 namespace V28 { class DexFile { public : void *_; const uint8_t *const begin_; const size_t size_; const uint8_t *const data_begin_; const size_t data_size_; const std::string location_; const uint32_t location_checksum_; const dex::Header *const header_; const dex::StringId *const string_ids_; const dex::TypeId *const type_ids_; const dex::FieldId *const field_ids_; const dex::MethodId *const method_ids_; const dex::ProtoId *const proto_ids_; const dex::ClassDef *const class_defs_; }; }
这就是 dexFile 在内存中的管理对象,当然还有在物理内存中的 dexFile 对象
下面是一些工具类函数
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 size_t dpt::DexFileUtils::readUleb128 (uint8_t const * const data, uint64_t * const val) { uint64_t result = 0 ; size_t read = 0 ; for (int i = 0 ;i < 5 ;i++){ uint8_t b = *(data + i); uint8_t value = b & 0x7f ; result |= (value << (i * 7 )); read++; if ((b & 0x80 ) != 0x80 ){ break ; } } *val = result; return read; } size_t dpt::DexFileUtils::readFields (uint8_t *data, dpt::dex::ClassDataField *fields, uint64_t count) { size_t read = 0 ; uint32_t fieldIndexDelta = 0 ; for (uint64_t i = 0 ; i < count; ++i) { uint64_t fieldIndex = 0 ; read += readUleb128 (data + read,&fieldIndex); fieldIndexDelta += fieldIndex; uint64_t accessFlags = 0 ; read += readUleb128 (data + read,&accessFlags); fields[i].field_idx_delta_ = fieldIndexDelta; fields[i].access_flags_ = accessFlags; } return read; } size_t dpt::DexFileUtils::getFieldsSize (uint8_t *data, uint64_t count) { size_t read = 0 ; for (uint64_t i = 0 ; i < count; ++i) { uint64_t fieldIndex = 0 ; read += readUleb128 (data + read, &fieldIndex); uint64_t accessFlags = 0 ; read += readUleb128 (data + read, &accessFlags); } return read; } size_t dpt::DexFileUtils::readMethods (uint8_t *data, dpt::dex::ClassDataMethod *method, uint64_t count){ size_t read = 0 ; uint32_t methodIndexDelta = 0 ; for (uint64_t i = 0 ; i < count; ++i) { uint64_t methodIndex = 0 ; read += readUleb128 (data + read,&methodIndex); methodIndexDelta += methodIndex; uint64_t accessFlags = 0 ; read += readUleb128 (data + read,&accessFlags); uint64_t codeOff = 0 ; read += readUleb128 (data + read,&codeOff); method[i].method_idx_delta_ = methodIndexDelta; method[i].access_flags_ = accessFlags; method[i].code_off_ = codeOff; } return read; }
最后看一下 patchMethod 方法具体是如何回填指令的
# patchMethod
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 void patchMethod (uint8_t *begin, __unused const char *location, uint32_t dexSize, int dexIndex, uint32_t methodIdx, uint32_t codeOff) { auto dexIt = dexMap.find (dexIndex); if (LIKELY (dexIt != dexMap.end ())) { auto dexMemIt = dexMemMap.find (dexIndex); if (UNLIKELY (dexMemIt == dexMemMap.end ())){ change_dex_protective (begin, dexSize, dexIndex); } auto codeItemVec = dexIt->second; auto codeItem = codeItemVec->at (methodIdx); if (LIKELY (codeItem != nullptr )) { if (codeOff == 0 ) { NLOG ("dex: %d methodIndex: %d no need patch!" ,dexIndex,methodIdx); return ; } auto *dexCodeItem = (dex::CodeItem *)(begin + codeOff); auto *realInsnsPtr = (uint8_t *)(dexCodeItem->insns_); NLOG ("codeItem patch, methodIndex = %d, insnsSize = %d >>> %p(0x%x)" , codeItem->getMethodIdx (), codeItem->getInsnsSize (), realInsnsPtr, (unsigned int )(realInsnsPtr - begin)); memcpy (realInsnsPtr,codeItem->getInsns (),codeItem->getInsnsSize ()); } else { NLOG ("cannot find methodId: %d in codeitem map, dex index: %d(%s)" , methodIdx, dexIndex, location); } } else { DLOGW ("cannot find dex: '%s' in dex map" , location); } }
将之前存在 dexMap 中的指令取出,用 memcpy 拷贝回去,这样就完成最后方法指令回填了
在 dpt_hook 中,除了 hook DefineClass 实现指令回填,注意到还 hook 了几个函数
# hook_execve
/**
 * Hooks libc's execve as imported by the ART library so that fake_execve()
 * sees every exec attempt made from it.
 */
DPT_ENCRYPT void hook_execve() {
    bytehook_stub_t stub = bytehook_hook_single(
            getArtLibName(),
            "libc.so",
            "execve",
            (void *) fake_execve,
            nullptr,
            nullptr);
    if (stub != nullptr) {
        DLOGD("execve hook success!");
    } else {
        DLOGE("execve hook fail!");
    }
}

/**
 * execve replacement: refuses to launch anything whose path contains
 * "dex2oat" (fails with EACCES) and forwards every other call unchanged.
 *
 * @return -1 with errno = EACCES for blocked paths, otherwise the result of
 *         the previous execve in the hook chain.
 */
DPT_ENCRYPT int fake_execve(const char *pathname, char *const argv[], char *const envp[]) {
    BYTEHOOK_STACK_SCOPE();

    // strstr() on a null pointer is undefined; a null path is passed through
    // so the real execve reports the error.
    if (pathname != nullptr) {
        DLOGD("execve hooked: %s", pathname);
        if (strstr(pathname, "dex2oat") != nullptr) {
            DLOGD("execve blocked: %s", pathname);
            errno = EACCES;
            return -1;
        }
    }

    return BYTEHOOK_CALL_PREV(fake_execve, pathname, argv, envp);
}
hook execve 是为了防止系统将 dex 预编译
Dalvik 虚拟机主要依赖解释器,当 app 运行到一个方法,会实时去内存中读取 Dalvik 字节码,然后解释执行。
但是 ART 时代下,引入了 AOT 预编译机制,系统会通过 dex2oat 这个进程,将 dex 编译为本地机器码格式,虽然这样提升了效率,但是在抽取壳中,dex 是被我们抽空的,回填指令操作和 oat 文件没什么关系,预编译之后也是空方法。所以要禁用 dex2oat 预编译,让系统强制走解释执行
# hook_mmap
/**
 * Hooks libc's mmap as imported by the ART library, routing calls through
 * fake_mmap().
 */
DPT_ENCRYPT void hook_mmap() {
    bytehook_stub_t stub = bytehook_hook_single(
            getArtLibName(),
            "libc.so",
            "mmap",
            (void *)fake_mmap,
            nullptr,
            nullptr);
    if (stub == nullptr) {
        DLOGE("mmap hook fail!");
    } else {
        DLOGD("mmap hook success!");
    }
}

/**
 * mmap replacement. Unless the mapped file is recognised by
 * checkWebViewInFilename(), read-only mappings additionally get PROT_WRITE,
 * and on API level 30 the flags of a "base.vdex" mapping are cleared.
 * The (possibly adjusted) request is then forwarded to the previous mmap.
 */
DPT_ENCRYPT void *fake_mmap(void *__addr, size_t __size, int __prot, int __flags, int __fd, off_t __offset) {
    BYTEHOOK_STACK_SCOPE();

    int prot = __prot;
    int hasRead = (__prot & PROT_READ) == PROT_READ;
    int hasWrite = (__prot & PROT_WRITE) == PROT_WRITE;

    // Resolve the path behind the descriptor to decide how to treat the mapping.
    char fd_path[256] = {0};
    dpt_readlink(__fd, fd_path, ARRAY_LENGTH(fd_path));
    std::string fd_path_str = fd_path;

    if (checkWebViewInFilename(fd_path_str)) {
        DLOGW("link path: %s, no need to change prot", fd_path);
    } else {
        if (hasRead && !hasWrite) {
            prot = prot | PROT_WRITE;
            DLOGD("append write flag fd = %d, size = %zu, prot = %d, flag = %d", __fd, __size, prot, __flags);
        }

        if (g_sdkLevel == 30 && strstr(fd_path, "base.vdex") != nullptr) {
            DLOGE("want to mmap base.vdex");
            __flags = 0;
        }
    }

    return BYTEHOOK_CALL_PREV(fake_mmap, __addr, __size, prot, __flags, __fd, __offset);
}
将物理文件映射至内存空间时都需要使用 mmap 函数,所以可以 hook mmap 函数更改 dex 映射的权限:对只读映射追加 PROT_WRITE,这样之后才能用 memcpy 把指令回填到 dex 所在的内存中
# 总结
简单来说,抽取壳其实就是抽取指令 -> 还原的过程。为了实现调用方法时才动态还原,需要 Hook DefineClass 等函数实现