# 编写

虚假控制流使用大量不透明谓词组成恒真或恒假的算式,每个基本块之间用条件跳转连接,用不可达基本块和条件跳转达成混淆的目的

与控制流平坦化相比,它的 cfg 更加细长

其中会有多个 if 语句,也可能掺杂一些函数以加大混淆难度,这些函数往往返回定值

具体代码如下

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Instructions.h"
#include "SplitBasicBlock.h"
#include "Utils.h"
#include <vector>
#include <cstdlib>
#include <ctime>
using std::vector;
using namespace llvm;

// Command-line option "bcf_loop" (default 1): how many rounds of bogus control
// flow runOnFunction applies to each function.
static cl::opt<int> obfuTimes("bcf_loop",cl::init(1),cl::desc("obf function bcf_loop times"));

namespace{
class BogusControlFlow : public FunctionPass{
public:
static char ID;
BogusControlFlow() : FunctionPass(ID){
srand(time(0));
}

bool runOnFunction(Function &F);
void bogus(BasicBlock *BB);

Value* createBogusCmp(BasicBlock *insertAfter);
Value* createBogusCmp2(BasicBlock *insertAfter);

Function* createBogusCall(Module *M,int constVal,StringRef funcName);

};
}

// Obfuscates one function: first splits its basic blocks, then applies
// `obfuTimes` rounds of bogus control flow to every block.
// Always returns true (the IR is reported as modified).
bool BogusControlFlow::runOnFunction(Function &F)
{
    INIT_CONTEXT(F);

    // Run the block-splitting pass directly on this function.
    // NOTE(review): createSplitBasicBlockPass() is assumed to follow the usual
    // LLVM create*Pass() convention of returning a freshly allocated pass owned
    // by the caller; it is therefore released here instead of being leaked on
    // every function.
    FunctionPass *splitPass = createSplitBasicBlockPass();
    splitPass->runOnFunction(F);
    delete splitPass;

    for (int round = 0; round < obfuTimes; ++round)
    {
        // Snapshot the blocks first: bogus() adds new blocks to F, so iterating
        // F directly while transforming it would visit blocks created mid-round.
        vector<BasicBlock *> origBB;
        for (BasicBlock &BB : F)
        {
            origBB.push_back(&BB);
        }
        for (BasicBlock *BB : origBB)
        {
            bogus(BB);
        }
    }
    return true;
}

// Helper function factory: returns the function called `funcName` in module M,
// creating it on first use as a private `i32 ()` function whose single block
// just returns `constVal`. If a function with that name already exists it is
// returned unchanged (constVal is not re-checked).
Function* BogusControlFlow::createBogusCall(Module *M,int constVal,StringRef funcName)
{
    if (Function *existing = M->getFunction(funcName))
    {
        return existing;
    }

    // Signature: i32 (void), non-variadic.
    FunctionType *signature = FunctionType::get(Type::getInt32Ty(*CONTEXT), false);
    Function *stub = Function::Create(signature, GlobalValue::PrivateLinkage, funcName, M);

    // Single basic block containing only `ret i32 constVal`.
    BasicBlock *body = BasicBlock::Create(*CONTEXT, "", stub);
    ReturnInst::Create(*CONTEXT, ConstantInt::get(Type::getInt32Ty(*CONTEXT), constVal), body);
    return stub;
}

// First opaque predicate. Appends to the end of `insertAfter` the IR for
//     y < 10 || x * (x + return_1()) % 2 == 0
// and returns the resulting value. x and y are loaded from two new i32 globals
// (requested names "x" and "y", common linkage, initial value 0) created on
// every call. The right-hand side is the product of two consecutive integers,
// which is always even, so the predicate is always true.
Value* BogusControlFlow::createBogusCmp(BasicBlock *insertAfter)
{
    Module *mod = insertAfter->getModule();

    // Backing globals for the predicate operands.
    GlobalVariable *gvX = new GlobalVariable(*mod, TYPE_I32, false, GlobalValue::CommonLinkage, CONST_I32(0), "x");
    GlobalVariable *gvY = new GlobalVariable(*mod, TYPE_I32, false, GlobalValue::CommonLinkage, CONST_I32(0), "y");

    LoadInst *valX = new LoadInst(TYPE_I32, gvX, "", insertAfter);
    LoadInst *valY = new LoadInst(TYPE_I32, gvY, "", insertAfter);

    // y < 10
    ICmpInst *yBelowTen = new ICmpInst(*insertAfter, CmpInst::ICMP_SLT, valY, CONST_I32(10));

    // The constant 1 is hidden behind a call to the helper function "return_1".
    Function *returnOne = createBogusCall(mod, 1, "return_1");
    CallInst *one = CallInst::Create(returnOne, "", insertAfter);

    // x * (x + 1) % 2 == 0
    BinaryOperator *xPlusOne = BinaryOperator::CreateAdd(valX, one, "", insertAfter);
    BinaryOperator *product = BinaryOperator::CreateMul(valX, xPlusOne, "", insertAfter);
    BinaryOperator *parity = BinaryOperator::CreateSRem(product, CONST_I32(2), "", insertAfter);
    ICmpInst *isEven = new ICmpInst(*insertAfter, CmpInst::ICMP_EQ, parity, CONST_I32(0));

    // Combine both comparisons with a bitwise OR.
    return BinaryOperator::CreateOr(yBelowTen, isEven, "", insertAfter);
}

// Second opaque predicate. Appends to the end of `insertAfter` the IR for
//     x * 2 < 9 && (y + 1) * 3 < 10
// and returns the resulting value. x and y are loaded from two new i32 globals
// (requested names "x" and "y", common linkage, initial value 0) created on
// every call; with both still 0 the expression is 0 < 9 && 3 < 10, i.e. true.
Value* BogusControlFlow::createBogusCmp2(BasicBlock *insertAfter)
{
    Module *mod = insertAfter->getModule();

    GlobalVariable *gvX = new GlobalVariable(*mod, TYPE_I32, false, GlobalValue::CommonLinkage, CONST_I32(0), "x");
    GlobalVariable *gvY = new GlobalVariable(*mod, TYPE_I32, false, GlobalValue::CommonLinkage, CONST_I32(0), "y");

    LoadInst *valX = new LoadInst(TYPE_I32, gvX, "", insertAfter);
    LoadInst *valY = new LoadInst(TYPE_I32, gvY, "", insertAfter);

    // x * 2 < 9
    BinaryOperator *doubledX = BinaryOperator::CreateMul(valX, CONST_I32(2), "", insertAfter);
    ICmpInst *lhs = new ICmpInst(*insertAfter, CmpInst::ICMP_SLT, doubledX, CONST_I32(9));

    // (y + 1) * 3 < 10
    BinaryOperator *yPlusOne = BinaryOperator::CreateAdd(valY, CONST_I32(1), "", insertAfter);
    BinaryOperator *tripled = BinaryOperator::CreateMul(yPlusOne, CONST_I32(3), "", insertAfter);
    ICmpInst *rhs = new ICmpInst(*insertAfter, CmpInst::ICMP_SLT, tripled, CONST_I32(10));

    // Combine both comparisons with a bitwise AND.
    return BinaryOperator::CreateAnd(lhs, rhs, "", insertAfter);
}

// Wraps one basic block in bogus control flow.
// The block is cut into three pieces (entryBB: leading PHI nodes, bodyBB: the
// real instructions, endBB: the original terminator), bodyBB is cloned, and the
// pieces are re-linked with two opaque-predicate branches:
//   entryBB: cond1 ? bodyBB : cloneBB
//   bodyBB : cond2 ? endBB  : cloneBB
//   cloneBB: unconditional branch back to bodyBB
void BogusControlFlow :: bogus(BasicBlock *entryBB)
{
// Split at the first non-PHI instruction: PHI nodes stay in entryBB, the rest becomes bodyBB
BasicBlock *bodyBB = entryBB->splitBasicBlock(entryBB->getFirstNonPHI(),"bodyBB");
// Split at the terminator: the original terminator moves into endBB
BasicBlock *endBB = bodyBB->splitBasicBlock(bodyBB->getTerminator(),"endBB");
// Clone of bodyBB used as the target of the "other" branch of both predicates
// (createCloneBasicBlock is a project helper; presumably from Utils.h)
BasicBlock *cloneBB = createCloneBasicBlock(bodyBB);

// Drop the unconditional branches left behind by the splits (and copied into the clone)
bodyBB->getTerminator()->eraseFromParent();
cloneBB->getTerminator()->eraseFromParent();
entryBB->getTerminator()->eraseFromParent();

// Emit the opaque predicates at the end of the now terminator-less blocks
Value *cond1 = createBogusCmp(entryBB);
Value *cond2 = createBogusCmp2(bodyBB);

// Re-terminate the three blocks: two conditional branches on the predicates ...
BranchInst::Create(bodyBB,cloneBB,cond1,entryBB);
BranchInst::Create(endBB,cloneBB,cond2,bodyBB);
// ... and an unconditional branch to bodyBB appended to cloneBB
BranchInst::Create(bodyBB,cloneBB);
}

// Definition of the static pass ID that the constructor hands to FunctionPass.
char BogusControlFlow::ID = 0;
// Register the pass under the command-line name "bcf".
static RegisterPass<BogusControlFlow> X("bcf","BogusControlFlow a function");

# 去除

# 例:re

题目来自 ciscn2025 初赛

典型的虚假控制流特征,里面出现了大量不透明谓词,先进行重命名方便分析

可以发现,除了不透明谓词之外夹杂着一些函数增大混淆难度,有些函数返回定值,有些返回参数。

接下来讲解去除的两种方式

# 1. 常量替换

根据 Windows x64 函数调用约定,第一个参数 a1 通过 rcx 传递,第二个参数 a2 通过 rdx 传递,而函数的返回值保存在 rax 中。所以对于所有函数替换,只需要将原来的 call 指令替换成对 eax 的赋值即可。

而对于不透明谓词,则是替换相应赋值的寄存器,直接改为常量。

需要注意的是指令长度,图中所做的改变均比原始指令长度短,假如指令长度超过原始字节,则需要考虑 nop 掉无用字节进行修改。

写 idc 脚本批量替换

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
#include <idc.idc>

// Overwrite Length bytes starting at Addr with 0x90 (the x86 NOP opcode).
// A Length of zero or less patches nothing.
static NopCode(Addr, Length)
{
    auto offset = 0;
    while (offset < Length)
    {
        PatchByte(Addr + offset, 0x90);
        offset = offset + 1;
    }
}

// Test whether the instruction at current_addr matches a pattern.
//   my_insn_name - expected mnemonic
//   my_op1       - expected text of operand 0
//   my_op2       - expected text of operand 1
// Any pattern argument that compares equal to 0 acts as a wildcard.
// Returns 1 on a match, 0 otherwise.
static checkAsm(current_addr,my_insn_name,my_op1,my_op2)
{
    auto actual_mnem = print_insn_mnem(current_addr);
    auto actual_op1 = print_operand(current_addr, 0);
    auto actual_op2 = print_operand(current_addr, 1);

    // Start from the caller's pattern; a wildcard is replaced by the actual
    // value so that the comparison below trivially succeeds for that field.
    auto want_mnem = my_insn_name;
    auto want_op1 = my_op1;
    auto want_op2 = my_op2;
    if (my_insn_name == 0)
    {
        want_mnem = actual_mnem;
    }
    if (my_op1 == 0)
    {
        want_op1 = actual_op1;
    }
    if (my_op2 == 0)
    {
        want_op2 = actual_op2;
    }

    if (actual_mnem == want_mnem)
    {
        if (actual_op1 == want_op1)
        {
            if (actual_op2 == want_op2)
            {
                return 1;
            }
        }
    }
    return 0;
}

// Map the operand text "cs:_N" (N = 0..9) to the number N.
// Any other text falls through without an explicit return value.
static getValue(data)
{
    auto digit;
    for (digit = 0; digit < 10; digit++)
    {
        if (data == "cs:_" + ltoa(digit, 10))
        {
            return digit;
        }
    }
}

// Replace the instruction at asm_addr (a load of an opaque constant such as
// "movsx eax, cs:_3") with "mov reg, imm32".
//   asm_addr - address of the instruction to rewrite
//   reg      - destination register name as printed by IDA
//   data     - source operand text ("cs:_N"), translated by getValue()
// Returns 1 when the instruction was patched, 0 when it was left untouched.
static patchMovData(asm_addr,reg,data)
{
    auto value = getValue(data);
    // Measure the instruction before touching it so that exactly its own
    // bytes are rewritten, whatever its encoding length is.
    auto insn_size = get_item_size(asm_addr);

    // "mov r32, imm32" is encoded as B8+rd followed by the 32-bit immediate.
    auto reg_data = -1;
    if (reg == "eax")
    {
        reg_data = 0xB8;
    }
    else if (reg == "ecx")
    {
        reg_data = 0xB9;
    }
    else if (reg == "edx")
    {
        reg_data = 0xBA;
    }
    else if (reg == "ebx")
    {
        reg_data = 0xBB;
    }
    else if (reg == "esi")
    {
        reg_data = 0xBE;
    }
    else if (reg == "edi")
    {
        reg_data = 0xBF;
    }

    // Never patch with an undefined opcode: skip registers we cannot encode.
    if (reg_data == -1)
    {
        msg("skip %X : unsupported register %s\n", asm_addr, reg);
        return 0;
    }
    // The replacement needs 5 bytes; a shorter original cannot hold it.
    if (insn_size < 5)
    {
        msg("skip %X : instruction too short (%d bytes)\n", asm_addr, insn_size);
        return 0;
    }

    PatchByte(asm_addr, reg_data);
    PatchDword(asm_addr + 1, value);
    // Pad the rest of the original instruction (2 bytes for a 7-byte movsx).
    NopCode(asm_addr + 5, insn_size - 5);
    return 1;
}

// Constant-replacement pass over the hard-coded range [0x407E53, 0x408859):
//   * "movsx reg, cs:_N"  -> "mov reg, N"
//   * "call return_a2"    -> "mov eax, edx"
//   * "call return_a1"    -> "mov eax, ecx"
//   * "call return_1"     -> "mov eax, 1"
static main()
{
    auto current_addr = 0x407E53;
    auto end_addr = 0x408859;
    while (current_addr != BADADDR && current_addr < end_addr)
    {
        // mov reg imm
        auto data = print_operand(current_addr,1);
        // checkAsm treats 0 as "match anything": operand 1 becomes a wildcard
        // only when it references one of the cs:_N variables; otherwise the
        // literal 1 is passed as the expected operand.
        auto is_data = 1;
        if(strstr(data,"cs:_") != -1)
        {
            is_data = 0;
        }
        auto mov_data = checkAsm(current_addr,"movsx",0,is_data);
        if(mov_data)
        {
            msg("mov data asm addr : %X\n",current_addr);
            auto reg = print_operand(current_addr,0);
            patchMovData(current_addr,reg,data);
        }

        // The helper-function patches overwrite a 5-byte "call rel32", so they
        // must only fire on call instructions. Without this check a tail
        // "jmp return_1" (or any other instruction naming the helper) would be
        // rewritten as well.
        auto mnem = print_insn_mnem(current_addr);
        auto callee = print_operand(current_addr,0);
        if(mnem == "call")
        {
            if(callee == "return_a2")
            {
                // call return_a2 -> mov eax, edx (89 D0) + 3 NOPs
                msg("return a2 func : %X\n",current_addr);
                PatchWord(current_addr,0xD089);
                NopCode(current_addr + 2,3);
            }
            else if(callee == "return_a1")
            {
                // call return_a1 -> mov eax, ecx (89 C8) + 3 NOPs
                msg("return a1 func : %X\n",current_addr);
                PatchWord(current_addr,0xC889);
                NopCode(current_addr + 2,3);
            }
            else if(callee == "return_1")
            {
                // call return_1 -> mov eax, 1 (B8 01 00 00 00)
                msg("return 1 func : %X\n",current_addr);
                PatchByte(current_addr,0xB8);
                PatchDword(current_addr + 1,1);
            }
        }

        current_addr = next_head(current_addr, end_addr);
    }
}

去混淆效果如下

去除非常完美

# 2. 条件解析

一般来说,编写恒真恒假条件时不会编写太多完全不同的条件,而是对一个条件进行变化衍生出其他条件

图中这四个算式虽然不同,但它们本质上都是 随机数1 * (随机数2 * 0) 这样的式子,结果恒为 0,只需要匹配它们汇编代码的相同部分,再 patch 条件跳转即可

示例 idc 脚本如下

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#include <idc.idc>

// Fill the byte range [Addr, Addr + Length) with x86 NOPs (0x90).
// Nothing is patched when Length is zero or negative.
static NopCode(Addr, Length)
{
    auto cursor = Addr;
    auto stop = Addr + Length;
    while (cursor < stop)
    {
        PatchByte(cursor, 0x90);
        cursor = cursor + 1;
    }
}

// Walk the instruction heads from current_addr towards end_addr and return the
// address of the first one whose mnemonic equals jmp_type.
// If none is found, the address at which the walk stopped is returned
// (BADADDR or an address >= end_addr).
static findJmpAddr(current_addr,end_addr,jmp_type)
{
    auto scan = current_addr;
    while (scan != BADADDR && scan < end_addr)
    {
        if (print_insn_mnem(scan) == jmp_type)
        {
            return scan;
        }
        scan = next_head(scan, end_addr);
    }
    return scan;
}

// Pattern-match the instruction at current_addr.
// my_insn_name / my_op1 / my_op2 are the expected mnemonic, operand 0 text and
// operand 1 text; an argument that compares equal to 0 matches anything.
// Returns 1 when all three fields match, otherwise 0.
static checkAsm(current_addr,my_insn_name,my_op1,my_op2)
{
    auto mnem = print_insn_mnem(current_addr);
    auto first_op = print_operand(current_addr, 0);
    auto second_op = print_operand(current_addr, 1);

    // Wildcards are resolved to the actual value of the field.
    auto expect_mnem = my_insn_name;
    if (my_insn_name == 0)
    {
        expect_mnem = mnem;
    }
    auto expect_first = my_op1;
    if (my_op1 == 0)
    {
        expect_first = first_op;
    }
    auto expect_second = my_op2;
    if (my_op2 == 0)
    {
        expect_second = second_op;
    }

    auto matched = 0;
    if (mnem == expect_mnem && first_op == expect_first && second_op == expect_second)
    {
        matched = 1;
    }
    return matched;
}

// Rewrite the conditional jump at jcc_addr into an unconditional jmp to the
// same target. Handles both encodings:
//   near  (6 bytes): 0F 8x rel32 -> 90 E9 rel32 (the displacement stays valid
//                    because the instruction still ends at the same address)
//   short (2 bytes): 7x rel8     -> EB rel8
// Returns 1 when patched, 0 when the instruction has an unexpected size.
static ForceJmp(jcc_addr)
{
    auto jcc_size = get_item_size(jcc_addr);
    if (jcc_size == 6)
    {
        PatchByte(jcc_addr, 0x90);
        PatchByte(jcc_addr + 1, 0xE9);
        return 1;
    }
    if (jcc_size == 2)
    {
        PatchByte(jcc_addr, 0xEB);
        return 1;
    }
    return 0;
}

// Condition-analysis pass over the hard-coded range [0x407E53, 0x408859).
// Recognises two opaque-predicate shapes and resolves their branch statically:
//   1. "movsx ?, cs:_3" followed by "movsx ?, cs:_6" ... jz
//      (data * (data * 0 + 6 + 3 - 9) == 0): the jz is always taken.
//   2. "movsx ?, cs:_0", "cmp", then "jbe" or "jge".
static main()
{
    auto current_addr = 0x407E53;
    auto end_addr = 0x408859;
    while (current_addr != BADADDR && current_addr < end_addr)
    {
        auto current_res;
        auto next_addr;
        auto next_res;
        auto next_next_addr;
        auto next_next_res;
        // data * (data * 0 + 6 + 3 - 9) = 0
        current_res = checkAsm(current_addr,"movsx",0,"cs:_3");
        next_addr = next_head(current_addr,end_addr);
        next_res = checkAsm(next_addr,"movsx",0,"cs:_6");
        if(current_res && next_res)
        {
            auto jz_addr = findJmpAddr(current_addr,end_addr,"jz");
            // findJmpAddr returns its stop address when no jz exists; patching
            // in that case would NOP everything up to the end of the range.
            if(jz_addr == BADADDR || jz_addr >= end_addr)
            {
                msg("no jz found after %X, skipped\n",current_addr);
            }
            else if(ForceJmp(jz_addr))
            {
                // Predicate computation is dead once the jump is unconditional.
                NopCode(current_addr,jz_addr - current_addr);
                msg("jz addr : %X\n",jz_addr);
            }
            else
            {
                msg("unexpected jz encoding at %X, skipped\n",jz_addr);
            }
        }
        // cmp 0,data
        current_res = checkAsm(current_addr,"movsx",0,"cs:_0");
        next_addr = next_head(current_addr,end_addr);
        next_res = checkAsm(next_addr,"cmp",0,0);
        next_next_addr = next_head(next_addr,end_addr);
        next_next_res = checkAsm(next_next_addr,"jbe",0,0);
        if(current_res && next_res && next_next_res)
        {
            // jbe is removed together with its predicate, so execution always falls through.
            auto nop_addr = next_head(next_next_addr,end_addr);
            if(nop_addr != BADADDR)
            {
                NopCode(current_addr,nop_addr - current_addr);
                msg("cmp 0 addr : %X\n",next_next_addr);
            }
        }
        next_next_res = checkAsm(next_next_addr,"jge",0,0);
        if(current_res && next_res && next_next_res)
        {
            // jge becomes an unconditional jmp; the 5 bytes in front of the
            // movsx are NOPed as well (sample-specific, kept from the original).
            if(ForceJmp(next_next_addr))
            {
                NopCode(current_addr - 5,5);
                msg("cmp 0 addr : %X\n",next_next_addr);
            }
            else
            {
                msg("unexpected jge encoding at %X, skipped\n",next_next_addr);
            }
        }

        current_addr = next_head(current_addr,end_addr);
    }
}

这个 idc 脚本解析了上述 随机数1 * (随机数2 * 0) 的算式和随机数与 0 比较的算式。在替换方法效果很好的情况下,为什么还要使用这种方法呢?

# 例:hello-obf

题目来自 lilctf2025

这个题同样是不透明谓词 + 返回定值函数的虚假控制流,先使用常量替换的方式去除

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#include <idc.idc>

// Patch Length consecutive bytes, beginning at Addr, to 0x90 (x86 NOP).
// A non-positive Length leaves the database unchanged.
static NopCode(Addr, Length)
{
    auto remaining = Length;
    auto target = Addr;
    while (remaining > 0)
    {
        PatchByte(target, 0x90);
        target = target + 1;
        remaining = remaining - 1;
    }
}

// Replace "call return_N" at current_addr with "mov eax, N" (B8 imm32).
//   num          - operand text naming the helper, e.g. "return_7"
//   current_addr - address of the call instruction
// N is parsed from the name, so every return_<number> helper is handled; a
// name without a plain decimal suffix is skipped instead of being patched
// with an undefined immediate.
// Returns 1 when patched, 0 when skipped.
static PatchNumFunc(num,current_addr)
{
    auto prefix_pos = strstr(num, "return_");
    if (prefix_pos == -1)
    {
        msg("skip %X : %s is not a return_N helper\n", current_addr, num);
        return 0;
    }

    // Everything after "return_" (7 characters) must be a decimal number.
    auto digits = substr(num, prefix_pos + 7, -1);
    auto patch_num = atol(digits);
    if (digits != ltoa(patch_num, 10))
    {
        msg("skip %X : cannot parse constant from %s\n", current_addr, num);
        return 0;
    }

    msg("call return num addr : %X\n",current_addr);
    PatchByte(current_addr,0xB8);
    PatchDword(current_addr + 1,patch_num);
    return 1;
}

// Translate the operand text "cs:_N" (N = 0..9) into the number N.
// For any other text no value is returned explicitly.
static getValue(data)
{
    auto candidate = 0;
    while (candidate <= 9)
    {
        if (data == "cs:_" + ltoa(candidate, 10))
        {
            return candidate;
        }
        candidate = candidate + 1;
    }
}

// Replace the instruction at current_addr, which loads the opaque constant
// named by `num` ("cs:_N") into a register, with "mov reg, N".
// The destination register is read from operand 0 of the instruction.
// Returns 1 when patched, 0 when the instruction was left untouched.
static PatchNum(num,current_addr)
{
    auto value = getValue(num);
    auto reg = print_operand(current_addr,0);
    // Measure the instruction before patching so exactly its bytes are rewritten.
    auto insn_size = get_item_size(current_addr);

    // "mov r32, imm32" is encoded as B8+rd followed by the 32-bit immediate.
    auto reg_byte = -1;
    if (reg == "eax")
    {
        reg_byte = 0xB8;
    }
    else if (reg == "ecx")
    {
        reg_byte = 0xB9;
    }
    else if (reg == "edx")
    {
        reg_byte = 0xBA;
    }
    else if (reg == "ebx")
    {
        reg_byte = 0xBB;
    }
    else if (reg == "esi")
    {
        reg_byte = 0xBE;
    }
    else if (reg == "edi")
    {
        reg_byte = 0xBF;
    }

    // Never patch with an undefined opcode or overrun a short instruction.
    if (reg_byte == -1)
    {
        msg("skip %X : unsupported register %s\n", current_addr, reg);
        return 0;
    }
    if (insn_size < 5)
    {
        msg("skip %X : instruction too short (%d bytes)\n", current_addr, insn_size);
        return 0;
    }

    msg("mov reg num addr : %X\n",current_addr);
    PatchByte(current_addr,reg_byte);
    PatchDword(current_addr + 1,value);
    // Pad whatever is left of the original instruction (2 bytes for a 7-byte one).
    NopCode(current_addr + 5,insn_size - 5);
    return 1;
}

// Constant-replacement pass over the hard-coded range
// [0x1400217E8, 0x1400260C2):
//   * "call return_N"        -> "mov eax, N"
//   * "<insn> reg, cs:_N"    -> "mov reg, N"
static main()
{
    auto current_addr = 0x1400217E8;
    auto end_addr = 0x1400260C2;
    while (current_addr != BADADDR && current_addr < end_addr)
    {
        // call return_num
        // Only real call instructions are rewritten; without the mnemonic
        // check a "jmp return_N" or any other instruction naming the helper in
        // operand 0 would be overwritten too.
        auto call_num = print_operand(current_addr,0);
        if(print_insn_mnem(current_addr) == "call" && strstr(call_num,"return_") != -1)
        {
            auto call_size = get_item_size(current_addr);
            if(call_size >= 5)
            {
                PatchNumFunc(call_num,current_addr);
                // "mov eax, imm32" is 5 bytes; pad a longer call encoding.
                NopCode(current_addr + 5,call_size - 5);
            }
            else
            {
                msg("skip %X : call too short to patch\n",current_addr);
            }
        }

        // mov reg num
        auto num = print_operand(current_addr,1);
        if(strstr(num,"cs:_") != -1)
        {
            PatchNum(num,current_addr);
        }
        current_addr = next_head(current_addr, end_addr);
    }
}

效果如下

其中这行算式并没有被 ida 优化掉,因为使用了浮点数运算。发现每行算式都有固定的 1 * (7 - (3 + 3)) 式子,这个时候用条件解析会去除更加干净

观察发现是将 eax 放入栈中,匹配混淆段特征并写脚本去除

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#include <idc.idc>

// Replace Length bytes starting at Addr with NOP (0x90).
// Does nothing for a Length of zero or less.
static NopCode(Addr, Length)
{
    auto index = 0;
    while (index < Length)
    {
        PatchByte(Addr + index, 0x90);
        index++;
    }
}

// Compare the instruction at current_addr with an expected mnemonic
// (my_insn_name) and expected operand texts (my_op1, my_op2).
// A pattern argument that compares equal to 0 is a wildcard for its field.
// Returns 1 if every field matches, 0 otherwise.
static checkAsm(current_addr,my_insn_name,my_op1,my_op2)
{
    auto seen_mnem = print_insn_mnem(current_addr);
    auto seen_op1 = print_operand(current_addr, 0);
    auto seen_op2 = print_operand(current_addr, 1);

    // Resolve wildcards to the value actually seen.
    auto pat_mnem = my_insn_name;
    auto pat_op1 = my_op1;
    auto pat_op2 = my_op2;
    if (my_insn_name == 0)
    {
        pat_mnem = seen_mnem;
    }
    if (my_op1 == 0)
    {
        pat_op1 = seen_op1;
    }
    if (my_op2 == 0)
    {
        pat_op2 = seen_op2;
    }

    if (seen_mnem == pat_mnem)
    {
        if (seen_op1 == pat_op1)
        {
            if (seen_op2 == pat_op2)
            {
                return 1;
            }
        }
    }
    return 0;
}

// Scan instruction heads from current_addr up to end_addr and return the
// address of the first "movsd <anything>, xmm0".
// When no such instruction exists no value is returned explicitly.
static findMovStack(current_addr,end_addr)
{
    auto scan = current_addr;
    while (scan != BADADDR && scan < end_addr)
    {
        if (checkAsm(scan, "movsd", 0, "xmm0"))
        {
            return scan;
        }
        scan = next_head(scan, end_addr);
    }
}

// Scan instruction heads from current_addr up to end_addr and return the
// address of the first "cvttsd2si eax, xmm0", which closes the obfuscated
// floating-point sequence.
// When no such instruction exists no value is returned explicitly.
static findEnd(current_addr,end_addr)
{
    auto scan = current_addr;
    while (scan != BADADDR && scan < end_addr)
    {
        if (checkAsm(scan, "cvttsd2si", "eax", "xmm0"))
        {
            return scan;
        }
        scan = next_head(scan, end_addr);
    }
}

// Condition-analysis pass over the hard-coded range
// [0x1400217E8, 0x1400260C2).
// Looks for the sequence
//     sub eax, ? ; pxor xmm0, xmm0 ; cvtsi2sd xmm0, eax
//     ... movsd <dst>, xmm0 ... cvttsd2si eax, xmm0
// and collapses it: the movsd store is rewritten to a 48 89 (mov r/m64, r64)
// store reusing the original operand bytes, and all other instructions from
// the pxor up to and including the cvttsd2si are NOPed.
static main()
{
    auto current_addr = 0x1400217E8;
    auto end_addr = 0x1400260C2;
    while (current_addr != BADADDR && current_addr < end_addr)
    {
        auto sub_eax = checkAsm(current_addr,"sub","eax",0);
        auto pxor_addr = next_head(current_addr,end_addr);
        auto pxor_xmm0 = checkAsm(pxor_addr,"pxor","xmm0","xmm0");
        auto cvtsi2sd_addr = next_head(pxor_addr,end_addr);
        auto cvtsi2sd_xmm0 = checkAsm(cvtsi2sd_addr,"cvtsi2sd","xmm0","eax");
        if(sub_eax && pxor_xmm0 && cvtsi2sd_xmm0)
        {
            msg("addr : %X\n",current_addr);
            // Locate both anchors and re-verify them BEFORE patching anything:
            // the finder functions return no value when nothing matches, and
            // patching relative to such a result would corrupt unrelated code.
            auto mov_stack_addr = findMovStack(cvtsi2sd_addr + 12,end_addr);
            if(checkAsm(mov_stack_addr,"movsd",0,"xmm0"))
            {
                auto next_addr = next_head(mov_stack_addr,end_addr);
                auto cvttsd2si_addr = findEnd(mov_stack_addr,end_addr);
                if(checkAsm(cvttsd2si_addr,"cvttsd2si","eax","xmm0"))
                {
                    auto obf_end_addr = next_head(cvttsd2si_addr,end_addr);
                    if(next_addr != BADADDR && obf_end_addr != BADADDR)
                    {
                        // F2 0F 11 /r (movsd m64, xmm0) -> 90 48 89 /r
                        NopCode(mov_stack_addr,1);
                        PatchWord(mov_stack_addr + 1,0x8948);
                        msg("patch addr : %X\n",mov_stack_addr);
                        msg("end addr : %X\n",obf_end_addr);
                        // Remove the int->double conversion before the store ...
                        NopCode(pxor_addr,mov_stack_addr - pxor_addr);
                        // ... and everything after it through the cvttsd2si.
                        NopCode(next_addr,obf_end_addr - next_addr);
                    }
                    else
                    {
                        msg("sequence at %X runs past the scan range, skipped\n",current_addr);
                    }
                }
                else
                {
                    msg("no cvttsd2si found after %X, skipped\n",mov_stack_addr);
                }
            }
            else
            {
                msg("no movsd store found after %X, skipped\n",cvtsi2sd_addr);
            }
        }
        current_addr = next_head(current_addr, end_addr);
    }
}

效果如下

这个例子其实不是举得特别好,但是我没找到其他去不掉的文件,条件解析比常量替换要麻烦很多,所以这里只是示例,提供另一种去除思路

# 总结

虚假控制流主要由恒真恒假跳转控制程序走向,其中夹杂不可达块与无逻辑函数以达成混淆目的。最简单去除方式就是替换不透明谓词为对寄存器赋值,在特定情况下,由于混淆部分代码大部分是相同的,也可以考虑解析特征整块去除。

如果大家有其他思路或者有趣的实例欢迎向我分享!