dustland

dustball in dustland

glibc2.23 Ptmalloc2 源码分析

Dive Into Ptmalloc2

基于glibc2.23的ptmalloc2源码分析

datastructure

malloc_chunk

堆空间管理的最小单元

每个堆块由元数据和数据两部分组成

元数据记录了该堆块的物理前块大小,本块大小,分配区,前块使用,是否mmap块状态,以及空闲状态下的前驱后继指针

数据就是返回给用户的可用空间

1
2
3
4
5
6
7
8
9
10
11
12
struct malloc_chunk {

INTERNAL_SIZE_T prev_size; /* Size of previous chunk (if free). */
INTERNAL_SIZE_T size; /* Size in bytes, including overhead. */

struct malloc_chunk* fd; /* double links -- used only if free. */
struct malloc_chunk* bk;

/* Only used for large blocks: pointer to next larger size. */
struct malloc_chunk* fd_nextsize; /* double links -- used only if free. */
struct malloc_chunk* bk_nextsize;
};

字段意义

prev_size

如果物理上紧挨着的一个chunk空闲的话,则该值为物理上前面紧挨着的那个chunk的大小.

如果物理上紧挨着的一个chunk占用的话,则该值可以被物理上紧挨着的那个chunk使用(空间复用)

size

本chunk的大小,包括chunk头和chunk数据

其中chunk头就是malloc_chunk结构体,chunk数据就是返回给用户使用的内存空间

每个chunk的大小都必须是2*SIZE_SZ整数倍

32位系统中SIZE_SZ=4,64位系统中SIZE_SZ=8

因此32位系统上chunk大小是8的倍数,64位上chunk是16的倍数

诚如是,则size的低3位永远用不到,为了节省空间,ptmalloc的实现中,这三个低位表示三个符号A,M,P

fd,bk

当本chunk空闲并且挂在bin上,此时fd,bk分别是前向和后向chunk的指针,相当于双向链表.

注意是逻辑上相邻,也就是链表相连,不是物理上相邻

fd_nextsize,bk_nextsize

当chunk空闲并且挂在large bin中时,用于查找最近匹配的空闲chunk

怎么个用法呢?

large bin中挂着的chunk是按照大小排序的,一个chunk逻辑上相连的chunk可能大小相同,也可能不同,fd_nextsize,bk_nextsize就指向第一个大小不同的chunk

这样说比较抽象,具体见后面的largebin结构

空间复用

分配时状态
1
2
3
4
5
6
7
8
9
10
11
12
13
14
chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Size of previous chunk, if unallocated (P clear) |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Size of chunk, in bytes |A|M|P|
mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| User data starts here... .
. .
. (malloc_usable_size() bytes) .
next . |
chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| (size of chunk, but used for application data) |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Size of next chunk, in bytes |A|0|1|
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
空闲时状态
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Size of previous chunk |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
`head:' | Size of chunk, in bytes |P|
mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Forward pointer to next chunk in list |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Back pointer to previous chunk in list |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Unused space (may be 0 bytes long) .
. .
. |
nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
`foot:' | Size of chunk, in bytes |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+

宏定义

指针转换
1
2
3
/* conversion from malloc headers to user pointers, and back */
#define chunk2mem(p) ((void *) ((char *) (p) + 2 * SIZE_SZ))
#define mem2chunk(mem) ((mchunkptr)((char *) (mem) -2 * SIZE_SZ))

mem就是数据区,chunk就是malloc_chunk的基地址,两者的关系在图上表示

1
2
3
4
5
6
7
8
9
10
11
12
13
14
chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Size of previous chunk, if unallocated (P clear) |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Size of chunk, in bytes |A|M|P|
mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| User data starts here... .
. .
. (malloc_usable_size() bytes) .
next . |
chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| (size of chunk, but used for application data) |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Size of next chunk, in bytes |A|0|1|
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+

显然mem往上数两个成员就是chunk,这两个成员都是INTERNAL_SIZE_T类型的,在32位平台上分别长4字节,在64位平台上分别长8字节

最小chunk大小
1
2
/* The smallest possible chunk */
#define MIN_CHUNK_SIZE (offsetof(struct malloc_chunk, fd_nextsize))

offsetof(struct,struct.member);作用是计算member成员在其所在的结构体struct中的偏移量

这表明最小的chunk至少要包含前四个成员,prev_size,size,fd,bk,后面两个可以没有

最小申请的堆内存大小
1
2
3
4
5
/* The smallest size we can malloc is an aligned minimal chunk */
//MALLOC_ALIGN_MASK = 2 * SIZE_SZ -1
#define MINSIZE \
(unsigned long) (((MIN_CHUNK_SIZE + MALLOC_ALIGN_MASK) & \
~MALLOC_ALIGN_MASK))
检查对齐
1
2
3
4
5
6
7
/* Check if m has acceptable alignment */
// MALLOC_ALIGN_MASK = 2 * SIZE_SZ -1
#define aligned_OK(m) (((unsigned long) (m) & MALLOC_ALIGN_MASK) == 0)

#define misaligned_chunk(p) \
((uintptr_t)(MALLOC_ALIGNMENT == 2 * SIZE_SZ ? (p) : chunk2mem(p)) & \
MALLOC_ALIGN_MASK)
判断用户请求是否离谱
1
2
3
4
5
6
7
8
/*
Check if a request is so large that it would wrap around zero when
padded and aligned. To simplify some other code, the bound is made
low enough so that adding MINSIZE will also not wrap around zero.
*/

#define REQUEST_OUT_OF_RANGE(req) \
((unsigned long) (req) >= (unsigned long) (INTERNAL_SIZE_T)(-2 * MINSIZE))
规范化请求大小
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/* pad request bytes into a usable size -- internal version */
//MALLOC_ALIGN_MASK = 2 * SIZE_SZ -1
#define request2size(req) \
(((req) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE) \
? MINSIZE \//如果用户请求的太小则直接用MINSIZE
: ((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)//否则向上取整到满足对齐要求

/* Same, except also perform argument check */

#define checked_request2size(req, sz) \
if (REQUEST_OUT_OF_RANGE(req)) { \
__set_errno(ENOMEM); \
return 0; \
} \
(sz) = request2size(req);
设置size最低三位标志位
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
/* size field is or'ed with PREV_INUSE when previous adjacent chunk in use */
#define PREV_INUSE 0x1

/* extract inuse bit of previous chunk */
#define prev_inuse(p) ((p)->mchunk_size & PREV_INUSE)

/* size field is or'ed with IS_MMAPPED if the chunk was obtained with mmap() */
#define IS_MMAPPED 0x2

/* check for mmap()'ed chunk */
#define chunk_is_mmapped(p) ((p)->mchunk_size & IS_MMAPPED)

/* size field is or'ed with NON_MAIN_ARENA if the chunk was obtained
from a non-main arena. This is only set immediately before handing
the chunk to the user, if necessary. */
#define NON_MAIN_ARENA 0x4

/* Check for chunk from main arena. */
#define chunk_main_arena(p) (((p)->mchunk_size & NON_MAIN_ARENA) == 0)

/* Mark a chunk as not being on the main arena. */
#define set_non_main_arena(p) ((p)->mchunk_size |= NON_MAIN_ARENA)

/*
Bits to mask off when extracting size
Note: IS_MMAPPED is intentionally not masked off from size field in
macros for which mmapped chunks should never be seen. This should
cause helpful core dumps to occur if it is tried by accident by
people extending or adapting this malloc.
*/
#define SIZE_BITS (PREV_INUSE | IS_MMAPPED | NON_MAIN_ARENA)
获取本chunk size
1
2
3
4
5
/* Get size, ignoring use bits */
#define chunksize(p) (chunksize_nomask(p) & ~(SIZE_BITS))

/* Like chunksize, but do not mask SIZE_BITS. */
#define chunksize_nomask(p) ((p)->mchunk_size)

如果想要获得纯真的size,最低三位应该忽略标志位的影响,因此chunksize中用SIZE_BITS取反得到第三位全是0然后按位与,确保获得的size低三位必为0

而chunksize_nomask就没有忽略,相当于直接取malloc_chunk结构体的第二个成员size

使用状态
1
2
3
4
5
6
7
8
9
10
/* extract p's inuse bit */
#define inuse(p) \
((((mchunkptr)(((char *) (p)) + chunksize(p)))->mchunk_size) & PREV_INUSE)

/* set/clear chunk as being inuse without otherwise disturbing */
#define set_inuse(p) \
((mchunkptr)(((char *) (p)) + chunksize(p)))->mchunk_size |= PREV_INUSE

#define clear_inuse(p) \
((mchunkptr)(((char *) (p)) + chunksize(p)))->mchunk_size &= ~(PREV_INUSE)
size大小
1
2
3
4
5
6
7
8
9
10
11
/* Set size at head, without disturbing its use bit */
// SIZE_BITS = 7
#define set_head_size(p, s) \
((p)->mchunk_size = (((p)->mchunk_size & SIZE_BITS) | (s)))

/* Set size/use field */
#define set_head(p, s) ((p)->mchunk_size = (s))

/* Set size at footer (only when chunk is not in use) */
#define set_foot(p, s) \
(((mchunkptr)((char *) (p) + (s)))->mchunk_prev_size = (s))

这里set_foot干了啥?

p是chunk指针,s是该chunk的大小,p+s就指向了本chunk的结尾,

也就是下一个chunk的基地址,也就是下一个chunk的prev_size成员,

于是p+s强转为一个malloc_chunk类型指针,

然后取其第一个成员也就是prev_size,写上本chunk的大小

指定偏移处认为是一个chunk
1
2
/* Treat space at ptr + offset as a chunk */
#define chunk_at_offset(p, s) ((mchunkptr)(((char *) (p)) + (s)))

p指针加上s偏移量的地址视为一个chunk的基地址,返回一个malloc_chunk*指针

malloc_state

分配区结构,一个进程只能有一个主分配区,但是可以有多个非主分配区

当某个线程试图用malloc动态申请内存时,会首先对一个分配区上锁,如果主分配区忙则沿着malloc_state->next寻找下一个分配区,直到找到一个闲的分配区上锁使用.如果转一圈没发现闲的分配区则创建新的非主分配区,然后将其加入到这个分配区环状链表中上锁使用.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
struct malloc_state {
/* Serialize access. */
mutex_t mutex;//互斥锁,保证临界区只有一个线程访问

/* Flags (formerly in max_fast). */
int flags;

/* Fastbins */
mfastbinptr fastbins[NFASTBINS];

/* Base of the topmost chunk -- not otherwise kept in a bin */
mchunkptr top;

/* The remainder from the most recent split of a small request */
mchunkptr last_remainder;

/* Normal bins packed as described above */
mchunkptr bins[NBINS * 2 - 2];

/* Bitmap of bins */
unsigned int binmap[BINMAPSIZE];

/* Linked list */
struct malloc_state *next;
...
};

其中管理堆块的手段有fastbin,topchunk,unsortedbin,smallbins,largebins这么几种

只考虑单线程的情况,也就是说不会产生非主分配区,只使用主分配区

最初只有很大一块topchunk,刚开始的malloc申请都是直接在topchunk上切割使用

free释放时,如果对应堆块落在fastbin范围内则放到fastbin对应的链表中

否则一律放到unsortedbin中,等后面再次malloc时切割或者合并或者分拣

fastbins

只会使用fd指针的单向链表

1
2
3
flowchart LR
A["fastbin[x]"]
A--fd-->a--fd-->b--fd-->c
max_fast
1
2
3
4
5
6
7
8
9
10
#define set_max_fast(s) \
global_max_fast = (((s) == 0) \
? SMALLBIN_WIDTH : ((s + SIZE_SZ) & ~MALLOC_ALIGN_MASK))
#define get_max_fast() global_max_fast

static INTERNAL_SIZE_T global_max_fast;

#define MALLOC_ALIGN_MASK (MALLOC_ALIGNMENT - 1)
#define MALLOC_ALIGNMENT (2 *SIZE_SZ < __alignof__ (long double) \
? __alignof__ (long double) : 2 *SIZE_SZ)

对于x64平台,SIZE_SZ=8,

1
2
3
MALLOC_ALIGNMENT=2*SIZE_SZ=16=0b 10000
MALLOC_ALIGN_MASK=15=0b 1111
~MALLOC_ALIGN_MASK=111...111 0000

这个global_max_fast在malloc_init_state时期被初始化

1
2
3
4
if (av == &main_arena)
set_max_fast (DEFAULT_MXFAST);

#define DEFAULT_MXFAST (64 * SIZE_SZ / 4)

对于x64平台,SIZE_SZ=8,那么DEFAULT_MXFAST=128

1
2
3
4
5
set_max_fast(128):
global_max_fast = ((128 + 8) & 111...111 0000))
=0b10001000&0b111...111 0000
=0b10000000
=128

也就是说,nb<=128才可能在fastbin中取堆块

fastbin_index
1
#define fastbin_index(sz)  ((((unsigned int) (sz)) >> (SIZE_SZ == 8 ? 4 : 3)) - 2)

在x64平台上,SIZE_SZ=8,而在x86平台上SIZE_SZ=4

如果在x64平台上,则将sz右移4位,相当于除以16,然后-2,

1
2
fastbin_index(sz)
=sz >> 4 - 2
sz fastbin_index(sz) on x64
[0b100000,0b110000)=[32,48) 0
[0b110000,0b1000000)=[48,64) 1
[0b1000000,0b1010000)=[64,80) 2

比如用户期望分配0x10大小的空间,那么实际上的堆块大小是32字节

1
2
3
4
fastbin_index(0b100000)
=0b100000>>4 -2
=0b10-2
=0
fastbin[idx]
1
#define fastbin(ar_ptr, idx) ((ar_ptr)->fastbinsY[idx])

fastbins结构

1
2
3
4
5
6
7
typedef struct malloc_chunk *mfastbinptr;
...
struct malloc_state{
...
mfastbinptr fastbinsY[NFASTBINS];
...
}

fastbins是一个链栈,后释放的堆块会先被再次分配(LIFO,后进先出)

也就是说mfastbinptr *fb = &fastbin (av, idx);

这栈中的指针变量fb指向桶子头的地址,桶子头指向该桶子中的第一个堆块

1
2
3
4
5
6
flowchart LR
A["fastbin[x] @malloc_state"]
a2["1st chunk @heap"]
a1["2nd chunk @heap"]
a0["3rd chunk @heap"]
fb["fb句柄 @stack"]---->A--fd-->a2--fd-->a1--fd-->a0
catomic_compare_and_exchange_val_acq (fb, victim->fd, victim)
1
2
3
4
5
6
7
8
9
10
11
#  define catomic_compare_and_exchange_val_acq(mem, newval, oldval) \
atomic_compare_and_exchange_val_acq (mem, newval, oldval)

#define atomic_compare_and_exchange_val_acq(mem, newval, oldval) \
({ __typeof (mem) __gmemp = (mem); \
__typeof (*mem) __gret = *__gmemp; \
__typeof (*mem) __gnewval = (newval); \
\
if (__gret == (oldval)) \
*__gmemp = __gnewval; \
__gret; })

这个宏的作用是,

如果*mem原本等于oldval,则把*mem更新为newval;无论是否更新,都把原来的*mem值作为返回值

放在原文中

1
2
3
4
5
6
7
8
9
10
11
do
{
victim = pp;//首先执行一次,如果第一次victim为空,说明这个桶子就是空的,也就不能用fastbin进行分配
if (victim == NULL)
break;
}
while (
(pp = catomic_compare_and_exchange_val_acq (fb, victim->fd, victim)
)!= victim
//victim指向链栈顶堆块,把他取下来,把原来的次顶堆块,也就是victim的后继堆块,挂到fb指针上,返回值pp是victim
);

fb这个桶子头原本是指向victim这个堆块的,

现在要让fb指向victim的后继堆块,然后返回victim给pp

显然pp必然等于victim,也就是顶多拿出堆顶来,while就结束了,while只会执行一次

至于为啥要这样写呢?压行

check_remalloced_chunk(A,P,N)

对本应该属于A分配区的、请求大小为N的堆块P进行检查

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# define check_remalloced_chunk(A, P, N) do_check_remalloced_chunk (A, P, N)


static void
do_check_remalloced_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T s)
{
INTERNAL_SIZE_T sz = p->size & ~(PREV_INUSE | NON_MAIN_ARENA);
//提取p堆块结构体中存放的size,由于低三位是标志复用,现在需要将其盖住
if (!chunk_is_mmapped (p))//如果不是mmap分配的堆块
//不是mmap分配的堆块才属于某个分配区,此时需要检查其分配区归属是否正确
{
assert (av == arena_for_chunk (p));//首先检查给定的av是否是预期的p的所属分配区
if (chunk_non_main_arena (p))//如果p不是主分配区的
assert (av != &main_arena);//检查av是不是主分配区
else
assert (av == &main_arena);
}

do_check_inuse_chunk (av, p);//检查本堆块是否正在使用

/* Legal size ... */
assert ((sz & MALLOC_ALIGN_MASK) == 0);//检查sz大小是否对齐
assert ((unsigned long) (sz) >= MINSIZE);//检查sz大小是否大于最小分配大小
/* ... and alignment */
assert (aligned_OK (chunk2mem (p)));//检查p指向的地址是否对齐
/* chunk is less than MINSIZE more than request */
assert ((long) (sz) - (long) (s) >= 0);
assert ((long) (sz) - (long) (s + MINSIZE) < 0);
}

unsortedbins

smallbins和unsortedbins中堆块的连接方式相同,都是双向链表

两者不同的是,unsortedbin中堆块可以大小各异,但是smallbin中一个桶子里的堆块大小必须相同

smallbin2

unsortedbin的双向链表没有长短限制,采用头插法

unsorted_chunks(M) (bin_at(M, 1))
1
#define unsorted_chunks(M) (bin_at(M, 1))

取unsortedbin桶子头

smallbins

1
#define NSMALLBINS 64

bins的下标是从0到253,其中每个桶子占用两个bins,分别作为fd和bk指针

smallbins占用64个桶子,

其中第1个桶子是unsortedbin,第2个到第63个桶子是smallbins

从第64个及以后的桶子就是largebins

next_bin
1
2
/* analog of ++bin */
#define next_bin(b) ((mbinptr)((char *)(b) + (sizeof(mchunkptr) << 1)))

下一个bin就是mchunkptr指针的大小,也就是8个字节(在x64上)

左移一位也就是乘以2,因为每个Bin占用两个bin,分别作为fd和bk指针

in_smallbin_range
1
2
3
4
5
6
7
8
9
10
11
12
13
#define in_smallbin_range(sz)  \
((unsigned long) (sz) < (unsigned long) MIN_LARGE_SIZE)

#define MIN_LARGE_SIZE ((NSMALLBINS - SMALLBIN_CORRECTION) * SMALLBIN_WIDTH)

#define NBINS 128
#define NSMALLBINS 64
#define SMALLBIN_WIDTH MALLOC_ALIGNMENT
#define SMALLBIN_CORRECTION (MALLOC_ALIGNMENT > 2 * SIZE_SZ)

///MALLOC_ALIGNMENT=16
SMALLBIN_CORRECTION=FALSE=0
MIN_LARGE_SIZE=(64-0)*16=1024

smallbins有(64-2=62)个桶子,最大管理的堆块为1023Bytes

再大一个字节都得放到largebin中

也就是说fastbins管理的堆块大小也在smallbin范围内,也就是说,fastbin相当于前部分比较小的smallbins的缓存

smallbin_index
1
2
3
4
5
#define smallbin_index(sz) \
((SMALLBIN_WIDTH == 16 ? (((unsigned) (sz)) >> 4) : (((unsigned) (sz)) >> 3))+ SMALLBIN_CORRECTION)
SMALLBIN_WIDTH=MALLOC_ALIGNMENT=16字节
SMALLBIN_CORRECTION=0
smallbin_index(sz)=(sz>>4)+0=sz/16

这里参数sz是将请求大小换算成对应堆块整体大小之后的值,也就是包括了元数据

最小是0x20(元数据prev_size和size占用0x10,剩下的0x10是最小分配要求)

堆块大小(sz) index
unsortedbin 1
smallbins [1,63]
[0x20,0x30) 2
[0x30,0x40) 3
....
[0x3f0,0x400) 63
>0x400 largebins
bin_at
1
2
3
4
#define bin_at(m, i) \
(mbinptr)(((char *)&((m)->bins[((i)-1) * 2])) - offsetof(struct malloc_chunk, fd))
//(m)->bins[((i)-1) * 2]-16
#define offsetof(s,m) ((size_t)&(((s*)0)->m))

这里m是malloc_state结构,i是使用smallbin_index宏计算出的堆块在smallbin中的下标,i从2开始,因为bins[0]和bins[1]是unsortedbin的地盘

m->bins[2*(i-1)]指向的是下标为(2*(i-1))的桶子的桶子头,减去fd成员在一个堆块中的偏移量,得到的是该桶子头基址往前16字节的内存地址

显然这个地方是未知的,这是为啥呢?

最后将该地址又交给一个mbinptr也就是malloc_chunk*指针保管

那么此时,新指针+16的位置刚好是修正后的fd

9c28ec87e40a4ea615599a26bafa58c

而每个桶子头节点虽然也是malloc_chunk类型,但是只需要fd和bk两个指针,其他成员不需要

smallbinhead
set_inuse_bit_at_offset
1
2
#define set_inuse_bit_at_offset(p, s) \
(((mchunkptr)(((char *)(p)) + (s)))->size |= PREV_INUSE)

将size字段的flag位设置上PREV_INUSE=1,表示前一个物理相邻块正在被占用

do_check_malloced_chunk
1
2
3
4
5
6
7
#define check_malloced_chunk(A, P, N) do_check_malloced_chunk(A, P, N)
static void
do_check_malloced_chunk(mstate av, mchunkptr p, INTERNAL_SIZE_T s)
{
do_check_remalloced_chunk(av, p, s);
assert(prev_inuse(p));
}

largebins

smallbins中的每两个相邻的桶子,其中堆块的大小相差0x10字节(在x64上,即16字节)

Bin Index就是bin_at的计算结果

1
2
3
4
5
6
7
8
9
10
11
12
实际上largebins和smallbins可以看成一个整体,前64个桶子是smallbins
64个桶子相邻两个桶子之间大小差8字节
然后32个桶子相邻两个桶子之间大小差64字节
然后16个桶子相邻两个桶子之间大小差512字节
...
64 bins of size 8
32 bins of size 64
16 bins of size 512
8 bins of size 4096
4 bins of size 32768
2 bins of size 262144
1 bin of size what's left
largebin_range

malloc函数在分配时,超过smallbin_range大小的堆块才可能被放到largebin

1
2
#define in_smallbin_range(sz) \
((unsigned long)(sz) < (unsigned long)MIN_LARGE_SIZE)

在x64上,MIN_LARGE_SIZE=1024

也就是说,大于等于1024的堆块才可能进入largebin

largebin_index
1
2
3
4
5
6
7
8
9
10
11
#define largebin_index(sz)                              \
(SIZE_SZ == 8 ? largebin_index_64(sz) \
: MALLOC_ALIGNMENT == 16 ? largebin_index_32_big(sz) \ //size_sz!=8并且对齐是16位,调用largebin_index_32_big
: largebin_index_32(sz)) //size_sz!=8并且对齐是8位,调用largebin_index_32
x64上SIZE_SZ=8(一个指针的大小),因此调用largebin_index_64(sz) 这个宏
#define largebin_index_64(sz) \
(((((unsigned long)(sz)) >> 6) <= 48) ? 48 + (((unsigned long)(sz)) >> 6) : ((((unsigned long)(sz)) >> 9) <= 20) ? 91 + (((unsigned long)(sz)) >> 9) \
: ((((unsigned long)(sz)) >> 12) <= 10) ? 110 + (((unsigned long)(sz)) >> 12) \
: ((((unsigned long)(sz)) >> 15) <= 4) ? 119 + (((unsigned long)(sz)) >> 15) \
: ((((unsigned long)(sz)) >> 18) <= 2) ? 124 + (((unsigned long)(sz)) >> 18) \
: 126)

这里的参数sz是包括元数据的整个堆块大小

又落在largebin范围内的堆块,最小是1024字节,因此sz右移6位后,最小是16,那么第一组从16到48,堆块的大小也就是从1024到3072

这些堆块对应的桶下标计算方式为,将其大小右移6位然后加上48,

也就是说,一个桶子中的堆块一样大,同一组内相邻两个桶子中堆块相差64B

largebins堆块大小 下标
[1024,1088) 64
[1088,1152) 65
...
[3072,3136) 96 (此时sz>>6==48,恰好落在第一组的最后一个桶子)
1
2
3
4
5
6
7
64 bins of size       8
32 bins of size 64
16 bins of size 512
8 bins of size 4096
4 bins of size 32768
2 bins of size 262144
1 bin of size what's left

整个largebin中有6组桶子,第一组占用32个Bins,相邻两个桶子之间的堆块相差64B

第二组占用16个Bins,相邻两个桶子之间的堆块相差512B

...

1
2
3
4
5
6
7
if(sz/64<=48){
return 48+sz/64
}else if(sz/512<=20){
return 91+sz/512
}else if(sz/4096<=10){
return 110+sz/4096
}else if(sz/32768<=4){
return 119+sz/32768
}else if(sz/262144<=2){
return 124+sz/262144
}else{
return 126
}

binmap

1
2
3
4
5
6
7
8
9
10
#define NBINS 128

#define BINMAPSHIFT 5
#define BITSPERMAP (1U << BINMAPSHIFT)
BITSPERMAP=32
#define BINMAPSIZE (NBINS / BITSPERMAP)
BINMAPSIZE=128/32=4

unsigned int binmap[BINMAPSIZE];
unsigned int binmap[4];

binmap是一个4个unsigned int的数组,每个元素32位,共4*32=128位,不管是x64还是x86都是如此,每一位对应128个bin中的一个,用于标记对应bin中是否有空闲堆块

用于加快largebin中分配堆块时的最适寻找工作

1
#define idx2block(i) ((i) >> BINMAPSHIFT)

i是bins下标,右移5位也就是除以32,计算得到下标为i的桶子归binmap[0],binmap[1],binmap[2],binmap[3]中的哪一个元素管理

一个block也就是32个桶子归binmap的一个元素管,该元素的32个位分别对应这32个桶子

1
#define idx2bit(i) ((1U << ((i) & ((1U << BINMAPSHIFT) - 1))))

计算i下标的largebins桶子属于其对应block的哪一位管

1
2
3
#define mark_bin(m, i) ((m)->binmap[idx2block(i)] |= idx2bit(i))			//改,标记i下标的largebins有空闲堆块
#define unmark_bin(m, i) ((m)->binmap[idx2block(i)] &= ~(idx2bit(i))) //删
#define get_binmap(m, i) ((m)->binmap[idx2block(i)] & idx2bit(i)) //查

algorithm

malloc

用户空间的malloc函数,实际上调用的是__libc_malloc@glibc,别名罢了

__libc_malloc

用户程序调用的malloc函数,实际上调用的是__libc_malloc

glibc/malloc/malloc.c中有这么一个alias声明

1
strong_alias (__libc_malloc, __malloc) strong_alias (__libc_malloc, malloc)

__libc_malloc实际上做的事情就两句话

1
2
victim = _int_malloc (ar_ptr, bytes);
return victim;

其他内容都是多线程上下锁,各种检查,编译优化了

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
__libc_malloc (size_t bytes)
{
mstate ar_ptr;
void *victim;//堆块指针

void *(*hook) (size_t, const void *)
= atomic_forced_read (__malloc_hook);
if (__builtin_expect (hook != NULL, 0))
return (*hook)(bytes, RETURN_ADDRESS (0));//在实际调用int_malloc函数之前,首先调用钩子函数hook,hook指向__malloc_hook

arena_get (ar_ptr, bytes);//获取分配区指针,返回值交给ar_ptr,传递参数bytes的作用是判断分配区空间是否足够

victim = _int_malloc (ar_ptr, bytes);//int_malloc函数是实际进行内存分配的函数
/* Retry with another arena only if we were able to find a usable arena
before. */
if (!victim && ar_ptr != NULL)//分配失败,但是之前确实获取到过可用分配区,此时换一个分配区重试
{
LIBC_PROBE (memory_malloc_retry, 1, bytes);
ar_ptr = arena_get_retry (ar_ptr, bytes);//分配区获取失败,重试一次
victim = _int_malloc (ar_ptr, bytes);//重新获取分配区之后再次尝试切割堆块给victim
}

if (ar_ptr != NULL)//解锁,因为int_malloc中会对分配区上锁,解锁后方便其他线程分配内存
(void) mutex_unlock (&ar_ptr->mutex);

assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
ar_ptr == arena_for_chunk (mem2chunk (victim)));//最后一次检查
//检查内容包括:
//victim指针是否真的指向一个堆块
//victim对应的堆块是否是mmap分配的堆块(mmap堆块不属于任何分配区,无需检查归属)
//ar_ptr指向的分配区,是否是victim堆块所在的分配区
return victim;
}
atomic_forced_read
1
2
# define atomic_forced_read(x) \
({ __typeof (x) __x; __asm ("" : "=r" (__x) : "0" (x)); __x; })

原子读,这段内联汇编应该这样断句:

1
2
3
4
5
__asm (
""
: "=r" (__x)
: "0" (x)
);

首先""意思是没有一条指令,本内联代码块只需要使用输入输出约束

"=r" (__x)输出操作数约束,意思是将__x视为输出变量,放到通用寄存器里

: "0" (x)输入操作数约束,意思是x使用和第一个输出操作数(也就是__x)相同的约束

整个内联汇编的作用是将变量x拷贝到__x

看完了也不知道"原子"如何保证的

1
__builtin_expect (hook != NULL, 0)

编译器分支预测优化

long __builtin_expect(long exp, long c);期望exp表达式的值等于c

__malloc_hook
1
2
static void *malloc_hook_ini(size_t sz,const void *caller) __THROW;
void *weak_variable (*__malloc_hook)(size_t __size, const void *) = malloc_hook_ini;

分配前钩子,如果有注册钩子函数,则调用该钩子函数进行分配,直接返回钩子函数的返回值给句柄,不会再调用glibc自己实现的int_malloc

可以考虑篡改malloc_hook钩子劫持控制流

malloc_hook以及free_hook劫持原理 | S3cana's Blog (seanachao.github.io)

_int_malloc

这个函数很长,因为GNU向来要求函数嵌套不能太深,因此这个一个函数综合了从fastbin,smallbin,bin,unsortedbin等各种地方申请堆块的操作

glibc2.23/malloc/malloc.c 第3318行开始

函数签名

1
static void *_int_malloc (mstate av, size_t bytes);

static决定本函数只能在malloc模块中可见,用户程序无法越级调用

void*返回值类型

两个参数,mstate av是分配区指针

size_t bytes是企图分配的内存大小

算法流程

malloc

局部变量

首先定义了一众局部变量

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
INTERNAL_SIZE_T nb;               /* normalized request size *///本变量是用户希望大小size的计算值,也就是实际的堆块大小
unsigned int idx; /* associated bin index *///本变量用于记录nb大小的堆块属于的桶子下标
mbinptr bin; /* associated bin */;//桶子头指针

mchunkptr victim; /* inspected/selected chunk *///命中堆块
INTERNAL_SIZE_T size; /* its size */ //victim命中堆块本来的大小
int victim_index; /* its bin index */ //victim_index命中堆块属于的桶子下标

mchunkptr remainder; /* remainder from a split */ //切割一个大块,剩下的部分被称为remainder
unsigned long remainder_size; /* its size */ //剩余部分的大小

unsigned int block; /* bit map traverser */ //binmap下标,用于记录一个桶子属于四个block之一的哪一个
unsigned int bit; /* bit map traverser */ //用于记录一共桶子属于其block中的哪一位
unsigned int map; /* current word of binmap */ //binmap[map],作为binmap的下标,有0,1,2,3四个取值

mchunkptr fwd; /* misc temp for linking */ //取桶子头之后一般会让bck指向之前的第一个堆块,fwd指向桶子头,然后头插
mchunkptr bck; /* misc temp for linking */

const char *errstr = NULL;

计算实际大小

1
checked_request2size (bytes, nb);

这个宏的作用是将请求的bytes,按照对齐等规则,转化为实际上要申请的大小nb

经过此宏之后,int_malloc中使用的都是nb,不再使用bytes作为分配大小

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
#define checked_request2size(req, sz)                             \
if (REQUEST_OUT_OF_RANGE (req)) { \
__set_errno (ENOMEM); \
return 0; \
} \
(sz) = request2size (req);


#define request2size(req) \
(((req) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE) ? \
MINSIZE : \
((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)


#define MALLOC_ALIGN_MASK (MALLOC_ALIGNMENT - 1)
# define MALLOC_ALIGNMENT (2 *SIZE_SZ < __alignof__ (long double) \
? __alignof__ (long double) : 2 *SIZE_SZ)

如果请求大小req+SIZE_SZ+对齐掩码小于最小分配大小,则按照最小分配大小来

否则将上述值和对齐掩码的补码按位与

在x64上

MALLOC_ALIGNMENT=2*SIZE_SZ=16

MALLOC_ALIGN_MASK=15

request2size(req) =(req+8+15 )&11111110000

假设req=0x10,即用户希望得到一块至少有0x10个字节的堆块则

1
2
3
4
5
6
request2size(req) 
=(16+8+15 )&11111110000
=(0b10000+0b1000+0b1111)&111111110000
=0b100111&0b110000
=0b100000
=32

检查当前是否有可用分配区

然后检查av分配区指针是否为空,显然这里的编译器优化是期望其不空的

但是如果真的av为空,没有可用分配区的话,则调用sysmalloc直接解决分配问题

1
2
3
4
5
6
7
8
9
/* There are no usable arenas.  Fall back to sysmalloc to get a chunk from
mmap. */
if (__glibc_unlikely (av == NULL))
{
void *p = sysmalloc (nb, av);
if (p != NULL)
alloc_perturb (p, bytes);
return p;
}

如果果真为空则调用sysmalloc函数,

sysmalloc被调用的情况是这样的:

当av分配区的topchunk大小不足以满足用户需求,调用sysmalloc扩大topchunk大小或者更换topchunk

比如调用sbrk系统调用扩大topchunk的大小

sysmalloc如果能成功分配堆块,则p指向该堆块,然后alloc_perturb将p指向堆块的用户空间的前bytes个字节,初始化为perturb_byte^0xff

1
2
3
4
5
6
7
8
static int perturb_byte;

static void
alloc_perturb (char *p, size_t n)
{
if (__glibc_unlikely (perturb_byte))
memset (p, perturb_byte ^ 0xff, n);
}

fastbins区分配

经过两个检查之后,如果控制流执行至此,说明需要分配的堆块不是很离谱,起码不用麻烦sbrk额外分配大块内存

那么首先尝试使用fastbins进行分配

在该区分配的主要流程:

1.根据实际堆块大小nb计算应该落在哪个桶子里

2.从该桶子顶取出一个堆块交给用户

3.将该桶子中剩余的部分重新挂到桶子头上

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ()))
{//首先判断,nb这个大小,是否落在fastbins管理的堆块大小范围内

//控制流至此说明nb大小适合fastbins分配,下面需要判断fastbins里面有没有空闲堆块

idx = fastbin_index (nb);//根据nb大小计算落在fastbin的哪个桶里面,返回值是数组下标
mfastbinptr *fb = &fastbin (av, idx);//&fastbins[idx]就是对应桶的桶子头
mchunkptr pp = *fb;//*解引用,也就是拿出fastbins[idx]指向的第一个堆块,pp拷贝堆块的指针
do
{
victim = pp;//如果上来victim就为空,说明桶子头fastbins[idx]指向NULL,也就是这个桶是空的
if (victim == NULL)
break;
}
while (
(pp = catomic_compare_and_exchange_val_acq (fb, victim->fd, victim)
)!= victim
//victim指向链栈顶,然后把他取下来,把原来的次顶堆块挂到fb指针上,返回值pp是victim
);
if (victim != 0)//如果victim不为0说明对应桶中确实有堆块,并且已经交给victim保管
{
if (__builtin_expect (fastbin_index (chunksize (victim)) != idx, 0))
{//victim获取到的fastbin堆块,再检查一下发现不应该属于其原本的桶中,说明有鬼
errstr = "malloc(): memory corruption (fast)";
errout:
malloc_printerr (check_action, errstr, chunk2mem (victim), av);
return NULL;
}
check_remalloced_chunk (av, victim, nb);//重新分配的堆块检查,这里指的是从topchunk割下来然后free进入各种bins然后又被重新利用的堆块
void *p = chunk2mem (victim);//
alloc_perturb (p, bytes);
return p;
}
}

smallbins区分配

bins数组中维护的是桶子头的fd,bk指针,一个smallbin头需要两个bins数组元素存放,一个记录fd,一个记录bk,

看图一眼顶针

smallbin1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
if (in_smallbin_range (nb))
{
idx = smallbin_index (nb);//计算nb所在的smallbins下标
bin = bin_at (av, idx);//取smallbin[idx]桶子头节点

if ((victim = last (bin)) != bin)//last(bin)=bin->fd,如果bin的指针还是指向bin说明这个桶子是空的
{
if (victim == 0) /* initialization check */
malloc_consolidate (av);//堆块合并
else
{//下面要将victim从双向链表上摘下来
bck = victim->bk;
if (__glibc_unlikely (bck->fd != victim))//检查victim->bk指向的堆块,其fd指针是否是victim
{
errstr = "malloc(): smallbin double linked list corrupted";
goto errout;
}
set_inuse_bit_at_offset (victim, nb);//经过malloc_consolidate后,如果本块和物理相邻的前块都没使用,则会合并起来
//把victim抠下来,然后把桶子头和victim->bk连起来
bin->bk = bck;
bck->fd = bin;

if (av != &main_arena)
victim->size |= NON_MAIN_ARENA;//标记非主分配区
check_malloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);//获取data基地址指针
alloc_perturb (p, bytes);//填充
return p;
}
}
}

fastbin合并

注意有两种到达此处的可能,要么是一个smallbin的申请,但是没在smallbin中找到对应堆块,要么是一个largebin的申请

前者不会引起fastbin的合并,后者会首先合并fastbin

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
 if (in_smallbin_range(nb))
{
...
if ((victim = last(bin)) != bin) // bin桶子中的最后一个,如果不是bin这个头节点自己,那么说明这个桶子里至少有一个空闲堆块
{
...
return p;
}
}
}


else
{
idx = largebin_index(nb);
if (have_fastchunks(av))
malloc_consolidate(av);
}

fastbin合并之后的堆块,都会被放到unsortedbin中,其目的是给unsortedbin区的尝试分配增大可能性

看上去此时将fastbin进行合并,有损效率,但这是为了防止fastbin截留堆块导致堆空间碎片化(fastbin中的堆块依然保持使用状态,不会被其他临近堆块向前或者向后合并.因此需要对其进行主动合并释放)

并且经验表明,一个程序要么主要使用smallbin大小的堆块,要么主要使用largebin大小的堆块

因此对fastbin的合并操作不会被经常调用

具体的fastbin合并过程,在malloc_consolidate

malloc_consolidate

用于fastbin区的合并

两层循环,外层循环遍历fastbin桶子头

内层循环遍历挂载一个桶子头上的堆块链表

对每个堆块,尝试进行向前合并和向后合并,注意只会分别执行一次

如果尝试向后合并时发现和topchunk相邻则并入topchunk

如果尝试向前合并和向后合并之后,没有并入topchunk会被头插法链接到unsortedbin的双向链表上

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
static void malloc_consolidate(mstate av)
{
mfastbinptr *fb; /* current fastbin being consolidated */
mfastbinptr *maxfb; /* last fastbin (for loop control) */
mchunkptr p; /* current chunk being consolidated */
mchunkptr nextp; /* next chunk to consolidate */
mchunkptr unsorted_bin; /* bin header */
mchunkptr first_unsorted; /* chunk to link to */

/* These have same use as in free() */
mchunkptr nextchunk;
INTERNAL_SIZE_T size;
INTERNAL_SIZE_T nextsize;
INTERNAL_SIZE_T prevsize;
int nextinuse;
mchunkptr bck;
mchunkptr fwd;

/*
If max_fast is 0, we know that av hasn't
yet been initialized, in which case do so below
*/

if (get_max_fast() != 0)//如果max_faxt值为空,则说明堆还没有初始化
{
clear_fastchunks(av);

unsorted_bin = unsorted_chunks(av);

/*
Remove each chunk from fast bin and consolidate it, placing it
then in unsorted bin. Among other reasons for doing this,
placing in unsorted bin avoids needing to calculate actual bins
until malloc is sure that chunks aren't immediately going to be
reused anyway.
*/

maxfb = &fastbin(av, NFASTBINS - 1);
fb = &fastbin(av, 0);
do
{
p = atomic_exchange_acq(fb, 0); // 原子地取出*fb并把桶子头置空,p得到该桶的第一个堆块(整条链)
if (p != 0)
{
do
{ // 释放快桶子p上挂着的所有堆块
check_inuse_chunk(av, p);
nextp = p->fd; // 先取后继

/* Slightly streamlined version of consolidation code in free() */
size = p->size & ~(PREV_INUSE | NON_MAIN_ARENA); // 撤销flag
nextchunk = chunk_at_offset(p, size); // 物理上相邻的下一个堆块
nextsize = chunksize(nextchunk);

if (!prev_inuse(p))//向前合并
{
prevsize = p->prev_size;
size += prevsize;
p = chunk_at_offset(p, -((long)prevsize));//取物理上前一个相邻的堆块基址,作为合并堆块的基址
unlink(av, p, bck, fwd);
}

if (nextchunk != av->top)//物理上相邻的下一个堆块不是topchunk,则尝试向后合并并挂入unsortedbin;否则(else分支)直接并入topchunk
{
nextinuse = inuse_bit_at_offset(nextchunk, nextsize);

if (!nextinuse)
{
size += nextsize;
unlink(av, nextchunk, bck, fwd);//如果物理上后面相邻的堆块没在使用则向后合并
}
else
clear_inuse_bit_at_offset(nextchunk, 0);

first_unsorted = unsorted_bin->fd;//取第一个unsorted_bin上悬挂的堆块
unsorted_bin->fd = p;//头插法
first_unsorted->bk = p;//头插法,将p链接到unsorted_bin和first_unsorted之间

if (!in_smallbin_range(size))//如果这个合并堆块在largebin范围内则初始化其nextsize指针
{
p->fd_nextsize = NULL;
p->bk_nextsize = NULL;
}

set_head(p, size | PREV_INUSE);
p->bk = unsorted_bin;
p->fd = first_unsorted;
set_foot(p, size);
}

else
{
size += nextsize;
set_head(p, size | PREV_INUSE);//p后面就是topchunk,p合并到topchunk
av->top = p;
}

} while ((p = nextp) != 0);
}
} while (fb++ != maxfb);//遍历整个fastbin,直到fastbin桶子头哨兵maxfb
}
else
{
malloc_init_state(av);//初始化堆
check_malloc_state(av);
}
}

unsortedbin区分配

unsortedbin尝试分配 与 归类
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
    while ((victim = unsorted_chunks(av)->bk) != unsorted_chunks(av))//检查unsortedbin中是否确实有堆块,有则从unsortedbin中拿下第一个堆块
{
bck = victim->bk;//后继
if (__builtin_expect(victim->size <= 2 * SIZE_SZ, 0) || __builtin_expect(victim->size > av->system_mem, 0))
malloc_printerr(check_action, "malloc(): memory corruption",
chunk2mem(victim), av);
size = chunksize(victim);//根据size字段获取victim的大小

/*
If a small request, try to use last remainder if it is the
only chunk in unsorted bin. This helps promote locality for
runs of consecutive small requests. This is the only
exception to best-fit, and applies only when there is
no exact fit for a small chunk.
*/

if (in_smallbin_range(nb) &&//如果是一个smallbin的分配申请
bck == unsorted_chunks(av) &&//bck=victim->bk如果这个判断通过,说明刚从unsortedbin中拆下的堆块victim是unsoreted中唯一的堆块
victim == av->last_remainder &&//如果victim是最近一次分配过的堆块,最近使用的堆块页面可能还在内存中,因此有这种优化
(unsigned long)(size) > (unsigned long)(nb + MINSIZE))//如果这个victim堆块满足大小要求
{//这个victim通过了考察,下面将其分割,将满足大小要求的部分给用户,剩下的部分再放回unsortedbin
/* split and reattach remainder */
remainder_size = size - nb;//剩余大小
remainder = chunk_at_offset(victim, nb);//victim的前半部分将要分出去给用户,后面的剩下,remainder是剩下部分的基地址
unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder;//更新unsortedbin中这个唯一堆块的剩余状态
av->last_remainder = remainder;//剩余堆块记为最近使用
remainder->bk = remainder->fd = unsorted_chunks(av);//设置前后指针都为unsortedbin桶子
if (!in_smallbin_range(remainder_size))//如果剩下的部分属于largebin范围,则初始化两个指针
{
remainder->fd_nextsize = NULL;
remainder->bk_nextsize = NULL;
}

set_head(victim, nb | PREV_INUSE |
(av != &main_arena ? NON_MAIN_ARENA : 0));
set_head(remainder, remainder_size | PREV_INUSE);//因为前块被分配,因此remainder的prev_inuse置1
set_foot(remainder, remainder_size);

check_malloced_chunk(av, victim, nb);
void *p = chunk2mem(victim);//p=victim+0x10指向data区域
alloc_perturb(p, bytes);
return p;
}

/* remove from unsorted list */
unsorted_chunks(av)->bk = bck;
bck->fd = unsorted_chunks(av);//将bin从unsortedbin中拿出来,然后将其前后驱连接

/* Take now instead of binning if exact fit */

if (size == nb)//如果尝试分配的大小,恰好和这个unsortedbin堆块一样大则分配之
{
set_inuse_bit_at_offset(victim, size);
if (av != &main_arena)
victim->size |= NON_MAIN_ARENA;
check_malloced_chunk(av, victim, nb);
void *p = chunk2mem(victim);
alloc_perturb(p, bytes);
return p;
}

/* place chunk in bin */

if (in_smallbin_range(size))//如果这个刚摘下来的unsortedbin堆块属于smallbin范围,计算好新的前后邻居
{
victim_index = smallbin_index(size);
bck = bin_at(av, victim_index);
fwd = bck->fd;
}
else//否则说明这unsortedbin堆块属于largebin范围,计算好新的前后邻居
{
victim_index = largebin_index(size);
bck = bin_at(av, victim_index);
fwd = bck->fd;

/* maintain large bins in sorted order */
if (fwd != bck)
{
/* Or with inuse bit to speed comparisons */
size |= PREV_INUSE;
/* if smaller than smallest, bypass loop below */
assert((bck->bk->size & NON_MAIN_ARENA) == 0);
if ((unsigned long)(size) < (unsigned long)(bck->bk->size))
{
fwd = bck;
bck = bck->bk;

victim->fd_nextsize = fwd->fd;
victim->bk_nextsize = fwd->fd->bk_nextsize;
fwd->fd->bk_nextsize = victim->bk_nextsize->fd_nextsize = victim;
}
else
{
assert((fwd->size & NON_MAIN_ARENA) == 0);
while ((unsigned long)size < fwd->size)
{
fwd = fwd->fd_nextsize;
assert((fwd->size & NON_MAIN_ARENA) == 0);
}

if ((unsigned long)size == (unsigned long)fwd->size)
/* Always insert in the second position. */
fwd = fwd->fd;
else
{
victim->fd_nextsize = fwd;
victim->bk_nextsize = fwd->bk_nextsize;
fwd->bk_nextsize = victim;
victim->bk_nextsize->fd_nextsize = victim;
}
bck = fwd->bk;
}
}
else
victim->fd_nextsize = victim->bk_nextsize = victim;
}
//结算,前面不管是largebin还是smallbin,都已经计算好了前后邻居bck,fwd,在此将诸位连接
mark_bin(av, victim_index);//标记binmap
victim->bk = bck;
victim->fd = fwd;
fwd->bk = victim;
bck->fd = victim;

#define MAX_ITERS 10000
if (++iters >= MAX_ITERS)//顶多合并10000次,太多次合并会导致本次请求响应太慢
break;
}
largebin申请

如果到此还没有返回,也就是还没有申请到堆块下面再尝试使用largebin申请

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
 if (!in_smallbin_range(nb))//如果是一个largebin的请求
{
bin = bin_at(av, idx);//取桶子头

/* skip scan if empty or largest chunk is too small */
if ((victim = first(bin)) != bin &&
(unsigned long)(victim->size) >= (unsigned long)(nb))
{//first(bin)=bin->fd,可以看出bin->fd应该是该桶子中最大的一个堆块,然后顺着fd指针越来越小
//如果最大的堆块都不满足nb的需求,显然再往后找更小的无意义,因此首先需要判断最大的堆块是否能满足要求,
//当这个前提条件满足时,再向后找最佳适配的堆块
victim = victim->bk_nextsize;//bk_nextsize是下一个比当前victim小的堆块,victim->bk可能和victim一样大,但是victim->bk_nextsize要么是桶子头,要么一定比当前堆块小
while (((unsigned long)(size = chunksize(victim)) <
(unsigned long)(nb)))
victim = victim->bk_nextsize;//从小开始遍历直到第一个大于等于nb大小的堆块

/* Avoid removing the first entry for a size so that the skip
list does not have to be rerouted. */
if (victim != last(bin) && victim->size == victim->fd->size)
victim = victim->fd;//避免移除跳表的最开始一个导致变更指针,首先尝试寻找该大小的堆块是否有第二块,如果有则放过跳表头

remainder_size = size - nb;//victim块比较抠,只分配nb大小左右,多余的不给
unlink(av, victim, bck, fwd);

/* Exhaust */
if (remainder_size < MINSIZE)//如果发现victim分割后的剩余部分都是下脚料就不抠了
{
set_inuse_bit_at_offset(victim, size);
if (av != &main_arena)
victim->size |= NON_MAIN_ARENA;
}
/* Split */
else//否则victim剩余部分放到unsortedbin
{
remainder = chunk_at_offset(victim, nb);//取victim切割nb字节之后的剩余部分
/* We cannot assume the unsorted list is empty and therefore
have to perform a complete insert here. */
bck = unsorted_chunks(av);
fwd = bck->fd;
if (__glibc_unlikely(fwd->bk != bck))
{
errstr = "malloc(): corrupted unsorted chunks";
goto errout;
}
remainder->bk = bck;//头插法
remainder->fd = fwd;
bck->fd = remainder;
fwd->bk = remainder;
if (!in_smallbin_range(remainder_size))
{
remainder->fd_nextsize = NULL;//如果剩余大小还是largebin大小,则此时预先将指针清零
remainder->bk_nextsize = NULL;
}
set_head(victim, nb | PREV_INUSE |
(av != &main_arena ? NON_MAIN_ARENA : 0));
set_head(remainder, remainder_size | PREV_INUSE);
set_foot(remainder, remainder_size);
}
check_malloced_chunk(av, victim, nb);
void *p = chunk2mem(victim);
alloc_perturb(p, bytes);
return p;
}
}
后续largebin申请

如果到此还没有分配,说明当前largebin里面没有找到合适的堆块,那么向后面更大的largebin桶子中找

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
/*
Search for a chunk by scanning bins, starting with next largest
bin. This search is strictly by best-fit; i.e., the smallest
(with ties going to approximately the least recently used) chunk
that fits is selected.

The bitmap avoids needing to check that most blocks are nonempty.
The particular case of skipping all bins during warm-up phases
when no chunks have been returned yet is faster than it might look.
*/

++idx; //取下一个桶子的下标
bin = bin_at(av, idx); //首先查binmap,看看下一个桶子是否确实有空闲堆块
block = idx2block(idx);
map = av->binmap[block];
bit = idx2bit(idx);

for (;;)
{
/* Skip rest of block if there are no more set bits in this block. */
if (bit > map || bit == 0)//如果bit>map只可能是map=0,也就是当前block是空的
{
do
{
if (++block >= BINMAPSIZE) /* out of bins */
goto use_top;//如果发现block遍历了4个block,全是空的,也就是largebin空了,直接使用top_chunk分配
} while ((map = av->binmap[block]) == 0);//跳过所有空的largebin

bin = bin_at(av, (block << BINMAPSHIFT));
bit = 1;
}

/* Advance to bin with set bit. There must be one. */
while ((bit & map) == 0)//尝试找一个map对应block中有堆块的桶子
{
bin = next_bin(bin);
bit <<= 1;//左移也就是往largebin更大的方向找
assert(bit != 0);
}//退出循环时,bin对应的桶子中一定有堆块

/* Inspect the bin. It is likely to be non-empty */
victim = last(bin);

/* If a false alarm (empty bin), clear the bit. */
if (victim == bin)//检查是否该bin中至少有一个堆块,这是因为map是懒修改的
{//也就是说,map中标记有的不一定有,但是map中标记没有的一定没有
av->binmap[block] = map &= ~bit; /* Write through */
//本桶子中确实没有,但也不是没有功劳,起码可以修改map,下一次查找一定不会查本桶子
bin = next_bin(bin);
bit <<= 1;//继续向更大的largebin桶子寻找
}

else//本桶子中确实有至少一个堆块
{
size = chunksize(victim);

/* We know the first chunk in this bin is big enough to use. */
assert((unsigned long)(size) >= (unsigned long)(nb));

remainder_size = size - nb;

/* unlink */
unlink(av, victim, bck, fwd);

/* Exhaust */
if (remainder_size < MINSIZE)//下脚料一起送人
{
set_inuse_bit_at_offset(victim, size);
if (av != &main_arena)
victim->size |= NON_MAIN_ARENA;
}

/* Split */
else //切割指定大小的堆块,剩下的送给unsortedbin
{
remainder = chunk_at_offset(victim, nb);

/* We cannot assume the unsorted list is empty and therefore
have to perform a complete insert here. */
bck = unsorted_chunks(av);
fwd = bck->fd;
if (__glibc_unlikely(fwd->bk != bck))
{
errstr = "malloc(): corrupted unsorted chunks 2";
goto errout;
}
remainder->bk = bck;
remainder->fd = fwd;
bck->fd = remainder;
fwd->bk = remainder;

/* advertise as last remainder */
if (in_smallbin_range(nb))
av->last_remainder = remainder;
if (!in_smallbin_range(remainder_size))
{
remainder->fd_nextsize = NULL;
remainder->bk_nextsize = NULL;
}
set_head(victim, nb | PREV_INUSE |
(av != &main_arena ? NON_MAIN_ARENA : 0));
set_head(remainder, remainder_size | PREV_INUSE);
set_foot(remainder, remainder_size);
}
check_malloced_chunk(av, victim, nb);
void *p = chunk2mem(victim);
alloc_perturb(p, bytes);
return p;
}
}
unlink

从双向链表上摘下一个堆块P,把它的前后驱重新链接起来

针对smallbin和unsortedbin中的堆块,有如下检查

1
2
P->bk->fd==P
P->fd->bk==P

如果是一个largebin的堆块,还会有

1
2
P->fd_nextsize->bk_nextsize==P
P->bk_nextsize->fd_nextsize==P

对于smallbin和unsortedbin中的堆块,如果检查通过,则执行

1
2
FD->bk = BK;                                                                                                          \
BK->fd = FD;

将P的前后驱连接起来

对于largebin的堆块,还会执行

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
if (FD->fd_nextsize == NULL)                                                                                        \
{ \
if (P->fd_nextsize == P) \
FD->fd_nextsize = FD->bk_nextsize = FD; \
else \
{ \
FD->fd_nextsize = P->fd_nextsize; \
FD->bk_nextsize = P->bk_nextsize; \
P->fd_nextsize->bk_nextsize = FD; \
P->bk_nextsize->fd_nextsize = FD; \
} \
} \
else \
{ \
P->fd_nextsize->bk_nextsize = P->bk_nextsize; \
P->bk_nextsize->fd_nextsize = P->fd_nextsize; \
}

完整代码如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
/* Take a chunk off a bin list.
   P is the chunk being removed; FD/BK receive its fd/bk neighbours.
   Fixed transcription defects: every macro line now ends with a '\'
   continuation (the bare '//' comment lines previously terminated the
   macro body early), and the statement that had been split across two
   lines ("FD->bk_nextsize" / "= P->bk_nextsize;") is rejoined, so the
   macro is syntactically valid as printed. */
#define unlink(AV, P, BK, FD)                                                 \
  {                                                                           \
    FD = P->fd;                                                               \
    BK = P->bk;                                                               \
    /* Integrity check: successor's bk and predecessor's fd must point       \
       back at P, otherwise the list has been corrupted. */                   \
    if (__builtin_expect(FD->bk != P || BK->fd != P, 0))                      \
      malloc_printerr(check_action, "corrupted double-linked list", P, AV);   \
    else                                                                      \
    {                                                                         \
      /* Link P's neighbours to each other, detaching P from the list. */    \
      FD->bk = BK;                                                            \
      BK->fd = FD;                                                            \
      /* Large-bin chunks additionally sit on the nextsize skip list. */      \
      if (!in_smallbin_range(P->size) && __builtin_expect(P->fd_nextsize != NULL, 0)) \
      {                                                                       \
        if (__builtin_expect(P->fd_nextsize->bk_nextsize != P, 0) || __builtin_expect(P->bk_nextsize->fd_nextsize != P, 0)) \
          malloc_printerr(check_action,                                       \
                          "corrupted double-linked list (not small)",         \
                          P, AV);                                             \
        if (FD->fd_nextsize == NULL)                                          \
        {                                                                     \
          /* FD is the same size as P: promote FD into P's skip-list slot. */ \
          if (P->fd_nextsize == P)                                            \
            FD->fd_nextsize = FD->bk_nextsize = FD;                           \
          else                                                                \
          {                                                                   \
            FD->fd_nextsize = P->fd_nextsize;                                 \
            FD->bk_nextsize = P->bk_nextsize;                                 \
            P->fd_nextsize->bk_nextsize = FD;                                 \
            P->bk_nextsize->fd_nextsize = FD;                                 \
          }                                                                   \
        }                                                                     \
        else                                                                  \
        {                                                                     \
          /* P was a skip-list node: splice it out of the nextsize links. */  \
          P->fd_nextsize->bk_nextsize = P->bk_nextsize;                       \
          P->bk_nextsize->fd_nextsize = P->fd_nextsize;                       \
        }                                                                     \
      }                                                                       \
    }                                                                         \
  }

topchunk申请

如果还不行,尝试使用topchunk分配

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
use_top:
/*
If large enough, split off the chunk bordering the end of memory
(held in av->top). Note that this is in accord with the best-fit
search rule. In effect, av->top is treated as larger (and thus
less well fitting) than any other available chunk since it can
be extended to be as large as necessary (up to system
limitations).

We require that av->top always exists (i.e., has size >=
MINSIZE) after initialization, so if it would otherwise be
exhausted by current request, it is replenished. (The main
reason for ensuring it exists is that we may need MINSIZE space
to put in fenceposts in sysmalloc.)
*/

victim = av->top;
size = chunksize(victim);

if ((unsigned long)(size) >= (unsigned long)(nb + MINSIZE))
{
remainder_size = size - nb;
remainder = chunk_at_offset(victim, nb);
av->top = remainder;
set_head(victim, nb | PREV_INUSE |
(av != &main_arena ? NON_MAIN_ARENA : 0));
set_head(remainder, remainder_size | PREV_INUSE);

check_malloced_chunk(av, victim, nb);
void *p = chunk2mem(victim);
alloc_perturb(p, bytes);
return p;
}

/* When we are using atomic ops to free fast chunks we can get
here for all block sizes. */
else if (have_fastchunks(av))
{
malloc_consolidate(av);
/* restore original bin index */
if (in_smallbin_range(nb))
idx = smallbin_index(nb);
else
idx = largebin_index(nb);
}
sysmalloc申请

如果还不行,尝试sysmalloc

1
2
3
4
5
6
7
8
9
10
/*
Otherwise, relay to handle system-dependent cases
*/
else
{
void *p = sysmalloc(nb, av);
if (p != NULL)
alloc_perturb(p, bytes);
return p;
}

free

1
strong_alias(__libc_free, __free) strong_alias(__libc_free, free)

__libc_free

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
/* Public free() entry point: dispatches to __free_hook if one is
   registered, releases mmap-allocated chunks via munmap_chunk, and
   otherwise hands the chunk to _int_free on its owning arena. */
void __libc_free(void *mem)
{
mstate ar_ptr;
mchunkptr p; /* chunk corresponding to mem */

void (*hook)(void *, const void *) = atomic_forced_read(__free_hook); /* first try the hook function, if one has been registered */
if (__builtin_expect(hook != NULL, 0))
{
(*hook)(mem, RETURN_ADDRESS(0));
return;
}

if (mem == 0) /* free(0) has no effect */
return;

p = mem2chunk(mem); /* p points at the chunk header; mem is the data area (2*SIZE_SZ past p) */

if (chunk_is_mmapped(p)) /* release mmapped memory. */
{
/* see if the dynamic brk/mmap threshold needs adjusting */
if (!mp_.no_dyn_threshold && p->size > mp_.mmap_threshold && p->size <= DEFAULT_MMAP_THRESHOLD_MAX)
{
mp_.mmap_threshold = chunksize(p);
mp_.trim_threshold = 2 * mp_.mmap_threshold;
LIBC_PROBE(memory_mallopt_free_dyn_thresholds, 2,
mp_.mmap_threshold, mp_.trim_threshold);
}
munmap_chunk(p); /* chunk p was allocated by mmap, so release it with munmap */
return;
}

ar_ptr = arena_for_chunk(p);
_int_free(ar_ptr, p, 0); /* default release path implemented by glibc's _int_free */
}
__free_hook
1
void weak_variable (*__free_hook)(void *__ptr,const void *) = NULL;

__malloc_hook同理,如果本钩子函数有注册过则调用之进行释放,不会再调用glibc自己实现的_int_free

_int_free

实际上调用的释放函数

算法流程

free

整个流程要比分配_int_malloc简单点

局部变量

1
2
3
4
5
6
7
8
9
10
11
12
13
INTERNAL_SIZE_T size;     /* its size */		//用于保存请求堆块的整体大小(包括元数据)
mfastbinptr *fb; /* associated fastbin */ //fastbin桶子
mchunkptr nextchunk; /* next contiguous chunk */ //下一个堆块
INTERNAL_SIZE_T nextsize; /* its size */ //下一个堆块的大小
int nextinuse; /* true if nextchunk is used */ //下一个堆块是否在使用,合并堆块时用
INTERNAL_SIZE_T prevsize; /* size of previous contiguous chunk */ //前块大小
mchunkptr bck; /* misc temp for linking */ //头插法前后邻居
mchunkptr fwd; /* misc temp for linking */

const char *errstr = NULL;
int locked = 0;

size = chunksize(p); //size当前要申请的堆块的大小(包括元数据)

释放前检查

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
/* Little security check which won't hurt performance: the
allocator never wrapps around at the end of the address space.
Therefore we can exclude some size values which might appear
here by accident or by "design" from some intruder. */
if (__builtin_expect((uintptr_t)p > (uintptr_t)-size, 0) || __builtin_expect(misaligned_chunk(p), 0))
{
errstr = "free(): invalid pointer";
errout:
if (!have_lock && locked)
(void)mutex_unlock(&av->mutex);
malloc_printerr(check_action, errstr, chunk2mem(p), av);
return;
}
/* We know that each chunk is at least MINSIZE bytes in size or a
multiple of MALLOC_ALIGNMENT. */
if (__glibc_unlikely(size < MINSIZE || !aligned_OK(size)))
{
errstr = "free(): invalid size";
goto errout;
}

check_inuse_chunk(av, p)

检查锁和对齐,整个释放过程可以看成一个事务,由锁保证一致性

fastbin区释放

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
  if ((unsigned long)(size) <= (unsigned long)(get_max_fast())//如果释放堆块大小落在fastbin范围内

#if TRIM_FASTBINS
/*
If TRIM_FASTBINS set, don't place chunks
bordering top into fastbins
*/
&& (chunk_at_offset(p, size) != av->top)//检查后面是否和topchunk相邻,(如果相邻需要合并,不会进入fastbin)
#endif
)
{

if (__builtin_expect(chunk_at_offset(p, size)->size <= 2 * SIZE_SZ, 0) || __builtin_expect(chunksize(chunk_at_offset(p, size)) >= av->system_mem, 0))
{//首先检查物理上相邻的下一个堆块的size是否大于最小值2*SIZE_SZ,并且没有超过本分配区可分配的范围
/* We might not have a lock at this point and concurrent modifications
of system_mem might have let to a false positive. Redo the test
after getting the lock. */
if (have_lock || ({
assert(locked == 0);
mutex_lock(&av->mutex);
locked = 1;
chunk_at_offset(p, size)->size <= 2 * SIZE_SZ || chunksize(chunk_at_offset(p, size)) >= av->system_mem;
}))
{
errstr = "free(): invalid next size (fast)";
goto errout;
}
if (!have_lock)
{
(void)mutex_unlock(&av->mutex);
locked = 0;
}
}
//已获得锁
free_perturb(chunk2mem(p), size - 2 * SIZE_SZ);//堆块的mem数据区清零

set_fastchunks(av);
unsigned int idx = fastbin_index(size);//计算fastbin桶子下标
fb = &fastbin(av, idx);//获取fastbin桶子头

/* Atomically link P to its fastbin: P->FD = *FB; *FB = P; */
mchunkptr old = *fb, old2;//old指向fastbin对应桶子的第一个堆块
unsigned int old_idx = ~0u;
do
{
/* Check that the top of the bin is not the record we are going to add
(i.e., double free). */
if (__builtin_expect(old == p, 0))//检查p是否已经被刚刚释放过一次
{
errstr = "double free or corruption (fasttop)";
goto errout;
}
/* Check that size of fastbin chunk at the top is the same as
size of the chunk that we are adding. We can dereference OLD
only if we have the lock, otherwise it might have already been
deallocated. See use of OLD_IDX below for the actual check. */
if (have_lock && old != NULL)
old_idx = fastbin_index(chunksize(old));
p->fd = old2 = old;//把p挂到fastbin上(头插法),fastbin[idx]->p->old->...
} while ((old = catomic_compare_and_exchange_val_rel(fb, p, old2)) != old2);

if (have_lock && old != NULL && __builtin_expect(old_idx != idx, 0))
{
errstr = "invalid fastbin entry (free)";
goto errout;
}
}

堆块合并

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
  else if (!chunk_is_mmapped(p))//p不能是mmap映射的
{
if (!have_lock)
{
(void)mutex_lock(&av->mutex);
locked = 1;
}

nextchunk = chunk_at_offset(p, size);//取物理上下一个相邻的堆块基地址

/* Lightweight tests: check whether the block is already the
top block. */
if (__glibc_unlikely(p == av->top))//p不能是topchunk
{
errstr = "double free or corruption (top)";
goto errout;
}
/* Or whether the next chunk is beyond the boundaries of the arena. */
if (__builtin_expect(contiguous(av) && (char *)nextchunk >= ((char *)av->top + chunksize(av->top)), 0))
{//如果下一个堆块溢出到topchunk内部了
errstr = "double free or corruption (out)";
goto errout;
}
/* Or whether the block is actually not marked used. */
if (__glibc_unlikely(!prev_inuse(nextchunk)))
{//如果物理上的后块没有记录本块的释放状态
errstr = "double free or corruption (!prev)";
goto errout;
}

nextsize = chunksize(nextchunk);//下一个堆块的大小
if (__builtin_expect(nextchunk->size <= 2 * SIZE_SZ, 0) || __builtin_expect(nextsize >= av->system_mem, 0))
{//下一堆块的大小必须在合法范围(2*SIZE_SZ,av->system_mem)之内
errstr = "free(): invalid next size (normal)";
goto errout;
}

free_perturb(chunk2mem(p), size - 2 * SIZE_SZ);//p数据区清零

/* consolidate backward */
if (!prev_inuse(p))//向前合并
{
prevsize = p->prev_size;
size += prevsize;
p = chunk_at_offset(p, -((long)prevsize));
unlink(av, p, bck, fwd);
}

if (nextchunk != av->top)//如果后块时topchunk则合并到topchunk,否则尝试向后合并
{
/* get and clear inuse bit */
nextinuse = inuse_bit_at_offset(nextchunk, nextsize);

/* consolidate forward */
if (!nextinuse)//如果后一堆块空闲则向后合并
{
unlink(av, nextchunk, bck, fwd);
size += nextsize;
}
else
clear_inuse_bit_at_offset(nextchunk, 0);

/*
Place the chunk in unsorted chunk list. Chunks are
not placed into regular bins until after they have
been given one chance to be used in malloc.
*/

bck = unsorted_chunks(av);//释放的堆块放到unsortedbin中,下一次malloc才可能重新安排新去处
fwd = bck->fd;
if (__glibc_unlikely(fwd->bk != bck))
{
errstr = "free(): corrupted unsorted chunks";
goto errout;
}
p->fd = fwd;
p->bk = bck;
if (!in_smallbin_range(size))//如果是largebin的堆块则现在就把fd_nextsize和bk_nextsize清零
{
p->fd_nextsize = NULL;
p->bk_nextsize = NULL;
}
bck->fd = p;
fwd->bk = p;

set_head(p, size | PREV_INUSE);
set_foot(p, size);

check_free_chunk(av, p);
}

/*
If the chunk borders the current high end of memory,
consolidate into top
*/

else//此else意味着向后与topchunk相邻,则合并到topchunk
{
size += nextsize;
set_head(p, size | PREV_INUSE);
av->top = p;
check_chunk(av, p);
}

/*
If freeing a large space, consolidate possibly-surrounding
chunks. Then, if the total unused topmost memory exceeds trim
threshold, ask malloc_trim to reduce top.

Unless max_fast is 0, we don't know if there are fastbins
bordering top, so we cannot tell for sure whether threshold
has been reached unless fastbins are consolidated. But we
don't want to consolidate on each free. As a compromise,
consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD
is reached.
*/

if ((unsigned long)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD)
{//如果size大于fastbin合并阈值65536
if (have_fastchunks(av))
malloc_consolidate(av);//清空fastbin,该合并合并,放到unsortedbin或者topchunk

if (av == &main_arena)//如果是主分配区
{
#ifndef MORECORE_CANNOT_TRIM
if ((unsigned long)(chunksize(av->top)) >=
(unsigned long)(mp_.trim_threshold))//如果topchunk太大了就得修剪一下
systrim(mp_.top_pad, av);
#endif
}
else//否则就是非主分配区的辅助堆
{
/* Always try heap_trim(), even if the top chunk is not
large, because the corresponding heap might go away. */
heap_info *heap = heap_for_ptr(top(av));

assert(heap->ar_ptr == av);
heap_trim(heap, mp_.top_pad);
}
}

if (!have_lock)
{
assert(locked);//保证事务完整性
(void)mutex_unlock(&av->mutex);
}
}

mmap映射区释放

1
2
3
4
else
{
munmap_chunk(p);
}