Tcache机制

September 10, 2019 PWN 访问: 43 次

在CTF里面,设计到tcachepwn题一般都是在ubuntu18.04搭建的,所以我先准备了一个ubuntu18.04的虚拟机,ldd的版本是2.27,所以我也准备了一份glibc-2.27的源码(网上有下载地址)
tcache全程是:thread local caching,它为每个线程创建一个缓存(cache),从而实现无锁分配算法。

tcache数据结构

通过查看glibc源码我们可以发现一些tcache的数据结构
glibc在编译时使用USE_TCACHE条件来开启tcache机制,如下所示:
/glibc-2.27/malloc/malloc.c:302

#if USE_TCACHE
/* We want 64 entries.  This is an arbitrary limit, which tunables can reduce.  */
# define TCACHE_MAX_BINS        64
# define MAX_TCACHE_SIZE    tidx2usize (TCACHE_MAX_BINS-1)
/* Only used to pre-fill the tunables.  */
# define tidx2usize(idx)    (((size_t) idx) * MALLOC_ALIGNMENT + MINSIZE - SIZE_SZ)
/* When "x" is from chunksize().  */
# define csize2tidx(x) (((x) - MINSIZE + MALLOC_ALIGNMENT - 1) / MALLOC_ALIGNMENT)
/* When "x" is a user-provided size.  */
# define usize2tidx(x) csize2tidx (request2size (x))
/* With rounding and alignment, the bins are...
   idx 0   bytes 0..24 (64-bit) or 0..12 (32-bit)
   idx 1   bytes 25..40 or 13..20
   idx 2   bytes 41..56 or 21..28
   etc.  */
/* This is another arbitrary limit, which tunables can change.  Each
   tcache bin will hold at most this number of chunks.  */
# define TCACHE_FILL_COUNT 7
#endif

从上面的源码上我们可以得到一些关于tcache的信息
- 有64个tcache bin
- 每个bin中最多有7个chunk
- 在64位机器上,chunk的大小以16字节递增(24~1024),在32位上,chunk的大小以8字节递增(12~512)
综上所诉,tcache bin 中只存放不是largebin中的chunk
demo测试一下:

#include<stdio.h>
#include<stdlib.h>
int main()
{
    char *heap[10];
    int i;
    for(i=0;i<10;i++)
    {
        heap[i]=malloc(0x20);
    }
    free(heap[0]);
    free(heap[1]);
    free(heap[2]);
    free(heap[3]);
    free(heap[4]);
    free(heap[5]);
    free(heap[6]);
    free(heap[7]);
    free(heap[8]);
    free(heap[9]);
    return 0;
}

free第7个之后,再freechunk,就会进入到fast bin

pwndbg> bins
tcachebins
0x30 [  7]: 0x602380 —▸ 0x602350 —▸ 0x602320 —▸ 0x6022f0 —▸ 0x6022c0 —▸ 0x602290 —▸ 0x602260 ◂— 0x0
fastbins
0x20: 0x0
0x30: 0x602400 —▸ 0x6023d0 —▸ 0x6023a0 ◂— 0x0
0x40: 0x0
0x50: 0x0
0x60: 0x0
0x70: 0x0
0x80: 0x0
unsortedbin
all: 0x0
smallbins
empty
largebins
empty
pwndbg>

然后是引入了两个新的结构体:

/* We overlay this structure on the user-data portion of a chunk when
   the chunk is stored in the per-thread cache.  */
typedef struct tcache_entry
{
  struct tcache_entry *next;
} tcache_entry;
/* There is one of these for each thread, which contains the
   per-thread cache (hence "tcache_perthread_struct").  Keeping
   overall size low is mildly important.  Note that COUNTS and ENTRIES
   are redundant (we could have just counted the linked list each
   time), this is for performance reasons.  */
typedef struct tcache_perthread_struct
{
  char counts[TCACHE_MAX_BINS];
  tcache_entry *entries[TCACHE_MAX_BINS];
} tcache_perthread_struct;

首先第一个是tcache_entry,用于连接空闲的chunk结构体,tcache_entry结构体中的next指针指向下一个大小相同的chunk

这里需要注意的是tcache_entry结构体中的next指针虽然指向的是下一个大小的chunk,但是指向的是user data的的地址

第二个结构体:tcache_perthread_struct,每个线程(Thread)都会维护一个tcache_perthread_struct结构体,它是整个tcache的管理结构,前面有定义到一共有64个Tcached bin,所以在tcache_perthread_struct结构体中,counts数组的个数就是64个,该数组元素代表着相对于Tcache binchunk的个数,entries指针是tcache_entry 类型的,也就是说有64个tcache_entry类型的指针,指向对应的单链表。

源码分析

/root/glibc-2.27/malloc/malloc.c:3026

void *
__libc_malloc (size_t bytes)
{
  mstate ar_ptr;
  void *victim;
  void *(*hook) (size_t, const void *)
    = atomic_forced_read (__malloc_hook);
  if (__builtin_expect (hook != NULL, 0))
    return (*hook)(bytes, RETURN_ADDRESS (0));
#if USE_TCACHE
  /* int_free also calls request2size, be careful to not pad twice.  */
  size_t tbytes;
  checked_request2size (bytes, tbytes);
  size_t tc_idx = csize2tidx (tbytes);
  MAYBE_INIT_TCACHE ();
  DIAG_PUSH_NEEDS_COMMENT;
  if (tc_idx < mp_.tcache_bins
      /*&& tc_idx < TCACHE_MAX_BINS*/ /* to appease gcc */
      && tcache
      && tcache->entries[tc_idx] != NULL)
    {
      return tcache_get (tc_idx);
    }
  DIAG_POP_NEEDS_COMMENT;
#endif
  if (SINGLE_THREAD_P)
    {
      victim = _int_malloc (&main_arena, bytes);
      assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
          &main_arena == arena_for_chunk (mem2chunk (victim)));
      return victim;
    }
  arena_get (ar_ptr, bytes);
  victim = _int_malloc (ar_ptr, bytes);
  /* Retry with another arena only if we were able to find a usable arena
     before.  */
  if (!victim && ar_ptr != NULL)
    {
      LIBC_PROBE (memory_malloc_retry, 1, bytes);
      ar_ptr = arena_get_retry (ar_ptr, bytes);
      victim = _int_malloc (ar_ptr, bytes);
    }
  if (ar_ptr != NULL)
    __libc_lock_unlock (ar_ptr->mutex);
  assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
          ar_ptr == arena_for_chunk (mem2chunk (victim)));
  return victim;
}
libc_hidden_def (__libc_malloc)

其中#if USE_TCACHE来判断是否开启tcache机制,如果开启的话就会进入到if语句中,在这里面调用了一个宏定义MAYBE_INIT_TCACHE
/root/glibc-2.27/malloc/malloc.c:3011

# define MAYBE_INIT_TCACHE() \
  if (__glibc_unlikely (tcache == NULL)) \
    tcache_init();
#else  /* !USE_TCACHE */
# define MAYBE_INIT_TCACHE()

然后在这个宏中先判断tcache是不是为Null(也就是判断是不是第一次malloc),如果是的话就会调用tcache_init函数
/root/glibc-2.27/malloc/malloc.c:2977

static void
tcache_init(void)
{
  mstate ar_ptr;
  void *victim = 0;
  const size_t bytes = sizeof (tcache_perthread_struct);
  if (tcache_shutting_down)
    return;
  arena_get (ar_ptr, bytes);
  victim = _int_malloc (ar_ptr, bytes);
  if (!victim && ar_ptr != NULL)
    {
      ar_ptr = arena_get_retry (ar_ptr, bytes);
      victim = _int_malloc (ar_ptr, bytes);
    }
  if (ar_ptr != NULL)
    __libc_lock_unlock (ar_ptr->mutex);
  /* In a low memory situation, we may not be able to allocate memory
     - in which case, we just keep trying later.  However, we
     typically do this very early, so either there is sufficient
     memory, or there isn't enough memory to do non-trivial
     allocations anyway.  */
  if (victim)
    {
      tcache = (tcache_perthread_struct *) victim;
      memset (tcache, 0, sizeof (tcache_perthread_struct));
    }
}

本来想着要在gdb中进去这个函数去调试,但是程序到停3042行下了,回头一下,tcache_init是在MAYBE_INIT_TCACHE里面调用的,宏定义的编译时预处理的,在gdb中进不去,很难受

In file: /root/glibc-2.27/malloc/malloc.c
   3037   /* int_free also calls request2size, be careful to not pad twice.  */
   3038   size_t tbytes;
   3039   checked_request2size (bytes, tbytes);
   3040   size_t tc_idx = csize2tidx (tbytes);
   3041
 ► 3042   MAYBE_INIT_TCACHE ();
   3043
   3044   DIAG_PUSH_NEEDS_COMMENT;
   3045   if (tc_idx < mp_.tcache_bins
   3046       /*&& tc_idx < TCACHE_MAX_BINS*/ /* to appease gcc */
   3047       && tcache

tcache_init函数中,调用_int_malloc函数分配heap的大小是sizeof (tcache_perthread_struct)==64+8*64==0x240,所以chunk的大小就是0x250,如图所示,这是第一个chunk申请之后的heap分布

pwndbg> heap
0x602000 PREV_INUSE {
  mchunk_prev_size = 0,
  mchunk_size = 593, //0x251
  fd = 0x0,
  bk = 0x0,
  fd_nextsize = 0x0,
  bk_nextsize = 0x0
}
0x602250 FASTBIN {
  mchunk_prev_size = 0,
  mchunk_size = 33, //0x20
  fd = 0x0,
  bk = 0x0,
  fd_nextsize = 0x0,
  bk_nextsize = 0x20d91
}
0x602270 PREV_INUSE {
  mchunk_prev_size = 0,
  mchunk_size = 134545,
  fd = 0x0,
  bk = 0x0,
  fd_nextsize = 0x0,
  bk_nextsize = 0x0
}
pwndbg>

tcache机制已经init之后再次malloc时,就会操作tcache
/root/glibc-2.27/malloc/malloc.c:3045

if (tc_idx < mp_.tcache_bins
      /*&& tc_idx < TCACHE_MAX_BINS*/ /* to appease gcc */
      && tcache
      && tcache->entries[tc_idx] != NULL)
    {
      return tcache_get (tc_idx);
    }

tc_idx是根据size计算出来的tcache binindex,判断是不是小于最大的index,也就是64,再判断tcache是否初始化,再判断对应indexbin中是不是Null(空),如果都符合,那么就去这个bin中取出合适的chunk;然后再看tcache_get函数的源码:
/root/glibc-2.27/malloc/malloc.c:2935

/* Caller must ensure that we know tc_idx is valid and there's
   available chunks to remove.  */
static __always_inline void *
tcache_get (size_t tc_idx)
{
  tcache_entry *e = tcache->entries[tc_idx];
  assert (tc_idx < TCACHE_MAX_BINS);
  assert (tcache->entries[tc_idx] > 0);
  tcache->entries[tc_idx] = e->next;
  --(tcache->counts[tc_idx]);
  return (void *) e;
}

首先将该tcache bin中第一个chunk赋值到针指e,然后将e的下一个赋值到该tcache bin中第一个chunk上,对应的counts值减1,最后将e返回给用户

tcache bin 中也是遵循先进后出原则

然后再来看一下chunk是怎么进入到tcache bins中的
__libc_free:/root/glibc-2.27/malloc/malloc.c:3084

void
__libc_free (void *mem)
{
  mstate ar_ptr;
  mchunkptr p;                          /* chunk corresponding to mem */
  void (*hook) (void *, const void *)
    = atomic_forced_read (__free_hook);
  if (__builtin_expect (hook != NULL, 0))
    {
      (*hook)(mem, RETURN_ADDRESS (0));
      return;
    }
  if (mem == 0)                              /* free(0) has no effect */
    return;
  p = mem2chunk (mem);
  if (chunk_is_mmapped (p))                       /* release mmapped memory. */
    {
      /* See if the dynamic brk/mmap threshold needs adjusting.
     Dumped fake mmapped chunks do not affect the threshold.  */
      if (!mp_.no_dyn_threshold
          && chunksize_nomask (p) > mp_.mmap_threshold
          && chunksize_nomask (p) <= DEFAULT_MMAP_THRESHOLD_MAX
      && !DUMPED_MAIN_ARENA_CHUNK (p))
        {
          mp_.mmap_threshold = chunksize (p);
          mp_.trim_threshold = 2 * mp_.mmap_threshold;
          LIBC_PROBE (memory_mallopt_free_dyn_thresholds, 2,
                      mp_.mmap_threshold, mp_.trim_threshold);
        }
      munmap_chunk (p);
      return;
    }
  MAYBE_INIT_TCACHE ();
  ar_ptr = arena_for_chunk (p);
  _int_free (ar_ptr, p, 0);
}
libc_hidden_def (__libc_free)

经过和glibc-2.23的源码相比,就多了一行MAYBE_INIT_TCACHE ();,而这个宏定义是用来初始化tcachebin的,所以关键代码应该在_int_free,跟进:
/root/glibc-2.27/malloc/malloc.c:4165

#if USE_TCACHE
  {
    size_t tc_idx = csize2tidx (size);
    if (tcache
    && tc_idx < mp_.tcache_bins
    && tcache->counts[tc_idx] < mp_.tcache_count)
      {
    tcache_put (p, tc_idx);
    return;
      }
  }
#endif

首先通过size找到tcachebinindex,然后判断tcache是否存在,再判断算出来的index是否小于最大的index(64),最后再判断index对应的tcache binchunk的数量是不是大于最大容量(7),如果都符合条件的话,则调用tcache_put函数,跟进来看一下:
/root/glibc-2.27/malloc/malloc.c:2925

static __always_inline void
tcache_put (mchunkptr chunk, size_t tc_idx)
{
  tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
  assert (tc_idx < TCACHE_MAX_BINS);
  e->next = tcache->entries[tc_idx];
  tcache->entries[tc_idx] = e;
  ++(tcache->counts[tc_idx]);
}

首先通过chunk2mem来获取到要freechunkuser date的首地址,然后再判断算出来的index是否小于最大的index(64),然后是将该chunknext指针指向当前tcache bin中的第一个 chunk,再然后就是把该tcache bin的第一个chunk的地址设置成该chunk,相对应的counts值加一
chunk进入到tcache中的几种类型:
- 如果申请的chunk在fastbin中取出来的,而且该fastbin中还有其他chunk,程序会把该bin中其他的chunk全部转移到相应index的tcachebin中
- smallbin的chunk进入tcache中的原理和上述一样
- 在unsortedbin上循环操作时,,当找到合适的chunk,并不直接返回,而是先放到tcache中,然后在进行处理

添加新评论