/* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
/*
* Slabs memory allocation, based on powers-of-N. Slabs are up to 1MB in size
* and are divided into chunks. The chunk sizes start off at the size of the
* "item" structure plus space for a small key and value. They increase by
* a multiplier factor from there, up to half the maximum slab size. The last
* slab size is always 1MB, since that's the maximum item size allowed by the
* memcached protocol.
*/
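
/*
 * Illustrative only (not authoritative): with memcached's default growth
 * factor of 1.25 the chunk sizes come out roughly as 96, 120, 152, 192, ...
 * bytes per class. The exact values depend on sizeof(item),
 * settings.chunk_size and CHUNK_ALIGN_BYTES, so treat these numbers as an
 * example of the progression, not a specification.
 */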
#include "memcached.h"
#include <sys/stat.h>
#include <sys/socket.h>
#include <sys/resource.h>
#include <fcntl.h>
#include <netinet/in.h>
#include <errno.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <signal.h>
#include <assert.h>
#include <pthread.h>

//#define DEBUG_SLAB_MOVER
/* powers-of-N allocation structures */

typedef struct {
    unsigned int size;      /* sizes of items */
    unsigned int perslab;   /* how many items per slab */

    void *slots;            /* list of item ptrs */
    unsigned int sl_curr;   /* total free items in list */

    unsigned int slabs;     /* how many slabs were allocated for this class */

    void **slab_list;       /* array of slab pointers */
    unsigned int list_size; /* size of prev array */

    size_t requested;       /* The number of requested bytes */
} slabclass_t;

static slabclass_t slabclass[MAX_NUMBER_OF_SLAB_CLASSES];
static size_t mem_limit = 0;
static size_t mem_malloced = 0;
/* If the memory limit has been hit once. Used as a hint to decide when to
* early-wake the LRU maintenance thread */
static bool mem_limit_reached = false;
static int power_largest;

static void *mem_base = NULL;
static void *mem_current = NULL;
static size_t mem_avail = 0;

/**
 * Access to the slab allocator is protected by this lock
 */
static pthread_mutex_t slabs_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t slabs_rebalance_lock = PTHREAD_MUTEX_INITIALIZER;

/*
* Forward Declarations
*/
static int do_slabs_newslab(const unsigned int id);
static void *memory_allocate(size_t size);
static void do_slabs_free(void *ptr, const size_t size, unsigned int id);

/* Preallocate as many slab pages as possible (called from slabs_init)
   on start-up, so users don't get confused out-of-memory errors when
   they do have free (in-slab) space, but no space to make new slabs.
   if maxslabs is 18 (POWER_LARGEST - POWER_SMALLEST + 1), then all
   slab types can be made.  if max memory is less than 18 MB, only the
   smaller ones will be made.  */
static void slabs_preallocate (const unsigned int maxslabs);

/*
* Figures out which slab class (chunk size) is required to store an item of
* a given size.
*
* Given object size, return id to use when allocating/freeing memory for object
* 0 means error: can't store such a large object
 */

unsigned int slabs_clsid(const size_t size) {
    int res = POWER_SMALLEST;

    if (size == 0 || size > settings.item_size_max)
        return 0;

    // commit: haizhu.shao 2016-12-03 19:17
    // I would usually write this as a for loop instead:
    /**
    for (; res != power_largest; ++res)
    {
        if (size <= slabclass[res].size)
            break;
    }
    return res;
    *
    */
while (size > slabclass[res].size)
if (res++ == power_largest) /* won't fit in the biggest slab */
return power_largest;
return res;
}
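
/*
 * Hypothetical caller sketch (not part of this file): pick a class for a
 * 100-byte object and then allocate from it. Real callers normally go
 * through the item code rather than using these entry points directly.
 *
 *     unsigned int id = slabs_clsid(100);
 *     void *obj = NULL;
 *     if (id != 0)
 *         obj = slabs_alloc(100, id, NULL, 0);
 */
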
/**
 * Determines the chunk sizes and initializes the slab class descriptors
 * accordingly.
 */
void slabs_init(const size_t limit, const double factor, const bool prealloc, const uint32_t *slab_sizes) {
    int i = POWER_SMALLEST - 1;
    unsigned int size = sizeof(item) + settings.chunk_size;

    mem_limit = limit;

    if (prealloc) {
/* Allocate everything in a big chunk with malloc */
mem_base = malloc(mem_limit);
if (mem_base != NULL) {
mem_current = mem_base;
mem_avail = mem_limit;
} else {
fprintf(stderr, "Warning: Failed to allocate requested memory in"
" one large chunk.\nWill allocate in smaller chunks\n");
}
    }

    memset(slabclass, 0, sizeof(slabclass));

    while (++i < MAX_NUMBER_OF_SLAB_CLASSES-1) {
        if (slab_sizes != NULL) {
            if (slab_sizes[i-1] == 0)
                break;
            size = slab_sizes[i-1];
} else if (size >= settings.slab_chunk_size_max / factor) {
break;
}
/* Make sure items are always n-byte aligned */
if (size % CHUNK_ALIGN_BYTES)
            size += CHUNK_ALIGN_BYTES - (size % CHUNK_ALIGN_BYTES);

        slabclass[i].size = size;
slabclass[i].perslab = settings.slab_page_size / slabclass[i].size;
if (slab_sizes == NULL)
size *= factor;
        if (settings.verbose > 1) {
            fprintf(stderr, "slab class %3d: chunk size %9u perslab %7u\n",
                    i, slabclass[i].size, slabclass[i].perslab);
        }
    }

    power_largest = i;
    slabclass[power_largest].size = settings.slab_chunk_size_max;
    slabclass[power_largest].perslab = settings.slab_page_size / settings.slab_chunk_size_max;
    if (settings.verbose > 1) {
        fprintf(stderr, "slab class %3d: chunk size %9u perslab %7u\n",
                i, slabclass[i].size, slabclass[i].perslab);
    }

    /* for the test suite:  faking of how much we've already malloc'd */
{
char *t_initial_malloc = getenv("T_MEMD_INITIAL_MALLOC");
if (t_initial_malloc) {
mem_malloced = (size_t)atol(t_initial_malloc);
        }
    }

    if (prealloc) {
        slabs_preallocate(power_largest);
    }
}

static void slabs_preallocate (const unsigned int maxslabs) {
    int i;
    unsigned int prealloc = 0;

    /* pre-allocate a 1MB slab in every size class so people don't get
confused by non-intuitive "SERVER_ERROR out of memory"
messages. this is the most common question on the mailing
list. if you really don't want this, you can rebuild without
       these three lines.  */

    for (i = POWER_SMALLEST; i < MAX_NUMBER_OF_SLAB_CLASSES; i++) {
        if (++prealloc > maxslabs)
            return;
        if (do_slabs_newslab(i) == 0) {
            fprintf(stderr, "Error while preallocating slab memory!\n"
                "If using -L or other prealloc options, max memory must be "
                "at least %d megabytes.\n", power_largest);
            exit(1);
        }
    }
}

static int grow_slab_list (const unsigned int id) {
slabclass_t *p = &slabclass[id];
if (p->slabs == p->list_size) {
        size_t new_size =  (p->list_size != 0) ? p->list_size * 2 : 16;
        void *new_list = realloc(p->slab_list, new_size * sizeof(void *));
        if (new_list == 0) return 0;
        p->list_size = new_size;
        p->slab_list = new_list;
    }
    return 1;
}

static void split_slab_page_into_freelist(char *ptr, const unsigned int id) {
slabclass_t *p = &slabclass[id];
int x;
    for (x = 0; x < p->perslab; x++) {
        do_slabs_free(ptr, 0, id);
        ptr += p->size;
    }
}

/* Fast FIFO queue */
static void *get_page_from_global_pool(void) {
    slabclass_t *p = &slabclass[SLAB_GLOBAL_PAGE_POOL];
    if (p->slabs < 1) {
        return NULL;
    }
    char *ret = p->slab_list[p->slabs - 1];
    p->slabs--;
    return ret;
}

static int do_slabs_newslab(const unsigned int id) {
slabclass_t *p = &slabclass[id];
slabclass_t *g = &slabclass[SLAB_GLOBAL_PAGE_POOL];
int len = (settings.slab_reassign || settings.slab_chunk_size_max != settings.slab_page_size)
? settings.slab_page_size
: p->size * p->perslab;
    char *ptr;

    if ((mem_limit && mem_malloced + len > mem_limit && p->slabs > 0
         && g->slabs == 0)) {
        mem_limit_reached = true;
        MEMCACHED_SLABS_SLABCLASS_ALLOCATE_FAILED(id);
        return 0;
    }

    if ((grow_slab_list(id) == 0) ||
        (((ptr = get_page_from_global_pool()) == NULL) &&
        ((ptr = memory_allocate((size_t)len)) == 0))) {

        MEMCACHED_SLABS_SLABCLASS_ALLOCATE_FAILED(id);
        return 0;
    }

    memset(ptr, 0, (size_t)len);
    split_slab_page_into_freelist(ptr, id);

    // commit: haizhu.shao 2016-12-03 20:50
    // Here ptr is saved into slots, not into slab_list.
    // I don't understand this part.
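    // (split_slab_page_into_freelist() pushes every chunk of the new page
    // onto p->slots via do_slabs_free(); the next line then records the page
    // itself in p->slab_list so the slab rebalancer can find whole pages
    // later. The two lists track different things: free chunks vs. pages.)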
p->slab_list[p->slabs++] = ptr;
    MEMCACHED_SLABS_SLABCLASS_ALLOCATE(id);

    return 1;
}

/* This calculation ends up adding sizeof(void *) to the item size. */
static void *do_slabs_alloc_chunked(const size_t size, slabclass_t *p, unsigned int id) {
void *ret = NULL;
item *it = NULL;
int x;
int csize = p->size - sizeof(item_chunk);
unsigned int chunks_req = size / csize;
    if (size % csize != 0)
        chunks_req++;
    while (p->sl_curr < chunks_req) {
        if (do_slabs_newslab(id) == 0)
            break;
    }

    if (p->sl_curr >= chunks_req) {
        item_chunk *chunk = NULL;

        /* Configure the head item in the chain. */
        it = (item *)p->slots;
        p->slots = it->next;
        if (it->next) it->next->prev = 0;

        /* Squirrel away the "top chunk" into h_next for now */
        it->h_next = (item *)p->slots;
        assert(it->h_next != 0);
        chunk = (item_chunk *) it->h_next;

        /* roll down the chunks, marking them as such. */
        for (x = 0; x < chunks_req-1; x++) {
chunk->it_flags &= ~ITEM_SLABBED;
chunk->it_flags |= ITEM_CHUNK;
/* Chunks always have a direct reference to the head item */
chunk->head = it;
chunk->size = p->size - sizeof(item_chunk);
            chunk->used = 0;
            chunk = chunk->next;
        }

        /* The final "next" is now the top of the slab freelist */
        p->slots = chunk;
        if (chunk && chunk->prev) {
            /* Disconnect the final chunk from the chain */
            chunk->prev->next = 0;
            chunk->prev = 0;
        }

        it->it_flags &= ~ITEM_SLABBED;
        it->it_flags |= ITEM_CHUNKED;
        it->refcount = 1;
p->sl_curr -= chunks_req;
ret = (void *)it;
} else {
ret = NULL;
    }

    return ret;
}

/*@null@*/
static void *do_slabs_alloc(const size_t size, unsigned int id, uint64_t *total_bytes,
        unsigned int flags) {
    slabclass_t *p;
    void *ret = NULL;
    item *it = NULL;

    if (id < POWER_SMALLEST || id > power_largest) {
        MEMCACHED_SLABS_ALLOCATE_FAILED(size, 0);
return NULL;
}
p = &slabclass[id];
    assert(p->sl_curr == 0 || ((item *)p->slots)->slabs_clsid == 0);
    if (total_bytes != NULL) {
        *total_bytes = p->requested;
    }

    if (size <= p->size) {
        /* fail unless we have space at the end of a recently allocated page,
           we have something on our freelist, or we could allocate a new page */
        if (p->sl_curr == 0 && flags != SLABS_ALLOC_NO_NEWPAGE) {
            do_slabs_newslab(id);
        }

        if (p->sl_curr != 0) {
            /* return off our freelist */
            it = (item *)p->slots;
            p->slots = it->next;
            if (it->next) it->next->prev = 0;
            /* Kill flag and initialize refcount here for lock safety in slab
             * mover's freeness detection. */
            it->it_flags &= ~ITEM_SLABBED;
            it->refcount = 1;
p->sl_curr--;
ret = (void *)it;
} else {
ret = NULL;
}
} else {
/* Dealing with a chunked item. */
ret = do_slabs_alloc_chunked(size, p, id);
    }

    if (ret) {
        p->requested += size;
        MEMCACHED_SLABS_ALLOCATE(size, id, p->size, ret);
    } else {
        MEMCACHED_SLABS_ALLOCATE_FAILED(size, id);
    }

    return ret;
}

// commit: haizhu.shao 2016-12-03 21:40
// TODO: the head chunk is handled differently between alloc_chunk and
// free_chunk; do_slabs_alloc_chunked never goes through ITEM_data at all.
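// (Best guess: do_slabs_alloc_chunked() only reserves raw chunks and parks
// the chain head in it->h_next; finishing the layout, including the
// item_chunk embedded at ITEM_data(it), is left to the item code, which is
// why the free path below can walk the chain starting from ITEM_data(it).)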
static void do_slabs_free_chunked(item *it, const size_t size, unsigned int id,
slabclass_t *p) {
item_chunk *chunk = (item_chunk *) ITEM_data(it);
size_t realsize = size;
while (chunk) {
realsize += sizeof(item_chunk);
chunk = chunk->next;
}
chunk = (item_chunk *) ITEM_data(it);
    unsigned int chunks_found = 1;

    it->it_flags = ITEM_SLABBED;
    it->slabs_clsid = 0;
    it->prev = 0;
    it->next = (item *) chunk->next;
    assert(it->next);
    /* top chunk should already point back to head */
    assert(it->next && (void*)it->next->prev == (void*)chunk);
    chunk = chunk->next;
    chunk->prev = (item_chunk *)it;

    while (chunk) {
        assert(chunk->it_flags == ITEM_CHUNK);
        chunk->it_flags = ITEM_SLABBED;
        chunk->slabs_clsid = 0;
chunks_found++;
if (chunk->next) {
chunk = chunk->next;
} else {
break;
}
}
/* must have had nothing hanging off of the final chunk */
    assert(chunk && chunk->next == 0);
    /* Tail chunk, link the freelist here. */
    chunk->next = p->slots;
    if (chunk->next) chunk->next->prev = chunk;
    p->slots = it;
    p->sl_curr += chunks_found;
    p->requested -= size;

    return;
}

static void do_slabs_free(void *ptr, const size_t size, unsigned int id) {
    slabclass_t *p;
    item *it;

    assert(id >= POWER_SMALLEST && id <= power_largest);
    if (id < POWER_SMALLEST || id > power_largest)
        return;

    MEMCACHED_SLABS_FREE(size, id, ptr);
    p = &slabclass[id];

    it = (item *)ptr;
    if ((it->it_flags & ITEM_CHUNKED) == 0) {
        it->it_flags = ITEM_SLABBED;
        it->slabs_clsid = 0;
        it->prev = 0;
        it->next = p->slots;
        if (it->next) it->next->prev = it;
        p->slots = it;

        p->sl_curr++;
p->requested -= size;
} else {
do_slabs_free_chunked(it, size, id, p);
}
return;
}

static int nz_strcmp(int nzlength, const char *nz, const char *z) {
    int zlength=strlen(z);
    return (zlength == nzlength) && (strncmp(nz, z, zlength) == 0) ? 0 : -1;
}

bool get_stats(const char *stat_type, int nkey, ADD_STAT add_stats, void *c) {
    bool ret = true;

    if (add_stats != NULL) {
if (!stat_type) {
/* prepare general statistics for the engine */
STATS_LOCK();
APPEND_STAT("bytes", "%llu", (unsigned long long)stats_state.curr_bytes);
APPEND_STAT("curr_items", "%llu", (unsigned long long)stats_state.curr_items);
APPEND_STAT("total_items", "%llu", (unsigned long long)stats.total_items);
STATS_UNLOCK();
            if (settings.slab_automove > 0) {
pthread_mutex_lock(&slabs_lock);
APPEND_STAT("slab_global_page_pool", "%u", slabclass[SLAB_GLOBAL_PAGE_POOL].slabs);
pthread_mutex_unlock(&slabs_lock);
}
item_stats_totals(add_stats, c);
} else if (nz_strcmp(nkey, stat_type, "items") == ) {
item_stats(add_stats, c);
} else if (nz_strcmp(nkey, stat_type, "slabs") == ) {
slabs_stats(add_stats, c);
} else if (nz_strcmp(nkey, stat_type, "sizes") == ) {
item_stats_sizes(add_stats, c);
} else if (nz_strcmp(nkey, stat_type, "sizes_enable") == ) {
item_stats_sizes_enable(add_stats, c);
} else if (nz_strcmp(nkey, stat_type, "sizes_disable") == ) {
item_stats_sizes_disable(add_stats, c);
} else {
ret = false;
}
} else {
ret = false;
    }

    return ret;
}

/*@null@*/
static void do_slabs_stats(ADD_STAT add_stats, void *c) {
    int i, total;
    /* Get the per-thread stats which contain some interesting aggregates */
    struct thread_stats thread_stats;
    threadlocal_stats_aggregate(&thread_stats);

    total = 0;
    for(i = POWER_SMALLEST; i <= power_largest; i++) {
        slabclass_t *p = &slabclass[i];
        if (p->slabs != 0) {
            uint32_t perslab, slabs;
            slabs = p->slabs;
            perslab = p->perslab;

            char key_str[STAT_KEY_LEN];
            char val_str[STAT_VAL_LEN];
            int klen = 0, vlen = 0;

            APPEND_NUM_STAT(i, "chunk_size", "%u", p->size);
APPEND_NUM_STAT(i, "chunks_per_page", "%u", perslab);
APPEND_NUM_STAT(i, "total_pages", "%u", slabs);
APPEND_NUM_STAT(i, "total_chunks", "%u", slabs * perslab);
APPEND_NUM_STAT(i, "used_chunks", "%u",
slabs*perslab - p->sl_curr);
APPEND_NUM_STAT(i, "free_chunks", "%u", p->sl_curr);
/* Stat is dead, but displaying zero instead of removing it. */
APPEND_NUM_STAT(i, "free_chunks_end", "%u", );
APPEND_NUM_STAT(i, "mem_requested", "%llu",
(unsigned long long)p->requested);
APPEND_NUM_STAT(i, "get_hits", "%llu",
(unsigned long long)thread_stats.slab_stats[i].get_hits);
APPEND_NUM_STAT(i, "cmd_set", "%llu",
(unsigned long long)thread_stats.slab_stats[i].set_cmds);
APPEND_NUM_STAT(i, "delete_hits", "%llu",
(unsigned long long)thread_stats.slab_stats[i].delete_hits);
APPEND_NUM_STAT(i, "incr_hits", "%llu",
(unsigned long long)thread_stats.slab_stats[i].incr_hits);
APPEND_NUM_STAT(i, "decr_hits", "%llu",
(unsigned long long)thread_stats.slab_stats[i].decr_hits);
APPEND_NUM_STAT(i, "cas_hits", "%llu",
(unsigned long long)thread_stats.slab_stats[i].cas_hits);
APPEND_NUM_STAT(i, "cas_badval", "%llu",
(unsigned long long)thread_stats.slab_stats[i].cas_badval);
APPEND_NUM_STAT(i, "touch_hits", "%llu",
(unsigned long long)thread_stats.slab_stats[i].touch_hits);
total++;
}
    }

    /* add overall slab stats and append terminator */

    APPEND_STAT("active_slabs", "%d", total);
    APPEND_STAT("total_malloced", "%llu", (unsigned long long)mem_malloced);
    add_stats(NULL, 0, NULL, 0, c);
}

static void *memory_allocate(size_t size) {
    void *ret;

    if (mem_base == NULL) {
        /* We are not using a preallocated large memory chunk */
        ret = malloc(size);
    } else {
        ret = mem_current;

        if (size > mem_avail) {
            return NULL;
        }

        /* mem_current pointer _must_ be aligned!!! */
        if (size % CHUNK_ALIGN_BYTES) {
            size += CHUNK_ALIGN_BYTES - (size % CHUNK_ALIGN_BYTES);
        }

        mem_current = ((char*)mem_current) + size;
        if (size < mem_avail) {
            mem_avail -= size;
        } else {
            mem_avail = 0;
        }
    }
    mem_malloced += size;

    return ret;
}

/* Must only be used if all pages are item_size_max */
static void memory_release() {
void *p = NULL;
    // commit: haizhu.shao 2016-12-03 21:45
    // TODO: why return early here?
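    // (With a preallocated cache (-L / prealloc) every page lives inside the
    // single large block at mem_base, so individual pages cannot be handed
    // back to the OS with free().)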
if (mem_base != NULL)
        return;

    // commit: haizhu.shao 2016-12-03 21:45
    // TODO: why? what exactly is slab_reassign?
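    // (settings.slab_reassign enables the slab page mover; only with it on
    // do whole pages ever end up in the global page pool, which is the only
    // memory this function is able to release.)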
if (!settings.slab_reassign)
        return;

    while (mem_malloced > mem_limit &&
(p = get_page_from_global_pool()) != NULL) {
free(p);
mem_malloced -= settings.item_size_max;
}
}

void *slabs_alloc(size_t size, unsigned int id, uint64_t *total_bytes,
        unsigned int flags) {
    void *ret;

    pthread_mutex_lock(&slabs_lock);
    ret = do_slabs_alloc(size, id, total_bytes, flags);
    pthread_mutex_unlock(&slabs_lock);
    return ret;
}

void slabs_free(void *ptr, size_t size, unsigned int id) {
    pthread_mutex_lock(&slabs_lock);
    do_slabs_free(ptr, size, id);
    pthread_mutex_unlock(&slabs_lock);
}

void slabs_stats(ADD_STAT add_stats, void *c) {
    pthread_mutex_lock(&slabs_lock);
    do_slabs_stats(add_stats, c);
    pthread_mutex_unlock(&slabs_lock);
}

static bool do_slabs_adjust_mem_limit(size_t new_mem_limit) {
/* Cannot adjust memory limit at runtime if prealloc'ed */
if (mem_base != NULL)
return false;
settings.maxbytes = new_mem_limit;
mem_limit = new_mem_limit;
mem_limit_reached = false; /* Will reset on next alloc */
memory_release(); /* free what might already be in the global pool */
return true;
}

bool slabs_adjust_mem_limit(size_t new_mem_limit) {
    bool ret;
    pthread_mutex_lock(&slabs_lock);
    ret = do_slabs_adjust_mem_limit(new_mem_limit);
    pthread_mutex_unlock(&slabs_lock);
    return ret;
}

// commit: haizhu.shao 2016-12-03 21:49
// TODO: I don't understand the purpose of this function.
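// (It keeps the per-class "requested bytes" accounting correct when an item
// changes size in place while staying in the same slab class, e.g. an
// incr/decr that rewrites the value without reallocating the item.)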
void slabs_adjust_mem_requested(unsigned int id, size_t old, size_t ntotal)
{
pthread_mutex_lock(&slabs_lock);
slabclass_t *p;
if (id < POWER_SMALLEST || id > power_largest) {
fprintf(stderr, "Internal error! Invalid slab class\n");
abort();
    }
    p = &slabclass[id];
    p->requested = p->requested - old + ntotal;
    pthread_mutex_unlock(&slabs_lock);
}

unsigned int slabs_available_chunks(const unsigned int id, bool *mem_flag,
        uint64_t *total_bytes, unsigned int *chunks_perslab) {
    unsigned int ret;
    slabclass_t *p;

    pthread_mutex_lock(&slabs_lock);
p = &slabclass[id];
ret = p->sl_curr;
if (mem_flag != NULL)
*mem_flag = mem_limit_reached;
if (total_bytes != NULL)
*total_bytes = p->requested;
if (chunks_perslab != NULL)
*chunks_perslab = p->perslab;
pthread_mutex_unlock(&slabs_lock);
return ret;
}

static pthread_cond_t slab_rebalance_cond = PTHREAD_COND_INITIALIZER;
static volatile int do_run_slab_thread = 1;
static volatile int do_run_slab_rebalance_thread = 1;

#define DEFAULT_SLAB_BULK_CHECK 1
int slab_bulk_check = DEFAULT_SLAB_BULK_CHECK;
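
/* Rough flow of a reassignment: slabs_reassign() picks a source/destination
 * class and sets slab_rebalance_signal; the rebalance thread then runs
 * slab_rebalance_start() (grab the oldest page of the source class),
 * repeatedly slab_rebalance_move() (evict or rescue the items on that page,
 * slab_bulk_check entries at a time), and finally slab_rebalance_finish()
 * (hand the cleared page to the destination class or the global pool). */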
static int slab_rebalance_start(void) {
    slabclass_t *s_cls;
    int no_go = 0;

    pthread_mutex_lock(&slabs_lock);

    if (slab_rebal.s_clsid < POWER_SMALLEST ||
slab_rebal.s_clsid > power_largest ||
slab_rebal.d_clsid < SLAB_GLOBAL_PAGE_POOL ||
slab_rebal.d_clsid > power_largest ||
slab_rebal.s_clsid == slab_rebal.d_clsid)
        no_go = -2;

    s_cls = &slabclass[slab_rebal.s_clsid];

    if (!grow_slab_list(slab_rebal.d_clsid)) {
        no_go = -1;
    }

    if (s_cls->slabs < 2)
        no_go = -3;

    if (no_go != 0) {
pthread_mutex_unlock(&slabs_lock);
return no_go; /* Should use a wrapper function... */
    }

    /* Always kill the first available slab page as it is most likely to
     * contain the oldest items
     */
    slab_rebal.slab_start = s_cls->slab_list[0];
    slab_rebal.slab_end   = (char *)slab_rebal.slab_start +
        (s_cls->size * s_cls->perslab);
    slab_rebal.slab_pos   = slab_rebal.slab_start;
    slab_rebal.done       = 0;

    /* Also tells do_item_get to search for items in this slab */
    slab_rebalance_signal = 2;

    if (settings.verbose > 1) {
        fprintf(stderr, "Started a slab rebalance\n");
    }

    pthread_mutex_unlock(&slabs_lock);

    STATS_LOCK();
    stats_state.slab_reassign_running = true;
    STATS_UNLOCK();

    return 0;
}

/* CALLED WITH slabs_lock HELD */
static void *slab_rebalance_alloc(const size_t size, unsigned int id) {
slabclass_t *s_cls;
s_cls = &slabclass[slab_rebal.s_clsid];
int x;
    item *new_it = NULL;

    for (x = 0; x < s_cls->perslab; x++) {
new_it = do_slabs_alloc(size, id, NULL, SLABS_ALLOC_NO_NEWPAGE);
/* check that memory isn't within the range to clear */
if (new_it == NULL) {
break;
}
if ((void *)new_it >= slab_rebal.slab_start
&& (void *)new_it < slab_rebal.slab_end) {
/* Pulled something we intend to free. Mark it as freed since
* we've already done the work of unlinking it from the freelist.
*/
s_cls->requested -= size;
            new_it->refcount = 0;
            new_it->it_flags = ITEM_SLABBED|ITEM_FETCHED;
#ifdef DEBUG_SLAB_MOVER
            memcpy(ITEM_key(new_it), "deadbeef", 8);
#endif
new_it = NULL;
slab_rebal.inline_reclaim++;
} else {
break;
}
}
return new_it;
}

/* CALLED WITH slabs_lock HELD */
/* detaches item/chunk from freelist. */
static void slab_rebalance_cut_free(slabclass_t *s_cls, item *it) {
/* Ensure this was on the freelist and nothing else. */
assert(it->it_flags == ITEM_SLABBED);
if (s_cls->slots == it) {
s_cls->slots = it->next;
}
if (it->next) it->next->prev = it->prev;
if (it->prev) it->prev->next = it->next;
s_cls->sl_curr--;
}

enum move_status {
    MOVE_PASS=0, MOVE_FROM_SLAB, MOVE_FROM_LRU, MOVE_BUSY, MOVE_LOCKED
};

/* refcount == 0 is safe since nobody can incr while item_lock is held.
* refcount != 0 is impossible since flags/etc can be modified in other
* threads. instead, note we found a busy one and bail. logic in do_item_get
* will prevent busy items from continuing to be busy
* NOTE: This is checking it_flags outside of an item lock. I believe this
* works since it_flags is 8 bits, and we're only ever comparing a single bit
* regardless. ITEM_SLABBED bit will always be correct since we're holding the
* lock which modifies that bit. ITEM_LINKED won't exist if we're between an
* item having ITEM_SLABBED removed, and the key hasn't been added to the item
* yet. The memory barrier from the slabs lock should order the key write and the
* flags to the item?
* If ITEM_LINKED did exist and was just removed, but we still see it, that's
* still safe since it will have a valid key, which we then lock, and then
* recheck everything.
* This may not be safe on all platforms; If not, slabs_alloc() will need to
* seed the item key while holding slabs_lock.
*/
static int slab_rebalance_move(void) {
slabclass_t *s_cls;
int x;
    int was_busy = 0;
    int refcount = 0;
    uint32_t hv;
    void *hold_lock;
    enum move_status status = MOVE_PASS;

    pthread_mutex_lock(&slabs_lock);

    s_cls = &slabclass[slab_rebal.s_clsid];

    for (x = 0; x < slab_bulk_check; x++) {
        hv = 0;
hold_lock = NULL;
item *it = slab_rebal.slab_pos;
item_chunk *ch = NULL;
status = MOVE_PASS;
if (it->it_flags & ITEM_CHUNK) {
/* This chunk is a chained part of a larger item. */
ch = (item_chunk *) it;
/* Instead, we use the head chunk to find the item and effectively
* lock the entire structure. If a chunk has ITEM_CHUNK flag, its
* head cannot be slabbed, so the normal routine is safe. */
it = ch->head;
assert(it->it_flags & ITEM_CHUNKED);
        }

        /* ITEM_FETCHED when ITEM_SLABBED is overloaded to mean we've cleared
* the chunk for move. Only these two flags should exist.
*/
if (it->it_flags != (ITEM_SLABBED|ITEM_FETCHED)) {
/* ITEM_SLABBED can only be added/removed under the slabs_lock */
if (it->it_flags & ITEM_SLABBED) {
assert(ch == NULL);
slab_rebalance_cut_free(s_cls, it);
status = MOVE_FROM_SLAB;
            } else if ((it->it_flags & ITEM_LINKED) != 0) {
/* If it doesn't have ITEM_SLABBED, the item could be in any
* state on its way to being freed or written to. If no
* ITEM_SLABBED, but it's had ITEM_LINKED, it must be active
* and have the key written to it already.
*/
hv = hash(ITEM_key(it), it->nkey);
if ((hold_lock = item_trylock(hv)) == NULL) {
status = MOVE_LOCKED;
} else {
refcount = refcount_incr(&it->refcount);
                    if (refcount == 2) { /* item is linked but not busy */
/* Double check ITEM_LINKED flag here, since we're
* past a memory barrier from the mutex. */
                        if ((it->it_flags & ITEM_LINKED) != 0) {
status = MOVE_FROM_LRU;
} else {
/* refcount == 1 + !ITEM_LINKED means the item is being
* uploaded to, or was just unlinked but hasn't been freed
* yet. Let it bleed off on its own and try again later */
status = MOVE_BUSY;
}
} else {
                        if (settings.verbose > 2) {
fprintf(stderr, "Slab reassign hit a busy item: refcount: %d (%d -> %d)\n",
it->refcount, slab_rebal.s_clsid, slab_rebal.d_clsid);
}
status = MOVE_BUSY;
}
/* Item lock must be held while modifying refcount */
if (status == MOVE_BUSY) {
refcount_decr(&it->refcount);
item_trylock_unlock(hold_lock);
}
}
} else {
/* See above comment. No ITEM_SLABBED or ITEM_LINKED. Mark
* busy and wait for item to complete its upload. */
status = MOVE_BUSY;
}
        }

        int save_item = 0;
        item *new_it = NULL;
        size_t ntotal = 0;
switch (status) {
case MOVE_FROM_LRU:
/* Lock order is LRU locks -> slabs_lock. unlink uses LRU lock.
* We only need to hold the slabs_lock while initially looking
* at an item, and at this point we have an exclusive refcount
* (2) + the item is locked. Drop slabs lock, drop item to
* refcount 1 (just our own, then fall through and wipe it
*/
/* Check if expired or flushed */
ntotal = ITEM_ntotal(it);
/* REQUIRES slabs_lock: CHECK FOR cls->sl_curr > 0 */
if (ch == NULL && (it->it_flags & ITEM_CHUNKED)) {
/* Chunked should be identical to non-chunked, except we need
* to swap out ntotal for the head-chunk-total. */
ntotal = s_cls->size;
}
                if ((it->exptime != 0 && it->exptime < current_time)
                    || item_is_flushed(it)) {
                    /* Expired, don't save. */
                    save_item = 0;
                } else if (ch == NULL &&
                        (new_it = slab_rebalance_alloc(ntotal, slab_rebal.s_clsid)) == NULL) {
                    /* Not a chunk of an item, and nomem. */
                    save_item = 0;
                    slab_rebal.evictions_nomem++;
                } else if (ch != NULL &&
                        (new_it = slab_rebalance_alloc(s_cls->size, slab_rebal.s_clsid)) == NULL) {
                    /* Is a chunk of an item, and nomem. */
                    save_item = 0;
                    slab_rebal.evictions_nomem++;
                } else {
                    /* Was whatever it was, and we have memory for it. */
                    save_item = 1;
                }
                pthread_mutex_unlock(&slabs_lock);
                unsigned int requested_adjust = 0;
if (save_item) {
if (ch == NULL) {
                        assert((new_it->it_flags & ITEM_CHUNKED) == 0);
                        /* if free memory, memcpy. clear prev/next/h_bucket */
                        memcpy(new_it, it, ntotal);
                        new_it->prev = 0;
                        new_it->next = 0;
                        new_it->h_next = 0;
                        /* These are definitely required. else fails assert */
                        new_it->it_flags &= ~ITEM_LINKED;
                        new_it->refcount = 1;
do_item_replace(it, new_it, hv);
/* Need to walk the chunks and repoint head */
if (new_it->it_flags & ITEM_CHUNKED) {
item_chunk *fch = (item_chunk *) ITEM_data(new_it);
fch->next->prev = fch;
while (fch) {
fch->head = new_it;
fch = fch->next;
}
}
                        it->refcount = 0;
                        it->it_flags = ITEM_SLABBED|ITEM_FETCHED;
#ifdef DEBUG_SLAB_MOVER
                        memcpy(ITEM_key(it), "deadbeef", 8);
#endif
slab_rebal.rescues++;
requested_adjust = ntotal;
} else {
item_chunk *nch = (item_chunk *) new_it;
/* Chunks always have head chunk (the main it) */
ch->prev->next = nch;
if (ch->next)
ch->next->prev = nch;
memcpy(nch, ch, ch->used + sizeof(item_chunk));
                        ch->refcount = 0;
                        ch->it_flags = ITEM_SLABBED|ITEM_FETCHED;
                        slab_rebal.chunk_rescues++;
#ifdef DEBUG_SLAB_MOVER
                        memcpy(ITEM_key((item *)ch), "deadbeef", 8);
#endif
refcount_decr(&it->refcount);
requested_adjust = s_cls->size;
}
} else {
/* restore ntotal in case we tried saving a head chunk. */
ntotal = ITEM_ntotal(it);
do_item_unlink(it, hv);
slabs_free(it, ntotal, slab_rebal.s_clsid);
/* Swing around again later to remove it from the freelist. */
slab_rebal.busy_items++;
was_busy++;
}
item_trylock_unlock(hold_lock);
pthread_mutex_lock(&slabs_lock);
/* Always remove the ntotal, as we added it in during
* do_slabs_alloc() when copying the item.
*/
s_cls->requested -= requested_adjust;
break;
case MOVE_FROM_SLAB:
            it->refcount = 0;
            it->it_flags = ITEM_SLABBED|ITEM_FETCHED;
#ifdef DEBUG_SLAB_MOVER
            memcpy(ITEM_key(it), "deadbeef", 8);
#endif
break;
case MOVE_BUSY:
case MOVE_LOCKED:
slab_rebal.busy_items++;
was_busy++;
break;
case MOVE_PASS:
break;
        }

        slab_rebal.slab_pos = (char *)slab_rebal.slab_pos + s_cls->size;
        if (slab_rebal.slab_pos >= slab_rebal.slab_end)
            break;
    }

    if (slab_rebal.slab_pos >= slab_rebal.slab_end) {
/* Some items were busy, start again from the top */
if (slab_rebal.busy_items) {
slab_rebal.slab_pos = slab_rebal.slab_start;
STATS_LOCK();
stats.slab_reassign_busy_items += slab_rebal.busy_items;
STATS_UNLOCK();
            slab_rebal.busy_items = 0;
        } else {
            slab_rebal.done++;
        }
    }

    pthread_mutex_unlock(&slabs_lock);

    return was_busy;
}

static void slab_rebalance_finish(void) {
slabclass_t *s_cls;
slabclass_t *d_cls;
int x;
uint32_t rescues;
uint32_t evictions_nomem;
uint32_t inline_reclaim;
    uint32_t chunk_rescues;

    pthread_mutex_lock(&slabs_lock);

    s_cls = &slabclass[slab_rebal.s_clsid];
    d_cls = &slabclass[slab_rebal.d_clsid];

#ifdef DEBUG_SLAB_MOVER
    /* If the algorithm is broken, live items can sneak in. */
    slab_rebal.slab_pos = slab_rebal.slab_start;
    while (1) {
        item *it = slab_rebal.slab_pos;
        assert(it->it_flags == (ITEM_SLABBED|ITEM_FETCHED));
        assert(memcmp(ITEM_key(it), "deadbeef", 8) == 0);
it->it_flags = ITEM_SLABBED|ITEM_FETCHED;
slab_rebal.slab_pos = (char *)slab_rebal.slab_pos + s_cls->size;
if (slab_rebal.slab_pos >= slab_rebal.slab_end)
break;
}
#endif

    /* At this point the stolen slab is completely clear.
     * We always kill the "first"/"oldest" slab page in the slab_list, so
     * shuffle the page list backwards and decrement.
     */
    s_cls->slabs--;
    for (x = 0; x < s_cls->slabs; x++) {
        s_cls->slab_list[x] = s_cls->slab_list[x+1];
    }

    d_cls->slab_list[d_cls->slabs++] = slab_rebal.slab_start;

    /* Don't need to split the page into chunks if we're just storing it */
    if (slab_rebal.d_clsid > SLAB_GLOBAL_PAGE_POOL) {
        memset(slab_rebal.slab_start, 0, (size_t)settings.item_size_max);
split_slab_page_into_freelist(slab_rebal.slab_start,
slab_rebal.d_clsid);
} else if (slab_rebal.d_clsid == SLAB_GLOBAL_PAGE_POOL) {
/* mem_malloc'ed might be higher than mem_limit. */
memory_release();
    }

    slab_rebal.done       = 0;
    slab_rebal.s_clsid    = 0;
    slab_rebal.d_clsid    = 0;
    slab_rebal.slab_start = NULL;
    slab_rebal.slab_end   = NULL;
    slab_rebal.slab_pos   = NULL;
    evictions_nomem    = slab_rebal.evictions_nomem;
    inline_reclaim = slab_rebal.inline_reclaim;
    rescues   = slab_rebal.rescues;
    chunk_rescues = slab_rebal.chunk_rescues;
    slab_rebal.evictions_nomem    = 0;
    slab_rebal.inline_reclaim = 0;
    slab_rebal.rescues  = 0;

    slab_rebalance_signal = 0;

    pthread_mutex_unlock(&slabs_lock);

    STATS_LOCK();
stats.slabs_moved++;
stats.slab_reassign_rescues += rescues;
stats.slab_reassign_evictions_nomem += evictions_nomem;
stats.slab_reassign_inline_reclaim += inline_reclaim;
stats.slab_reassign_chunk_rescues += chunk_rescues;
stats_state.slab_reassign_running = false;
    STATS_UNLOCK();

    if (settings.verbose > 1) {
        fprintf(stderr, "finished a slab move\n");
    }
}

/* Slab mover thread.
 * Sits waiting for a condition to jump off and shovel some memory about
 */
static void *slab_rebalance_thread(void *arg) {
    int was_busy = 0;
/* So we first pass into cond_wait with the mutex held */
    mutex_lock(&slabs_rebalance_lock);

    while (do_run_slab_rebalance_thread) {
        if (slab_rebalance_signal == 1) {
            if (slab_rebalance_start() < 0) {
                /* Handle errors with more specificity as required. */
                slab_rebalance_signal = 0;
            }

            was_busy = 0;
        } else if (slab_rebalance_signal && slab_rebal.slab_start != NULL) {
            was_busy = slab_rebalance_move();
        }

        if (slab_rebal.done) {
slab_rebalance_finish();
} else if (was_busy) {
/* Stuck waiting for some items to unlock, so slow down a bit
* to give them a chance to free up */
            usleep(50);
        }

        if (slab_rebalance_signal == 0) {
            /* always hold this lock while we're running */
            pthread_cond_wait(&slab_rebalance_cond, &slabs_rebalance_lock);
        }
    }
    return NULL;
}

/* Iterate at most once through the slab classes and pick a "random" source.
* I like this better than calling rand() since rand() is slow enough that we
* can just check all of the classes once instead.
*/
static int slabs_reassign_pick_any(int dst) {
    static int cur = POWER_SMALLEST - 1;
    int tries = power_largest - POWER_SMALLEST + 1;
    for (; tries > 0; tries--) {
cur++;
if (cur > power_largest)
cur = POWER_SMALLEST;
if (cur == dst)
continue;
        if (slabclass[cur].slabs > 1) {
return cur;
}
}
    return -1;
}

static enum reassign_result_type do_slabs_reassign(int src, int dst) {
    if (slab_rebalance_signal != 0)
        return REASSIGN_RUNNING;

    if (src == dst)
        return REASSIGN_SRC_DST_SAME;

    /* Special indicator to choose ourselves. */
    if (src == -1) {
        src = slabs_reassign_pick_any(dst);
        /* TODO: If we end up back at -1, return a new error type */
    }

    if (src < POWER_SMALLEST        || src > power_largest ||
dst < SLAB_GLOBAL_PAGE_POOL || dst > power_largest)
        return REASSIGN_BADCLASS;

    if (slabclass[src].slabs < 2)
        return REASSIGN_NOSPARE;

    slab_rebal.s_clsid = src;
    slab_rebal.d_clsid = dst;

    slab_rebalance_signal = 1;
    pthread_cond_signal(&slab_rebalance_cond);

    return REASSIGN_OK;
}

enum reassign_result_type slabs_reassign(int src, int dst) {
    enum reassign_result_type ret;
    if (pthread_mutex_trylock(&slabs_rebalance_lock) != 0) {
return REASSIGN_RUNNING;
}
ret = do_slabs_reassign(src, dst);
pthread_mutex_unlock(&slabs_rebalance_lock);
return ret;
}

/* If we hold this lock, rebalancer can't wake up or move */
void slabs_rebalancer_pause(void) {
    pthread_mutex_lock(&slabs_rebalance_lock);
}

void slabs_rebalancer_resume(void) {
    pthread_mutex_unlock(&slabs_rebalance_lock);
}

static pthread_t rebalance_tid;

int start_slab_maintenance_thread(void) {
    int ret;
    slab_rebalance_signal = 0;
slab_rebal.slab_start = NULL;
char *env = getenv("MEMCACHED_SLAB_BULK_CHECK");
if (env != NULL) {
slab_bulk_check = atoi(env);
        if (slab_bulk_check == 0) {
slab_bulk_check = DEFAULT_SLAB_BULK_CHECK;
}
    }

    if (pthread_cond_init(&slab_rebalance_cond, NULL) != 0) {
        fprintf(stderr, "Can't initialize rebalance condition\n");
        return -1;
    }
    pthread_mutex_init(&slabs_rebalance_lock, NULL);

    if ((ret = pthread_create(&rebalance_tid, NULL,
                              slab_rebalance_thread, NULL)) != 0) {
        fprintf(stderr, "Can't create rebal thread: %s\n", strerror(ret));
        return -1;
}
    return 0;
}

/* The maintenance thread is on a sleep/loop cycle, so it should join after a
* short wait */
void stop_slab_maintenance_thread(void) {
mutex_lock(&slabs_rebalance_lock);
    do_run_slab_thread = 0;
    do_run_slab_rebalance_thread = 0;
pthread_cond_signal(&slab_rebalance_cond);
    pthread_mutex_unlock(&slabs_rebalance_lock);

    /* Wait for the maintenance thread to stop */
pthread_join(rebalance_tid, NULL);
}
