【JAVA】HashMap源码阅读

1、关键的几个static参数
2、内部类定义Node节点
3、成员变量
4、静态方法
5、HashMap的四个构造方法
6、put方法
7、扩容resize方法
8、get方法
9、remove操作
10、参考链接

HashMap在JDK 1.7的时候，底层的实现机制是数组+链表，利用链表来解决哈希冲突。链表的查找复杂度是O(n)，如果链表很长，查找会比较耗时，所以JDK 1.8对HashMap做了优化，其底层的实现机制变成了数组+链表+红黑树。当某个桶中链表的长度达到树化阈值（TREEIFY_THRESHOLD = 8），并且数组容量不小于MIN_TREEIFY_CAPACITY（64）时，就会把链表转换成红黑树，红黑树的查找复杂度是O(log n)，这样在哈希冲突严重的情况下也可以保证查找性能。

另外，HashMap是线程不安全的，主要体现在：（1）JDK 1.7的实现版本中，多个线程同时触发扩容机制的时候，可能会出现链表节点循环引用的现象，从而导致查找的时候出现死循环。（2）JDK 1.8中多个线程同时put元素的时候可能会出现数据丢失（覆盖）的情况。想要解决HashMap线程不安全的问题，可以使用java.util.concurrent（JUC）包下的ConcurrentHashMap，或者使用Collections.synchronizedMap()返回一个线程安全的map。

1、关键的几个static参数

    /** Default table capacity used when none is specified: 2^4 = 16. */
    static final int DEFAULT_INITIAL_CAPACITY = 1 << 4; // aka 16

    /** Upper bound on the table capacity: 2^30. */
    static final int MAXIMUM_CAPACITY = 1 << 30;

    /**
     * Load factor used when none is specified in a constructor.
     * Author's note: 0.75 is a balance between lookup time and space usage.
     */
    static final float DEFAULT_LOAD_FACTOR = 0.75f;

    /**
     * Treeify threshold: putVal hands a bin to treeifyBin once appending a
     * node makes its traversal count reach TREEIFY_THRESHOLD - 1.
     */
    static final int TREEIFY_THRESHOLD = 8;

    /**
     * Untreeify threshold.
     * NOTE(review): not referenced in this excerpt; presumably the bin size at
     * which a tree bin is converted back to a linked list - confirm.
     */
    static final int UNTREEIFY_THRESHOLD = 6;

    /**
     * Minimum table capacity for treeification.
     * NOTE(review): not referenced in this excerpt; presumably checked inside
     * treeifyBin before a bin is turned into a tree - confirm.
     */
    static final int MIN_TREEIFY_CAPACITY = 64;

2、内部类定义Node节点

    //内部类，定义Node节点,key-value

    /**
     * Singly linked key/value node stored in a bucket of the table.
     * The hash and key are fixed at construction; the value and the link to
     * the following node may change afterwards.
     */
    static class Node<K,V> implements Map.Entry<K,V> {
        /** Hash associated with the key; never changes. */
        final int hash;
        /** The key; never changes. */
        final K key;
        /** The current value. */
        V value;
        /** Following node in the same bucket, or null at the end of the chain. */
        Node<K,V> next;

        Node(int hash, K key, V value, Node<K,V> next) {
            this.hash = hash;
            this.key = key;
            this.value = value;
            this.next = next;
        }

        @Override
        public final K getKey() {
            return key;
        }

        @Override
        public final V getValue() {
            return value;
        }

        @Override
        public final String toString() {
            return key + "=" + value;
        }

        /** Hash of the entry: key hash XOR value hash (null hashes to 0). */
        @Override
        public final int hashCode() {
            final int keyHash = Objects.hashCode(key);
            final int valueHash = Objects.hashCode(value);
            return keyHash ^ valueHash;
        }

        /** Replaces the value and returns the one that was stored before. */
        @Override
        public final V setValue(V newValue) {
            final V previous = value;
            value = newValue;
            return previous;
        }

        /**
         * Two entries are equal when they are the same object, or when the
         * other object is a Map.Entry whose key and value both equal ours.
         */
        @Override
        public final boolean equals(Object o) {
            if (o == this) {
                return true;
            }
            if (!(o instanceof Map.Entry)) {
                return false;
            }
            final Map.Entry<?,?> other = (Map.Entry<?,?>) o;
            return Objects.equals(key, other.getKey())
                && Objects.equals(value, other.getValue());
        }
    }

3、成员变量

    /**
     * The bucket array backing the map. It is allocated lazily: putVal calls
     * resize() when it finds this null or empty. Declared transient, so it is
     * excluded from default serialization.
     */
    transient Node<K,V>[] table;

    // NOTE(review): not used in this excerpt; presumably a cached entrySet() view - confirm.
    transient Set<Map.Entry<K,V>> entrySet;

    /** Number of key/value mappings; incremented in putVal, decremented in removeNode. */
    transient int size;

    /** Modification counter; incremented on each insertion (putVal) and removal (removeNode). */
    transient int modCount;

    /**
     * Resize threshold: putVal calls resize() once size exceeds it. Before the
     * table is allocated it holds the initial capacity (or 0 for the defaults),
     * which resize() reads on the first allocation.
     */
    int threshold;

    /** Load factor; resize() multiplies it by the capacity to derive the threshold. */
    final float loadFactor;

4、静态方法

    //计算hash值，>>>是无符号右移运算：用key的hashCode值和它无符号右移16位后的值做异或(^)运算，把高16位的信息混入低16位

    /**
     * Computes the hash used for bucket selection.
     *
     * @param key the key, may be null
     * @return 0 for a null key; otherwise the key's hashCode XORed with
     *         itself shifted right (unsigned) by 16 bits, which folds the
     *         upper half of the hash code into the lower half
     */
    static final int hash(Object key) {
        if (key == null) {
            return 0;
        }
        final int code = key.hashCode();
        return code ^ (code >>> 16);
    }

    //对于给定的目标容量，返回大于等于该容量的最小的2的幂（最小为1，最大不超过MAXIMUM_CAPACITY）。

    /**
     * Rounds a requested capacity up to a power of two.
     *
     * @param cap the requested capacity
     * @return the smallest power of two that is >= cap, but at least 1 and
     *         at most MAXIMUM_CAPACITY
     */
    static final int tableSizeFor(int cap) {
        // Start from cap - 1 so that an exact power of two maps to itself.
        int n = cap - 1;
        // Copy the highest set bit into every lower position (shifts 1, 2, 4, 8, 16),
        // leaving n as a run of ones.
        for (int shift = 1; shift <= 16; shift <<= 1) {
            n |= n >>> shift;
        }
        if (n < 0) {
            return 1;
        }
        if (n >= MAXIMUM_CAPACITY) {
            return MAXIMUM_CAPACITY;
        }
        return n + 1;
    }

5、HashMap的四个构造方法

    //指定初始容量和装填因子

    public HashMap(int initialCapacity, float loadFactor) {

        if (initialCapacity < 0)

            throw new IllegalArgumentException("Illegal initial capacity: " +

                                               initialCapacity);

        if (initialCapacity > MAXIMUM_CAPACITY)

            initialCapacity = MAXIMUM_CAPACITY;

        if (loadFactor <= 0 || Float.isNaN(loadFactor))

            throw new IllegalArgumentException("Illegal load factor: " +

                                               loadFactor);

        this.loadFactor = loadFactor;

        //扩容阈值用静态方法tableSizeFor()计算得到

        this.threshold = tableSizeFor(initialCapacity);

    }

    //指定初始容量，装填因子采用默认值0.75

    /**
     * Creates an empty map with the given initial capacity and the default
     * load factor (DEFAULT_LOAD_FACTOR) by delegating to the two-argument
     * constructor.
     *
     * @param initialCapacity requested capacity
     */
    public HashMap(int initialCapacity) {
        this(initialCapacity, DEFAULT_LOAD_FACTOR);
    }

    //无参构造方法，初始容量和装填因子均采用默认值，16和0.75

    /**
     * Creates an empty map with the default load factor. Only loadFactor is
     * assigned; threshold keeps its zero default, which makes the first
     * resize() fall back to DEFAULT_INITIAL_CAPACITY.
     */
    public HashMap() {
        this.loadFactor = DEFAULT_LOAD_FACTOR; // all other fields defaulted
    }

    //利用一个Map构造HashMap

    /**
     * Creates a map with the default load factor and copies every mapping of
     * the given map into it via putMapEntries (with evict = false).
     *
     * @param m the map whose mappings are copied
     */
    public HashMap(Map<? extends K, ? extends V> m) {
        this.loadFactor = DEFAULT_LOAD_FACTOR;
        putMapEntries(m, false);
    }

HashMap的最后一个构造方法利用一个已有的Map来构造HashMap，其中调用了putMapEntries()方法。putMapEntries()最终会对每一对key-value调用putVal()方法，putVal()方法在put操作部分进行介绍。

    //把一个Map中元素批量添加到当前的HashMap中

    /**
     * Copies every mapping of {@code m} into this map.
     *
     * @param m     the source map
     * @param evict forwarded unchanged to putVal for each inserted mapping
     */
    final void putMapEntries(Map<? extends K, ? extends V> m, boolean evict) {
        final int incoming = m.size();
        // Nothing to copy from an empty source.
        if (incoming <= 0) {
            return;
        }

        if (table == null) { // pre-size
            // Table not allocated yet: derive the capacity that would hold the
            // incoming mappings under the load factor and record it in threshold.
            final float needed = ((float)incoming / loadFactor) + 1.0F;
            final int target;
            if (needed < (float)MAXIMUM_CAPACITY) {
                target = (int)needed;
            } else {
                target = MAXIMUM_CAPACITY;
            }
            if (target > threshold) {
                threshold = tableSizeFor(target);
            }
        } else if (incoming > threshold) {
            // Table already exists and the source alone exceeds the threshold: grow first.
            resize();
        }

        // Insert each mapping through the regular putVal path.
        for (Map.Entry<? extends K, ? extends V> entry : m.entrySet()) {
            K key = entry.getKey();
            V value = entry.getValue();
            putVal(hash(key), key, value, false, evict);
        }
    }

6、put方法

    /**
     * Associates the value with the key by delegating to putVal with
     * onlyIfAbsent = false and evict = true.
     *
     * @param key   the key
     * @param value the value to store
     * @return whatever putVal returns: the previous value for the key, or
     *         null when a new node was inserted
     */
    public V put(K key, V value) {
        return putVal(hash(key), key, value, false, true);
    }

    //第四个参数onlyIfAbsent：为true时，如果key已经存在且旧value不为null，则不覆盖旧value；put方法传入false，表示直接覆盖

    //第五个参数evict：为false时表示table处于创建模式（例如在构造方法中通过putMapEntries调用）；该参数会原样传给afterNodeInsertion()。

    /**
     * Core insertion routine used by put and putMapEntries.
     *
     * @param hash         hash for the key (callers in this file pass hash(key))
     * @param key          the key
     * @param value        the value to store
     * @param onlyIfAbsent if true, an existing non-null value is left unchanged
     * @param evict        forwarded unchanged to afterNodeInsertion
     * @return the previous value when the key was already present (may be
     *         null), or null when a new node was inserted
     */
    final V putVal(int hash, K key, V value, boolean onlyIfAbsent,
                   boolean evict) {
        Node<K,V>[] tab; Node<K,V> p; int n, i;
        // Table not allocated yet (or zero-length): let resize() create it.
        if ((tab = table) == null || (n = tab.length) == 0)
            n = (tab = resize()).length;
        // Bucket index is (n - 1) & hash. An empty bucket means no collision:
        // store the new node directly.
        if ((p = tab[i = (n - 1) & hash]) == null)
            tab[i] = newNode(hash, key, value, null);
        else {
            Node<K,V> e; K k;
            // The bucket head already holds this key: remember it so its value
            // can be replaced below.
            if (p.hash == hash &&
                ((k = p.key) == key || (key != null && key.equals(k))))
                e = p;
            // The bucket is a tree bin: delegate the insert/lookup to the tree.
            else if (p instanceof TreeNode)
                e = ((TreeNode<K,V>)p).putTreeVal(this, tab, hash, key, value);
            else {
                // The bucket is a linked list: walk it.
                for (int binCount = 0; ; ++binCount) {
                    // Reached the tail without a match: append a new node.
                    if ((e = p.next) == null) {
                        p.next = newNode(hash, key, value, null);
                        // The chain is now long enough to hand the bin to
                        // treeifyBin (defined elsewhere).
                        if (binCount >= TREEIFY_THRESHOLD - 1) // -1 for 1st
                            treeifyBin(tab, hash);
                        break;
                    }
                    // Found a node with the same key: stop, e refers to it.
                    if (e.hash == hash &&
                        ((k = e.key) == key || (key != null && key.equals(k))))
                        break;
                    // Advance to the next node.
                    p = e;
                }
            }
            // e != null means the key was already mapped; replace the value
            // unless onlyIfAbsent forbids it, then return the old value without
            // touching modCount or size.
            if (e != null) { // existing mapping for key
                V oldValue = e.value;
                if (!onlyIfAbsent || oldValue == null)
                    e.value = value;
                // Callback defined elsewhere.
                // NOTE(review): per the article, a no-op in HashMap that
                // LinkedHashMap overrides for ordering - confirm.
                afterNodeAccess(e);
                return oldValue;
            }
        }
        // A new node was added: count the structural modification.
        ++modCount;
        // Grow the table once the number of mappings exceeds the threshold.
        if (++size > threshold)
            resize();
        // Callback defined elsewhere; receives the evict flag.
        afterNodeInsertion(evict);
        return null;
    }

下面的图总结了put操作的逻辑，一图胜千言（图片来源：美团技术团队）

7、扩容resize方法

    //扩容函数

    /**
     * Allocates the table on first use, or doubles it and redistributes the
     * existing nodes. Because the capacity doubles, every node either stays
     * at its old index j or moves to j + oldCap.
     *
     * @return the new table, or the old table unchanged when it is already
     *         at MAXIMUM_CAPACITY
     */
    final Node<K,V>[] resize() {
        // Snapshot of the current table, its capacity (0 if unallocated) and threshold.
        Node<K,V>[] oldTab = table;
        int oldCap = (oldTab == null) ? 0 : oldTab.length;
        int oldThr = threshold;
        // newThr == 0 below means "not decided yet; derive it from newCap * loadFactor".
        int newCap, newThr = 0;
        // Case 1: a table already exists, so this is a growth step.
        if (oldCap > 0) {
            // Already at the maximum: stop growing, lift the threshold to
            // Integer.MAX_VALUE and keep using the old table.
            if (oldCap >= MAXIMUM_CAPACITY) {
                threshold = Integer.MAX_VALUE;
                return oldTab;
            }
            // Double the capacity. The threshold is doubled too, but only when
            // the new capacity is below the maximum and the old capacity is at
            // least DEFAULT_INITIAL_CAPACITY; otherwise newThr stays 0 and is
            // recomputed below.
            else if ((newCap = oldCap << 1) < MAXIMUM_CAPACITY &&
                     oldCap >= DEFAULT_INITIAL_CAPACITY)
                newThr = oldThr << 1; // double threshold
        }
        // Case 2: no table yet, but threshold carries the initial capacity that
        // a constructor or putMapEntries stored there.
        else if (oldThr > 0) // initial capacity was placed in threshold
            newCap = oldThr;
        // Case 3: no table and no stored capacity: use the defaults.
        else {               // zero initial threshold signifies using defaults
            newCap = DEFAULT_INITIAL_CAPACITY;
            newThr = (int)(DEFAULT_LOAD_FACTOR * DEFAULT_INITIAL_CAPACITY);
        }
        // Threshold still undecided: compute newCap * loadFactor, falling back
        // to Integer.MAX_VALUE when either value reaches MAXIMUM_CAPACITY.
        if (newThr == 0) {
            float ft = (float)newCap * loadFactor;
            newThr = (newCap < MAXIMUM_CAPACITY && ft < (float)MAXIMUM_CAPACITY ?
                      (int)ft : Integer.MAX_VALUE);
        }
        // Publish the new threshold.
        threshold = newThr;
        // Allocate the new bucket array and install it as the table.
        @SuppressWarnings({"rawtypes","unchecked"})
            Node<K,V>[] newTab = (Node<K,V>[])new Node[newCap];
        table = newTab;
        // Move every node of the old table into the new one.
        if (oldTab != null) {
            for (int j = 0; j < oldCap; ++j) {
                Node<K,V> e;
                // Skip empty buckets.
                if ((e = oldTab[j]) != null) {
                    // Clear the old slot so the old array no longer references the bin.
                    oldTab[j] = null;
                    // Single node: place it directly at hash & (newCap - 1).
                    if (e.next == null)
                        newTab[e.hash & (newCap - 1)] = e;
                    // Tree bin: let the tree split itself across the new table.
                    else if (e instanceof TreeNode)
                        ((TreeNode<K,V>)e).split(this, newTab, j, oldCap);
                    // Linked list: split into two lists, keeping relative order.
                    else { // preserve order
                        // "lo" list: nodes that stay at index j.
                        Node<K,V> loHead = null, loTail = null;
                        // "hi" list: nodes that move to index j + oldCap.
                        Node<K,V> hiHead = null, hiTail = null;
                        Node<K,V> next;
                        do {
                            next = e.next;
                            // The bit of the hash selected by oldCap decides the
                            // list: 0 -> lo, 1 -> hi.
                            if ((e.hash & oldCap) == 0) {
                                if (loTail == null)
                                    loHead = e;
                                else
                                    loTail.next = e;
                                loTail = e;
                            }
                            else {
                                if (hiTail == null)
                                    hiHead = e;
                                else
                                    hiTail.next = e;
                                hiTail = e;
                            }
                        } while ((e = next) != null);
                        // Terminate the lo list and store it at the original index.
                        if (loTail != null) {
                            loTail.next = null;
                            newTab[j] = loHead;
                        }
                        // Terminate the hi list and store it at index j + oldCap.
                        if (hiTail != null) {
                            hiTail.next = null;
                            newTab[j + oldCap] = hiHead;
                        }
                    }
                }
            }
        }
        return newTab;
    }

8、get方法

    //根据key值获取

    /**
     * Looks up the value mapped to the given key.
     *
     * @param key the key to look up
     * @return the value of the node found by getNode, or null when no node
     *         matches (a matching node may itself hold a null value)
     */
    public V get(Object key) {
        final Node<K,V> match = getNode(hash(key), key);
        if (match == null) {
            return null;
        }
        return match.value;
    }

    /**
     * Finds the node for a key.
     *
     * @param hash hash for the key (callers in this file pass hash(key))
     * @param key  the key to look up
     * @return the matching node, or null when the table is unallocated or
     *         empty, the bucket is empty, or no node in the bucket matches
     */
    final Node<K,V> getNode(int hash, Object key) {
        // Read the field once and work on the local reference.
        final Node<K,V>[] buckets = table;
        if (buckets == null) {
            return null;
        }
        final int capacity = buckets.length;
        if (capacity <= 0) {
            return null;
        }

        // Bucket index is (capacity - 1) & hash.
        final Node<K,V> head = buckets[(capacity - 1) & hash];
        if (head == null) {
            return null;
        }

        // Always check the bucket head first.
        if (head.hash == hash) {
            final K headKey = head.key;
            if (headKey == key || (key != null && key.equals(headKey))) {
                return head;
            }
        }

        Node<K,V> cursor = head.next;
        if (cursor == null) {
            return null;
        }

        // If the head is a tree node, the bucket is a tree bin: search the tree.
        if (head instanceof TreeNode) {
            return ((TreeNode<K,V>)head).getTreeNode(hash, key);
        }

        // Otherwise walk the rest of the linked list until a match or the end.
        for (; cursor != null; cursor = cursor.next) {
            if (cursor.hash == hash) {
                final K candidate = cursor.key;
                if (candidate == key || (key != null && key.equals(candidate))) {
                    return cursor;
                }
            }
        }
        return null;
    }

9、remove操作

    /**
     * Removes the mapping for the given key, if any, by delegating to
     * removeNode with value = null, matchValue = false and movable = true.
     *
     * @param key the key whose mapping is removed
     * @return the value of the removed node, or null when nothing was removed
     */
    public V remove(Object key) {
        final Node<K,V> removed = removeNode(hash(key), key, null, false, true);
        if (removed == null) {
            return null;
        }
        return removed.value;
    }

    //看过put方法，removeNode方法的逻辑就非常简单

    /**
     * Core removal routine used by remove.
     *
     * @param hash       hash for the key (remove passes hash(key))
     * @param key        the key to remove
     * @param value      value to compare against; only consulted when matchValue is true
     * @param matchValue if true, the node is removed only when its value equals {@code value}
     * @param movable    forwarded unchanged to removeTreeNode for tree bins
     * @return the removed node, or null when nothing was removed
     */
    final Node<K,V> removeNode(int hash, Object key, Object value,
                               boolean matchValue, boolean movable) {
        Node<K,V>[] tab; Node<K,V> p; int n, index;
        // Proceed only when the table exists, is non-empty and the target
        // bucket ((n - 1) & hash) has a head node p.
        if ((tab = table) != null && (n = tab.length) > 0 &&
            (p = tab[index = (n - 1) & hash]) != null) {
            Node<K,V> node = null, e; K k; V v;
            // Step 1: locate the node. The bucket head matches: node == p.
            if (p.hash == hash &&
                ((k = p.key) == key || (key != null && key.equals(k))))
                node = p;
            else if ((e = p.next) != null) {
                // Tree bin: look the node up in the tree.
                if (p instanceof TreeNode)
                    node = ((TreeNode<K,V>)p).getTreeNode(hash, key);
                else {
                    // Linked list: walk it, keeping p as the predecessor of e
                    // so the match can be unlinked afterwards.
                    do {
                        if (e.hash == hash &&
                            ((k = e.key) == key ||
                             (key != null && key.equals(k)))) {
                            node = e;
                            break;
                        }
                        p = e;
                    } while ((e = e.next) != null);
                }
            }
            // Step 2: unlink the node if found and, when matchValue is set,
            // its value is the same reference as or equals() the given value.
            if (node != null && (!matchValue || (v = node.value) == value ||
                                 (value != null && value.equals(v)))) {
                // Tree node: let the tree remove it.
                if (node instanceof TreeNode)
                    ((TreeNode<K,V>)node).removeTreeNode(this, tab, movable);
                // node is the bucket head: the bucket now starts at its successor.
                else if (node == p)
                    tab[index] = node.next;
                // node is inside the list: bypass it from its predecessor p.
                else
                    p.next = node.next;
                // Count the structural modification and shrink the size.
                ++modCount;
                --size;
                // Callback defined elsewhere; receives the removed node.
                afterNodeRemoval(node);
                return node;
            }
        }
        return null;
    }

10、参考链接

搞懂java HashMap源码

 HashMap的负载因子初始值为什么是0.75?

JDK1.7和JDK1.8中HashMap为什么是线程不安全的?