先贴源码地址

https://github.com/dotnet/corefx/tree/master/src/System.Linq/src

.NET CORE很大一个好处就是代码的开源,你可以详细的查看你使用类的源代码,并学习微软的写法和实现思路。我们这个系列熟悉基本类库是一个目的,另一个目的就是学习微软的实现思路和编程方法。

今天我们就单独讨论的问题是linq中的distinct方法是如何实现。最后还会有我们实际编程时候对distinct方法的扩展。

System.Linq

linq中Distinct方法在Enumerable类中

Enumerable

public static partial class Enumerable

内部去重方法实现有2个重载

1

public static IEnumerable<TSource> Distinct<TSource>(this IEnumerable<TSource> source) => Distinct(source, null);

2

public static IEnumerable<TSource> Distinct<TSource>(this IEnumerable<TSource> source, IEqualityComparer<TSource> comparer)
{
if (source == null)
{
throw Error.ArgumentNull(nameof(source));
}
return new DistinctIterator<TSource>(source, comparer);
}

去重迭代器DistinctIterator

private sealed class DistinctIterator

去重迭代器,先把元素都加到Set<TSource> _set;中,然后用set的UnionWith去重

这里的set是内部实现的一个轻量级的hash set 具体代码下一部分介绍

/// <summary>
/// An iterator that yields the distinct values in an <see cref="IEnumerable{TSource}"/>.
/// </summary>
/// <typeparam name="TSource">The type of the source enumerable.</typeparam>
private sealed class DistinctIterator<TSource> : Iterator<TSource>, IIListProvider<TSource>
{
private readonly IEnumerable<TSource> _source;
private readonly IEqualityComparer<TSource> _comparer;
private Set<TSource> _set;
private IEnumerator<TSource> _enumerator; public DistinctIterator(IEnumerable<TSource> source, IEqualityComparer<TSource> comparer)
{
Debug.Assert(source != null);
_source = source;
_comparer = comparer;
}
public override Iterator<TSource> Clone() => new DistinctIterator<TSource>(_source, _comparer); public override bool MoveNext()
{
switch (_state)
{
case :
_enumerator = _source.GetEnumerator();
if (!_enumerator.MoveNext())
{
Dispose();
return false;
}
TSource element = _enumerator.Current;
_set = new Set<TSource>(_comparer);
_set.Add(element);
_current = element;
_state = ;
return true;
case :
while (_enumerator.MoveNext())
{
element = _enumerator.Current;
if (_set.Add(element))
{
_current = element;
return true;
}
}
break;
}
Dispose();
return false;
}
public override void Dispose()
{
if (_enumerator != null)
{
_enumerator.Dispose();
_enumerator = null;
_set = null;
}
base.Dispose();
}
private Set<TSource> FillSet()
{
Set<TSource> set = new Set<TSource>(_comparer);
set.UnionWith(_source);
return set;
} public TSource[] ToArray() => FillSet().ToArray();
public List<TSource> ToList() => FillSet().ToList();
public int GetCount(bool onlyIfCheap) => onlyIfCheap ? - : FillSet().Count;
}

内部密封的哈希Set

这部分其实是distinct实现的重点,所以内容较多。

在添加元素的时候会对数据进行过滤,如果相同返回false,不同的才会被加入哈希链表中。

/// <summary>
/// A lightweight hash set.
/// </summary>
/// <typeparam name="TElement">The type of the set's items.</typeparam>
internal sealed class Set<TElement>
{
变量
/// <summary>
/// The comparer used to hash and compare items in the set.
/// </summary>
private readonly IEqualityComparer<TElement> _comparer; /// <summary>
/// The hash buckets, which are used to index into the slots.
/// </summary>
private int[] _buckets; /// <summary>
/// The slots, each of which store an item and its hash code.
/// </summary>
private Slot[] _slots;
/// <summary>
/// An entry in the hash set.
/// </summary> private struct Slot
{
/// <summary>
/// The hash code of the item.
/// </summary>
internal int _hashCode; /// <summary>
/// In the case of a hash collision(碰撞), the index of the next slot to probe(查看).
/// </summary>
internal int _next; /// <summary>
/// The item held by this slot.
/// </summary>
internal TElement _value;
} /// <summary>
/// The number of items in this set.
/// </summary>
private int _count;
构造函数
/// <summary>
/// Constructs a set that compares items with the specified comparer.
/// </summary>
/// <param name="comparer">
/// The comparer. If this is <c>null</c>, it defaults to <see cref="EqualityComparer{TElement}.Default"/>.
/// </param>
public Set(IEqualityComparer<TElement> comparer)
{
_comparer = comparer ?? EqualityComparer<TElement>.Default;
_buckets = new int[];
_slots = new Slot[];
}
新增方法
/// <summary>
/// Attempts to add an item to this set.
/// </summary>
/// <param name="value">The item to add.</param>
/// <returns>
/// <c>true</c> if the item was not in the set; otherwise, <c>false</c>.
/// </returns>
public bool Add(TElement value)
{
//根据值获取哈希值 最终调用的是_comparer.GetHashCode(value)
int hashCode = InternalGetHashCode(value);
//遍历对比值 直接找到对应的桶,遍历桶中的元素 _slots[i]._next最后一个值会是-1,所以会跳出循环
for (int i = _buckets[hashCode % _buckets.Length] - ; i >= ; i = _slots[i]._next)
{
if (_slots[i]._hashCode == hashCode && _comparer.Equals(_slots[i]._value, value))
{
return false;
}
}
//如果超出长度,就扩展 乘以2加1
if (_count == _slots.Length)
{
Resize();
} int index = _count;
_count++;
int bucket = hashCode % _buckets.Length;//这里具体桶的位置需要除以总体长度,这样空间利用率更好
_slots[index]._hashCode = hashCode;
_slots[index]._value = value;
_slots[index]._next = _buckets[bucket] - ;//桶中前一个元素的位置索引
_buckets[bucket] = index + ;
return true;
}
去除方法
/// <summary>
/// Attempts to remove an item from this set.
/// </summary>
/// <param name="value">The item to remove.</param>
/// <returns>
/// <c>true</c> if the item was in the set; otherwise, <c>false</c>.
/// </returns>
public bool Remove(TElement value)
{
int hashCode = InternalGetHashCode(value);
int bucket = hashCode % _buckets.Length;
int last = -;
for (int i = _buckets[bucket] - ; i >= ; last = i, i = _slots[i]._next)
{
if (_slots[i]._hashCode == hashCode && _comparer.Equals(_slots[i]._value, value))
{
if (last < )
{
_buckets[bucket] = _slots[i]._next + ;
}
else
{
_slots[last]._next = _slots[i]._next;
} _slots[i]._hashCode = -;
_slots[i]._value = default(TElement);
_slots[i]._next = -;
return true;
}
} return false;
}
扩展set
/// <summary>
/// Expands the capacity of this set to double the current capacity, plus one.
/// </summary>
private void Resize()
{
int newSize = checked((_count * ) + );//这个要检测是否超出int长度限制
int[] newBuckets = new int[newSize];
Slot[] newSlots = new Slot[newSize];
Array.Copy(_slots, , newSlots, , _count);//赋值newSlots数组
for (int i = ; i < _count; i++)
{
int bucket = newSlots[i]._hashCode % newSize;
newSlots[i]._next = newBuckets[bucket] - ;//重新记录桶位置
newBuckets[bucket] = i + ;
} _buckets = newBuckets;
_slots = newSlots;
} /// <summary>
/// Creates an array from the items in this set.
/// </summary>
/// <returns>An array of the items in this set.</returns>
public TElement[] ToArray()
{
TElement[] array = new TElement[_count];
for (int i = ; i != array.Length; ++i)
{
array[i] = _slots[i]._value;
} return array;
} /// <summary>
/// Creates a list from the items in this set.
/// </summary>
/// <returns>A list of the items in this set.</returns>
public List<TElement> ToList()
{
int count = _count;
List<TElement> list = new List<TElement>(count);
for (int i = ; i != count; ++i)
{
list.Add(_slots[i]._value);
} return list;
}
UnionWith方法,实际是执行add
/// <summary>
/// The number of items in this set.
/// </summary>
public int Count => _count; /// <summary>
/// Unions this set with an enumerable.
/// </summary>
/// <param name="other">The enumerable.</param>
public void UnionWith(IEnumerable<TElement> other)
{
Debug.Assert(other != null); foreach (TElement item in other)
{
Add(item);
}
}
内部哈希方法
/// <summary>
/// Gets the hash code of the provided value with its sign bit zeroed out, so that modulo has a positive result.
/// </summary>
/// <param name="value">The value to hash.</param>
/// <returns>The lower 31 bits of the value's hash code.</returns>
private int InternalGetHashCode(TElement value) => value == null ? : _comparer.GetHashCode(value) & 0x7FFFFFFF;
}

扩展distinct的关键

实现IEqualityComparer接口

public interface IEqualityComparer<in T>
{
// true if the specified objects are equal; otherwise, false.
bool Equals(T x, T y);
// Returns a hash code for the specified object.
// 异常:
// T:System.ArgumentNullException:
// The type of obj is a reference type and obj is null.
int GetHashCode(T obj);
}

distinct扩展方法

使用params,支持多字段。

public static class ComparerHelper
{
/// <summary>
/// 自定义Distinct扩展方法
/// </summary>
/// <typeparam name="T">要去重的对象类</typeparam>
/// <param name="source">要去重的对象</param>
/// <param name="getfield">获取自定义去重字段的委托</param>
/// <returns></returns>
public static IEnumerable<T> DistinctEx<T>(this IEnumerable<T> source, params Func<T, object>[] getfield)
{
return source.Distinct(new CompareEntityFields<T>(getfield));
}
}
public class CompareEntityFields<T> : IEqualityComparer<T>
{
private readonly Func<T, object>[] _compareFields; /// <summary>
/// 可以根据字段比对数据
/// </summary>
/// <param name="compareFields">比对字段引用</param>
public CompareEntityFields(params Func<T, object>[] compareFields)
{
_compareFields = compareFields;
} /// <summary>Determines whether the specified objects are equal.</summary>
/// <param name="x">The first object of type T to compare.</param>
/// <param name="y">The second object of type T to compare.</param>
/// <returns>true if the specified objects are equal; otherwise, false.</returns>
bool IEqualityComparer<T>.Equals(T x, T y)
{
if (_compareFields == null || _compareFields.Length <= )
{
return EqualityComparer<T>.Default.Equals(x, y);
} bool result = true;
foreach (var func in _compareFields)
{
var xv = func(x);
var yv = func(y);
result = xv == null && yv == null || Equals(xv, yv);
if (!result) break;
} return result;
} /// <summary>Returns a hash code for the specified object.</summary>
/// <param name="obj">The <see cref="T:System.Object"></see> for which a hash code is to be returned.</param>
/// <returns>A hash code for the specified object.</returns>
/// <exception cref="T:System.ArgumentNullException">
/// The type of <paramref name="obj">obj</paramref> is a reference type
/// and <paramref name="obj">obj</paramref> is null.
/// </exception>
int IEqualityComparer<T>.GetHashCode(T obj)
{
return obj.ToString().GetHashCode();
}
}

Core源码(二) Linq的Distinct扩展的更多相关文章

  1. 一个由正则表达式引发的血案 vs2017使用rdlc实现批量打印 vs2017使用rdlc [asp.net core 源码分析] 01 - Session SignalR sql for xml path用法 MemCahe C# 操作Excel图形——绘制、读取、隐藏、删除图形 IOC,DIP,DI,IoC容器

    1. 血案由来 近期我在为Lazada卖家中心做一个自助注册的项目,其中的shop name校验规则较为复杂,要求:1. 英文字母大小写2. 数字3. 越南文4. 一些特殊字符,如“&”,“- ...

  2. ASP.NET Core[源码分析篇] - WebHost

    _configureServicesDelegates的承接 在[ASP.NET Core[源码分析篇] - Startup]这篇文章中,我们得知了目前为止(UseStartup),所有的动作都是在_ ...

  3. ASP.NET Core[源码分析篇] - Authentication认证

    原文:ASP.NET Core[源码分析篇] - Authentication认证 追本溯源,从使用开始 首先看一下我们通常是如何使用微软自带的认证,一般在Startup里面配置我们所需的依赖认证服务 ...

  4. AQS源码二探-JUC系列

    本文已在公众号上发布,感谢关注,期待和你交流. AQS源码二探-JUC系列 共享模式 doAcquireShared 这个方法是共享模式下获取资源失败,执行入队和等待操作,等待的线程在被唤醒后也在这个 ...

  5. ASP.NET Core源码学习(一)Hosting

    ASP.NET Core源码的学习,我们从Hosting开始, Hosting的GitHub地址为:https://github.com/aspnet/Hosting.git 朋友们可以从以上链接克隆 ...

  6. Unity UGUI图文混排源码(二)

    Unity UGUI图文混排源码(一):http://blog.csdn.net/qq992817263/article/details/51112304 Unity UGUI图文混排源码(二):ht ...

  7. asp.net core源码地址

    https://github.com/dotnet/corefx 这个是.net core的 开源项目地址 https://github.com/aspnet 这个下面是asp.net core 框架 ...

  8. JMeter 源码二次开发函数示例

    JMeter 源码二次开发函数示例 一.JMeter 5.0 版本 实际测试中,依靠jmeter自带的函数已经无法满足我们需求,这个时候就需要二次开发.本次导入的是jmeter 5.0的源码进行实际的 ...

  9. 一起来看CORE源码(一) ConcurrentDictionary

    先贴源码地址 https://github.com/dotnet/corefx/blob/master/src/System.Collections.Concurrent/src/System/Col ...

  10. ASP .NET CORE 源码地址

    ASP .NET CORE 源码地址:https://github.com/dotnet/ 下拉可以查找相应的源码信息, 例如:查找 ASP .NET CORE Microsoft.Extension ...

随机推荐

  1. 将vue项目部署在Linux的Nginx下,并设置为局域网内访问

    1. 下载 wget http://nginx.org/download/nginx-1.12.2.tar.gz 2. 解压缩 tar -zxvf linux-nginx-1.12.2.tar.gz ...

  2. 源生JS实现点击复制功能

    之前在工作中,有位同事问过我一个问题,JS如何实现点击复制功能.给他解决后现在来总结归纳一下,顺便做个笔记. PS:此乃本人第一篇博客(跟着同事大佬学习),涉及知识尚浅,如有任何意见和建议请告知于我. ...

  3. JavaScript定时器方法

    一.setTimeout() 延迟性操作 window.setTimeout(function(){ console.log('派大星');//延迟了4秒 },4000); console.log(' ...

  4. IOS系统input输入框为readonly时, 隐藏键盘上的上下箭头

    业务中在一定场景中会将input 设置为只读状态,在IOS safari上当input 输入框focus 时,仍会出现键盘上的上下箭头,这种用户体验非常不好,如何干掉呢? <input read ...

  5. PAT 1010 Radix 进制转换+二分法

    Given a pair of positive integers, for example, 6 and 110, can this equation 6 = 110 be true? The an ...

  6. ASP.NET Core 3.0 gRPC 配置使用HTTP

    前言 gRPC是基于http/2,是同时支持https和http协议的,我们在gRPC实际使用中,在内网通讯场景下,更多的是走http协议,达到更高的效率,下面介绍如何在 .NET Core 3.0 ...

  7. 计算机网络知识(TCP连接,TCP/UDP区别,HTTP与HTTPS,Socket原理等等)

    1.网络七层协议包含,物理层.数据链路层.网络层(ip协议).传输层(TCP传输控制协议.UDP用户数据报协议).会话层.表示层.应用层(http协议).是一个提供的概念架构协议. 2.TCP/IP协 ...

  8. SSH框架之Hibernate第二篇

    1.1 持久化类的编写规则 1.1.1 什么是持久化类? 持久化类 : 与表建立了映射关系的实体类,就可以称之为持久化类. 持久化类 = Java类 + 映射文件. 1.1.2 持久化类的编写规则 ( ...

  9. Add an Item to the Navigation Control 将项目添加到导航控件

    In this lesson, you will learn how to add an item to the navigation control. For this purpose, the N ...

  10. jQuery学习笔记Fisrt Day

    跳过JS直接JQUERY,“不愧是你”. 对就是我. 今天开始jQuery学习第一天. click事件方法: 鼠标点击 dbl事件方法: 双击鼠标 mouseenter事件方法: 鼠标进入 mouse ...