C#: comparing the UTF-8 and UTF-32 encodings of the same text using MD5 hashes
- using System;
- using System.Text;
- using System.IO;
- using System.Security.Cryptography;
/// <summary>
/// Program entry point. Runs the encoding comparison on the file named by the
/// first command-line argument, or on "lyf.txt" when no argument is given,
/// then waits for Enter so the console output stays visible.
/// </summary>
/// <param name="args">Command-line arguments; <c>args[0]</c>, if present, is the file to compare.</param>
static void Main(string[] args)
{
    // Fall back to the original sample file so running without arguments behaves as before.
    string fileFullName = (args != null && args.Length > 0) ? args[0] : "lyf.txt";

    CompareFileGetBytes(fileFullName);

    // Keep the console window open until the user presses Enter.
    Console.ReadLine();
}
/// <summary>
/// Reads a file once and prints the MD5 hashes of (1) its raw bytes, (2) its text
/// re-encoded as UTF-8 and (3) its text re-encoded as UTF-32, followed by whether
/// the raw/UTF-8 pair and the UTF-8/UTF-32 pair are equal.
/// </summary>
/// <param name="fileFullName">Path of the file to read.</param>
static void CompareFileGetBytes(string fileFullName)
{
    // Read the file a single time so every hash below describes the same snapshot.
    byte[] fileReadAllBytes = File.ReadAllBytes(fileFullName);
    string fileReadAllBytesMd5 = GetBytesMd5(fileReadAllBytes);

    // Decode the text from the bytes already in memory. StreamReader's defaults
    // (UTF-8 with byte-order-mark detection) are the same for a stream as for a
    // path, so the decoded text matches what reading the file by name would give.
    string textResult;
    using (MemoryStream rawStream = new MemoryStream(fileReadAllBytes, false))
    using (StreamReader reader = new StreamReader(rawStream))
    {
        textResult = reader.ReadToEnd();
    }

    // Re-encode the same text in both encodings and hash each byte sequence.
    string utf8Md5 = GetBytesMd5(Encoding.UTF8.GetBytes(textResult));
    string utf32Md5 = GetBytesMd5(Encoding.UTF32.GetBytes(textResult));

    Console.WriteLine($"fileReadAllBytesMd5:{fileReadAllBytesMd5},utf8Md5:{utf8Md5}");
    if (string.Equals(fileReadAllBytesMd5, utf8Md5, StringComparison.Ordinal))
    {
        Console.WriteLine($"{nameof(fileReadAllBytesMd5)} is equal with {nameof(utf8Md5)}!");
    }
    else
    {
        Console.WriteLine($"{nameof(fileReadAllBytesMd5)} is not equal with {nameof(utf8Md5)}!");
    }

    Console.WriteLine($"utf8Md5:{utf8Md5},utf32Md5:{utf32Md5}");
    if (string.Equals(utf8Md5, utf32Md5, StringComparison.Ordinal))
    {
        Console.WriteLine($"{nameof(utf8Md5)} is equals with {nameof(utf32Md5)}");
    }
    else
    {
        Console.WriteLine($"{nameof(utf8Md5)} is not equals with {nameof(utf32Md5)}");
    }
}
/// <summary>
/// Computes the MD5 hash of <paramref name="bytesData"/> and returns it as a
/// lowercase hexadecimal string (two characters per hash byte).
/// </summary>
/// <param name="bytesData">The bytes to hash; must not be null.</param>
/// <returns>The MD5 digest formatted as lowercase hex.</returns>
/// <exception cref="ArgumentNullException"><paramref name="bytesData"/> is null.</exception>
static string GetBytesMd5(byte[] bytesData)
{
    if (bytesData == null)
    {
        throw new ArgumentNullException(nameof(bytesData));
    }

    // MD5 is used here only as a content fingerprint for comparison, not for security.
    using (MD5 md5 = MD5.Create())
    {
        byte[] md5Bytes = md5.ComputeHash(bytesData);

        // Each byte becomes exactly two hex characters, so the final length is known up front.
        StringBuilder md5Builder = new StringBuilder(md5Bytes.Length * 2);
        foreach (byte hashByte in md5Bytes)
        {
            md5Builder.Append(hashByte.ToString("x2"));
        }

        return md5Builder.ToString();
    }
}
I have verified that encoding the same text with different encodings produces different byte sequences, and therefore different MD5 hashes; the UTF-8 and UTF-32 results are not identical.
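Besides, the hash of File.ReadAllBytes matches the UTF-8 hash. File.ReadAllBytes itself applies no encoding (it returns the file's raw bytes), so the identical result indicates that the file is stored as UTF-8 without a byte-order mark.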
