有了上一篇《.NET Core玩转机器学习》打基础,这一次我们以纽约出租车费的预测作为新的场景案例,来体验一下回归模型。






  • 创建项目

    看过上一篇文章的读者,就比较轻车熟路了,推荐使用Visual Studio 2017创建一个.NET Core的控制台应用程序项目,命名为TaxiFarePrediction。使用NuGet包管理工具添加对Microsoft.ML的引用。

  • 准备数据集




    字段名 含义 说明
    vendor_id 供应商编号 特征值
    rate_code 比率码 特征值
    passenger_count 乘客人数 特征值
    trip_time_in_secs 行程时长 特征值
    trip_distance 行程距离 特征值
    payment_type 支付类型 特征值
    fare_amount 费用 目标值


  • 定义数据类型和路径


    using System;
    using Microsoft.ML.Models;
    using Microsoft.ML.Runtime;
    using Microsoft.ML.Runtime.Api;
    using Microsoft.ML.Trainers;
    using Microsoft.ML.Transforms;
    using System.Collections.Generic;
    using System.Linq;
    using Microsoft.ML;


    // Training data set (CSV with a header row) that the pipeline loads to fit the model.
    const string DataPath = @".\Data\taxi-fare-train.csv";
    // Test data set (CSV with a header row) that is used to evaluate the trained model.
    const string TestDataPath = @".\Data\taxi-fare-test.csv";
    // File the trained model is written to.
    const string ModelPath = @".\Models\Model.zip";
    // Directory that contains the saved model file.
    const string ModelDirectory = @".\Models";


    /// <summary>
    /// One row of the taxi-fare data set. Each field is bound to a CSV column
    /// through its zero-based ordinal.
    /// </summary>
    public class TaxiTrip
    {
        [Column(ordinal: "0")]
        public string vendor_id;
        [Column(ordinal: "1")]
        public string rate_code;
        [Column(ordinal: "2")]
        public float passenger_count;
        [Column(ordinal: "3")]
        public float trip_time_in_secs;
        [Column(ordinal: "4")]
        public float trip_distance;
        [Column(ordinal: "5")]
        public string payment_type;
        [Column(ordinal: "6")]
        public float fare_amount;
    }

    /// <summary>
    /// Output of the regression model for a single trip.
    /// </summary>
    public class TaxiTripFarePrediction
    {
        // The regressor writes its result to the "Score" column; without this
        // mapping the field would bind to the input fare_amount column instead.
        [ColumnName("Score")]
        public float fare_amount;
    }

    /// <summary>
    /// Sample trips used to try out the trained model.
    /// </summary>
    static class TestTrips
    {
        // trip_time_in_secs is not set here and keeps its default value.
        internal static readonly TaxiTrip Trip1 = new TaxiTrip
        {
            vendor_id = "VTS",
            rate_code = "1",
            passenger_count = 1,
            trip_distance = 10.33f,
            payment_type = "CSH",
            fare_amount = 0 // predict it. actual = 29.5
        };
    }
  • 创建处理过程


    /// <summary>
    /// Builds the learning pipeline, trains a FastTree regression model on the
    /// training data set and saves it to <c>ModelPath</c>.
    /// </summary>
    /// <returns>The trained prediction model.</returns>
    public static async Task<PredictionModel<TaxiTrip, TaxiTripFarePrediction>> Train()
    {
        var pipeline = new LearningPipeline();

        // Load the CSV training data; the first row is a header.
        pipeline.Add(new TextLoader<TaxiTrip>(DataPath, useHeader: true, separator: ","));

        // The trainer expects the target value in a column named "Label".
        pipeline.Add(new ColumnCopier(("fare_amount", "Label")));

        // Turn the categorical text columns into numeric one-hot vectors.
        pipeline.Add(new CategoricalOneHotVectorizer("vendor_id",
                                                     "rate_code",
                                                     "payment_type"));

        // Combine the input columns into the single "Features" vector the trainer consumes.
        pipeline.Add(new ColumnConcatenator("Features",
                                            "vendor_id",
                                            "rate_code",
                                            "passenger_count",
                                            "trip_distance",
                                            "payment_type"));

        pipeline.Add(new FastTreeRegressor());

        PredictionModel<TaxiTrip, TaxiTripFarePrediction> model = pipeline.Train<TaxiTrip, TaxiTripFarePrediction>();

        // Make sure the target directory exists before the model file is written.
        if (!Directory.Exists(ModelDirectory))
        {
            Directory.CreateDirectory(ModelDirectory);
        }

        await model.WriteAsync(ModelPath);
        return model;
    }
  • 评估验证模型


    /// <summary>
    /// Evaluates the trained model against the test data set and prints the
    /// RMS error and the R-squared value to the console.
    /// </summary>
    /// <param name="model">The trained model to evaluate.</param>
    public static void Evaluate(PredictionModel<TaxiTrip, TaxiTripFarePrediction> model)
    {
        var testData = new TextLoader<TaxiTrip>(TestDataPath, useHeader: true, separator: ",");
        var evaluator = new RegressionEvaluator();
        RegressionMetrics metrics = evaluator.Evaluate(model, testData);

        // Rms should be around 2.795276
        Console.WriteLine("Rms=" + metrics.Rms);
        Console.WriteLine("RSquared = " + metrics.RSquared);
    }
  • 预测新数据


    /// <summary>
    /// Sample trips used to try out the trained model.
    /// </summary>
    static class TestTrips
    {
        // trip_time_in_secs is not set here and keeps its default value.
        internal static readonly TaxiTrip Trip1 = new TaxiTrip
        {
            vendor_id = "VTS",
            rate_code = "1",
            passenger_count = 1,
            trip_distance = 10.33f,
            payment_type = "CSH",
            fare_amount = 0 // predict it. actual = 29.5
        };
    }


    // Score the sample trip with the trained model and print the predicted
    // fare next to the known actual fare.
    var farePrediction = model.Predict(TestTrips.Trip1);
    Console.WriteLine(
        "Predicted fare: {0}, actual fare: 29.5",
        farePrediction.fare_amount);
  • 运行结果

到此我们完成了所有的步骤。关于这些代码的详细说明,可以参看《Tutorial: Use ML.NET to Predict New York Taxi Fares (Regression)》。只是要注意,该文中的部分代码有误,并且用到了C# 7.1的语法特性(例如下面的 async Main 方法);本文的代码是经过了修正的。完整的代码如下:

using System;
using Microsoft.ML.Models;
using Microsoft.ML.Runtime;
using Microsoft.ML.Runtime.Api;
using Microsoft.ML.Trainers;
using Microsoft.ML.Transforms;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML;
using System.Threading.Tasks;
using System.IO;

namespace TaxiFarePrediction
{
    /// <summary>
    /// Console application that trains, evaluates and uses a regression model
    /// to predict New York taxi fares.
    /// </summary>
    class Program
    {
        // Training data set (CSV with a header row).
        const string DataPath = @".\Data\taxi-fare-train.csv";
        // Test data set (CSV with a header row) used for evaluation.
        const string TestDataPath = @".\Data\taxi-fare-test.csv";
        // File the trained model is written to.
        const string ModelPath = @".\Models\Model.zip";
        // Directory that contains the saved model file.
        const string ModelDirectory = @".\Models";

        /// <summary>
        /// One row of the taxi-fare data set. Each field is bound to a CSV
        /// column through its zero-based ordinal.
        /// </summary>
        public class TaxiTrip
        {
            [Column(ordinal: "0")]
            public string vendor_id;
            [Column(ordinal: "1")]
            public string rate_code;
            [Column(ordinal: "2")]
            public float passenger_count;
            [Column(ordinal: "3")]
            public float trip_time_in_secs;
            [Column(ordinal: "4")]
            public float trip_distance;
            [Column(ordinal: "5")]
            public string payment_type;
            [Column(ordinal: "6")]
            public float fare_amount;
        }

        /// <summary>
        /// Output of the regression model for a single trip.
        /// </summary>
        public class TaxiTripFarePrediction
        {
            // The regressor writes its result to the "Score" column; without
            // this mapping the field would bind to the input fare_amount
            // column instead.
            [ColumnName("Score")]
            public float fare_amount;
        }

        /// <summary>
        /// Sample trips used to try out the trained model.
        /// </summary>
        static class TestTrips
        {
            // trip_time_in_secs is not set here and keeps its default value.
            internal static readonly TaxiTrip Trip1 = new TaxiTrip
            {
                vendor_id = "VTS",
                rate_code = "1",
                passenger_count = 1,
                trip_distance = 10.33f,
                payment_type = "CSH",
                fare_amount = 0 // predict it. actual = 29.5
            };
        }

        /// <summary>
        /// Builds the learning pipeline, trains a FastTree regression model on
        /// the training data set and saves it to <see cref="ModelPath"/>.
        /// </summary>
        /// <returns>The trained prediction model.</returns>
        public static async Task<PredictionModel<TaxiTrip, TaxiTripFarePrediction>> Train()
        {
            var pipeline = new LearningPipeline();

            // Load the CSV training data; the first row is a header.
            pipeline.Add(new TextLoader<TaxiTrip>(DataPath, useHeader: true, separator: ","));

            // The trainer expects the target value in a column named "Label".
            pipeline.Add(new ColumnCopier(("fare_amount", "Label")));

            // Turn the categorical text columns into numeric one-hot vectors.
            pipeline.Add(new CategoricalOneHotVectorizer("vendor_id",
                                                         "rate_code",
                                                         "payment_type"));

            // Combine the input columns into the single "Features" vector the trainer consumes.
            pipeline.Add(new ColumnConcatenator("Features",
                                                "vendor_id",
                                                "rate_code",
                                                "passenger_count",
                                                "trip_distance",
                                                "payment_type"));

            pipeline.Add(new FastTreeRegressor());

            PredictionModel<TaxiTrip, TaxiTripFarePrediction> model = pipeline.Train<TaxiTrip, TaxiTripFarePrediction>();

            // Make sure the target directory exists before the model file is written.
            if (!Directory.Exists(ModelDirectory))
            {
                Directory.CreateDirectory(ModelDirectory);
            }

            await model.WriteAsync(ModelPath);
            return model;
        }

        /// <summary>
        /// Evaluates the trained model against the test data set and prints
        /// the RMS error and the R-squared value to the console.
        /// </summary>
        /// <param name="model">The trained model to evaluate.</param>
        public static void Evaluate(PredictionModel<TaxiTrip, TaxiTripFarePrediction> model)
        {
            var testData = new TextLoader<TaxiTrip>(TestDataPath, useHeader: true, separator: ",");
            var evaluator = new RegressionEvaluator();
            RegressionMetrics metrics = evaluator.Evaluate(model, testData);

            // Rms should be around 2.795276
            Console.WriteLine("Rms=" + metrics.Rms);
            Console.WriteLine("RSquared = " + metrics.RSquared);
        }

        /// <summary>
        /// Entry point: trains the model, evaluates it and predicts the fare
        /// of a sample trip. An async Main requires C# 7.1 or later.
        /// </summary>
        static async Task Main(string[] args)
        {
            PredictionModel<TaxiTrip, TaxiTripFarePrediction> model = await Train();
            Evaluate(model);

            var prediction = model.Predict(TestTrips.Trip1);
            Console.WriteLine("Predicted fare: {0}, actual fare: 29.5", prediction.fare_amount);
        }
    }
}

不知不觉我们的ML.NET之旅又向前进了一步,是不是对于使用.NET Core进行机器学习解决现实生活中的问题更有兴趣了?请保持关注吧。


