微众银行FATE联邦学习框架
参考:https://github.com/webankfintech/fate
https://www.fedai.org/#/
FATE $ sh build_standalone_docker.sh
FATE $ CONTAINER_ID=`docker run -t -d fate/standalone`
FATE $ docker exec -t -i ${CONTAINER_ID} bash
# Walk every folder level by level and run each Python file whose name ends
# in _test.py and which sits directly inside a folder named "test".
cd "$(dirname "$0")" || exit 1
cur_dir=$(pwd) # absolute path of the directory that holds this script
# NOTE(review): this excerpt never calls traverse_folder; the log paths
# (".../federatedml/test/../...") suggest the full script ends with
# `traverse_folder "$cur_dir/.."` - confirm against the upstream file.

#######################################
# Run a single Python test file.
# Arguments: $1 - path of the test file
# Outputs:   "start to run test <path>", followed by the test's own output
# Returns:   exit status of the python interpreter
#######################################
run_test() {
  local file=$1
  echo "start to run test ${file}"
  python "$file"
}

#######################################
# Recursively visit a directory and hand matching test files to run_test.
# A file matches when its name ends in "_test.py" and the directory that
# directly contains it is named "test".
# Arguments: $1 - directory to traverse
#######################################
traverse_folder() {
  local dir=$1
  local entry name
  # Glob instead of parsing `ls`, so names with spaces survive intact.
  for entry in "$dir"/*; do
    # An empty directory leaves the literal pattern behind; skip it.
    [[ -e "$entry" ]] || continue
    name=${entry##*/}
    if [[ -d "$entry" ]]; then
      traverse_folder "$entry"
    elif [[ "$name" == *_test.py && "$dir" == */test ]]; then
      run_test "$entry"
    fi
  done
}
sh ./federatedml/test/run_test.sh > test_log.txt:
start to run test /fate/federatedml/test/../evaluation/test/evaluation_test.py
{'auc': 0.0, 'ks': 0.0, 'lift': [(0.5, 0.0)], 'precision': [(0.5, 0.0)], 'recall': [(0.5, 0.0)], 'accuracy': [(0.5, 0.0)], 'explained_variance': -0.6539, 'mean_absolute_error': 0.638, 'mean_squared_error': 0.4135, 'mean_squared_log_error': 0.1988, 'median_absolute_error': 0.635, 'r2_score': -0.6539, 'root_mean_squared_error': 0.643}
start to run test /fate/federatedml/test/../feature/test/feature_select_test.py
param_set {
key: "eps"
value: 9.999999747378752e-06
}
original_cols: 0
original_cols: 1
left_cols: 0
left_cols: 1
filter_name: "unique_value"
start to run test /fate/federatedml/test/../feature/test/imputer_test.py
start to run test /fate/federatedml/test/../feature/test/instance_test.py
start to run test /fate/federatedml/test/../feature/test/min_max_scaler_test.py
start to run test /fate/federatedml/test/../feature/test/quantile_binning_test.py
-0.928683554705954 -0.8283685741417925 -0.8111241409211205
min_rank: 190, found_rank: 214, max_rank: 210
Spend time: 2.453406572341919
collect and use numpy time: 4.527343034744263
start to run test /fate/federatedml/test/../feature/test/quantile_summaries_test.py
min_rank: 89800, found_rank: 89995, max_rank: 90200
min_rank: 89800, found_rank: 89948, max_rank: 90200
min_rank: 89800, found_rank: 89995, max_rank: 90200
min_rank: 89800, found_rank: 89979, max_rank: 90200
min_rank: 89800, found_rank: 89977, max_rank: 90200
min_rank: 89800, found_rank: 90041, max_rank: 90200
start to run test /fate/federatedml/test/../feature/test/quantile_test.py
start to run test /fate/federatedml/test/../feature/test/sampler_test.py
start to run test /fate/federatedml/test/../feature/test/sparse_vector_test.py
start to run test /fate/federatedml/test/../feature/test/standard_scaler_test.py
start to run test /fate/federatedml/test/../ftl/test/common_data_util_test.py
start to run test /fate/federatedml/test/../ftl/test/eggroll_XY_test.py
--- test_distributed_calculate_XY_1 ---
[[ 2. 4. 6.]
[ 4. 5. 6.]
[-7. -8. -9.]
[10. 11. 12.]] (4, 3)
--- test_distributed_calculate_XY_2 ---
[[[0.43551517 0.70466051 0.30466157]
[0.26808046 0.76825121 0.06513553]
[0.51540669 0.2421427 0.48011399]] [[0.42700336 0.33585337 0.49496926]
[0.25135199 0.38382964 0.12959278]
[0.35088255 0.45791475 0.25325645]] [[0.13418796 0.20833246 0.31760359]
[0.2749853 0.21743268 0.48146084]
[0.20890022 0.14694317 0.49790149]] [[0.00388095 0.00665658 0.00157656]
[0.00567649 0.00432882 0.00119392]
[0.0014735 0.00646106 0.00375111]]] (4, 3, 3)
--- test_distributed_calculate_avg_XY_1 ---
--- test_distributed_calculate_avg_XY_2 ---
--- test_distributed_calculate_sum_XY ---
--- test_distributed_compute_XY_plus_Z ---
start to run test /fate/federatedml/test/../ftl/test/eggroll_X_plus_Y_test.py
start to run test /fate/federatedml/test/../ftl/test/eggroll_encryption_matmul_test.py
start to run test /fate/federatedml/test/../ftl/test/eggroll_encryption_runtime_test.py
start to run test /fate/federatedml/test/../ftl/test/eggroll_encryption_test.py
start to run test /fate/federatedml/test/../ftl/test/eggroll_storage_test.py
{'learning_rate': 0.01, 'input_dim': 100, 'hidden_dim': 64}
start to run test /fate/federatedml/test/../ftl/test/host_guest_factory_test.py
start to run test /fate/federatedml/test/../ftl/test/params_validation_test.py
start to run test /fate/federatedml/test/../ftl/test/random_mask_test.py
----test_mask_2_dim----
----test_mask_3_dim_1----
----test_encrypt_3_dim_2----
----test_mask_float----
original matrix 31.444
cleared_matrix 31.444
----test_mask_integer----
original matrix 31
cleared_matrix 31.0
----test_mask_scalar----
start to run test /fate/federatedml/test/../ftl/test/whitebox_autoencoder_test.py
0 / 0 cost: 1.0359334789708443
0 / 0 cost: 0.9028475258265454
0 / 0 cost: 0.7708735124447182
0 / 0 cost: 0.6399453667039348
0 / 0 cost: 0.5099926274994172
0 / 0 cost: 0.38094404886348604
0 / 0 cost: 0.25272588523352807
0 / 0 cost: 0.1252602715537511
0 / 0 cost: -0.0015352838442681354
0 / 0 cost: -0.1277506050008835
start to run test /fate/federatedml/test/../ftl/test/whitebox_enc_gradients_test.py
encrypt_grads_ex shape (4, 1, 5)
encrypt_grads_W shape (4, 5)
encrypt_grads_b shape (4,)
encrypt_grads_ex shape (2, 1, 5)
encrypt_grads_W shape (2, 5)
encrypt_grads_b shape (2,)
encrypt_grads_ex shape (4, 1, 5)
encrypt_grads_W shape (4, 5)
encrypt_grads_b shape (4,)
encrypt_grads_ex shape (2, 1, 5)
encrypt_grads_W shape (2, 5)
encrypt_grads_b shape (2,)
encrypt_grads_ex shape (4, 1, 5)
encrypt_grads_W shape (4, 5)
encrypt_grads_b shape (4,)
encrypt_grads_ex shape (2, 1, 5)
encrypt_grads_W shape (2, 5)
encrypt_grads_b shape (2,)
encrypt_grads_ex shape (4, 1, 5)
encrypt_grads_W shape (4, 5)
encrypt_grads_b shape (4,)
encrypt_grads_ex shape (2, 1, 5)
encrypt_grads_W shape (2, 5)
encrypt_grads_b shape (2,)
start to run test /fate/federatedml/test/../ftl/test/whitebox_faster_enc_gradients_test.py
encrypt_grads_ex shape (4, 1, 5)
encrypt_grads_W shape (4, 5)
encrypt_grads_b shape (4,)
encrypt_grads_ex shape (2, 1, 5)
encrypt_grads_W shape (2, 5)
encrypt_grads_b shape (2,)
encrypt_grads_ex shape (4, 1, 5)
encrypt_grads_W shape (4, 5)
encrypt_grads_b shape (4,)
encrypt_grads_ex shape (2, 1, 5)
encrypt_grads_W shape (2, 5)
encrypt_grads_b shape (2,)
encrypt_grads_ex shape (4, 1, 5)
encrypt_grads_W shape (4, 5)
encrypt_grads_b shape (4,)
encrypt_grads_ex shape (2, 1, 5)
encrypt_grads_W shape (2, 5)
encrypt_grads_b shape (2,)
encrypt_grads_ex shape (4, 1, 5)
encrypt_grads_W shape (4, 5)
encrypt_grads_b shape (4,)
encrypt_grads_ex shape (2, 1, 5)
encrypt_grads_W shape (2, 5)
encrypt_grads_b shape (2,)
start to run test /fate/federatedml/test/../ftl/test/whitebox_plain_gradients_test.py
start to run test /fate/federatedml/test/../logistic_regression/hetero_dnn_logistic_regression/test/local_model_proxy_test.py
----test_DNNLR_transform----
0 [4 5 6 7 8]
1 [9 3 6 7 8]
2 [ 7 8 9 10 11]
3 [1 2 3 4 5]
index_tracking_list [0, 1, 2, 3]
actual_features [[114 169 196 225]
[118 179 212 247]
[171 253 298 345]
[ 57 85 94 105]]
expected_trans_features [[114 169 196 225]
[118 179 212 247]
[171 253 298 345]
[ 57 85 94 105]]
----test_DNNLR_update_local_model----
0 [4 5 6 7 8]
1 [9 3 6 7 8]
2 [ 7 8 9 10 11]
3 [1 2 3 4 5]
X:
[[ 4 5 6 7 8]
[ 9 3 6 7 8]
[ 7 8 9 10 11]
[ 1 2 3 4 5]] (4, 5)
in_grad:
[[ 24. 32. 40.]
[ 72. 96. 120.]
[ 36. 48. 60.]
[ 12. 16. 20.]] (4, 3)
expected_instances: [[ 4 5 6 7 8]
[ 9 3 6 7 8]
[ 7 8 9 10 11]
[ 1 2 3 4 5]]
actual_instances: [[ 4 5 6 7 8]
[ 9 3 6 7 8]
[ 7 8 9 10 11]
[ 1 2 3 4 5]]
expected_back_grad [[ 24 32 40]
[ 72 96 120]
[ 36 48 60]
[ 12 16 20]]
actual_back_grad [[ 24. 32. 40.]
[ 72. 96. 120.]
[ 36. 48. 60.]
[ 12. 16. 20.]]
start to run test /fate/federatedml/test/../logistic_regression/test/homo_lr_test.py
before training, coef: [0.13968188 0.88744885 0.80187938 0.43285573 0.80779013], intercept: 0.831288889651533
[100, 5.723491806287644, 5.671945837532906, 5.621491950842434, 5.572123354216551, 5.523832548805044, 5.476611347744709, 5.43045089676398, 5.385341696437183, 5.341273625963129, 5.2982359683360585]
before training, coef: [0.86678672 0.84918847 0.52887216 0.43555516 0.91879362], intercept: 0.8588231086310137
After training, coef: [0.86346528 0.84547825 0.52417372 0.43242595 0.91320305], intercept: 0.8380594240649859, loss: 5.835974210822435
start to run test /fate/federatedml/test/../logistic_regression/test/logistic_regression_test.py
start to run test /fate/federatedml/test/../loss/test/cross_entropy_test.py
start to run test /fate/federatedml/test/../loss/test/regression_loss_test.py
start to run test /fate/federatedml/test/../model_selection/test/KFold_test.py
type: IN_MEMORY, namespace: 123, name: 3f80a06c-8b85-11e9-8fee-0242ac110002, partitions: 3 1000
expect_train_data_num: 900.0, expect_test_data_num: 100.0
train_num: 900, test_num: 100
train_num: 900, test_num: 100
train_num: 900, test_num: 100
train_num: 900, test_num: 100
train_num: 900, test_num: 100
train_num: 900, test_num: 100
train_num: 900, test_num: 100
train_num: 900, test_num: 100
train_num: 900, test_num: 100
train_num: 900, test_num: 100
start to run test /fate/federatedml/test/../model_selection/test/mini_batch_test.py
start to run test /fate/federatedml/test/../optim/federated_aggregator/test/hetero_federated_aggregator_test.py
start to run test /fate/federatedml/test/../optim/gradient/test/gradient_method_test.py
compute time: 13.412506341934204
start to run test /fate/federatedml/test/../optim/gradient/test/hetero_lr_gradient_test.py
start to run test /fate/federatedml/test/../optim/gradient/test/homo_lr_gradient_test.py
start to run test /fate/federatedml/test/../optim/test/convergence_test.py
start to run test /fate/federatedml/test/../optim/test/initialize_test.py
start to run test /fate/federatedml/test/../optim/test/updater_test.py
start to run test /fate/federatedml/test/../secureprotol/test/encode_test.py
start to run test /fate/federatedml/test/../secureprotol/test/fate_paillier_test.py
start to run test /fate/federatedml/test/../statistic/test/statics_test.py
start to run test /fate/federatedml/test/../tree/test/criterion_test.py
start to run test /fate/federatedml/test/../tree/test/feature_histogram_test.py
start to run test /fate/federatedml/test/../tree/test/node_test.py
start to run test /fate/federatedml/test/../util/test/classify_label_checker_test.py
start to run test /fate/federatedml/test/../util/test/data_io_test.py
start to run test /fate/federatedml/test/../util/test/param_extract_test.py
start to run test /fate/federatedml/test/../util/test/parameter_checker_test.py
./federatedml/evaluation/test/evaluation_test.py:
from federatedml.evaluation import Evaluation
import numpy as np
import unittest


class TestClassificationEvaluaction(unittest.TestCase):
    """Unit tests for the metrics exposed by ``Evaluation``.

    Covers the binary, multi-class and regression metric methods plus the
    combined ``report`` call.
    """

    # Every metric name requested from ``Evaluation.report`` by the report tests.
    ALL_METRICS = ("auc", "ks", "lift", "precision", "recall", "accuracy",
                   "explained_variance", "mean_absolute_error", "mean_squared_error",
                   "mean_squared_log_error", "median_absolute_error", "r2_score",
                   "root_mean_squared_error")

    def assertFloatEqual(self, op1, op2):
        """Assert that two numbers differ by less than 1e-6."""
        diff = np.abs(op1 - op2)
        self.assertLess(diff, 1e-6)

    def test_auc(self):
        y_true = np.array([0, 0, 1, 1])
        y_predict = np.array([0.1, 0.4, 0.35, 0.8])
        ground_true_auc = 0.75

        eva = Evaluation("binary")
        auc = eva.auc(y_true, y_predict)
        auc = round(auc, 2)

        self.assertFloatEqual(auc, ground_true_auc)

    def test_ks(self):
        y_true = np.array([1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0])
        y_predict = np.array(
            [0.42, 0.73, 0.55, 0.37, 0.57, 0.70, 0.25, 0.23, 0.46, 0.62, 0.76, 0.46, 0.55, 0.56, 0.56, 0.38, 0.37, 0.73,
             0.77, 0.21, 0.39])
        ground_true_ks = 0.75

        eva = Evaluation("binary")
        ks = eva.ks(y_true, y_predict)
        ks = round(ks, 2)

        self.assertFloatEqual(ks, ground_true_ks)

    def test_lift(self):
        y_true = np.array([1, 1, 0, 0, 0, 1, 1, 0, 0, 1])
        y_predict = np.array([0.57, 0.70, 0.25, 0.30, 0.46, 0.62, 0.76, 0.46, 0.35, 0.56])
        # Keys are str(threshold); the first threshold is 0, so its key must be
        # "0" (an empty-string key would raise KeyError in the loop below).
        dict_score = {"0": {0: 0, 1: 1}, "0.4": {0: 2, 1: 1.43}, "0.6": {0: 1.43, 1: 2}}

        eva = Evaluation("binary")
        split_thresholds = [0, 0.4, 0.6]

        lifts = eva.lift(y_true, y_predict, thresholds=split_thresholds)
        fix_lifts = [[round(pos, 2) for pos in lift] for lift in lifts]

        for i, threshold in enumerate(split_thresholds):
            score_0 = dict_score[str(threshold)][0]
            score_1 = dict_score[str(threshold)][1]

            pos_lift = fix_lifts[i]
            self.assertEqual(len(pos_lift), 2)
            self.assertFloatEqual(score_0, pos_lift[0])
            self.assertFloatEqual(score_1, pos_lift[1])

    def test_precision(self):
        y_true = np.array([1, 1, 0, 0, 0, 1, 1, 0, 0, 1])
        y_predict = np.array([0.57, 0.70, 0.25, 0.30, 0.46, 0.62, 0.76, 0.46, 0.35, 0.56])
        dict_score = {"0.4": {0: 1, 1: 0.71}, "0.6": {0: 0.71, 1: 1}}

        eva = Evaluation("binary")
        split_thresholds = [0.4, 0.6]

        prec_values = eva.precision(y_true, y_predict, thresholds=split_thresholds)
        fix_prec_values = [[round(pos, 2) for pos in prec_value] for prec_value in prec_values]

        for i, threshold in enumerate(split_thresholds):
            score_0 = dict_score[str(threshold)][0]
            score_1 = dict_score[str(threshold)][1]

            pos_prec_value = fix_prec_values[i]
            self.assertEqual(len(pos_prec_value), 2)
            self.assertFloatEqual(score_0, pos_prec_value[0])
            self.assertFloatEqual(score_1, pos_prec_value[1])

    def test_recall(self):
        y_true = np.array([1, 1, 0, 0, 0, 1, 1, 0, 0, 1])
        y_predict = np.array([0.57, 0.70, 0.25, 0.31, 0.46, 0.62, 0.76, 0.46, 0.35, 0.56])
        dict_score = {"0.3": {0: 0.2, 1: 1}, "0.4": {0: 0.6, 1: 1}}

        eva = Evaluation("binary")
        split_thresholds = [0.3, 0.4]

        recalls = eva.recall(y_true, y_predict, thresholds=split_thresholds)
        round_recalls = [[round(pos, 2) for pos in recall] for recall in recalls]

        for i, threshold in enumerate(split_thresholds):
            score_0 = dict_score[str(threshold)][0]
            score_1 = dict_score[str(threshold)][1]

            pos_recall = round_recalls[i]
            self.assertEqual(len(pos_recall), 2)
            self.assertFloatEqual(score_0, pos_recall[0])
            self.assertFloatEqual(score_1, pos_recall[1])

    def test_bin_accuracy(self):
        y_true = np.array([1, 1, 0, 0, 0, 1, 1, 0, 0, 1])
        y_predict = np.array([0.57, 0.70, 0.25, 0.31, 0.46, 0.62, 0.76, 0.46, 0.35, 0.56])
        gt_score = {"0.3": 0.6, "0.5": 1.0, "0.7": 0.7}

        split_thresholds = [0.3, 0.5, 0.7]
        eva = Evaluation("binary")

        acc = eva.accuracy(y_true, y_predict, thresholds=split_thresholds)
        for i, threshold in enumerate(split_thresholds):
            score = gt_score[str(threshold)]
            self.assertFloatEqual(score, acc[i])

    def test_multi_accuracy(self):
        y_true = np.array([1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4])
        y_predict = [1, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 3, 3, 3, 2, 4, 4, 4, 4, 4]
        gt_score = 0.6
        gt_number = 12
        eva = Evaluation("multi")

        acc = eva.accuracy(y_true, y_predict)
        self.assertFloatEqual(gt_score, acc)
        acc_number = eva.accuracy(y_true, y_predict, normalize=False)
        self.assertEqual(acc_number, gt_number)

    def test_multi_recall(self):
        y_true = np.array([1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5])
        y_predict = np.array([1, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 3, 3, 3, 2, 4, 4, 4, 4, 4, 6, 6, 6, 6, 6])
        gt_score = {1: 0.4, 3: 0.8, 4: 1.0, 6: 0, 7: -1}

        eva = Evaluation("multi")
        result_filter = [1, 3, 4, 6, 7]
        recall_scores = eva.recall(y_true, y_predict, result_filter=result_filter)

        for label in result_filter:
            self.assertFloatEqual(gt_score[label], recall_scores[label])

    def test_multi_precision(self):
        y_true = np.array([1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5])
        y_predict = np.array([1, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 3, 3, 3, 2, 4, 4, 4, 4, 4, 6, 6, 6, 6, 6])
        gt_score = {2: 0.25, 3: 0.8, 5: 0, 6: 0, 7: -1}

        eva = Evaluation("multi")
        result_filter = [2, 3, 5, 6, 7]
        precision_scores = eva.precision(y_true, y_predict, result_filter=result_filter)
        for label in result_filter:
            self.assertFloatEqual(gt_score[label], precision_scores[label])

    def test_explained_variance(self):
        eva = Evaluation()

        y_true = [3, -0.5, 2, 7]
        y_pred = [2.5, 0.0, 2, 8]
        self.assertFloatEqual(np.around(eva.explain_variance(y_true, y_pred), 4), 0.9572)

        y_true = [[0.5, 1], [-1, 1], [7, -6]]
        y_pred = [[0, 2], [-1, 2], [8, -5]]
        self.assertFloatEqual(np.around(eva.explain_variance(y_true, y_pred), 4), 0.9839)

    def test_mean_absolute_error(self):
        eva = Evaluation()
        y_true = [3, -0.5, 2, 7]
        y_pred = [2.5, 0.0, 2, 8]
        self.assertFloatEqual(eva.mean_absolute_error(y_true, y_pred), 0.5)

        y_true = [[0.5, 1], [-1, 1], [7, -6]]
        y_pred = [[0, 2], [-1, 2], [8, -5]]
        self.assertFloatEqual(eva.mean_absolute_error(y_true, y_pred), 0.75)

    def test_mean_squared_error(self):
        eva = Evaluation()
        y_true = [3, -0.5, 2, 7]
        y_pred = [2.5, 0.0, 2, 8]
        self.assertFloatEqual(eva.mean_squared_error(y_true, y_pred), 0.375)

        y_true = [[0.5, 1], [-1, 1], [7, -6]]
        y_pred = [[0, 2], [-1, 2], [8, -5]]
        self.assertFloatEqual(np.around(eva.mean_squared_error(y_true, y_pred), 4), 0.7083)

    def test_mean_squared_log_error(self):
        eva = Evaluation()
        y_true = [3, 5, 2.5, 7]
        y_pred = [2.5, 5, 4, 8]
        self.assertFloatEqual(np.around(eva.mean_squared_log_error(y_true, y_pred), 4), 0.0397)

        y_true = [[0.5, 1], [1, 2], [7, 6]]
        y_pred = [[0.5, 2], [1, 2.5], [8, 8]]
        self.assertFloatEqual(np.around(eva.mean_squared_log_error(y_true, y_pred), 4), 0.0442)

    def test_median_absolute_error(self):
        eva = Evaluation()
        y_true = [3, -0.5, 2, 7]
        y_pred = [2.5, 0.0, 2, 8]
        self.assertFloatEqual(eva.median_absolute_error(y_true, y_pred), 0.5)

        y_true = [3, -0.6, 2, 7]
        y_pred = [2.5, 0.0, 2, 8]
        self.assertFloatEqual(eva.median_absolute_error(y_true, y_pred), 0.55)

    def test_root_mean_squared_error(self):
        eva = Evaluation()
        y_true = [3, -0.5, 2, 7]
        y_pred = [2.5, 0.0, 2, 8]
        self.assertFloatEqual(np.around(eva.root_mean_squared_error(y_true, y_pred), 4), 0.6124)

        y_true = [[0.5, 1], [-1, 1], [7, -6]]
        y_pred = [[0, 2], [-1, 2], [8, -5]]
        self.assertFloatEqual(np.around(eva.root_mean_squared_error(y_true, y_pred), 4), 0.8416)

    def test_binary_report(self):
        eva = Evaluation("binary")
        y_true = np.array([1, 1, 0, 0, 0, 1, 1, 0, 0, 1])
        y_predict = np.array([0.57, 0.70, 0.25, 0.31, 0.46, 0.62, 0.76, 0.46, 0.35, 0.56])

        metrics = list(self.ALL_METRICS)

        eval_results = eva.report(y_true, y_predict, metrics)
        self.assertFloatEqual(eval_results['auc'], 1.0)
        self.assertFloatEqual(eval_results['ks'], 1.0)
        self.assertListEqual(eval_results['lift'], [(0.5, 2.0)])
        self.assertListEqual(eval_results['precision'], [(0.5, 1.0)])
        self.assertListEqual(eval_results['recall'], [(0.5, 1.0)])
        self.assertListEqual(eval_results['accuracy'], [(0.5, 1.0)])
        self.assertFloatEqual(eval_results['explained_variance'], 0.4501)
        self.assertFloatEqual(eval_results['mean_absolute_error'], 0.3620)
        self.assertFloatEqual(eval_results['mean_squared_error'], 0.1375)
        self.assertFloatEqual(eval_results['mean_squared_log_error'], 0.0707)
        self.assertFloatEqual(eval_results['median_absolute_error'], 0.3650)
        self.assertFloatEqual(eval_results['r2_score'], 0.4501)
        self.assertFloatEqual(eval_results['root_mean_squared_error'], 0.3708)

    def test_binary_report_with_pos_label(self):
        eva = Evaluation("binary")
        y_true = np.array([1, 1, 0, 0, 0, 1, 1, 0, 0, 1])
        y_predict = np.array([0.57, 0.70, 0.25, 0.31, 0.46, 0.62, 0.76, 0.46, 0.35, 0.56])

        metrics = list(self.ALL_METRICS)
        eval_results = eva.report(y_true, y_predict, metrics, pos_label=0)
        # The only print in this file: dumps the full metrics dict to stdout.
        print(eval_results)
        self.assertFloatEqual(eval_results['auc'], 0.0)
        self.assertFloatEqual(eval_results['ks'], 0.0)
        self.assertListEqual(eval_results['lift'], [(0.5, 0.0)])
        self.assertListEqual(eval_results['precision'], [(0.5, 0.0)])
        self.assertListEqual(eval_results['recall'], [(0.5, 0.0)])
        self.assertListEqual(eval_results['accuracy'], [(0.5, 0.0)])
        self.assertFloatEqual(eval_results['explained_variance'], -0.6539)
        self.assertFloatEqual(eval_results['mean_absolute_error'], 0.6380)
        self.assertFloatEqual(eval_results['mean_squared_error'], 0.4135)
        self.assertFloatEqual(eval_results['mean_squared_log_error'], 0.1988)
        self.assertFloatEqual(eval_results['median_absolute_error'], 0.6350)
        self.assertFloatEqual(eval_results['r2_score'], -0.6539)
        self.assertFloatEqual(eval_results['root_mean_squared_error'], 0.643)

    def test_multi_report(self):
        eva = Evaluation("multi")
        y_true = np.array([1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5])
        y_predict = np.array([1, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 3, 3, 3, 2, 4, 4, 4, 4, 4, 6, 6, 6, 6, 6])

        metrics = list(self.ALL_METRICS)

        eval_results = eva.report(y_true, y_predict, metrics)
        self.assertIsNone(eval_results['auc'])
        self.assertIsNone(eval_results['ks'])
        self.assertIsNone(eval_results['lift'])
        self.assertDictEqual(eval_results['precision'], {1: 0.3333, 2: 0.25, 3: 0.8, 4: 1.0, 5: 0.0, 6: 0.0})
        self.assertDictEqual(eval_results['recall'], {1: 0.4, 2: 0.2, 3: 0.8, 4: 1.0, 5: 0.0, 6: 0.0})
        self.assertFloatEqual(eval_results['accuracy'], 0.48)
        self.assertFloatEqual(eval_results['explained_variance'], 0.6928)
        self.assertFloatEqual(eval_results['mean_absolute_error'], 0.5600)
        self.assertFloatEqual(eval_results['mean_squared_error'], 0.6400)
        self.assertFloatEqual(eval_results['mean_squared_log_error'], 0.0667)
        self.assertFloatEqual(eval_results['median_absolute_error'], 1.000)
        self.assertFloatEqual(eval_results['r2_score'], 0.6800)

    def test_multi_report_with_absent_value(self):
        eva = Evaluation("multi")
        # Same data as test_multi_report plus two samples whose label is None;
        # the expected metrics below are identical to that test's.
        y_true = np.array(
            [1, 1, 1, 1, 1, None, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, None])
        y_predict = np.array([1, 1, 2, 2, 3, 3, 2, 1, 1, 1, 1, 3, 3, 3, 3, 2, 4, 4, 4, 4, 4, 6, 6, 6, 6, 6, 6])

        metrics = list(self.ALL_METRICS)

        eval_results = eva.report(y_true, y_predict, metrics)
        self.assertIsNone(eval_results['auc'])
        self.assertIsNone(eval_results['ks'])
        self.assertIsNone(eval_results['lift'])
        self.assertDictEqual(eval_results['precision'], {1: 0.3333, 2: 0.25, 3: 0.8, 4: 1.0, 5: 0.0, 6: 0.0})
        self.assertDictEqual(eval_results['recall'], {1: 0.4, 2: 0.2, 3: 0.8, 4: 1.0, 5: 0.0, 6: 0.0})
        self.assertFloatEqual(eval_results['accuracy'], 0.48)
        self.assertFloatEqual(eval_results['explained_variance'], 0.6928)
        self.assertFloatEqual(eval_results['mean_absolute_error'], 0.5600)
        self.assertFloatEqual(eval_results['mean_squared_error'], 0.6400)
        self.assertFloatEqual(eval_results['mean_squared_log_error'], 0.0667)
        self.assertFloatEqual(eval_results['median_absolute_error'], 1.000)
        self.assertFloatEqual(eval_results['r2_score'], 0.6800)
        self.assertFloatEqual(eval_results['root_mean_squared_error'], 0.800)

    def test_regression_report(self):
        eva = Evaluation("regression")
        y_true = np.array([1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5])
        y_predict = np.array([1, 1, 2, 2, 3, 2, 1, 1, 1, 1, 3, 3, 3, 3, 2, 4, 4, 4, 4, 4, 6, 6, 6, 6, 6])

        metrics = list(self.ALL_METRICS)

        eval_results = eva.report(y_true, y_predict, metrics)
        self.assertFloatEqual(eval_results['explained_variance'], 0.6928)
        self.assertFloatEqual(eval_results['mean_absolute_error'], 0.5600)
        self.assertFloatEqual(eval_results['mean_squared_error'], 0.6400)
        self.assertFloatEqual(eval_results['mean_squared_log_error'], 0.0667)
        self.assertFloatEqual(eval_results['median_absolute_error'], 1.000)
        self.assertFloatEqual(eval_results['r2_score'], 0.6800)
        self.assertFloatEqual(eval_results['root_mean_squared_error'], 0.800)

        # Requesting only classification metrics from a regression evaluator
        # is expected to yield no report at all.
        metrics = ["auc", "ks", "lift", "precision", "recall", "accuracy"]
        eval_results = eva.report(y_true, y_predict, metrics)
        self.assertIsNone(eval_results)


if __name__ == '__main__':
    unittest.main()
# Excerpt: the only test in this file that prints; it computes every
# evaluation metric through eva.report and dumps the resulting dict.
def test_binary_report_with_pos_label(self):
    eva = Evaluation("binary")
    y_true = np.array([1, 1, 0, 0, 0, 1, 1, 0, 0, 1])
    y_predict = np.array([0.57, 0.70, 0.25, 0.31, 0.46, 0.62, 0.76, 0.46, 0.35, 0.56])

    metrics = ["auc", "ks", "lift", "precision", "recall", "accuracy",
               "explained_variance", "mean_absolute_error", "mean_squared_error",
               "mean_squared_log_error", "median_absolute_error", "r2_score", "root_mean_squared_error"]
    eval_results = eva.report(y_true, y_predict, metrics, pos_label=0)
    print(eval_results)
./federatedml/feature/test/feature_select_test.py:
import unittest
from federatedml.feature.feature_selection import UniqueValueFilter
from federatedml.param.param import UniqueValueParam


class TestFeatureSelect(unittest.TestCase):
    """Checks that a UniqueValueFilter can be exported via ``to_result``."""

    def setUp(self):
        # Build a filter over columns 0 and 1 and mark both columns as kept.
        param = UniqueValueParam()
        self.filter_obj = UniqueValueFilter(param, select_cols=[0, 1])
        self.filter_obj.left_cols = [0, 1]

    def test_protobuf(self):
        result = self.filter_obj.to_result()
        # The only print in this file; the test makes no assertions on result.
        print(result)


if __name__ == '__main__':
    unittest.main()
三、Example Programs:
examples文件夹下,有12个子文件夹
1、data:测试数据集.
2、hetero_dnn_logistic_regression(纵向/异构深度神经网络逻辑回归):
run_logistic_regression_standalone.sh:
# Launch the hetero logistic regression example with work_mode 0, using fixed
# party ids and a job id made unique by the current timestamp.
cd "$(dirname "$0")" || exit 1
curtime=$(date +%Y%m%d%H%M%S)
work_mode=0
jobid="hetero_logistic_regression_example_standalone_${curtime}"
guest_partyid=10000
host_partyid=9999
# Keep this assignment on its own line: written as a prefix of the command
# below it would not be set yet when "$arbiter_partyid" is expanded.
arbiter_partyid=10001

bash run_logistic_regression.sh \
  "$work_mode" "$jobid" "$guest_partyid" "$host_partyid" "$arbiter_partyid"
run_logistic_regression.sh:
# Vertical (hetero) federated learning example driver.
#
# Usage: run_logistic_regression.sh WORK_MODE JOBID GUEST_ID HOST_ID ARBITER_ID [ROLE]
#   WORK_MODE 0 loads data for and starts guest, host and arbiter from this one
#   script; WORK_MODE 1 reads ROLE (guest|host|arbiter) from the sixth argument
#   and handles only that role.
if (( $# < 5 )); then
  printf 'Usage: %s work_mode jobid guest_partyid host_partyid arbiter_partyid [role]\n' \
    "${0##*/}" >&2
  exit 2
fi

work_mode=$1
jobid=$2
guest_partyid=$3
host_partyid=$4
arbiter_partyid=$5
if [[ "$work_mode" -eq 1 ]]; then
  role=$6
fi

cur_dir=$(pwd)
# Data set directory
data_dir=$cur_dir/../data
# Program that loads a data file
load_file_program=$cur_dir/../load_file/load_file.py
# Configuration directory
conf_dir=$cur_dir/conf
# Log directory
log_dir=$cur_dir/../../logs
# Template configuration for loading a file
load_data_conf=$conf_dir/load_file.json
# Runtime configuration templates for guest, host and arbiter
guest_runtime_conf=$conf_dir/guest_runtime_conf.json
host_runtime_conf=$conf_dir/host_runtime_conf.json
arbiter_runtime_conf=$conf_dir/arbiter_runtime_conf.json
# Data set to use
data_set=breast
#data_set=default_credit
#data_set=give_credit
# Training data
train_data_host=$data_dir/${data_set}_a.csv
train_data_guest=$data_dir/${data_set}_b.csv
# Prediction data
predict_data_host=$data_dir/${data_set}_a.csv
predict_data_guest=$data_dir/${data_set}_b.csv
# Cross-validation data
cv_data_host=$data_dir/${data_set}_a.csv
cv_data_guest=$data_dir/${data_set}_b.csv

echo "data dir is : ${data_dir}"
mode='train'
#mode='predict'
#mode='cross_validation'
data_table='' # written by load_file: name of the table it just loaded
log_file=''   # written by train / cross_validation: log location of this job

mkdir -p "$log_dir"

#######################################
# Load one data file through the load_file program.
# Copies the load-file template to a per-job conf, fills in its placeholders
# and runs the loader on it.
# Globals:   conf_dir, load_data_conf, data_set, jobid, work_mode,
#            load_file_program (read); data_table (written)
# Arguments: $1 - input file path, $2 - role, $3 - load mode
#######################################
load_file() {
  # Locals keep this function from overwriting the script-level $role.
  local input_path=$1 role=$2 load_mode=$3
  local conf_path=$conf_dir/load_file.json_${role}_${load_mode}_${jobid}
  cp -- "$load_data_conf" "$conf_path" || return 1
  data_table=${data_set}_${role}_${load_mode}_${jobid}
  # sed -i edits the file in place instead of printing to the terminal;
  # the path substitution uses | as delimiter because paths contain /.
  sed -i \
    -e "s|_input_path|${input_path}|g" \
    -e "s/_table_name/${data_table}/g" \
    -e "s/_work_mode/${work_mode}/g" \
    "$conf_path"
  python "$load_file_program" -c "$conf_path"
}

#######################################
# Print the runtime-conf template for a role; any role other than guest or
# arbiter gets the host template.
# Arguments: $1 - role
#######################################
runtime_conf_for() {
  case "$1" in
    guest) printf '%s\n' "$guest_runtime_conf" ;;
    arbiter) printf '%s\n' "$arbiter_runtime_conf" ;;
    *) printf '%s\n' "$host_runtime_conf" ;;
  esac
}

#######################################
# Start the runner script of a role in the background; any role other than
# guest or arbiter is started as host.
# Arguments: $1 - role, $2 - runtime conf of this job
#######################################
launch_role() {
  case "$1" in
    guest)
      echo "enter guest"
      nohup bash run_guest.sh "$2" "$jobid" &
      ;;
    arbiter)
      echo "enter arbiter"
      nohup bash run_arbiter.sh "$2" "$jobid" &
      ;;
    *)
      echo "enter host"
      nohup bash run_host.sh "$2" "$jobid" &
      ;;
  esac
}

#######################################
# Prepare the runtime conf of a role for training and start that role.
# Globals:   jobid, work_mode, *_partyid, log_dir (read); log_file (written)
# Arguments: $1 - role, $2 - training table, $3 - prediction table
#######################################
train() {
  local role=$1 train_table=$2 predict_table=$3
  local runtime_conf cur_runtime_conf
  runtime_conf=$(runtime_conf_for "$role")
  cur_runtime_conf=${runtime_conf}_${jobid}
  cp -- "$runtime_conf" "$cur_runtime_conf" || return 1

  echo "current runtime conf is ${cur_runtime_conf}"
  echo "training table is ${train_table}"
  echo "$predict_table"
  sed -i \
    -e "s/_workflow_method/train/g" \
    -e "s/_train_table_name/${train_table}/g" \
    -e "s/_predict_table_name/${predict_table}/g" \
    -e "s/_work_mode/${work_mode}/g" \
    -e "s/_guest_party_id/${guest_partyid}/g" \
    -e "s/_host_party_id/${host_partyid}/g" \
    -e "s/_arbiter_party_id/${arbiter_partyid}/g" \
    "$cur_runtime_conf"

  log_file=${log_dir}/${jobid}
  echo "Please check log file in ${log_file}"
  launch_role "$role" "$cur_runtime_conf"
}

#######################################
# Prepare the runtime conf of a role for cross validation and start that role.
# Globals:   jobid, work_mode, *_partyid, log_dir (read); log_file (written)
# Arguments: $1 - role, $2 - cross-validation table
#######################################
cross_validation() {
  local role=$1 cv_table=$2
  local runtime_conf cur_runtime_conf
  runtime_conf=$(runtime_conf_for "$role")
  cur_runtime_conf=${runtime_conf}_${jobid}
  cp -- "$runtime_conf" "$cur_runtime_conf" || return 1

  echo "current runtime conf is ${cur_runtime_conf}"
  echo "cv talbe is${cv_table}"
  sed -i \
    -e "s/_workflow_method/cross_validation/g" \
    -e "s/_cross_validation_table_name/${cv_table}/g" \
    -e "s/_work_mode/${work_mode}/g" \
    -e "s/_guest_party_id/${guest_partyid}/g" \
    -e "s/_host_party_id/${host_partyid}/g" \
    -e "s/_arbiter_party_id/${arbiter_partyid}/g" \
    "$cur_runtime_conf"

  log_file=${log_dir}/${jobid}
  echo "Please check log file in ${log_file}"
  launch_role "$role" "$cur_runtime_conf"
}

#######################################
# Block until the log contains the keyword, then print the matching lines.
# Polls forever; a log file that does not exist yet counts as "no match".
# Arguments: $1 - log file path, $2 - keyword (a grep pattern)
#######################################
get_log_result() {
  local log_path=$1 keyword=$2
  sleep 5s
  until grep -q -- "$keyword" "$log_path" 2>/dev/null; do
    echo "please wait or check more info in ${log_path}"
    sleep 10s
  done
  grep -- "$keyword" "$log_path"
}

if [[ "$mode" == 'train' ]]; then
  if [[ "$work_mode" -eq 0 ]]; then
    load_file "$train_data_guest" guest train
    train_table_guest=$data_table
    echo "train_table guest is:${train_table_guest}"

    load_file "$train_data_host" host train
    train_table_host=$data_table
    echo "train_table host is:${train_table_host}"

    load_file "$predict_data_guest" guest predict
    predict_table_guest=$data_table
    echo "predict_table guest is:${predict_table_guest}"

    load_file "$predict_data_host" host predict
    predict_table_host=$data_table
    echo "predict_table host is:${predict_table_host}"

    train guest "$train_table_guest" "$predict_table_guest"
    train host "$train_table_host" "$predict_table_host"
    train arbiter "" ""
    workflow_log=${log_file}/workflow.log
    get_log_result "$workflow_log" eval_result
  else
    case "$role" in
      guest)
        load_file "$train_data_guest" guest train
        train_table_guest=$data_table

        load_file "$predict_data_guest" guest predict
        predict_table_guest=$data_table

        train guest "$train_table_guest" "$predict_table_guest"

        workflow_log=${log_file}/workflow.log
        get_log_result "$workflow_log" eval_result
        ;;
      host)
        load_file "$train_data_host" host train
        train_table_host=$data_table

        load_file "$predict_data_host" host predict
        predict_table_host=$data_table
        echo "Predict_table host is:${predict_table_host}"

        train host "$train_table_host" "$predict_table_host"
        ;;
      arbiter)
        train arbiter '' ''
        ;;
      *)
        # Same message the cross_validation branch prints for unknown roles.
        echo "${role} not support"
        ;;
    esac
  fi
elif [[ "$mode" == 'cross_validation' ]]; then
  if [[ "$work_mode" -eq 0 ]]; then
    load_file "$cv_data_guest" guest cross_validation
    cv_table_guest=$data_table
    load_file "$cv_data_host" host cross_validation
    cv_table_host=$data_table

    echo "cv table guest is:${cv_table_guest}"
    echo "cv table host is:${cv_table_host}"

    cross_validation guest "$cv_table_guest"
    cross_validation host "$cv_table_host"
    cross_validation arbiter ""

    workflow_log=${log_file}/workflow.log
    get_log_result "$workflow_log" mean
  else
    case "$role" in
      guest)
        load_file "$cv_data_guest" guest cross_validation
        cv_table_guest=$data_table
        echo "cv table guest is:${cv_table_guest}"
        cross_validation guest "$cv_table_guest"
        workflow_log=${log_file}/workflow.log
        get_log_result "$workflow_log" mean
        ;;
      host)
        load_file "$cv_data_host" host cross_validation
        cv_table_host=$data_table
        echo "cv table host is:${cv_table_host}"
        cross_validation host "$cv_table_host"
        ;;
      arbiter)
        echo "arbiter do not need data"
        cross_validation arbiter ""
        ;;
      *)
        echo "${role} not support"
        ;;
    esac
  fi
fi
3、。。。(后续再作补充)
微众银行FATE联邦学习框架的更多相关文章
- “联邦对抗技术大赛”9月开战 微众银行呼唤开发者共同“AI创新”
“联邦对抗技术大赛”9月开战 微众银行呼唤开发者共同“AI创新” 从<第五元素>中的智能系统到<超体>中的信息操控,在科幻电影中人工智能已经发展到了极致.而在现实中,目前 ...
- 微众银行Java面试-社招-一面(2019/07)
个人情况 2017年毕业,普通本科,计算机科学与技术专业,毕业后在一个二三线小城市从事Java开发,2年Java开发经验.做过分布式开发,没有高并发的处理经验,平时做To G的项目居多.写下面经是希望 ...
- Nebula Graph 在微众银行数据治理业务的实践
本文为微众银行大数据平台:周可在 nMeetup 深圳场的演讲这里文字稿,演讲视频参见:B站 自我介绍下,我是微众银行大数据平台的工程师:周可,今天给大家分享一下 Nebula Graph 在微众银行 ...
- MindSpore联邦学习框架解决行业级难题
内容来源:华为开发者大会2021 HMS Core 6 AI技术论坛,主题演讲<MindSpore联邦学习框架解决隐私合规下的数据孤岛问题>. 演讲嘉宾:华为MindSpore联邦学习工程 ...
- 微众银行c++选择题后记
一个类的成员可以有:另一个类的对象,类的自身指针,自身类对象的引用(私有的如何初始化呢,所以不行,换成静态的可以),自身类对象(构造时如何初始化呢?) class A{ public: A(){} A ...
- 联邦学习开源框架FATE助力腾讯神盾沙箱,携手打造数据安全合作生态
近日,微众银行联邦学习FATE开源社区迎来了两位新贡献者——来自腾讯的刘洋及秦姝琦,作为云计算安全领域的专家,两位为FATE构造了新的功能点,并在Github上提交修复了相关漏洞.(Github项目地 ...
- 联邦学习 Federated Learning 相关资料整理
本文链接:https://blog.csdn.net/Sinsa110/article/details/90697728代码微众银行+杨强教授团队的联邦学习FATE框架代码:https://githu ...
- 腾讯数据安全专家谈联邦学习开源项目FATE:通往隐私保护理想未来的桥梁
数据孤岛.数据隐私以及数据安全,是目前人工智能和云计算在大规模产业化应用过程中绕不开的“三座大山”. “联邦学习”作为新一代的人工智能算法,能在数据不出本地的情况下,实现共同建模,提升AI模型的效果, ...
- 联邦学习(Federated Learning)
联邦学习简介 联邦学习(Federated Learning)是一种新兴的人工智能基础技术,在 2016 年由谷歌最先提出,原本用于解决安卓手机终端用户在本地更新模型的问题,其设计目标是 ...
随机推荐
- CORS跨域操作cookie
CORS 跨域 在服务端设置响应头 ACAO( Access-Control-Allow-Origin )即可 前端代码,运行在 8080 端口上 $.ajax({ url:'http://local ...
- [C#.NET 拾遗补漏]06:单例模式最佳实践
大家好,这是[C#.NET 拾遗补漏]专辑的第 06 篇文章.今天讲讲大家熟悉的单例模式. 单例模式大概是所有设计模式中最简单的一种,如果在面试时被问及熟悉哪些设计模式,你可能第一个答的就是单例模式. ...
- MapReduce之Combiner合并
Combiner是MR程序中Mapper和Reducer之外的一种组件(本质是一个Reducer类) Combinr组件的父类就是Reducer Conbimer只有在驱动类里设置了之后,才会运行 C ...
- Python os.tmpnam() 方法
概述 os.tmpnam() 方法用于为创建一个临时文件返回一个唯一的路径.高佣联盟 www.cgewang.com 语法 tmpnam()方法语法格式如下: os.tmpnam 参数 无 返回值 返 ...
- Python time strftime()方法
描述 Python time strftime() 函数接收以时间元组,并返回以可读字符串表示的当地时间,格式由参数format决定.高佣联盟 www.cgewang.com 语法 strftime( ...
- Python List max()方法
描述 max() 方法返回列表元素中的最大值.高佣联盟 www.cgewang.com 语法 max()方法语法: max(list) 参数 list -- 要返回最大值的列表. 返回值 返回列表元素 ...
- PHP sscanf() 函数
实例 Parse a string: <?php高佣联盟 www.cgewang.com$str = "age:30 weight:60kg";sscanf($str,&qu ...
- ABC 162 F Select Half dp 贪心
LINK:Select Half 考试的时候调了一个小时给调自闭了 原来是dp的姿势不太对. 首先 容易发现 奇数最多空2个位置 偶数最多空1一个位置 然后 设f[i][j][k]表示第i个数选了没有 ...
- 【小白学AI】线性回归与逻辑回归(似然参数估计)
文章转自[机器学习炼丹术] 线性回归解决的是回归问题,逻辑回归相当于是线性回归的基础上,来解决分类问题. 1 公式 线性回归(Linear Regression)是什么想必不用多说了.格式是这个样子的 ...
- 《JavaScript语言入门教程》记录整理:运算符、语法和标准库
目录 运算符 算数运算符 比较运算符 布尔运算符 二进制位运算符 void和逗号运算符 运算顺序 语法 数据类型的转换 错误处理机制 编程风格 console对象和控制台 标准库 Object对象 属 ...