2019-06-13 18:31:42 +08:00
|
|
|
'''
|
|
|
|
Construct model pool according to initialization dataset, e.g., apps developed in 2011
|
|
|
|
|
|
|
|
'''
|
2020-04-17 23:20:30 +08:00
|
|
|
import pylibol
|
2019-06-13 18:31:42 +08:00
|
|
|
import numpy as np
|
|
|
|
import scipy
|
|
|
|
from scipy.stats import logistic
|
|
|
|
from scipy.special import expit
|
|
|
|
from numpy import dot
|
|
|
|
import sklearn
|
|
|
|
from sklearn.datasets import load_svmlight_file
|
|
|
|
import os
|
|
|
|
import sys
|
|
|
|
import string
|
|
|
|
from decimal import *
|
|
|
|
import collections
|
2020-04-17 23:20:30 +08:00
|
|
|
from pylibol import classifiers
|
2019-06-13 18:31:42 +08:00
|
|
|
from classifiers import *
|
|
|
|
import time
|
|
|
|
import random
|
|
|
|
import argparse
|
|
|
|
|
|
|
|
def main():
|
|
|
|
|
|
|
|
parser = argparse.ArgumentParser()
|
|
|
|
parser.add_argument('--starting', type=int, help='initialization dataset') # to use = args.initialization
|
|
|
|
args = parser.parse_args()
|
|
|
|
|
|
|
|
starting_year = args.starting
|
|
|
|
|
|
|
|
X_train,Y_train=load_svmlight_file(str(starting_year) + '.libsvm')
|
|
|
|
print 'X_train data shape' , type(X_train), X_train.shape
|
|
|
|
|
|
|
|
global clfs
|
|
|
|
|
|
|
|
clfs = [PA1(), OGD(), AROW(), RDA(), ADA_FOBOS()]
|
|
|
|
|
|
|
|
print 'model pool size: ', len(clfs) # number of models in the model pool
|
|
|
|
|
|
|
|
ori_train_acc = []
|
|
|
|
|
|
|
|
directory = './' + str(starting_year) + 'train/'
|
|
|
|
if not os.path.exists(directory):
|
|
|
|
os.makedirs(directory)
|
|
|
|
|
|
|
|
# initialization process of all models
|
|
|
|
print 'All model initialization'
|
|
|
|
for i in xrange(len(clfs)): # i = every model in model pool
|
|
|
|
print clfs[i]
|
|
|
|
print 'training'
|
|
|
|
train_accuracy,data,err,fit_time=clfs[i].fit(X_train,Y_train, False)
|
|
|
|
ori_train_acc.append(train_accuracy)
|
|
|
|
clfs[i].save('./' + str(starting_year) + 'train/' + str(starting_year) + '_' + str(i) + '.model')
|
|
|
|
|
|
|
|
print 'original model accuracy', ori_train_acc
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
|
main()
|