/******************************************************************
 * Copyright (c) 2021
 * All rights reserved.
 
 * Filename: procedure.cpp
 
 * Version: 1.0
 * Author: Yang Zhang  (zhangyang@big.ac.cn)
 * Date: December, 2021
 
 ******************************************************************/

#include "procedure.h"

/**
 * Unsupervised pipeline in which the FCM step picks the cluster count itself.
 *
 * Sequence: standardize the data, build the feature mask, run fuzzy c-means
 * (on the tensor-decomposed sample factors when tensorD == 1, otherwise through
 * FCM::Run_fcm), append the selected K to the run log, write the clustering via
 * fcm_out_noK() and, when the global gene_select equals 1, hand the cluster
 * assignment to label_process() for feature selection.
 *
 * @param wholeData  dataset descriptor, received by value
 * @param typenum    number of data types; used to size the training grid
 * @param tensorD    1 selects clustering on the tensor decomposition, any other
 *                   value clusters on the (masked) data directly
 */
void NoLaber(WholeData wholeData, int typenum,int tensorD){
    int ReductionNum = wholeData.geneNullNum;
    int * ReductionIndex = new int[wholeData.geneNum];

    Standardize(wholeData.Data, wholeData.sampleNum, wholeData.geneNum, wholeData.typeNum); // normalization

    /*************************Reduction***************************************/
    if (wholeData.typeNum>1) {
        // Several data types: the mask is simply the existing null-gene flags.
        for (int g=0; g<wholeData.geneNum; ++g) {
            ReductionIndex[g]=wholeData.geneNullsign[g];
        }
    }
    else{
        // Single data type: the mask and its size come from the variance reduction.
        Reduction* reduction=new Reduction();
        reduction->Run_Variance(wholeData,1);
        for (int g=0; g<wholeData.geneNum; ++g) {
            ReductionIndex[g]=(reduction->indexReduction_var[g]==1) ? 1 : 0;
        }
        ReductionNum = reduction->Reduction_varNum;
        delete reduction;
    }

    /*************************cluster***************************************/
    cout<<endl<<endl<<"fuzzy c_means cluster(FCM) algorithm........"<<endl<<endl;
    cout<<"parameters: "<<endl<<endl;

    FCM* fcm=new FCM();
    TensorDecomposition* tensorDecomposition=new TensorDecomposition();
    tensorDecomposition->Tensor_fN(wholeData, ReductionNum);

    // The factor grids are allocated in both modes; they are only filled and
    // consumed when tensorD == 1.
    float **tensor_sample = CreateGrid2(wholeData.sampleNum, tensorDecomposition->get_tensor_sample_fN());
    float **tensor_gene = CreateGrid2(wholeData.geneNum, tensorDecomposition->get_tensor_gene_fN());
    float **tensor_type = CreateGrid2(wholeData.typeNum, tensorDecomposition->get_tensor_type_fN());

    if(tensorD==1){
        tensorDecomposition->tensor_de(tensor_sample, tensor_gene, tensor_type, wholeData, ReductionNum,
                                       ReductionIndex);
        fcm->Run_fcm_tensor(tensor_sample,wholeData.sampleNum,wholeData.geneNum-ReductionNum,tensorDecomposition->get_tensor_sample_fN());
    }else{
        fcm->Run_fcm(wholeData,ReductionNum, ReductionIndex);
    }

    FreeGrid2(tensor_sample,wholeData.sampleNum, tensorDecomposition->get_tensor_sample_fN());
    FreeGrid2(tensor_gene,wholeData.geneNum, tensorDecomposition->get_tensor_gene_fN());
    FreeGrid2(tensor_type,wholeData.typeNum, tensorDecomposition->get_tensor_type_fN());
    delete tensorDecomposition;

    const int fcm_opt_k=fcm->getOpt_k();

    ofstream outfile_runlog(out_runlog.c_str(),ios::app);
    outfile_runlog<<endl<<endl<<"fuzzy c_means cluster(FCM) algorithm........"<<endl<<endl;
    outfile_runlog<<"parameters: "<<endl;
    outfile_runlog<<"fuzzifier m value: "<<fcm->get_m()<<endl;
    outfile_runlog<<"the best K value: "<<fcm_opt_k<<endl<<endl<<endl;
    outfile_runlog.close();
    cout<<endl<<"the best K value: "<<fcm_opt_k<<endl<<endl;

    /******************** output cluster results **************************/
    fcm_out_noK(wholeData, fcm);

    // Copy the assignment for the selected K out of the FCM object so the
    // object can be released before the feature-selection stage.
    float* cluster=new float[wholeData.sampleNum];
    for (int s=0; s<wholeData.sampleNum; ++s) {
        cluster[s]=static_cast<float>(fcm->getFcm_k_cluster()[fcm_opt_k][s]);
    }
    delete fcm;

    if(gene_select==1){
        cluster_label(wholeData.sample_label, wholeData.labelType, cluster, wholeData.sampleNum, fcm_opt_k);

        /*************************RandomForest***************************************/
        // NOTE(review): the grid is sized with typenum while GridTrans receives
        // wholeData.typeNum; presumably callers pass equal values - confirm.
        const int trainCols=wholeData.geneNum*typenum;
        float**trainset=CreateGrid2(wholeData.sampleNum, trainCols);
        GridTrans(trainset, wholeData.Data, wholeData.sampleNum, wholeData.geneNum, wholeData.typeNum);

        label_process(wholeData, ReductionIndex, ReductionNum, cluster, fcm_opt_k, trainset);

        // Release with the same dimensions the grid was created with.
        FreeGrid2(trainset, wholeData.sampleNum, trainCols);
    }

    // write out_runlog
    runlog_out(wholeData, fcm_opt_k,true);
    cout<<endl<<"save result in "<<out_runlog<<endl<<endl;

    delete []cluster;
    delete []ReductionIndex;
}


/**
 * Unsupervised pipeline with a caller-supplied cluster count.
 *
 * Same stages as NoLaber(), except that fuzzy c-means is run for exactly k
 * clusters (FCM::Run_fcm2 / FCM::Run_fcm2_tensor), the result is written with
 * fcm_out() and the assignment is read from FCM::getFcm_result().
 *
 * @param wholeData  dataset descriptor, received by value
 * @param typenum    number of data types; used to size the training grid
 * @param k          number of clusters to fit
 * @param tensorD    1 selects clustering on the tensor decomposition, any other
 *                   value clusters on the (masked) data directly
 */
void NoLaber_k(WholeData wholeData, int typenum,int k,int tensorD){
    Standardize(wholeData.Data, wholeData.sampleNum, wholeData.geneNum, wholeData.typeNum); // normalization
    int ReductionNum = wholeData.geneNullNum;
    int * ReductionIndex = new int[wholeData.geneNum];

    /*************************Reduction***************************************/
    if (wholeData.typeNum>1) {
        // Several data types: the mask is simply the existing null-gene flags.
        for (int g=0; g<wholeData.geneNum; ++g) {
            ReductionIndex[g]=wholeData.geneNullsign[g];
        }
    }
    else{
        // Single data type: the mask and its size come from the variance reduction.
        Reduction* reduction=new Reduction();
        reduction->Run_Variance(wholeData,1);
        for (int g=0; g<wholeData.geneNum; ++g) {
            ReductionIndex[g]=(reduction->indexReduction_var[g]==1) ? 1 : 0;
        }
        ReductionNum = reduction->Reduction_varNum;
        delete reduction;
    }

    /*************************FCM***************************************/
    FCM* fcm=new FCM();
    TensorDecomposition* tensorDecomposition=new TensorDecomposition();
    tensorDecomposition->Tensor_fN(wholeData, ReductionNum);

    // The factor grids are allocated in both modes; they are only filled and
    // consumed when tensorD == 1.
    float **tensor_sample = CreateGrid2(wholeData.sampleNum, tensorDecomposition->get_tensor_sample_fN());
    float **tensor_gene = CreateGrid2(wholeData.geneNum, tensorDecomposition->get_tensor_gene_fN());
    float **tensor_type = CreateGrid2(wholeData.typeNum, tensorDecomposition->get_tensor_type_fN());

    if(tensorD==1){
        tensorDecomposition->tensor_de(tensor_sample, tensor_gene, tensor_type, wholeData, ReductionNum,
                                       ReductionIndex);
        fcm->Run_fcm2_tensor(tensor_sample,wholeData.sampleNum,wholeData.geneNum-ReductionNum,tensorDecomposition->get_tensor_sample_fN(),k);
    }else{
        fcm->Run_fcm2(wholeData,ReductionNum, ReductionIndex,k);
    }

    FreeGrid2(tensor_sample,wholeData.sampleNum, tensorDecomposition->get_tensor_sample_fN());
    FreeGrid2(tensor_gene,wholeData.geneNum, tensorDecomposition->get_tensor_gene_fN());
    FreeGrid2(tensor_type,wholeData.typeNum, tensorDecomposition->get_tensor_type_fN());
    delete tensorDecomposition;

    ofstream outfile_runlog(out_runlog.c_str(),ios::app);
    outfile_runlog<<endl<<endl<<endl<<"fuzzy c_means cluster(FCM) algorithm........"<<endl<<endl;
    outfile_runlog<<"parameters: "<<endl;
    outfile_runlog<<"fuzzifier m value: "<<fcm->get_m()<<endl;
    outfile_runlog<<"K: "<<k<<endl<<endl<<endl;
    outfile_runlog.close();

    fcm_out(wholeData, fcm, k);

    // Copy the assignment out of the FCM object so the object can be released
    // before the feature-selection stage.
    float* cluster=new float[wholeData.sampleNum];
    for (int s=0; s<wholeData.sampleNum; ++s) {
        cluster[s]=static_cast<float>(fcm->getFcm_result()[s]);
    }
    delete fcm;

    if(gene_select==1){
        cluster_label(wholeData.sample_label, wholeData.labelType, cluster, wholeData.sampleNum, k);

        /*************************RandomForest***************************************/
        // NOTE(review): the grid is sized with typenum while GridTrans receives
        // wholeData.typeNum; presumably callers pass equal values - confirm.
        const int trainCols=wholeData.geneNum*typenum;
        float**trainset=CreateGrid2(wholeData.sampleNum, trainCols);
        GridTrans(trainset, wholeData.Data, wholeData.sampleNum, wholeData.geneNum, wholeData.typeNum);

        label_process(wholeData, ReductionIndex, ReductionNum, cluster, k, trainset);

        // Release with the same dimensions the grid was created with.
        FreeGrid2(trainset, wholeData.sampleNum, trainCols);
    }

    // write out_runlog
    runlog_out(wholeData, k,true);
    cout<<endl<<"save result in "<<out_runlog<<endl<<endl;

    delete []cluster;
    delete []ReductionIndex;
}



/**
 * Supervised pipeline: sample labels come from a file instead of clustering.
 *
 * Standardizes the data, uses the existing null-gene flags as the feature
 * mask, flattens the data into a training grid, reads the labels with
 * ReadLabel() and runs label_process(); finally writes the run log.
 *
 * @param wholeData       dataset descriptor, received by value
 * @param typenum         number of data types; used to size the training grid
 * @param label_filename  path handed to ReadLabel()
 */
void Laber(WholeData wholeData, int typenum, string label_filename){
    Standardize(wholeData.Data, wholeData.sampleNum, wholeData.geneNum, wholeData.typeNum);

    // Feature mask: start from the genes already flagged as null.
    int ReductionNum =wholeData.geneNullNum;
    int * ReductionIndex = new int[wholeData.geneNum];
    for (int g=0; g<wholeData.geneNum; ++g) {
        ReductionIndex[g]=wholeData.geneNullsign[g];
    }

    int classNum=0;

    /**********************normaliz***************************/
    // NOTE(review): the grid is sized with typenum while GridTrans receives
    // wholeData.typeNum; presumably callers pass equal values - confirm.
    const int trainCols=wholeData.geneNum*typenum;
    float**trainset=CreateGrid2(wholeData.sampleNum, trainCols);
    GridTrans(trainset, wholeData.Data, wholeData.sampleNum, wholeData.geneNum, wholeData.typeNum);

    /*************cluster file*************/
    // classNum is used below as the class count, so ReadLabel presumably
    // fills both cluster and classNum - TODO confirm against its declaration.
    float* cluster=new float[wholeData.sampleNum];
    ReadLabel(cluster,classNum,wholeData.labelType,wholeData.sample_label, wholeData.sampleNum, label_filename,wholeData.sampleName);

    label_process(wholeData, ReductionIndex, ReductionNum, cluster, classNum, trainset);

    // out_runlog
    runlog_out(wholeData, classNum,false);
    cout<<endl<<"save result in "<<out_runlog<<endl<<endl;

    delete []cluster;
    // Release with the same dimensions the grid was created with.
    FreeGrid2(trainset, wholeData.sampleNum, trainCols);
    delete []ReductionIndex;
}

/**
 * Feature-selection stage shared by the labelled and unlabelled pipelines.
 *
 * Stages, in order:
 *   1. allocate the significance tables on the local wholeData copy and call
 *      significant_run() with the global p_value;
 *   2. when more than 100 features remain, filter further with GainSelect();
 *   3. flag every filtered feature in wholeData.geneNullsign;
 *   4. train the random forests through randomforeastProsse() and rank the
 *      importances;
 *   5. standardize and sort the scores and write them with
 *      feature_importance_out2().
 *
 * A held-out evaluation variant (splict_train_test() together with
 * randomforeastProsse_test()) exists in this file but is not used here.
 *
 * @param wholeData       dataset descriptor, received by value; the tables
 *                        allocated here live only on this copy and are freed
 *                        before returning. NOTE(review): geneNullsign is
 *                        written through the copied pointer, so the caller
 *                        presumably observes those writes - confirm WholeData
 *                        copy semantics.
 * @param ReductionIndex  per-gene mask, 1 = filtered out; updated in place by
 *                        the stages called from here
 * @param ReductionNum    number of entries set in ReductionIndex on entry
 * @param cluster         per-sample class label
 * @param classNum        number of classes
 * @param trainset        sample-by-feature training grid
 */
void label_process(WholeData wholeData,int* ReductionIndex,int ReductionNum,float*cluster,int classNum,float**trainset){

    /*********************************** significance ******************************************/
    wholeData.gene_significant_type=CreateGrid2_int(wholeData.geneNum, wholeData.typeNum);

    const int groupNum=Combination(2,classNum);
    // The per-type counters are addressed with indices 0..groupNum inclusive
    // (the zeroing loop has always touched index groupNum), so each row needs
    // groupNum+1 columns. Allocating only groupNum overflowed every row by one.
    const int groupSlots=groupNum+1;
    wholeData.significant_type_num=CreateGrid2_int(wholeData.typeNum, groupSlots);
    for (int t=0; t<wholeData.typeNum; ++t) {
        for (int g=0; g<groupSlots; ++g) {
            wholeData.significant_type_num[t][g]=0;
        }
    }

    wholeData.gene_class_significant=CreateGrid4(wholeData.geneNum, wholeData.typeNum, classNum, classNum);

    wholeData.specific=new string *[wholeData.geneNum];
    for (int g=0; g<wholeData.geneNum; ++g) {
        wholeData.specific[g]=new string[wholeData.typeNum];
    }

    significant_run(wholeData,cluster, classNum,ReductionIndex,ReductionNum,p_value);

    /******************************filtered features***********************************/
    if(wholeData.geneNum-ReductionNum>100){
        GainSelect(ReductionIndex,ReductionNum, wholeData,cluster, classNum,0);
    }
    cout<<"Number of filtered features by GiniInfo: "<<ReductionNum<<endl;

    // Flag the filtered genes as null so the output stage knows about them.
    // Only genes that were not flagged before are counted: the callers seed
    // ReductionIndex from geneNullsign, so counting every masked gene would
    // add the already-null genes to geneNullNum a second time.
    for (int g=0; g<wholeData.geneNum; ++g) {
        if (ReductionIndex[g]==1 && wholeData.geneNullsign[g]!=1) {
            wholeData.geneNullsign[g]=1;
            wholeData.geneNullNum++;
        }
    }

    /***************************** RandomForest ********************************/
    RandomForest *randomForest = nullptr;
    float *importance_mean=new float[wholeData.geneNum];

    randomforeastProsse(randomForest,importance_mean, trainset, classNum, cluster,wholeData.sampleNum, wholeData, ReductionNum, ReductionIndex);

    if (randomForest!=nullptr) {
        randomForest->importance_Rank(wholeData.geneNum,ReductionIndex);

        /****************************** score ***********************************/
        int *rankIndex=new int[wholeData.geneNum];
        float *score=new float[wholeData.geneNum];
        for (int g=0; g<wholeData.geneNum; ++g) {
            rankIndex[g]=g;
            score[g]=importance_mean[g];
        }

        /*************************results***************************************/
        Standardize_score(score,wholeData.geneNum,0.000001);
        // Sort score in ascending order and record the permutation in
        // rankIndex, so importance increases along the array.
        QuickSort_top(score, 0, wholeData.geneNum-1, rankIndex);

        // write feature_importance
        float cut=0;
        feature_importance_out2(score,cut,randomForest,wholeData, classNum, rankIndex, importance_mean,cluster,wholeData.geneNullsign,wholeData.geneNullNum);

        delete []rankIndex;
        delete []score;
    }
    else {
        // No forest was produced, so there is nothing to rank or report.
        cout<<"randomForest was not trained; feature importance output skipped"<<endl;
    }

    delete []importance_mean;
    delete randomForest;

    // Release the tables allocated above, with the dimensions used to create them.
    for (int g=0; g<wholeData.geneNum; ++g) {
        delete []wholeData.specific[g];
    }
    delete []wholeData.specific;

    FreeGrid2_int(wholeData.gene_significant_type, wholeData.geneNum, wholeData.typeNum);
    FreeGrid2_int(wholeData.significant_type_num, wholeData.typeNum, groupSlots);
    FreeGrid4(wholeData.gene_class_significant, wholeData.geneNum, wholeData.typeNum, classNum, classNum);

}

/**
 * Splits the samples into a test part and a training part, stratified by class.
 *
 * Each class contributes TEST_NUM*count/sampleNum test samples (integer
 * division); the last class takes the remainder. Quotas that exceed the number
 * of samples available in a class are capped and the shortfall is handed to
 * classes that still have spare samples, so the random draw below always
 * terminates. Samples are then drawn with rand() until every quota is met.
 *
 * The output arrays receive row POINTERS from trainset (rows are not copied)
 * and the matching labels from cluster, both in ascending sample order.
 *
 * @param trainset_new  out: rows not selected for testing; must hold
 *                      sampleNum - TEST_NUM entries
 * @param trainlabels   out: labels matching trainset_new
 * @param testset       out: rows selected for testing; must hold TEST_NUM entries
 * @param testlabels    out: labels matching testset
 * @param TEST_NUM      requested number of test samples; clamped to [0, sampleNum]
 * @param sampleNum     number of rows in trainset / entries in cluster
 * @param trainset      source rows
 * @param cluster       per-sample class label, expected in [0, classNum)
 * @param classNum      number of classes
 */
void splict_train_test(float** &trainset_new,float* &trainlabels,float**  &testset,float*  &testlabels,int TEST_NUM,int sampleNum,float**trainset,float* cluster,int classNum){
    if (sampleNum<=0) {
        return;
    }
    if (TEST_NUM<0) {
        TEST_NUM=0;
    }
    if (TEST_NUM>sampleNum) {
        TEST_NUM=sampleNum;
    }
    if (classNum<0) {
        classNum=0;
    }

    // Per-class sample counts; labels outside [0, classNum) are not counted
    // and are therefore never picked by the stratified draw.
    int *select_test=new int[sampleNum];
    int *labelNum=new int[classNum];
    for (int c=0; c<classNum; ++c) {
        labelNum[c]=0;
    }
    for (int i=0; i<sampleNum; ++i) {
        select_test[i]=0;
        const int c=(int)cluster[i];
        if (c>=0 && c<classNum) {
            labelNum[c]+=1;
        }
    }

    // Proportional quota per class; the last class absorbs the rounding remainder.
    int *select_labelNum=new int[classNum];
    int sel=0;
    for (int c=0; c<classNum-1; ++c) {
        select_labelNum[c]=(int)((long long)TEST_NUM*labelNum[c]/sampleNum);
        sel+=select_labelNum[c];
    }
    if (classNum>0) {
        select_labelNum[classNum-1]=TEST_NUM-sel;
    }

    // Cap every quota at what the class can supply, then redistribute the
    // shortfall round-robin to classes with spare samples.
    int planned=0;
    for (int c=0; c<classNum; ++c) {
        if (select_labelNum[c]>labelNum[c]) {
            select_labelNum[c]=labelNum[c];
        }
        planned+=select_labelNum[c];
    }
    bool grew=true;
    while (planned<TEST_NUM && grew) {
        grew=false;
        for (int c=0; c<classNum && planned<TEST_NUM; ++c) {
            if (select_labelNum[c]<labelNum[c]) {
                ++select_labelNum[c];
                ++planned;
                grew=true;
            }
        }
    }

    // Random draw; every remaining quota is backed by an unselected sample of
    // that class, so a matching id is always available.
    int pending=planned;
    while (pending>0) {
        const int id=rand()%(sampleNum);
        const int c=(int)cluster[id];
        if (c<0 || c>=classNum) {
            continue;
        }
        if (select_labelNum[c]>0 && select_test[id]==0) {
            select_test[id]=1;
            select_labelNum[c]--;
            pending--;
        }
    }

    // Only reachable when some labels were out of range: top up from the first
    // unselected samples so exactly TEST_NUM rows go to the test part and the
    // caller's buffers are neither overrun nor left short.
    int missing=TEST_NUM-planned;
    for (int i=0; i<sampleNum && missing>0; ++i) {
        if (select_test[i]==0) {
            select_test[i]=1;
            --missing;
        }
    }

    int test_index=0,train_index=0;
    for (int i=0; i<sampleNum; ++i) {
        if (select_test[i]==1) {
            testset[test_index]=trainset[i];
            testlabels[test_index]=cluster[i];
            test_index++;
        }
        else{
            trainset_new[train_index]=trainset[i];
            trainlabels[train_index]=cluster[i];
            train_index++;
        }
    }

    delete []labelNum;
    delete []select_labelNum;
    delete []select_test;
}


/**
 * Trains random forests repeatedly, dropping features with negative
 * importance after each round, and returns the mean importance per gene.
 *
 * Per round:
 *   - treeNum is 100 when geneNum < trainSampleNum, otherwise
 *     (remaining features / trainSampleNum) * 10 using integer division;
 *     the result is clamped to [100, 1000];
 *   - maxDepth is log2(remaining features * typeNum) - 1, at least 3;
 *   - a new forest replaces the previous one, is trained, and every gene whose
 *     get_MeanDecreaseError() value is negative is added to the mask.
 * Rounds stop once a round removed no more than 5% of the features that were
 * available on entry. At least one round is always executed.
 *
 * @param randomForest     in/out: any forest passed in is deleted; on return
 *                         points to the forest of the last round (caller owns it)
 * @param importance_mean  out: array of geneNum floats, mean of
 *                         get_MeanDecreaseError() over all rounds
 * @param trainset         sample-by-feature training grid
 * @param classNum         number of classes
 * @param cluster          per-sample class label
 * @param trainSampleNum   number of rows in trainset
 * @param wholeData        dataset descriptor (geneNum and typeNum are read)
 * @param ReductionNum     number of masked genes on entry; received by value,
 *                         so the updated count is NOT reported back
 * @param ReductionIndex   per-gene mask, 1 = excluded; updated in place
 */
void randomforeastProsse(RandomForest* &randomForest, float* &importance_mean,float**trainset,int classNum,float* cluster,int trainSampleNum,WholeData wholeData,int ReductionNum,int * ReductionIndex){

    /**************************************************************
     *treeNum:    the number of trees in this forest
     *maxDepth:    the max Depth of one single tree
     *minLeafSample:terminate criterion,the min samples in a leaf
     *minInfoGain:terminate criterion,the min information
     *            gain in a node if it can be splitted
     **************************************************************/
    int min_sample_leaf=2;
    float min_gini=0.0;

    cout<<endl<<"randomForest algorithm:"<<endl<<endl;
    cout<<"parameters:"<<endl;
    cout<<"classification parameter: "<<"class(K) = "<<classNum<<endl;
    cout<<"the minimum samples in a leaf: "<<min_sample_leaf<<endl;
    cout<<"the minimum information gain: "<<min_gini<<endl<<endl;

    for (int g=0; g<wholeData.geneNum; ++g) {
        importance_mean[g]=0;
    }

    // Stop threshold: 5% of the features available on entry.
    const int breakN=(wholeData.geneNum-ReductionNum)*0.05;
    const int typenum=wholeData.typeNum;

    // -1 can never equal a valid count, so the first round always runs. The
    // former start value (ReductionNum*0.05) was 0 for ReductionNum == 0, which
    // skipped the loop entirely, left randomForest null and made the final
    // averaging divide by zero.
    int ReductionNum_last=-1;
    int treeNum=0,maxDepth=0;
    int f_i=0;

    for (; ReductionNum_last!=ReductionNum; f_i++) {
        if ((f_i!=0) && (ReductionNum-ReductionNum_last<=breakN)) {
            break;
        }
        cout<<endl<<"RandomForest "<<f_i<<endl;

        ReductionNum_last=ReductionNum;

        if (wholeData.geneNum < trainSampleNum) {
            treeNum = 100;
        }
        else {
            treeNum = (int)((wholeData.geneNum-ReductionNum)/trainSampleNum*10);
        }
        if (treeNum>1000) {
            treeNum=1000;
        }
        if (treeNum<100) {
            treeNum=100;
        }

        maxDepth = log((wholeData.geneNum-ReductionNum)*typenum)/log(2)-1;
        if (maxDepth<3) {
            maxDepth=3;
        }

        // Only the forest of the most recent round is kept.
        if (randomForest!=nullptr) {
            delete randomForest;
        }

        cout<<"total tree number: "<<treeNum<<endl;
        cout<<"max depth of a single tree: "<<maxDepth<<endl;
        cout<<"the feature number: "<<wholeData.geneNum-ReductionNum<<endl<<endl;

        randomForest=new RandomForest(treeNum,maxDepth,min_sample_leaf,min_gini);
        randomForest->train(trainset, cluster, trainSampleNum, wholeData.geneNum*wholeData.typeNum, classNum, false,ReductionIndex,ReductionNum,wholeData.typeNum);

        randomForest->importance(wholeData.geneNum,ReductionIndex);

        for (int g=0; g<wholeData.geneNum; ++g) {
            const auto decrease=randomForest->get_MeanDecreaseError()[g];
            importance_mean[g]=importance_mean[g]+decrease;

            // A negative value means the feature hurt accuracy: mask it for
            // the next round.
            if (decrease<0 && ReductionIndex[g]==0) {
                ReductionNum=ReductionNum+1;
                ReductionIndex[g]=1;
            }
        }
    }

    // f_i equals the number of completed rounds and is at least 1.
    for (int g=0; g<wholeData.geneNum; ++g) {
        importance_mean[g]=importance_mean[g]/float(f_i);
    }
}


/**
 * Variant of randomforeastProsse() that also evaluates every round on a
 * held-out test set and prints one tab-separated line per round
 * (round, tree number, max depth, feature number, mean OOB error, test error).
 *
 * Tree count and depth are derived exactly as in randomforeastProsse(). Rounds
 * stop once a round removed no more than 10 features. At least one round is
 * always executed.
 *
 * @param randomForest     in/out: any forest passed in is deleted; on return
 *                         points to the forest of the last round (caller owns it)
 * @param importance_mean  out: array of geneNum floats, mean of
 *                         get_MeanDecreaseError() over all rounds
 * @param trainset         sample-by-feature training grid
 * @param classNum         number of classes
 * @param cluster          per-sample class label of the training rows
 * @param trainSampleNum   number of rows in trainset
 * @param wholeData        dataset descriptor (geneNum and typeNum are read)
 * @param ReductionNum     number of masked genes on entry; received by value,
 *                         so the updated count is NOT reported back
 * @param ReductionIndex   per-gene mask, 1 = excluded; updated in place
 * @param testset          held-out rows passed to RandomForest::testPredict()
 * @param testlabels       labels of the held-out rows
 * @param TEST_NUM         number of held-out rows
 */
void randomforeastProsse_test(RandomForest* &randomForest, float* &importance_mean,float**trainset,int classNum,float* cluster,int trainSampleNum,WholeData wholeData,int ReductionNum,int * ReductionIndex,float**testset,float*testlabels,int TEST_NUM){

    /**************************************************************
     *treeNum:    the number of trees in this forest
     *maxDepth:    the max Depth of one single tree
     *minLeafSample:terminate criterion,the min samples in a leaf
     *minInfoGain:terminate criterion,the min information
     *            gain in a node if it can be splitted
     **************************************************************/
    int min_sample_leaf=2;
    float min_gini=0.0;

    cout<<endl<<"randomForest algorithm:"<<endl<<endl;
    cout<<"parameters:"<<endl;
    cout<<"classification parameter: "<<"class(K) = "<<classNum<<endl;
    cout<<"the minimum samples in a leaf: "<<min_sample_leaf<<endl;
    cout<<"the minimum information gain: "<<min_gini<<endl<<endl;

    for (int g=0; g<wholeData.geneNum; ++g) {
        importance_mean[g]=0;
    }

    const int typenum=wholeData.typeNum;

    // -1 can never equal a valid count, so the first round always runs. The
    // former start value (ReductionNum*0.05) was 0 for ReductionNum == 0, which
    // skipped the loop entirely, left randomForest null and made the final
    // averaging divide by zero.
    int ReductionNum_last=-1;
    int treeNum=0,maxDepth=0;
    int f_i=0;
    float test_err=0;

    cout<<endl<<endl<<"rf_N"<<'\t'<<"tree_number"<<'\t'<<"max_depth"<<'\t'<<"feature_number"<<'\t'<<"Tree_MeanErrOOB1"<<'\t'<<"text_error_rate";

    for (; ReductionNum_last!=ReductionNum; f_i++) {
        if ((f_i!=0) && (ReductionNum-ReductionNum_last<=10)) {
            break;
        }

        ReductionNum_last=ReductionNum;

        if (wholeData.geneNum < trainSampleNum) {
            treeNum = 100;
        }
        else {
            treeNum = (int)((wholeData.geneNum-ReductionNum)/trainSampleNum*10);
        }
        if (treeNum>1000) {
            treeNum=1000;
        }
        if (treeNum<100) {
            treeNum=100;
        }

        maxDepth = log((wholeData.geneNum-ReductionNum)*typenum)/log(2)-1;
        if (maxDepth<3) {
            maxDepth=3;
        }

        // Only the forest of the most recent round is kept.
        if (randomForest!=nullptr) {
            delete randomForest;
        }

        cout<<endl<<f_i<<'\t'<<treeNum<<'\t'<<maxDepth<<'\t'<<wholeData.geneNum-ReductionNum<<'\t';

        randomForest=new RandomForest(treeNum,maxDepth,min_sample_leaf,min_gini);
        randomForest->train(trainset, cluster, trainSampleNum, wholeData.geneNum*wholeData.typeNum, classNum, false,ReductionIndex,ReductionNum,wholeData.typeNum);

        randomForest->importance(wholeData.geneNum,ReductionIndex);

        // Evaluate this round on the held-out rows and finish the report line.
        randomForest->testPredict(testset, TEST_NUM, testlabels);
        test_err=randomForest->get_test_error();
        float Tree_MeanErrOOB1=randomForest->get_Tree_MeanErrOOB1();
        cout<<Tree_MeanErrOOB1<<'\t'<<test_err;

        for (int g=0; g<wholeData.geneNum; ++g) {
            const auto decrease=randomForest->get_MeanDecreaseError()[g];
            importance_mean[g]=importance_mean[g]+decrease;

            // A negative value means the feature hurt accuracy: mask it for
            // the next round.
            if (decrease<0 && ReductionIndex[g]==0) {
                ReductionNum=ReductionNum+1;
                ReductionIndex[g]=1;
            }
        }
    }

    // f_i equals the number of completed rounds and is at least 1.
    for (int g=0; g<wholeData.geneNum; ++g) {
        importance_mean[g]=importance_mean[g]/float(f_i);
    }
}


/**
 * Writes the ranked feature table to "<output_filename>feature_importance.txt" and then
 * calls saveImportanceGene2 once per data type to produce
 * "<output_filename><stem>_orderByImportance.txt".
 *
 * @param score                    value printed in the "score" column; indexed by rank position
 * @param cut                      only forwarded to saveImportanceGene2
 * @param randomForest             not used by this function
 * @param wholeData                dataset (sampleNum, geneNum, typeNum, geneName are read here)
 * @param classNum                 number of sample classes; the per-type group columns are
 *                                 only written when it is greater than 2
 * @param geneFeatureImport_Index  rank position -> gene index
 * @param importance_mean          per-gene value; a negative entry drives the early cut-off
 * @param cluster                  forwarded to feature_group
 * @param ReductionIndex           per-gene flag; genes flagged 1 are not listed
 * @param ReductionN               used in the early cut-off threshold
 *
 * Side effect: assigns the global out_importance.
 */
void feature_importance_out2(float *score, float cut, RandomForest *randomForest, WholeData wholeData,
                             int classNum, int *geneFeatureImport_Index, float *importance_mean,
                             float *cluster, int *ReductionIndex, int ReductionN) {
    // Stem of an input file name: last "/"-separated component, cut at its first ".".
    // (split accepts several separator characters.)
    auto datasetStem = [](int typeIdx) -> string {
        vector<string> pathParts = split(input_filename[typeIdx], "/");
        vector<string> nameParts = split(pathParts[pathParts.size() - 1], ".");
        return nameParts[0];
    };

    const bool multiClass = classNum > 2;

    out_importance = output_filename + "feature_importance.txt";
    ofstream importanceFile(out_importance.c_str(), ios::out);

    // ---- commented preamble -------------------------------------------------
    importanceFile << endl << "# input parameters:" << endl << endl;
    importanceFile << "# sample number: " << wholeData.sampleNum << endl;
    importanceFile << "# feature number: " << wholeData.geneNum << endl;
    importanceFile << "# data type number: " << wholeData.typeNum << endl;
    importanceFile << "# The number of categories for sample label: " << classNum << endl << endl;
    importanceFile << "# The following is the order of importance score of features." << endl;
    importanceFile << "# score: It is the mean classification error rate of the remaining features after a feature is removed, and the larger the value, the more important the feature is." << endl;
    if (multiClass) {
        importanceFile << "# significant group: Wilcoxon signed-rank test was performed between each two classes then filtered significant groups (p<0.01). For example, [class1, class2] vs [class3] means that the significance between class1/class2 and class3 is p<0.01, but the significance between class1 and class2 is p>0.01." << endl;
        importanceFile << "# specific class: The class which is only one has significant difference (p<0.01) between other classes. For example, [class1, class2] vs [class3] means that the class3 is the specific class." << endl;
    }
    importanceFile << endl;

    // ---- column header ------------------------------------------------------
    importanceFile << "Rank" << '\t' << "feature id" << '\t' << "score";
    if (multiClass) {
        for (int typeIdx = 0; typeIdx < wholeData.typeNum; ++typeIdx) {
            const string stem = datasetStem(typeIdx);
            importanceFile << '\t' << "significant group in " << stem;
            importanceFile << '\t' << "specific class in " << stem;
        }
    }
    importanceFile << endl;

    // ---- ranked rows --------------------------------------------------------
    int rank = 0;     // 1-based rank of the last row written
    int cutRank = 0;  // rank recorded when a negative importance_mean is seen while it is still 0
    for (int pos = 0; pos < wholeData.geneNum; ++pos) {
        const int gene = geneFeatureImport_Index[pos];

        if (ReductionIndex[gene] != 1) {
            ++rank;
            importanceFile << rank << '\t' << wholeData.geneName[gene] << '\t' << score[pos];
            if (multiClass) {
                for (int typeIdx = 0; typeIdx < wholeData.typeNum; ++typeIdx) {
                    string group_result = "";
                    string specific_class = "";
                    // NOTE(review): feature_group is given the rank position, not the gene
                    // index -- confirm that is what it expects.
                    feature_group(group_result, specific_class, pos, wholeData, typeIdx, cluster, classNum);
                    // Only groupings that actually contain a "vs" comparison are reported.
                    if (group_result.find("vs") == std::string::npos) {
                        group_result = "";
                    }
                    importanceFile << '\t' << group_result;
                    importanceFile << '\t' << specific_class;
                }
            }
            importanceFile << endl;
        }

        // Early cut-off: only positions with a negative mean importance take part.
        if (!(importance_mean[gene] < 0)) {
            continue;
        }
        if (cutRank == 0) {
            cutRank = rank;
            continue;
        }
        if ((pos - cutRank) > (wholeData.geneNum - ReductionN - cutRank) / 3) {
            break;
        }
    }

    importanceFile.close();

    // ---- per-type datasets ordered by importance ----------------------------
    for (int typeIdx = 0; typeIdx < wholeData.typeNum; ++typeIdx) {
        string out_orderByImport = output_filename + datasetStem(typeIdx) + "_orderByImportance.txt";
        saveImportanceGene2(wholeData, typeIdx, geneFeatureImport_Index, importance_mean, cut,
                            wholeData.geneNum, out_orderByImport);
    }
}

/**
 * Appends the random-forest summary to the run log named by the global out_runlog:
 * parameters, end time, the list of result files and the total running time.
 * The running time is also printed to stdout.
 *
 * @param wholeData  dataset; geneNum and typeNum are read here
 * @param classNum   number of classes reported as "class(K)"
 * @param fcm        when true, the FCM result file is mentioned in the log
 *
 * Side effect: stores clock() into the global t_end.
 */
void runlog_out(WholeData wholeData,int classNum,bool fcm){
    ofstream runlog(out_runlog.c_str(), ios::app);

    // Parameter section.
    runlog << "randomForest algorithm........" << endl << endl;
    runlog << "parameters:" << endl << endl;
    runlog << "classification parameter: " << "class(K) = " << classNum << endl;
    runlog << "the feature number: " << wholeData.geneNum << endl;

    // Timestamp first, then stop the clock and report on the console.
    runlog << endl << "end time: " << getTime() << endl;
    t_end = clock();
    const auto elapsedSeconds = double(t_end - t_start) / CLOCKS_PER_SEC;
    cout << "running time: " << elapsedSeconds << "s" << endl;

    // Result-file listing.
    runlog << endl << endl << "results:" << endl;
    if (fcm) {
        runlog << endl << "The result of fuzzy c_means cluster(FCM) is in the file:" << endl;
        runlog << "fuzzy_c_means_cluster_results.txt" << endl;
    }
    runlog << endl << "The result of feature importance is in the file: " << endl
           << "feature_importance.txt" << endl << endl;
    runlog << "The raw dataset(s) ordered by important features is in the file: " << endl;
    for (int typeIdx = 0; typeIdx < wholeData.typeNum; ++typeIdx) {
        // split accepts several separator characters; keep the file name without directory,
        // then the part before its first ".".
        vector<string> pathParts = split(input_filename[typeIdx], "/");
        vector<string> nameParts = split(pathParts[pathParts.size() - 1], ".");
        const string orderedFile = nameParts[0] + "_orderByImportance.txt";
        runlog << orderedFile << endl;
    }

    runlog << endl << "running time: " << elapsedSeconds << "s" << endl;
    runlog.close();
}

 /******************** output cluster results (k)**************************/
/**
 * Writes the FCM clustering result for one fixed K to
 * "<output_filename>fuzzy_c_means_cluster_results.txt".
 *
 * The file holds a commented preamble (input sizes, fuzzifier m, K and the three
 * validity indices read at index k) followed by one "sample<TAB>class<N>" row per sample.
 *
 * @param wholeData  dataset; sampleNum, geneNum, typeNum and sampleName are read here
 * @param fcm        clustering object queried through its getters
 * @param k          number of clusters; also the index into getSSE()/getSC()/getDB()
 */
void fcm_out(WholeData wholeData,FCM* fcm,int k){
    const string resultPath = output_filename + "fuzzy_c_means_cluster_results.txt";
    ofstream resultFile(resultPath.c_str(), ios::out);

    // Preamble: input sizes and clustering parameters.
    resultFile << endl << "# input parameters:" << '\t' << endl << endl;
    resultFile << "# sample number: " << wholeData.sampleNum << '\t' << endl;
    resultFile << "# feature number: " << wholeData.geneNum << '\t' << endl;
    resultFile << "# data type number: " << wholeData.typeNum << '\t' << endl << endl;
    resultFile << "# fuzzifier m value:" << fcm->get_m() << '\t' << endl;
    resultFile << "# K = " << k << '\t' << endl;
    resultFile << endl;

    // Validity indices for this K.
    resultFile << "# Elbow Method" << '\t' << fcm->getSSE()[k] << endl;
    resultFile << "# Silhouette Coefficient Index" << '\t' << fcm->getSC()[k] << endl;
    resultFile << "# Davies-Bouldin Index" << '\t' << fcm->getDB()[k] << endl;

    resultFile << "# The following is the results of cluster." << '\t' << endl << endl;

    // Table: header line, then one row per sample (class numbers are printed 1-based).
    resultFile << "sample" << '\t' << "k=" << k << endl;
    for (int sampleIdx = 0; sampleIdx < wholeData.sampleNum; ++sampleIdx) {
        resultFile << wholeData.sampleName[sampleIdx];
        resultFile << '\t' << "class" << fcm->getFcm_result()[sampleIdx] + 1 << endl;
    }
    resultFile.close();
}

/******************** output cluster results **************************/

/**
 * Writes the FCM clustering result for the automatically selected K to
 * "<output_filename>fuzzy_c_means_cluster_results.txt".
 *
 * The file holds a commented preamble (input sizes, fuzzifier m, best K), a table of the
 * validity indices for every K from 2 to getMax_k(), and one "sample<TAB>class<N>" row per
 * sample for the best K.
 *
 * @param wholeData  dataset; sampleNum, geneNum, typeNum and sampleName are read here
 * @param fcm        clustering object queried through its getters
 */
void fcm_out_noK(WholeData wholeData,FCM* fcm){
    const int fcm_opt_k = fcm->getOpt_k();
    const auto max_k = fcm->getMax_k();

    string out_FCM = output_filename + "fuzzy_c_means_cluster_results.txt";
    ofstream outfile_FCM(out_FCM.c_str(), ios::out);

    // Preamble: input sizes and clustering parameters.
    outfile_FCM << endl << "# input parameters:" << '\t' << endl << endl;
    outfile_FCM << "# sample number: " << wholeData.sampleNum << '\t' << endl
                << "# feature number: " << wholeData.geneNum << '\t' << endl
                << "# data type number: " << wholeData.typeNum << '\t' << endl << endl;
    outfile_FCM << "# fuzzifier m value:" << fcm->get_m() << '\t' << endl;
    outfile_FCM << "# the best K value: " << fcm_opt_k << '\t' << endl;
    outfile_FCM << "# The following is the validity index of K from 2 to " << max_k << "." << '\t' << endl << endl;

    // Header row of the validity-index table: one column per K in [2, max_k].
    outfile_FCM << "#    ";
    for (int j = 2; j <= max_k; ++j) {
        outfile_FCM << "   " << "k=" << j;
    }
    outfile_FCM << '\t' << endl;

    // Every index row starts at K=2 so that its values line up with the header columns.
    // (The Elbow row previously started at K=1, which shifted all of its values by one column.)
    outfile_FCM << "# Elbow Method";
    for (int j = 2; j <= max_k; ++j) {
        outfile_FCM << "   " << fcm->getSSE()[j];
    }
    outfile_FCM << '\t' << endl;

    outfile_FCM << "# Silhouette Coefficient Index";
    for (int j = 2; j <= max_k; ++j) {
        outfile_FCM << "   " << fcm->getSC()[j];
    }
    outfile_FCM << '\t' << endl;

    outfile_FCM << "# Davies-Bouldin Index";
    for (int j = 2; j <= max_k; ++j) {
        outfile_FCM << "   " << fcm->getDB()[j];
    }
    outfile_FCM << '\t' << endl;

    outfile_FCM << "# The following is the results of cluster (k=" << fcm_opt_k << ")." << '\t' << endl << endl;

    // Table: header line, then one row per sample (class numbers are printed 1-based).
    outfile_FCM << "sample" << '\t' << "k=" << fcm_opt_k << endl;
    for (int i = 0; i < wholeData.sampleNum; ++i) {
        outfile_FCM << wholeData.sampleName[i];
        outfile_FCM << '\t' << "class" << fcm->getFcm_k_cluster()[fcm_opt_k][i] + 1;
        outfile_FCM << endl;
    }
    outfile_FCM.close();
}

/**
 * Converts numeric cluster assignments into textual class labels.
 *
 * For every sample the cluster value is truncated to an int c; label[i] becomes
 * "class<c+1>" and the same text is stored in labelType[c].
 *
 * @param label      output array with at least sampleNum entries
 * @param labelType  output array indexed by the truncated cluster value
 * @param cluster    per-sample cluster value
 * @param sampleNum  number of samples to process
 * @param k          not used by this function
 */
void cluster_label(string* &label,string* &labelType, float* cluster, int sampleNum,int k){
    int sampleIdx = 0;
    while (sampleIdx < sampleNum) {
        const int classIdx = static_cast<int>(cluster[sampleIdx]);
        string &sampleLabel = label[sampleIdx];
        sampleLabel = "class" + to_string(classIdx + 1);
        labelType[classIdx] = sampleLabel;
        ++sampleIdx;
    }
}

