/*
 * Decompiled with CFR 0.152.
 */
package slib.graph.io.loader.bio.gaf2;

import java.util.HashSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.openrdf.model.URI;
import org.openrdf.model.vocabulary.RDF;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import slib.graph.io.conf.GDataConf;
import slib.graph.io.loader.GraphLoader;
import slib.graph.io.loader.bio.gaf2.EvidenceCodeRules;
import slib.graph.io.loader.utils.filter.graph.Filter;
import slib.graph.io.loader.utils.filter.graph.gaf2.FilterGraph_GAF2;
import slib.graph.io.loader.utils.filter.graph.gaf2.FilterGraph_GAF2_cst;
import slib.graph.io.loader.utils.filter.graph.repo.FilterRepository;
import slib.graph.model.graph.G;
import slib.graph.model.impl.repo.URIFactoryMemory;
import slib.utils.ex.SLIB_Ex_Critic;
import slib.utils.impl.BigFileReader;

/**
 * {@link GraphLoader} reading annotations from a GAF 2 file into a graph.
 *
 * <p>For every retained annotation line, the annotated entity (URI built from
 * the instance prefix and the {@link #DB_OBJECT_ID} column) is added to the
 * graph as a vertex if it is not already present, and an {@code rdf:type}
 * edge is created from the entity to the GO term vertex. A line is skipped
 * (and counted) when its evidence code is rejected by the configured filter,
 * when its qualifier column is not empty, when the GO term is not a vertex of
 * the graph, or when none of its taxon ids belongs to the configured taxons.
 *
 * <p>Configuration parameters read from the {@link GDataConf}:
 * <ul>
 *   <li>{@code prefix}: prefix of the entity URIs; defaults to the namespace
 *       of the graph URI;</li>
 *   <li>{@code filters}: comma-separated ids of filters registered in the
 *       {@link FilterRepository}; at most one {@link FilterGraph_GAF2} is
 *       admitted.</li>
 * </ul>
 */
public class GraphLoader_GAF_2 implements GraphLoader {
    // Zero-based indexes of the tab-separated columns of an annotation line.
    public static final int DB = 0;
    public static final int DB_OBJECT_ID = 1;
    public static final int DB_OBJECT_SYMBOL = 2;
    public static final int QUALIFIER = 3;
    public static final int GOID = 4;
    public static final int REFERENCE = 5;
    public static final int EVIDENCE_CODE = 6;
    public static final int WITH = 7;
    public static final int ASPECT = 8;
    public static final int DB_OBJECT_NAME = 9;
    public static final int DB_OBJECT_SYNONYM = 10;
    public static final int DB_OBJECT_TYPE = 11;
    public static final int TAXON = 12;
    public static final int DATE = 13;
    public static final int ASSIGNED_BY = 14;
    public static final int ANNOTATION_XP = 15;
    public static final int GENE_PRODUCT_ISOFORM = 16;

    /** Graph being populated; set by {@link #populate(GDataConf, G)}. */
    private G graph;
    Logger logger = LoggerFactory.getLogger(this.getClass());
    URIFactoryMemory factory = URIFactoryMemory.getSingleton();
    /** Prefix prepended to the DB object id to build the entity URI. */
    String prefixUriInstance;
    /** Prefix used for term identifiers which are not of the form {@code prefix:id}. */
    String defaultURIprefix;
    Pattern colon = Pattern.compile(":");

    /**
     * Loads the GAF 2 file described by the configuration into the given graph.
     *
     * @param conf  configuration giving the file location and the optional
     *              {@code prefix} and {@code filters} parameters
     * @param graph the graph to populate, must not be {@code null}
     * @throws SLIB_Ex_Critic if the graph is {@code null} or the file cannot be loaded
     */
    @Override
    public void populate(GDataConf conf, G graph) throws SLIB_Ex_Critic {
        this.logger.info("-------------------------------------");
        this.logger.info("Loading data using GAF2 loader.");
        this.logger.info("-------------------------------------");
        if (graph == null) {
            throw new SLIB_Ex_Critic("Cannot process Null Graph");
        }
        this.logger.info("GAF 2 loader populates graph " + graph.getURI());
        this.graph = graph;
        this.process(conf);
        this.logger.info("-------------------------------------");
    }

    /**
     * Reads the file line by line and adds the retained annotations to the graph.
     *
     * @param conf the loader configuration
     * @throws SLIB_Ex_Critic if a filter cannot be resolved, the file cannot be
     *                        read, an annotation line is malformed, a namespace
     *                        prefix is unknown, or no valid GAF 2 version header
     *                        was found
     */
    private void process(GDataConf conf) throws SLIB_Ex_Critic {
        this.prefixUriInstance = (String)conf.getParameter("prefix");
        if (this.prefixUriInstance == null) {
            this.prefixUriInstance = this.graph.getURI().getNamespace();
        }
        this.logger.info("Instance URIs will be prefixed by: " + this.prefixUriInstance);
        this.defaultURIprefix = this.prefixUriInstance;
        this.logger.info("Default URI prefix is set to: " + this.prefixUriInstance);

        FilterGraph_GAF2 filter = this.findGaf2Filter(conf);
        Set<String> taxons = null;
        Set<String> excludedEC = null;
        if (filter != null) {
            taxons = filter.getTaxons();
            excludedEC = filter.getExcludedEC();
        }

        Pattern p_tab = Pattern.compile("\t");
        // The taxon pattern is only needed when a taxon restriction is configured.
        Pattern p_taxid = taxons != null ? Pattern.compile(".?taxon:(\\d+).?") : null;
        String fileLocation = conf.getLoc();

        int countEntities = 0;
        int countAnnotsLoaded = 0;
        this.logger.info("file location : " + fileLocation);
        int existsQualifier = 0;
        int not_found = 0;
        int eC_restriction = 0;
        int taxonsRestriction = 0;
        this.logger.info("Loading...");
        URIFactoryMemory uriManager = URIFactoryMemory.getSingleton();
        boolean validHeader = false;
        // Number of lines read so far (comment lines included).
        int c = 0;
        try {
            BigFileReader file = new BigFileReader(fileLocation);
            try {
                while (file.hasNext()) {
                    String line = file.nextTrimmed();
                    ++c;
                    if (line.startsWith("!")) {
                        if (GraphLoader_GAF_2.isGaf2VersionHeader(line)) {
                            validHeader = true;
                        }
                    } else if (validHeader && !line.isEmpty()) {
                        // Lines found before a valid version header are ignored;
                        // blank lines are skipped instead of failing on a column access.
                        String[] data = p_tab.split(line);
                        if (data.length <= TAXON) {
                            throw new SLIB_Ex_Critic("Malformed GAF-2 entry at line " + c + " of " + fileLocation + ": expecting at least " + (TAXON + 1) + " tab-separated columns, found " + data.length);
                        }
                        URI entityID = uriManager.getURI(this.prefixUriInstance + data[DB_OBJECT_ID]);
                        String gotermURIstring = this.buildURI(data[GOID]);
                        String qualifier = data[QUALIFIER];
                        String evidenceCode = data[EVIDENCE_CODE];
                        String taxon_ids = data[TAXON];

                        if (excludedEC != null && !EvidenceCodeRules.areValid(excludedEC, evidenceCode)) {
                            ++eC_restriction;
                        } else if (!qualifier.isEmpty()) {
                            ++existsQualifier;
                        } else {
                            URI uriGOterm = uriManager.getURI(gotermURIstring);
                            if (!this.graph.containsVertex(uriGOterm)) {
                                ++not_found;
                                this.logger.debug("Cannot found GO term {}", uriGOterm);
                            } else if (p_taxid != null && !GraphLoader_GAF_2.matchesAnyTaxon(p_taxid, taxons, taxon_ids)) {
                                ++taxonsRestriction;
                            } else {
                                if (!this.graph.containsVertex(entityID)) {
                                    this.graph.addV(entityID);
                                    ++countEntities;
                                }
                                this.graph.addE(entityID, RDF.TYPE, uriGOterm);
                                ++countAnnotsLoaded;
                            }
                        }
                    }
                    if (c % 1000000 == 0) {
                        this.logger.info(c + " GAF entries processed");
                    }
                }
            }
            finally {
                // Release the reader even when parsing fails.
                file.close();
            }
        }
        catch (SLIB_Ex_Critic e) {
            // Already a loader error carrying its own message: do not wrap it again.
            throw e;
        }
        catch (Exception e) {
            throw new SLIB_Ex_Critic((Throwable)e);
        }
        if (!validHeader) {
            throw new SLIB_Ex_Critic("Invalid header for GAF-2 file " + fileLocation + "\nExpecting \"!gaf-version: 2.0\" as first line");
        }
        this.logger.info("\tExcluded  - Taxons restriction         : " + taxonsRestriction);
        this.logger.info("\tExcluded  - Evidence Code restriction  : " + eC_restriction);
        this.logger.info("\tExcluded  - Contains qualifier \t      : " + existsQualifier);
        this.logger.info("\tNot found unexisting term in the graph :\t" + not_found);
        this.logger.info("Number of Instance loaded \t  \t: " + countEntities);
        this.logger.info("Number of Annotation loaded \t: " + countAnnotsLoaded);
        this.logger.info("GAF2 Loader done.");
    }

    /**
     * Resolves the filters listed in the {@code filters} parameter and returns
     * the GAF 2 filter among them, if any.
     *
     * @param conf the loader configuration
     * @return the single {@link FilterGraph_GAF2} specified, or {@code null} if none
     * @throws SLIB_Ex_Critic if a filter id is unknown to the repository or if
     *                        more than one GAF 2 filter is specified
     */
    private FilterGraph_GAF2 findGaf2Filter(GDataConf conf) throws SLIB_Ex_Critic {
        Set<Filter> filters = new HashSet<Filter>();
        String filtersAsStrings = (String)conf.getParameter("filters");
        if (filtersAsStrings != null) {
            FilterRepository filtersRepo = FilterRepository.getInstance();
            for (String fname : filtersAsStrings.split(",")) {
                Filter f = filtersRepo.getFilter(fname);
                if (f == null) {
                    throw new SLIB_Ex_Critic("Cannot locate filter associated to id " + fname);
                }
                filters.add(f);
            }
        }
        FilterGraph_GAF2 filter = null;
        for (Filter f : filters) {
            if (!(f instanceof FilterGraph_GAF2)) {
                continue;
            }
            if (filter != null) {
                throw new SLIB_Ex_Critic("Two filters " + FilterGraph_GAF2_cst.TYPE + " have been specified. Only one admitted");
            }
            filter = (FilterGraph_GAF2)f;
            this.logger.info("Filtering according to filter " + filter.getId() + "\ttype" + filter.getType());
        }
        return filter;
    }

    /**
     * Tells whether a comment line declares a supported GAF version, i.e. is of
     * the form {@code !gaf-version: 2} or {@code !gaf-version: 2.0}.
     *
     * @param line a line starting with {@code !}
     * @return {@code true} if the line is a supported version header
     */
    private static boolean isGaf2VersionHeader(String line) {
        String[] parts = line.split(":");
        if (parts.length != 2) {
            return false;
        }
        // Drop the leading '!' of the comment line.
        String flag = parts[0].trim().substring(1);
        String version = parts[1].trim();
        return flag.equals("gaf-version") && (version.equals("2") || version.equals("2.0"));
    }

    /**
     * Tells whether at least one taxon id found in the taxon column belongs to
     * the accepted taxons.
     *
     * @param p_taxid  pattern capturing a taxon id in its first group
     * @param taxons   accepted taxon ids
     * @param taxonIds content of the taxon column
     * @return {@code true} if one of the ids is accepted
     */
    private static boolean matchesAnyTaxon(Pattern p_taxid, Set<String> taxons, String taxonIds) {
        Matcher m = p_taxid.matcher(taxonIds);
        while (m.find()) {
            if (taxons.contains(m.group(1))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Builds the URI string of a term identifier. An identifier of the form
     * {@code prefix:id} is expanded using the namespace registered for the
     * prefix in the URI factory; any other value is appended to the default
     * URI prefix.
     *
     * @param value the identifier as found in the file
     * @return the URI as a string
     * @throws SLIB_Ex_Critic if no namespace is registered for the prefix
     */
    private String buildURI(String value) throws SLIB_Ex_Critic {
        String[] info = this.getDataColonSplit(value);
        if (info != null && info.length == 2) {
            String ns = this.factory.getNamespace(info[0]);
            if (ns == null) {
                throw new SLIB_Ex_Critic("No namespace associated to prefix " + info[0] + ". Cannot load " + value + ", please load required namespace prefix");
            }
            return ns + info[1];
        }
        return this.defaultURIprefix + value;
    }

    /**
     * Splits a value on colons and trims its first two components.
     *
     * @param line the value to split
     * @return the components, or {@code null} if the value is empty; the array
     *         is empty when the value only contains colons
     */
    private String[] getDataColonSplit(String line) {
        if (line.isEmpty()) {
            return null;
        }
        String[] data = this.colon.split(line);
        // A value made only of colons yields an empty array: nothing to trim.
        if (data.length > 0) {
            data[0] = data[0].trim();
        }
        if (data.length > 1) {
            data[1] = data[1].trim();
        }
        return data;
    }
}

