package uk.ac.ox.cs.krr.dlvstructured;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.openscience.cdk.ChemFile;
import org.openscience.cdk.atomtype.CDKAtomTypeMatcher;
import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.interfaces.IAtom;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.interfaces.IAtomType;
import org.openscience.cdk.interfaces.IBond;
import org.openscience.cdk.interfaces.IChemModel;
import org.openscience.cdk.io.MDLV2000Reader;
import org.openscience.cdk.tools.CDKHydrogenAdder;
import org.openscience.cdk.tools.manipulator.AtomContainerManipulator;
import org.openscience.cdk.tools.manipulator.AtomTypeManipulator;

/* This class builds description graph objects from the molfiles that are
 * located in m_molFilesPath and from the group files that are located in
 * m_groupFilesPath. The number of graphs that are built is parameterisable. */

/**
 * Builds {@link DescriptionGraph} objects from MDL molfiles (parsed with CDK)
 * and from functional-group files.
 *
 * <p>Node numbering used by every method of this class: node 0 is the root
 * node of a graph, the i-th atom (0-based) of a molecule is node i+1, and
 * implicit hydrogens are appended after the already existing nodes. The root
 * is linked to every atom node by a "hasAtom" edge and every bond is stored as
 * two directed edges, one per direction.
 */
public class DGGenerator {

	/** File name suffix of the molfiles that are picked up from the molfile folder. */
	private static final String MOLFILE_SUFFIX=".mol";
	/** File name suffix of the group files that are picked up from the group folder. */
	private static final String GROUPFILE_SUFFIX=".grp";
	/** A line ending with this marker terminates a molfile or a group file. */
	private static final String END_MARKER="END";
	/** Index of the first line of a group file that is parsed as a fact. */
	private static final int FIRST_FACT_LINE=2;

	/** Separates the predicate name from the argument list of a fact. */
	private static final Pattern PREDICATE_DELIMITER=Pattern.compile("\\(");
	/** Matches the parenthesised argument list of a fact. */
	private static final Pattern ARGUMENTS_PATTERN=Pattern.compile("(\\(.*\\))");
	/** Separates the terms inside the argument list of a fact. */
	private static final Pattern TERM_DELIMITER=Pattern.compile(",");

	private final String m_molFilesPath;
	private final String m_groupFilesPath;
	private final Set<String> m_startConcepts=new LinkedHashSet<String>();

	/**
	 * @param molFilesPath   folder that contains the molfiles
	 * @param groupFilesPath folder that contains the group files
	 */
	public DGGenerator(String molFilesPath,String groupFilesPath){
		this.m_molFilesPath=molFilesPath;
		this.m_groupFilesPath=groupFilesPath;
	}

	/**
	 * Produces a map from start concept to description graph by processing the
	 * molfiles located in the molfile folder. Graphs that contain a node
	 * labelled "r" are reported and skipped.
	 *
	 * <p>The JVM is terminated with exit status 1 when fewer than
	 * {@code sizeOfDGSet} radical-free molfiles are available, or when a
	 * {@link NullPointerException} escapes while a molfile is processed.
	 *
	 * @param sizeOfDGSet number of radical-free graphs to collect
	 * @return the collected graphs in the order in which they were accepted
	 */
	public Map<String,DescriptionGraph> assembleDGMap(int sizeOfDGSet){
		Map<String,DescriptionGraph> descriptionGraphs=new LinkedHashMap<String,DescriptionGraph>();
		ArrayList<File> molFilesToConvert=listFilesWithSuffix(m_molFilesPath,MOLFILE_SUFFIX);

		try{
			int acceptedGraphs=0;
			int molfilesIndex=0;
			while (acceptedGraphs < sizeOfDGSet) {
				// explicit bound check instead of waiting for an IndexOutOfBoundsException
				if (molfilesIndex >= molFilesToConvert.size()){
					System.out.println("The number of available radical-free molfiles is smaller than the" +
							" number of molecules to be tested, program exiting.");
					System.exit(1);
					return descriptionGraphs;
				}
				// NOTE(review): a molfile that cannot be parsed yields a graph without
				// nodes, which counts as radical-free and is therefore added to the map.
				DescriptionGraph dg=buildDescriptionGraphFromMolfile(molFilesToConvert.get(molfilesIndex));
				if (isRadicalFree(dg)){
					descriptionGraphs.put(dg.getStartConcept(), dg);
					acceptedGraphs++;
				}
				else
					System.out.println("Molecules with radicals"+dg.getStartConcept());
				molfilesIndex++;
			}
		}
		catch (NullPointerException e){
			System.out.println("Encountered error while parsing molfile, program exiting.");
			System.exit(1);
		}

		return descriptionGraphs;
	}

	/**
	 * Produces a map from group name to description graph by processing the
	 * group files located in the group folder.
	 *
	 * @param sizeOfDDGSet number of group files to convert
	 * @return the converted graphs; the map holds fewer than
	 *         {@code sizeOfDDGSet} entries when not enough group files exist
	 */
	public Map<String,DescriptionGraph> assembleDualDGMap(int sizeOfDDGSet){
		Map<String,DescriptionGraph> descriptionGraphs=new LinkedHashMap<String,DescriptionGraph>();
		ArrayList<File> groupfilesToConvert=listFilesWithSuffix(m_groupFilesPath,GROUPFILE_SUFFIX);

		int available=groupfilesToConvert.size();
		if (available < sizeOfDDGSet)
			System.err.println("Only "+available+" group files are available although "
					+sizeOfDDGSet+" were requested.");

		try{
			int limit=Math.min(sizeOfDDGSet, available);
			for (int i=0; i<limit; i++){
				DescriptionGraph dg=buildDescriptionGraphFromGroupfile(groupfilesToConvert.get(i));
				descriptionGraphs.put(dg.getStartConcept(), dg);
			}
		}
		catch (RuntimeException e){
			// best effort: keep the graphs converted so far
			e.printStackTrace();
		}

		return descriptionGraphs;
	}

	/**
	 * Reads the first {@code sizeCurrentlyTested} molfiles of the molfile
	 * folder and prints the average number of atoms and bonds per molecule.
	 * Nothing is printed when fewer molfiles are available or when a molfile
	 * cannot be parsed.
	 *
	 * @param sizeCurrentlyTested number of molfiles to inspect
	 */
	public void inspectSize(int sizeCurrentlyTested){
		ArrayList<File> molFilesToConvert=listFilesWithSuffix(m_molFilesPath,MOLFILE_SUFFIX);
		if (sizeCurrentlyTested > molFilesToConvert.size()){
			System.err.println("Only "+molFilesToConvert.size()+" molfiles are available although "
					+sizeCurrentlyTested+" were requested.");
			return;
		}

		try{
			double totalNoOfAtoms=0;
			double totalNoOfBonds=0;
			for (int molfilesIndex=0; molfilesIndex<sizeCurrentlyTested; molfilesIndex++) {
				String molfileContent=retrieveContent(molFilesToConvert.get(molfilesIndex));
				IAtomContainer molContent=readFirstAtomContainer(molfileContent);
				totalNoOfAtoms+=molContent.getAtomCount();
				totalNoOfBonds+=molContent.getBondCount();
			}
			double averageAtoms=totalNoOfAtoms/sizeCurrentlyTested;
			double averageBonds=totalNoOfBonds/sizeCurrentlyTested;
			System.out.println("Number of atoms on averate for size "+sizeCurrentlyTested+ " is "+averageAtoms);
			System.out.println("Number of bonds on averate for size "+sizeCurrentlyTested+ " is "+averageBonds);
		}
		catch (NullPointerException npe) {
			npe.printStackTrace();
		} catch (CDKException e) {
			e.printStackTrace();
		}
	}

	/**
	 * @return the (live) set of molecule names registered so far by
	 *         {@link #buildDescriptionGraphFromMolfile(File)}
	 */
	public Set<String> getStartConcepts(){
		return this.m_startConcepts;
	}

	/**
	 * Converts a molfile into a description graph that also contains one node
	 * per implicit hydrogen. The molecule name is the lower-cased file name
	 * without its ".mol" extension and is recorded as a start concept.
	 *
	 * @param molfile the molfile to convert
	 * @return the graph of the molecule; when the conversion fails the problem
	 *         is reported and a graph holding whatever nodes and edges were
	 *         built before the failure is returned
	 */
	public DescriptionGraph buildDescriptionGraphFromMolfile(File molfile){
		String molfileContent=retrieveContent(molfile);
		Set<Node> nodes=new LinkedHashSet<Node>();
		Set<Edge> edges=new LinkedHashSet<Edge>();
		String moleculeName="";
		try{
			// lower case to fit DLV syntax; Locale.ROOT keeps the result
			// independent of the default locale of the machine
			moleculeName=retrieveChEBIID(molfile).toLowerCase(Locale.ROOT);
			m_startConcepts.add(moleculeName);
			IAtomContainer molContent=readFirstAtomContainer(molfileContent);

			CDKAtomTypeMatcher matcher = CDKAtomTypeMatcher.getInstance(molContent.getBuilder());
			for (IAtom atom : molContent.atoms()) {
				IAtomType type = matcher.findMatchingAtomType(molContent, atom);
				AtomTypeManipulator.configure(atom, type);
			}

			AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(molContent);
			CDKHydrogenAdder adder = CDKHydrogenAdder.getInstance(molContent.getBuilder());
			adder.addImplicitHydrogens(molContent);

			nodes=retrieveNodes(molContent,moleculeName);
			edges=retrieveEdges(molContent,moleculeName);

			DescriptionGraph descriptionGraph=new DescriptionGraph(nodes,edges,moleculeName);
			return addImplicitHydrogenNodes(descriptionGraph,molContent);
		} catch (Exception e) {
			System.out.println("Exception thrown for molecule "+molfile.getName());
			e.printStackTrace();
		}
		return new DescriptionGraph(nodes,edges,moleculeName);
	}

	/**
	 * Extends a hydrogen-free description graph with one "h" node per implicit
	 * hydrogen recorded in {@code molContent}. Every hydrogen node is bonded to
	 * its atom by a pair of "single" edges and linked to the root node by a
	 * "hasAtom" edge. The node and edge sets of the given graph are extended
	 * in place.
	 *
	 * @param descriptionGraph graph whose atom nodes follow the numbering of
	 *                         this class (atom i is node i+1)
	 * @param molContent       atom container that holds the hydrogen counts
	 * @return a graph over the extended node and edge sets
	 */
	public DescriptionGraph addImplicitHydrogenNodes(DescriptionGraph descriptionGraph,IAtomContainer molContent){
		Set<Node> nodes = descriptionGraph.getNodes();
		Set<Edge> edges = descriptionGraph.getEdges();
		String moleculeName = descriptionGraph.getStartConcept();
		if (molContent==null){
			System.err.println("No atom container available for molecule "+moleculeName);
			return new DescriptionGraph(nodes,edges,moleculeName);
		}

		int nextHydrogenIndex=nodes.size();
		Integer zeroNode=Integer.valueOf(0);
		int numberOfAtoms=molContent.getAtomCount();
		for (int atomIndex=0; atomIndex<numberOfAtoms; atomIndex++){
			Integer hydrogenCount=molContent.getAtom(atomIndex).getImplicitHydrogenCount();
			// atoms without hydrogen information are skipped individually
			if (hydrogenCount==null)
				continue;
			// node 0 is the root, hence atom i is node i+1 (same mapping as
			// in retrieveNodes and retrieveEdges)
			Integer from=Integer.valueOf(atomIndex+1);
			for (int i=0; i<hydrogenCount.intValue(); i++){
				//create the labels for nodes and edges
				Set<String> nodeLabel=new LinkedHashSet<String>();
				nodeLabel.add("h");
				Set<String> bondLabel=new LinkedHashSet<String>();
				bondLabel.add("single");
				Set<String> hasAtomLabel=new LinkedHashSet<String>();
				hasAtomLabel.add("hasAtom");

				Integer to=Integer.valueOf(nextHydrogenIndex);

				//add the node and its edges
				nodes.add(new Node(nextHydrogenIndex, nodeLabel));
				edges.add(new Edge(from,to,bondLabel));
				edges.add(new Edge(to,from,bondLabel));
				edges.add(new Edge(zeroNode,to,hasAtomLabel));
				nextHydrogenIndex++;
			}
		}

		return new DescriptionGraph(nodes,edges,moleculeName);
	}

	/**
	 * @param dg the graph to inspect
	 * @return {@code false} when some node of the graph carries the label "r",
	 *         {@code true} otherwise
	 */
	public boolean isRadicalFree(DescriptionGraph dg){
		for (Node n:dg.getNodes()){
			if (n.getLabel().contains("r"))
				return false;
		}
		return true;
	}

	/**
	 * Returns the content of a molfile as a string. Lines are joined with the
	 * platform line separator and reading stops after the first line that ends
	 * with "END" or at the end of the file, whichever comes first.
	 *
	 * @param molfile the file to read
	 * @return the text read; I/O problems are reported and whatever was read
	 *         before the problem is returned
	 */
	public String retrieveContent(File molfile){
		StringBuilder content=new StringBuilder();
		String lineSeparator=System.getProperty("line.separator");
		BufferedReader input=null;
		try {
			input=new BufferedReader(new FileReader(molfile));
			boolean firstLine=true;
			String line;
			// readLine() returns null at end of file, i.e. when no END line exists
			while ((line=input.readLine())!=null){
				if (!firstLine)
					content.append(lineSeparator);
				content.append(line);
				firstLine=false;
				if (line.endsWith(END_MARKER))
					break;
			}
		}
		catch (FileNotFoundException e) {
			e.printStackTrace();
		}
		catch (IOException e) {
			e.printStackTrace();
		}
		finally {
			closeQuietly(input);
		}
		return content.toString();
	}

	/**
	 * Builds the nodes of a molecule: the root node 0 labelled "molecule" and
	 * one node i+1 per atom i, labelled with the lower-cased element symbol and,
	 * for formal charges between -3 and +3 (except 0), a charge label such as
	 * "plus1" or "minus2".
	 *
	 * @param molContent   the atoms of the molecule
	 * @param moleculeName currently unused
	 * @return the nodes built; on failure the nodes built so far
	 */
	public Set<Node> retrieveNodes(IAtomContainer molContent,String moleculeName){
		Set<Node> nodes=new LinkedHashSet<Node>();
		try{
			Set<String> labelZeroNode=new LinkedHashSet<String>();
			labelZeroNode.add("molecule");
			nodes.add(new Node(Integer.valueOf(0),labelZeroNode));
			int numberOfAtoms=molContent.getAtomCount();
			for (int i=0; i<numberOfAtoms; i++){
				IAtom atom=molContent.getAtom(i);
				Set<String> label=new LinkedHashSet<String>();
				label.add(atom.getSymbol().toLowerCase(Locale.ROOT));
				Integer formalCharge=atom.getFormalCharge();
				if (formalCharge!=null){
					String chargeLabel=chargeLabel(formalCharge.intValue());
					if (chargeLabel!=null)
						label.add(chargeLabel);
				}
				nodes.add(new Node(Integer.valueOf(i+1),label));
			}
		}
		catch(RuntimeException e){
			e.printStackTrace();
		}
		return nodes;
	}

	/**
	 * Builds the edges of a molecule: one "hasAtom" edge from the root node 0
	 * to every atom node, followed by two directed edges per bond that are
	 * labelled with the lower-cased bond order.
	 *
	 * @param molContent   the atoms and bonds of the molecule
	 * @param moleculeName currently unused
	 * @return the edges built; on failure the edges built so far
	 */
	public Set<Edge> retrieveEdges(IAtomContainer molContent,String moleculeName){
		Set<Edge> edges=new LinkedHashSet<Edge>();
		try {
			//keep the node indices of the atoms from molContent
			Map<IAtom,Integer> atomIndexMap = new LinkedHashMap<IAtom, Integer>();
			int numberOfAtoms=molContent.getAtomCount();
			for (int i=0; i<numberOfAtoms; i++){
				atomIndexMap.put(molContent.getAtom(i), Integer.valueOf(i+1));
			}
			//first add the hasAtom edges
			Integer rootNode=Integer.valueOf(0);
			Set<String> labelHasAtom=new LinkedHashSet<String>();
			labelHasAtom.add("hasAtom");
			for (int i=0; i<numberOfAtoms; i++){
				edges.add(new Edge(rootNode,Integer.valueOf(i+1),labelHasAtom));
			}
			//then add the bond edges
			int numberOfBonds=molContent.getBondCount();
			for (int i=0; i<numberOfBonds; i++){
				IBond bond=molContent.getBond(i);
				Integer fromNode=atomIndexMap.get(bond.getAtom(0));
				Integer toNode=atomIndexMap.get(bond.getAtom(1));
				Set<String> label=new LinkedHashSet<String>();
				label.add(bond.getOrder().toString().toLowerCase(Locale.ROOT));
				edges.add(new Edge(fromNode,toNode,label));
				edges.add(new Edge(toNode,fromNode,label));
			}
		}
		catch(RuntimeException e){
			e.printStackTrace();
		}
		return edges;
	}

	/**
	 * @param molfile a molfile, e.g. ChEBI_12345.mol
	 * @return the file name without a trailing ".mol", e.g. ChEBI_12345
	 */
	public String retrieveChEBIID(File molfile){
		return stripSuffix(molfile.getName(),MOLFILE_SUFFIX);
	}

	/**
	 * @param groupfile a group file, e.g. alcohol.grp
	 * @return the file name without a trailing ".grp", e.g. alcohol
	 */
	public String retrieveGroupName(File groupfile){
		return stripSuffix(groupfile.getName(),GROUPFILE_SUFFIX);
	}

	/**
	 * Returns the predicate name of a fact, i.e. the text in front of the
	 * first opening parenthesis.
	 *
	 * @param fact a fact such as {@code single(1,2)}
	 * @return the predicate name, the whole fact when it holds nothing but
	 *         opening parentheses, or the empty string for {@code null}
	 */
	public String getPredicate(String fact){
		if (fact==null)
			return "";
		String[] items=PREDICATE_DELIMITER.split(fact);
		// split() yields no items when the fact consists of '(' characters only
		return items.length==0 ? fact : items[0];
	}

	/**
	 * Returns the terms of a fact, i.e. the comma separated pieces of the text
	 * between the first opening and the last closing parenthesis. The terms
	 * are returned exactly as written (no trimming).
	 *
	 * @param fact a fact such as {@code single(1,2)}
	 * @return the terms in order; empty when the fact is {@code null} or has
	 *         no parenthesised argument list
	 */
	public ArrayList<String> getTerms(String fact){
		ArrayList<String> terms=new ArrayList<String>();
		if (fact==null)
			return terms;

		Matcher matcher=ARGUMENTS_PATTERN.matcher(fact);
		if (!matcher.find())
			return terms;

		String arguments=matcher.group();
		String stringTerms=arguments.substring(1, arguments.length()-1);
		String[] arrayTerms=TERM_DELIMITER.split(stringTerms);
		if (arrayTerms.length >= 1){
			for (String stringTerm:arrayTerms)
				terms.add(stringTerm);
		}
		else
			terms.add(stringTerms);
		return terms;
	}

	/**
	 * Builds the nodes described by the lines of a group file. Starting at the
	 * third line, every fact without a comma (other than the END line) gives a
	 * node whose id is the first term of the fact and whose label is the
	 * predicate of the fact; the predicate "*" gives an unlabelled node.
	 * Blank lines are ignored and malformed facts are reported and skipped.
	 *
	 * @param groupContent the lines of a group file
	 * @return the nodes built
	 */
	public Set<Node> retrieveNodes(ArrayList<String> groupContent){
		Set<Node> nodes=new LinkedHashSet<Node>();
		if (groupContent==null)
			return nodes;

		for (int i=FIRST_FACT_LINE; i<groupContent.size(); i++){
			String fact=groupContent.get(i);
			if (fact==null)
				continue;
			String trimmedFact=fact.trim();
			if (trimmedFact.length()==0 || trimmedFact.equals(END_MARKER) || fact.contains(","))
				continue;

			ArrayList<String> terms=getTerms(fact);
			if (terms.isEmpty()){
				System.err.println("Skipping node fact without a node id: "+fact);
				continue;
			}
			Integer nodeId;
			try {
				nodeId=Integer.valueOf(terms.get(0).trim());
			}
			catch (NumberFormatException e){
				System.err.println("Skipping node fact with a non-numeric node id: "+fact);
				continue;
			}
			Set<String> label = new LinkedHashSet<String>();
			String element=getPredicate(fact);
			if (!element.equals("*"))
				label.add(element);
			nodes.add(new Node(nodeId, label));
		}
		return nodes;
	}

	/**
	 * Extracts the number of nodes of a functional group from the first line
	 * of a group file, i.e. the integer that follows the first ':' (or the
	 * whole line when it holds no ':').
	 *
	 * @param nodesNumberInfo the first line of a group file
	 * @return the number of nodes, or 0 when it cannot be parsed
	 */
	public int retrieveNodesNumber(String nodesNumberInfo){
		if (nodesNumberInfo==null)
			return 0;
		String number=nodesNumberInfo.substring(nodesNumberInfo.indexOf(':')+1).trim();
		try {
			return Integer.parseInt(number);
		}
		catch(NumberFormatException e){
			e.printStackTrace();
		}
		return 0;
	}

	/**
	 * Builds the edges described by the lines of a group file. Starting at the
	 * third line, every fact that contains a comma gives two directed edges
	 * between its first two terms, labelled with the predicate of the fact.
	 * Afterwards one "hasAtom" edge is added from node 0 to every node
	 * 1..n, where n is the node count announced on the first line. Malformed
	 * bond facts are reported and skipped.
	 *
	 * @param groupContent the lines of a group file
	 * @return the edges built
	 */
	public Set<Edge> retrieveEdges(ArrayList<String> groupContent){
		Set<Edge> edges=new LinkedHashSet<Edge>();
		if (groupContent==null || groupContent.isEmpty())
			return edges;

		//first add the bond edges
		for (int i=FIRST_FACT_LINE; i<groupContent.size(); i++){
			String fact=groupContent.get(i);
			if (fact==null || !fact.contains(","))
				continue;

			ArrayList<String> terms=getTerms(fact);
			if (terms.size()<2){
				System.err.println("Skipping bond fact with fewer than two terms: "+fact);
				continue;
			}
			Integer from;
			Integer to;
			try {
				from=Integer.valueOf(terms.get(0).trim());
				to=Integer.valueOf(terms.get(1).trim());
			}
			catch (NumberFormatException e){
				System.err.println("Skipping bond fact with a non-numeric node id: "+fact);
				continue;
			}
			Set<String> label = new LinkedHashSet<String>();
			label.add(getPredicate(fact));
			edges.add(new Edge(from,to,label));
			edges.add(new Edge(to,from,label));
		}

		//then add the hasAtom edges
		Set<String> hasAtomLabel = new LinkedHashSet<String>();
		hasAtomLabel.add("hasAtom");
		Integer rootNode=Integer.valueOf(0);
		int nodesNumber=retrieveNodesNumber(groupContent.get(0));
		for (int i=1; i<=nodesNumber; i++){
			edges.add(new Edge(rootNode,Integer.valueOf(i),hasAtomLabel));
		}
		return edges;
	}

	/**
	 * Reads the lines of a group file up to and including the first line that
	 * ends with "END", or up to the end of the file when no such line exists.
	 *
	 * @param groupfile the file to read
	 * @return the lines read; I/O problems are reported and the lines read
	 *         before the problem are returned
	 */
	public ArrayList<String> parseGroupFileContent(File groupfile){
		ArrayList<String> groupContent=new ArrayList<String>();
		BufferedReader input=null;
		try {
			input=new BufferedReader(new FileReader(groupfile));
			String line;
			// readLine() returns null at end of file, i.e. when no END line exists
			while ((line=input.readLine())!=null){
				groupContent.add(line);
				if (line.endsWith(END_MARKER))
					break;
			}
		}
		catch (FileNotFoundException e) {
			e.printStackTrace();
		}
		catch (IOException e) {
			e.printStackTrace();
		}
		finally {
			closeQuietly(input);
		}
		return groupContent;
	}

	/**
	 * Converts a group file into a description graph whose start concept is
	 * the file name without its ".grp" extension.
	 *
	 * @param groupfile the group file to convert
	 * @return the graph of the functional group; when the conversion fails
	 *         the problem is reported and the parts built so far are returned
	 */
	public DescriptionGraph buildDescriptionGraphFromGroupfile(File groupfile){
		Set<Node> nodes=new LinkedHashSet<Node>();
		Set<Edge> edges=new LinkedHashSet<Edge>();
		String groupName="";

		try{
			groupName=retrieveGroupName(groupfile);
			ArrayList<String> groupContent=parseGroupFileContent(groupfile);
			nodes=retrieveNodes(groupContent);
			edges=retrieveEdges(groupContent);
		} catch (Exception e) {
			e.printStackTrace();
		}
		return new DescriptionGraph(nodes,edges,groupName);
	}

	//lists the regular files of a folder whose name ends with the given suffix;
	//the result is empty when the folder is missing or cannot be listed
	private static ArrayList<File> listFilesWithSuffix(String folderPath,String suffix){
		ArrayList<File> matches=new ArrayList<File>();
		File[] entries = folderPath==null ? null : new File(folderPath).listFiles();
		if (entries==null){
			System.err.println("Cannot list the files of folder "+folderPath);
			return matches;
		}
		for (File entry : entries) {
			if (entry.isFile() && entry.getName().endsWith(suffix))
				matches.add(entry);
		}
		return matches;
	}

	//parses molfile text with CDK and returns the first atom container of the
	//first model of the first sequence
	private static IAtomContainer readFirstAtomContainer(String molfileContent) throws CDKException{
		MDLV2000Reader reader = new MDLV2000Reader(new StringReader(molfileContent));
		ChemFile chemFile = (ChemFile) reader.read(new ChemFile());
		IChemModel model = chemFile.getChemSequence(0).getChemModel(0);
		return model.getMoleculeSet().getAtomContainer(0);
	}

	//maps a formal charge between -3 and +3 (except 0) to its label,
	//any other charge to null
	private static String chargeLabel(int charge){
		if (charge>=1 && charge<=3)
			return "plus"+charge;
		if (charge<=-1 && charge>=-3)
			return "minus"+(-charge);
		return null;
	}

	//removes the suffix from the end of the name when the name ends with it
	private static String stripSuffix(String name,String suffix){
		if (name.endsWith(suffix))
			return name.substring(0, name.length()-suffix.length());
		return name;
	}

	//closes the reader (if any) and reports, but does not propagate, a failure
	private static void closeQuietly(BufferedReader reader){
		if (reader==null)
			return;
		try {
			reader.close();
		}
		catch (IOException e) {
			e.printStackTrace();
		}
	}
}
