package pkg1;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;

/**
 * Turns protein "paths" into a readable table.
 *
 * <p>Every path line of the protein file is a list of protein IDs joined by
 * {@code "---"}. For each pair of neighbouring IDs on a line, the IDs are looked up
 * in a tab-separated mapping file and a tab-separated link file, and the results are
 * written to a file named {@code "table" + proteinFile}.
 */
public class proteinLinkTable {
	/** Separator between protein IDs on one path line of the protein file. */
	private static final String PATH_SEPARATOR = "---";
	/** The first line containing this marker ends the path section of the protein file. */
	private static final String END_MARKER = "==";
	/** Index of the mapping-file column that is printed after a protein ID. */
	private static final int NAME_COLUMN = 4;
	/** Index of the link-file column that is printed for a matched pair. */
	private static final int LINK_VALUE_COLUMN = 2;

	/**
	 * Reads path lines from {@code proteinFile} until end of file or until a line
	 * containing {@code "=="}, and writes one record per neighbouring ID pair to the
	 * file {@code "table" + proteinFile} (so {@code proteinFile} has to be a plain
	 * relative file name for the output name to be valid).
	 *
	 * <p>A path line looks like
	 * {@code MAL13P1.137-1---PF07_0016-1---MAL8P1.153-1}. For each neighbouring pair
	 * {@code (left, right)} the output receives, in this order:
	 * <ul>
	 *   <li>{@code left}, a space, column 5 of the first mapping row whose first
	 *       column contains {@code left}, a space (nothing if no row matches);</li>
	 *   <li>the same for {@code right};</li>
	 *   <li>column 3 of the first link row whose first column contains {@code left}
	 *       and whose second column contains {@code right}, followed by
	 *       {@code "\t\n "} (nothing if no row matches, in which case the record is
	 *       not terminated by a newline).</li>
	 * </ul>
	 *
	 * <p>Rows that are too short to hold the column that would be printed are skipped.
	 * Any exception is reported with {@code printStackTrace()} and processing stops;
	 * whatever was written so far is still flushed to the output file.
	 *
	 * @param filepath    tab-separated mapping file used to translate protein IDs
	 * @param proteinFile file holding the {@code "---"}-separated path lines
	 * @param linkFile    tab-separated file describing links between two proteins
	 * @param nbLine      currently ignored; kept so existing callers keep compiling
	 * @return always an empty list (nothing is ever added to it)
	 */
	public ArrayList<ArrayList<Integer>> loadFile(String filepath, String proteinFile, String linkFile, int nbLine) {
		ArrayList<ArrayList<Integer>> sequence = new ArrayList<ArrayList<Integer>>();
		PrintWriter out = null;
		BufferedReader proteinReader = null;
		try {
			out = new PrintWriter(new FileWriter("table" + proteinFile));
			proteinReader = openReader(proteinFile);
			String pathLine;
			// Stop at end of file as well as at the "==" marker; a missing marker used
			// to end in a NullPointerException.
			while ((pathLine = proteinReader.readLine()) != null && !pathLine.contains(END_MARKER)) {
				String[] ids = pathLine.split(PATH_SEPARATOR);
				// A line with fewer than two IDs has no pairs, so the loop body never runs.
				for (int i = 0; i < ids.length - 1; i++) {
					writeTranslatedId(out, filepath, ids[i]);
					writeTranslatedId(out, filepath, ids[i + 1]);
					writeLink(out, linkFile, ids[i], ids[i + 1]);
				}
			}
		} catch (Exception e) {
			// Best-effort tool: report the problem and keep what was produced so far.
			e.printStackTrace();
		} finally {
			closeQuietly(proteinReader);
			if (out != null) {
				// Closing flushes the buffer; without it the table file could stay empty.
				out.close();
			}
		}
		return sequence;
	}

	/** Prints {@code id} and its mapping-file name column, if the mapping file has a matching row. */
	private static void writeTranslatedId(PrintWriter out, String filepath, String id) throws IOException {
		String[] row = firstMatchingRow(filepath, id, null, NAME_COLUMN + 1);
		if (row != null) {
			out.print(id + " ");
			out.print(row[NAME_COLUMN] + " ");
		}
	}

	/** Prints the link-file value for the ordered pair {@code (left, right)} and ends the record. */
	private static void writeLink(PrintWriter out, String linkFile, String left, String right) throws IOException {
		// The second column is compared with the RIGHT protein of the pair. The previous
		// code compared both columns with the left one, which only matches self-links.
		// NOTE(review): rows stored in the opposite orientation (right, left) are not
		// matched - confirm whether the link file lists each link in both directions.
		String[] row = firstMatchingRow(linkFile, left, right, LINK_VALUE_COLUMN + 1);
		if (row != null) {
			out.print(row[LINK_VALUE_COLUMN] + "\t\n ");
		}
	}

	/**
	 * Scans a tab-separated file from the top and returns the columns of the first row
	 * that has at least {@code minColumns} columns, whose first column contains
	 * {@code firstKey} and - when {@code secondKey} is not null - whose second column
	 * contains {@code secondKey}. Returns null when no row qualifies. The file is
	 * re-read on every call and is always closed before returning.
	 */
	private static String[] firstMatchingRow(String file, String firstKey, String secondKey, int minColumns)
			throws IOException {
		BufferedReader reader = openReader(file);
		try {
			String line;
			while ((line = reader.readLine()) != null) {
				String[] columns = line.split("\t");
				if (columns.length < minColumns || columns.length < 2) {
					continue;
				}
				if (!columns[0].contains(firstKey)) {
					continue;
				}
				if (secondKey == null || columns[1].contains(secondKey)) {
					return columns;
				}
			}
			return null;
		} finally {
			reader.close();
		}
	}

	/** Opens {@code file} for line-by-line reading using the platform default charset. */
	private static BufferedReader openReader(String file) throws IOException {
		return new BufferedReader(new InputStreamReader(new FileInputStream(new File(file))));
	}

	/** Closes {@code reader} if it was opened; a failure while closing is only reported. */
	private static void closeQuietly(BufferedReader reader) {
		if (reader == null) {
			return;
		}
		try {
			reader.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Runs the conversion on the fixed input files expected in the working directory.
	 *
	 * @param args ignored
	 * @throws IOException declared for compatibility; {@code loadFile} reports its own errors
	 */
	public static void main(String[] args) throws IOException
	{
		proteinLinkTable a = new proteinLinkTable();
		a.loadFile("COG.mappings.v9.1.txt", "out5t3800.txt", "PlasmodiumfalciparumLinkSimp.txt", 111);
	}
}
