package pkg1;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;

/**
 * Translates protein paths into a table of protein IDs, their names and the
 * link value between each pair of consecutive proteins on a path.
 *
 * <p>The table is written to a file named {@code "table" + proteinFile}.
 */
public class proteinLinkTable {

    /** Separator between protein IDs on one path line. */
    private static final String PATH_SEPARATOR = "---";

    /** A path line containing this marker ends the section of paths to process. */
    private static final String END_MARKER = "==";

    /** Column separator of the mapping file and the link file. */
    private static final String COLUMN_SEPARATOR = "\t";

    /**
     * Reads protein paths from {@code proteinFile} and, for every pair of
     * consecutive proteins on a path, writes one table entry to
     * {@code "table" + proteinFile}.
     *
     * <p>A path line looks like
     * {@code MAL13P1.137-1---PF07_0016-1---MAL8P1.153-1---MAL7P1.87-1---PF08_0126-1}.
     * Reading stops at the end of the file or at the first line containing
     * {@code "=="}. Lines with fewer than two IDs produce no output.
     *
     * <p>For each consecutive pair the following is written, in this order:
     * <ol>
     *   <li>the first ID and column 4 of the first row of {@code filepath}
     *       whose column 0 contains that ID,</li>
     *   <li>the same for the second ID,</li>
     *   <li>column 2 of the first row of {@code linkFile} whose column 0
     *       contains the first ID and whose column 1 contains the second ID,
     *       followed by {@code "\t\n "}.</li>
     * </ol>
     * A lookup that finds no row writes nothing for that item.
     *
     * <p>NOTE(review): the original link lookup compared both columns against
     * the first ID, which can only match self-links. It now compares column 1
     * against the second ID; confirm this against the link file format.
     *
     * <p>NOTE(review): the element type of the returned list was lost in the
     * reviewed copy of this file (it read {@code ArrayList>}). The method is
     * generic so that the always-empty result stays assignable to whatever
     * element type callers declare; restore the concrete type if it is known.
     *
     * <p>I/O failures are reported with a stack trace on standard error and are
     * not rethrown.
     *
     * @param filepath    the large tab-separated file that maps each protein ID
     *                    to a real name
     * @param proteinFile the file generated by super-seq mining, one path per line
     * @param linkFile    tab-separated file of links between proteins
     * @param nbLine      number of lines that need to be read; currently not used
     * @param <T>         element type of the returned list
     * @return a list that this method never adds to, so it is always empty
     */
    public <T> ArrayList<T> loadFile(String filepath, String proteinFile, String linkFile, int nbLine) {
        ArrayList<T> sequence = new ArrayList<T>();

        // try-with-resources guarantees the table is flushed and every handle
        // is released, even when a lookup fails half-way through.
        try (PrintWriter out = new PrintWriter(new FileWriter("table" + proteinFile));
             BufferedReader paths = openReader(proteinFile)) {

            String pathLine;
            // Stop at end of file as well as at the "==" marker; a file
            // without the marker must not end in a NullPointerException.
            while ((pathLine = paths.readLine()) != null && !pathLine.contains(END_MARKER)) {
                String[] ids = pathLine.split(PATH_SEPARATOR);
                for (int i = 0; i < ids.length - 1; i++) {
                    writeName(out, filepath, ids[i]);
                    writeName(out, filepath, ids[i + 1]);
                    writeLink(out, linkFile, ids[i], ids[i + 1]);
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return sequence;
    }

    /** Writes {@code id} and its name (column 4 of the mapping file), if a mapping row exists. */
    private static void writeName(PrintWriter out, String mappingFile, String id) throws IOException {
        String[] row = findFirstRow(mappingFile, 5, id, null);
        if (row != null) {
            out.print(id + " ");
            out.print(row[4] + " ");
        }
    }

    /** Writes the link value (column 2 of the link file) for the pair, if a link row exists. */
    private static void writeLink(PrintWriter out, String linkFile, String fromId, String toId)
            throws IOException {
        String[] row = findFirstRow(linkFile, 3, fromId, toId);
        if (row != null) {
            out.print(row[2] + "\t\n ");
        }
    }

    /**
     * Scans a tab-separated file from the top and returns the first matching row.
     *
     * <p>The file is streamed rather than cached because the mapping file is
     * described as large.
     *
     * @param path       file to scan
     * @param minColumns minimum number of columns a row needs to be considered,
     *                   so callers can index the returned array safely
     * @param firstId    text that column 0 must contain
     * @param secondId   text that column 1 must contain, or {@code null} to
     *                   leave column 1 unchecked
     * @return the columns of the first matching row, or {@code null} if none matches
     */
    private static String[] findFirstRow(String path, int minColumns, String firstId, String secondId)
            throws IOException {
        try (BufferedReader reader = openReader(path)) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] columns = line.split(COLUMN_SEPARATOR);
                if (columns.length < minColumns || !columns[0].contains(firstId)) {
                    continue;
                }
                if (secondId == null || columns[1].contains(secondId)) {
                    return columns;
                }
            }
        }
        return null;
    }

    /** Opens {@code path} for buffered line reading using the platform default charset. */
    private static BufferedReader openReader(String path) throws IOException {
        return new BufferedReader(new InputStreamReader(new FileInputStream(new File(path))));
    }

    /**
     * Runs the table generation on a fixed set of input files located in the
     * working directory.
     */
    public static void main(String[] args) throws IOException {
        proteinLinkTable a = new proteinLinkTable();
        a.loadFile("COG.mappings.v9.1.txt", "out5t3800.txt", "PlasmodiumfalciparumLinkSimp.txt", 111);
    }
}