Sequence File Handler Java Code 07-16-2014d1122

2015-01-13

Sequence File Handler Java Code 07-16-2014d1122

FASTA file handler
Protein/DNA sequence file handler code
Import FASTA file sequences
Export sequences to FASTA file

code

import java.util.ArrayList;


public class SequenceFileHandler {

UsefulTools useful_tools = new UsefulTools();
ArrayList names = new ArrayList();
ArrayList sequences = new ArrayList();
String current_line = "";


public ArrayList getNames()
{
return names;


public ArrayList getSequences()

return sequences;


public ArrayList getSequencesFromFile(String file_path)


ArrayList lines = useful_tools.storeTextFiletoArrayList(file_path);
boolean contains_greater_than_sign = false;
for(int i=0; i<lines.size();i++)
{
String current_line = lines.get(i).toString();
if(current_line.indexOf(">")!=-1)
{
contains_greater_than_sign = true;

}

//if it contains a greater than sign, then this is a fasta file and the names and sequences should be collected accordingly
//otherwise, each line without a space is a sequence, and the name should be some arbitrary name
if(contains_greater_than_sign)

boolean title_found = false;

for(int i=0; i<lines.size(); i++)
{
current_line = lines.get(i).toString();
if(!title_found)
{
if(current_line.indexOf(">")!=-1 && current_line.length()>0)
{
title_found = true;
names.add(current_line.replaceAll(">(.+)", "$1"));

}
else if(title_found)

current_line = lines.get(i).toString();
//make sure there are no spaces
if(useful_tools.getMatchesRegEx(current_line, "\\s")==0 && current_line.length()>0)

{

sequences.add(current_line);
title_found = false;

}
}
}
else

int sequence_count = 0;
//each line without a space should be a sequence
for(int i=0; i<lines.size(); i++)
{
current_line = lines.get(i).toString();
if(useful_tools.getMatchesRegEx(current_line, "\\s")==0 && current_line.length()>0)
{

sequences.add(current_line);
sequence_count++;
names.add("seq"+sequence_count);

}
}
return sequences;
}

/*
* this function expects sequences separated by commas
*/
public void createFastaFromSequences(String sequences, String directory, String filename)

createFastaFromSequences(useful_tools.stringWithCommasToArrayList(sequences), directory, filename);


public void createFastaFromSequences(ArrayList sequences, String directory, String filename)

String output_string = "";
for(int i=0; i<sequences.size(); i++)
{
output_string+=">seq"+(i+1)+"\r\n"+sequences.get(i).toString()+"\r\n";

useful_tools.createTextFile(directory, filename, output_string);
}
}

}