Sequence File Handler Java Code 07-16-2014d1122
2015-01-13 Sequence File Handler Java Code 07-16-2014d1122
fasta file handler
protein dna sequence file handler code
import fasta file sequences
export sequences to fasta file
code
import java.util.ArrayList;
public class SequenceFileHandler {
UsefulTools useful_tools = new UsefulTools();
ArrayList names = new ArrayList();
ArrayList sequences = new ArrayList();
String current_line = "";
public ArrayList getNames()
{
return names;
public ArrayList getSequences()
return sequences;
public ArrayList getSequencesFromFile(String file_path)
ArrayList lines = useful_tools.storeTextFiletoArrayList(file_path);
boolean contains_greater_than_sign = false;
for(int i=0; i<lines.size();i++)
{
String current_line = lines.get(i).toString();
if(current_line.indexOf(">")!=-1)
{
contains_greater_than_sign = true;
}
//if it contains a greater than sign, then this is a fasta file and the names and sequences should be collected accordingly
//otherwise, each line without a space is a sequence, and the name should be some arbitrary name
if(contains_greater_than_sign)
boolean title_found = false;
for(int i=0; i<lines.size(); i++)
{
current_line = lines.get(i).toString();
if(!title_found)
{
if(current_line.indexOf(">")!=-1 && current_line.length()>0)
{
title_found = true;
names.add(current_line.replaceAll(">(.+)", "$1"));
}
else if(title_found)
current_line = lines.get(i).toString();
//make sure there are no spaces
if(useful_tools.getMatchesRegEx(current_line, "\\s")==0 && current_line.length()>0)
{
sequences.add(current_line);
title_found = false;
}
}
}
else
int sequence_count = 0;
//each line without a space should be a sequence
for(int i=0; i<lines.size(); i++)
{
current_line = lines.get(i).toString();
if(useful_tools.getMatchesRegEx(current_line, "\\s")==0 && current_line.length()>0)
{
sequences.add(current_line);
sequence_count++;
names.add("seq"+sequence_count);
}
}
return sequences;
}
/*
* this function expects sequences separated by commas
*/
public void createFastaFromSequences(String sequences, String directory, String filename)
createFastaFromSequences(useful_tools.stringWithCommasToArrayList(sequences), directory, filename);
public void createFastaFromSequences(ArrayList sequences, String directory, String filename)
String output_string = "";
for(int i=0; i<sequences.size(); i++)
{
output_string+=">seq"+(i+1)+"\r\n"+sequences.get(i).toString()+"\r\n";
useful_tools.createTextFile(directory, filename, output_string);
}
}
}