//  $Id: NucleicAcidInfo.java,v 1.1 2006/05/20 17:02:03 Sasha Buzko Exp $
//
//  Copyright (c) 2000-2002  San Diego Supercomputer Center (SDSC),
//  a facility operated jointly by the University of California,
//  San Diego (UCSD) and General Atomics, San Diego, California, USA.
//
//  Users and possessors of this source code are hereby granted a
//  nonexclusive, royalty-free copyright and design patent license to
//  use this code in individual software.  License is not granted for
//  commercial resale, in whole or in part, without prior written
//  permission from SDSC.  This source is provided "AS IS" without express
//  or implied warranty of any kind.
//
//  For further information, please see:  http://mbt.sdsc.edu
//
//  History:
//  $Log: NucleicAcidInfo.java,v $
//  Revision 1.1  2006/05/20 17:02:03  Sasha Buzko
//  Updated version
//
//  Revision 1.1  2006/04/30 20:13:59  Sasha Buzko
//  New version of the app
//
//  Revision 1.1  2006/04/15 19:42:26  Sasha Buzko
//  Initial commit
//
//  Revision 1.1  2005/11/13 04:35:04  Administrator
//  *** empty log message ***
//
//  Revision 1.4  2003/04/30 17:57:44  moreland
//  If compound is not one letter, null is now returned.
//
//  Revision 1.3  2003/04/03 22:43:00  moreland
//  Added a comment to suggest that the class should eventually be divided
//  into separate "NucleicAcid" and "NucleicAcids" classes.
//
//  Revision 1.2  2002/10/24 18:05:40  moreland
//  Added some missing method comments.
//
//  Revision 1.1.1.1  2002/07/16 18:00:21  moreland
//  Imported sources
//
//  Revision 1.0  2002/06/10 23:38:39  moreland
//


package edu.sdsc.mbt.util;


/**
 *  Provides static information about Nucleic Acids for DNA/RNA such as
 *  base character codes, base names, and base pairing rules.
 *  <P>
 *  All lookups are null-tolerant: an unrecognized or null argument yields
 *  a null result rather than an exception.
 *  <P>
 *  @see edu.sdsc.mbt.Residue
 *  @author John L. Moreland
 */
public class NucleicAcidInfo
{
	/**
	 *  Letter codes of the 4 nitrogenous bases for DNA.
	 */
	public static final String dna_bases[] = { "A", "C", "G", "T" };

	/**
	 *  Letter codes of the 4 nitrogenous bases for RNA.
	 */
	public static final String rna_bases[] = { "A", "C", "G", "U" };

	/**
	 *  A 2D storage array for nucleic acid base character codes, and full
	 *  names.
	 *  <P>
	 *  This class should probably be broken into two classes:
	 *  "NucleicAcid" to hold properties and "NucleicAcids" as
	 *  a container class.
	 *  http://bama.ua.edu/~hsmithso/class/bsc_495/macromolecules/macromolecule.html
	 *  <P>
	 *  names[][0] is the Nucleic acid character code.
	 *  names[][1] is the Nucleic acid name.
	 *  <P>
	 *  The table has one row per letter 'A'..'Z' so that a row can be
	 *  addressed directly as (letter - 'A'); rows named "UNKNOWN" are
	 *  padding for letters that are not bases.
	 */
	private static final String names[][] =
	{
		{ "A", "Adenine" },
		{ "B", "UNKNOWN" },
		{ "C", "Cytosine" },
		{ "D", "UNKNOWN" },
		{ "E", "UNKNOWN" },
		{ "F", "UNKNOWN" },
		{ "G", "Guanine" },
		{ "H", "UNKNOWN" },
		{ "I", "UNKNOWN" },
		{ "J", "UNKNOWN" },
		{ "K", "UNKNOWN" },
		{ "L", "UNKNOWN" },
		{ "M", "UNKNOWN" },
		{ "N", "UNKNOWN" },
		{ "O", "UNKNOWN" },
		{ "P", "UNKNOWN" },
		{ "Q", "UNKNOWN" },
		{ "R", "UNKNOWN" },
		{ "S", "UNKNOWN" },
		{ "T", "Thymine" },
		{ "U", "Uracil" },
		{ "V", "UNKNOWN" },
		{ "W", "UNKNOWN" },
		{ "X", "UNKNOWN" },
		{ "Y", "UNKNOWN" },
		{ "Z", "UNKNOWN" }
	};

	// Row indexes into names[][] for the five real bases (letter - 'A').
	private static final int A_INDEX = 0;
	private static final int C_INDEX = 2;
	private static final int G_INDEX = 6;
	private static final int T_INDEX = 19;
	private static final int U_INDEX = 20;

	/**
	 *  Returns the number of nucleic acid bases known to this class
	 *  (A, C, G, T and U).
	 *  <P>
	 *  @return  always 5.
	 */
	public static int getBaseCount( )
	{
		return 5; // There are only 5 nucleic acid bases.
	}

	/**
	 *  Returns the full base name equivalent for the given one-letter code.
	 *  <P>
	 *  @param  letter  a one-character string holding an upper-case letter.
	 *  @return  the base name (or "UNKNOWN" for an upper-case letter that
	 *  is not a base), or null if the argument is null, is not exactly one
	 *  character long, or is outside 'A'..'Z'.
	 */
	public static String getNameFromLetter( String letter )
	{
		// The "UNKNOWN" names are pad entries to enable direct indexing
		// by character/byte value into the array.
		if ( letter == null || letter.length() != 1 ) return null;
		int offset = letter.charAt(0) - 'A';
		// Valid rows are 0 .. names.length-1, so the upper bound is exclusive.
		if ( offset < 0 || offset >= names.length ) return null;
		return names[offset][1];
	}

	/**
	 *  Returns the one-letter code equivalent for the given full base name.
	 *  <P>
	 *  @param  name  a full base name such as "Adenine" (case-sensitive).
	 *  @return  the one-letter code, or null if the name is null or is not
	 *  one of the five base names.
	 */
	public static String getLetterFromName( String name )
	{
		if ( names[A_INDEX][1].equals( name ) ) return names[A_INDEX][0];
		if ( names[C_INDEX][1].equals( name ) ) return names[C_INDEX][0];
		if ( names[G_INDEX][1].equals( name ) ) return names[G_INDEX][0];
		if ( names[T_INDEX][1].equals( name ) ) return names[T_INDEX][0];
		if ( names[U_INDEX][1].equals( name ) ) return names[U_INDEX][0];
		return null;
	}

	/**
	 *  Returns the base pair pairing for DNA (A-T, G-C).
	 *  <P>
	 *  @param  letter  the one-letter code of a DNA base.
	 *  @return  the code of the complementary base, or null if the argument
	 *  is null or is not one of A, T, G, C.
	 */
	public static String getDnaPairing( String letter )
	{
		// Constant-first equals() so that a null letter falls through to
		// the "unknown" result instead of throwing.
		if ( names[A_INDEX][0].equals( letter ) ) return names[T_INDEX][0];
		if ( names[T_INDEX][0].equals( letter ) ) return names[A_INDEX][0];
		if ( names[G_INDEX][0].equals( letter ) ) return names[C_INDEX][0];
		if ( names[C_INDEX][0].equals( letter ) ) return names[G_INDEX][0];
		return null;  // UNKNOWN
	}

	/**
	 *  Returns the base pair pairing for RNA-DNA interaction
	 *  (U maps to A, A maps to T, G maps to C, C maps to G).
	 *  <P>
	 *  @param  letter  the one-letter code of an RNA base.
	 *  @return  the code of the pairing DNA base, or null if the argument
	 *  is null or is not one of U, A, G, C.
	 */
	public static String getRnaDnaPairing( String letter )
	{
		if ( names[U_INDEX][0].equals( letter ) ) return names[A_INDEX][0];
		if ( names[A_INDEX][0].equals( letter ) ) return names[T_INDEX][0];
		if ( names[G_INDEX][0].equals( letter ) ) return names[C_INDEX][0];
		if ( names[C_INDEX][0].equals( letter ) ) return names[G_INDEX][0];
		return null;  // UNKNOWN
	}

	/**
	 *  Returns the base pair pairing for RNA-RNA interaction (A-U, G-C).
	 *  <P>
	 *  @param  letter  the one-letter code of an RNA base.
	 *  @return  the code of the complementary base, or null if the argument
	 *  is null or is not one of A, U, G, C.
	 */
	public static String getRnaRnaPairing( String letter )
	{
		if ( names[A_INDEX][0].equals( letter ) ) return names[U_INDEX][0];
		if ( names[U_INDEX][0].equals( letter ) ) return names[A_INDEX][0];
		if ( names[G_INDEX][0].equals( letter ) ) return names[C_INDEX][0];
		if ( names[C_INDEX][0].equals( letter ) ) return names[G_INDEX][0];
		return null;  // UNKNOWN
	}

	/**
	 *  Returns the one-letter code string for the given letter byte.
	 *  <P>
	 *  @param  letter  the byte value of an upper-case letter.
	 *  @return  the one-character code string for that letter (including
	 *  letters that are not bases), or null if the byte is outside 'A'..'Z'.
	 */
	public static String getCodeFromLetter( byte letter )
	{
		// The table has a row for every letter 'A'..'Z', so the byte value
		// can be used to index it directly.
		int offset = letter - 'A';
		if ( offset < 0 || offset >= names.length ) return null;
		return names[offset][0];
	}


}