Source code for GenomeVisualizer.basic

def load_genome_from_txt(filepath: str) -> str:
    """
    Loads a genome sequence from a plain text (.txt) file.

    The whole file is read, every whitespace character (spaces, tabs,
    newlines, carriage returns, ...) is removed, and the remainder is
    upper-cased so that lower-case input is accepted. The cleaned text is
    then validated to contain only the characters A, C, G and T.

    Args:
        filepath (str): Path to the genome file (a text file containing
            ACGT characters, in either case, optionally broken up by
            whitespace).

    Returns:
        str: The cleaned, upper-case DNA sequence as a single string.

    Raises:
        FileNotFoundError: If the specified file does not exist.
        ValueError: If the file is empty (or contains only whitespace) or
            contains characters other than A, C, G, T. Bytes that are not
            valid UTF-8 surface as UnicodeDecodeError, which is a
            ValueError subclass.

    Example:
        >>> genome = load_genome_from_txt("data/ecoli.txt")
        >>> genome[:10]
        'AGCTTTTCAT'
    """
    # Explicit encoding so the result does not depend on the platform default.
    with open(filepath, "r", encoding="utf-8") as file:
        raw = file.read()

    # str.split() with no separator splits on any run of whitespace, so
    # re-joining the pieces drops tabs and stray carriage returns too, not
    # just spaces and newlines.
    content = "".join(raw.split()).upper()

    if not content:
        raise ValueError("The file is empty.")
    if not set(content).issubset({"A", "C", "G", "T"}):
        raise ValueError("The file contains invalid DNA characters.")
    return content
def FrequencyMap(Text: str, k: int) -> dict[str, int]:
    """
    Computes the frequency of all k-length substrings (k-mers) in a DNA sequence.

    A window of width ``k`` slides over ``Text`` one position at a time and
    each substring seen is tallied. Keys appear in the returned dictionary
    in the order in which each k-mer first occurs in ``Text``.

    Args:
        Text (str): The DNA sequence to scan.
        k (int): Length of the k-mers to count. Must be at least 1.

    Returns:
        dict[str, int]: A dictionary mapping each k-mer to its count in the
        sequence. Empty when ``k`` is larger than ``len(Text)``.

    Raises:
        ValueError: If ``k`` is smaller than 1.

    Example:
        >>> FrequencyMap("ATATA", 3)
        {'ATA': 2, 'TAT': 1}
    """
    # A non-positive k would make the slices below meaningless (empty or
    # wrapped-around substrings), so reject it instead of counting garbage.
    if k < 1:
        raise ValueError("k must be a positive integer.")

    freq: dict[str, int] = {}
    # range() is empty when k > len(Text), which yields an empty map.
    for start in range(len(Text) - k + 1):
        kmer = Text[start:start + k]
        freq[kmer] = freq.get(kmer, 0) + 1
    return freq
def FrequentWords(Text: str, k: int) -> list[str]:
    """
    Identifies the most frequent k-length substrings (k-mers) in a DNA sequence.

    This function uses FrequencyMap to count all k-mers in the sequence and
    then returns those whose count equals the highest count observed. The
    result follows the iteration order of the frequency map.

    Args:
        Text (str): The DNA sequence to search.
        k (int): Length of the k-mers.

    Returns:
        list[str]: The k-mers with the highest frequency in the sequence.
        An empty list is returned when the frequency map contains no k-mers
        at all (for example when ``k`` is larger than ``len(Text)``).

    Example:
        >>> FrequentWords("ACGTTGCATGTCGCATGATGCATGAGAGCT", 4)
        ['GCAT', 'CATG']
    """
    freq = FrequencyMap(Text, k)
    # default=0 keeps max() from raising on an empty map; no k-mer can have a
    # count of 0, so the comprehension below then simply yields [].
    highest = max(freq.values(), default=0)
    return [kmer for kmer, count in freq.items() if count == highest]
def MinPositions(values: list[int]) -> list[int]:
    """
    Returns every index at which the smallest value of a list occurs.

    Args:
        values (list[int]): The numbers to inspect.

    Returns:
        list[int]: The indices, in ascending order, whose element equals
        the minimum of ``values``.

    Raises:
        ValueError: If ``values`` is empty (raised by ``min``).
    """
    smallest = min(values)
    return [index for index, item in enumerate(values) if item == smallest]