Commit 60a7d13a authored by Rafael Artinano

added comments to metric.py

parent da384ed5
......@@ -8,6 +8,15 @@ from descarteProteinas import substitute_or_remove_prot_id
from generate_the_excel import substitute_or_remove_prot_id
def similitudProteinas(sequences):
"""
Calculates the similarity between protein sequences using the Needleman-Wunsch algorithm.
Parameters:
- sequences (list): List of protein sequences.
Returns:
- list: List of lists containing pairs of protein sequences and their calculated similarity.
"""
output = []
for row1 in sequences:
......@@ -22,6 +31,11 @@ def similitudProteinas(sequences):
return output
def metrica_distanciaProteinas():
"""
Calculates a metric for the distance between proteins based on preprocessed data.
Reads CSV files, creates a dictionary of similarities, and performs calculations to generate a new CSV file.
"""
# Leer los archivos CSV
data = pd.read_csv("resultados/patronesIdenticos.csv")
df_b = pd.read_csv("AllProteins_%Similitud.csv")
......@@ -54,7 +68,18 @@ def metrica_distanciaProteinas():
index=False)
def patronesComun(patronesComun,archivoEntrada,ocurrencia,sal,archivoClases):
"""
Computes common patterns between proteins and generates metrics based on pattern occurrences.
Parameters:
- patronesComun (int): Minimum number of common patterns required.
- archivoEntrada (str): Path to the input CSV file.
- ocurrencia (float): Occurrence value.
- sal (str): Output file suffix.
- archivoClases (str): Path to the input Excel file containing protein classes.
Reads data, extracts unique patterns, finds common patterns, and generates metrics and CSV files.
"""
# Leer el archivo CSV y cargar los datos en una lista de diccionarios
registros = []
cl=pd.read_excel(archivoClases)
......@@ -214,7 +239,19 @@ def patronesComun(patronesComun,archivoEntrada,ocurrencia,sal,archivoClases):
def patronesComunClas(patronesComun,name,archivoEntrada,ocurrencia,sal,archivoClases):
"""
Computes common patterns between proteins within a class and generates class-specific metrics.
Parameters:
- patronesComun (int): Minimum number of common patterns required.
- name (str): Name of the class for output file organization.
- archivoEntrada (str): Path to the input CSV file.
- ocurrencia (float): Occurrence value.
- sal (str): Output file suffix.
- archivoClases (str): Path to the input Excel file containing protein classes.
Reads data, extracts unique patterns, finds common patterns within a class, generates metrics on the number of patterns and on whether they match the whole sequence, and saves them to CSV files.
"""
# Leer el archivo CSV y cargar los datos en una lista de diccionarios
registros = []
cl=pd.read_excel(archivoClases)
......@@ -373,6 +410,16 @@ def patronesComunClas(patronesComun,name,archivoEntrada,ocurrencia,sal,archivoCl
def remplazar_sequence_for_ID(output,archivoEntrada):
"""
Replaces protein sequences with corresponding protein IDs in the output based on a reference Excel file.
Parameters:
- output (list): List of lists containing protein pairs and their similarity.
- archivoEntrada (str): Path to the input Excel file containing protein information (it is loaded with pd.read_excel).
Reads the reference Excel file, sorts the output, and replaces sequences with protein IDs.
Updates the output CSV file.
"""
df_b = pd.read_excel(archivoEntrada)
#df_b = pd.read_excel("proteinasClase_PC00060.xlsx")
#df_b=substitute_or_remove_prot_id(df_b,"r")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment