Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Submit feedback
Sign in
Toggle navigation
P
ProteinsPatterns
Project overview
Project overview
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Rafael Artinano
ProteinsPatterns
Commits
d5cae6ac
Commit
d5cae6ac
authored
Dec 11, 2023
by
Rafael Artinano
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update in similarity with aa
parent
3c565e97
Changes
4
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
691 additions
and
136 deletions
+691
-136
TFM-main/src/compute_for_clases.py
TFM-main/src/compute_for_clases.py
+406
-0
TFM-main/src/generate_the_excel.py
TFM-main/src/generate_the_excel.py
+0
-0
TFM-main/src/metricas.py
TFM-main/src/metricas.py
+160
-0
TFM-main/src/patrones_similares_aa.py
TFM-main/src/patrones_similares_aa.py
+125
-136
No files found.
TFM-main/src/compute_for_clases.py
0 → 100644
View file @
d5cae6ac
This diff is collapsed.
Click to expand it.
TFM-main/src/generate_th
a
_excel.py
→
TFM-main/src/generate_th
e
_excel.py
View file @
d5cae6ac
File moved
TFM-main/src/metricas.py
View file @
d5cae6ac
...
...
@@ -212,6 +212,166 @@ def patronesComun(patronesComun):
# index=False)
def _load_class_lists(excel_path):
    """Return ``{protein_id: [class_name, ...]}`` read from *excel_path*."""
    classes = pd.read_excel(excel_path)
    return {
        protein_id: group['class_name'].tolist()
        for protein_id, group in classes.groupby('protein_id')
    }


def _read_identical_patterns(csv_path):
    """Parse the identical-patterns CSV at *csv_path*.

    Returns a pair ``(patterns, covered)``:

    * ``patterns[protein]`` is the set of ``Patron`` values listed for it.
    * ``covered[protein][pattern]`` is the set of indices spanned by the
      pattern, i.e. every ``start + offset`` for each start listed in the
      ``Posiciones`` column and each ``offset`` in ``range(len(pattern))``.
    """
    patterns = {}
    covered = {}
    with open(csv_path, 'r', newline='') as handle:
        for record in csv.DictReader(handle):
            protein = record['Proteina']
            pattern = record['Patron']
            patterns.setdefault(protein, set()).add(pattern)
            # Compare by value: identity tests against string literals are
            # implementation-dependent and raise a SyntaxWarning.
            starts = [
                start
                for start in ast.literal_eval(record['Posiciones'])
                if start != '[' and start != ']'
            ]
            # A repeated (protein, pattern) row replaces the earlier entry.
            covered.setdefault(protein, {})[pattern] = {
                int(start) + offset
                for start in starts
                for offset in range(len(pattern))
            }
    return patterns, covered


def _write_row(csv_path, row, columns, with_header):
    """Write one row to *csv_path*: truncate and emit the header when
    *with_header* is true, otherwise append without a header."""
    frame = pd.DataFrame([row], columns=columns)
    if with_header:
        frame.to_csv(csv_path, index=False)
    else:
        frame.to_csv(csv_path, index=False, header=False, mode='a')


def patronesComunClas(patronesComun, name):
    """Compute shared-pattern metrics for every pair of proteins of a class.

    Reads ``clases/<name>/patronesIdenticos.csv`` (columns ``Proteina``,
    ``Patron``, ``Posiciones``) and, for each unordered pair of proteins that
    share at least *patronesComun* patterns, streams one row to each of:

    * ``clases/<name>/Metrica_patronesComunes.csv`` - the shared patterns
      grouped under ``'Longitud <n>'`` keys (longest first), both protein
      ids and their class lists.
    * ``clases/<name>/Metrica_Coincidencia.csv`` - both protein ids, the
      larger of the two coverage percentages (indices covered by the shared
      patterns divided by the sequence length, times 100) and both class
      lists.

    Class lists come from ``alzheimer_protein_class 2.xlsx`` (``"N/A"`` when
    the protein is not listed); sequences come from
    ``data_nervous_genes_xf.xlsx``. Pairs with no shared pattern or with an
    empty sequence are skipped. Output files are only created once the first
    row is produced.

    Args:
        patronesComun: minimum number of shared patterns for a pair to be
            reported.
        name: sub-directory of ``clases/`` holding the input CSV and
            receiving the two output CSVs.

    Raises:
        KeyError: if a reported protein has no entry in
            ``data_nervous_genes_xf.xlsx``.
    """
    class_dict = _load_class_lists("alzheimer_protein_class 2.xlsx")
    patrones_por_proteina, posiciones_patron = _read_identical_patterns(
        "clases/" + name + "/patronesIdenticos.csv")

    df_p = pd.read_excel("data_nervous_genes_xf.xlsx")
    secuencias = dict(df_p[['protein_id', 'protein_sequence']].values)

    ruta_patrones = 'clases/' + name + '/Metrica_patronesComunes.csv'
    ruta_coincidencia = 'clases/' + name + '/Metrica_Coincidencia.csv'
    columnas_patrones = ['Patrones', 'Proteina1', 'Proteina2',
                         "classesProt1", "classesProt2"]
    columnas_coincidencia = ['proteina1', 'proteina2', '%Coincidencia',
                             "classesProt1", "classesProt2"]

    proteinas = list(patrones_por_proteina.items())
    primera_fila = True
    # Visit each unordered pair once, keyed as (earlier, later) in the order
    # the proteins first appear in the input CSV.
    for indice, (proteina1, patrones1) in enumerate(proteinas):
        for proteina2, patrones2 in proteinas[indice + 1:]:
            patrones_comunes = patrones1 & patrones2
            if len(patrones_comunes) < patronesComun:
                continue
            if not patrones_comunes:
                # Only reachable with a threshold <= 0: nothing to report.
                continue

            secuencia1 = secuencias[proteina1]
            secuencia2 = secuencias[proteina2]
            if not (len(secuencia1) > 0 and len(secuencia2) > 0):
                continue

            por_longitud = {}
            cubiertas1 = set()
            cubiertas2 = set()
            for patron in patrones_comunes:
                # Each pattern is stored as its own one-element list.
                por_longitud.setdefault(len(patron), []).append([patron])
                cubiertas1 |= posiciones_patron[proteina1][patron]
                cubiertas2 |= posiciones_patron[proteina2][patron]
            patrones_ordenados = {
                f'Longitud {longitud}': por_longitud[longitud]
                for longitud in sorted(por_longitud, reverse=True)
            }

            clases1 = class_dict.get(proteina1, "N/A")
            clases2 = class_dict.get(proteina2, "N/A")
            coincidencia = max(len(cubiertas1) / len(secuencia1),
                               len(cubiertas2) / len(secuencia2)) * 100

            _write_row(
                ruta_patrones,
                [patrones_ordenados, proteina1, proteina2, clases1, clases2],
                columnas_patrones,
                primera_fila,
            )
            _write_row(
                ruta_coincidencia,
                [proteina1, proteina2, coincidencia, clases1, clases2],
                columnas_coincidencia,
                primera_fila,
            )
            primera_fila = False
#output2=sorted(output2, key = lambda x: int(x[2]))
#df2=pd.DataFrame(output2,columns=['proteina1','proteina2','%Coincidencia'])
#df2.to_csv('resultados/Metrica_Coincidencia.csv',
# index=False)
def
remplazar_sequence_for_ID
(
output
):
df_b
=
pd
.
read_excel
(
"data_nervous_genes_xf.xlsx"
)
#df_b = pd.read_excel("proteinasClase_PC00060.xlsx")
...
...
TFM-main/src/patrones_similares_aa.py
View file @
d5cae6ac
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment