Spaces:
Running
Running
Erva Ulusoy
committed on
Commit
·
fcbba9e
1
Parent(s):
a38fdcd
Make protein ID search matching case-insensitive
Browse files
- ProtHGT_app.py +9 -2
ProtHGT_app.py
CHANGED
|
@@ -103,7 +103,14 @@ with st.sidebar:
|
|
| 103 |
# Apply fuzzy search only if query length is >= 3
|
| 104 |
filtered_proteins = []
|
| 105 |
if len(search_query) >= 3:
|
| 106 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
|
| 108 |
# Multi-select for filtered results
|
| 109 |
selected_proteins = st.multiselect(
|
|
@@ -115,7 +122,7 @@ with st.sidebar:
|
|
| 115 |
|
| 116 |
if selected_proteins:
|
| 117 |
st.write(f"Selected {len(selected_proteins)} proteins")
|
| 118 |
-
|
| 119 |
else:
|
| 120 |
uploaded_file = st.file_uploader(
|
| 121 |
"Upload a text file with UniProt IDs (one per line, max 100)*",
|
|
|
|
| 103 |
# Apply fuzzy search only if query length is >= 3
|
| 104 |
filtered_proteins = []
|
| 105 |
if len(search_query) >= 3:
|
| 106 |
+
# Case-insensitive search by converting query and proteins to lowercase
|
| 107 |
+
matches = process.extract(
|
| 108 |
+
search_query.lower(),
|
| 109 |
+
{p: p.lower() for p in available_proteins},
|
| 110 |
+
limit=50,
|
| 111 |
+
score_cutoff=50 # Optional: only include matches above 50% similarity
|
| 112 |
+
)
|
| 113 |
+
filtered_proteins = [match[0] for match in matches] # Show top 50 matches
|
| 114 |
|
| 115 |
# Multi-select for filtered results
|
| 116 |
selected_proteins = st.multiselect(
|
|
|
|
| 122 |
|
| 123 |
if selected_proteins:
|
| 124 |
st.write(f"Selected {len(selected_proteins)} proteins")
|
| 125 |
+
|
| 126 |
else:
|
| 127 |
uploaded_file = st.file_uploader(
|
| 128 |
"Upload a text file with UniProt IDs (one per line, max 100)*",
|