Source code for msions.encyclopedia

"""
This module contains functions that are useful for interacting with
EncyclopeDIA files in Python.
"""
import sqlite3
import pandas as pd
import numpy as np


def dia_df(input_elib: str) -> pd.DataFrame:
    """
    Create a pandas DataFrame from an EncyclopeDIA elib output

    The elib file is opened as an SQLite database and a fixed set of
    columns is read from its ``entries`` table.

    Parameters
    ----------
    input_elib : str
        The input elib file.

    Returns
    -------
    pd.DataFrame
        A pandas DataFrame containing PrecursorMz, PrecursorCharge,
        PeptideModSeq, PeptideSeq, RtInSeconds, RTInSecondsStart,
        and RTInSecondsStop columns.

    Notes
    -----
    Any error raised while running the query propagates to the caller;
    the database connection is closed before it does.

    Examples
    --------
    >>> from msions.encyclopedia import dia_df
    >>> dia_df("test.elib")
    """
    # create connection object
    elib_connection = sqlite3.connect(input_elib)

    try:
        # create DataFrame with SQL query
        encyclo_df = pd.read_sql_query(
            """SELECT PrecursorMz, PrecursorCharge,
               PeptideModSeq, PeptideSeq,
               RtInSeconds, RTInSecondsStart, RTInSecondsStop
               FROM entries""",
            elib_connection,
        )
    finally:
        # close connection even when the query fails, so a bad or
        # incomplete elib file does not leak an open SQLite handle
        elib_connection.close()

    # return data frame
    return encyclo_df
def match_hk(ref_row: pd.Series, other_df: pd.DataFrame,
             rtol: float = 5e-6) -> int:
    """
    Match EncyclopeDIA elib output to Hardklor output

    Counts the rows of `other_df` whose retention-time window contains
    the reference retention time, whose precursor charge equals the
    reference charge, and whose precursor m/z is close to the
    reference m/z.

    Parameters
    ----------
    ref_row : pd.Series
        The row of data to match. Its ``mz``, ``charge`` and ``rt_s``
        entries are read.
    other_df : pd.DataFrame
        The other DataFrame to match. Its ``RTInSecondsStart``,
        ``RTInSecondsStop``, ``PrecursorCharge`` and ``PrecursorMz``
        columns are read.
    rtol : float, optional
        Relative m/z tolerance handed to ``numpy.isclose``. The default
        of 5e-6 keeps the previously hard-coded value.

    Returns
    -------
    int
        Number of rows in match DataFrame.

    Examples
    --------
    >>> from msions.encyclopedia import match_hk
    >>> import msions.hardklor as hk
    >>> import msions.encyclopedia as encyclo
    >>> hk_df = hk.hk2df("test.hk")
    >>> encyclo_df = encyclo.dia_df("test.elib")
    >>> hk_df["in_encyclo"] = hk_df.apply(match_hk, axis=1, other_df=encyclo_df)
    """
    # define info to match
    mz2match = ref_row.mz
    charge2match = ref_row.charge
    rt2match = ref_row.rt_s

    # only search entries whose RT window contains the reference RT
    # (bounds inclusive) and whose charge state is identical
    candidates = other_df.loc[
        (other_df.RTInSecondsStart <= rt2match)
        & (other_df.RTInSecondsStop >= rt2match)
        & (other_df.PrecursorCharge == charge2match)
    ]

    # look for mass that matches; the tolerance is relative to the
    # reference m/z (second argument of np.isclose), with NumPy's
    # default absolute tolerance added on top
    matches = candidates[np.isclose(candidates.PrecursorMz, mz2match, rtol=rtol)]

    # return number of matches
    return matches.shape[0]