Convert plipxml format to plipcsv

Using plipxml2plipcsv from the command line

In [1]:
import os
import subprocess

import pandas as pd
import plip2ml.tests.test_plipxml2plipcsv as test_pxpc

# Switch to the directory reported by the test module (presumably the
# package's bundled test data) so the relative file names below resolve.
# The working directory stays changed for the cells that follow.
os.chdir(test_pxpc.data_dir())

# Run the command-line converter. Unlike os.system, check=True raises
# CalledProcessError on a non-zero exit status (and FileNotFoundError if the
# executable is not on PATH), so a failed conversion stops the cell instead
# of letting read_csv pick up a stale or missing file. Passing an argument
# list also avoids going through the shell.
# -o names the CSV that is read back below; -s presumably sets the structure
# id and -f presumably forces overwriting -- TODO confirm against the CLI help.
subprocess.run(
    ['plipxml2plipcsv', '10gs_report.xml', '-o', '10gs_report_cli.csv', '-s', '10gs', '-f'],
    check=True,
)

df_cli = pd.read_csv('10gs_report_cli.csv')
df_cli.head(3)
Out[1]:
structure_id ligand_id num_heavy_atoms num_hbd num_unpaired_hbd num_hba num_unpaired_hba num_hal num_unpaired_hal num_aromatic_rings ... restype restype_lig donortype acceptortype lig_group type metal_type target_type location geometry
0 10gs A.VWW.210 473.54198 3.0209 33 6 1 9 1 0 ... None None None None None None None None None None
1 10gs A.VWW.210 473.54198 3.0209 33 6 1 9 1 0 ... PHE VWW None None None None None None None None
2 10gs A.VWW.210 473.54198 3.0209 33 6 1 9 1 0 ... PHE VWW None None None None None None None None

3 rows × 39 columns

Using plipxml2plipcsv as an imported Python module

In [2]:
import plip2ml.plipxml2plipcsv as pxpc

# Input and output are relative paths, so this cell relies on the working
# directory set by the earlier os.chdir call.
xml_report = '10gs_report.xml'
csv_report_lib = '10gs_report_lib.csv'

# NOTE(review): the keyword is spelled `structrure_id` here; the recorded
# output shows the call succeeded, so this presumably matches the library's
# actual parameter name -- confirm before "fixing" the spelling.
pxpc.generate_plipcsv(xml_report, csv_report_lib, structrure_id='10gs')

# Load the freshly written CSV and preview the first three rows.
df_lib = pd.read_csv(csv_report_lib)
df_lib.head(3)
Out[2]:
structure_id ligand_id num_heavy_atoms num_hbd num_unpaired_hbd num_hba num_unpaired_hba num_hal num_unpaired_hal num_aromatic_rings ... restype restype_lig donortype acceptortype lig_group type metal_type target_type location geometry
0 10gs A.VWW.210 473.54198 3.0209 33 6 1 9 1 0 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1 10gs A.VWW.210 473.54198 3.0209 33 6 1 9 1 0 ... PHE VWW NaN NaN NaN NaN NaN NaN NaN NaN
2 10gs A.VWW.210 473.54198 3.0209 33 6 1 9 1 0 ... PHE VWW NaN NaN NaN NaN NaN NaN NaN NaN

3 rows × 39 columns