---
title: Title
keywords: fastai
sidebar: home_sidebar
nb_path: "nbs/05_ADFam.ipynb"
---
{% raw %}
{% endraw %}

Fam file creation and information checking

{% raw %}
from SEQLinkage.Plot import plotped
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
/tmp/2206534.1.plot.q/ipykernel_11876/3829950798.py in <module>
----> 1 from SEQLinkage.Plot import plotped

ModuleNotFoundError: No module named 'SEQLinkage'
{% endraw %} {% raw %}
import pandas as pd
import gzip

def get_vcf_names(vcf_path):
    """Return the column names from the ``#CHROM`` header line of a gzipped VCF.

    Parameters
    ----------
    vcf_path : str or path-like
        Path to a gzip-compressed VCF file (opened with ``gzip.open`` in
        text mode).

    Returns
    -------
    list of str
        The tab-separated fields of the first line that starts with
        ``#CHROM``, each stripped of surrounding whitespace.

    Raises
    ------
    ValueError
        If the file contains no line starting with ``#CHROM``.
    """
    # The ``with`` block closes the file on exit, so no explicit close() is needed.
    with gzip.open(vcf_path, "rt") as ifile:
        for line in ifile:
            if line.startswith("#CHROM"):
                return [x.strip() for x in line.split('\t')]
    # Fail with a clear message rather than an unbound-variable error.
    raise ValueError(f"no '#CHROM' header line found in {vcf_path!r}")

def read_vcf_chunk(fn,chunksize=10):
    """Read the first ``chunksize`` data rows of a VCF file into a DataFrame.

    Parameters
    ----------
    fn : str
        Path to the VCF file. Column names are obtained from
        ``get_vcf_names(fn)`` before the table is parsed.
    chunksize : int, default 10
        Number of rows to read.

    Returns
    -------
    pandas.DataFrame
        The first chunk of the file, parsed as whitespace-separated columns
        with ``#`` treated as the comment character.
    """
    names = get_vcf_names(fn)
    # sep=r'\s+' is the documented replacement for the deprecated
    # delim_whitespace=True.
    read_kwargs = dict(comment='#', chunksize=chunksize, sep=r'\s+', header=None, names=names)
    try:
        reader = pd.read_csv(fn, compression='gzip', **read_kwargs)
    except Exception:
        # Best-effort fallback kept from the original: retry with pandas'
        # default compression handling. Unlike a bare ``except:``, this no
        # longer swallows KeyboardInterrupt/SystemExit.
        reader = pd.read_csv(fn, **read_kwargs)
    try:
        return pd.DataFrame(reader.get_chunk(chunksize))
    finally:
        # Release the underlying file handle once the chunk has been read.
        reader.close()
{% endraw %} {% raw %}
vcf = read_vcf_chunk('/mnt/mfs/statgen/alzheimers-family/linkage_files/geno/full_sample/vcf/full_sample.vcf.gz', chunksize=10)
{% endraw %} {% raw %}
vcf
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 4_364_99 ... 8_64039_20 8_64039_25 8_64039_3 25_6_107C 25_22_491C 26_EGH_EGH64401 26_SW_SW27020 10R_R111_16 10R_R114_16 10R_R114_20
0 1 10140 chr1:10140:ACCCTAAC:A ACCCTAAC A . . PR GT 0/0 ... 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0
1 1 10146 chr1:10146:AC:A AC A . . PR GT 0/0 ... 0/0 0/0 0/1 0/0 0/0 0/0 0/1 0/0 1/1 0/1
2 1 10146 chr1:10146:ACC:* ACC * . . PR GT 0/0 ... 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0
3 1 10150 chr1:10150:CT:C CT C . . PR GT 0/0 ... 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 ./. 0/0
4 1 10172 chr1:10172:CCCTAA:C CCCTAA C . . PR GT ./. ... 0/0 0/0 0/0 0/0 0/0 0/0 ./. 0/0 ./. ./.
5 1 10178 chr1:10178:CCTAA:C CCTAA C . . PR GT ./. ... ./. ./. ./. ./. ./. ./. ./. 0/0 0/0 ./.
6 1 10198 chr1:10198:TAACCC:T TAACCC T . . PR GT 0/0 ... 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0
7 1 10206 chr1:10206:ACCCTAACCCTAACCCTAACCCTAAC:A ACCCTAACCCTAACCCTAACCCTAAC A . . PR GT 0/0 ... 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0
8 1 10231 chr1:10231:C:A C A . . PR GT 0/0 ... 0/0 0/0 0/0 0/0 ./. 0/0 0/0 0/0 0/0 0/0
9 1 10231 chr1:10231:CCCCTAACCCTAACCCTAAACCCTAAACCCTAACC... CCCCTAACCCTAACCCTAAACCCTAAACCCTAACCCTAACCCTAAC... * . . PR GT 0/0 ... 0/0 0/0 0/0 0/0 ./. 0/0 0/0 0/0 0/0 0/0

10 rows × 3488 columns

{% endraw %} {% raw %}
pheno_full_sample_path = '/mnt/mfs/statgen/alzheimers-family/linkage_files/pheno/full_sample/'
{% endraw %}

efiga_pedigree.txt full_sample_efi_nia.fam full_sample_fam_id.txt full_sample_fam_pop.txt full_sample_id_list.txt full_sample_pheno.txt niaload_pedigree.txt

{% raw %}
pheno_df = pd.read_csv('/mnt/mfs/statgen/alzheimers-family/pheno/pheno_modified/AD.txt',header=0,sep='\t')
{% endraw %} {% raw %}
efiga_ped = pd.read_csv(pheno_full_sample_path+'efiga_pedigree.txt',delim_whitespace=True)
{% endraw %} {% raw %}
efiga_ped
ID SEX AD AGE APOE FATHID MOTHID
0 127_99 1.0 1 69.0 1.0 1 2
1 127_15 0.0 0 NaN NaN 0 0
2 127_14 1.0 0 NaN NaN 0 0
3 127_13 0.0 0 NaN NaN 0 0
4 127_12 1.0 0 NaN NaN 15 99
... ... ... ... ... ... ... ...
12765 359_153 0.0 0 NaN NaN 0 0
12766 359_154 1.0 0 NaN NaN 153 121
12767 359_155 0.0 0 NaN NaN 153 121
12768 359_156 0.0 0 NaN NaN 153 121
12769 359_157 0.0 0 NaN NaN 153 121

12770 rows × 7 columns

{% endraw %} {% raw %}
nia_ped = pd.read_csv(pheno_full_sample_path+'niaload_pedigree.txt',delim_whitespace=True)
{% endraw %} {% raw %}
nia_ped[nia_ped.FID =='27_25']
Sample_ID FID Gender MOTHID FATHID prob.AD APOE APOE4NUM AAO_AgeLastSeen
2655 27_25_62220 27_25 F 84401 84402 2 44 2.0 79.0
2656 27_25_62222 27_25 F 84401 84402 2 44 2.0 68.0
2657 27_25_84237 27_25 F 84401 84402 2 NANA NaN NaN
2658 27_25_84401 27_25 F 0 0 2 NANA NaN NaN
2659 27_25_84402 27_25 M 0 0 1 NANA NaN NaN
2660 27_25_85555 27_25 F 84237 85736 1 34 1.0 79.0
2661 27_25_85736 27_25 M 0 0 1 NANA NaN NaN
2662 27_25_85742 27_25 F 85743 85744 1 44 2.0 65.0
2663 27_25_85743 27_25 F 0 0 -9 NANA NaN NaN
2664 27_25_85744 27_25 M 84401 84402 2 NANA NaN NaN
2665 27_25_85937 27_25 F 84401 84402 2 NANA NaN NaN
6222 27_25_86616 27_25 M -9 NANA NaN NaN NaN NaN
6223 27_25_86610 27_25 M 62222 86616 1 NANA NaN 69.0
6226 27_25_ 27_25 F 62222 86616 -9 NANA NaN NaN
6227 27_25_ 27_25 F -9 NANA NaN NaN NaN NaN
6228 27_25_ 27_25 M 86610 -9 NANA NaN NaN NaN
6229 27_25_ 27_25 F 86610 -9 NANA NaN NaN NaN
6230 27_25_ 27_25 F 62220 -9 NANA NaN NaN NaN
6231 27_25_ 27_25 M 62220 -9 NANA NaN NaN NaN
6232 27_25_ 27_25 M -9 NANA NaN NaN NaN NaN
6233 27_25_ 27_25 M 85937 -9 NANA NaN NaN NaN
{% endraw %} {% raw %}
nia_ped.astype(str).groupby(['prob.AD','APOE']).size().unstack()
APOE 0 00 22 23 24 32 33 34 42 43 44 NANA nan
prob.AD
-9 NaN NaN NaN NaN 1.0 3.0 13.0 8.0 NaN 18.0 7.0 1136.0 NaN
1 NaN 2.0 3.0 18.0 12.0 54.0 516.0 147.0 27.0 324.0 64.0 2286.0 NaN
2 NaN NaN 1.0 5.0 2.0 18.0 249.0 85.0 12.0 379.0 233.0 622.0 NaN
33 1.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
NANA NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 26.0
nan NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 36.0
{% endraw %} {% raw %}
nia_ped.astype(str).groupby(['prob.AD','APOE4NUM']).size().unstack()
APOE4NUM 0.0 1.0 2.0 71.0 nan
prob.AD
-9 16.0 27.0 7.0 NaN 1136.0
1 591.0 510.0 64.0 NaN 2288.0
2 273.0 478.0 233.0 NaN 622.0
33 NaN NaN NaN 1.0 NaN
NANA NaN NaN NaN NaN 26.0
nan NaN NaN NaN NaN 36.0
{% endraw %} {% raw %}
efiga_ped.astype(str).groupby(['AD','APOE']).size().unstack()
APOE 0.0 1.0 2.0 nan
AD
0 55 41 15 9065
1 983 638 130 69
2 866 680 203 25
{% endraw %} {% raw %}
pheno_df.astype(str).groupby(['AD','APOE']).size().unstack()
APOE 0.0 1.0 2.0 nan
AD
-9.0 19.0 18.0 10.0 28.0
1.0 731.0 559.0 123.0 56.0
2.0 813.0 792.0 293.0 30.0
nan 4.0 3.0 NaN NaN
{% endraw %} {% raw %}
all_fam.ad.value_counts()
-9    10430
 1     5488
 2     3411
Name: ad, dtype: int64
{% endraw %} {% raw %}
pheno_df
IID ID ProjectID Columbia_Description SEX AD AGE APOE RACE STUDY FID pop super_pop
0 02AD4427 4_364_99 CCDG_12711 NYGC2 F 2.0 71.0 1.0 1.0 NIALOAD 4_364 European European
1 02AD4429 4_44_3 CCDG_12711 NYGC2 F 2.0 90.0 1.0 2.0 NIALOAD 4_44 African African
2 03AD4435 27_104_62571 CCDG_11949 NYGC1 M 2.0 74.0 1.0 1.0 NIALOAD 27_104 European European
3 03AD4437 27_90_84583 CCDG_11949 NYGC1 F 2.0 71.0 1.0 1.0 NIALOAD 27_90 European European
4 03AD4438 27_90_84784 CCDG_11949 NYGC1 F 2.0 85.0 0.0 1.0 NIALOAD 27_90 European European
... ... ... ... ... ... ... ... ... ... ... ... ... ...
3474 05AD8765 26_EGH_EGH64401 CCDG_11949 NYGC1 F NaN NaN 1.0 NaN NIALOAD 26_EGH European European
3475 07AD1732 26_SW_SW27020 CCDG_11949 NYGC1 M NaN NaN 0.0 2.0 NIALOAD 26_SW European European
3476 10AD23886 10R_R111_16 CCDG_12711 NYGC2 M NaN 69.0 0.0 2.0 NIALOAD 10R_R111 European European
3477 10AD32608 10R_R114_16 CCDG_12711 NYGC2 M NaN 63.0 1.0 2.0 NIALOAD 10R_R114 European European
3478 10AD32610 10R_R114_20 CCDG_12711 NYGC2 F NaN 61.0 1.0 2.0 NIALOAD 10R_R114 European European

3479 rows × 13 columns

{% endraw %} {% raw %}
pheno_df[['AD','APOE']].sort_values(['AD','APOE']).astype(str).value_counts()
AD    APOE
2.0   0.0     813
      1.0     792
1.0   0.0     731
      1.0     559
2.0   2.0     293
1.0   2.0     123
      nan      56
2.0   nan      30
-9.0  nan      28
      0.0      19
      1.0      18
      2.0      10
nan   0.0       4
      1.0       3
dtype: int64
{% endraw %} {% raw %}
efi_nia_fam = pd.read_csv(pheno_full_sample_path+'full_sample_efi_nia.fam',delim_whitespace=True,header=None,names = ['fid','iid','father','mother','gender','trait'])
{% endraw %} {% raw %}
efi_nia_txt = pd.read_csv(pheno_full_sample_path+'full_sample_fam_id.txt',delim_whitespace=True,header=None,names = ['fid','iid','father','mother','gender','trait','id'])
{% endraw %} {% raw %}
efi_nia_txt
fid iid father mother gender trait id
0 4_364 4_364_99 4_364_1 4_364_2 2 2 02AD4427
1 4_44 4_44_3 4_44_1 4_44_2 2 2 02AD4429
2 27_104 27_104_62571 27_104_84753 27_104_84752 1 2 03AD4435
3 27_90 27_90_84583 27_90_84575 27_90_84574 2 2 03AD4437
4 27_90 27_90_84784 27_90_84575 27_90_84574 2 2 03AD4438
... ... ... ... ... ... ... ...
3474 26_EGH 26_EGH_EGH64401 0 0 2 -9 05AD8765
3475 26_SW 26_SW_SW27020 26_SW_SW27023 26_SW_SW27022 1 -9 07AD1732
3476 10R_R111 10R_R111_16 10R_R111_5 10R_R111_4 1 -9 10AD23886
3477 10R_R114 10R_R114_16 10R_R114_2 10R_R114_3 1 -9 10AD32608
3478 10R_R114 10R_R114_20 10R_R114_2 10R_R114_3 2 -9 10AD32610

3479 rows × 7 columns

{% endraw %} {% raw %}
sample_id_list = pd.read_csv(pheno_full_sample_path+'full_sample_id_list.txt',delim_whitespace=True,header=None,names = ['id'])
{% endraw %} {% raw %}
sample_id_list
id
0 02AD4427
1 02AD4429
2 03AD4435
3 03AD4437
4 03AD4438
... ...
3474 05AD8765
3475 07AD1732
3476 10AD23886
3477 10AD32608
3478 10AD32610

3479 rows × 1 columns

{% endraw %} {% raw %}
sum(pheno_df.IID == sample_id_list.id)
3479
{% endraw %} {% raw %}
sample_pheno = pd.read_csv(pheno_full_sample_path+'full_sample_pheno.txt',sep='\t')
{% endraw %} {% raw %}
sample_pheno
project/alzheimers-family/linkage_files/pheno/full_sample/niaload_pedigree.txtIID ID ProjectID Columbia_Description SEX AD AGE APOE RACE STUDY FID pop super_pop
0 02AD4427 4_364_99 CCDG_12711 NYGC2 F 2.0 71.0 1.0 1.0 NIALOAD 4_364 European European
1 02AD4429 4_44_3 CCDG_12711 NYGC2 F 2.0 90.0 1.0 2.0 NIALOAD 4_44 African African
2 03AD4435 27_104_62571 CCDG_11949 NYGC1 M 2.0 74.0 1.0 1.0 NIALOAD 27_104 European European
3 03AD4437 27_90_84583 CCDG_11949 NYGC1 F 2.0 71.0 1.0 1.0 NIALOAD 27_90 European European
4 03AD4438 27_90_84784 CCDG_11949 NYGC1 F 2.0 85.0 0.0 1.0 NIALOAD 27_90 European European
... ... ... ... ... ... ... ... ... ... ... ... ... ...
3474 05AD8765 26_EGH_EGH64401 CCDG_11949 NYGC1 F NaN NaN 1.0 NaN NIALOAD 26_EGH European European
3475 07AD1732 26_SW_SW27020 CCDG_11949 NYGC1 M NaN NaN 0.0 2.0 NIALOAD 26_SW European European
3476 10AD23886 10R_R111_16 CCDG_12711 NYGC2 M NaN 69.0 0.0 2.0 NIALOAD 10R_R111 European European
3477 10AD32608 10R_R114_16 CCDG_12711 NYGC2 M NaN 63.0 1.0 2.0 NIALOAD 10R_R114 European European
3478 10AD32610 10R_R114_20 CCDG_12711 NYGC2 F NaN 61.0 1.0 2.0 NIALOAD 10R_R114 European European

3479 rows × 13 columns

{% endraw %} {% raw %}
sample_pheno[sample_pheno.ID == '167_DCH23.54']
project/alzheimers-family/linkage_files/pheno/full_sample/niaload_pedigree.txtIID ID ProjectID Columbia_Description SEX AD AGE APOE RACE STUDY FID pop super_pop
{% endraw %} {% raw %}
sample_pheno.describe(include='all')
project/alzheimers-family/linkage_files/pheno/full_sample/niaload_pedigree.txtIID ID ProjectID Columbia_Description SEX AD AGE APOE RACE STUDY FID pop super_pop
count 3479 3479 3479 3479 3479 3472.000000 3372.000000 3365.000000 3478.000000 3479 3479 3479 3479
unique 3479 3479 8 8 2 NaN NaN NaN NaN 3 1068 4 4
top 02AD4427 4_364_99 CCDG_12711 NYGC2 F NaN NaN NaN NaN EFIGA 3761 Hispanic Hispanic
freq 1 1 1172 1172 2180 NaN NaN NaN NaN 2059 47 2133 2133
mean NaN NaN NaN NaN NaN 1.339286 91.765563 0.660921 2.480161 NaN NaN NaN NaN
std NaN NaN NaN NaN NaN 1.612784 366.352308 0.690971 3.706756 NaN NaN NaN NaN
min NaN NaN NaN NaN NaN -9.000000 35.000000 0.000000 1.000000 NaN NaN NaN NaN
25% NaN NaN NaN NaN NaN 1.000000 66.000000 0.000000 1.000000 NaN NaN NaN NaN
50% NaN NaN NaN NaN NaN 2.000000 73.000000 1.000000 3.000000 NaN NaN NaN NaN
75% NaN NaN NaN NaN NaN 2.000000 80.000000 1.000000 3.000000 NaN NaN NaN NaN
max NaN NaN NaN NaN NaN 2.000000 8070.000000 2.000000 99.000000 NaN NaN NaN NaN
{% endraw %} {% raw %}
pheno_df
IID ID ProjectID Columbia_Description SEX AD AGE APOE RACE STUDY FID pop super_pop
0 02AD4427 4_364_99 CCDG_12711 NYGC2 F 2.0 71.0 1.0 1.0 NIALOAD 4_364 European European
1 02AD4429 4_44_3 CCDG_12711 NYGC2 F 2.0 90.0 1.0 2.0 NIALOAD 4_44 African African
2 03AD4435 27_104_62571 CCDG_11949 NYGC1 M 2.0 74.0 1.0 1.0 NIALOAD 27_104 European European
3 03AD4437 27_90_84583 CCDG_11949 NYGC1 F 2.0 71.0 1.0 1.0 NIALOAD 27_90 European European
4 03AD4438 27_90_84784 CCDG_11949 NYGC1 F 2.0 85.0 0.0 1.0 NIALOAD 27_90 European European
... ... ... ... ... ... ... ... ... ... ... ... ... ...
3474 05AD8765 26_EGH_EGH64401 CCDG_11949 NYGC1 F NaN NaN 1.0 NaN NIALOAD 26_EGH European European
3475 07AD1732 26_SW_SW27020 CCDG_11949 NYGC1 M NaN NaN 0.0 2.0 NIALOAD 26_SW European European
3476 10AD23886 10R_R111_16 CCDG_12711 NYGC2 M NaN 69.0 0.0 2.0 NIALOAD 10R_R111 European European
3477 10AD32608 10R_R114_16 CCDG_12711 NYGC2 M NaN 63.0 1.0 2.0 NIALOAD 10R_R114 European European
3478 10AD32610 10R_R114_20 CCDG_12711 NYGC2 F NaN 61.0 1.0 2.0 NIALOAD 10R_R114 European European

3479 rows × 13 columns

{% endraw %} {% raw %}
all_fam = pd.read_csv('all_sample.fam',delim_whitespace=True,header=None,names=['fid','iid','fathid','mothid','sex','ad'])
{% endraw %} {% raw %}
all_fam['vcf'] = list(all_fam.iid.isin(vcf.columns))
{% endraw %} {% raw %}
all_fam[all_fam.iid.isin(['10R_R99_8','215_59','27_25_','4_595_18','4_595_69','4_603_43'])]
fid iid fathid mothid sex ad vcf
1897 215 215_59 215_5 215_50 1 -9 True
1898 215 215_59 0 0 1 -9 True
14616 10R_R99 10R_R99_8 0 0 1 1 False
17178 4_595 4_595_18 0 0 1 1 False
17257 4_603 4_603_43 4_603_79 4_603_10 2 1 False
18863 10R_R99 10R_R99_8 10R_R99_19 10R_R99_5 2 1 False
19134 4_603 4_603_43 4_603_79 4_603_10 2 1 False
19168 4_595 4_595_69 4_595_66 4_595_64 2 1 False
19175 4_595 4_595_69 4_595_66 4_595_64 2 -9 False
19186 4_595 4_595_18 0 0 1 1 False
19247 27_25 27_25_ 27_25_86616 27_25_62222 2 -9 False
19248 27_25 27_25_ 0 0 2 -9 False
19249 27_25 27_25_ 27_25_86610 0 1 -9 False
19250 27_25 27_25_ 27_25_86610 0 2 -9 False
19251 27_25 27_25_ 0 27_25_62220 2 -9 False
19252 27_25 27_25_ 0 27_25_62220 1 -9 False
19253 27_25 27_25_ 0 0 1 -9 False
19254 27_25 27_25_ 0 27_25_85937 1 -9 False
{% endraw %}

1. Removing families with only one member

{% raw %}
famid = all_fam.fid.value_counts()
{% endraw %} {% raw %}
one_fam = all_fam[all_fam.fid.isin(famid[famid==1].keys())]
{% endraw %} {% raw %}
sum(one_fam.vcf)
250
{% endraw %} {% raw %}
# Keep only families with two or more members: drop every fid that occurs exactly once.
twom_fam = all_fam[~all_fam.fid.isin(famid[famid==1].index)]
{% endraw %} {% raw %}
twom_fam.to_csv('twoormore_member_fam.csv',header=False,index=False)
{% endraw %} {% raw %}
fmid=list(set(twom_fam.fathid))+list(set(twom_fam.mothid))
{% endraw %} {% raw %}
def create_founder(fam,foid='fathid'):
    """Build founder rows for parents that are referenced but not listed.

    For every row of ``fam`` whose ``foid`` value is not ``'0'`` and does not
    appear in ``fam.iid``, a copy of that row is turned into a founder record
    for the missing parent.

    Parameters
    ----------
    fam : pandas.DataFrame
        Pedigree table with at least the columns ``iid``, ``fathid``,
        ``mothid``, ``sex`` and ``ad``.
    foid : {'fathid', 'mothid'}, default 'fathid'
        Which parent column to scan for missing individuals.

    Returns
    -------
    pandas.DataFrame
        One row per referencing child (so a parent shared by several children
        appears several times), with ``iid`` set to the missing parent's ID,
        ``sex`` set to 1 for fathers or 2 for mothers, both parent IDs set to
        ``'0'``, ``ad`` set to -9 and, if present, ``vcf`` set to False.

    Raises
    ------
    ValueError
        If ``foid`` is neither ``'fathid'`` nor ``'mothid'``.
    """
    founder_sex = {'fathid': 1, 'mothid': 2}
    if foid not in founder_sex:
        # ``raise print(...)`` raised a TypeError; raise a real exception.
        raise ValueError('id error, do not match')
    parent = fam[foid]
    tmp = fam[~parent.isin(fam.iid) & (parent != '0')].copy()
    tmp['iid'] = tmp[foid]
    tmp['sex'] = founder_sex[foid]
    tmp['fathid'] = '0'
    tmp['mothid'] = '0'
    tmp['ad'] = -9
    # Only touch ``vcf`` when the column exists, so no new column is introduced.
    if 'vcf' in tmp.columns:
        tmp['vcf'] = False
    return tmp
{% endraw %} {% raw %}
new_twom_fam = pd.concat([twom_fam,create_founder(twom_fam,'fathid'),create_founder(twom_fam,'mothid')])
{% endraw %} {% raw %}
new_twom_fam
fid iid fathid mothid sex ad vcf
0 4_364 4_364_99 4_364_1 4_364_2 2 2 True
1 4_44 4_44_3 4_44_1 4_44_2 2 2 True
2 27_104 27_104_62571 27_104_84753 27_104_84752 1 2 True
3 27_90 27_90_84583 27_90_84575 27_90_84574 2 2 True
4 27_90 27_90_84784 27_90_84575 27_90_84574 2 2 True
... ... ... ... ... ... ... ...
19000 27_119 27_119_86892 0 0 2 -9 False
19067 27_192 27_192_86089 0 0 2 -9 False
19080 10R_R1 10R_R1_20 0 0 2 -9 False
19083 10R_R57 10R_R57_R15 0 0 2 -9 False
19215 10R_R114 10R_R114_R12 0 0 2 -9 False

19106 rows × 7 columns

{% endraw %} {% raw %}
new_twom_fam.to_csv('data/new_twoormore_member_fam.csv',header=False,index=False)
{% endraw %}

1. Removing duplicates

{% raw %}
efiga_ped[efiga_ped.ID.duplicated(keep=False)]
ID SEX AD AGE APOE FATHID MOTHID
6483 215_59 1.0 2 71.0 1.0 5 50
12644 215_59 0.0 0 NaN NaN 0 0
{% endraw %} {% raw %}
nia_ped[nia_ped.Sample_ID.duplicated(keep=False)].sort_values('Sample_ID')
Sample_ID FID Gender MOTHID FATHID prob.AD APOE APOE4NUM AAO_AgeLastSeen
643 10R_R99_8 10R_R99 M 0 0 1 NANA NaN NaN
5802 10R_R99_8 10R_R99 F 5 19 1 NANA NaN NaN
6226 27_25_ 27_25 F 62222 86616 -9 NANA NaN NaN
6227 27_25_ 27_25 F -9 NANA NaN NaN NaN NaN
6228 27_25_ 27_25 M 86610 -9 NANA NaN NaN NaN
6229 27_25_ 27_25 F 86610 -9 NANA NaN NaN NaN
6230 27_25_ 27_25 F 62220 -9 NANA NaN NaN NaN
6231 27_25_ 27_25 M 62220 -9 NANA NaN NaN NaN
6232 27_25_ 27_25 M -9 NANA NaN NaN NaN NaN
6233 27_25_ 27_25 M 85937 -9 NANA NaN NaN NaN
3899 4_595_18 4_595 M 0 0 1 NANA NaN NaN
6165 4_595_18 4_595 M 0 0 1 NANA NaN NaN
6147 4_595_69 4_595 F 64 66 1 NANA NaN 61.0
6154 4_595_69 4_595 F 64 66 -9 NANA NaN NaN
3989 4_603_43 4_603 F 10 79 1 NANA NaN NaN
6113 4_603_43 4_603 F 10 79 1 NANA NaN NaN
{% endraw %} {% raw %}
dufam = new_twom_fam[new_twom_fam.iid.duplicated(keep=False)].copy().sort_values('iid').reset_index(drop=True)
{% endraw %} {% raw %}
# Keep every row whose iid is unique, plus selected rows of `dufam`
# (the rows with duplicated iids, sorted by iid and re-indexed from 0).
# NOTE(review): the positional indices below are hard-coded and depend on the
# exact row order of `dufam`; presumably chosen by manual inspection — re-check
# them whenever the input pedigree changes.
nodp_fam = pd.concat([new_twom_fam[~new_twom_fam.iid.duplicated(keep=False)],dufam.iloc[[0,4,5,11,13,24,26,28]]])
{% endraw %}

1.1 one parent issue

{% raw %}
# Rows with a known mother but no father ('0') get a placeholder father ID:
# the mother's ID with a 'c' suffix. ``.loc`` writes into ``nodp_fam`` itself
# instead of relying on chained assignment (``df.col[mask] = ...``).
nof_id = (nodp_fam.fathid == '0') & (nodp_fam.mothid != '0')
nodp_fam.loc[nof_id, 'fathid'] = [x+'c' for x in nodp_fam.mothid[nof_id]]
{% endraw %} {% raw %}
# Rows with a known father but no mother ('0') get a placeholder mother ID:
# the father's ID with a 'c' suffix. ``.loc`` writes into ``nodp_fam`` itself
# instead of relying on chained assignment (``df.col[mask] = ...``).
nom_id = (nodp_fam.fathid != '0') & (nodp_fam.mothid == '0')
nodp_fam.loc[nom_id, 'mothid'] = [x+'c' for x in nodp_fam.fathid[nom_id]]
{% endraw %} {% raw %}
nodp_fam = pd.concat([nodp_fam,create_founder(nodp_fam,'fathid'),create_founder(nodp_fam,'mothid')])
{% endraw %} {% raw %}
# Keep the first row for each iid. Take an explicit copy so that later
# in-place corrections operate on an independent frame rather than on a
# slice of the previous one.
nodp_fam = nodp_fam[~nodp_fam.iid.duplicated()].copy()
{% endraw %} {% raw %}
nodp_fam.to_csv('data/nodp_fam.csv',header=False,index=False)
{% endraw %}

2. Correct misspecified gender

2.1 27_122 family: shift fathid and mothid

{% raw %}
# Set the parents of 27_122_16055 with .loc so the write targets nodp_fam
# itself rather than a temporary column selection (chained assignment).
nodp_fam.loc[nodp_fam.iid=='27_122_16055', 'fathid'] = '27_122_84953'
nodp_fam.loc[nodp_fam.iid=='27_122_16055', 'mothid'] = '27_122_84952'
/tmp/1974374.1.plot.q/ipykernel_27113/3531353860.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nodp_fam.fathid[nodp_fam.iid=='27_122_16055'] = '27_122_84953'
/home/yh3455/miniconda3/envs/seqpy3v0/lib/python3.9/site-packages/pandas/core/generic.py:8870: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return self._update_inplace(result)
/tmp/1974374.1.plot.q/ipykernel_27113/3531353860.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nodp_fam.mothid[nodp_fam.iid=='27_122_16055'] = '27_122_84952'
{% endraw %}

2.2 10R_R47 family: shift fathid and mothid

{% raw %}
# Set the parents of 10R_R47_62 with .loc so the write targets nodp_fam
# itself rather than a temporary column selection (chained assignment).
nodp_fam.loc[nodp_fam.iid=='10R_R47_62', 'fathid'] = '10R_R47_43'
nodp_fam.loc[nodp_fam.iid=='10R_R47_62', 'mothid'] = '10R_R47_1'
/tmp/1974374.1.plot.q/ipykernel_27113/2290483744.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nodp_fam.fathid[nodp_fam.iid=='10R_R47_62'] = '10R_R47_43'
/home/yh3455/miniconda3/envs/seqpy3v0/lib/python3.9/site-packages/pandas/core/generic.py:8870: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return self._update_inplace(result)
/tmp/1974374.1.plot.q/ipykernel_27113/2290483744.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nodp_fam.mothid[nodp_fam.iid=='10R_R47_62'] = '10R_R47_1'
{% endraw %}

2.3 27_126 family: shift fathid and mothid

{% raw %}
nodp_fam[nodp_fam.fid == '27_126'].sort_values('fathid')
fid iid fathid mothid sex ad vcf
15933 27_126 27_126_85087 0 0 2 -9 False
15934 27_126 27_126_85088 0 0 1 -9 False
15936 27_126 27_126_85180 0 0 2 -9 False
15939 27_126 27_126_85185 0 0 1 -9 False
19032 27_126 27_126_86560 0 0 1 -9 False
19035 27_126 27_126_86559 27_126_85004 27_126_86560 2 1 False
15930 27_126 27_126_85000 27_126_85088 27_126_85087 2 2 False
15931 27_126 27_126_85004 27_126_85088 27_126_85087 2 1 False
15932 27_126 27_126_85005 27_126_85088 27_126_85087 2 2 False
15935 27_126 27_126_85178 27_126_85088 27_126_85087 1 2 False
292 27_126 27_126_85179 27_126_85178 27_126_85180 2 1 True
15937 27_126 27_126_85181 27_126_85178 27_126_85180 1 1 False
15938 27_126 27_126_85184 27_126_85185 27_126_85000 2 1 False
15940 27_126 27_126_85224 27_126_85185 27_126_85000 1 1 False
15941 27_126 27_126_85225 27_126_85185 27_126_85000 1 1 False
19003 27_126 27_126_85025 27_126_85185 27_126_85000 2 1 False
{% endraw %} {% raw %}
# Swap the recorded father and mother of 27_126_86559. Use .loc so the write
# targets nodp_fam itself rather than a temporary column selection.
nodp_fam.loc[nodp_fam.iid=='27_126_86559', 'fathid'] = '27_126_86560'
nodp_fam.loc[nodp_fam.iid=='27_126_86559', 'mothid'] = '27_126_85004'
/tmp/1974374.1.plot.q/ipykernel_27113/238780734.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nodp_fam.fathid[nodp_fam.iid=='27_126_86559'] = '27_126_86560'
/home/yh3455/miniconda3/envs/seqpy3v0/lib/python3.9/site-packages/pandas/core/generic.py:8870: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return self._update_inplace(result)
/tmp/1974374.1.plot.q/ipykernel_27113/238780734.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nodp_fam.mothid[nodp_fam.iid=='27_126_86559'] = '27_126_85004'
{% endraw %}

2.4 10R_R78 family: wrong fathid in 10R_R78_31

{% raw %}
nodp_fam[nodp_fam.fid == '10R_R78'].sort_values(['fathid','iid'])
fid iid fathid mothid sex ad vcf
14499 10R_R78 10R_R78_11 0 0 1 1 False
14501 10R_R78 10R_R78_2 0 0 1 -9 False
14502 10R_R78 10R_R78_22 0 0 2 1 False
14503 10R_R78 10R_R78_23 0 0 1 1 False
18992 10R_R78 10R_R78_24 0 0 2 -9 False
14504 10R_R78 10R_R78_29 0 0 2 1 False
14507 10R_R78 10R_R78_34 0 0 2 -9 False
14508 10R_R78 10R_R78_36 0 0 2 -9 False
14510 10R_R78 10R_R78_4 0 0 1 1 False
14511 10R_R78 10R_R78_43 0 0 1 -9 False
14512 10R_R78 10R_R78_44 0 0 2 1 False
14515 10R_R78 10R_R78_47 0 0 2 1 False
14518 10R_R78 10R_R78_51 0 0 1 -9 False
14520 10R_R78 10R_R78_53 0 0 1 1 False
14521 10R_R78 10R_R78_7 0 0 1 1 False
14500 10R_R78 10R_R78_19 10R_R78_11 10R_R78_10 1 1 False
568 10R_R78 10R_R78_25 10R_R78_11 10R_R78_10 1 1 True
569 10R_R78 10R_R78_26 10R_R78_11 10R_R78_10 2 1 True
14516 10R_R78 10R_R78_48 10R_R78_12 10R_R78_47 2 1 False
19073 10R_R78 10R_R78_55 10R_R78_12 10R_R78_47 1 -9 False
19074 10R_R78 10R_R78_56 10R_R78_12 10R_R78_47 1 -9 False
602 10R_R78 10R_R78_14 10R_R78_2 10R_R78_1 1 1 True
547 10R_R78 10R_R78_16 10R_R78_2 10R_R78_1 2 1 True
18995 10R_R78 10R_R78_31 10R_R78_21 10R_R78_50 1 -9 False
14505 10R_R78 10R_R78_3 10R_R78_23 10R_R78_22 2 1 False
14517 10R_R78 10R_R78_50 10R_R78_23 10R_R78_22 2 2 False
14514 10R_R78 10R_R78_46 10R_R78_28 10R_R78_29 2 1 False
592 10R_R78 10R_R78_39 10R_R78_33 10R_R78_34 2 1 True
19008 10R_R78 10R_R78_40 10R_R78_33 10R_R78_34 2 1 False
596 10R_R78 10R_R78_41 10R_R78_33 10R_R78_34 2 1 True
14509 10R_R78 10R_R78_38 10R_R78_35 10R_R78_36 1 1 False
543 10R_R78 10R_R78_1 10R_R78_4 10R_R78_3 2 2 True
545 10R_R78 10R_R78_10 10R_R78_4 10R_R78_3 2 1 True
546 10R_R78 10R_R78_12 10R_R78_4 10R_R78_3 1 1 True
18994 10R_R78 10R_R78_13 10R_R78_4 10R_R78_3 2 -9 False
18993 10R_R78 10R_R78_5 10R_R78_4 10R_R78_3 1 -9 False
544 10R_R78 10R_R78_6 10R_R78_4 10R_R78_3 2 2 True
14522 10R_R78 10R_R78_9 10R_R78_4 10R_R78_3 1 1 False
512 10R_R78 10R_R78_42 10R_R78_43 10R_R78_30 1 1 True
573 10R_R78 10R_R78_20 10R_R78_51 10R_R78_50 1 2 True
574 10R_R78 10R_R78_21 10R_R78_51 10R_R78_50 2 2 True
575 10R_R78 10R_R78_28 10R_R78_51 10R_R78_50 1 2 True
14506 10R_R78 10R_R78_30 10R_R78_51 10R_R78_50 2 1 False
18996 10R_R78 10R_R78_32 10R_R78_51 10R_R78_50 1 -9 False
594 10R_R78 10R_R78_33 10R_R78_51 10R_R78_50 1 2 True
579 10R_R78 10R_R78_35 10R_R78_51 10R_R78_50 1 2 True
18997 10R_R78 10R_R78_37 10R_R78_51 10R_R78_50 1 -9 False
14519 10R_R78 10R_R78_52 10R_R78_53 10R_R78_21 2 1 False
548 10R_R78 10R_R78_18 10R_R78_7 10R_R78_6 2 1 True
14513 10R_R78 10R_R78_45 10R_R78_9 10R_R78_44 2 1 False
{% endraw %} {% raw %}
# Replace the father ID of 10R_R78_31. Use .loc so the write targets nodp_fam
# itself rather than a temporary column selection (chained assignment).
nodp_fam.loc[nodp_fam.iid=='10R_R78_31', 'fathid'] = '10R_R78_51'
/tmp/1974374.1.plot.q/ipykernel_27113/1353521524.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nodp_fam.fathid[nodp_fam.iid=='10R_R78_31'] = '10R_R78_51'
/home/yh3455/miniconda3/envs/seqpy3v0/lib/python3.9/site-packages/pandas/core/generic.py:8870: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return self._update_inplace(result)
{% endraw %}

2.5 27_152 family: shift fathid and mothid

{% raw %}
nodp_fam[nodp_fam.fid == '27_152'].sort_values('fathid')
fid iid fathid mothid sex ad vcf
16127 27_152 27_152_85523 0 0 2 1 False
16128 27_152 27_152_85524 0 0 1 2 False
16135 27_152 27_152_85911 0 0 1 -9 False
16136 27_152 27_152_85912 0 0 2 2 False
16137 27_152 27_152_86139 0 0 2 -9 False
16138 27_152 27_152_86140 0 0 1 -9 False
565 27_152 27_152_85525 27_152_85524 27_152_85523 2 2 True
16125 27_152 27_152_63599 27_152_85524 27_152_85523 1 2 False
16126 27_152 27_152_85522 27_152_85524 27_152_85523 1 2 False
16129 27_152 27_152_85526 27_152_85524 27_152_85523 1 1 False
16130 27_152 27_152_85541 27_152_85524 27_152_85523 2 2 False
16131 27_152 27_152_85554 27_152_85524 27_152_85523 2 2 False
16132 27_152 27_152_85908 27_152_85524 27_152_85523 2 2 False
16133 27_152 27_152_85909 27_152_85524 27_152_85523 1 2 False
16134 27_152 27_152_85910 27_152_85912 27_152_85911 2 2 False
{% endraw %} {% raw %}
# Swap the recorded father and mother of 27_152_85910. Use .loc so the write
# targets nodp_fam itself rather than a temporary column selection.
nodp_fam.loc[nodp_fam.iid=='27_152_85910', 'fathid'] = '27_152_85911'
nodp_fam.loc[nodp_fam.iid=='27_152_85910', 'mothid'] = '27_152_85912'
/tmp/1974374.1.plot.q/ipykernel_27113/3269376899.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nodp_fam.fathid[nodp_fam.iid=='27_152_85910'] = '27_152_85911'
/home/yh3455/miniconda3/envs/seqpy3v0/lib/python3.9/site-packages/pandas/core/generic.py:8870: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return self._update_inplace(result)
/tmp/1974374.1.plot.q/ipykernel_27113/3269376899.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nodp_fam.mothid[nodp_fam.iid=='27_152_85910'] = '27_152_85912'
{% endraw %}

2.6 26_TCC family: shift fathid and mothid

{% raw %}
nodp_fam[nodp_fam.fid == '26_TCC'].sort_values('fathid')
fid iid fathid mothid sex ad vcf
15770 26_TCC 26_TCC_TCC65603 0 0 1 2 False
15771 26_TCC 26_TCC_TCC65604 0 0 2 2 False
15775 26_TCC 26_TCC_TCC65610 0 0 1 -9 False
585 26_TCC 26_TCC_TCC65601 26_TCC_TCC65603 26_TCC_TCC65604 2 2 True
15769 26_TCC 26_TCC_TCC65602 26_TCC_TCC65603 26_TCC_TCC65604 2 2 False
15772 26_TCC 26_TCC_TCC65606 26_TCC_TCC65603 26_TCC_TCC65604 2 2 False
15773 26_TCC 26_TCC_TCC65607 26_TCC_TCC65603 26_TCC_TCC65604 2 2 False
15774 26_TCC 26_TCC_TCC65608 26_TCC_TCC65603 26_TCC_TCC65604 2 2 False
820 26_TCC 26_TCC_TCC65609 26_TCC_TCC65606 26_TCC_TCC65610 2 1 True
{% endraw %} {% raw %}
# Swap the recorded father and mother of 26_TCC_TCC65609. Use .loc so the
# write targets nodp_fam itself rather than a temporary column selection.
nodp_fam.loc[nodp_fam.iid=='26_TCC_TCC65609', 'fathid'] = '26_TCC_TCC65610'
nodp_fam.loc[nodp_fam.iid=='26_TCC_TCC65609', 'mothid'] = '26_TCC_TCC65606'
/tmp/1974374.1.plot.q/ipykernel_27113/3628568965.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nodp_fam.fathid[nodp_fam.iid=='26_TCC_TCC65609'] = '26_TCC_TCC65610'
/home/yh3455/miniconda3/envs/seqpy3v0/lib/python3.9/site-packages/pandas/core/generic.py:8870: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return self._update_inplace(result)
/tmp/1974374.1.plot.q/ipykernel_27113/3628568965.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nodp_fam.mothid[nodp_fam.iid=='26_TCC_TCC65609'] = '26_TCC_TCC65606'
{% endraw %}

2.7 10R_R99 family

10R_R99_19 and 10R_R99_5 are both recorded as male, yet they are listed as the father and the mother of 10R_R99_8, respectively. This can only be verified from the VCF, and both of them have VCF data.

{% raw %}
nodp_fam[nodp_fam.fid == '10R_R99'].sort_values('fathid')
fid iid fathid mothid sex ad vcf
14610 10R_R99 10R_R99_2 0 0 1 -9 False
14614 10R_R99 10R_R99_29 0 0 1 2 False
14615 10R_R99 10R_R99_7 0 0 2 2 False
4 10R_R99 10R_R99_8 10R_R99_19 10R_R99_5 2 1 False
765 10R_R99 10R_R99_10 10R_R99_2 10R_R99_1 2 1 True
767 10R_R99 10R_R99_19 10R_R99_2 10R_R99_1 1 1 True
14611 10R_R99 10R_R99_20 10R_R99_2 10R_R99_1 1 1 False
14612 10R_R99 10R_R99_21 10R_R99_2 10R_R99_1 1 1 False
766 10R_R99 10R_R99_17 10R_R99_29 10R_R99_15 1 1 True
14613 10R_R99 10R_R99_22 10R_R99_29 10R_R99_15 1 1 False
754 10R_R99 10R_R99_15 10R_R99_8 10R_R99_7 2 2 True
759 10R_R99 10R_R99_12 10R_R99_8 10R_R99_7 2 2 True
762 10R_R99 10R_R99_1 10R_R99_8 10R_R99_7 2 2 True
763 10R_R99 10R_R99_5 10R_R99_8 10R_R99_7 1 1 True
764 10R_R99 10R_R99_6 10R_R99_8 10R_R99_7 2 1 True
{% endraw %} {% raw %}
# Make 10R_R99_8 a founder (no recorded parents). Use .loc so the write
# targets nodp_fam itself rather than a temporary column selection.
nodp_fam.loc[nodp_fam.iid=='10R_R99_8', 'fathid'] = '0'
nodp_fam.loc[nodp_fam.iid=='10R_R99_8', 'mothid'] = '0'
/tmp/1974374.1.plot.q/ipykernel_27113/1156509625.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nodp_fam.fathid[nodp_fam.iid=='10R_R99_8'] = '0'
/home/yh3455/miniconda3/envs/seqpy3v0/lib/python3.9/site-packages/pandas/core/generic.py:8870: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return self._update_inplace(result)
/tmp/1974374.1.plot.q/ipykernel_27113/1156509625.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nodp_fam.mothid[nodp_fam.iid=='10R_R99_8'] = '0'
{% endraw %} {% raw %}
# Set the sex of 10R_R99_8 to 1 with one .loc call (no chained indexing).
nodp_fam.loc[nodp_fam.iid == '10R_R99_8', 'sex'] = 1
/tmp/1974374.1.plot.q/ipykernel_27113/3169029369.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nodp_fam.sex[nodp_fam.iid=='10R_R99_8'] = 1
/home/yh3455/miniconda3/envs/seqpy3v0/lib/python3.9/site-packages/pandas/core/generic.py:8870: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return self._update_inplace(result)
{% endraw %}

2.8 27_192 family

Both 27_192_86076 and 27_192_86089 are recorded as female, but they are listed as the father and the mother of 27_192_86564. 27_192_86076 has a VCF.

{% raw %}
nodp_fam[nodp_fam.fid == '27_192'].sort_values('fathid')
fid iid fathid mothid sex ad vcf
16168 27_192 27_192_86071 0 0 2 -9 False
16169 27_192 27_192_86073 0 0 2 2 False
16170 27_192 27_192_86074 0 0 1 -9 False
19067 27_192 27_192_86089 0 0 2 -9 False
914 27_192 27_192_86070 27_192_86072 27_192_86071 1 1 True
915 27_192 27_192_86069 27_192_86072 27_192_86071 1 1 True
913 27_192 27_192_86072 27_192_86074 27_192_86073 1 1 True
921 27_192 27_192_86076 27_192_86074 27_192_86073 2 1 True
929 27_192 27_192_86075 27_192_86074 27_192_86073 2 1 True
932 27_192 27_192_86101 27_192_86074 27_192_86073 1 2 True
1042 27_192 27_192_86085 27_192_86074 27_192_86073 2 2 True
16171 27_192 27_192_86108 27_192_86074 27_192_86073 2 2 False
19067 27_192 27_192_86564 27_192_86076 27_192_86089 2 1 False
{% endraw %} {% raw %}
# Set the sex of 27_192_86076 to 1 with one .loc call (no chained indexing).
nodp_fam.loc[nodp_fam.iid == '27_192_86076', 'sex'] = 1
/tmp/1974374.1.plot.q/ipykernel_27113/1372026696.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nodp_fam.sex[nodp_fam.iid=='27_192_86076'] = 1
{% endraw %}

2.9 4_715 family

4_715_6 is recorded as female but is listed as the father of two children. This individual has no AD status or VCF and will be trimmed later, so the sex is set to male (1).

{% raw %}
nodp_fam[nodp_fam.fid == '4_715'].sort_values('fathid')
fid iid fathid mothid sex ad vcf
19143 4_715 4_715_6c 0 0 2 -9 False
17573 4_715 4_715_20 0 0 1 1 False
17571 4_715 4_715_19 0 0 1 1 False
17570 4_715 4_715_18 0 0 2 1 False
19141 4_715 4_715_8c 0 0 2 -9 False
17568 4_715 4_715_16 0 0 2 1 False
17567 4_715 4_715_15 0 0 1 1 False
17569 4_715 4_715_17 0 0 1 1 False
19193 4_715 4_715_39 0 0 2 1 False
19194 4_715 4_715_38 0 0 2 1 False
19199 4_715 4_715_40 0 0 1 1 False
19139 4_715 4_715_3c 0 0 2 -9 False
17579 4_715 4_715_26 0 0 1 1 False
17591 4_715 4_715_99 4_715_1 4_715_2 2 -9 False
17589 4_715 4_715_8 4_715_1 4_715_2 1 1 False
17588 4_715 4_715_7 4_715_1 4_715_2 2 2 False
17587 4_715 4_715_6 4_715_1 4_715_2 2 -9 False
17586 4_715 4_715_5 4_715_1 4_715_2 2 2 False
17585 4_715 4_715_4 4_715_1 4_715_2 2 1 False
17590 4_715 4_715_9 4_715_1 4_715_2 1 1 False
985 4_715 4_715_3 4_715_1 4_715_2 1 -9 True
17561 4_715 4_715_1 4_715_15 4_715_16 1 1 False
17562 4_715 4_715_10 4_715_15 4_715_16 2 1 False
17563 4_715 4_715_11 4_715_15 4_715_16 2 1 False
17572 4_715 4_715_2 4_715_17 4_715_18 2 1 False
17566 4_715 4_715_14 4_715_17 4_715_18 1 1 False
17565 4_715 4_715_13 4_715_17 4_715_18 2 1 False
17564 4_715 4_715_12 4_715_17 4_715_18 2 1 False
17578 4_715 4_715_25 4_715_19 4_715_5 1 1 False
17577 4_715 4_715_24 4_715_19 4_715_5 1 1 False
17574 4_715 4_715_21 4_715_19 4_715_5 1 1 False
17576 4_715 4_715_23 4_715_20 4_715_99 2 1 False
17575 4_715 4_715_22 4_715_20 4_715_99 1 1 False
17581 4_715 4_715_28 4_715_26 4_715_7 1 1 False
17580 4_715 4_715_27 4_715_26 4_715_7 1 1 False
17584 4_715 4_715_31 4_715_26 4_715_7 2 1 False
17583 4_715 4_715_30 4_715_26 4_715_7 1 1 False
17582 4_715 4_715_29 4_715_26 4_715_7 1 1 False
19140 4_715 4_715_33 4_715_3 4_715_3c 2 1 False
19139 4_715 4_715_32 4_715_3 4_715_3c 2 1 False
19143 4_715 4_715_36 4_715_6 4_715_6c 1 1 False
19144 4_715 4_715_37 4_715_6 4_715_6c 2 1 False
19141 4_715 4_715_34 4_715_8 4_715_8c 2 1 False
19142 4_715 4_715_35 4_715_8 4_715_8c 2 1 False
{% endraw %} {% raw %}
# Set the sex of 4_715_6 to 1 with one .loc call (no chained indexing).
nodp_fam.loc[nodp_fam.iid == '4_715_6', 'sex'] = 1
/tmp/1974374.1.plot.q/ipykernel_27113/3070835650.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nodp_fam.sex[nodp_fam.iid=='4_715_6'] = 1
/home/yh3455/miniconda3/envs/seqpy3v0/lib/python3.9/site-packages/pandas/core/generic.py:8870: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return self._update_inplace(result)
{% endraw %}

2.10 170 family

{% raw %}
nodp_fam[nodp_fam.fid == '170'].sort_values('fathid')
fid iid fathid mothid sex ad vcf
11691 170 170_13 0 0 2 -9 False
12490 170 170_31 0 0 1 -9 False
11994 170 170_16 0 0 2 -9 False
11694 170 170_14 0 0 1 -9 False
11693 170 170_15 0 0 2 -9 False
11692 170 170_12 0 0 1 -9 False
3628 170 170_11 0 0 1 -9 False
3634 170 170_4 170_1 170_2 1 1 False
3633 170 170_6 170_1 170_2 2 1 False
3632 170 170_7 170_1 170_2 1 1 False
1341 170 170_5 170_1 170_2 2 1 True
1342 170 170_99 170_1 170_2 2 2 True
2045 170 170_3 170_1 170_2 1 2 True
3629 170 170_10 170_11 170_99 2 -9 False
3630 170 170_9 170_11 170_99 1 -9 False
3631 170 170_8 170_11 170_99 2 -9 False
2854 170 170_23 170_12 170_13 2 -9 True
12489 170 170_30 170_12 170_13 1 -9 False
12488 170 170_29 170_12 170_13 1 -9 False
12487 170 170_28 170_12 170_13 1 -9 False
12486 170 170_27 170_12 170_13 1 -9 False
12485 170 170_26 170_12 170_13 1 -9 False
12484 170 170_25 170_12 170_13 1 -9 False
12483 170 170_24 170_12 170_13 1 -9 False
12482 170 170_22 170_12 170_13 2 -9 False
12481 170 170_21 170_12 170_13 2 -9 False
12479 170 170_19 170_12 170_13 2 2 False
1793 170 170_17 170_12 170_16 2 2 True
1827 170 170_1 170_12 170_13 1 2 True
2483 170 170_18 170_12 170_13 2 2 True
12480 170 170_20 170_12 170_13 2 -9 False
1494 170 170_2 170_14 170_15 2 1 True
2848 170 170_38 170_18 170_31 2 1 True
2849 170 170_35 170_31 170_18 2 1 True
12492 170 170_34 170_31 170_18 1 1 False
2846 170 170_32 170_31 170_18 1 1 True
2847 170 170_36 170_31 170_18 2 1 True
12491 170 170_33 170_31 170_18 1 -9 False
12493 170 170_37 170_31 170_18 2 1 False
{% endraw %} {% raw %}
# 170_38 has its father and mother ids swapped relative to its siblings;
# write the corrected ids with .loc instead of chained indexing.
nodp_fam.loc[nodp_fam.iid == '170_38', 'fathid'] = '170_31'
nodp_fam.loc[nodp_fam.iid == '170_38', 'mothid'] = '170_18'
/tmp/1974374.1.plot.q/ipykernel_27113/2512832026.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nodp_fam.fathid[nodp_fam.iid=='170_38'] = '170_31'
/home/yh3455/miniconda3/envs/seqpy3v0/lib/python3.9/site-packages/pandas/core/generic.py:8870: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return self._update_inplace(result)
/tmp/1974374.1.plot.q/ipykernel_27113/2512832026.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nodp_fam.mothid[nodp_fam.iid=='170_38'] = '170_18'
{% endraw %} {% raw %}
sum(nodp_fam.vcf)+250
3479
{% endraw %} {% raw %}
nodp_fam.to_csv('data/nodp_fam.csv',header=False,index=False)
{% endraw %}

Trimming pedigree

2.1 Trim bottom-up and by depth

{% raw %}
ped = nodp_fam.sort_values(['fid','fathid']).copy()
{% endraw %} {% raw %}
ped.index = list(ped.iid)
{% endraw %} {% raw %}
ped
fid iid fathid mothid sex ad vcf
1005_1 1005 1005_1 0 0 1 -9 False
1005_2 1005 1005_2 0 0 2 -9 False
1005_8 1005 1005_8 0 0 1 -9 False
1005_11 1005 1005_11 0 0 1 -9 False
1005_99 1005 1005_99 1005_1 1005_2 2 2 True
... ... ... ... ... ... ... ...
997_3 997 997_3 997_1 997_2 2 -9 False
997_4 997 997_4 997_1 997_2 1 -9 False
997_5 997 997_5 997_1 997_2 1 -9 False
997_6 997 997_6 997_1 997_2 2 -9 False
997_7 997 997_7 997_1 997_2 2 -9 False

19108 rows × 7 columns

{% endraw %}

get depth function

{% raw %}
def get_depth(ped,i,value):
    """Return the pedigree depth of the row at position ``i``.

    A row has depth 1 when both parent ids are '0' or when neither parent
    can be located in ``ped.index``. Otherwise its depth is one more than
    the deeper of its two parents.

    Parameters
    ----------
    ped : pandas.DataFrame
        Pedigree table. The father id is read from column position 2 and
        the mother id from column position 3. Parents are located with
        ``ped.index.get_loc``, so the index is expected to hold the
        individual ids.
    i : int
        Row position (as used by ``iloc``).
    value : sequence of int
        Depths already known, addressed by row position; 0 means "not
        computed yet". It is only read here, never written.

    Returns
    -------
    int
        Depth of the row, at least 1.
    """
    def parent_depth(parent_id):
        # Contribution of one parent: 1 when the parent is absent,
        # otherwise 1 + the parent's own depth.
        if parent_id == '0':
            return 1
        try:
            # int() rejects the slice / boolean mask that get_loc returns
            # for a duplicated label; such a parent is treated as absent.
            ind = int(ped.index.get_loc(parent_id))
        except (KeyError, TypeError, ValueError):
            return 1
        known = value[ind]
        if known == 0:
            return 1 + get_depth(ped, ind, value)
        return 1 + known

    # Explicit positional access: integer keys on a label-indexed Series
    # (the previous r[2] / r[3]) are not positional on newer pandas.
    return max(parent_depth(ped.iloc[i, 2]), parent_depth(ped.iloc[i, 3]))
{% endraw %} {% raw %}
# `depth` doubles as the lookup table passed to get_depth: an entry of 0
# means that row's depth has not been computed yet.
depth = [0]*len(ped)
for i in range(len(ped)):
    depth[i] = get_depth(ped,i,depth)
{% endraw %} {% raw %}
max(depth)
4
{% endraw %} {% raw %}
ped['depth'] = depth
{% endraw %} {% raw %}
ped.depth[0]
1
{% endraw %} {% raw %}
ped.ad.value_counts()
-9    10460
 1     5268
 2     3380
Name: ad, dtype: int64
{% endraw %} {% raw %}
ped
fid iid fathid mothid sex ad vcf depth
1005_1 1005 1005_1 0 0 1 -9 False 1
1005_2 1005 1005_2 0 0 2 -9 False 1
1005_8 1005 1005_8 0 0 1 -9 False 1
1005_11 1005 1005_11 0 0 1 -9 False 1
1005_99 1005 1005_99 1005_1 1005_2 2 2 True 2
... ... ... ... ... ... ... ... ...
997_3 997 997_3 997_1 997_2 2 -9 False 2
997_4 997 997_4 997_1 997_2 1 -9 False 2
997_5 997 997_5 997_1 997_2 1 -9 False 2
997_6 997 997_6 997_1 997_2 2 -9 False 2
997_7 997 997_7 997_1 997_2 2 -9 False 2

19108 rows × 8 columns

{% endraw %} {% raw %}
def trim_trees(ped,depth_cut=3):
    '''Collect trim flags for every family in ``ped``.

    Families are processed in order of first appearance of their ``fid``
    and each family's flags (from ``trim_tree``) are concatenated. ``ped``
    is not sorted here: the result lines up with the rows of ``ped`` only
    when the caller has already grouped the rows by family (and ordered
    them by depth, as ``trim_tree`` expects).
    '''
    flags = []
    for family_id in ped.fid.unique():
        family_rows = ped[ped.fid == family_id]
        flags.extend(trim_tree(family_rows, depth_cut))
    return flags
{% endraw %} {% raw %}
def trim_tree(fi,depth_cut=3):
    '''Flag the rows of one family that can be trimmed.

    Rows are visited in the order they appear in ``fi``. A row is a
    candidate when
      * its depth is greater than ``depth_cut`` and it has ``vcf == False``
        and ``ad != 2``, or
      * its depth is at most ``depth_cut`` and both parent ids are '0'.
    A candidate is flagged when it has no children in ``fi`` or when all of
    its children are already flagged, so children must come before their
    parents for the flags to propagate upwards.

    Returns a list of booleans, one per row of ``fi``, in row order.
    '''
    trim = pd.Series([False] * len(fi))
    for pos, (_, row) in enumerate(fi.iterrows()):
        if row.depth > depth_cut:
            candidate = row.vcf == False and row.ad != 2
        else:
            candidate = row.fathid == '0' and row.mothid == '0'
        if not candidate:
            continue
        # Rows naming this individual as father or mother.
        kids = (fi.fathid == row.iid) | (fi.mothid == row.iid)
        if not kids.any() or trim[list(kids)].all():
            trim[pos] = True
    return list(trim)
{% endraw %} {% raw %}
ped = ped.sort_values(['fid','depth'],ascending=False)
{% endraw %} {% raw %}
trim = trim_trees(ped,depth_cut=3)
{% endraw %} {% raw %}
sum(trim)
3333
{% endraw %} {% raw %}
len(trim)
19108
{% endraw %} {% raw %}
ped['trim'] = trim
{% endraw %} {% raw %}
ped.to_csv('data/nodp_ped_with_depth_trim.csv',header=True,index=False)
{% endraw %}

2.2 Trim bottom-up and top-down

{% raw %}
def trim_trees(ped):
    '''Collect bottom-up/top-down trim flags for every family in ``ped``.

    ``ped`` is first sorted by ``fid`` and ``depth`` in descending order,
    then each family is passed to ``trim_tree_all`` and the per-family
    flags are concatenated. The returned list follows the order of that
    sorted frame, so it lines up with the caller's rows only if the caller
    sorted ``ped`` the same way beforehand.
    '''
    ordered = ped.sort_values(['fid', 'depth'], ascending=False)  # deepest rows first
    flags = []
    for family_id in ordered.fid.unique():
        flags.extend(trim_tree_all(ordered[ordered.fid == family_id]))
    return flags
def trim_tree_all(fi):
    '''Flag trimmable rows of one family in two passes.

    Pass 1 walks the rows first to last. A row that has no children in
    ``fi``, or whose children are all flagged already, is flagged when it
    has ``vcf == False`` and ``ad != 2``.

    Pass 2 walks the rows last to first. A row is considered when both of
    its parent ids are '0' or when every row matching its parent ids is
    flagged (also true when no row matches). Such a row is flagged when any
    of these holds: it has no children; all of its children are flagged;
    none of its children has a vcf; no child has ``ad == 2`` and the row
    itself has ``ad != 2``.

    Returns a list of booleans, one per row of ``fi``, in row order.
    '''
    size = len(fi)
    trim = pd.Series([False] * size)

    def children_of(row):
        # Boolean mask over fi: rows naming `row` as father or mother.
        return (fi.fathid == row.iid) | (fi.mothid == row.iid)

    # Pass 1: from the first row to the last.
    for pos in range(size):
        row = fi.iloc[pos]
        kids = children_of(row)
        is_leaf = not kids.any() or trim[list(kids)].all()
        if is_leaf and row.vcf == False and row.ad != 2:
            trim[pos] = True

    # Pass 2: from the last row back to the first.
    for pos in reversed(range(size)):
        row = fi.iloc[pos]
        kids = children_of(row)
        no_parents = row.fathid == '0' and row.mothid == '0'
        if not (no_parents or trim[list(fi.iid.isin([row.fathid, row.mothid]))].all()):
            continue
        if (
            not kids.any()
            or trim[list(kids)].all()
            or not fi.vcf[kids].any()
            or ((fi.ad[kids] != 2).all() and row.ad != 2)
        ):
            trim[pos] = True
    return list(trim)
{% endraw %} {% raw %}
new_ped = ped.sort_values(['fid','depth'],ascending=False).copy()
{% endraw %} {% raw %}
trim = trim_trees(new_ped)
{% endraw %} {% raw %}
new_ped['trim']=trim
{% endraw %} {% raw %}
new_ped.to_csv('data/nodp_ped_with_trim_bottomup_topdown.csv',header=True,index=False)
{% endraw %}

2.2.1 Treat nodes with a depth of 1 as founders (set their parents to 0)

{% raw %}
# Keep only the untrimmed rows. The explicit copy makes trim_ped an
# independent frame, so later column assignments do not target a slice of new_ped.
trim_ped = new_ped[new_ped.trim==False].copy()
{% endraw %} {% raw %}
# Recompute depths on the trimmed pedigree. `depth` doubles as the lookup
# table passed to get_depth: an entry of 0 means "not computed yet".
depth = [0]*len(trim_ped)
for i in range(len(trim_ped)):
    depth[i] = get_depth(trim_ped,i,depth)
{% endraw %} {% raw %}
max(depth)
4
{% endraw %} {% raw %}
trim_ped['depth'] = depth
/tmp/1974374.1.plot.q/ipykernel_27113/4238934653.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  trim_ped['depth'] = depth
{% endraw %} {% raw %}
trim_ped.depth.value_counts()
2    3116
1    1913
3     995
4      20
Name: depth, dtype: int64
{% endraw %} {% raw %}
# get_depth returned 1 for these rows, i.e. neither parent was resolved in
# trim_ped, so record both parents as '0'. One .loc call replaces the two
# chained assignments.
trim_ped.loc[trim_ped.depth == 1, ['fathid', 'mothid']] = '0'
/tmp/1974374.1.plot.q/ipykernel_27113/2729491037.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  trim_ped.fathid[trim_ped.depth==1] = '0'
/home/yh3455/miniconda3/envs/seqpy3v0/lib/python3.9/site-packages/pandas/core/generic.py:8870: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return self._update_inplace(result)
/tmp/1974374.1.plot.q/ipykernel_27113/2729491037.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  trim_ped.mothid[trim_ped.depth==1] = '0'
{% endraw %}

2.3 Summary trimming trees

{% raw %}
# Per-family maximum depth, one row per family that remains in trim_ped:
# 'before' is taken from new_ped, 'after' from trim_ped.
trim_smry = pd.DataFrame([[new_ped.depth[new_ped.fid == i].max() for i in trim_ped.fid.unique()],[trim_ped.depth[trim_ped.fid == i].max() for i in trim_ped.fid.unique()]])
# The frame is built as two long rows, so transpose to one row per family.
trim_smry = trim_smry.T
trim_smry.index = trim_ped.fid.unique()
trim_smry.columns = ['before','after']
trim_smry.hist()
array([[<AxesSubplot:title={'center':'before'}>,
        <AxesSubplot:title={'center':'after'}>]], dtype=object)
{% endraw %} {% raw %}
len(trim_smry.index[(trim_smry.before>3) & (trim_smry.after<4)])
188
{% endraw %} {% raw %}
sum(trim_smry.before>3)
201
{% endraw %} {% raw %}
sum(trim_smry.after>3)
13
{% endraw %} {% raw %}
dots1 = [plotped(new_ped[new_ped.fid ==i],output=True,folder='data/new_ADfam_depthless4') for i in trim_smry.index[trim_smry.before<4]]
{% endraw %} {% raw %}
dots2 = [plotped(new_ped[new_ped.fid ==i],output=True,folder='data/new_ADfam_trimless4') for i in trim_smry.index[(trim_smry.before>3) & (trim_smry.after<4)]]
{% endraw %} {% raw %}
dots3 = [plotped(new_ped[new_ped.fid ==i],output=True,folder='data/new_ADfam_trimmore4') for i in trim_smry.index[trim_smry.after>3]]
{% endraw %} {% raw %}
!cd data/
!tar -zcvf morethan4_aft_trim.tar.gz morethan4_aft_trim
{% endraw %}

2.3.1 Biggest families

{% raw %}
# Per-family row counts, one row per family that remains in trim_ped:
# 'before' counts rows in new_ped, 'after' counts rows in trim_ped.
trim_smry1 = pd.DataFrame([[len(new_ped[new_ped.fid == i]) for i in trim_ped.fid.unique()],[len(trim_ped[trim_ped.fid == i]) for i in trim_ped.fid.unique()]])
# The frame is built as two long rows, so transpose to one row per family.
trim_smry1 = trim_smry1.T
trim_smry1.index = trim_ped.fid.unique()
trim_smry1.columns = ['before','after']
{% endraw %} {% raw %}
trim_smry1.sort_values('before',ascending=False)[:10]
before after
4_649 373 5
3761 220 73
197 187 30
359 157 44
4_558 132 31
724 126 23
237 115 17
158 105 16
597 105 25
2 105 24
{% endraw %} {% raw %}
dots3 = [plotped(new_ped[new_ped.fid ==i],output=True,folder='data/bigest_families') for i in trim_smry1.sort_values('before',ascending=False)[:10].index]
{% endraw %} {% raw %}
!tar -zcvf data/bigest_families.tar.gz data/bigest_families/*.svg
data/bigest_families/ADfam_158.svg
data/bigest_families/ADfam_197.svg
data/bigest_families/ADfam_237.svg
data/bigest_families/ADfam_2.svg
data/bigest_families/ADfam_359.svg
data/bigest_families/ADfam_3761.svg
data/bigest_families/ADfam_4_558.svg
data/bigest_families/ADfam_4_649.svg
data/bigest_families/ADfam_597.svg
data/bigest_families/ADfam_724.svg
{% endraw %}

2.4 Plug-in missing parents after trimming and filtering

{% raw %}
all_parents = set(trim_ped.fathid[trim_ped.fathid!='0']).union(set(trim_ped.mothid[trim_ped.mothid!='0']))
{% endraw %} {% raw %}
missing_parents_ped = new_ped[new_ped.iid.isin(all_parents.difference(set(trim_ped.iid)))].copy()
{% endraw %} {% raw %}
missing_parents_ped.fathid = '0'
missing_parents_ped.mothid = '0'
{% endraw %}

Merge the missing parents into trim_ped and update the depth

{% raw %}
trim_ped = pd.concat([trim_ped,missing_parents_ped])
{% endraw %} {% raw %}
trim_ped = trim_ped.sort_values('fid')
{% endraw %} {% raw %}
# Recompute depths after the missing parents were added. `depth` doubles as
# the lookup table passed to get_depth: 0 means "not computed yet".
depth = [0]*len(trim_ped)
for i in range(len(trim_ped)):
    depth[i] = get_depth(trim_ped,i,depth)
{% endraw %} {% raw %}
trim_ped['depth'] = depth
{% endraw %} {% raw %}
plotped(new_ped[new_ped.fid =='4_558'])
%3 4_558_114 4_558_114 4_558_113x4_558_35 4_558_113x4_558_35->4_558_114 4_558_115 4_558_115 4_558_113x4_558_35->4_558_115 4_558_116 4_558_116 4_558_113x4_558_35->4_558_116 4_558_117 4_558_117 4_558_113x4_558_35->4_558_117 4_558_118 4_558_118 4_558_113x4_558_35->4_558_118 4_558_DCH130.15 4_558_DCH130.15 4_558_130x4_558_15 4_558_130x4_558_15->4_558_DCH130.15 4_558_22 4_558_22 4_558_4x4_558_35 4_558_4x4_558_35->4_558_22 4_558_41 4_558_41 4_558_4x4_558_35->4_558_41 4_558_43 4_558_43 4_558_4x4_558_35->4_558_43 4_558_45 4_558_45 4_558_4x4_558_35->4_558_45 4_558_32 4_558_32 4_558_11x4_558_40 4_558_11x4_558_40->4_558_32 4_558_19 4_558_19 4_558_34x4_558_3 4_558_34x4_558_3->4_558_19 4_558_20 4_558_20 4_558_34x4_558_3->4_558_20 4_558_21 4_558_21 4_558_34x4_558_3->4_558_21 4_558_44 4_558_44 4_558_34x4_558_3->4_558_44 4_558_97 4_558_97 4_558_34x4_558_3->4_558_97 4_558_25 4_558_25 4_558_37x4_558_7 4_558_37x4_558_7->4_558_25 4_558_31 4_558_31 4_558_39x4_558_10 4_558_39x4_558_10->4_558_31 4_558_56 4_558_56 4_558_39x4_558_10->4_558_56 4_558_57 4_558_57 4_558_39x4_558_10->4_558_57 4_558_62 4_558_62 4_558_4x4_558_64 4_558_4x4_558_64->4_558_62 4_558_63 4_558_63 4_558_4x4_558_64->4_558_63 4_558_51 4_558_51 4_558_59x4_558_9 4_558_59x4_558_9->4_558_51 4_558_42 4_558_42 4_558_59x4_558_9->4_558_42 4_558_52 4_558_52 4_558_59x4_558_9->4_558_52 4_558_53 4_558_53 4_558_59x4_558_9->4_558_53 4_558_54 4_558_54 4_558_59x4_558_9->4_558_54 4_558_55 4_558_55 4_558_59x4_558_9->4_558_55 4_558_23 4_558_23 4_558_6x4_558_48 4_558_6x4_558_48->4_558_23 4_558_46 4_558_46 4_558_6x4_558_48->4_558_46 4_558_47 4_558_47 4_558_6x4_558_48->4_558_47 4_558_24 4_558_24 4_558_6x4_558_36 4_558_6x4_558_36->4_558_24 4_558_49 4_558_49 4_558_6x4_558_36->4_558_49 4_558_50 4_558_50 4_558_6x4_558_36->4_558_50 4_558_26 4_558_26 4_558_67x4_558_7 4_558_67x4_558_7->4_558_26 4_558_73 4_558_73 4_558_75x4_558_72 4_558_75x4_558_72->4_558_73 4_558_92 4_558_92 4_558_75x4_558_72->4_558_92 4_558_93 4_558_93 4_558_75x4_558_72->4_558_93 
4_558_94 4_558_94 4_558_75x4_558_72->4_558_94 4_558_95 4_558_95 4_558_75x4_558_72->4_558_95 4_558_96 4_558_96 4_558_75x4_558_72->4_558_96 4_558_27 4_558_27 4_558_8x4_558_38 4_558_8x4_558_38->4_558_27 4_558_28 4_558_28 4_558_8x4_558_38->4_558_28 4_558_29 4_558_29 4_558_8x4_558_38->4_558_29 4_558_30 4_558_30 4_558_8x4_558_38->4_558_30 4_558_35 4_558_35 4_558_35->4_558_113x4_558_35 4_558_35->4_558_4x4_558_35 4_558_91x4_558_89 4_558_91x4_558_89->4_558_35 4_558_DCH98.74 4_558_DCH98.74 4_558_98x4_558_74 4_558_98x4_558_74->4_558_DCH98.74 4_558_15 4_558_15 4_558_15->4_558_130x4_558_15 4_558_99x4_558_33 4_558_99x4_558_33->4_558_15 4_558_16 4_558_16 4_558_99x4_558_33->4_558_16 4_558_17 4_558_17 4_558_99x4_558_33->4_558_17 4_558_18 4_558_18 4_558_99x4_558_33->4_558_18 4_558_8 4_558_8 4_558_8->4_558_8x4_558_38 4_558_1x4_558_2 4_558_1x4_558_2->4_558_8 4_558_99 4_558_99 4_558_1x4_558_2->4_558_99 4_558_3 4_558_3 4_558_1x4_558_2->4_558_3 4_558_4 4_558_4 4_558_1x4_558_2->4_558_4 4_558_5 4_558_5 4_558_1x4_558_2->4_558_5 4_558_6 4_558_6 4_558_1x4_558_2->4_558_6 4_558_7 4_558_7 4_558_1x4_558_2->4_558_7 4_558_99->4_558_99x4_558_33 4_558_3->4_558_34x4_558_3 4_558_4->4_558_4x4_558_35 4_558_4->4_558_4x4_558_64 4_558_6->4_558_6x4_558_48 4_558_6->4_558_6x4_558_36 4_558_7->4_558_37x4_558_7 4_558_7->4_558_67x4_558_7 4_558_108 4_558_108 4_558_107x4_558_106 4_558_107x4_558_106->4_558_108 4_558_111 4_558_111 4_558_107x4_558_106->4_558_111 4_558_112 4_558_112 4_558_107x4_558_106->4_558_112 4_558_64 4_558_64 4_558_64->4_558_4x4_558_64 4_558_119x4_558_120 4_558_119x4_558_120->4_558_64 4_558_124 4_558_124 4_558_123x4_558_87 4_558_123x4_558_87->4_558_124 4_558_125 4_558_125 4_558_123x4_558_87->4_558_125 4_558_128 4_558_128 4_558_126x4_558_127 4_558_126x4_558_127->4_558_128 4_558_39 4_558_39 4_558_126x4_558_127->4_558_39 4_558_39->4_558_39x4_558_10 4_558_89 4_558_89 4_558_89->4_558_91x4_558_89 4_558_13x4_558_90 4_558_13x4_558_90->4_558_89 4_558_11 4_558_11 4_558_11->4_558_11x4_558_40 4_558_58x4_558_2 
4_558_58x4_558_2->4_558_11 4_558_10 4_558_10 4_558_58x4_558_2->4_558_10 4_558_12 4_558_12 4_558_58x4_558_2->4_558_12 4_558_9 4_558_9 4_558_58x4_558_2->4_558_9 4_558_10->4_558_39x4_558_10 4_558_9->4_558_59x4_558_9 4_558_34 4_558_34 4_558_34->4_558_34x4_558_3 4_558_68x4_558_69 4_558_68x4_558_69->4_558_34 4_558_71 4_558_71 4_558_68x4_558_69->4_558_71 4_558_72 4_558_72 4_558_68x4_558_69->4_558_72 4_558_74 4_558_74 4_558_68x4_558_69->4_558_74 4_558_67 4_558_67 4_558_68x4_558_69->4_558_67 4_558_70 4_558_70 4_558_68x4_558_69->4_558_70 4_558_76 4_558_76 4_558_68x4_558_69->4_558_76 4_558_77 4_558_77 4_558_68x4_558_69->4_558_77 4_558_78 4_558_78 4_558_68x4_558_69->4_558_78 4_558_79 4_558_79 4_558_68x4_558_69->4_558_79 4_558_80 4_558_80 4_558_68x4_558_69->4_558_80 4_558_81 4_558_81 4_558_68x4_558_69->4_558_81 4_558_82 4_558_82 4_558_68x4_558_69->4_558_82 4_558_72->4_558_75x4_558_72 4_558_74->4_558_98x4_558_74 4_558_67->4_558_67x4_558_7 4_558_106 4_558_106 4_558_106->4_558_107x4_558_106 4_558_1x4_558_105 4_558_1x4_558_105->4_558_106 4_558_126 4_558_126 4_558_1x4_558_105->4_558_126 4_558_126->4_558_126x4_558_127 4_558_102 4_558_102 4_558_100x4_558_101 4_558_100x4_558_101->4_558_102 4_558_103 4_558_103 4_558_100x4_558_101->4_558_103 4_558_104 4_558_104 4_558_100x4_558_101->4_558_104 4_558_98 4_558_98 4_558_100x4_558_101->4_558_98 4_558_98->4_558_98x4_558_74 4_558_107 4_558_107 4_558_107->4_558_107x4_558_106 4_558_109x4_558_110 4_558_109x4_558_110->4_558_107 4_558_69 4_558_69 4_558_109x4_558_110->4_558_69 4_558_69->4_558_68x4_558_69 4_558_119 4_558_119 4_558_119->4_558_119x4_558_120 4_558_121x4_558_122 4_558_121x4_558_122->4_558_119 4_558_68 4_558_68 4_558_121x4_558_122->4_558_68 4_558_68->4_558_68x4_558_69 4_558_13 4_558_13 4_558_13->4_558_13x4_558_90 4_558_61x4_558_60 4_558_61x4_558_60->4_558_13 4_558_2 4_558_2 4_558_61x4_558_60->4_558_2 4_558_83 4_558_83 4_558_61x4_558_60->4_558_83 4_558_84 4_558_84 4_558_61x4_558_60->4_558_84 4_558_85 4_558_85 4_558_61x4_558_60->4_558_85 
4_558_87 4_558_87 4_558_61x4_558_60->4_558_87 4_558_88 4_558_88 4_558_61x4_558_60->4_558_88 4_558_2->4_558_1x4_558_2 4_558_2->4_558_58x4_558_2 4_558_87->4_558_123x4_558_87 4_558_59 4_558_59 4_558_59->4_558_59x4_558_9 4_558_65x4_558_129 4_558_65x4_558_129->4_558_59 4_558_33 4_558_33 4_558_33->4_558_99x4_558_33 4_558_40 4_558_40 4_558_40->4_558_11x4_558_40 4_558_1 4_558_1 4_558_1->4_558_1x4_558_2 4_558_1->4_558_1x4_558_105 4_558_100 4_558_100 4_558_100->4_558_100x4_558_101 4_558_101 4_558_101 4_558_101->4_558_100x4_558_101 4_558_105 4_558_105 4_558_105->4_558_1x4_558_105 4_558_109 4_558_109 4_558_109->4_558_109x4_558_110 4_558_110 4_558_110 4_558_110->4_558_109x4_558_110 4_558_113 4_558_113 4_558_113->4_558_113x4_558_35 4_558_120 4_558_120 4_558_120->4_558_119x4_558_120 4_558_121 4_558_121 4_558_121->4_558_121x4_558_122 4_558_122 4_558_122 4_558_122->4_558_121x4_558_122 4_558_123 4_558_123 4_558_123->4_558_123x4_558_87 4_558_127 4_558_127 4_558_127->4_558_126x4_558_127 4_558_129 4_558_129 4_558_129->4_558_65x4_558_129 4_558_130 4_558_130 4_558_130->4_558_130x4_558_15 4_558_36 4_558_36 4_558_36->4_558_6x4_558_36 4_558_37 4_558_37 4_558_37->4_558_37x4_558_7 4_558_38 4_558_38 4_558_38->4_558_8x4_558_38 4_558_48 4_558_48 4_558_48->4_558_6x4_558_48 4_558_58 4_558_58 4_558_58->4_558_58x4_558_2 4_558_60 4_558_60 4_558_60->4_558_61x4_558_60 4_558_61 4_558_61 4_558_61->4_558_61x4_558_60 4_558_65 4_558_65 4_558_65->4_558_65x4_558_129 4_558_75 4_558_75 4_558_75->4_558_75x4_558_72 4_558_90 4_558_90 4_558_90->4_558_13x4_558_90 4_558_91 4_558_91 4_558_91->4_558_91x4_558_89 4_558_AAAA 4_558_AAAA 4_558_CCCC 4_558_CCCC 4_558_BBBB 4_558_BBBB
{% endraw %} {% raw %}
plotped(new_ped[new_ped.fid =='3798'])
%3 3798_12 3798_12 3798_11x3798_99 3798_11x3798_99->3798_12 3798_13 3798_13 3798_11x3798_99->3798_13 3798_14 3798_14 3798_11x3798_99->3798_14 3798_16 3798_16 3798_15x3798_9 3798_15x3798_9->3798_16 3798_17 3798_17 3798_15x3798_9->3798_17 3798_18 3798_18 3798_15x3798_9->3798_18 3798_19 3798_19 3798_15x3798_9->3798_19 3798_20 3798_20 3798_15x3798_9->3798_20 3798_22 3798_22 3798_21x3798_10 3798_21x3798_10->3798_22 3798_23 3798_23 3798_21x3798_10->3798_23 3798_24 3798_24 3798_21x3798_10->3798_24 3798_26 3798_26 3798_21x3798_10->3798_26 3798_27 3798_27 3798_21x3798_10->3798_27 3798_28 3798_28 3798_21x3798_10->3798_28 3798_29 3798_29 3798_21x3798_10->3798_29 3798_30 3798_30 3798_21x3798_10->3798_30 3798_31 3798_31 3798_21x3798_10->3798_31 3798_99 3798_99 3798_99->3798_11x3798_99 3798_1x3798_2 3798_1x3798_2->3798_99 3798_9 3798_9 3798_1x3798_2->3798_9 3798_10 3798_10 3798_1x3798_2->3798_10 3798_3 3798_3 3798_1x3798_2->3798_3 3798_4 3798_4 3798_1x3798_2->3798_4 3798_5 3798_5 3798_1x3798_2->3798_5 3798_6 3798_6 3798_1x3798_2->3798_6 3798_7 3798_7 3798_1x3798_2->3798_7 3798_8 3798_8 3798_1x3798_2->3798_8 3798_25 3798_25 3798_1x3798_2->3798_25 3798_9->3798_15x3798_9 3798_10->3798_21x3798_10 3798_15 3798_15 3798_15->3798_15x3798_9 3798_21 3798_21 3798_21->3798_21x3798_10 3798_1 3798_1 3798_1->3798_1x3798_2 3798_2 3798_2 3798_2->3798_1x3798_2 3798_11 3798_11 3798_11->3798_11x3798_99
{% endraw %} {% raw %}
plotped(new_ped[new_ped.fid =='348'])
%3 348_42 348_42 348_3x348_32 348_3x348_32->348_42 348_41 348_41 348_3x348_32->348_41 348_40 348_40 348_3x348_32->348_40 348_39 348_39 348_3x348_32->348_39 348_38 348_38 348_3x348_32->348_38 348_37 348_37 348_3x348_32->348_37 348_36 348_36 348_3x348_32->348_36 348_35 348_35 348_3x348_32->348_35 348_34 348_34 348_3x348_32->348_34 348_DCH55.11 348_DCH55.11 348_55x348_11 348_55x348_11->348_DCH55.11 348_53 348_53 348_8x348_54 348_8x348_54->348_53 348_52 348_52 348_8x348_54->348_52 348_51 348_51 348_8x348_54->348_51 348_50 348_50 348_8x348_54->348_50 348_49 348_49 348_8x348_54->348_49 348_48 348_48 348_8x348_54->348_48 348_47 348_47 348_8x348_54->348_47 348_46 348_46 348_8x348_54->348_46 348_45 348_45 348_8x348_54->348_45 348_44 348_44 348_8x348_54->348_44 348_43 348_43 348_8x348_54->348_43 348_31 348_31 348_99x348_33 348_99x348_33->348_31 348_30 348_30 348_99x348_33->348_30 348_29 348_29 348_99x348_33->348_29 348_28 348_28 348_99x348_33->348_28 348_27 348_27 348_99x348_33->348_27 348_26 348_26 348_99x348_33->348_26 348_25 348_25 348_99x348_33->348_25 348_24 348_24 348_99x348_33->348_24 348_23 348_23 348_99x348_33->348_23 348_22 348_22 348_99x348_33->348_22 348_21 348_21 348_99x348_33->348_21 348_20 348_20 348_99x348_33->348_20 348_19 348_19 348_99x348_33->348_19 348_18 348_18 348_99x348_33->348_18 348_17 348_17 348_99x348_33->348_17 348_16 348_16 348_99x348_33->348_16 348_15 348_15 348_99x348_33->348_15 348_14 348_14 348_99x348_33->348_14 348_9 348_9 348_1x348_2 348_1x348_2->348_9 348_11 348_11 348_1x348_2->348_11 348_99 348_99 348_1x348_2->348_99 348_13 348_13 348_1x348_2->348_13 348_12 348_12 348_1x348_2->348_12 348_10 348_10 348_1x348_2->348_10 348_8 348_8 348_1x348_2->348_8 348_7 348_7 348_1x348_2->348_7 348_6 348_6 348_1x348_2->348_6 348_5 348_5 348_1x348_2->348_5 348_4 348_4 348_1x348_2->348_4 348_3 348_3 348_1x348_2->348_3 348_11->348_55x348_11 348_99->348_99x348_33 348_8->348_8x348_54 348_3->348_3x348_32 348_54 348_54 348_54->348_8x348_54 348_33 348_33 
348_33->348_99x348_33 348_32 348_32 348_32->348_3x348_32 348_2 348_2 348_2->348_1x348_2 348_1 348_1 348_1->348_1x348_2 348_55 348_55 348_55->348_55x348_11
{% endraw %} {% raw %}
missing_parents_ped.ad.value_counts()
-9    301
 1    117
 2      8
Name: ad, dtype: int64
{% endraw %} {% raw %}
missing_parents_ped.vcf.value_counts()
False    411
True      15
Name: vcf, dtype: int64
{% endraw %} {% raw %}
missing_parents_ped.ad.value_counts()
-9    301
 1    117
 2      8
Name: ad, dtype: int64
{% endraw %}

Cut trees with depth equal to 1 and trees with only one vcf

{% raw %}
#    trees = None
#    for i in ped.fid.unique():
def merge_trees(trees):
    """Merge overlapping node collections into disjoint groups.

    Two collections end up in the same group when they share at least one
    node, directly or through a chain of other collections.

    Parameters
    ----------
    trees : sequence of iterables
        Node collections (e.g. lists of individual ids). Not modified.

    Returns
    -------
    list of set
        One set per merged group, ordered by the position of the first
        input collection belonging to each group. Empty input gives an
        empty list; empty collections contribute no group.
    """
    pending = [set(nodes) for nodes in trees]
    merged = []
    while pending:
        current, pending = pending[0], pending[1:]
        grew = True
        while grew:
            # Re-scan until a full pass absorbs nothing: a collection that
            # did not overlap earlier may overlap once `current` has grown.
            grew = False
            leftover = []
            for nodes in pending:
                if current & nodes:
                    current |= nodes
                    grew = True
                else:
                    leftover.append(nodes)
            pending = leftover
        if current:
            merged.append(current)
    return merged

def get_trees(fi):
    '''Return one node list per depth-1 row of ``fi``.

    Each row with ``depth == 1`` is used as a starting point and
    ``get_desnodes`` is called on its row position with a fresh list.
    The result has one entry per starting row, in row order.
    '''
    root_positions = np.where(fi.depth == 1)[0]
    return [get_desnodes(fi, root, []) for root in root_positions]
def get_desnodes(fi,i,value):
    '''Return the ids of the row at position ``i`` and of all its descendants.

    A row is a child of an individual when its ``fathid`` or ``mothid``
    equals that individual's ``iid``. The pedigree is walked iteratively
    with a visited set, so each individual is expanded once and a cyclic
    pedigree cannot cause unbounded recursion.

    Parameters
    ----------
    fi : pandas.DataFrame
        Rows of one family with ``iid``, ``fathid`` and ``mothid`` columns.
    i : int
        Row position of the starting individual.
    value : list
        Accumulator. Ids found here that it does not contain yet are
        appended to it; whatever it already holds is part of the result.

    Returns
    -------
    list
        Unique ids from ``value`` plus the starting individual and its
        descendants, in no particular order.
    '''
    root = fi.iid.iloc[i]
    found = {root}
    frontier = [root]
    while frontier:
        parent = frontier.pop()
        is_child = (fi.fathid == parent) | (fi.mothid == parent)
        for child in fi.iid[is_child]:
            if child not in found:
                found.add(child)
                frontier.append(child)
    # Keep the accumulator semantics for callers that pass a shared list.
    value.extend(found.difference(value))
    return list(set(value))
{% endraw %} {% raw %}
# Sanity check: print every family for which get_trees returns nothing,
# i.e. a family without any row at depth == 1.
for i in trim_ped.fid.unique():
    if len(get_trees(trim_ped[trim_ped.fid ==i]))==0:
        print(i)
{% endraw %} {% raw %}
tree_nodes = [merge_trees(get_trees(trim_ped[trim_ped.fid ==i])) for i in trim_ped.fid.unique()]
{% endraw %} {% raw %}
tree_nodes[0]
[{'1005_1', '1005_2', '1005_99'}]
{% endraw %} {% raw %}
def label_subtree(trim_ped,tree_nodes):
    """Give every row of ``trim_ped`` the index of the sub-tree containing its iid.

    Parameters
    ----------
    trim_ped : DataFrame with ``fid`` and ``iid`` columns
    tree_nodes : sequence
        One entry per family, in the order of ``trim_ped.fid.unique()``;
        each entry is a sequence of node collections (the sub-trees).

    Returns
    -------
    list
        One label per row, in row order: the position of the sub-tree
        within its family's entry, or ``None`` when the iid is in none of
        them. If an iid appears in several sub-trees the last one wins.
    """
    # Per family: iid -> sub-tree index (later sub-trees overwrite earlier).
    labels_by_family = {
        fid: {node: index for index, nodes in enumerate(node_list) for node in nodes}
        for fid, node_list in zip(trim_ped.fid.unique(), tree_nodes)
    }
    # Emit labels row by row so the result lines up with trim_ped even when
    # the rows of one family are not stored contiguously.
    return [
        labels_by_family.get(fid, {}).get(iid)
        for fid, iid in zip(trim_ped.fid, trim_ped.iid)
    ]
{% endraw %} {% raw %}
tag = label_subtree(trim_ped,tree_nodes)
{% endraw %} {% raw %}
tmp = [i+':'+str(j) for i,j in zip(trim_ped.fid,tag)]
{% endraw %} {% raw %}
trim_ped['tag'] = tmp
{% endraw %}

3.1 Remove sub-trees with zero or only one VCF sample

{% raw %}
# Tags whose rows contain at most one True in the vcf column.
one_vcf_list = [
    tag_name
    for tag_name in set(trim_ped.tag)
    if sum(trim_ped.vcf[trim_ped.tag == tag_name]) <= 1
]
{% endraw %} {% raw %}
new_trim_ped = trim_ped[~trim_ped.tag.isin(one_vcf_list)].copy()
{% endraw %} {% raw %}
new_trim_ped.fid = new_trim_ped.tag
{% endraw %} {% raw %}
new_trim_ped.to_csv('data/new_trim_ped.csv',header=True,index=False)
{% endraw %} {% raw %}
new_trim_ped.iloc[:,:-4].sort_values('fid').to_csv('data/new_trim_ped_fam.fam',header=False,index=False,sep='\t')
{% endraw %}

3.2 Explore trimmed pedigrees

{% raw %}
import pandas as pd
{% endraw %} {% raw %}
new_trim_ped = pd.read_csv('../data/new_trim_ped.csv')
{% endraw %} {% raw %}
tmp = new_trim_ped.fid.value_counts()
{% endraw %} {% raw %}
famless17 = new_trim_ped[new_trim_ped.fid.isin(tmp[tmp<17].index)]
{% endraw %} {% raw %}
famless17.iloc[:,:-4].sort_values('fid').to_csv('../data/new_trim_ped_famless17.fam',header=False,index=False,sep='\t')
{% endraw %} {% raw %}
# Select rows whose tag ends in ':0'. Matching the whole suffix (not just the
# last character) keeps labels such as ':10' out of the selection.
tmp = [x.endswith(':0') for x in famless17.tag]
{% endraw %} {% raw %}
# Take an independent copy so that assigning to tmp.fid afterwards does not
# write into a slice of famless17 (avoids pandas' SettingWithCopyWarning).
tmp = famless17[tmp].copy()
{% endraw %} {% raw %}
tmp.fid = [x[:-2] for x in tmp.fid]
/home/yh3455/miniconda3/envs/seqpy3v0/lib/python3.9/site-packages/pandas/core/generic.py:5516: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value
{% endraw %} {% raw %}
tmp.iloc[:,:-4].sort_values('fid').to_csv('../data/new_trim_ped_famless17_no:xx.fam',header=False,index=False,sep='\t')
{% endraw %} {% raw %}
famless17.fid[tmp].value_counts()
411:0        16
199:0        16
3593:0       16
264:0        16
10R_R54:0    16
             ..
25_80:0       4
7_113:0       4
1317:0        4
26_EL:0       4
990:0         4
Name: fid, Length: 479, dtype: int64
{% endraw %} {% raw %}
tmp = new_trim_ped.fathid.value_counts()
{% endraw %} {% raw %}
tmp
0                  1820
10R_R114_2           15
530_1                12
10J_128_1            11
285_1                11
                   ... 
26_PBO_PBO03413       1
4_557_10              1
4_557_8               1
4_557_15              1
1007_40               1
Name: fathid, Length: 1198, dtype: int64
{% endraw %} {% raw %}
new_trim_ped.mothid.value_counts()
0                  1820
10R_R114_3           15
530_2                12
10J_128_2            11
10R_R57_3            10
                   ... 
4_558_38              1
4_558_72              1
26_HTB_HTB24426       1
4_558_33              1
1007_4                1
Name: mothid, Length: 1201, dtype: int64
{% endraw %} {% raw %}
new_trim_ped[new_trim_ped.fid.isin(['10R_R114:0'])]
fid iid fathid mothid sex ad vcf depth trim tag
154 10R_R114:0 10R_R114_1 10R_R114_2 10R_R114_3 2 2 True 2 False 10R_R114:0
155 10R_R114:0 10R_R114_4 10R_R114_2 10R_R114_3 2 1 True 2 False 10R_R114:0
156 10R_R114:0 10R_R114_9 10R_R114_2 10R_R114_3 2 -9 True 2 False 10R_R114:0
157 10R_R114:0 10R_R114_12 10R_R114_2 10R_R114_3 2 1 True 2 False 10R_R114:0
158 10R_R114:0 10R_R114_14 10R_R114_2 10R_R114_3 2 1 True 2 False 10R_R114:0
159 10R_R114:0 10R_R114_15 10R_R114_2 10R_R114_3 1 1 True 2 False 10R_R114:0
160 10R_R114:0 10R_R114_18 10R_R114_2 10R_R114_3 2 1 True 2 False 10R_R114:0
161 10R_R114:0 10R_R114_42 10R_R114_2 10R_R114_3 2 1 True 2 False 10R_R114:0
162 10R_R114:0 10R_R114_5 10R_R114_2 10R_R114_3 2 1 True 2 False 10R_R114:0
163 10R_R114:0 10R_R114_20 10R_R114_2 10R_R114_3 2 -9 True 2 False 10R_R114:0
164 10R_R114:0 10R_R114_10 10R_R114_2 10R_R114_3 2 2 False 2 False 10R_R114:0
165 10R_R114:0 10R_R114_11 10R_R114_2 10R_R114_3 2 2 False 2 False 10R_R114:0
166 10R_R114:0 10R_R114_6 10R_R114_2 10R_R114_3 2 2 False 2 False 10R_R114:0
167 10R_R114:0 10R_R114_8 10R_R114_2 10R_R114_3 2 2 False 2 False 10R_R114:0
168 10R_R114:0 10R_R114_2 0 0 1 1 False 1 False 10R_R114:0
169 10R_R114:0 10R_R114_3 0 0 2 2 False 1 False 10R_R114:0
170 10R_R114:0 10R_R114_16 10R_R114_2 10R_R114_3 1 -9 True 2 False 10R_R114:0
{% endraw %} {% raw %}
tmp = trim_ped.copy()
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
/tmp/2206534.1.plot.q/ipykernel_11876/2044668595.py in <module>
----> 1 tmp = trim_ped.copy()

NameError: name 'trim_ped' is not defined
{% endraw %} {% raw %}
tmp.trim = tag
{% endraw %} {% raw %}
len(set(tmp.fid[tmp.trim>1]))
{% endraw %}

3.3 Plot trimmed pedigrees

{% raw %}
import shutil
{% endraw %} {% raw %}
from pathlib import Path
{% endraw %} {% raw %}
[shutil.copy2('data/trim_ped_plot/ADfam_'+i+'.svg', 'data/multiple_subtrees/') for i in set(tmp.fid[tmp.trim>1])] # target filename is /dst/dir/file.ext
['data/multiple_subtrees/ADfam_223.svg',
 'data/multiple_subtrees/ADfam_597.svg',
 'data/multiple_subtrees/ADfam_508.svg',
 'data/multiple_subtrees/ADfam_3324.svg',
 'data/multiple_subtrees/ADfam_197.svg',
 'data/multiple_subtrees/ADfam_1317.svg',
 'data/multiple_subtrees/ADfam_4_680.svg',
 'data/multiple_subtrees/ADfam_3761.svg',
 'data/multiple_subtrees/ADfam_4_393.svg',
 'data/multiple_subtrees/ADfam_546.svg',
 'data/multiple_subtrees/ADfam_359.svg',
 'data/multiple_subtrees/ADfam_216.svg',
 'data/multiple_subtrees/ADfam_591.svg',
 'data/multiple_subtrees/ADfam_4_162.svg',
 'data/multiple_subtrees/ADfam_215.svg']
{% endraw %} {% raw %}
!tar -zcvf data/multiple_subtrees.tar.gz data/multiple_subtrees
data/multiple_subtrees/
data/multiple_subtrees/ADfam_215.svg
data/multiple_subtrees/ADfam_4_162.svg
data/multiple_subtrees/ADfam_591.svg
data/multiple_subtrees/ADfam_216.svg
data/multiple_subtrees/ADfam_359.svg
data/multiple_subtrees/ADfam_546.svg
data/multiple_subtrees/ADfam_4_393.svg
data/multiple_subtrees/ADfam_3761.svg
data/multiple_subtrees/ADfam_4_680.svg
data/multiple_subtrees/ADfam_1317.svg
data/multiple_subtrees/ADfam_197.svg
data/multiple_subtrees/ADfam_3324.svg
data/multiple_subtrees/ADfam_508.svg
data/multiple_subtrees/ADfam_597.svg
data/multiple_subtrees/ADfam_223.svg
{% endraw %} {% raw %}
def copyfile(file_path, dest='../data/multiple_subtrees/'):
    """Copy ``file_path`` into ``dest`` if it is an existing regular file.

    Parameters
    ----------
    file_path : str or path-like
        File to copy; anything that is not an existing file is skipped.
    dest : str or path-like
        Target directory (or file name) handed to ``shutil.copy2``.
        Defaults to the folder used throughout this notebook.

    Returns
    -------
    The path of the copy as returned by ``shutil.copy2``, or ``None`` when
    nothing was copied.
    """
    if not Path(file_path).is_file():
        return None
    return shutil.copy2(file_path, dest)
# For a fixed list of family ids, try each of the three plot folders and copy
# the .png that exists (copyfile skips paths that are not files).
# NOTE: the file name is the same in all three folders (ADfam_<id>.png), so if
# more than one folder holds a plot for an id, the copy made last replaces the
# earlier one in the destination.
for i in [223,597,508,3324,197,1317,'4_680',3761,'4_393',546,359,216,591,'4_162',215]:
    copyfile('../data/new_ADfam_depthless4/ADfam_'+str(i)+'.png')
    copyfile('../data/new_ADfam_trimmore4/ADfam_'+str(i)+'.png')
    copyfile('../data/new_ADfam_trimless4/ADfam_'+str(i)+'.png')
{% endraw %} {% raw %}
tmp.sort_values('trim')[-20:]
fid iid fathid mothid sex ad vcf depth trim tag
3761_162 3761 3761_162 0 0 2 -9 False 1 4 3761:4
3761_50 3761 3761_50 0 0 1 -9 False 1 4 3761:4
3761_52 3761 3761_52 3761_50 3761_51 2 2 True 2 4 3761:4
3761_61 3761 3761_61 3761_57 3761_54 2 1 True 3 4 3761:4
3761_59 3761 3761_59 3761_57 3761_54 1 2 False 3 4 3761:4
3761_175 3761 3761_175 3761_170 3761_171 1 2 True 3 4 3761:4
3761_161 3761 3761_161 0 0 1 -9 False 1 4 3761:4
3761_183 3761 3761_183 3761_170 3761_169 1 2 True 3 4 3761:4
3761_176 3761 3761_176 3761_170 3761_171 2 1 True 3 4 3761:4
3761_166 3761 3761_166 3761_161 3761_162 2 2 True 2 4 3761:4
3761_170 3761 3761_170 3761_50 3761_51 1 -9 False 2 4 3761:4
3761_57 3761 3761_57 0 0 1 -9 False 1 4 3761:4
3761_53 3761 3761_53 3761_50 3761_51 2 2 True 2 4 3761:4
3761_169 3761 3761_169 3761_161 3761_162 2 -9 False 2 4 3761:4
359_43 359 359_43 0 0 1 -9 False 1 5 359:5
359_42 359 359_42 0 0 2 -9 False 1 5 359:5
3761_123 3761 3761_123 3761_108 3761_111 2 2 True 2 5 3761:5
3761_108 3761 3761_108 0 0 1 -9 False 1 5 3761:5
3761_111 3761 3761_111 0 0 2 2 True 1 5 3761:5
359_45 359 359_45 359_43 359_42 2 2 True 2 5 359:5
{% endraw %} {% raw %}
len(set(tmp.fid))
748
{% endraw %} {% raw %}
dots4 = [plotped1(tmp[tmp.fid ==i],output=True,folder='data/trim_ped_plot') for i in set(tmp.fid)]
{% endraw %} {% raw %}
plotped1(tmp[tmp.fid =='3761'])
%3 3761_92 3761_92 3761_48x3761_92 3761_92->3761_48x3761_92 3761_96 3761_96 3761_48x3761_92->3761_96 3761_40 3761_40 3761_39x3761_40 3761_40->3761_39x3761_40 3761_45 3761_45 3761_39x3761_40->3761_45 3761_42 3761_42 3761_39x3761_40->3761_42 3761_43 3761_43 3761_39x3761_40->3761_43 3761_44 3761_44 3761_39x3761_40->3761_44 3761_48 3761_48 3761_39x3761_40->3761_48 3761_153 3761_153 3761_39x3761_40->3761_153 3761_115 3761_115 3761_115x3761_110 3761_115->3761_115x3761_110 3761_119 3761_119 3761_115x3761_110->3761_119 3761_154 3761_154 3761_153x3761_154 3761_154->3761_153x3761_154 3761_157 3761_157 3761_153x3761_154->3761_157 3761_185 3761_185 3761_142x3761_185 3761_185->3761_142x3761_185 3761_186 3761_186 3761_142x3761_185->3761_186 3761_213 3761_213 3761_42x3761_213 3761_213->3761_42x3761_213 3761_217 3761_217 3761_42x3761_213->3761_217 3761_2 3761_2 3761_1x3761_2 3761_2->3761_1x3761_2 3761_3 3761_3 3761_1x3761_2->3761_3 3761_4 3761_4 3761_1x3761_2->3761_4 3761_9 3761_9 3761_1x3761_2->3761_9 3761_99 3761_99 3761_1x3761_2->3761_99 3761_10 3761_10 3761_99x3761_10 3761_10->3761_99x3761_10 3761_16 3761_16 3761_99x3761_10->3761_16 3761_13 3761_13 3761_99x3761_10->3761_13 3761_11 3761_11 3761_99x3761_10->3761_11 3761_51 3761_51 3761_50x3761_51 3761_51->3761_50x3761_51 3761_170 3761_170 3761_50x3761_51->3761_170 3761_52 3761_52 3761_50x3761_51->3761_52 3761_54 3761_54 3761_50x3761_51->3761_54 3761_53 3761_53 3761_50x3761_51->3761_53 3761_161 3761_161 3761_161x3761_162 3761_161->3761_161x3761_162 3761_166 3761_166 3761_161x3761_162->3761_166 3761_169 3761_169 3761_161x3761_162->3761_169 3761_62 3761_62 3761_45x3761_62 3761_62->3761_45x3761_62 3761_69 3761_69 3761_45x3761_62->3761_69 3761_67 3761_67 3761_45x3761_62->3761_67 3761_66 3761_66 3761_45x3761_62->3761_66 3761_63 3761_63 3761_45x3761_62->3761_63 3761_112 3761_112 3761_109x3761_112 3761_112->3761_109x3761_112 3761_139 3761_139 3761_109x3761_112->3761_139 3761_141 3761_141 3761_109x3761_112->3761_141 3761_193 3761_193 
3761_109x3761_112->3761_193 3761_142 3761_142 3761_109x3761_112->3761_142 3761_138 3761_138 3761_109x3761_112->3761_138 3761_114 3761_114 3761_114x3761_110 3761_114->3761_114x3761_110 3761_121 3761_121 3761_114x3761_110->3761_121 3761_143 3761_143 3761_138x3761_143 3761_143->3761_138x3761_143 3761_129 3761_129 3761_138x3761_143->3761_129 3761_150 3761_150 3761_138x3761_143->3761_150 3761_162 3761_162 3761_162->3761_161x3761_162 3761_171 3761_171 3761_170x3761_171 3761_171->3761_170x3761_171 3761_176 3761_176 3761_170x3761_171->3761_176 3761_175 3761_175 3761_170x3761_171->3761_175 3761_194 3761_194 3761_194x3761_193 3761_194->3761_194x3761_193 3761_211 3761_211 3761_194x3761_193->3761_211 3761_200 3761_200 3761_194x3761_193->3761_200 3761_202 3761_202 3761_202x3761_141 3761_202->3761_202x3761_141 3761_208 3761_208 3761_202x3761_141->3761_208 3761_206 3761_206 3761_202x3761_141->3761_206 3761_82 3761_82 3761_44x3761_82 3761_82->3761_44x3761_82 3761_86 3761_86 3761_44x3761_82->3761_86 3761_57 3761_57 3761_57x3761_54 3761_57->3761_57x3761_54 3761_59 3761_59 3761_57x3761_54->3761_59 3761_61 3761_61 3761_57x3761_54->3761_61 3761_74 3761_74 3761_74x3761_43 3761_74->3761_74x3761_43 3761_184 3761_184 3761_74x3761_43->3761_184 3761_75 3761_75 3761_74x3761_43->3761_75 3761_78 3761_78 3761_74x3761_43->3761_78 3761_170->3761_170x3761_171 3761_170x3761_169 3761_170->3761_170x3761_169 3761_183 3761_183 3761_170x3761_169->3761_183 3761_45->3761_45x3761_62 3761_99->3761_99x3761_10 3761_123 3761_123 3761_108x3761_111 3761_108x3761_111->3761_123 3761_141->3761_202x3761_141 3761_193->3761_194x3761_193 3761_142->3761_142x3761_185 3761_169->3761_170x3761_169 3761_42->3761_42x3761_213 3761_43->3761_74x3761_43 3761_44->3761_44x3761_82 3761_138->3761_138x3761_143 3761_48->3761_48x3761_92 3761_110 3761_110 3761_110->3761_115x3761_110 3761_110->3761_114x3761_110 3761_108 3761_108 3761_108->3761_108x3761_111 3761_50 3761_50 3761_50->3761_50x3761_51 3761_109 3761_109 
3761_109->3761_109x3761_112 3761_1 3761_1 3761_1->3761_1x3761_2 3761_111 3761_111 3761_111->3761_108x3761_111 3761_54->3761_57x3761_54 3761_39 3761_39 3761_39->3761_39x3761_40 3761_153->3761_153x3761_154
{% endraw %} {% raw %}
from graphviz import Digraph
def update_attributes(r):
    """Build the graphviz node attributes for one pedigree row.

    Parameters
    ----------
    r : object with ``sex``, ``ad`` and ``vcf`` attributes and, optionally,
        ``trim`` (for example a DataFrame row).

    Returns
    -------
    dict
        Node attributes: ``shape`` is looked up from ``r.sex``,
        ``fillcolor`` from ``r.ad`` and ``style`` from ``r.vcf``.
        ``fontcolor`` is looked up from ``r.trim`` when that value is
        available and known, otherwise it stays ``'black'``.

    Raises
    ------
    KeyError
        If ``r.sex``, ``r.ad`` or ``r.vcf`` is not one of the known codes.
    """
    attributes={'shape':'polygon','height':'0.3','width':'0.5','regular':'0','style':"filled,setlinewidth(4)",'fontcolor':'black'}
    gender = {"m":"box","1":"box",1:"box","f":"ellipse","2":"ellipse",2:"ellipse",0:"polygon"}
    trait = {1:'white',2:'dimgrey',-9:'aquamarine3'}
    vcf = {True:"filled,setlinewidth(4)",False:"filled"}
    trim = {0:"aqua",1:"red",2:"blue",3:"orange",4:"yellow",5:"green"}
    attributes['shape'] = gender[r.sex]
    attributes['fillcolor']=trait[r.ad]
    attributes['style']=vcf[r.vcf]
    try:
        attributes['fontcolor'] = trim[r.trim]
    except (AttributeError, KeyError, TypeError):
        # The trim label is optional: a missing attribute, an unknown label
        # or an unhashable value leaves the default font colour in place.
        # Any other error is a real problem and is allowed to propagate.
        pass
    return attributes

def plotped1(fi,output=False,folder='',format='svg'):
    """Draw one family's pedigree as a Graphviz ``Digraph``.

    Graph layout:

    * one node per row of ``fi`` (named by ``iid``), styled by
      ``update_attributes``;
    * one small unlabeled diamond node per couple, named
      ``"<fathid>x<mothid>"``, for every row whose ``fathid`` and ``mothid``
      both differ from ``'0'``;
    * edges couple -> child, and person -> every couple node that person
      belongs to as father or mother.

    Parameters
    ----------
    fi : pandas.DataFrame
        Rows of a single family. Columns read here: ``iid``, ``fathid``,
        ``mothid`` and (only when ``output`` is true) ``fid``, plus whatever
        ``update_attributes`` reads. The id columns must hold strings because
        they are concatenated with ``'x'``.
    output : bool
        When true, render the graph to
        ``<folder>/ADfam_<first fid in fi>.<format>``.
    folder : str
        Output directory. The default ``''`` writes relative to the current
        working directory.
    format : str
        Graphviz output format, also used as the file extension.

    Returns
    -------
    graphviz.Digraph
        The constructed graph, whether or not it was rendered.
    """
    import os  # local import keeps this notebook cell self-contained

    dot = Digraph()
    parents = []
    for _, r in fi.iterrows():
        # NOTE(review): shape='box' looks like a placeholder only, since
        # update_attributes always supplies its own 'shape' entry -- confirm.
        dot.node(r.iid,shape='box',_attributes=update_attributes(r))
        if r.fathid !='0' and r.mothid !='0':
            couple = r.fathid+'x'+r.mothid
            parents.append(couple)
            dot.edge(couple,r.iid)
        # Rows in which this individual appears as father or mother.
        ch_r = (fi.fathid==r.iid) | (fi.mothid==r.iid)
        if ch_r.any():
            # One edge per distinct couple, however many children they share.
            for p in fi[ch_r][['fathid','mothid']].agg('x'.join, axis=1).unique():
                dot.edge(r.iid,p)
    # dict.fromkeys de-duplicates while keeping first-seen order, so the node
    # declarations come out in the same order on every run (unlike set()).
    for p in dict.fromkeys(parents):
        dot.node(p,shape='diamond',label='',height='.1',width='.1')
    if output:
        # os.path.join avoids the old folder+'/...' concatenation, which
        # pointed at the filesystem root when folder was left at ''.
        outfile = os.path.join(folder, 'ADfam_'+str(fi.fid.iloc[0])+'.'+format)
        dot.render(outfile=outfile,format=format,overwrite_source=True)
    return dot
{% endraw %} {% raw %}
plotped(trim_ped[trim_ped.fid =='359'])
%3 359_61 359_61 359_3x359_73 359_3x359_73->359_61 359_32 359_32 359_34x359_9 359_34x359_9->359_32 359_111 359_111 359_4x359_110 359_4x359_110->359_111 359_67 359_67 359_68x359_11 359_68x359_11->359_67 359_97 359_97 359_7x359_96 359_7x359_96->359_97 359_53 359_53 359_72x359_8 359_72x359_8->359_53 359_17 359_17 359_99x359_16 359_99x359_16->359_17 359_19 359_19 359_99x359_16->359_19 359_12 359_12 359_1x359_2 359_1x359_2->359_12 359_3 359_3 359_1x359_2->359_3 359_4 359_4 359_1x359_2->359_4 359_9 359_9 359_1x359_2->359_9 359_99 359_99 359_1x359_2->359_99 359_11 359_11 359_1x359_2->359_11 359_10 359_10 359_1x359_2->359_10 359_8 359_8 359_1x359_2->359_8 359_7 359_7 359_1x359_2->359_7 359_3->359_3x359_73 359_4->359_4x359_110 359_9->359_34x359_9 359_99->359_99x359_16 359_11->359_68x359_11 359_8->359_72x359_8 359_7->359_7x359_96 359_106 359_106 359_107x359_48 359_107x359_48->359_106 359_121 359_121 359_118x359_119 359_118x359_119->359_121 359_45 359_45 359_43x359_42 359_43x359_42->359_45 359_51 359_51 359_70x359_71 359_70x359_71->359_51 359_52 359_52 359_70x359_71->359_52 359_84 359_84 359_79x359_78 359_79x359_78->359_84 359_103 359_103 359_79x359_78->359_103 359_81 359_81 359_79x359_78->359_81 359_1 359_1 359_1->359_1x359_2 359_70 359_70 359_70->359_70x359_71 359_78 359_78 359_78->359_79x359_78 359_118 359_118 359_118->359_118x359_119 359_143 359_143 359_107x359_131 359_107x359_131->359_143 359_132 359_132 359_107x359_131->359_132 359_2 359_2 359_2->359_1x359_2 359_42 359_42 359_42->359_43x359_42 359_48 359_48 359_48->359_107x359_48 359_79 359_79 359_79->359_79x359_78 359_16 359_16 359_16->359_99x359_16 359_43 359_43 359_43->359_43x359_42 359_71 359_71 359_71->359_70x359_71 359_68 359_68 359_68->359_68x359_11 359_96 359_96 359_96->359_7x359_96 359_107 359_107 359_107->359_107x359_48 359_107->359_107x359_131 359_119 359_119 359_119->359_118x359_119 359_131 359_131 359_131->359_107x359_131
{% endraw %} {% raw %}
plotped(trim_ped[trim_ped.fid =='359'])
%3 359_61 359_61 359_3x359_73 359_3x359_73->359_61 359_32 359_32 359_34x359_9 359_34x359_9->359_32 359_111 359_111 359_4x359_110 359_4x359_110->359_111 359_67 359_67 359_68x359_11 359_68x359_11->359_67 359_97 359_97 359_7x359_96 359_7x359_96->359_97 359_53 359_53 359_72x359_8 359_72x359_8->359_53 359_17 359_17 359_99x359_16 359_99x359_16->359_17 359_19 359_19 359_99x359_16->359_19 359_12 359_12 359_1x359_2 359_1x359_2->359_12 359_3 359_3 359_1x359_2->359_3 359_4 359_4 359_1x359_2->359_4 359_9 359_9 359_1x359_2->359_9 359_99 359_99 359_1x359_2->359_99 359_11 359_11 359_1x359_2->359_11 359_10 359_10 359_1x359_2->359_10 359_8 359_8 359_1x359_2->359_8 359_7 359_7 359_1x359_2->359_7 359_3->359_3x359_73 359_4->359_4x359_110 359_9->359_34x359_9 359_99->359_99x359_16 359_11->359_68x359_11 359_8->359_72x359_8 359_7->359_7x359_96 359_106 359_106 359_107x359_48 359_107x359_48->359_106 359_121 359_121 359_118x359_119 359_118x359_119->359_121 359_45 359_45 359_43x359_42 359_43x359_42->359_45 359_51 359_51 359_70x359_71 359_70x359_71->359_51 359_52 359_52 359_70x359_71->359_52 359_84 359_84 359_79x359_78 359_79x359_78->359_84 359_103 359_103 359_79x359_78->359_103 359_81 359_81 359_79x359_78->359_81 359_1 359_1 359_1->359_1x359_2 359_104x359_105 359_104x359_105->359_1 359_70 359_70 359_104x359_105->359_70 359_78 359_78 359_104x359_105->359_78 359_118 359_118 359_104x359_105->359_118 359_70->359_70x359_71 359_78->359_79x359_78 359_118->359_118x359_119 359_143 359_143 359_107x359_131 359_107x359_131->359_143 359_132 359_132 359_107x359_131->359_132 359_2 359_2 359_2->359_1x359_2 359_50x359_49 359_50x359_49->359_2 359_42 359_42 359_50x359_49->359_42 359_48 359_48 359_50x359_49->359_48 359_79 359_79 359_50x359_49->359_79 359_42->359_43x359_42 359_48->359_107x359_48 359_79->359_79x359_78 359_16 359_16 359_16->359_99x359_16 359_43 359_43 359_43->359_43x359_42 359_71 359_71 359_71->359_70x359_71 359_68 359_68 359_68->359_68x359_11 359_96 359_96 359_96->359_7x359_96 359_107 359_107 
359_107->359_107x359_48 359_107->359_107x359_131 359_119 359_119 359_119->359_118x359_119 359_131 359_131 359_131->359_107x359_131
{% endraw %} {% raw %}
trees = get_trees(fi)
{% endraw %} {% raw %}
plotped(new_ped[new_ped.fid =='4_364'])
%3 4_364_18 4_364_18 4_364_16x4_364_17 4_364_16x4_364_17->4_364_18 4_364_19 4_364_19 4_364_16x4_364_17->4_364_19 4_364_15 4_364_15 4_364_14x4_364_8 4_364_14x4_364_8->4_364_15 4_364_16 4_364_16 4_364_14x4_364_8->4_364_16 4_364_16->4_364_16x4_364_17 4_364_9 4_364_9 4_364_13x4_364_99 4_364_13x4_364_99->4_364_9 4_364_10 4_364_10 4_364_13x4_364_99->4_364_10 4_364_11 4_364_11 4_364_13x4_364_99->4_364_11 4_364_12 4_364_12 4_364_13x4_364_99->4_364_12 4_364_8 4_364_8 4_364_13x4_364_99->4_364_8 4_364_8->4_364_14x4_364_8 4_364_20 4_364_20 4_364_21x4_364_5 4_364_21x4_364_5->4_364_20 4_364_22 4_364_22 4_364_21x4_364_5->4_364_22 4_364_DCH23.4 4_364_DCH23.4 4_364_23x4_364_4 4_364_23x4_364_4->4_364_DCH23.4 4_364_99 4_364_99 4_364_99->4_364_13x4_364_99 4_364_1x4_364_2 4_364_1x4_364_2->4_364_99 4_364_7 4_364_7 4_364_1x4_364_2->4_364_7 4_364_6 4_364_6 4_364_1x4_364_2->4_364_6 4_364_5 4_364_5 4_364_1x4_364_2->4_364_5 4_364_3 4_364_3 4_364_1x4_364_2->4_364_3 4_364_4 4_364_4 4_364_1x4_364_2->4_364_4 4_364_5->4_364_21x4_364_5 4_364_4->4_364_23x4_364_4 4_364_1 4_364_1 4_364_1->4_364_1x4_364_2 4_364_13 4_364_13 4_364_13->4_364_13x4_364_99 4_364_14 4_364_14 4_364_14->4_364_14x4_364_8 4_364_17 4_364_17 4_364_17->4_364_16x4_364_17 4_364_2 4_364_2 4_364_2->4_364_1x4_364_2 4_364_21 4_364_21 4_364_21->4_364_21x4_364_5 4_364_23 4_364_23 4_364_23->4_364_23x4_364_4
{% endraw %} {% raw %}
np.where(ch_r)[0][0]
12
{% endraw %} {% raw %}
plotped(new_ped[new_ped.fid =='1005'])
%3 1005_12 1005_12 1005_11x1005_99 1005_11x1005_99->1005_12 1005_9 1005_9 1005_8x1005_99 1005_8x1005_99->1005_9 1005_10 1005_10 1005_8x1005_99->1005_10 1005_99 1005_99 1005_99->1005_11x1005_99 1005_99->1005_8x1005_99 1005_1x1005_2 1005_1x1005_2->1005_99 1005_3 1005_3 1005_1x1005_2->1005_3 1005_4 1005_4 1005_1x1005_2->1005_4 1005_5 1005_5 1005_1x1005_2->1005_5 1005_6 1005_6 1005_1x1005_2->1005_6 1005_7 1005_7 1005_1x1005_2->1005_7 1005_1 1005_1 1005_1->1005_1x1005_2 1005_2 1005_2 1005_2->1005_1x1005_2 1005_8 1005_8 1005_8->1005_8x1005_99 1005_11 1005_11 1005_11->1005_11x1005_99
{% endraw %} {% raw %}
plotped(new_ped[new_ped.fid =='336'])
%3 336_43 336_43 336_42x336_9 336_42x336_9->336_43 336_44 336_44 336_42x336_9->336_44 336_8 336_8 336_10x336_99 336_10x336_99->336_8 336_9 336_9 336_10x336_99->336_9 336_6 336_6 336_10x336_99->336_6 336_7 336_7 336_10x336_99->336_7 336_9->336_42x336_9 336_24 336_24 336_25x336_26 336_25x336_26->336_24 336_39 336_39 336_37x336_38 336_37x336_38->336_39 336_40 336_40 336_37x336_38->336_40 336_41 336_41 336_37x336_38->336_41 336_13 336_13 336_5x336_15 336_5x336_15->336_13 336_14 336_14 336_5x336_15->336_14 336_16 336_16 336_5x336_15->336_16 336_17 336_17 336_5x336_15->336_17 336_18 336_18 336_5x336_15->336_18 336_19 336_19 336_5x336_15->336_19 336_20 336_20 336_5x336_15->336_20 336_21 336_21 336_5x336_15->336_21 336_22 336_22 336_5x336_15->336_22 336_23 336_23 336_5x336_15->336_23 336_99 336_99 336_99->336_10x336_99 336_1x336_2 336_1x336_2->336_99 336_5 336_5 336_1x336_2->336_5 336_4 336_4 336_1x336_2->336_4 336_3 336_3 336_1x336_2->336_3 336_11 336_11 336_1x336_12 336_1x336_12->336_11 336_5->336_5x336_15 336_25 336_25 336_25->336_25x336_26 336_28x336_27 336_28x336_27->336_25 336_37 336_37 336_37->336_37x336_38 336_35x336_36 336_35x336_36->336_37 336_2 336_2 336_2->336_1x336_2 336_29x336_30 336_29x336_30->336_2 336_1 336_1 336_1->336_1x336_2 336_1->336_1x336_12 336_31x336_32 336_31x336_32->336_1 336_27 336_27 336_31x336_32->336_27 336_35 336_35 336_31x336_32->336_35 336_27->336_28x336_27 336_35->336_35x336_36 336_31 336_31 336_31->336_31x336_32 336_34x336_33 336_34x336_33->336_31 336_29 336_29 336_34x336_33->336_29 336_29->336_29x336_30 336_12 336_12 336_12->336_1x336_12 336_10 336_10 336_10->336_10x336_99 336_15 336_15 336_15->336_5x336_15 336_32 336_32 336_32->336_31x336_32 336_28 336_28 336_28->336_28x336_27 336_26 336_26 336_26->336_25x336_26 336_33 336_33 336_33->336_34x336_33 336_34 336_34 336_34->336_34x336_33 336_30 336_30 336_30->336_29x336_30 336_36 336_36 336_36->336_35x336_36 336_38 336_38 336_38->336_37x336_38 336_42 336_42 336_42->336_42x336_9
{% endraw %} {% raw %}
plotped(new_ped[new_ped.fid =='4_364'])
%3 4_364_18 4_364_18 4_364_16x4_364_17 4_364_16x4_364_17->4_364_18 4_364_19 4_364_19 4_364_16x4_364_17->4_364_19 4_364_15 4_364_15 4_364_14x4_364_8 4_364_14x4_364_8->4_364_15 4_364_16 4_364_16 4_364_14x4_364_8->4_364_16 4_364_16->4_364_16x4_364_17 4_364_9 4_364_9 4_364_13x4_364_99 4_364_13x4_364_99->4_364_9 4_364_10 4_364_10 4_364_13x4_364_99->4_364_10 4_364_11 4_364_11 4_364_13x4_364_99->4_364_11 4_364_12 4_364_12 4_364_13x4_364_99->4_364_12 4_364_8 4_364_8 4_364_13x4_364_99->4_364_8 4_364_8->4_364_14x4_364_8 4_364_20 4_364_20 4_364_21x4_364_5 4_364_21x4_364_5->4_364_20 4_364_22 4_364_22 4_364_21x4_364_5->4_364_22 4_364_DCH23.4 4_364_DCH23.4 4_364_23x4_364_4 4_364_23x4_364_4->4_364_DCH23.4 4_364_99 4_364_99 4_364_99->4_364_13x4_364_99 4_364_1x4_364_2 4_364_1x4_364_2->4_364_99 4_364_7 4_364_7 4_364_1x4_364_2->4_364_7 4_364_6 4_364_6 4_364_1x4_364_2->4_364_6 4_364_5 4_364_5 4_364_1x4_364_2->4_364_5 4_364_3 4_364_3 4_364_1x4_364_2->4_364_3 4_364_4 4_364_4 4_364_1x4_364_2->4_364_4 4_364_5->4_364_21x4_364_5 4_364_4->4_364_23x4_364_4 4_364_1 4_364_1 4_364_1->4_364_1x4_364_2 4_364_13 4_364_13 4_364_13->4_364_13x4_364_99 4_364_14 4_364_14 4_364_14->4_364_14x4_364_8 4_364_17 4_364_17 4_364_17->4_364_16x4_364_17 4_364_2 4_364_2 4_364_2->4_364_1x4_364_2 4_364_21 4_364_21 4_364_21->4_364_21x4_364_5 4_364_23 4_364_23 4_364_23->4_364_23x4_364_4
{% endraw %} {% raw %}
plotped(new_ped[new_ped.fid =='4_501'])
%3 4_501_57 4_501_57 4_501_56x4_501_55 4_501_56x4_501_55->4_501_57 4_501_58 4_501_58 4_501_56x4_501_55->4_501_58 4_501_59 4_501_59 4_501_56x4_501_55->4_501_59 4_501_60 4_501_60 4_501_56x4_501_55->4_501_60 4_501_63 4_501_63 4_501_62x4_501_61 4_501_62x4_501_61->4_501_63 4_501_64 4_501_64 4_501_62x4_501_61->4_501_64 4_501_65 4_501_65 4_501_62x4_501_61->4_501_65 4_501_55 4_501_55 4_501_55->4_501_56x4_501_55 4_501_54x4_501_11 4_501_54x4_501_11->4_501_55 4_501_61 4_501_61 4_501_54x4_501_11->4_501_61 4_501_61->4_501_62x4_501_61 4_501_67 4_501_67 4_501_66x4_501_7 4_501_66x4_501_7->4_501_67 4_501_68 4_501_68 4_501_66x4_501_7->4_501_68 4_501_53 4_501_53 4_501_70x4_501_74 4_501_70x4_501_74->4_501_53 4_501_51 4_501_51 4_501_72x4_501_73 4_501_72x4_501_73->4_501_51 4_501_10 4_501_10 4_501_23x4_501_99 4_501_23x4_501_99->4_501_10 4_501_9 4_501_9 4_501_23x4_501_99->4_501_9 4_501_7 4_501_7 4_501_23x4_501_99->4_501_7 4_501_8 4_501_8 4_501_23x4_501_99->4_501_8 4_501_7->4_501_66x4_501_7 4_501_15 4_501_15 4_501_24x4_501_3 4_501_24x4_501_3->4_501_15 4_501_11 4_501_11 4_501_24x4_501_3->4_501_11 4_501_12 4_501_12 4_501_24x4_501_3->4_501_12 4_501_13 4_501_13 4_501_24x4_501_3->4_501_13 4_501_14 4_501_14 4_501_24x4_501_3->4_501_14 4_501_16 4_501_16 4_501_24x4_501_3->4_501_16 4_501_69 4_501_69 4_501_24x4_501_3->4_501_69 4_501_70 4_501_70 4_501_24x4_501_3->4_501_70 4_501_71 4_501_71 4_501_24x4_501_3->4_501_71 4_501_72 4_501_72 4_501_24x4_501_3->4_501_72 4_501_11->4_501_54x4_501_11 4_501_70->4_501_70x4_501_74 4_501_72->4_501_72x4_501_73 4_501_17 4_501_17 4_501_25x4_501_4 4_501_25x4_501_4->4_501_17 4_501_19 4_501_19 4_501_25x4_501_4->4_501_19 4_501_18 4_501_18 4_501_25x4_501_4->4_501_18 4_501_34 4_501_34 4_501_25x4_501_4->4_501_34 4_501_20 4_501_20 4_501_26x4_501_6 4_501_26x4_501_6->4_501_20 4_501_21 4_501_21 4_501_26x4_501_6->4_501_21 4_501_22 4_501_22 4_501_26x4_501_6->4_501_22 4_501_44 4_501_44 4_501_43x4_501_5 4_501_43x4_501_5->4_501_44 4_501_45 4_501_45 4_501_43x4_501_5->4_501_45 4_501_46 
4_501_46 4_501_43x4_501_5->4_501_46 4_501_48 4_501_48 4_501_47x4_501_31 4_501_47x4_501_31->4_501_48 4_501_49 4_501_49 4_501_47x4_501_31->4_501_49 4_501_50 4_501_50 4_501_47x4_501_31->4_501_50 4_501_6 4_501_6 4_501_6->4_501_26x4_501_6 4_501_1x4_501_2 4_501_1x4_501_2->4_501_6 4_501_99 4_501_99 4_501_1x4_501_2->4_501_99 4_501_3 4_501_3 4_501_1x4_501_2->4_501_3 4_501_4 4_501_4 4_501_1x4_501_2->4_501_4 4_501_31 4_501_31 4_501_1x4_501_2->4_501_31 4_501_5 4_501_5 4_501_1x4_501_2->4_501_5 4_501_99->4_501_23x4_501_99 4_501_3->4_501_24x4_501_3 4_501_4->4_501_25x4_501_4 4_501_31->4_501_47x4_501_31 4_501_5->4_501_43x4_501_5 4_501_25 4_501_25 4_501_25->4_501_25x4_501_4 4_501_28x4_501_27 4_501_28x4_501_27->4_501_25 4_501_35 4_501_35 4_501_28x4_501_27->4_501_35 4_501_37 4_501_37 4_501_36x4_501_27 4_501_36x4_501_27->4_501_37 4_501_38 4_501_38 4_501_36x4_501_27->4_501_38 4_501_39 4_501_39 4_501_36x4_501_27->4_501_39 4_501_40 4_501_40 4_501_36x4_501_27->4_501_40 4_501_41 4_501_41 4_501_36x4_501_27->4_501_41 4_501_42 4_501_42 4_501_36x4_501_27->4_501_42 4_501_2 4_501_2 4_501_2->4_501_1x4_501_2 4_501_29x4_501_30 4_501_29x4_501_30->4_501_2 4_501_27 4_501_27 4_501_29x4_501_30->4_501_27 4_501_27->4_501_28x4_501_27 4_501_27->4_501_36x4_501_27 4_501_23 4_501_23 4_501_23->4_501_23x4_501_99 4_501_32x4_501_33 4_501_32x4_501_33->4_501_23 4_501_26 4_501_26 4_501_32x4_501_33->4_501_26 4_501_26->4_501_26x4_501_6 4_501_1 4_501_1 4_501_1->4_501_1x4_501_2 4_501_24 4_501_24 4_501_24->4_501_24x4_501_3 4_501_28 4_501_28 4_501_28->4_501_28x4_501_27 4_501_29 4_501_29 4_501_29->4_501_29x4_501_30 4_501_30 4_501_30 4_501_30->4_501_29x4_501_30 4_501_32 4_501_32 4_501_32->4_501_32x4_501_33 4_501_33 4_501_33 4_501_33->4_501_32x4_501_33 4_501_36 4_501_36 4_501_36->4_501_36x4_501_27 4_501_43 4_501_43 4_501_43->4_501_43x4_501_5 4_501_47 4_501_47 4_501_47->4_501_47x4_501_31 4_501_54 4_501_54 4_501_54->4_501_54x4_501_11 4_501_56 4_501_56 4_501_56->4_501_56x4_501_55 4_501_62 4_501_62 4_501_62->4_501_62x4_501_61 
4_501_66 4_501_66 4_501_66->4_501_66x4_501_7 4_501_73 4_501_73 4_501_73->4_501_72x4_501_73 4_501_74 4_501_74 4_501_74->4_501_70x4_501_74
{% endraw %} {% raw %}
plotped(new_ped[new_ped.fid =='3761'])
%3 3761_130 3761_130 3761_129x3761_52 3761_129x3761_52->3761_130 3761_131 3761_131 3761_129x3761_52->3761_131 3761_132 3761_132 3761_129x3761_52->3761_132 3761_133 3761_133 3761_129x3761_52->3761_133 3761_134 3761_134 3761_129x3761_52->3761_134 3761_135 3761_135 3761_129x3761_52->3761_135 3761_136 3761_136 3761_129x3761_52->3761_136 3761_137 3761_137 3761_129x3761_52->3761_137 3761_129 3761_129 3761_129->3761_129x3761_52 3761_138x3761_143 3761_138x3761_143->3761_129 3761_150 3761_150 3761_138x3761_143->3761_150 3761_145 3761_145 3761_138x3761_143->3761_145 3761_146 3761_146 3761_138x3761_143->3761_146 3761_147 3761_147 3761_138x3761_143->3761_147 3761_148 3761_148 3761_138x3761_143->3761_148 3761_149 3761_149 3761_138x3761_143->3761_149 3761_151 3761_151 3761_138x3761_143->3761_151 3761_152 3761_152 3761_138x3761_143->3761_152 3761_186 3761_186 3761_142x3761_185 3761_142x3761_185->3761_186 3761_187 3761_187 3761_142x3761_185->3761_187 3761_188 3761_188 3761_142x3761_185->3761_188 3761_189 3761_189 3761_142x3761_185->3761_189 3761_190 3761_190 3761_142x3761_185->3761_190 3761_191 3761_191 3761_142x3761_185->3761_191 3761_192 3761_192 3761_142x3761_185->3761_192 3761_157 3761_157 3761_153x3761_154 3761_153x3761_154->3761_157 3761_155 3761_155 3761_153x3761_154->3761_155 3761_156 3761_156 3761_153x3761_154->3761_156 3761_158 3761_158 3761_153x3761_154->3761_158 3761_159 3761_159 3761_153x3761_154->3761_159 3761_160 3761_160 3761_153x3761_154->3761_160 3761_175 3761_175 3761_170x3761_171 3761_170x3761_171->3761_175 3761_176 3761_176 3761_170x3761_171->3761_176 3761_172 3761_172 3761_170x3761_171->3761_172 3761_173 3761_173 3761_170x3761_171->3761_173 3761_174 3761_174 3761_170x3761_171->3761_174 3761_177 3761_177 3761_170x3761_171->3761_177 3761_178 3761_178 3761_170x3761_171->3761_178 3761_179 3761_179 3761_170x3761_171->3761_179 3761_180 3761_180 3761_170x3761_171->3761_180 3761_181 3761_181 3761_170x3761_171->3761_181 3761_182 3761_182 3761_170x3761_171->3761_182 
3761_183 3761_183 3761_170x3761_169 3761_170x3761_169->3761_183 3761_200 3761_200 3761_194x3761_193 3761_194x3761_193->3761_200 3761_211 3761_211 3761_194x3761_193->3761_211 3761_195 3761_195 3761_194x3761_193->3761_195 3761_196 3761_196 3761_194x3761_193->3761_196 3761_197 3761_197 3761_194x3761_193->3761_197 3761_198 3761_198 3761_194x3761_193->3761_198 3761_199 3761_199 3761_194x3761_193->3761_199 3761_201 3761_201 3761_194x3761_193->3761_201 3761_206 3761_206 3761_202x3761_141 3761_202x3761_141->3761_206 3761_208 3761_208 3761_202x3761_141->3761_208 3761_203 3761_203 3761_202x3761_141->3761_203 3761_204 3761_204 3761_202x3761_141->3761_204 3761_205 3761_205 3761_202x3761_141->3761_205 3761_207 3761_207 3761_202x3761_141->3761_207 3761_209 3761_209 3761_202x3761_141->3761_209 3761_210 3761_210 3761_202x3761_141->3761_210 3761_25 3761_25 3761_24x3761_7 3761_24x3761_7->3761_25 3761_26 3761_26 3761_24x3761_7->3761_26 3761_27 3761_27 3761_24x3761_7->3761_27 3761_28 3761_28 3761_24x3761_7->3761_28 3761_29 3761_29 3761_24x3761_7->3761_29 3761_30 3761_30 3761_24x3761_7->3761_30 3761_31 3761_31 3761_24x3761_7->3761_31 3761_33 3761_33 3761_32x3761_9 3761_32x3761_9->3761_33 3761_34 3761_34 3761_32x3761_9->3761_34 3761_35 3761_35 3761_32x3761_9->3761_35 3761_36 3761_36 3761_32x3761_9->3761_36 3761_217 3761_217 3761_42x3761_213 3761_42x3761_213->3761_217 3761_214 3761_214 3761_42x3761_213->3761_214 3761_215 3761_215 3761_42x3761_213->3761_215 3761_216 3761_216 3761_42x3761_213->3761_216 3761_218 3761_218 3761_42x3761_213->3761_218 3761_219 3761_219 3761_42x3761_213->3761_219 3761_220 3761_220 3761_42x3761_213->3761_220 3761_221 3761_221 3761_42x3761_213->3761_221 3761_222 3761_222 3761_42x3761_213->3761_222 3761_86 3761_86 3761_44x3761_82 3761_44x3761_82->3761_86 3761_83 3761_83 3761_44x3761_82->3761_83 3761_84 3761_84 3761_44x3761_82->3761_84 3761_85 3761_85 3761_44x3761_82->3761_85 3761_87 3761_87 3761_44x3761_82->3761_87 3761_88 3761_88 3761_44x3761_82->3761_88 3761_89 
3761_89 3761_44x3761_82->3761_89 3761_90 3761_90 3761_44x3761_82->3761_90 3761_91 3761_91 3761_44x3761_82->3761_91 3761_63 3761_63 3761_45x3761_62 3761_45x3761_62->3761_63 3761_66 3761_66 3761_45x3761_62->3761_66 3761_67 3761_67 3761_45x3761_62->3761_67 3761_69 3761_69 3761_45x3761_62->3761_69 3761_64 3761_64 3761_45x3761_62->3761_64 3761_70 3761_70 3761_45x3761_62->3761_70 3761_71 3761_71 3761_45x3761_62->3761_71 3761_65 3761_65 3761_45x3761_62->3761_65 3761_73 3761_73 3761_45x3761_62->3761_73 3761_68 3761_68 3761_45x3761_62->3761_68 3761_72 3761_72 3761_45x3761_62->3761_72 3761_101 3761_101 3761_45x3761_62->3761_101 3761_96 3761_96 3761_48x3761_92 3761_48x3761_92->3761_96 3761_93 3761_93 3761_48x3761_92->3761_93 3761_94 3761_94 3761_48x3761_92->3761_94 3761_95 3761_95 3761_48x3761_92->3761_95 3761_97 3761_97 3761_48x3761_92->3761_97 3761_98 3761_98 3761_48x3761_92->3761_98 3761_100 3761_100 3761_48x3761_92->3761_100 3761_212 3761_212 3761_48x3761_92->3761_212 3761_61 3761_61 3761_57x3761_54 3761_57x3761_54->3761_61 3761_58 3761_58 3761_57x3761_54->3761_58 3761_59 3761_59 3761_57x3761_54->3761_59 3761_60 3761_60 3761_57x3761_54->3761_60 3761_75 3761_75 3761_74x3761_43 3761_74x3761_43->3761_75 3761_78 3761_78 3761_74x3761_43->3761_78 3761_184 3761_184 3761_74x3761_43->3761_184 3761_76 3761_76 3761_74x3761_43->3761_76 3761_77 3761_77 3761_74x3761_43->3761_77 3761_79 3761_79 3761_74x3761_43->3761_79 3761_80 3761_80 3761_74x3761_43->3761_80 3761_81 3761_81 3761_74x3761_43->3761_81 3761_11 3761_11 3761_99x3761_10 3761_99x3761_10->3761_11 3761_13 3761_13 3761_99x3761_10->3761_13 3761_16 3761_16 3761_99x3761_10->3761_16 3761_12 3761_12 3761_99x3761_10->3761_12 3761_14 3761_14 3761_99x3761_10->3761_14 3761_15 3761_15 3761_99x3761_10->3761_15 3761_17 3761_17 3761_99x3761_10->3761_17 3761_18 3761_18 3761_99x3761_10->3761_18 3761_19 3761_19 3761_99x3761_10->3761_19 3761_20 3761_20 3761_99x3761_10->3761_20 3761_21 3761_21 3761_99x3761_10->3761_21 3761_22 3761_22 
3761_99x3761_10->3761_22 3761_23 3761_23 3761_99x3761_10->3761_23 3761_3 3761_3 3761_1x3761_2 3761_1x3761_2->3761_3 3761_4 3761_4 3761_1x3761_2->3761_4 3761_9 3761_9 3761_1x3761_2->3761_9 3761_99 3761_99 3761_1x3761_2->3761_99 3761_5 3761_5 3761_1x3761_2->3761_5 3761_6 3761_6 3761_1x3761_2->3761_6 3761_7 3761_7 3761_1x3761_2->3761_7 3761_8 3761_8 3761_1x3761_2->3761_8 3761_9->3761_32x3761_9 3761_99->3761_99x3761_10 3761_7->3761_24x3761_7 3761_104 3761_104 3761_102x3761_103 3761_102x3761_103->3761_104 3761_105 3761_105 3761_102x3761_103->3761_105 3761_106 3761_106 3761_102x3761_103->3761_106 3761_107 3761_107 3761_102x3761_103->3761_107 3761_123 3761_123 3761_108x3761_111 3761_108x3761_111->3761_123 3761_122 3761_122 3761_108x3761_111->3761_122 3761_124 3761_124 3761_108x3761_111->3761_124 3761_125 3761_125 3761_108x3761_111->3761_125 3761_126 3761_126 3761_108x3761_111->3761_126 3761_127 3761_127 3761_108x3761_111->3761_127 3761_128 3761_128 3761_108x3761_111->3761_128 3761_139 3761_139 3761_109x3761_112 3761_109x3761_112->3761_139 3761_141 3761_141 3761_109x3761_112->3761_141 3761_193 3761_193 3761_109x3761_112->3761_193 3761_138 3761_138 3761_109x3761_112->3761_138 3761_142 3761_142 3761_109x3761_112->3761_142 3761_141->3761_202x3761_141 3761_193->3761_194x3761_193 3761_138->3761_138x3761_143 3761_142->3761_142x3761_185 3761_116 3761_116 3761_113x3761_110 3761_113x3761_110->3761_116 3761_117 3761_117 3761_113x3761_110->3761_117 3761_118 3761_118 3761_113x3761_110->3761_118 3761_121 3761_121 3761_114x3761_110 3761_114x3761_110->3761_121 3761_119 3761_119 3761_115x3761_110 3761_115x3761_110->3761_119 3761_120 3761_120 3761_115x3761_110->3761_120 3761_166 3761_166 3761_161x3761_162 3761_161x3761_162->3761_166 3761_163 3761_163 3761_161x3761_162->3761_163 3761_164 3761_164 3761_161x3761_162->3761_164 3761_165 3761_165 3761_161x3761_162->3761_165 3761_167 3761_167 3761_161x3761_162->3761_167 3761_168 3761_168 3761_161x3761_162->3761_168 3761_169 3761_169 
3761_161x3761_162->3761_169 3761_169->3761_170x3761_169 3761_42 3761_42 3761_42->3761_42x3761_213 3761_39x3761_40 3761_39x3761_40->3761_42 3761_43 3761_43 3761_39x3761_40->3761_43 3761_44 3761_44 3761_39x3761_40->3761_44 3761_45 3761_45 3761_39x3761_40->3761_45 3761_41 3761_41 3761_39x3761_40->3761_41 3761_46 3761_46 3761_39x3761_40->3761_46 3761_47 3761_47 3761_39x3761_40->3761_47 3761_48 3761_48 3761_39x3761_40->3761_48 3761_49 3761_49 3761_39x3761_40->3761_49 3761_153 3761_153 3761_39x3761_40->3761_153 3761_43->3761_74x3761_43 3761_44->3761_44x3761_82 3761_45->3761_45x3761_62 3761_48->3761_48x3761_92 3761_153->3761_153x3761_154 3761_52 3761_52 3761_52->3761_129x3761_52 3761_50x3761_51 3761_50x3761_51->3761_52 3761_53 3761_53 3761_50x3761_51->3761_53 3761_54 3761_54 3761_50x3761_51->3761_54 3761_55 3761_55 3761_50x3761_51->3761_55 3761_56 3761_56 3761_50x3761_51->3761_56 3761_170 3761_170 3761_50x3761_51->3761_170 3761_54->3761_57x3761_54 3761_170->3761_170x3761_171 3761_170->3761_170x3761_169 3761_1 3761_1 3761_1->3761_1x3761_2 3761_37x3761_38 3761_37x3761_38->3761_1 3761_39 3761_39 3761_37x3761_38->3761_39 3761_50 3761_50 3761_37x3761_38->3761_50 3761_102 3761_102 3761_37x3761_38->3761_102 3761_108 3761_108 3761_37x3761_38->3761_108 3761_109 3761_109 3761_37x3761_38->3761_109 3761_110 3761_110 3761_37x3761_38->3761_110 3761_161 3761_161 3761_37x3761_38->3761_161 3761_39->3761_39x3761_40 3761_50->3761_50x3761_51 3761_102->3761_102x3761_103 3761_108->3761_108x3761_111 3761_109->3761_109x3761_112 3761_110->3761_113x3761_110 3761_110->3761_114x3761_110 3761_110->3761_115x3761_110 3761_161->3761_161x3761_162 3761_74 3761_74 3761_74->3761_74x3761_43 3761_111 3761_111 3761_111->3761_108x3761_111 3761_213 3761_213 3761_213->3761_42x3761_213 3761_2 3761_2 3761_2->3761_1x3761_2 3761_10 3761_10 3761_10->3761_99x3761_10 3761_24 3761_24 3761_24->3761_24x3761_7 3761_32 3761_32 3761_32->3761_32x3761_9 3761_37 3761_37 3761_37->3761_37x3761_38 3761_38 3761_38 
3761_38->3761_37x3761_38 3761_40 3761_40 3761_40->3761_39x3761_40 3761_51 3761_51 3761_51->3761_50x3761_51 3761_57 3761_57 3761_57->3761_57x3761_54 3761_62 3761_62 3761_62->3761_45x3761_62 3761_82 3761_82 3761_82->3761_44x3761_82 3761_92 3761_92 3761_92->3761_48x3761_92 3761_103 3761_103 3761_103->3761_102x3761_103 3761_112 3761_112 3761_112->3761_109x3761_112 3761_113 3761_113 3761_113->3761_113x3761_110 3761_114 3761_114 3761_114->3761_114x3761_110 3761_115 3761_115 3761_115->3761_115x3761_110 3761_143 3761_143 3761_143->3761_138x3761_143 3761_154 3761_154 3761_154->3761_153x3761_154 3761_162 3761_162 3761_162->3761_161x3761_162 3761_171 3761_171 3761_171->3761_170x3761_171 3761_185 3761_185 3761_185->3761_142x3761_185 3761_194 3761_194 3761_194->3761_194x3761_193 3761_202 3761_202 3761_202->3761_202x3761_141
{% endraw %} {% raw %}
plotped(new_ped[new_ped.fid =='930'])
%3 930_16 930_16 930_3x930_15 930_3x930_15->930_16 930_17 930_17 930_3x930_15->930_17 930_18 930_18 930_3x930_15->930_18 930_19 930_19 930_3x930_15->930_19 930_6 930_6 930_99x930_5 930_99x930_5->930_6 930_7 930_7 930_99x930_5->930_7 930_8 930_8 930_99x930_5->930_8 930_4 930_4 930_1x930_2 930_1x930_2->930_4 930_3 930_3 930_1x930_2->930_3 930_99 930_99 930_1x930_2->930_99 930_3->930_3x930_15 930_99->930_99x930_5 930_11 930_11 930_12x930_13 930_12x930_13->930_11 930_14 930_14 930_12x930_13->930_14 930_9 930_9 930_12x930_13->930_9 930_10 930_10 930_12x930_13->930_10 930_2 930_2 930_12x930_13->930_2 930_2->930_1x930_2 930_1 930_1 930_1->930_1x930_2 930_5 930_5 930_5->930_99x930_5 930_12 930_12 930_12->930_12x930_13 930_13 930_13 930_13->930_12x930_13 930_15 930_15 930_15->930_3x930_15
{% endraw %} {% raw %}
new_ped1[new_ped1.fid=='930']
fid iid fathid mothid sex ad vcf depth trim tag
930_4 930 930_4 930_1 930_2 2 2 True 3 False 930:10
930_3 930 930_3 930_1 930_2 1 2 True 3 False 930:10
930_99 930 930_99 930_1 930_2 1 -9 True 3 False 930:10
930_11 930 930_11 930_12 930_13 2 1 True 2 False 930:10
930_14 930 930_14 930_12 930_13 1 2 True 2 False 930:10
930_9 930 930_9 930_12 930_13 2 2 True 2 False 930:10
930_10 930 930_10 930_12 930_13 2 2 True 2 False 930:10
930_2 930 930_2 930_12 930_13 2 -9 False 2 False 930:10
930_1 930 930_1 0 0 1 -9 False 1 False 930:8
930_12 930 930_12 0 0 1 -9 False 1 False 930:10
930_13 930 930_13 0 0 2 -9 False 1 False 930:10
{% endraw %} {% raw %}
# Call plotped with output=True once per family id found in the selected rows
# of new_ped1, passing that family's rows of new_ped each time.
# NOTE(review): `depth` comes from an earlier cell; the mask keeps rows whose
# `depth` entry is > 3 and is applied positionally -- confirm it is aligned
# with new_ped1. plotped presumably writes its files under `folder`.
dot1 = [plotped(new_ped[new_ped.fid ==i],output=True,folder='data/ADfam_new_trim_4') for i in new_ped1[list(pd.Series(depth)>3)].fid.unique()]
{% endraw %} {% raw %}
# Keep only the rows of ped whose `trim` value equals False.
trim_ped = ped[ped.trim==False]
{% endraw %} {% raw %}
# Two rows, one column per family id in trim_ped.fid.unique():
#   row 0 = maximum `depth` of that family within trim_ped,
#   row 1 = maximum `depth` of the same family within the full ped table.
tmp = pd.DataFrame([[trim_ped.depth[trim_ped.fid == i].max() for i in trim_ped.fid.unique()],[ped.depth[ped.fid == i].max() for i in trim_ped.fid.unique()]])
{% endraw %} {% raw %}
tmp = tmp.T
{% endraw %} {% raw %}
tmp.index = trim_ped.fid.unique()
{% endraw %} {% raw %}
tmp.hist()
array([[<AxesSubplot:title={'center':'0'}>,
        <AxesSubplot:title={'center':'1'}>]], dtype=object)
{% endraw %} {% raw %}
# Number of families whose maximum depth within trim_ped (column 0) is below 4.
sum(tmp[0]<4)
717
{% endraw %} {% raw %}
# Number of families whose maximum depth within the full ped table (column 1) is below 4.
sum(tmp[1]<4)
614
{% endraw %}