Making predictions for TOIs

Load required packages

[1]:

import pandas as pd
import lightkurve as lk
import numpy as np
import astropy.units as u
import astropy.constants as c
from scipy.constants import G
from IPython.display import display
from ldtk import LDPSetCreator, BoxcarFilter, TabulatedFilter
from exoInfoMatrixTOI import exoInfoMatrix
import ldtk.filters as filters
import exoplanet as xo
import torch.multiprocessing as mp
WARNING (theano.tensor.blas): Using NumPy C-API based implementation for BLAS functions.

Select only planet candidates

[ ]:

# Read the TOIs table from the NASA Exoplanet Archive (NEA)
nea_tois = pd.read_csv("nea_tois.csv", header=90)
nea_tois = nea_tois[nea_tois["tfopwg_disp"] == "PC"]  # Only want planet candidates
nea_tois = nea_tois[nea_tois["pl_pnum"] == 1]         # And only with a single planet

print(f"Initial number of planet candidates is {len(nea_tois)}\n")

# Only keep rows with values for stellar logg, radius and effective temperature, which we will need later on
nea_tois.dropna(axis=0, subset=["st_logg", "st_rad", "st_teff"], inplace=True)

# We also need the errors. Whether two values (lower and upper bounds) or only one is reported, we keep the largest
nea_tois["st_rad_err"] = np.nanmax(nea_tois[["st_raderr1", "st_raderr2"]], axis=-1)
nea_tois["st_logg_err"] = np.nanmax(nea_tois[["st_loggerr1", "st_loggerr2"]], axis=-1)
nea_tois["st_teff_err"] = np.nanmax(nea_tois[["st_tefferr1", "st_tefferr2"]], axis=-1)

# Since we also need the errors later on, only keep rows where they are present
nea_tois.dropna(axis=0, subset=["st_rad_err", "st_logg_err", "st_teff_err"], inplace=True)

print(f"{len(nea_tois)} planet candidates with values for logg, R_star and T_eff along with errors\n")

# Reset indices
nea_tois.reset_index(inplace=True, drop=True)

Out of these, we want only those that were observed exclusively at 1800 s cadence.

[ ]:

# We search for the available lightcurves of each candidate and flag those observed only at 1800s.
# This can take a long time
for i, row in nea_tois.iterrows():
    print(f"\n{i} out of {len(nea_tois) - 1}")

    TID = f"TIC {row['tid']}"

    # Results of the lightcurve search
    search = lk.search_lightcurve(TID, mission="TESS")

    # If there were no matches, a KeyError will be raised
    try:
        exptimes = set(search.exptime.value)
    except KeyError:
        # We add a note letting us know this PC wasn't found
        nea_tois.at[i, "notes"] = "NOT FOUND"
        print("NOT FOUND")
        continue

    # We check whether the only cadence is 1800s and flag the candidate accordingly
    if not exptimes.issubset(set([1800])):
        nea_tois.at[i, "notes"] = "NOT ONLY 1800s"
        print("NOT ONLY 1800s")
    else:
        nea_tois.at[i, "accepted"] = True
        print("ACCEPTED")
[ ]:

# Now we save the PCs with observations only at 1800s
nea_tois[nea_tois["accepted"] == True].to_csv("tois_with_only_1800s.csv", index=False)
[4]:

# And read these results into a new dataframe
only1800 = pd.read_csv("tois_with_only_1800s.csv")

Add columns with derived quantities we will need

[31]:

# We use astropy units to avoid having to deal with unit conversions by hand
logg = only1800["st_logg"] # log(g)
g = 10**logg.to_numpy() * u.cm * u.s ** (-2) # g in cm/s^2
R = only1800["st_rad"].to_numpy() * c.R_sun # Stellar radius in solar radii
P = only1800["pl_orbper"].to_numpy() * u.day # Period in days
T = only1800["pl_trandurh"].to_numpy() * u.hour # Transit duration in hours

# We store the values only, no units
only1800["st_rho"] = (3/(4 * np.pi * c.G) * g / R).to(u.g * (u.cm)**(-3)).value # Stellar density

a = (((g * R ** 2 * P ** 2) / (4 * np.pi ** 2)) ** (1/3) / R).decompose() # Semi-major axis in units of the stellar radius

only1800["a"] = a.value

b = np.sqrt(1 - ((a * np.pi * T) / P) ** 2).decompose() # Impact parameter implied by the duration, from T ~ (P / (pi a)) * sqrt(1 - b^2)

only1800["b"] = b.value

To make estimates, we need fiducial values for the limb-darkening parameters. We obtain approximate values using ‘PyLDTk’.

[32]:

filt = filters.create_tess()  # Create the TESS filter profile

copy = only1800.copy()  # Copy the dataframe to iterate through rows safely

# Iterate through all rows
for i, row in copy.iterrows():
    print(f"Row {i} out of {len(copy) - 1}")

    # Read effective temperature and logg values
    teff = row["st_teff"]
    teff_err = row["st_teff_err"]
    logg = row["st_logg"]
    logg_err = row["st_logg_err"]

    # Just to be sure, we check there are no NaN values
    names = np.array(["teff", "teff_err", "logg", "logg_err"])
    anynan = np.isnan(np.array([teff, teff_err, logg, logg_err]))
    if anynan.any():
        print(f"{row['tid']} has NaN value in {names[anynan]}")

    # Create the profiles. Because we have no metallicity value from the table we use z = 0.25 with error 0.125
    sc = LDPSetCreator(teff=(teff, teff_err), logg=(logg, logg_err), z=(0.25, 0.125), filters=[filt])
    ps = sc.create_profiles(nsamples=1000)

    # Run an MCMC to get the coefficients; if it cannot converge, print a message
    try:
        qc, qe = ps.coeffs_qd(do_mc=True)
    except np.linalg.LinAlgError:
        print(f"Row {i} ({row['tid']}) did not converge")
        only1800.at[i, "u_star1"] = None
        only1800.at[i, "u_star2"] = None
        only1800.at[i, "u_star1_sd"] = None
        only1800.at[i, "u_star2_sd"] = None
        continue

    # Check there are no NaN values in the results
    if np.isnan([qc, qe]).any():
        print(f"Row {i} ({row['tid']}) calculated values are NaN somewhere")

    only1800.at[i, "u_star1"] = qc[0][0]
    only1800.at[i, "u_star2"] = qc[0][1]
    only1800.at[i, "u_star1_sd"] = qe[0][0]
    only1800.at[i, "u_star2_sd"] = qe[0][1]
Row 0 out of 193
Row 1 out of 193
Row 2 out of 193
Row 3 out of 193
Row 4 out of 193
Row 5 out of 193
Row 6 out of 193
Row 7 out of 193
Row 8 out of 193
Row 9 out of 193
Row 10 out of 193
Row 11 out of 193
Row 12 out of 193
Row 13 out of 193
Row 13 (81831095) did not converge
Row 14 out of 193
Row 15 out of 193
Row 16 out of 193
Row 17 out of 193
Row 18 out of 193
Row 19 out of 193
Row 20 out of 193
Row 21 out of 193
Row 22 out of 193
Row 23 out of 193
Row 24 out of 193
Row 25 out of 193
Row 26 out of 193
Row 27 out of 193
Row 28 out of 193
Row 29 out of 193
Row 30 out of 193
Row 31 out of 193
Row 32 out of 193
Row 33 out of 193
Row 34 out of 193
Row 35 out of 193
Row 36 out of 193
Row 37 out of 193
Row 38 out of 193
Row 39 out of 193
Row 39 (160930264) did not converge
Row 40 out of 193
Row 41 out of 193
Row 42 out of 193
Row 43 out of 193
Row 43 (174426662) did not converge
Row 44 out of 193
Row 45 out of 193
Row 46 out of 193
Row 47 out of 193
Row 48 out of 193
Row 49 out of 193
Row 50 out of 193
Row 51 out of 193
Row 52 out of 193
Row 53 out of 193
Row 54 out of 193
Row 55 out of 193
Row 56 out of 193
Row 56 (65440953) did not converge
Row 57 out of 193
Row 58 out of 193
Row 59 out of 193
Row 60 out of 193
Row 61 out of 193
Row 62 out of 193
Row 63 out of 193
Row 64 out of 193
Row 65 out of 193
Row 66 out of 193
Row 67 out of 193
Row 68 out of 193
Row 69 out of 193
Row 70 out of 193
Row 71 out of 193
Row 72 out of 193
Row 73 out of 193
Row 74 out of 193
Row 75 out of 193
Row 76 out of 193
Row 77 out of 193
Row 78 out of 193
Row 79 out of 193
Row 80 out of 193
Row 81 out of 193
Row 82 out of 193
Row 83 out of 193
Row 84 out of 193
Row 85 out of 193
Row 86 out of 193
Row 87 out of 193
Row 88 out of 193
Row 88 (468777766) did not converge
Row 89 out of 193
Row 90 out of 193
Row 91 out of 193
Row 92 out of 193
Row 93 out of 193
Row 94 out of 193
Row 95 out of 193
Row 96 out of 193
Row 97 out of 193
Row 98 out of 193
Row 99 out of 193
Row 100 out of 193
Row 101 out of 193
Row 102 out of 193
Row 103 out of 193
Row 104 out of 193
Row 105 out of 193
Row 106 out of 193
Row 107 out of 193
Row 108 out of 193
Row 109 out of 193
Row 110 out of 193
Row 111 out of 193
Row 112 out of 193
Row 113 out of 193
Row 114 out of 193
Row 115 out of 193
Row 116 out of 193
Row 117 out of 193
Row 118 out of 193
Row 119 out of 193
Row 120 out of 193
Row 121 out of 193
Row 122 out of 193
Row 123 out of 193
Row 124 out of 193
Row 125 out of 193
Row 126 out of 193
Row 127 out of 193
Row 128 out of 193
Row 129 out of 193
Row 129 (190986054) did not converge
Row 130 out of 193
Row 131 out of 193
Row 132 out of 193
Row 133 out of 193
Row 134 out of 193
Row 135 out of 193
Row 136 out of 193
Row 137 out of 193
Row 138 out of 193
Row 139 out of 193
Row 140 out of 193
Row 140 (356472238) did not converge
Row 141 out of 193
Row 142 out of 193
Row 143 out of 193
Row 144 out of 193
Row 145 out of 193
Row 146 out of 193
Row 147 out of 193
Row 148 out of 193
Row 149 out of 193
Row 150 out of 193
Row 151 out of 193
Row 152 out of 193
Row 153 out of 193
Row 154 out of 193
Row 155 out of 193
Row 156 out of 193
Row 157 out of 193
Row 158 out of 193
Row 159 out of 193
Row 160 out of 193
Row 161 out of 193
Row 162 out of 193
Row 163 out of 193
Row 164 out of 193
Row 165 out of 193
Row 166 out of 193
Row 167 out of 193
Row 168 out of 193
Row 169 out of 193
Row 170 out of 193
Row 171 out of 193
Row 172 out of 193
Row 173 out of 193
Row 174 out of 193
Row 175 out of 193
Row 176 out of 193
Row 177 out of 193
Row 178 out of 193
Row 179 out of 193
Row 180 out of 193
Row 181 out of 193
Row 182 out of 193
Row 183 out of 193
Row 183 (407495930) did not converge
Row 184 out of 193
Row 185 out of 193
Row 186 out of 193
Row 187 out of 193
Row 188 out of 193
Row 189 out of 193
Row 190 out of 193
Row 191 out of 193
Row 192 out of 193
Row 193 out of 193
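
If you only want to test the limb-darkening step on a single star, a minimal PyLDTk call looks like the sketch below (illustrative Teff and logg values, not taken from the table):

[ ]:

from ldtk import LDPSetCreator
import ldtk.filters as filters

# Hypothetical Sun-like parameters, with the same z = 0.25 +/- 0.125 assumption used above
sc = LDPSetCreator(teff=(5750, 100), logg=(4.4, 0.1), z=(0.25, 0.125),
                   filters=[filters.create_tess()])
ps = sc.create_profiles(nsamples=1000)

qc, qe = ps.coeffs_qd(do_mc=True)   # quadratic-law coefficients and their uncertainties
print(qc[0], qe[0])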
[33]:

# And we save only those that did converge
only1800[np.invert(np.isnan(only1800["u_star1"].to_numpy()))].to_csv("tois_with_only_1800s_limbdark.csv", index=False)

limbdarkened = pd.read_csv("tois_with_only_1800s_limbdark.csv")

Now that we have limb-darkening values, we can get an approximate value for the radius ratio.

[34]:

# We calculate the radius ratio for each row
for i, row in limbdarkened.copy().iterrows():
    # We create a limb-darkened star with the exoplanet package
    star = xo.LimbDarkLightCurve(row["u_star1"], row["u_star2"])

    # And use the 'get_ror_from_approx_transit_depth' utility to obtain an approximate value for the
    # radius ratio (the NEA transit depth is given in ppm, hence the 1e-6 factor)
    ror = star.get_ror_from_approx_transit_depth(row["pl_trandep"] * 1e-6, row["b"]).eval()

    limbdarkened.at[i, "ror"] = ror
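
To see what the limb-darkening correction does relative to the naive sqrt(depth) estimate, here is a small standalone comparison with illustrative depth, impact parameter and limb-darkening coefficients (hypothetical values, not taken from the table):

[ ]:

import numpy as np
import exoplanet as xo

depth_ex = 1000e-6          # hypothetical 1000 ppm transit depth
u1_ex, u2_ex = 0.45, 0.20   # hypothetical quadratic limb-darkening coefficients
b_ex = 0.3                  # hypothetical impact parameter

star_ex = xo.LimbDarkLightCurve(u1_ex, u2_ex)
ror_ld = star_ex.get_ror_from_approx_transit_depth(depth_ex, b_ex).eval()

print(np.sqrt(depth_ex))    # simple sqrt(depth) estimate, ~0.0316
print(ror_ld)               # limb-darkening-aware estimate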
[35]:

# We can now save this as our final dataframe
limbdarkened = limbdarkened[np.invert(np.isnan(limbdarkened["ror"]))]

print(f"{len(limbdarkened)} final planet candidates to be passed onto prediction calculation")

limbdarkened.to_csv("final_dataframe.csv", index=False)
157 final planet candidates to be passed onto prediction calculation

Now we make the actual predictions

[36]:

# To make it faster, we will parallelize the calculations
# CHANGE THIS TO THE NUMBER OF CORES YOU WISH TO USE
NCORES = 12

# Read the final input table
table = pd.read_csv("final_dataframe.csv")

# Now we split it into NCORES tables
tables = np.array_split(table, NCORES)

# We will calculate the predicted radius-ratio precision for the following exposure times
calc_expt = {20, 120, 600, 1800}

indices = np.arange(0, NCORES, 1)  # Just to keep track of how each thread is doing
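
The per-point photometric error for each exposure time is obtained by rescaling the error of the reference 1800 s lightcurve under a white-noise assumption, sigma scaling as 1/sqrt(exposure time). A minimal sketch of that scaling, with an illustrative reference error (not taken from any lightcurve):

[ ]:

import numpy as np

ref_exptime = 1800     # reference cadence in seconds
ref_sigma = 1.0e-3     # illustrative relative flux error at 1800 s

for exptime in (20, 120, 600, 1800):
    sigma = ref_sigma * np.sqrt(ref_exptime) / np.sqrt(exptime)
    print(exptime, sigma)   # shorter exposures give a larger per-point error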

This function calculates the predictions for one of the sub-tables. We wrap the calculation in a function so that it can be run with multiprocessing.

[37]:

def calculate_prediction(args):
    df, index = args

    copy = df.copy()
    copy.reset_index(inplace=True, drop=True)

    # Loop through all rows
    for idx, row in copy.iterrows():
        print(f"THREAD {index}: {idx+1} out of {len(copy)}\n")

        # Read the hostname
        host = f"TIC {row['tid']}"

        ref_exptime = 1800  # Our reference exposure time is 1800s, used to download a reference lightcurve

        # Search for the lightcurve
        search = lk.search_lightcurve(host, mission="TESS", exptime=ref_exptime)

        # We give priority to SPOC lightcurves, then QLP and then CDIPS. No reason beyond keeping
        # the lightcurves as homogeneous as possible.
        if len(search[["SPOC" in author for author in search.author]]) != 0:
            search = search[["SPOC" in author for author in search.author]]
        elif len(search[["QLP" in author for author in search.author]]) != 0:
            search = search[["QLP" in author for author in search.author]]
        elif len(search[["CDIPS" in author for author in search.author]]) != 0:
            search = search[["CDIPS" in author for author in search.author]]

        # Download the lightcurve; skip the candidate if the download fails
        try:
            lc = search[-1].download_all().stitch().remove_nans().remove_outliers(sigma_lower=float('inf'))
        except lk.LightkurveError:
            print(f"{host} lightcurve can't be downloaded ({search.author})")
            continue

        # The reference mean error of the measurements is the mean error of the 1800s lightcurve
        ref_sigma = np.mean(np.array(lc.flux_err.value))

        # And the reference timestamps array is also obtained from the lightcurve
        ref_t = np.array(lc.time.value)

        # We also keep track of these values
        copy.at[idx, "ref_exptime"] = ref_exptime
        copy.at[idx, "ref_sigma"] = ref_sigma

        # Now we make the actual predictions for each exposure time
        for exptime in calc_expt:
            # New array of timestamps with points spaced by one exposure time and a total observation
            # time equal to one sector
            t = np.arange(min(ref_t), max(ref_t), exptime / (3600 * 24))

            # Calculate the new mean error for this exposure time
            sigma = ref_sigma * np.sqrt(ref_exptime) / np.sqrt(exptime)

            # Initialize the information matrix. An oversample of ~100 should be fine; 1000 also works,
            # it will just take longer
            infomatrix = exoInfoMatrix(exptime, oversample=100)

            # This is just to make sure there are no NaN values
            anynan = np.isnan(np.array([
                row["pl_orbper"], row["pl_tranmid"], row["ror"], row["b"],
                row["u_star1"], row["u_star2"], row["st_rho"], row["st_rad"]]))
            names = np.array(["pl_orbper", "pl_tranmid", "ror", "b", "u_star1", "u_star2", "st_rho", "st_rad"])

            if np.isnan(t).any():
                print(f"{host} has NaN values for t")
                continue
            if anynan.any():
                print(f"{host} has NaN values for {names[anynan]}")
                continue

            # If there are no NaNs then we set the data
            infomatrix.set_data(
                time_array   = t,
                period_val   = row["pl_orbper"],
                t0_val       = row["pl_tranmid"],
                ror_val      = row["ror"],
                b_val        = row["b"],
                u1_val       = row["u_star1"],
                u2_val       = row["u_star2"],
                rho_star_val = row["st_rho"],
                r_star_val   = row["st_rad"],
            )

            # Then we set the priors. We do not use a prior on the stellar density
            infomatrix.set_priors(
                period_prior = np.nanmax(np.abs(row[["pl_orbpererr1", "pl_orbpererr2"]])),
                t0_prior     = np.nanmax(np.abs(row[["pl_tranmiderr1", "pl_tranmiderr2"]])),
                r_star_prior = np.nanmax(np.abs(row[["st_raderr1", "st_raderr2"]])),
                b_prior      = 1/np.sqrt(12),
                u1_prior     = 0.4713,
                u2_prior     = 0.4084,
            )

            # And we calculate the covariance matrix from the information matrix
            try:
                matrix = infomatrix.eval_cov(sigma = np.mean(sigma))
            except ValueError:
                print(f"{host} inversion of matrix failed")
                continue

            # Now we loop through the rows and columns of the matrix to extract the values
            for i, value1 in enumerate(["period", "t0", "ror", "b", "u_star1", "u_star2", "rho_star", "r_star"]):
                for j, value2 in enumerate(["period", "t0", "ror", "b", "u_star1", "u_star2", "rho_star", "r_star"]):
                    # The diagonal gives the standard deviation, i.e. the predicted precision
                    if value1 == value2:
                        std = np.sqrt(np.abs(matrix[i, j]))
                        col = f"{value1}_{exptime}_sd"
                        copy.at[idx, col] = std

    return copy
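
Before launching all the workers, it can be useful to run the function serially on a small chunk. This is just a quick sanity check and assumes the dataframes from the cells above are in memory:

[ ]:

# Serial run on the first two rows of the first chunk (no multiprocessing)
single_result = calculate_prediction((tables[0].head(2), 0))

# The predicted precisions are stored in the *_<exptime>_sd columns
print(single_result.filter(like="_sd").columns.tolist())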
[38]:

# Now we parallelize the calculation and execute it
# May have problems downloading lightcurves authored by DIAMANTE
arguments = [(df, index) for df, index in zip(tables, indices)]

p = mp.Pool(NCORES)
result = list(p.imap(calculate_prediction, arguments))
p.close()
p.join()

final_df = pd.DataFrame()
for df in result:
    final_df = pd.concat([final_df, df])

final_df.to_csv("tois_with_predictions.csv", index=False)
THREAD 0: 1 out of 14
THREAD 3: 1 out of 13
THREAD 2: 1 out of 13
THREAD 1: 1 out of 13




THREAD 6: 1 out of 13
THREAD 5: 1 out of 13
THREAD 8: 1 out of 13
THREAD 11: 1 out of 13
THREAD 4: 1 out of 13
THREAD 7: 1 out of 13
THREAD 10: 1 out of 13
THREAD 9: 1 out of 13








THREAD 3: 2 out of 13

THREAD 0: 2 out of 14

THREAD 4: 2 out of 13

THREAD 7: 2 out of 13

THREAD 1: 2 out of 13

THREAD 5: 2 out of 13

THREAD 9: 2 out of 13

THREAD 8: 2 out of 13

THREAD 10: 2 out of 13

THREAD 2: 2 out of 13

THREAD 3: 3 out of 13

THREAD 11: 2 out of 13

THREAD 6: 2 out of 13

THREAD 0: 3 out of 14

THREAD 9: 3 out of 13

TIC 252928337 lightcurve can't be downloaded (['DIAMANTE'])
THREAD 1: 3 out of 13

THREAD 10: 3 out of 13

THREAD 3: 4 out of 13

THREAD 6: 3 out of 13

THREAD 4: 3 out of 13

THREAD 7: 3 out of 13

THREAD 5: 3 out of 13

THREAD 9: 4 out of 13

THREAD 2: 3 out of 13

THREAD 8: 3 out of 13

THREAD 10: 4 out of 13

THREAD 0: 4 out of 14

THREAD 11: 3 out of 13

THREAD 3: 5 out of 13

THREAD 4: 4 out of 13

THREAD 8: 4 out of 13

THREAD 0: 5 out of 14

THREAD 1: 4 out of 13

THREAD 6: 4 out of 13

THREAD 5: 4 out of 13

THREAD 7: 4 out of 13

THREAD 9: 5 out of 13

THREAD 11: 4 out of 13

THREAD 10: 5 out of 13

THREAD 4: 5 out of 13

THREAD 0: 6 out of 14

THREAD 2: 4 out of 13

THREAD 5: 5 out of 13

THREAD 3: 6 out of 13

THREAD 6: 5 out of 13

THREAD 9: 6 out of 13

THREAD 10: 6 out of 13

THREAD 4: 6 out of 13

THREAD 8: 5 out of 13

THREAD 5: 6 out of 13

THREAD 7: 5 out of 13

THREAD 0: 7 out of 14

THREAD 1: 5 out of 13

THREAD 11: 5 out of 13

THREAD 6: 6 out of 13

THREAD 9: 7 out of 13

THREAD 10: 7 out of 13

THREAD 3: 7 out of 13

THREAD 0: 8 out of 14

THREAD 7: 6 out of 13

THREAD 2: 5 out of 13

THREAD 11: 6 out of 13

THREAD 8: 6 out of 13

THREAD 4: 7 out of 13

THREAD 1: 6 out of 13

THREAD 5: 7 out of 13

THREAD 6: 7 out of 13

THREAD 7: 7 out of 13

THREAD 9: 8 out of 13

THREAD 0: 9 out of 14

THREAD 8: 7 out of 13

THREAD 10: 8 out of 13

THREAD 4: 8 out of 13

THREAD 11: 7 out of 13

THREAD 3: 8 out of 13

THREAD 1: 7 out of 13

THREAD 6: 8 out of 13

THREAD 0: 10 out of 14

THREAD 2: 6 out of 13

THREAD 8: 8 out of 13

THREAD 7: 8 out of 13

THREAD 4: 9 out of 13

THREAD 11: 8 out of 13

THREAD 9: 9 out of 13

THREAD 5: 8 out of 13

THREAD 0: 11 out of 14

THREAD 6: 9 out of 13

THREAD 10: 9 out of 13

THREAD 7: 9 out of 13

THREAD 4: 10 out of 13

THREAD 3: 9 out of 13

THREAD 1: 8 out of 13

THREAD 9: 10 out of 13

THREAD 11: 9 out of 13

THREAD 2: 7 out of 13

THREAD 8: 9 out of 13

THREAD 0: 12 out of 14

THREAD 7: 10 out of 13

THREAD 6: 10 out of 13

THREAD 4: 11 out of 13

THREAD 5: 9 out of 13

THREAD 10: 10 out of 13

THREAD 11: 10 out of 13

THREAD 1: 9 out of 13

THREAD 3: 10 out of 13

THREAD 2: 8 out of 13

THREAD 7: 11 out of 13

THREAD 9: 11 out of 13

THREAD 5: 10 out of 13

THREAD 11: 11 out of 13

THREAD 8: 10 out of 13

THREAD 1: 10 out of 13

THREAD 0: 13 out of 14

THREAD 4: 12 out of 13

THREAD 10: 11 out of 13

THREAD 6: 11 out of 13

THREAD 7: 12 out of 13

THREAD 2: 9 out of 13

THREAD 3: 11 out of 13

THREAD 8: 11 out of 13

THREAD 6: 12 out of 13

THREAD 11: 12 out of 13

THREAD 5: 11 out of 13

THREAD 1: 11 out of 13

THREAD 2: 10 out of 13

THREAD 9: 12 out of 13

THREAD 0: 14 out of 14

THREAD 7: 13 out of 13

THREAD 4: 13 out of 13

THREAD 3: 12 out of 13

THREAD 10: 12 out of 13

THREAD 11: 13 out of 13

THREAD 5: 12 out of 13

THREAD 1: 12 out of 13

THREAD 2: 11 out of 13

THREAD 6: 13 out of 13

THREAD 8: 12 out of 13

THREAD 9: 13 out of 13

THREAD 3: 13 out of 13

THREAD 10: 13 out of 13

THREAD 2: 12 out of 13

THREAD 8: 13 out of 13

THREAD 5: 13 out of 13

THREAD 1: 13 out of 13

THREAD 2: 13 out of 13

Now we calculate the actual improvements in precision from the predicted precisions and collect them into a table

[2]:

final_df = pd.read_csv("tois_with_predictions.csv")

improvements = pd.DataFrame(columns=['toi', 'tid', '20_improv', '120_improv', 'ror_sd_20', 'ror_sd_120', 'ror_sd_1800'])

improvements['toi'] = final_df['toi']
improvements['tid'] = final_df['tid']
improvements['ror_sd_20'] = final_df['ror_20_sd']
improvements['ror_sd_120'] = final_df['ror_120_sd']
improvements['ror_sd_600'] = final_df['ror_600_sd']
improvements['ror_sd_1800'] = final_df['ror_1800_sd']

# Improvement is the fractional reduction of the predicted radius-ratio uncertainty relative to 1800s, in percent
improvements['20_improv'] = (1 - improvements['ror_sd_20'] / improvements['ror_sd_1800']) * 100
improvements['120_improv'] = (1 - improvements['ror_sd_120'] / improvements['ror_sd_1800']) * 100
improvements['600_improv'] = (1 - improvements['ror_sd_600'] / improvements['ror_sd_1800']) * 100

improvements.sort_values(by=['20_improv'], ascending=False, inplace=True)

# And this is our table with all predictions
improvements
[2]:
toi tid 20_improv 120_improv ror_sd_20 ror_sd_120 ror_sd_1800 ror_sd_600 600_improv
11 1677.01 87090944 77.932100 77.709268 0.010058 0.010160 0.045579 0.012324 72.961336
22 2784.01 302766000 70.941629 70.354258 0.023336 0.023808 0.080307 0.036841 54.125414
78 3786.01 321250206 66.841361 66.794015 0.009582 0.009595 0.028897 0.009857 65.889581
12 1701.01 274215536 66.328226 65.003455 0.002721 0.002828 0.008080 0.004805 40.533684
21 2578.01 104986789 65.538223 65.110207 0.001560 0.001580 0.004528 0.001907 57.888268
... ... ... ... ... ... ... ... ... ...
29 3244.01 208091447 4.763420 4.733792 0.007618 0.007620 0.007999 0.007662 4.211289
139 5664.01 158022899 3.407098 3.350451 0.005839 0.005842 0.006045 0.005887 2.607795
127 5599.01 159160230 2.995921 2.898366 0.002671 0.002674 0.002753 0.002705 1.761623
145 5699.01 224328450 2.988077 2.958728 0.006361 0.006363 0.006557 0.006392 2.518649
131 5637.01 136992839 2.825291 2.802809 0.003070 0.003071 0.003159 0.003091 2.167626

157 rows × 9 columns
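
As a quick worked check of the improvement formula, using the radius-ratio precisions from the first row of the table above:

[ ]:

ror_sd_20, ror_sd_1800 = 0.010058, 0.045579   # values from the top row above (TOI 1677.01)
improv_20 = (1 - ror_sd_20 / ror_sd_1800) * 100
print(f"{improv_20:.1f}%")   # ~77.9%, matching the 20_improv column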

We can also convert the table to a LaTeX table

[3]:

# We select the top 10
table = improvements.head(10).copy()

table.drop(labels=['tid', 'ror_sd_20', 'ror_sd_120', 'ror_sd_600', 'ror_sd_1800'], inplace=True, axis=1)
table.rename(columns={
    'toi': 'TOI',
    '20_improv': '20s Improv. [%]',
    '120_improv': '120s Improv [%]',
    '600_improv': '600s Improv [%]'
}, inplace=True)

# Which is the table used in the paper
table.to_latex('improvements_table.tex', index=False, float_format="%.2f")
[6]:

# We also generate a longer table for the appendix
table = improvements.copy()

table.drop(labels=['tid', 'ror_sd_20', 'ror_sd_120', 'ror_sd_600', 'ror_sd_1800'], inplace=True, axis=1)
table.rename(columns={
    'toi': 'TOI',
    '20_improv': '20s Improv. [%]',
    '120_improv': '120s Improv [%]',
    '600_improv': '600s Improv [%]'
}, inplace=True)

table.to_latex('long_improvements_table.tex', index=False, float_format="%.2f",
               caption="Predictions for all the planet candidates considered, in order of decreasing improvement to the radius ratio precision.")

And save a formatted CSV file of all results

[10]:

formatted_table = improvements.drop(labels=['tid', 'ror_sd_20', 'ror_sd_120', 'ror_sd_600', 'ror_sd_1800'], axis=1)

formatted_table.rename(columns={
    'toi': 'TOI',
    '20_improv': '20s_improv',
    '120_improv': '120s_improv',
    '600_improv': '600s_improv'
}, inplace=True)

formatted_table.to_csv("formatted_toi_predictions.csv", index=False)