Source code for astro_ghost.starSeparation

from sklearn import svm
import seaborn as sns
import matplotlib.pyplot as plt
from astro_ghost.stellarLocus import *
import pickle
import importlib_resources

[docs]def separateStars_STRM(df, model_path='.', plot=False, verbose=False, starcut='gentle'): """Star-galaxy separation, using a random forest trained on the PS1-STRM-classified star and galaxy labels given in Beck et al., 2021. :param df: Dataframe of PS1 sources. :type df: Pandas DataFrame :param model_path: Filepath to the saved random forest model. :type model_path: str, optional :param plot: If True, shows the separated stars and galaxies in Ap - Kron vs Ap Mag space. :type plot: bool, optional :param verbose: If true, print details of the classification routine. :type verbose: bool, optional :param starcut: Labels corresponding to the classification thresholds required to classify a star as such. Options are 'gentle' (P>0.8), normal (P>0.5), and aggressive (P>0.3). :type starcut: str :return: A dataframe of sources classified as galaxies. :rtype: Pandas DataFrame :return: A dataframe of sources classified as stars. :rtype: Pandas DataFrame """ #remove known galaxies and stars NED_stars = df[df['NED_type'].isin(['*', '**', 'WD*', '!*', '!V*', 'V*', '!Nova'])] NED_gals = df[df['NED_type'] == 'G'] #label the sources we know to be stars and galaxies NED_stars['sourceClass'] = 1 NED_gals['sourceClass'] = 0 #get the remaining objects unsure = df[~df.index.isin(NED_stars.index)] unsure = unsure[~unsure.index.isin(NED_gals.index)] #also drop HII regions, QSOs, etc unsure = unsure[~unsure['NED_type'].isin(['HII', 'QSO', 'SNR', 'PN'])] #remove all sources with bad values for any required PS1 properties unsure_dropped = unsure.dropna(subset=['7DCD','gApMag','gApMag_gKronMag','rApMag','rApMag_rKronMag','iApMag', 'iApMag_iKronMag']) #keep track of the unknown sources with bad values - we don't want to lose them entirely only_na = unsure[~unsure.index.isin(unsure_dropped.index)] # load random forest model modelName = "Star_Galaxy_RealisticModel_GHOST_PS1ClassLabels.sav" stream = importlib_resources.files(__name__).joinpath(modelName).open("rb") if verbose: print("Loading model %s."%modelName) model = pickle.load(stream) # plot the distribution of objects before classifying if plot: sns.set_style("dark") sns.set_context("talk") plt.figure(figsize=(10,8)) plt.plot(unsure['iApMag'], unsure['iApMag_iKronMag'], 'o', alpha=0.1) plt.xlabel(r"Ap Mag, $i$") plt.ylabel(r"Ap - Kron Mag, $i$") plt.savefig("TNS_NoClassificationInfo.pdf") if len(unsure_dropped) <1: if verbose: print("No sources in field with feature values, skipping star/galaxy separation...") return df, unsure_dropped test_X = np.asarray(unsure_dropped[['7DCD','gApMag','gApMag_gKronMag','rApMag','rApMag_rKronMag','iApMag', 'iApMag_iKronMag']]) # define probability threshold for classification cutdict = {'normal':0.5, 'aggressive':0.3, 'gentle':0.8} try: test_y = model.predict_proba(test_X)[:,1] > cutdict[starcut] except: print("Error! I didn't understand your starcut option.") unsure_dropped['sourceClass'] = test_y test_stars = unsure_dropped[unsure_dropped['sourceClass'] == 1] test_gals = unsure_dropped[unsure_dropped['sourceClass'] == 0] # plot the distribution of classified stars and galaxies. if plot: c_gals = '#087e8b' c_stars = '#ffbf00' plt.figure(num=None, figsize=(8, 6), dpi=80, facecolor='w', edgecolor='k') sns.kdeplot(test_gals['iApMag'], test_gals['iApMag_iKronMag'], n_levels=20, fill=False,label='Galaxies', color=c_gals, alpha=0.5,legend=True); if len(test_stars) > 1: sns.kdeplot(test_stars['iApMag'], test_stars['iApMag_iKronMag'], n_levels=20, fill=False,label='Stars', color=c_stars, alpha=0.5); plt.legend() plt.xlim(xmin=13,xmax=23) plt.ylim(ymin=-1,ymax=5) plt.xlabel("Ap Magnitude, $i$",fontsize=16) plt.ylabel("Ap - Kron Magnitude, $i$",fontsize=16) plt.savefig("TNS_STRM_Stars_vs_Gals_Contours.pdf") plt.figure(num=None, figsize=(8, 6), dpi=80, facecolor='w', edgecolor='k') ax = sns.distplot(test_gals['iApMag_iKronMag'],bins=np.linspace(-1, 7, num=100),label='Galaxies',kde=False,hist_kws={"color": c_gals}); if len(test_stars) > 1: sns.distplot(test_stars['iApMag_iKronMag'],bins=np.linspace(-1, 7, num=100),label='Stars',kde=False,hist_kws={"color": c_stars}); sns.distplot(unsure['iApMag_iKronMag'],bins=np.linspace(-1, 7, num=100),label='Total',kde=False,hist_kws={"histtype": "step", "linewidth": 1, "alpha": 1, "color": "black"}); plt.xlim(-1, 3) plt.xlabel("Ap - Kron Mag, $i$",fontsize=16) plt.xlim() plt.ylabel("N",fontsize=16) plt.legend(fontsize=16) plt.savefig("TNS_iAp_iKronMag_Histogram.pdf") # Adding back in the ones we identified as galaxies from our clustering above, and the NED sources. df_gals = pd.concat([test_gals, NED_gals]) df_gals.reset_index(inplace=True, drop=True) # Adding back in the ones we identified as stars from our clustering above, and the NED sources. df_stars = pd.concat([test_stars, NED_stars]) df_stars.reset_index(inplace=True, drop=True) return df_gals, df_stars
[docs]def separateStars_South(df, plot=0, verbose=0, starcut='gentle'): """Star-galaxy separation in the southern hemisphere, using a simple SkyMapper_StarClass threshold cut. :param df: Dataframe of PS1 sources. :type df: Pandas DataFrame :param plot: If True, shows the separated stars and galaxies in Ap - Kron vs Ap Mag space. :type plot: bool, optional :param verbose: If true, print details of the classification routine. :type verbose: bool, optional :param starcut: Labels corresponding to the classification thresholds required to classify a star as such. Options are 'gentle' (P>0.8), normal (P>0.5), and aggressive (P>0.3). :type starcut: str :return: A dataframe of sources classified as galaxies. :rtype: Pandas DataFrame :return: A dataframe of sources classified as stars. :rtype: Pandas DataFrame """ #remove known galaxies and stars NED_stars = df[df['NED_type'].isin(['*', '**', 'WD*', '!*', '!V*', 'V*', '!Nova'])] NED_gals = df[df['NED_type'] == 'G'] #get the remaining objects unsure = df[~df.index.isin(NED_stars.index)] unsure = unsure[~unsure.index.isin(NED_gals.index)] unsure_dropped = unsure[unsure['SkyMapper_StarClass']>0] only_na = unsure[~unsure.index.isin(unsure_dropped.index)] # define probability threshold for classification cutdict = {'normal':0.5, 'aggressive':0.3, 'gentle':0.8} try: test_stars = unsure_dropped[unsure_dropped['SkyMapper_StarClass']> cutdict[starcut]] except: print("Error! I didn't understand your starcut option.") test_gals = unsure_dropped[~unsure_dropped.index.isin(test_stars.index)] # Adding back in the ones we identified as galaxies from our clustering above, and the NED sources. df_gals = pd.concat([test_gals, NED_gals]) df_gals['sourceClass'] = 0 df_gals.reset_index(inplace=True, drop=True) # Adding back in the ones we identified as stars from our clustering above, and the NED sources. df_stars = pd.concat([test_stars, NED_stars]) df_stars['sourceClass'] = 1 df_stars.reset_index(inplace=True, drop=True) return df_gals, df_stars