import numpy as np
from astropy.table import Table
from PIL import Image
from io import BytesIO
import pathlib
import os
import sys
import pandas as pd
import matplotlib.pyplot as plt
from astropy.io import fits
from astropy import units as u
from astropy.coordinates import SkyCoord
import re
import astro_ghost
from astro_ghost.PS1QueryFunctions import *
from astro_ghost.hostMatching import build_ML_df
from astro_ghost.NEDQueryFunctions import getNEDInfo
from astro_ghost.SimbadQueryFunctions import getSimbadInfo
from astro_ghost.gradientAscent import gradientAscent
from astro_ghost.starSeparation import separateStars_STRM, separateStars_South
from astro_ghost.sourceCleaning import clean_dict, removePS1Duplicates, getColors, makeCuts
from astro_ghost.stellarLocus import calc_7DCD
from astro_ghost.DLR import chooseByDLR, chooseByGladeDLR
import requests
import pickle
import pyvo
import glob
from datetime import datetime
from joblib import dump, load
import pandas as pd
#we do a lot of copies, sub-selects and rewrites - no need to warn about everything!
pd.options.mode.chained_assignment = None # default='warn'
[docs]def cleanup(path):
"""Cleans up the working directory.
This is done after host-galaxy association is completed.
:param path: filepath where association files are saved
:type path: str
"""
tablePath = path+"/tables/"
printoutPath = path+'/printouts/'
paths = [tablePath, printoutPath]
for tempPath in paths:
if not os.path.exists(tempPath):
os.mkdir(tempPath)
#move tables to the tables directory, and printouts to the printouts directory
printouts = glob.glob(path+'/*.txt')
for p in printouts:
fn = remove_prefix(p, path)
os.rename(p, printoutPath+fn)
table_ext = ['csv', 'gz']
for end in table_ext:
tables = glob.glob(path+'/*.%s'%end)
for t in tables:
fn = remove_prefix(t, path)
os.rename(t, tablePath+fn)
[docs]def getGHOST(real=False, verbose=False, installpath='', clobber=False):
"""Downloads the GHOST database.
:param real: If True, download the GHOST database. If False, write an empty files with
relevant columns (so that every transient is manually associated).
:type real: bool, optional
:param verbose: If True, print debugging info.
:type verbose: bool, optional
:param installpath: Filepath where GHOST database will be installed.
:type installpath: str
:param clobber: If True, write new GHOST database even if it already exists at installpath.
:type clobber: bool, optional
"""
if not installpath:
try:
installpath = os.environ['GHOST_PATH']
except:
print("Couldn't find where you want to save GHOST. Setting location to package path...")
installpath = astro_ghost.__file__
installpath = installpath.split("/")[:-1]
installpath = "/".join(installpath)
if not os.path.exists(installpath + '/database'):
os.mkdir(installpath + '/database')
else:
if os.path.exists(installpath + '/database/GHOST.csv') & (clobber == False):
print("GHOST database already exists in the install path!")
return
if real:
url = 'https://www.dropbox.com/s/a0fufc3827pfril/GHOST.csv?dl=1'
r = requests.get(url)
fname = installpath + '/database/GHOST.csv'
open(fname , 'wb').write(r.content)
if verbose:
print("Successfully downloaded GHOST database.\n")
else:
#create dummy database
df = create_dummy_df(fullDF=True)
df.to_csv(installpath + "/database/GHOST.csv",index=False)
if verbose:
print("Successfully created dummy database.\n")
[docs]def fracWithHosts(transient_dict):
"""Calculates transient fraction with at least one candidate host.
:param transient_dict: The dictionary of supernovae and their host galaxy candidate objIDs from PS1.
:type transient_dict: dictionary
:return: The fraction of supernovae with at least one candidate host galaxy.
:rtype: dictionary
"""
count = 0
for name, host in transient_dict.items():
# only do matching if there's a found host
if isinstance(host, list) or isinstance(host, np.ndarray):
if len(host) > 0 and np.any(np.array(host == host)):
count += 1
else:
if host == host:
count+=1
return count/len(transient_dict.keys())
[docs]def remove_prefix(text, prefix):
"""Removes the prefix from a string.
Very useful for removing the 'SN' from supernova names!
:param text: The full text.
:type text: str
:param prefix: The prefix to remove from the text.
:type prefix: str
:return: The input text, with prefix removed.
:rtype: str
"""
return text[text.startswith(prefix) and len(prefix):]
[docs]def checkSimbadHierarchy(df, verbose=False):
"""Throw a warning if the source has a parent in simbad!
:param df: The final associated data frame containing host and transient features.
:type df: Pandas DataFrame
:return: The dataframe, after correcting for SIMBAD hierarchical information.
:rtype: Pandas DataFrame
"""
host_DF = df.copy()
HierarchicalHostedSNe = []
parents = []
for idx, row in host_DF.iterrows():
# cone search of the best-fit host in SIMBAD - if it gets it right,
#replace the info with the parent information!
tap_simbad = pyvo.dal.TAPService("https://simbad.u-strasbg.fr/simbad/sim-tap")
query = """SELECT main_id, otype, basic.ra, basic.dec,
DISTANCE( POINT('ICRS', ra, dec), POINT('ICRS', {0}, {1})) AS dist,
h_link.membership, cluster.id AS cluster
FROM (SELECT oid, id FROM basic JOIN ident ON oidref = oid WHERE
CONTAINS(POINT('ICRS',ra,dec),CIRCLE('ICRS',{0},{1},0.000556))=1 ) AS cluster, basic
JOIN h_link ON basic.oid = h_link.parent WHERE h_link.child = cluster.oid ORDER BY dist ASC;
""".format(row.raMean, row.decMean)
result = tap_simbad.search(query)
tap_pandas = result.to_table().to_pandas()
tap_pandas.dropna(subset=['membership'], inplace=True)
tap_pandas.reset_index(drop=True, inplace=True)
if ((not tap_pandas.empty) and (tap_pandas.loc[0, 'membership'] > 50)):
tap_pandas.drop_duplicates(subset=['main_id'], inplace=True)
tap_pandas.reset_index(drop=True, inplace=True)
if (tap_pandas['main_id'].values[0].startswith("VIRTUAL PARENT")) or (tap_pandas['otype'].values[0] == 'GrG'):
continue
if verbose:
print("Warning! Host of %s is the hierarchical child of another object in Simbad, choosing parent as host instead..." % row.TransientName)
# query PS1 for correct host
a = ps1cone(tap_pandas.loc[0, 'ra'], tap_pandas.loc[0, 'dec'], 10./3600)
if a:
a = ascii.read(a)
a = a.to_pandas()
parent = a.iloc[[0]]
parent['TransientName'] = row.TransientName
parent['TransientClass'] = row.TransientClass
parent['TransientRA'] = row.TransientRA
parent['TransientDEC'] = row.TransientDEC
parent = getNEDInfo(parent)
HierarchicalHostedSNe.append(row.TransientName)
parents.append(parent)
if len(parents)>0:
parentDF = pd.concat(parents)
else:
parentDF = pd.DataFrame({})
finalHosts_traditional = host_DF.loc[~host_DF['TransientName'].isin(HierarchicalHostedSNe)]
host_DF = pd.concat([finalHosts_traditional, parentDF], ignore_index=True)
return host_DF
[docs]def getDBHostFromTransientCoords(transientCoords, GHOSTpath=''):
"""Gets the host of a GHOST transient by position.
:param transientCoords: A list of astropy SkyCoord coordinates of transients.
:type transientCoords: array-like
:param GHOSTpath: The path to the saved GHOST database.
:type GHOSTpath: str
:return: The PS1 objects associated with the queried transients in GHOST.
:rtype: Pandas DataFrame
:return: A list of the coordinates of transients not found in the database.
:rtype: host_DF : array-like
"""
fullTable = fullData(GHOSTpath)
notFound = []
host_DF = None
hostList = []
#a little wrapper so that this function can take lists of coords in addition to one coord
for transientCoord in transientCoords:
smallTable = fullTable[np.abs(fullTable['TransientRA'] - transientCoord.ra.degree)<0.1]
smallTable = smallTable[np.abs(smallTable['TransientDEC'] - transientCoord.dec.degree)<0.1]
if len(smallTable) < 1:
notFound.append(transientCoord)
continue
c2 = SkyCoord(smallTable['TransientRA'].values*u.deg, smallTable['TransientDEC'].values*u.deg, frame='icrs')
sep = np.array(transientCoord.separation(c2).arcsec)
if np.nanmin(sep) <= 1:
host_idx = np.where(sep == np.nanmin(sep))[0][0]
host = smallTable.iloc[[host_idx]]
hostList.append(host)
else:
notFound.append(transientCoord)
if len(hostList) > 0:
host_DF = pd.concat(hostList, ignore_index=True)
return host_DF, notFound
[docs]def getDBHostFromTransientName(transientNames, GHOSTpath=''):
"""Gets the host of a GHOST transient by transient name.
:param transientNames: A list of transient names.
:type transientNames: array-like
:param GHOSTpath: The path to the saved GHOST database.
:type GHOSTpath: str
:return: The PS1 objects associated with the queried transients in GHOST.
:rtype: Pandas DataFrame
:return: A list of the coordinates of transients not found in the database.
:rtype: array-like
"""
fullTable = fullData(GHOSTpath)
allHosts = []
notFound = []
host_DF = None
for transientName in transientNames:
transientName = re.sub(r"\s+", "", str(transientName))
host = None
possibleNames = [transientName, transientName.upper(), transientName.lower(), "SN"+transientName]
for name in possibleNames:
if len(fullTable[fullTable['TransientName'] == name])>0:
host = fullTable[fullTable['TransientName'] == name]
allHosts.append(host)
break
if host is None:
notFound.append(transientName)
if len(allHosts) > 0:
host_DF = pd.concat(allHosts, ignore_index=True)
return host_DF, notFound
[docs]def getHostFromHostName(hostNames, GHOSTpath=''):
"""Gets hosts in the GHOST database by name.
:param hostNames: A list of host galaxy names.
:type hostNames: array-like
:param GHOSTpath: The path to the saved GHOST database.
:type GHOSTpath: str
:return: The host galaxies found in GHOST.
:rtype: Pandas DataFrame
"""
fullTable = fullData(GHOSTpath)
possibleNames = []
for name in hostNames:
possibleNames.append(name)
possibleNames.append(name.upper())
possibleNames.append(re.sub(r"\s+", "", str(name)))
possibleNames.append(name.lower())
host = fullTable[fullTable['NED_name'].isin(possibleNames)]
if host is None:
print("Sorry, no hosts were found in our database!\n")
return host
[docs]def getHostFromHostCoords(hostCoords, GHOSTpath=''):
"""Gets hosts in the GHOST database by coordinates.
:param hostCoords : A list of astropy SkyCoord coordinates of host galaxies.
:type hostCoords: array-like
:param GHOSTpath: The path to the saved GHOST database.
:type GHOSTpath: str
:return: The subset of discovered hosts.
:rtype: Pandas DataFrame
"""
fullTable = fullData(GHOSTpath)
c2 = SkyCoord(fullTable['raMean']*u.deg, fullTable['decMean']*u.deg, frame='icrs')
host = []
for hostCoord in hostCoords:
sep = np.array(hostCoord.separation(c2).arcsec)
if np.nanmin(sep) <= 1:
host_idx = np.where(sep == np.nanmin(sep))[0][0]
host.append(fullTable.iloc[[host_idx]])
if len(host) < 1:
print("Sorry, No hosts found in our database!\n")
else:
host = pd.concat(host, ignore_index=True)
return host
[docs]def getTransientStatsFromHostCoords(hostCoord):
"""Prints basic statistics for transient,
based on a query of the coordinates of
its host.
:param hostCoord: The position of the host galaxy.
:type hostCoord: Astropy SkyCoord Object
"""
host = getHostFromHostCoords(hostCoord)
i = 0
if len(host) > 0:
print("Found info for host %s.\n"%host['NED_name'].values[0])
print("Associated supernovae: ")
for SN in np.array(host['TransientName'].values):
SN_frame = host.loc[host['TransientName'] == SN]
print("%i. %s"%(i+1, SN_frame['TransientName'].values[0]))
print("RA, DEC (J2000): %f, %f"%(SN_frame['TransientRA'].values[0], SN_frame['TransientDEC'].values[0]))
print("Redshift: %f"%SN_frame['TransientRedshift'].values[0])
print("Discovery Date: %s"%SN_frame['TransientDiscoveryDate'].values[0].split(" ")[0])
print("Discovery Mag: %.2f"%SN_frame['TransientDiscoveryMag'].values[0])
i+= 1
return
[docs]def getTransientStatsFromHostName(hostName):
"""Returns basic statistics for transient,
based on a query of the name of
its host.
:param hostName: The name of the host galaxy.
:type hostName: str
"""
host = getHostFromHostName(hostName)
hostCoord = SkyCoord(np.unique(host['raMean'])*u.deg, np.unique(host['decMean'])*u.deg, frame='icrs')
getTransientStatsFromHostCoords(hostCoord)
return
[docs]def getHostStatsFromTransientCoords(transientCoordsList, GHOSTpath=''):
""" Returns basic statistics for the most likely
host of a previously identified transient, from the
transient's coordinates.
:param transientCoordsList: A list of astropy SkyCoord coordinates of transients.
:type transientCoordsList: array-like
:param GHOSTpath: The path to the saved GHOST database.
:type GHOSTpath: str
"""
fullTable = fullData(GHOSTpath)
names = []
for transientCoords in transientCoordsList:
c2 = SkyCoord(fullTable['TransientRA']*u.deg, fullTable['TransientDEC']*u.deg, frame='icrs')
sep = np.array(transientCoords.separation(c2).arcsec)
host = None
if np.nanmin(sep) <= 1:
host_idx = np.where(sep == np.nanmin(sep))[0][0]
host = fullTable.iloc[[host_idx]]
names.append(host['TransientName'].values[0])
getHostStatsFromTransientName(names)
[docs]def getHostStatsFromTransientName(transientName, GHOSTpath=''):
"""Returns basic statistics for the most likely host of a previously identified transient.
:param transientName: Array of transient names.
:type transientName: array-like
:param GHOSTpath: The path to the saved GHOST database.
:type GHOSTpath: str
"""
transientName = np.array(transientName)
fullTable = fullData(GHOSTpath)
host, notFound = getDBHostFromTransientName(transientName, GHOSTpath)
if host is not None:
for idx, row in host.iterrows():
if np.unique(row['NED_name']) != "":
print("Found host %s.\n"%row['NED_name'])
else:
print("Found host with PS1 ID %s"%row['objID'])
print("RA, DEC (J2000): %f, %f"%(row['raMean'], row['decMean']))
if row['NED_redshift'] != '':
print("Redshift: %f"%row['NED_redshift'])
print("PS1 rMag: %.2f"%row['rApMag'])
print("g-r r-i i-z")
print("%.2f+/-%.3f %.2f+/-%.3f %.2f+/-%.3f"%(row['g-r'],row['g-rErr'], row['r-i'], row['r-iErr'], row['i-z'], row['i-zErr']))
print("Associated supernovae: ")
if np.unique(row['NED_name']) != "":
tempHost = fullTable[fullTable['NED_name'] == row['NED_name']]
else:
tempHost = fullTable[fullTable['objID'] == row['objID']]
for SN in np.array(tempHost['TransientName'].values):
print(SN)
else:
print("No host info found!")
return
[docs]def getHostImage(transientName='', band="grizy", rad=60, save=False, GHOSTpath=''):
"""Returns a postage stamp of the most likely host in one of the PS1 bands - g,r,i,z,y - as a
fits file with radius rad, and plots the image.
:param transientName: Name of queried transient.
:type transientName: str
:param band: Band for host-galaxy image.
:type band: str
:param rad: Size of the image, in arcsec.
:type rad: float
:param save: If True, save host image.
:type save: bool, optional
:param GHOSTpath: The path to the saved GHOST database.
:type GHOSTpath: str
"""
if transientName == '':
print("Error! Please enter a supernova!\n")
return
fullTable = fullData(GHOSTpath)
host, notFound = getDBHostFromTransientName(transientName, GHOSTpath)
if host is not None:
host.reset_index(drop=True, inplace=True)
tempSize = int(4*float(rad))
fn_save = host['objID'].values[0]
if np.unique(host['NED_name']) != "":
fn_save = host['NED_name'].values[0]
print("Showing postage stamp for %s"%np.unique(host['NED_name'])[0])
ra = np.unique(host['raMean'])[0]
dec = np.unique(host['decMean'])[0]
tempSize = int(4*rad)
img = getcolorim(ra, dec, output_size=tempSize, size=tempSize, filters=band, format="png")
plt.figure(figsize=(10,10))
plt.imshow(img)
if save:
img.save("%s.png" % fn_save)
return
else:
print("Transient host not found!")
[docs]def getTransientSpectra(path, transientName):
"""Retrieves all saved spectra associated with a transient.
:param path: Filepath to spectra.
:type path: str
:param transientName: Name of transient to query.
:type transientName: str
:return: Saved spectra.
:rtype: list of Pandas DataFrames
"""
transientName = remove_prefix(transientName, 'SN')
files = glob.glob(path+"*%s*"%transientName)
specFiles = []
if len(files) > 0:
for file in files:
if remove_prefix(file, path).startswith("osc_"):
spectra = pd.read_csv(file, header=None, names=['Wave(A)', 'I', 'Ierr'])
else:
spectra = pd.read_csv(file, delim_whitespace=True, header=None, names=['x', 'y', 'z'])
specFiles.append(spectra)
else:
print("Sorry! No spectra found.")
return specFiles
[docs]def getHostSpectra(transientName, path):
"""Retrieves all saved spectra associated with a host galaxy.
:param path: Filepath to spectra.
:type path: str
:param transientName: Name of transient to query.
:type transientName: str
:return: Saved spectra.
:rtype: list of Pandas DataFrames
"""
transientName = remove_prefix(transientName, 'SN')
files = glob.glob(path+"*%s_hostSpectra.csv*"%transientName)
specFiles = []
if len(files) > 0:
for file in files:
spectra = pd.read_csv(file)
specFiles.append(spectra)
else:
print("Sorry! No spectra found.")
return specFiles
[docs]def coneSearchPairs(coord, radius, GHOSTpath=''):
"""A cone search for all transient-host pairs within a certain radius, returned as a pandas dataframe.
:param coord: Astropy SkyCoord
:type coord: Position for cone search.
:param radius: Search radius, in arcsec.
:type radius: float
:param GHOSTpath: The path to the saved GHOST database.
:type GHOSTpath: str
:return: GHOST galaxies within search radius.
:rtype: Pandas DataFrame
"""
fullTable = fullData(GHOSTpath)
c2 = SkyCoord(fullTable['TransientRA']*u.deg, fullTable['TransientDEC']*u.deg, frame='icrs')
sep = np.array(coord.separation(c2).arcsec)
hosts = None
if np.nanmin(sep) < radius:
host_idx = np.where(sep < radius)[0]
hosts = fullTable.iloc[host_idx]
return hosts
[docs]def fullData(GHOSTpath=''):
"""Returns the full GHOST database.
:param GHOSTpath: The path to the saved GHOST database.
:type GHOSTpath: str
:return: GHOST database.
:rtype: Pandas DataFrame
"""
if not GHOSTpath:
GHOSTpath = os.getenv('GHOST_PATH')
if not GHOSTpath:
try:
GHOSTpath = astro_ghost.__file__
GHOSTpath = GHOSTpath.split("/")[:-1]
GHOSTpath = "/".join(GHOSTpath)
except:
print("Error! I don't know where you installed GHOST -- set GHOST_PATH as an environmental variable or pass in the GHOSTpath parameter.")
fullTable = pd.read_csv(GHOSTpath+"/database/GHOST.csv")
return fullTable
[docs]def getTransientHosts(transientName=[''], snCoord=[''], snClass=[''], verbose=False, starcut='normal', ascentMatch=False, GLADE=True, px=800, savepath='./', GHOSTpath='', redo_search=True):
"""The wrapper function for the main host association pipeline. The function first
searches the pre-existing GHOST database by transient name, then by transient coordinates, and finally
associates the remaining transients not found.
:param transientName: List of transients to associate.
:type transientName: array-like
:param snCoord: List of astropy SkyCoord transient positions.
:type snCoord: array-like
:param snClass: List of transient classifications (if they exist).
:type snClass: array-like
:param verbose: If True, print logging information.
:type verbose: bool, optional
:param starcut: Strings corresponding to the classification thresholds required to classify a star as such.
Options are \\'gentle\\' (P>0.8), normal (P>0.5), and aggressive (P>0.3).
:type starcut: str
:param ascentMatch: If True, run the gradient ascent algorithm for the transients not matched with the
Directional Light Radius algorithm.
:type ascentMatch: bool
:param px: Size of the image used in gradient ascent (ignored if ascentMatch=False).
:type px: int
:param savepath: Filepath where dataframe of associated hosts will be saved.
:type savepath: str
:param GHOSTpath: The path to the saved GHOST database.
:type GHOSTpath: str
:param redo_search: If True, redo the search with 150\\" cone search radius if hosts were not
found for any of the queried transients.
:type redo_search: bool
:return: Final dataframe of associated transients and host galaxies.
:rtype: Pandas DataFrame
"""
#if no names were passed in, add placeholder names for each transient in the search
if len(transientName) < 1:
transientName = []
print("No transient names listed, adding placeholder names...")
for i in np.arange(len(snCoord)):
transientName.append("Transient_%i" % (i+1))
hostDB = None
tempHost1 = tempHost2 = tempHost3 = None
found_by_name = found_by_coord = found_by_manual = 0
if not isinstance(snCoord, list) and not isinstance(snCoord, np.ndarray):
snCoord = [snCoord]
transientName = [transientName]
snClass = [snClass]
if len(snClass) != len(transientName):
snClass = ['']*len(transientName)
transientName = [x.replace(" ", "") for x in transientName]
df_transients = pd.DataFrame({'Name':np.array(transientName), 'snCoord':np.array(snCoord), 'snClass':np.array(snClass)})
tempHost1, notFoundNames = getDBHostFromTransientName(transientName, GHOSTpath)
found_by_name = len(transientName) - len(notFoundNames)
if tempHost1 is None or len(notFoundNames) > 0:
if verbose:
print("%i transients not found in GHOST by name, trying a coordinate search..."%len(notFoundNames))
df_transients_remaining = df_transients[df_transients['Name'].isin(notFoundNames)]
snCoord_remaining = df_transients_remaining['snCoord'].values
tempHost2, notFoundCoords = getDBHostFromTransientCoords(snCoord_remaining, GHOSTpath);
found_by_coord = len(transientName) - len(notFoundCoords) - found_by_name
if tempHost2 is None or len(notFoundCoords) > 0:
if verbose:
print("%i transients not found in GHOST by name or coordinates, manually associating..."% len(notFoundCoords))
df_transients_remaining = df_transients_remaining[df_transients_remaining['snCoord'].isin(notFoundCoords)]
transientName_remaining = df_transients_remaining['Name'].values
snCoord_remaining = df_transients_remaining['snCoord'].values
snClass_remaining = df_transients_remaining['snClass'].values
tempHost3 = findNewHosts(transientName_remaining, snCoord_remaining, snClass_remaining, verbose, starcut, ascentMatch, px, savepath, GLADE=GLADE)
if (len(transientName_remaining) > 0) and (len(tempHost3)==0) and (redo_search):
#bump up the search radius to 150 arcsec for extremely low-redshift hosts...
if verbose:
print("Couldn't find any hosts! Trying again with a search radius of 150''.")
tempHost3 = findNewHosts(transientName_remaining, snCoord_remaining, snClass_remaining, verbose, starcut, ascentMatch, px, savepath, 150)
found_by_manual = len(tempHost3)
hostDB = pd.concat([tempHost1, tempHost2, tempHost3], ignore_index=True)
hostDB.replace(-999.0, np.nan, inplace=True)
if verbose:
print("%i transients found by name, %i transients found by coordinates, %i transients manually associated."% (found_by_name, found_by_coord, found_by_manual))
return hostDB
[docs]def findNewHosts(transientName, snCoord, snClass, verbose=False, starcut='gentle', ascentMatch=False, px=800, savepath='./', rad=60, GLADE=True):
"""Associates hosts of transients not in the GHOST database.
:param transientName: List of transients to associate.
:type transientName: array-like
:param snCoord: List of astropy SkyCoord transient positions.
:type snCoord: array-like
:param snClass: List of transient classifications (if they exist).
:type snClass: array-like
:param verbose: If True, print logging information.
:type verbose: bool, optional
:param starcut: Strings corresponding to the classification thresholds required to classify a star as such.
Options are \\'gentle\\' (P>0.8), normal (P>0.5), and aggressive (P>0.3).
:type starcut: str, optional
:param ascentMatch: If True, run the gradient ascent algorithm for the transients not matched with the
Directional Light Radius algorithm.
:type ascentMatch: bool, optional
:param px: Size of the image used in gradient ascent (ignored if ascentMatch=False).
:type px: int
:param savepath: Filepath where dataframe of associated hosts will be saved.
:type savepath: str
:param rad: The search radius around each transient position, in arcseconds.
:type rad: float
:return: Final dataframe of associated transients and host galaxies.
:rtype: Pandas DataFrame
"""
if isinstance(transientName, str):
transientName = transientName.replace(" ", "")
snRA = snCoord.ra.degree
snDEC = snCoord.dec.degree
elif isinstance(transientName, list) or isinstance(transientName, np.ndarray):
transientName = [x.replace(" ", "") for x in transientName]
snRA = [x.ra.degree for x in snCoord]
snDEC = [x.dec.degree for x in snCoord]
else:
print("Error! Please pass in your transient name as either a string or a list/array of strings.\n")
transientName_arr = np.array(transientName)
snRA_arr = np.array(snRA)
snDEC_arr = np.array(snDEC)
snClass_arr = np.array(snClass)
dateStr = str(datetime.today()).replace("-", '').replace(".", '').replace(":", "").replace(" ", '')
fn_host = "SNe_TNS_%s_PS1Hosts_%iarcsec.csv" % (dateStr, rad)
fn_transients = 'transients_%s.csv' % dateStr
fn_dict = fn_host[:-4] + ".p"
dir_name = fn_transients[:-4]
if not os.path.exists(savepath):
os.mkdir(savepath)
os.chmod(savepath, 0o777)
os.makedirs(savepath + dir_name)
path = savepath+dir_name+'/'
#create temp dataframe with RA and DEC corresponding to the transient
snDF = pd.DataFrame({'Name':transientName_arr, 'RA':snRA_arr, 'DEC':snDEC_arr, 'HostName':['']*len(snDEC_arr), 'Obj. Type':snClass_arr})
snDF.to_csv(path+fn_transients, index=False)
#new low-z method (beta) - before we do anything else, find and associate with GLADE
if GLADE:
fn_glade = "gladeDLR.txt"
foundGladeHosts, noGladeHosts = chooseByGladeDLR(path, fn_glade, snDF, verbose=verbose, todo="r")
#open transients df and drop the transients already found in GLADE. We'll add these back in at the end
snDF = snDF[snDF['Name'].isin(noGladeHosts)]
fn_transients_preGLADE = fn_transients
fn_transients = 'transients_%s_postGlade.csv' % dateStr
snDF.to_csv(path+fn_transients, index=False)
else:
foundGladeHosts = []
noGladeHosts = snDF['Name'].values
fn_transients_preGLADE = fn_transients
if len(noGladeHosts) < 1:
#just return the GLADE df!
foundGladeHosts['GLADE_source'] = True
foundGladeHosts = ps1crossmatch_GLADE(foundGladeHosts)
host_DF = foundGladeHosts
else:
#begin doing the heavy lifting after GLADE to associate transients with hosts
host_DF = get_hosts(path, fn_transients, fn_host, rad)
if len(host_DF) < 1:
print("ERROR: Found no hosts in cone search during manual association!")
return None
cuts = ["n", "coords", "quality", "duplicate"]
transient_dict =[]
# this bit of trickery is required to combine northern-hemisphere and
# southern-hemisphere source dictionaries
f = open(path+'/dictionaries/'+fn_dict, "rb")
try:
while True:
transient_dict.append(pickle.load(f))
except EOFError:
pass
temp = transient_dict[0]
if len(transient_dict) > 1:
for i in np.arange(len(transient_dict)-1):
temp.update(transient_dict[i+1])
transient_dict = {k.replace(' ', ''): v for k, v in temp.items()}
desperate_dict = transient_dict.copy()
host_DF = getNEDInfo(host_DF)
host_DF_north = host_DF[host_DF['decMean']>-30].reset_index(drop=True)
host_DF_south = host_DF[host_DF['decMean']<=-30].reset_index(drop=True)
host_DF_north = makeCuts(host_DF_north, cuts, transient_dict)
host_DF = pd.concat([host_DF_north, host_DF_south], ignore_index=True)
cut_dict = clean_dict(transient_dict, host_DF, [])
desperate_dict = cut_dict.copy()
hostFrac = fracWithHosts(cut_dict)*100
if verbose:
print("Associated fraction after quality cuts: %.2f%%."%hostFrac)
#automatically add to host association for gradient ascent
lost = np.array([k for k, v in cut_dict.items() if len(v) <1])
host_DF_north = getColors(host_DF_north)
host_DF_north = removePS1Duplicates(host_DF_north)
host_DF_north = calc_7DCD(host_DF_north)
host_DF = pd.concat([host_DF_north, host_DF_south], ignore_index=True)
host_DF = getSimbadInfo(host_DF)
host_DF.to_csv(path+"/candidateHosts_NEDSimbad.csv", index=False)
host_DF_north = host_DF[host_DF['decMean']>-30].reset_index()
host_DF_south = host_DF[host_DF['decMean']<=-30].reset_index()
host_gals_DF_north, stars_DF_north = separateStars_STRM(host_DF_north, plot=0, verbose=verbose, starcut=starcut)
host_gals_DF_south, stars_DF_south = separateStars_South(host_DF_south, plot=0, verbose=verbose, starcut=starcut)
host_gals_DF = pd.concat([host_gals_DF_north, host_gals_DF_south],ignore_index=True)
stars_DF = pd.concat([stars_DF_north, stars_DF_south],ignore_index=True)
if verbose:
print("Removed %i stars. We now have %i candidate host galaxies."%(len(stars_DF), len(host_gals_DF)))
cut_dict = clean_dict(cut_dict, host_gals_DF, [])
stars_DF.to_csv(path+"removedStars.csv",index=False)
#in debugging mode, plotting the ML-identified stars and galaxies along the
#stellar locus can be super helpful!!
#plotLocus(host_gals_DF, color=1, save=1, type="Gals", timestamp=dateStr)
#plotLocus(stars_DF, color=1, save=1, type="Stars", timestamp=dateStr)
host_dict_nospace = {k.replace(' ', ''): v for k, v in cut_dict.items()}
fn = "DLR.txt"
transients = pd.read_csv(path+fn_transients)
with open(path+"/dictionaries/checkpoint_preDLR.p", 'wb') as fp:
dump(host_dict_nospace, fp)
host_DF, host_dict_nospace_postDLR, noHosts, GD_SN = chooseByDLR(path, host_gals_DF, transients, fn, host_dict_nospace, todo="r")
#last-ditch effort -- for the ones with no found host, just pick the nearest NED galaxy.
for transient in noHosts:
tempDF = host_gals_DF[host_gals_DF['objID'].isin(desperate_dict[transient])]
tempDF_gals = tempDF[tempDF['NED_type'].isin(['G', 'PofG', 'GPair', 'GGroup', 'GClstr'])].reset_index()
if len(tempDF_gals) < 1:
continue
transientRA = transients.loc[transients['Name'] == transient, 'RA'].values[0]
transientDEC = transients.loc[transients['Name'] == transient, 'DEC'].values[0]
transientCoord = SkyCoord(transientRA*u.deg, transientDEC*u.deg, frame='icrs')
tempHostCoords = SkyCoord(tempDF_gals['raMean'].values*u.deg, tempDF_gals['decMean'].values*u.deg, frame='icrs')
sep = transientCoord.separation(tempHostCoords)
desperateMatch = tempDF_gals.iloc[[np.argmin(sep.arcsec)]]
host_DF = pd.concat([host_DF, desperateMatch], ignore_index=True)
host_dict_nospace_postDLR[transient] = desperateMatch['objID'].values[0]
if verbose:
print("Desperate match found for %s, %.2f arcsec away." % (transient, sep[np.argmin(sep.arcsec)].arcsec))
if len(noHosts) > 0:
with open(path+"/dictionaries/noHosts_fromDLR.p", 'wb') as fp:
dump(noHosts, fp)
if len(GD_SN) > 0:
with open(path+"/dictionaries/badInfo_fromDLR.p", 'wb') as fp:
dump(GD_SN, fp)
#gradient ascent algorithm for the SNe that didn't pass this stage
SN_toReassociate = np.concatenate([np.array(noHosts), np.array(GD_SN), np.array(list(lost))])
if (len(SN_toReassociate) > 0) and (ascentMatch):
if verbose:
print("%i transients with no host found with DLR, %i transients with bad host data with DLR." %(len(noHosts), len(GD_SN)))
print("Running gradient ascent for %i remaining transients."%len(SN_toReassociate))
print("See GradientAscent.txt for more information.")
fn_GD= path+'/GradientAscent.txt'
host_dict_nospace_postDLR_GD, host_DF, unchanged = gradientAscent(path, transient_dict, host_dict_nospace_postDLR, SN_toReassociate, host_DF, transients, fn_GD, plot=verbose, px=px)
with open(path+"/dictionaries/gals_postGD.p", 'wb') as fp:
dump(host_dict_nospace_postDLR_GD, fp)
if verbose:
print("Hosts not found for %i transients in gradient ascent. Storing names in GD_unchanged.txt" %(len(unchanged)))
with open(path+"/GD_unchanged.txt", 'wb') as fp:
dump(unchanged, fp)
hostFrac = fracWithHosts(host_dict_nospace_postDLR_GD)*100
if verbose:
print("Associated fraction after gradient ascent: %.2f%%."%hostFrac)
final_dict = host_dict_nospace_postDLR_GD.copy()
else:
final_dict = host_dict_nospace_postDLR.copy()
host_DF = build_ML_df(final_dict, host_DF, transients)
# add the glade sources back in!
if len(foundGladeHosts) > 0:
if verbose:
print("Adding %i sources from GLADE back into the catalog..."%len(foundGladeHosts))
host_DF['GLADE_source'] = False
foundGladeHosts['GLADE_source'] = True
foundGladeHosts = ps1crossmatch_GLADE(foundGladeHosts)
#combine
host_DF = pd.concat([host_DF, foundGladeHosts], ignore_index=True)
with open(path+"/dictionaries/" + "Final_Dictionary.p", 'wb') as fp:
dump(final_dict, fp)
host_DF = checkSimbadHierarchy(host_DF, verbose=verbose)
#a few final cleaning steps
#first, add back in some features
host_DF_north = host_DF[host_DF['decMean']>-30].reset_index(drop=True)
host_DF_south = host_DF[host_DF['decMean']<=-30].reset_index(drop=True)
if len(host_DF_north)>0:
host_DF_north = getColors(host_DF_north)
host_DF_north = calc_7DCD(host_DF_north)
host_DF = pd.concat([host_DF_north, host_DF_south], ignore_index=True)
host_DF.drop_duplicates(subset=['TransientName'], inplace=True)
host_DF = host_DF[host_DF['TransientName'] != ""]
host_DF.reset_index(inplace=True, drop=True)
host_DF['TransientName'] = [x.replace(" ", "") for x in host_DF['TransientName']]
allTransients = pd.read_csv(path+fn_transients_preGLADE)
matchFrac = len(host_DF)/len(allTransients)*100
print("Found matches for %.1f%% of events."%matchFrac)
if verbose:
print("Saving table of hosts to %s."%(path+"tables/FinalAssociationTable.csv"))
host_DF.to_csv(path+"FinalAssociationTable.csv", index=False)
#sort things into the relevant folders
cleanup(path)
#remove if there's an extra index column
try:
del host_DF['index']
except:
pass
return host_DF