forked from informationcake/SKA_SDC1_solution
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcatalogue_analysis.py
156 lines (103 loc) · 4.61 KB
/
catalogue_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# Python 3.6. Written by Alex Clarke
# Read in catalogue, do statistics and source classification
import pickle, itertools
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import umap
import datashader as ds
import datashader.transfer_functions as tf
from datashader.transfer_functions import shade, stack
from functools import partial
from datashader.utils import export_image
from datashader.colors import *
# ------ ------ ------ ------ ------ ------ ------ ------ ------ ------
#Loading/saving python data objects
def save_obj(obj, name ):
with open(name + '.pkl', 'wb') as f:
pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)
def load_obj(name ):
with open(name + '.pkl', 'rb') as f:
return pickle.load(f)
# ------ ------ ------ ------ ------ ------ ------ ------ ------ ------
if __name__ == '__main__':
# Load sifted catalogue in, assuming it has been saved as a Pandas Dataframe
cat = load_obj('main_catalogue_sifted_df')
# fix source code type weirdness
cat['s_code_str'] = cat['S_Code'].astype('str').str[2:-1]
# Make resolved column quantifying how extended a source is
cat['resolved'] = ( cat['Total_flux'] - cat['Peak_flux'] ).abs()
cat['fratio'] = ( cat['Total_flux'] / cat['Peak_flux'] )
# Investigations for now, tidy up into functions soon...
# Plot histogram of resolved, coloured by source type
# S = a single-Gaussian source that is the only source in the island
# C = a single-Gaussian source in an island with other sources
# M = a multi-Gaussian source
bins_res = 10 ** np.linspace(np.log10(1e-8), np.log10(10), 100)
plt.hist(cat[cat.s_code_str=='S'].resolved, bins=bins_res, histtype='step', color='red', label='S')
plt.hist(cat[cat.s_code_str=='C'].resolved, bins=bins_res, histtype='step', color='blue', label='C')
plt.hist(cat[cat.s_code_str=='M'].resolved, bins=bins_res, histtype='step', color='green', label='M')
plt.xlabel('|Total flux - Peak flux|')
plt.ylabel('Number')
plt.xscale('log')
plt.yscale('log')
plt.legend()
plt.savefig('Histogram_resolved.png')
# peak/total ratio
plt.hist(cat.fratio, bins=200, histtype='step', label='All')
plt.hist(cat[cat.s_code_str=='S'].fratio, bins=200, histtype='step', color='red', label='S')
plt.hist(cat[cat.s_code_str=='C'].fratio, bins=200, histtype='step', color='blue', label='C')
plt.hist(cat[cat.s_code_str=='M'].fratio, bins=200, histtype='step', color='green', label='M')
plt.xlabel('Total flux / Peak flux')
plt.ylabel('Number')
plt.xlim(0,15)
plt.legend()
plt.yscale('log')
plt.show()
# Plot total flux vs peak flux
plt.scatter(cat.Total_flux, cat.Peak_flux,s=0.4)
plt.xlabel('Total flux, Jy')
plt.ylabel('Peak flux, Jy/beam')
plt.xscale('log')
plt.yscale('log')
plt.savefig('Total_Vs_Peak.png')
# Histogram of peak and total fluxes
bins_res = 10 ** np.linspace(np.log10(1e-8), np.log10(10), 100)
plt.hist(cat.Total_flux, bins=bins_flux, histtype='step', label='Total flux')
plt.hist(cat.Peak_flux, bins=bins_flux, histtype='step', label='Peak flux')
#plt.hist(cat.Isl_Total_flux, bins=bins_flux, histtype='step', label='Island total flux')
plt.xlabel('Jy')
plt.ylabel('Number')
plt.xscale('log')
plt.yscale('log')
plt.legend()
plt.savefig('Histogram_PeakTotal_flux.png')
# Source axis, Maj vs Min
plt.scatter(cat[cat.s_code_str=='S'].Maj*3600, cat[cat.s_code_str=='S'].Min*3600, s=0.3, label='S')
plt.scatter(cat[cat.s_code_str=='C'].Maj*3600, cat[cat.s_code_str=='C'].Min*3600, s=0.3, label='C')
plt.scatter(cat[cat.s_code_str=='M'].Maj*3600, cat[cat.s_code_str=='M'].Min*3600, s=0.3, label='M')
plt.xlabel('Source major axis, arcsec')
plt.ylabel('Source minor axis, arcsec')
plt.xscale('log')
plt.yscale('log')
plt.legend()
plt.savefig('MajVsMin_sourceaxis.png')
plt.scatter(cat.Maj*3600, cat.Min*3600, s=0.3, c=cat.resolved)
plt.colorbar()
plt.show()
# create png
cvs = ds.Canvas(plot_width=200, plot_height=200)
agg = cvs.points(cat, 'Maj', 'Min', ds.mean('resolved'))
img = tf.shade(agg, how='log')
#export_image(img, 'DS-majmin', fmt='.png', background='white')
# generate figure with png created and append colourbar axis
fig = plt.figure(figsize=(10,10)) # y axis larger to fit cbar in
#img = img.imread('DS-majmin.png')
plt.imshow(img)
plt.show()
#
#
data_cols = ['Total_flux', 'Peak_flux', 'Maj', 'Min', 'Isl_Total_flux', 'resolved']
#