In [84]:
%autosave 20
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
Autosaving every 20 seconds
In [6]:
# `cmath` is the complex-number counterpart of `math`.
import math
import cmath
# Square root of a negative real: the result is a complex number
# (the imaginary unit, displayed as 1j).
cmath.sqrt(-1)
Out[6]:
1j
In [7]:
# Manual open / read / close of the CSV file.
# The try/finally guarantees the handle is released even when read() raises,
# and the explicit encoding keeps the result independent of the locale.
fh = open('diagram.csv', encoding='utf-8')
try:
    text = fh.read()
finally:
    fh.close()
print(text)
0.036950897,-19.812012
0.03500012,32.59825
0.2093362,42.75399
0.25276598,9.813249
0.2743527,-36.13254
0.27199373,-77.60696
0.44834232,395.42206
0.50287396,410.46777
0.49865913,443.23727
0.6310321,322.5719
0.90020174,96.50567
0.9007238,216.59071
0.8012198,380.78067
0.89946604,428.3866
0.89804685,603.06525
1.0035977,827.4952
1.100515,569.43005
1.1055652,728.79663
1.3991741,611.79395
1.699523,1042.7968
2.0069914,1106.9563
1.9971569,849.35736
2.0035028,805.6615
1.9957709,530.5861

In [10]:
# Context-manager form: the file is closed automatically when the block ends.
# The encoding is spelled out so decoding does not depend on the locale.
with open('diagram.csv', encoding='utf-8') as fh:
    text = fh.read()
# Outside the `with` block the handle is already closed, so this prints True.
print(fh.closed)
True
In [27]:
# Parse the two-column, comma-separated file by hand into two float arrays.
x = []
y = []
with open('diagram.csv', encoding='utf-8') as fh:
    for row in fh:
        row = row.strip()
        if not row:
            # A blank line (for instance a trailing one at the end of the
            # file) would break the two-value unpacking below.
            continue
        a, b = row.split(',')
        x.append(float(a))
        y.append(float(b))
x = np.array(x)
y = np.array(y)
print(x)
print(y)
plt.plot(x, y, 'x')

# Write a tab-separated copy with the first column multiplied by 7.
# NOTE(review): the factor 7 is not explained anywhere in this cell - confirm
# its meaning (unit conversion?) and consider naming it as a constant.
with open('diagram.tsv', 'w', encoding='utf-8') as fh:
    for a, b in zip(x * 7, y):
        fh.write('{}\t{}\n'.format(a, b))
[0.0369509  0.03500012 0.2093362  0.25276598 0.2743527  0.27199373
 0.44834232 0.50287396 0.49865913 0.6310321  0.90020174 0.9007238
 0.8012198  0.89946604 0.89804685 1.0035977  1.100515   1.1055652
 1.3991741  1.699523   2.0069914  1.9971569  2.0035028  1.9957709 ]
[ -19.812012   32.59825    42.75399     9.813249  -36.13254   -77.60696
  395.42206   410.46777   443.23727   322.5719     96.50567   216.59071
  380.78067   428.3866    603.06525   827.4952    569.43005   728.79663
  611.79395  1042.7968   1106.9563    849.35736   805.6615    530.5861  ]
In [28]:
# Binary mode: read only the first 16 bytes of the image file and show both
# the raw bytes and their Python type.
with open('cat.jpg', mode='rb') as fh:
    data = fh.read(16)
print(data, type(data), sep='\n')
b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00H'
<class 'bytes'>
In [32]:
from PIL import Image

# Open the JPEG with Pillow and convert the image object to a NumPy array.
img = Image.open('cat.jpg')
cat = np.array(img)
# Element type of the resulting array (uint8 for this file).
cat.dtype
Out[32]:
dtype('uint8')
In [45]:
# Load the CSV as a structured array whose two fields are named 'd' and 'v'.
table = np.genfromtxt('diagram.csv', delimiter=',', names=('d', 'v'))
# Show the first record, then the whole 'd' field.
print(table[0], table['d'], sep='\n')
# A record array exposes the same fields as attributes.
records = np.rec.array(table)
records.d
(0.0369509, -19.812012)
[0.0369509  0.03500012 0.2093362  0.25276598 0.2743527  0.27199373
 0.44834232 0.50287396 0.49865913 0.6310321  0.90020174 0.9007238
 0.8012198  0.89946604 0.89804685 1.0035977  1.100515   1.1055652
 1.3991741  1.699523   2.0069914  1.9971569  2.0035028  1.9957709 ]
Out[45]:
array([0.0369509 , 0.03500012, 0.2093362 , 0.25276598, 0.2743527 ,
       0.27199373, 0.44834232, 0.50287396, 0.49865913, 0.6310321 ,
       0.90020174, 0.9007238 , 0.8012198 , 0.89946604, 0.89804685,
       1.0035977 , 1.100515  , 1.1055652 , 1.3991741 , 1.699523  ,
       2.0069914 , 1.9971569 , 2.0035028 , 1.9957709 ])
In [57]:
# Read a whitespace-delimited table, taking the field names from its first line.
# skip_footer=100 drops the last 100 lines of the file.
# NOTE(review): the reason for skipping exactly 100 lines is not evident here - confirm.
table = np.genfromtxt('freddi.dat', names=True,
                      skip_footer=100)
In [73]:
import gzip
import os
import shutil

# Peek at the first ten raw lines of the gzip-compressed table.
# The file is opened in binary mode, so each line is shown as a bytes literal.
with gzip.open('V404Cyg.txt.gz', mode='rb') as fh:
    print(*(fh.readline() for _ in range(10)), sep='\n')
b'JD\tMagnitude\tUncertainty\tHQuncertainty\tBand\tObserver Code\tComment Code(s)\tComp Star 1\tComp Star 2\tCharts\tComments\tTransfomed\tAirmass\tValidation Flag\tCmag\tKmag\tHJD\tStar Name\tObserver Affiliation\tMeasurement Method\tGrouping Method\tADS Reference\tDigitizer\tCredit\n'
b'2447673.49028\t12.8\t\t\tVis.\tMOBM\t\t\t\tTA Sequence\t\t\t\tZ\t\t\t\tV404 CYG\t\tSTD\t\t\t\tBAAVSS\n'
b'2447674.45972\t12.5\t\t\tVis.\tMOBM\t\t\t\tTA sequence\t\t\t\tZ\t\t\t\tV404 CYG\t\tSTD\t\t\t\tBAAVSS\n'
b'2447674.538\t11.9\t\t\tVis.\tKRT\t\t\t\t\t\t\t\tV\t\t\t\tV404 CYG\t\tSTD\t\t\t\t\n'
b'2447675.47986\t11.9\t\t\tVis.\tMOBM\t\t\t\tTA Sequence\t\t\t\tZ\t\t\t\tV404 CYG\t\tSTD\t\t\t\tBAAVSS\n'
b'2447676.45972\t12.5\t\t\tVis.\tMOBM\t\t\t\tTA Sequence\t\t\t\tZ\t\t\t\tV404 CYG\t\tSTD\t\t\t\tBAAVSS\n'
b'2447680.449\t14.5\t\t\tVis.\tBMU\t\t\t\t\t\t\t\tV\t\t\t\tV404 CYG\tKNVWS\tSTD\t\t\t\t\n'
b'2447680.521\t14.0\t\t\tVis.\tKRT\t\t\t\t\t\t\t\tV\t\t\t\tV404 CYG\t\tSTD\t\t\t\t\n'
b'2447685.05\t14.2\t\t\tVis.\tKOA\t\t\t\t\t\t\t\tV\t\t\t\tV404 CYG\t\tSTD\t\t\t\t\n'
b'2447685.9\t<14.0\t\t\tVis.\tSCE\t\t\t\t\t\t\t\tV\t\t\t\tV404 CYG\t\tSTD\t\t\t\t\n'
In [70]:
# First attempt at loading JD / Magnitude / Uncertainty from the gzipped file.
# The file is tab-separated with many empty fields. Without an explicit
# delimiter, runs of tabs are treated as one separator and column 2 would be
# whichever token happens to come third on the line, not Uncertainty.
# Empty fields are filled with 0.
# NOTE: entries such as '<14.0' are not valid floats and also end up as the
# filling value 0 in this naive load.
table = np.genfromtxt('V404Cyg.txt.gz', names=True,
                      delimiter='\t',
                      usecols=(0, 1, 2,),
                      missing_values=b'', filling_values=0,)
table[:10]
Out[70]:
array([(2447673.49028, 12.8, 0.), (2447674.45972, 12.5, 0.),
       (2447674.538  , 11.9, 0.), (2447675.47986, 11.9, 0.),
       (2447676.45972, 12.5, 0.), (2447680.449  , 14.5, 0.),
       (2447680.521  , 14. , 0.), (2447685.05   , 14.2, 0.),
       (2447685.9    ,  0. , 0.), (2447690.467  , 14.8, 0.)],
      dtype=[('JD', '<f8'), ('Magnitude', '<f8'), ('Uncertainty', '<f8')])
In [83]:
def magn_converter(s):
    """Convert one raw ``Magnitude`` field to a float.

    Fields that start with ``<`` (for example ``<14.0``) are shifted by +900
    so that they land far away from ordinary values and can be separated
    afterwards with a simple threshold on the column.

    Note: this name is defined again further down in the file with different
    behaviour (no +900 shift); the later definition shadows this one.

    Parameters
    ----------
    s : bytes or str
        Raw field text. ``np.genfromtxt`` hands converters either ``bytes``
        or ``str`` depending on its ``encoding`` setting, so both are accepted.

    Returns
    -------
    float
        The parsed value, plus 900 when the field carried a leading ``<``.

    Raises
    ------
    ValueError
        If the text (after removing a leading ``<``) is not a valid float.
    """
    if isinstance(s, bytes):
        # UnicodeDecodeError is a ValueError subclass, so non-ASCII input
        # still surfaces as ValueError, as float(bytes) would.
        s = s.decode('ascii')
    if s.startswith('<'):
        return float(s[1:]) + 900
    return float(s)

# Reload the first three columns, routing the Magnitude column (index 1)
# through magn_converter so that '<'-prefixed entries survive as numbers.
# The explicit tab delimiter keeps empty fields in place; without it, runs of
# tabs collapse and column 2 would not reliably be Uncertainty.
table = np.genfromtxt('V404Cyg.txt.gz', names=True,
                      delimiter='\t',
                      usecols=(0, 1, 2,),
                      dtype=(float, float, float),
                      missing_values=b'', filling_values=0,
                      converters={
                          1: magn_converter
                      })
# magn_converter adds 900 to '<'-prefixed values, so anything above 500 is
# one of those shifted entries rather than an ordinary measurement.
is_upper_limit = table['Magnitude'] > 500
good_data = table[~is_upper_limit]
plt.plot(good_data['JD'], good_data['Magnitude'], 'x')
Out[83]:
[<matplotlib.lines.Line2D at 0x7fe0a947f320>]
In [93]:
# Wrap the structured array in a DataFrame; its field names become the column labels.
df = pd.DataFrame(table)
# Attribute access and item access both select the 'JD' column.
df.JD
df['JD']
# Only this last expression is displayed as the cell output.
df.columns
# NOTE(review): left disabled - presumably because no column is labelled 0; confirm.
# df[0]
Out[93]:
Index(['JD', 'Magnitude', 'Uncertainty'], dtype='object')
In [95]:
# Select the row whose index label is 2; it is returned as a Series keyed by column name.
df.loc[2]
Out[95]:
JD             2447674.538
Magnitude           11.900
Uncertainty          0.000
Name: 2, dtype: float64
In [100]:
# Re-read the complete tab-separated file with pandas (all columns this time).
# Show the inferred column dtypes, followed by the Python type of the first
# Magnitude entry.
df = pd.read_csv('V404Cyg.txt.gz', sep='\t', low_memory=False)
print(df.dtypes, type(df['Magnitude'][0]), sep='\n')
JD                      float64
Magnitude                object
Uncertainty             float64
HQuncertainty           float64
Band                     object
Observer Code            object
Comment Code(s)          object
Comp Star 1              object
Comp Star 2              object
Charts                   object
Comments                 object
Transfomed              float64
Airmass                 float64
Validation Flag          object
Cmag                    float64
Kmag                    float64
HJD                     float64
Star Name                object
Observer Affiliation     object
Measurement Method       object
Grouping Method          object
ADS Reference           float64
Digitizer               float64
Credit                   object
dtype: object
<class 'str'>
In [101]:
# Other file formats to look at: JSON (standard library: `import json`)
# and configuration-style files: .yaml, .yml, .ini
In [113]:
def magn_converter(s):
    """Parse a magnitude string as a float, ignoring one leading ``<``.

    Note: this redefines a function of the same name declared earlier in the
    file; unlike that one, it takes ``str`` input and applies no offset.

    Parameters
    ----------
    s : str
        Raw magnitude text, e.g. ``'12.8'`` or ``'<14.0'``.

    Returns
    -------
    float
        The numeric value with the ``<`` marker (if any) removed.

    Raises
    ------
    ValueError
        If the remaining text is not a valid float.
    """
    text = s[1:] if s.startswith('<') else s
    return float(text)

# Numeric magnitude with any leading '<' marker removed.
df['m'] = df['Magnitude'].map(magn_converter)
# Flag the rows whose raw Magnitude text started with '<'. The vectorised
# string accessor replaces a per-element Python lambda.
df['is_upper_limit'] = df['Magnitude'].str.startswith('<')
df['m']
Out[113]:
0        12.800
1        12.500
2        11.900
3        11.900
4        12.500
5        14.500
6        14.000
7        14.200
8        14.000
9        14.800
10       15.000
11       13.300
12       14.800
13       15.100
14       15.300
15       14.000
16       14.500
17       14.500
18       14.500
19       14.300
20       13.600
21       14.800
22       14.500
23       14.500
24       14.800
25       14.800
26       14.000
27       14.500
28       14.800
29       14.800
          ...  
80753    15.200
80754    14.400
80755    14.400
80756    15.200
80757    14.400
80758    14.900
80759    14.900
80760    14.400
80761    14.400
80762    15.200
80763    14.400
80764    14.400
80765    14.900
80766    15.200
80767    14.100
80768    14.400
80769    15.600
80770    14.100
80771    14.400
80772    15.600
80773    14.100
80774    14.400
80775    17.770
80776    14.400
80777    14.400
80778    15.200
80779    14.400
80780    17.407
80781    14.400
80782    14.400
Name: m, Length: 80783, dtype: float64
In [117]:
upper_limits = df[df.is_upper_limit]