"FileDB": Querying Millennium Run particles

Find stratified random sample of dark matter halos in Millennium Simulation
Calculate density profiles (in SQL)
Fit to Hernquist profile
Show fits
Plot parameters

In [9]:
with open('/home/idies/keystone.token', 'r') as f:
    token = f.read().rstrip('\n')
# some of our functions can retrieve the token from a system variable:
import sys
sys.argv.append("--ident="+token)
In [10]:
import warnings
warnings.filterwarnings('ignore')
In [11]:
# Import necessary libraries.
# "BOILERPLATE"
import SciServer.CasJobs
import SciServer.Keystone
import SciServer.SciDrive
import os
import pandas
import tables
import matplotlib.pyplot as plt
import pylab as pl
import numpy
import math
# load library for fitting
from scipy.optimize import curve_fit
In [12]:
# define fitting function from Navarro, Frenk & White 1995
# we will fit log of rdensity profile
def nfw(radius, rho0, Rs):
    return numpy.log10(rho0*(1/((radius/Rs)*(1+radius/Rs)**2)))
In [13]:
# define fitting function from Navarro, Frenk & White 1995
# we will fit log of rdensity profile
def hern(radius, M, a):
    return numpy.log10(0.5*M*a/((radius)*(a+radius)**3)/math.pi)
In [14]:
# do not run this if the table StratHaloSample already exists, otherwise uncomment next line
# define a stratified random sample of DM halos
query='''
with h as (
  select haloid,  np, floor(log10(np)/.2) as npbin
    ,   rank() over(partition by floor(log10(np)/.2) order by newid()) as rank
  from mr 
  where snapnum=63 and haloid=firsthaloinfofgroupid
)
select * 
into MyDB.StratHaloSample
from h where rank <= 10
'''
#Submit job on "slow" queue
jobId=SciServer.CasJobs.submitJob(query, context = "MPAHaloTrees")
submitJob PUT response:  200 OK
In [15]:
# wait for job finishing
SciServer.CasJobs.waitForJob(jobId)
Waiting...Waiting...Waiting...Waiting...Waiting...Waiting...Waiting...Waiting...Waiting...Waiting...Waiting...Waiting...Waiting...Waiting...Waiting...Waiting...Waiting...Waiting...Waiting...Job  541050  finished.
Out[15]:
{'AutoComplete': 0,
 'Created_Table': 'StratHaloSample',
 'Estimate': 500,
 'HostIP': 'MPAHaloTrees_lo',
 'JobID': 541050,
 'Message': 'Query Complete',
 'ModifiedQuery': 'with h as (\n  select haloid,  np, floor(log10(np)/.2) as npbin\n    ,   rank() over(partition by floor(log10(np)/.2) order by newid()) as rank\n  from mr \n  where snapnum=63 and haloid=firsthaloinfofgroupid\n)\nselect * \n\nfrom h where rank <= 10/*CASJOBS_INTO_TABLE:[scitest02].mydb_958083076.mydb.StratHaloSample*/',
 'OutputLoc': '',
 'OutputType': 'QUERY',
 'Params': '',
 'Query': '\nwith h as (\n  select haloid,  np, floor(log10(np)/.2) as npbin\n    ,   rank() over(partition by floor(log10(np)/.2) order by newid()) as rank\n  from mr \n  where snapnum=63 and haloid=firsthaloinfofgroupid\n)\nselect * \ninto MyDB.StratHaloSample\nfrom h where rank <= 10\n',
 'Rows': 254,
 'SendEmail': 0,
 'Status': 5,
 'Target': 'MPAHaloTrees',
 'TaskName': 'REST Job Task',
 'TimeEnd': '2016-04-27T07:17:40.893',
 'TimeStart': '2016-04-27T07:17:10.933',
 'TimeSubmit': '2016-04-27T07:17:07.07',
 'WebServicesID': 958083076}

SQL

The following SQL uses a new feature (as yet not public) for querying raw siulation data (particles) from Millennium simulation snapshots

with ps as (
select h.haloId,p.x-hh.x as x,p.y-hh.y as y,p.z-hh.z as z, hh.np
from mydb.strathalosample h
   inner join mpahalotrees..mr hh
  on hh.haloid=h.haloid 
  and h.rank <= 8
  and h.npbin between 15 and 27
      cross apply dbo.MillenniumParticles(hh.snapnum,
          dbo.Sphere::New(hh.x,hh.y,hh.z,3*hh.halfmassradius).ToString()) p
),
rs as (
select haloid,sqrt(xx+yy+zz) as r, np
from ps
)
select haloid, max(np) as np,
 power(convert(real,10 ),.1floor(log10(r)/.1)) as r1
  , power(convert(real, 10),.1(1+floor(log10(r)/.1))) as r2, count() as num
  from rs where r > 0
  group by haloid, floor(log10(r)/.1)
  order by 1,3
```

In [16]:
# Find particles around each halo using special SQLCLR/C# function, accessing raw simulation files on disk.
# Create histogram of counts in spherical shells (~density profile)
query="""
with ps as (
select h.haloId,p.x-hh.x as x,p.y-hh.y as y,p.z-hh.z as z, hh.np
from mydb.strathalosample h
   inner join mpahalotrees..mr hh
  on hh.haloid=h.haloid 
  and h.rank <= 8
  and h.npbin between 15 and 27
      cross apply dbo.MillenniumParticles(hh.snapnum,
      dbo.Sphere::New(hh.x,hh.y,hh.z,3*hh.halfmassradius).ToString()) p
),
rs as (
select haloid,sqrt(x*x+y*y+z*z) as r, np
from ps
)
select haloid, max(np) as np,
 power(convert(real,10 ),.1*floor(log10(r)/.1)) as r1
  , power(convert(real, 10),.1*(1+floor(log10(r)/.1))) as r2, count(*) as num
  from rs where r > 0
  group by haloid, floor(log10(r)/.1)
  order by 1,3
"""
# query CasJobs table. Using SimulationDB as context
queryResponse = SciServer.CasJobs.executeQuery(query, "SimulationDB",token=token)

# parse results into py DataFrame for further in memory processing
profs = pandas.read_csv(queryResponse,index_col=None)
print("Found "+str(profs.count()[0])+" rows")
executeQuery POST response:  200 OK
Found 2718 rows
In [17]:
# define array of unique haloId, used in further processing
haloIds=numpy.unique(profs['haloid'])
In [18]:
plt.figure(figsize=(18, 15))
plt.xlabel('log10(radius/Mpc)')
plt.ylabel('density in #/Mpc^3')
plt.title('Density Profile')

subPlotNum = 1

# restrict to small sample for demo purposes
nmax=40
count=0
params=[]
cov=[]
for haloId in haloIds:
    df=profs.loc[(profs.haloid ==haloId), ['haloid','np','r1', 'r2', 'num']]
    hs=numpy.array(df['haloid'].tolist())
    n1=3
    n2=len(hs)-1
    np=numpy.array(df['np'].tolist())[0]
    r1=numpy.array(df['r1'].tolist())[n1:n2]
    r2=numpy.array(df['r2'].tolist())[n1:n2]
    num=numpy.array(df['num'].tolist())[n1:n2]
    r=numpy.sqrt(r1*r2)
    v=4*math.pi*(r2**3-r1**3)/3
    d = num/v
    ld = numpy.log10(d)
    lr=numpy.log10(r)

# if fit Navarro, Frenk, WHite profile uncomment next lines
#    fitFunc=nfw
#    p0=[np, 0.05]

# fit Hernquist profile
    fitFunc=hern
    p0=[np, 0.05]

    fitParams, fitCovariances = curve_fit(fitFunc, r,ld, p0=p0)
    params.append(fitParams)
    cov.append(fitCovariances)
    
# plot density profiles as open dots and fit +/- 1 sigma as lines     
    plt.subplot(8,5,subPlotNum)
    subPlotNum += 1
    plt.scatter(lr, ld,facecolors='none', edgecolors='b')
    plt.plot(lr, (fitFunc(r, fitParams[0], fitParams[1])),
             lr, (fitFunc(r, fitParams[0] + numpy.sqrt(fitCovariances[0,0]), fitParams[1]- numpy.sqrt(fitCovariances[1,1]))),
             lr, (fitFunc(r, fitParams[0] - numpy.sqrt(fitCovariances[0,0]), fitParams[1] + numpy.sqrt(fitCovariances[1,1]))))
    plt.title(str(haloId))

    
    count+=1
    if(count >= nmax):
        break
    
plt.show()
In [19]:
def straight(lmass, a, b):
    return a*lmass+b
In [20]:
p1,p2=zip(*params)

p0=[2,0]
ab, errab = curve_fit(straight, numpy.log10(p1), numpy.log10(p2),p0=p0)
xx=[min(numpy.log10(p1)), max(numpy.log10(p1))]
yy=[straight(min(numpy.log10(p1)),ab[0],ab[1]), straight(max(numpy.log10(p1)),ab[0],ab[1])]
ab
Out[20]:
array([ 0.35118366, -2.39385672])
In [21]:
plt.figure(figsize=(10, 10))
plt.xlabel('M')
plt.ylabel('a')
plt.scatter(numpy.log10(p1), numpy.log10(p2))
plt.plot(xx,yy)
plt.show()

Exercise 1: evolution density profile

Modify the query defining the halo sample to provide the evolution of the (main branch of) a massive dark matter halo at z=0.

For ideas see documentation at public Millennium Database site:
http://gavo.mpa-garching.mpg.de/Millennium/
and
http://gavo.mpa-garching.mpg.de/Millennium/Help/mergertrees

In [ ]: