{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# WebSky bright source catalog workflow\n" ], "id": "aef8d96ab819" }, { "cell_type": "code", "execution_count": 1, "id": "0ed31a46-d481-4958-af82-3889e2f6b80a", "metadata": { "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/global/common/software/cmb/zonca/conda/pycmb/lib/python3.10/site-packages/scipy/__init__.py:155: UserWarning: A NumPy version >=1.18.5 and <1.26.0 is required for this version of SciPy (detected version 1.26.4\n", " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n" ] } ], "source": [ "import h5py\n", "import numpy as np\n", "import healpy as hp\n", "import matplotlib.pyplot as plt\n", "from tqdm import tqdm" ] }, { "cell_type": "code", "execution_count": 2, "id": "263a8761", "metadata": {}, "outputs": [], "source": [ "import os\n", "\n", "# for jupyter.nersc.gov otherwise the notebook only uses 2 cores\n", "num_threads = 128\n", "os.environ[\"OMP_NUM_THREADS\"] = \"1\"" ] }, { "cell_type": "code", "execution_count": 3, "id": "5f8de1f5", "metadata": {}, "outputs": [], "source": [ "cutoff_flux = 1e-3" ] }, { "cell_type": "code", "execution_count": 4, "id": "1e31f0a2", "metadata": {}, "outputs": [], "source": [ "output_filename = \"websky_high_flux_catalog_1mJy.h5\"" ] }, { "cell_type": "code", "execution_count": 5, "id": "82d11cad", "metadata": {}, "outputs": [], "source": [ "plot = False" ] }, { "cell_type": "code", "execution_count": 6, "id": "8e35f4b9-bc39-49e9-af61-67464526f4d9", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/global/cfs/cdirs/sobs/www/users/Radio_WebSky/matched_catalogs_2\n" ] } ], "source": [ "cd /global/cfs/cdirs/sobs/www/users/Radio_WebSky/matched_catalogs_2" ] }, { "cell_type": "code", "execution_count": 7, "id": "031d3e3d-b9d0-4c73-80bb-bbd804a825fe", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "catalog_100.0.h5 catalog_232.0.h5 catalog_353.0.h5 catalog_643.0.h5\r\n", "catalog_111.0.h5 catalog_24.5.h5 catalog_375.0.h5 catalog_67.8.h5\r\n", "catalog_129.0.h5 catalog_256.0.h5 catalog_409.0.h5 catalog_70.0.h5\r\n", "catalog_143.0.h5 catalog_27.3.h5 catalog_41.7.h5 catalog_729.0.h5\r\n", "catalog_153.0.h5 catalog_275.0.h5 catalog_44.0.h5 catalog_73.7.h5\r\n", "catalog_164.0.h5 catalog_294.0.h5 catalog_467.0.h5 catalog_79.6.h5\r\n", "catalog_18.7.h5 catalog_30.0.h5 catalog_47.4.h5 catalog_817.0.h5\r\n", "catalog_189.0.h5 catalog_306.0.h5 catalog_525.0.h5 catalog_857.0.h5\r\n", "catalog_21.6.h5 catalog_314.0.h5 catalog_545.0.h5 catalog_90.2.h5\r\n", "catalog_210.0.h5 catalog_340.0.h5 catalog_584.0.h5 catalog_906.0.h5\r\n", "catalog_217.0.h5 catalog_35.9.h5 catalog_63.9.h5\r\n" ] } ], "source": [ "%ls" ] }, { "cell_type": "code", "execution_count": 8, "id": "ba71f7d1", "metadata": { "tags": [] }, "outputs": [], "source": [ "freqs = [\n", " \"18.7\",\n", " \"24.5\",\n", " \"44.0\",\n", " \"70.0\",\n", " \"100.0\",\n", " \"143.0\",\n", " \"217.0\",\n", " \"353.0\",\n", " \"545.0\",\n", " \"643.0\",\n", " \"729.0\",\n", " \"857.0\",\n", " \"906.0\",\n", "]" ] }, { "cell_type": "code", "execution_count": 9, "id": "d0653ac2-3d67-4480-849d-bcca2727a143", "metadata": { "tags": [] }, "outputs": [], "source": [ "cat = h5py.File(\"catalog_100.0.h5\", \"r\")" ] }, { "cell_type": "markdown", "id": "80b4c835", "metadata": {}, "source": [ "There are no metadata in the file, I guess fluxes are in `Jy`" ] }, { "cell_type": "code", "execution_count": 10, "id": "1ad4445f", "metadata": {}, "outputs": [], "source": [ "high_flux_sources_mask = cat[\"flux\"][:] > cutoff_flux" ] }, { "cell_type": "code", "execution_count": 11, "id": "63276683", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([1.42910628e-09, 1.99535624e-08, 2.29563857e-09], dtype='>f8')" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cat[\"polarized flux\"][:3]" ] }, { "cell_type": "code", "execution_count": 12, "id": "e916cd08", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "372255" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "(high_flux_sources_mask).sum()" ] }, { "cell_type": "code", "execution_count": 13, "id": "4483e313", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.13211945911740433" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "high_flux_sources_mask.mean() * 100" ] }, { "cell_type": "code", "execution_count": 14, "id": "06739d8b-fc8a-46d8-a430-c905f89fb37c", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "flux [3.24291534e-07 3.16862867e-07 3.17171157e-07]\n", "phi [3.22861886 3.22861886 3.22861886]\n", "polarized flux [1.42910628e-09 1.99535624e-08 2.29563857e-09]\n", "theta [1.64009452 1.64009452 1.64009452]\n" ] } ], "source": [ "for k, v in cat.items():\n", " print(k, v[:3])" ] }, { "cell_type": "code", "execution_count": 15, "id": "306159e9", "metadata": {}, "outputs": [], "source": [ "(all_indices,) = np.nonzero(high_flux_sources_mask)" ] }, { "cell_type": "code", "execution_count": 16, "id": "1b7ed1da-8a08-47a0-a513-538bfec1dd9b", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/plain": [ "372255" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(all_indices)" ] }, { "cell_type": "code", "execution_count": 17, "id": "06ee9c0f-c6d6-4f86-ba98-5ec295e18f0b", "metadata": { "tags": [] }, "outputs": [], "source": [ "#all_indices = np.array(sorted(all_indices))\n", "all_indices = np.array(all_indices)" ] }, { "cell_type": "markdown", "id": "68206975", "metadata": {}, "source": [ "Generate 1 source only\n", "\n", "```\n", "argmax = np.array(cat[\"flux\"]).argmax()\n", "all_indices = np.array([argmax])\n", "high_flux_sources_mask = high_flux_sources_mask * 0\n", "high_flux_sources_mask[argmax] = 1\n", "```" ] }, { "cell_type": "code", "execution_count": 18, "id": "886dbbaf-8890-449c-bab4-2f7eba722d31", "metadata": { "tags": [] }, "outputs": [], "source": [ "import pandas as pd\n", "import xarray as xr" ] }, { "cell_type": "code", "execution_count": 19, "id": "b918e34e-b782-480b-8067-c3c31df9c436", "metadata": { "tags": [] }, "outputs": [], "source": [ "columns = [\"theta\", \"phi\", \"flux\", \"polarized flux\"]" ] }, { "cell_type": "code", "execution_count": 20, "id": "0851b3a4-7214-4182-9afb-c21768fd96e4", "metadata": { "tags": [] }, "outputs": [], "source": [ "flux = xr.DataArray(\n", " data=np.zeros((len(all_indices), len(freqs)), dtype=np.float64),\n", " coords={\"index\": all_indices, \"freq\": list(map(float, freqs))},\n", " name=\"flux\",\n", ")\n", "fluxnorm = flux.copy()" ] }, { "cell_type": "code", "execution_count": 21, "id": "394e8f66-d9e7-446e-af75-6940184da4a7", "metadata": { "tags": [] }, "outputs": [], "source": [ "polarized_flux = flux.copy()" ] }, { "cell_type": "code", "execution_count": 22, "id": "6dedf80b-4d9c-4a8b-b028-ca99f5f1393e", "metadata": { "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 13/13 [00:20<00:00, 1.54s/it]\n" ] } ], "source": [ "sources_xr = xr.Dataset(\n", " {\"flux\": flux, \"polarized_flux\": polarized_flux, \"fluxnorm\": fluxnorm}\n", ")\n", "for freq in tqdm(freqs):\n", " cat = h5py.File(f\"catalog_{freq}.h5\", \"r\")\n", " for column in [\"flux\", \"polarized_flux\"]:\n", " sources_xr[column].loc[dict(index=all_indices, freq=float(freq))] = cat[\n", " column.replace(\"_\", \" \")\n", " ][high_flux_sources_mask]" ] }, { "cell_type": "code", "execution_count": 23, "id": "8f1fa80e-f674-40c1-bb9d-901503650240", "metadata": { "tags": [] }, "outputs": [], "source": [ "# sources_xr = sources_xr.sortby(sources_xr.flux.loc[dict(freq=float(freqs[0]))])" ] }, { "cell_type": "code", "execution_count": 24, "id": "074fa7ac-7aae-4513-af86-cbe2a24082b1", "metadata": { "tags": [] }, "outputs": [], "source": [ "sources_xr.coords[\"index\"] = np.arange(len(sources_xr.coords[\"index\"]))" ] }, { "cell_type": "code", "execution_count": 25, "id": "e38b940b-9b60-4162-bb0f-392dcea5563f", "metadata": { "tags": [] }, "outputs": [], "source": [ "if plot:\n", " for s in tqdm(range(len(all_indices))):\n", " sources_xr[\"fluxnorm\"].loc[dict(index=s)] = sources_xr[\"flux\"].loc[\n", " dict(index=s)\n", " ] / sources_xr[\"flux\"].loc[dict(index=s)].sel(freq=100)" ] }, { "cell_type": "code", "execution_count": 26, "id": "8728e619-1716-4894-9ecb-94ba384a2266", "metadata": { "tags": [] }, "outputs": [], "source": [ "#print(sources_xr[\"fluxnorm\"].loc[dict(index=s)], sources_xr[\"flux\"].loc[dict(index=s)])" ] }, { "cell_type": "code", "execution_count": 27, "id": "e6f77665-62a3-45d5-b1cd-73f19e4ba279", "metadata": { "tags": [] }, "outputs": [], "source": [ "if plot:\n", " #sources_xr.fluxnorm.plot(vmin=0, vmax=100)\n", "\n", " plt.figure()\n", " sources_xr.flux.plot(vmin=0, vmax=100)" ] }, { "cell_type": "code", "execution_count": 28, "id": "41cd2b38-0caf-41ae-a8f5-871d205238c3", "metadata": { "tags": [] }, "outputs": [], "source": [ "sources_xr[\"logpolycoefflux\"] = xr.DataArray(\n", " np.zeros((len(all_indices), 5), dtype=np.float64),\n", " dims=[\"index\", \"power\"],\n", " coords={\"power\": np.arange(5)[::-1]},\n", ")\n", "sources_xr[\"logpolycoefpolflux\"] = sources_xr[\"logpolycoefflux\"].copy()\n", "\n", "if plot:\n", " sources_xr[\"logpolycoefnorm\"] = sources_xr[\"logpolycoefflux\"].copy()" ] }, { "cell_type": "code", "execution_count": 29, "id": "a1ecd6d8", "metadata": {}, "outputs": [], "source": [ "from numba import njit\n", "\n", "@njit\n", "def model(freq, a, b, c, d, e):\n", " log_freq = np.log(freq)\n", " return a * log_freq**4 + b * log_freq**3 + c * log_freq**2 + d * log_freq + e" ] }, { "cell_type": "code", "execution_count": 30, "id": "3c08d830", "metadata": {}, "outputs": [], "source": [ "from scipy.optimize import curve_fit" ] }, { "cell_type": "code", "execution_count": 31, "id": "1b709764", "metadata": {}, "outputs": [], "source": [ "from dask.distributed import Client, LocalCluster\n", "\n", "cluster = LocalCluster(n_workers=32, threads_per_worker=2, processes=True)\n", "client = Client(cluster)" ] }, { "cell_type": "code", "execution_count": 32, "id": "1e06b450", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "print(client)" ] }, { "cell_type": "code", "execution_count": 33, "id": "6e73f4df", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0.02853885, 0.01962042, 0.0104193 , 0.00548068, 0.00337772,\n", " 0.00218903, 0.0014772 , 0.00095432, 0.00064614, 0.000557 ,\n", " 0.00049761, 0.00043035, 0.00040939])" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s=0\n", "sources_xr[\"flux\"].sel(index=s).data" ] }, { "cell_type": "code", "execution_count": 34, "id": "16faa0af", "metadata": {}, "outputs": [], "source": [ "def run_curve_fit_factory(field):\n", " def run_curve_fit(s):\n", " return curve_fit(\n", " model, sources_xr.coords[\"freq\"].data, sources_xr[field].sel(index=s).data\n", " )[0]\n", " return run_curve_fit" ] }, { "cell_type": "code", "execution_count": 35, "id": "8a9a654c", "metadata": {}, "outputs": [], "source": [ "from dask.diagnostics import ProgressBar" ] }, { "cell_type": "code", "execution_count": 36, "id": "094e45f8", "metadata": {}, "outputs": [], "source": [ "import dask.bag as db" ] }, { "cell_type": "code", "execution_count": 37, "id": "10df0e53", "metadata": {}, "outputs": [], "source": [ "from dask.distributed import progress\n" ] }, { "cell_type": "code", "execution_count": 38, "id": "ea5a3ea2", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/global/common/software/cmb/zonca/conda/pycmb/lib/python3.10/site-packages/distributed/client.py:3361: UserWarning: Sending large graph of size 142.02 MiB.\n", "This may cause some slowdown.\n", "Consider loading the data with Dask directly\n", " or using futures or delayed objects to embed the data into the graph without repetition.\n", "See also https://docs.dask.org/en/stable/best-practices.html#load-data-with-dask for more information.\n", " warnings.warn(\n", "/global/common/software/cmb/zonca/conda/pycmb/lib/python3.10/site-packages/scipy/__init__.py:155: UserWarning: A NumPy version >=1.18.5 and <1.26.0 is required for this version of SciPy (detected version 1.26.4\n", " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n", "/global/common/software/cmb/zonca/conda/pycmb/lib/python3.10/site-packages/scipy/__init__.py:155: UserWarning: A NumPy version >=1.18.5 and <1.26.0 is required for this version of SciPy (detected version 1.26.4\n", " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n", "/global/common/software/cmb/zonca/conda/pycmb/lib/python3.10/site-packages/scipy/__init__.py:155: UserWarning: A NumPy version >=1.18.5 and <1.26.0 is required for this version of SciPy (detected version 1.26.4\n", " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n", "/global/common/software/cmb/zonca/conda/pycmb/lib/python3.10/site-packages/scipy/__init__.py:155: UserWarning: A NumPy version >=1.18.5 and <1.26.0 is required for this version of SciPy (detected version 1.26.4\n", " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n", "/global/common/software/cmb/zonca/conda/pycmb/lib/python3.10/site-packages/scipy/__init__.py:155: UserWarning: A NumPy version >=1.18.5 and <1.26.0 is required for this version of SciPy (detected version 1.26.4\n", " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n", "/global/common/software/cmb/zonca/conda/pycmb/lib/python3.10/site-packages/scipy/__init__.py:155: UserWarning: A NumPy version >=1.18.5 and <1.26.0 is required for this version of SciPy (detected version 1.26.4\n", " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n", "/global/common/software/cmb/zonca/conda/pycmb/lib/python3.10/site-packages/scipy/__init__.py:155: UserWarning: A NumPy version >=1.18.5 and <1.26.0 is required for this version of SciPy (detected version 1.26.4\n", " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n", "/global/common/software/cmb/zonca/conda/pycmb/lib/python3.10/site-packages/scipy/__init__.py:155: UserWarning: A NumPy version >=1.18.5 and <1.26.0 is required for this version of SciPy (detected version 1.26.4\n", " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n", "/global/common/software/cmb/zonca/conda/pycmb/lib/python3.10/site-packages/scipy/__init__.py:155: UserWarning: A NumPy version >=1.18.5 and <1.26.0 is required for this version of SciPy (detected version 1.26.4\n", " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n", "/global/common/software/cmb/zonca/conda/pycmb/lib/python3.10/site-packages/scipy/__init__.py:155: UserWarning: A NumPy version >=1.18.5 and <1.26.0 is required for this version of SciPy (detected version 1.26.4\n", " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n", "/global/common/software/cmb/zonca/conda/pycmb/lib/python3.10/site-packages/scipy/__init__.py:155: UserWarning: A NumPy version >=1.18.5 and <1.26.0 is required for this version of SciPy (detected version 1.26.4\n", " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n", "/global/common/software/cmb/zonca/conda/pycmb/lib/python3.10/site-packages/scipy/__init__.py:155: UserWarning: A NumPy version >=1.18.5 and <1.26.0 is required for this version of SciPy (detected version 1.26.4\n", " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n", "/global/common/software/cmb/zonca/conda/pycmb/lib/python3.10/site-packages/scipy/__init__.py:155: UserWarning: A NumPy version >=1.18.5 and <1.26.0 is required for this version of SciPy (detected version 1.26.4\n", " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n", "/global/common/software/cmb/zonca/conda/pycmb/lib/python3.10/site-packages/scipy/__init__.py:155: UserWarning: A NumPy version >=1.18.5 and <1.26.0 is required for this version of SciPy (detected version 1.26.4\n", " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n", "/global/common/software/cmb/zonca/conda/pycmb/lib/python3.10/site-packages/scipy/__init__.py:155: UserWarning: A NumPy version >=1.18.5 and <1.26.0 is required for this version of SciPy (detected version 1.26.4\n", " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n", "/global/common/software/cmb/zonca/conda/pycmb/lib/python3.10/site-packages/scipy/__init__.py:155: UserWarning: A NumPy version >=1.18.5 and <1.26.0 is required for this version of SciPy (detected version 1.26.4\n", " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n", "/global/common/software/cmb/zonca/conda/pycmb/lib/python3.10/site-packages/scipy/__init__.py:155: UserWarning: A NumPy version >=1.18.5 and <1.26.0 is required for this version of SciPy (detected version 1.26.4\n", " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n", "/global/common/software/cmb/zonca/conda/pycmb/lib/python3.10/site-packages/scipy/__init__.py:155: UserWarning: A NumPy version >=1.18.5 and <1.26.0 is required for this version of SciPy (detected version 1.26.4\n", " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n", "/global/common/software/cmb/zonca/conda/pycmb/lib/python3.10/site-packages/scipy/__init__.py:155: UserWarning: A NumPy version >=1.18.5 and <1.26.0 is required for this version of SciPy (detected version 1.26.4\n", " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n", "/global/common/software/cmb/zonca/conda/pycmb/lib/python3.10/site-packages/scipy/__init__.py:155: UserWarning: A NumPy version >=1.18.5 and <1.26.0 is required for this version of SciPy (detected version 1.26.4\n", " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n", "/global/common/software/cmb/zonca/conda/pycmb/lib/python3.10/site-packages/scipy/__init__.py:155: UserWarning: A NumPy version >=1.18.5 and <1.26.0 is required for this version of SciPy (detected version 1.26.4\n", " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n", "/global/common/software/cmb/zonca/conda/pycmb/lib/python3.10/site-packages/scipy/__init__.py:155: UserWarning: A NumPy version >=1.18.5 and <1.26.0 is required for this version of SciPy (detected version 1.26.4\n", " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n", "/global/common/software/cmb/zonca/conda/pycmb/lib/python3.10/site-packages/scipy/__init__.py:155: UserWarning: A NumPy version >=1.18.5 and <1.26.0 is required for this version of SciPy (detected version 1.26.4\n", " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n", "/global/common/software/cmb/zonca/conda/pycmb/lib/python3.10/site-packages/scipy/__init__.py:155: UserWarning: A NumPy version >=1.18.5 and <1.26.0 is required for this version of SciPy (detected version 1.26.4\n", " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n", "/global/common/software/cmb/zonca/conda/pycmb/lib/python3.10/site-packages/scipy/__init__.py:155: UserWarning: A NumPy version >=1.18.5 and <1.26.0 is required for this version of SciPy (detected version 1.26.4\n", " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n", "/global/common/software/cmb/zonca/conda/pycmb/lib/python3.10/site-packages/scipy/__init__.py:155: UserWarning: A NumPy version >=1.18.5 and <1.26.0 is required for this version of SciPy (detected version 1.26.4\n", " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n", "/global/common/software/cmb/zonca/conda/pycmb/lib/python3.10/site-packages/scipy/__init__.py:155: UserWarning: A NumPy version >=1.18.5 and <1.26.0 is required for this version of SciPy (detected version 1.26.4\n", " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n", "/global/common/software/cmb/zonca/conda/pycmb/lib/python3.10/site-packages/scipy/__init__.py:155: UserWarning: A NumPy version >=1.18.5 and <1.26.0 is required for this version of SciPy (detected version 1.26.4\n", " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n", "/global/common/software/cmb/zonca/conda/pycmb/lib/python3.10/site-packages/scipy/__init__.py:155: UserWarning: A NumPy version >=1.18.5 and <1.26.0 is required for this version of SciPy (detected version 1.26.4\n", " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/global/common/software/cmb/zonca/conda/pycmb/lib/python3.10/site-packages/scipy/__init__.py:155: UserWarning: A NumPy version >=1.18.5 and <1.26.0 is required for this version of SciPy (detected version 1.26.4\n", " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n", "/global/common/software/cmb/zonca/conda/pycmb/lib/python3.10/site-packages/scipy/__init__.py:155: UserWarning: A NumPy version >=1.18.5 and <1.26.0 is required for this version of SciPy (detected version 1.26.4\n", " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n", "/global/common/software/cmb/zonca/conda/pycmb/lib/python3.10/site-packages/scipy/__init__.py:155: UserWarning: A NumPy version >=1.18.5 and <1.26.0 is required for this version of SciPy (detected version 1.26.4\n", " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 18.7 s, sys: 11.5 s, total: 30.2 s\n", "Wall time: 39 s\n" ] } ], "source": [ "%%time\n", "\n", "sources_xr[\"logpolycoefflux\"].data = db.range(len(all_indices), npartitions=num_threads).map(run_curve_fit_factory(\"flux\")).compute()" ] }, { "cell_type": "code", "execution_count": 39, "id": "ba2f0e67", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/global/common/software/cmb/zonca/conda/pycmb/lib/python3.10/site-packages/distributed/client.py:3361: UserWarning: Sending large graph of size 142.02 MiB.\n", "This may cause some slowdown.\n", "Consider loading the data with Dask directly\n", " or using futures or delayed objects to embed the data into the graph without repetition.\n", "See also https://docs.dask.org/en/stable/best-practices.html#load-data-with-dask for more information.\n", " warnings.warn(\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 16.2 s, sys: 10.1 s, total: 26.3 s\n", "Wall time: 33.3 s\n" ] } ], "source": [ "%%time\n", "\n", "sources_xr[\"logpolycoefpolflux\"].data = db.range(len(all_indices), npartitions=num_threads).map(run_curve_fit_factory(\"polarized_flux\")).compute()" ] }, { "cell_type": "code", "execution_count": 40, "id": "cec940c0", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 3 µs, sys: 3 µs, total: 6 µs\n", "Wall time: 11 µs\n" ] } ], "source": [ "%%time\n", "\n", "if plot:\n", " sources_xr[\"logpolycoefnorm\"].data = db.range(len(all_indices), npartitions=num_threads).map(run_curve_fit_factory(\"fluxnorm\")).compute()" ] }, { "cell_type": "code", "execution_count": 41, "id": "6dbd89aa-fe34-401f-8d9d-43330c8057c9", "metadata": { "tags": [] }, "outputs": [], "source": [ "# for s in range(len(all_indices)):\n", "# sources_xr[\"logpolycoefflux\"].loc[dict(index=s)], cov = curve_fit(\n", "# model, sources_xr.coords[\"freq\"], sources_xr.flux.sel(index=s)\n", "# )\n", "# sources_xr[\"logpolycoefpolflux\"].loc[dict(index=s)], cov = curve_fit(\n", "# model, sources_xr.coords[\"freq\"], sources_xr.polarized_flux.sel(index=s)\n", "# )\n", "# if plot:\n", "# sources_xr[\"logpolycoefnorm\"].loc[dict(index=s)], cov = curve_fit(\n", "# model, sources_xr.coords[\"freq\"], sources_xr.fluxnorm.sel(index=s)\n", "# )" ] }, { "cell_type": "code", "execution_count": 42, "id": "fbfa997a-54f3-43ab-bfbe-6b1ae107a368", "metadata": { "tags": [] }, "outputs": [], "source": [ "if plot:\n", " for s in range(len(all_indices)):\n", " plt.figure()\n", " sources_xr.flux.sel(index=s).plot(marker=\"o\", linestyle=\"none\") # , xscale=\"log\")\n", " sources_xr.fluxnorm.sel(index=s).plot(\n", " marker=\"o\", linestyle=\"none\"\n", " ) # , xscale=\"log\")\n", "\n", " plt.loglog(\n", " sources_xr.coords[\"freq\"],\n", " model(sources_xr.coords[\"freq\"], *sources_xr.logpolycoefflux.sel(index=s)),\n", " )\n", " plt.loglog(\n", " sources_xr.coords[\"freq\"],\n", " model(sources_xr.coords[\"freq\"], *sources_xr.logpolycoefnorm.sel(index=s)),\n", " )\n", " plt.grid()" ] }, { "cell_type": "code", "execution_count": 43, "id": "31df666c-4e49-40f1-b884-9e3eec1da294", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/plain": [ "(\n", " array(-17557.7742453),\n", " \n", " array(23993.56673443))" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sources_xr.logpolycoefflux.min(), sources_xr.logpolycoefflux.max()" ] }, { "cell_type": "code", "execution_count": 44, "id": "43f02e70-7a7e-408d-851e-17d4f6356f5e", "metadata": { "tags": [] }, "outputs": [], "source": [ "if plot:\n", " plt.figure(figsize=(12, 5))\n", " plt.subplot(121)\n", " sources_xr.logpolycoefflux.plot(vmax=50, vmin=-50)\n", " plt.subplot(122)\n", " sources_xr.logpolycoefnorm.plot(vmax=50, vmin=-50)" ] }, { "cell_type": "code", "execution_count": 45, "id": "a8be3926-00e6-4a14-a66a-060424b6d405", "metadata": { "tags": [] }, "outputs": [], "source": [ "if plot:\n", " plt.figure(figsize=(15, 8))\n", "\n", " for power in range(5):\n", " plt.subplot(231 + power)\n", "\n", " np.fabs(sources_xr.logpolycoefflux.loc[dict(power=power)]).plot.hist(\n", " bins=np.logspace(-0, 4, 20), density=False, lw=3, label=\"fluxes\"\n", " )\n", "\n", " np.fabs(sources_xr.logpolycoefnorm.loc[dict(power=power)]).plot.hist(\n", " bins=np.logspace(-0, 4, 20),\n", " density=False,\n", " histtype=\"step\",\n", " lw=2,\n", " label=\"normalized fluxes\",\n", " linestyle=\"--\",\n", " )\n", " plt.grid()\n", " plt.title(f\"Power {power}\")\n", " plt.legend()\n", " plt.xscale(\"log\")\n", " plt.xlabel(None)" ] }, { "cell_type": "code", "execution_count": 46, "id": "f7f80d98", "metadata": {}, "outputs": [], "source": [ "output_catalog = sources_xr[[\"logpolycoefflux\",\"logpolycoefpolflux\"]]" ] }, { "cell_type": "code", "execution_count": 47, "id": "af92c175", "metadata": {}, "outputs": [], "source": [ "output_catalog[\"index\"] = all_indices" ] }, { "cell_type": "code", "execution_count": 48, "id": "e761dc39", "metadata": {}, "outputs": [], "source": [ "output_catalog.logpolycoefflux.attrs[\"units\"] = \"Jy\"\n", "output_catalog.logpolycoefpolflux.attrs[\"units\"] = \"Jy\"" ] }, { "cell_type": "code", "execution_count": 49, "id": "3606bcfa", "metadata": {}, "outputs": [], "source": [ "for coord in [\"theta\", \"phi\"]:\n", " output_catalog = output_catalog.assign_coords(\n", " **{coord:((\"index\"), cat[coord][high_flux_sources_mask].astype(np.float64))})" ] }, { "cell_type": "code", "execution_count": 50, "id": "bc3a3c86", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
<xarray.Dataset>\n",
       "Dimensions:             (index: 372255, power: 5)\n",
       "Coordinates:\n",
       "  * index               (index) int64 11253 16428 24110 ... 281755430 281755795\n",
       "  * power               (power) int64 4 3 2 1 0\n",
       "    theta               (index) float64 2.655 2.659 2.596 ... 1.38 1.377 1.426\n",
       "    phi                 (index) float64 4.177 3.977 4.17 ... 0.4638 0.5054\n",
       "Data variables:\n",
       "    logpolycoefflux     (index, power) float64 0.0002901 -0.006796 ... 0.01476\n",
       "    logpolycoefpolflux  (index, power) float64 5.318e-05 -0.001129 ... -0.005808
" ], "text/plain": [ "\n", "Dimensions: (index: 372255, power: 5)\n", "Coordinates:\n", " * index (index) int64 11253 16428 24110 ... 281755430 281755795\n", " * power (power) int64 4 3 2 1 0\n", " theta (index) float64 2.655 2.659 2.596 ... 1.38 1.377 1.426\n", " phi (index) float64 4.177 3.977 4.17 ... 0.4638 0.5054\n", "Data variables:\n", " logpolycoefflux (index, power) float64 0.0002901 -0.006796 ... 0.01476\n", " logpolycoefpolflux (index, power) float64 5.318e-05 -0.001129 ... -0.005808" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "output_catalog" ] }, { "cell_type": "code", "execution_count": 51, "id": "631c8dca", "metadata": {}, "outputs": [], "source": [ "output_catalog.coords[\"theta\"].attrs[\"units\"] = \"rad\"\n", "output_catalog.coords[\"phi\"].attrs[\"units\"] = \"rad\"" ] }, { "cell_type": "code", "execution_count": 52, "id": "84f0228d", "metadata": {}, "outputs": [], "source": [ "flux_100 = np.polynomial.polynomial.polyval(np.log(100), np.array(output_catalog[\"logpolycoefflux\"])[:,::-1].T, tensor=False)" ] }, { "cell_type": "code", "execution_count": 53, "id": "14a27c5e", "metadata": {}, "outputs": [], "source": [ "output_catalog[\"flux_100\"] = np.polynomial.polynomial.polyval(np.log(100), output_catalog[\"logpolycoefflux\"][:,::-1].T, tensor=False)" ] }, { "cell_type": "code", "execution_count": 54, "id": "04e95036", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
<xarray.DataArray 'flux_100' ()>\n",
       "array(41.02290808)
" ], "text/plain": [ "\n", "array(41.02290808)" ] }, "execution_count": 54, "metadata": {}, "output_type": "execute_result" } ], "source": [ "output_catalog[\"flux_100\"].max()" ] }, { "cell_type": "code", "execution_count": 55, "id": "907b7a1a", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(372255, 5)" ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.array(output_catalog[\"logpolycoefflux\"]).shape" ] }, { "cell_type": "code", "execution_count": 56, "id": "a6ba3897", "metadata": {}, "outputs": [], "source": [ "output_catalog = output_catalog.sortby(\"flux_100\", ascending=False)" ] }, { "cell_type": "code", "execution_count": 57, "id": "55eab2fb", "metadata": {}, "outputs": [], "source": [ "del output_catalog[\"flux_100\"]" ] }, { "cell_type": "code", "execution_count": 58, "id": "87e06bf2", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.003478966281996676" ] }, "execution_count": 58, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.polyval(output_catalog.logpolycoefflux[0], np.log(100))" ] }, { "cell_type": "code", "execution_count": 59, "id": "8e981f2c", "metadata": {}, "outputs": [], "source": [ "output_catalog.attrs[\"notes\"] = \\\n", "\"\"\"Catalog of sources where the flux in Jy at any frequency is calculated with a 5th order polynomial in the logarithm of the frequency in GHz, separately for temperature and polarization.\n", "The catalog does not contain information about the polarization angle of a source.\n", "The catalog sorted in descending order based on the source flux at 100 GHz\"\"\"" ] }, { "cell_type": "code", "execution_count": 60, "id": "26c1e7a7", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
<xarray.Dataset>\n",
       "Dimensions:             (index: 372255, power: 5)\n",
       "Coordinates:\n",
       "  * index               (index) int64 11253 16428 24110 ... 281755430 281755795\n",
       "  * power               (power) int64 4 3 2 1 0\n",
       "    theta               (index) float64 2.655 2.659 2.596 ... 1.38 1.377 1.426\n",
       "    phi                 (index) float64 4.177 3.977 4.17 ... 0.4638 0.5054\n",
       "Data variables:\n",
       "    logpolycoefflux     (index, power) float64 0.0002901 -0.006796 ... 0.01476\n",
       "    logpolycoefpolflux  (index, power) float64 5.318e-05 -0.001129 ... -0.005808\n",
       "Attributes:\n",
       "    notes:    Catalog of sources where the flux in Jy at any frequency is cal...
" ], "text/plain": [ "\n", "Dimensions: (index: 372255, power: 5)\n", "Coordinates:\n", " * index (index) int64 11253 16428 24110 ... 281755430 281755795\n", " * power (power) int64 4 3 2 1 0\n", " theta (index) float64 2.655 2.659 2.596 ... 1.38 1.377 1.426\n", " phi (index) float64 4.177 3.977 4.17 ... 0.4638 0.5054\n", "Data variables:\n", " logpolycoefflux (index, power) float64 0.0002901 -0.006796 ... 0.01476\n", " logpolycoefpolflux (index, power) float64 5.318e-05 -0.001129 ... -0.005808\n", "Attributes:\n", " notes: Catalog of sources where the flux in Jy at any frequency is cal..." ] }, "execution_count": 60, "metadata": {}, "output_type": "execute_result" } ], "source": [ "output_catalog" ] }, { "cell_type": "code", "execution_count": 61, "id": "6cc1bd05", "metadata": {}, "outputs": [], "source": [ "output_catalog.to_netcdf(output_filename, format=\"NETCDF4\") # requires netcdf4 package" ] }, { "cell_type": "code", "execution_count": 62, "id": "e106b83e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "-rw-rw---- 1 zonca sobs 37M Sep 19 11:15 websky_high_flux_catalog_1mJy.h5\r\n" ] } ], "source": [ "%ls -lah $output_filename" ] }, { "cell_type": "code", "execution_count": 63, "id": "f38b5686", "metadata": {}, "outputs": [], "source": [ "import xarray" ] }, { "cell_type": "code", "execution_count": 64, "id": "7226bf84", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
<xarray.Dataset>\n",
       "Dimensions:             (index: 372255, power: 5)\n",
       "Coordinates:\n",
       "  * index               (index) int64 11253 16428 24110 ... 281755430 281755795\n",
       "  * power               (power) int64 4 3 2 1 0\n",
       "    theta               (index) float64 ...\n",
       "    phi                 (index) float64 ...\n",
       "Data variables:\n",
       "    logpolycoefflux     (index, power) float64 ...\n",
       "    logpolycoefpolflux  (index, power) float64 ...\n",
       "Attributes:\n",
       "    notes:    Catalog of sources where the flux in Jy at any frequency is cal...
" ], "text/plain": [ "\n", "Dimensions: (index: 372255, power: 5)\n", "Coordinates:\n", " * index (index) int64 11253 16428 24110 ... 281755430 281755795\n", " * power (power) int64 4 3 2 1 0\n", " theta (index) float64 ...\n", " phi (index) float64 ...\n", "Data variables:\n", " logpolycoefflux (index, power) float64 ...\n", " logpolycoefpolflux (index, power) float64 ...\n", "Attributes:\n", " notes: Catalog of sources where the flux in Jy at any frequency is cal..." ] }, "execution_count": 64, "metadata": {}, "output_type": "execute_result" } ], "source": [ "xarray.open_dataset(output_filename)" ] }, { "cell_type": "code", "execution_count": 65, "id": "505d4b7c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 65, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import h5py\n", "f = h5py.File(output_filename, 'r')\n", "f[\"logpolycoefflux\"]" ] }, { "cell_type": "code", "execution_count": 66, "id": "10bd3b7e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "b'Jy'" ] }, "execution_count": 66, "metadata": {}, "output_type": "execute_result" } ], "source": [ "f[\"logpolycoefflux\"].attrs[\"units\"]" ] }, { "cell_type": "code", "execution_count": 67, "id": "a0b98440", "metadata": {}, "outputs": [], "source": [ "!mv $output_filename ~/p/issues/202405_pysm_catalog_pixell/" ] } ], "metadata": { "@webio": { "lastCommId": null, "lastKernelId": null }, "kernelspec": { "display_name": "pycmb", "language": "python", "name": "pycmb" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.0" } }, "nbformat": 4, "nbformat_minor": 5 }