From Sven:

Each spectrum can be downloaded from a predictable URL built from its `sobject_id` (provided the spectrum exists).

```
import wget

sobject_id_list = [140711003901030, 140711003901360]

# Each spectrum sits at <base>/<first 6 digits of sobject_id>/<sobject_id>/<sobject_id>_allstar_fit_spectrum.fits
base_url = "https://cloud.datacentral.org.au/teamdata/GALAH/public/GALAH_DR4/analysis_products_allstar"
for sobject_id in sobject_id_list:
    sid = str(sobject_id)
    url = f"{base_url}/{sid[:6]}/{sid}/{sid}_allstar_fit_spectrum.fits"
    wget.download(url)
```

Until bulk download is available for all spectra, download only spectra with APOGEE cross-match.

Relevant wget command:
```
# Long-option spelling of: wget -c -nc -i urls.txt -P /path/to/save
wget --continue --no-clobber --input-file=urls.txt --directory-prefix=/path/to/save

```


UPDATE: it looks like all the spectra are bundled into gzipped tarballs, one per 6-digit `sobject_id` prefix, at paths like:
https://cloud.datacentral.org.au/teamdata/GALAH/public/GALAH_DR4/analysis_products_allstar/140811.tar.gz

In [9]:
import astropy.table as at
import numpy as np

In [10]:
# Load the GALAH DR4 allstar catalog.
galah = at.Table.read("DR4/galah_dr4_allstar_240705.fits")

# The first 6 characters of each sobject_id (as a string) name the server-side
# bucket that the star's spectrum is filed under.
id_bucket = np.array(
    [sobject_id[:6] for sobject_id in galah["sobject_id"].astype(str)]
)

In [11]:
# Distinct buckets, sorted; the count is the number of tarballs to fetch.
unq_buckets = np.unique(id_bucket)
unq_buckets.size

655

In [13]:
# Build one tarball URL per 6-character sobject_id bucket and save the list,
# one URL per line, to dr4-spectra-urls.txt.
url_base = "https://cloud.datacentral.org.au/teamdata/GALAH/public/GALAH_DR4/analysis_products_allstar"
urls = [f"{url_base}/{bucket}.tar.gz" for bucket in unq_buckets]

with open("dr4-spectra-urls.txt", "w") as f:
    # writelines() expects an iterable of lines; handing it a single joined
    # string makes it iterate character by character. Emit newline-terminated
    # lines instead (which also writes nothing at all when there are no URLs).
    f.writelines(f"{url}\n" for url in urls)

In [15]:
# Text of the Slurm batch script that downloads every tarball listed in
# dr4-spectra-urls.txt into DR4/spectra. The shebang appears exactly once, as
# the first line of the script (the original text repeated it by accident).
# In the script body, xargs -n1 -P8 hands one URL to each wget invocation and
# keeps up to 8 of them running at a time.
dl_script_text = """#!/bin/zsh -l
#SBATCH -J dl-galah
#SBATCH -o dl-galah.o
#SBATCH -e dl-galah.e
#SBATCH -N 1
#SBATCH -t 48:00:00
#SBATCH -p cca
#SBATCH -C rome

cd /mnt/home/apricewhelan/data/GALAH
mkdir -p DR4/spectra
cat dr4-spectra-urls.txt | xargs -n1 -P8 wget -c -nc -P DR4/spectra
"""
# Write the submit script next to the notebook.
with open("dr4-spectra-download-submit.sh", "w") as f:
    f.write(dl_script_text)

------------------------

### Old approach: only download spectra with APOGEE cross-match

In [2]:
# Load both catalogs for the cross-match: the APOGEE DR17 allStarLite table
# and the GALAH DR4 allstar table.
apogee = at.Table.read("../APOGEE/DR17/allStarLite-dr17-synspec_rev1.fits")
galah = at.Table.read("DR4/galah_dr4_allstar_240705.fits")



In [3]:
# Derive an APOGEE-style ID ("2M" + tmass_id, presumably the 2MASS designation)
# for every GALAH star so the two catalogs can be joined on it.
# Iterate over the column rather than the table: iterating a Table builds a Row
# object per star, which is much slower on large catalogs and yields the same
# values.
galah["apogee_id"] = [f"2M{tmass_id}" for tmass_id in galah["tmass_id"]]

In [4]:
# Inner join: keep only stars present in both catalogs, matching APOGEE's
# APOGEE_ID against the apogee_id column derived for GALAH.
joined = at.join(
    apogee,
    galah,
    keys_left="APOGEE_ID",
    keys_right="apogee_id",
    join_type="inner",
)
# Number of cross-matched rows.
len(joined)

68438

In [7]:
url_base = "https://cloud.datacentral.org.au/teamdata/GALAH/public/GALAH_DR4/analysis_products_allstar"

# One per-spectrum FITS URL for every cross-matched star in `joined`.
# NOTE(review): the snippet from Sven at the top of this file puts an extra
# /<sobject_id>/ directory between the 6-digit prefix and the file name; this
# pattern omits it -- confirm which layout the server actually uses.
urls = [
    f"{url_base}/{sobject_id[:6]!s}/{sobject_id!s}_allstar_fit_spectrum.fits"
    for sobject_id in joined["sobject_id"].astype(str)
]

In [8]:
# Spot-check the first generated URL.
urls[0]

'https://cloud.datacentral.org.au/teamdata/GALAH/public/GALAH_DR4/analysis_products_allstar/140811/140811005001201_allstar_fit_spectrum.fits'