Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENH] Adds extraction of physio signals from DICOMs #446

Draft
wants to merge 25 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 23 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
f4e8cfd
[ENH] Adds extraction of physio signals from DICOMs
pvelasco May 5, 2020
e832ae6
Adds heuristic for physio extraction.
pvelasco May 5, 2020
aff779f
Moves the installation of `bidsphysio` to `info.py`
pvelasco May 6, 2020
9de4bd6
Minor formating in heudiconv/convert.py
pvelasco May 6, 2020
e935490
Minor formating in heudiconv/convert.py
pvelasco May 6, 2020
9de5114
Minor formating in heudiconv/dicoms.py
pvelasco May 6, 2020
892bc6a
Minor formatting updates
pvelasco May 6, 2020
e2774f5
Minor formatting updates
pvelasco May 6, 2020
cca6c7c
Adds `assert_cwd_unchanged` to `test_regression`
pvelasco May 6, 2020
532c630
RF: convert - convert_physio returns if bids_options is None
pvelasco May 6, 2020
4ac551e
ENH: Adds "AcquisitionTime" to the `seqinfo`
pvelasco Jan 6, 2021
5b33646
Switch back order of seqinfo_fields
pvelasco Jan 7, 2021
74d55d5
Switch back order of SeqInfo arguments in dicom.py
pvelasco Jan 8, 2021
0fb0c2f
Adds unittest to check "time" in the right position in dicominfo.tsv
pvelasco Jan 8, 2021
46c0bdb
ENH: Allows the user to save the Phoenix Report (Siemens) in the sour…
pvelasco Jan 8, 2021
371d006
Changed calls to bidsphysio to conform with newest version
pvelasco Jan 8, 2021
81ec2a2
Merge branch 'master' into dcm_physio
pvelasco Jan 8, 2021
2768e7a
BF(py3.5): explicitly case path to str for open
yarikoptic Jan 12, 2021
fe3fc34
ENH(minor): sort imports
yarikoptic Jan 12, 2021
38d6360
Merge pull request #6 from cbinyu/adds_acq_time_to_seqinfo
pvelasco Jan 19, 2021
593062f
Merge branch 'dcm_physio' into handles_phoenix_file
pvelasco Jan 19, 2021
62841a7
Merge pull request #7 from cbinyu/handles_phoenix_file
pvelasco Jan 19, 2021
5c78593
Add environment marker (Py>3.5) for dcm2bids requirement.
pvelasco Jan 21, 2021
0745845
Change name of extra_requires from dcm2bids to physio
pvelasco Jan 21, 2021
15e4a49
Delete some unneeded checks in the bids_physio heuristic.
pvelasco Jan 22, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions heudiconv/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,8 @@ def convert(items, converter, scaninfo_suffix, custom_callable, with_prov,
if outtype == 'dicom':
convert_dicom(item_dicoms, bids_options, prefix,
outdir, tempdirs, symlink, overwrite)
elif outtype == 'physio':
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we would need to document that, e.g. at https://github.com/nipy/heudiconv/blob/HEAD/docs/heuristics.rst#infotodictseqinfos (make it all into a nice itemized list there for nii, dicom and now physio)

convert_physio(item_dicoms, bids_options, prefix)
elif outtype in ['nii', 'nii.gz']:
assert converter == 'dcm2niix', ('Invalid converter '
'{}'.format(converter))
Expand Down Expand Up @@ -580,6 +582,49 @@ def convert_dicom(item_dicoms, bids_options, prefix,
shutil.copyfile(filename, outfile)


def convert_physio(item_dicoms, bids_options, prefix):
"""Save DICOM physiology as BIDS physio files

Parameters
pvelasco marked this conversation as resolved.
Show resolved Hide resolved
----------
item_dicoms : list of filenames
DICOMs to save
bids_options : list or None
If not None then save to BIDS format. List may be empty
or contain bids specific options
prefix : string
Conversion outname

Returns
pvelasco marked this conversation as resolved.
Show resolved Hide resolved
-------
None
"""
if bids_options is None:
return

try:
from bidsphysio.dcm2bids.dcm2bidsphysio import dcm2bids
except ImportError:
lgr.warning(
"bidsphysio.dcm2bids not found. "
"Not extracting physiological recordings."
)
return
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would be ok with erroring out here since if heuristic did instruct to get those physio - we better have all dependencies


item_dicoms = list(map(op.abspath, item_dicoms)) # absolute paths
if len(item_dicoms) > 1:
lgr.warning(
"More than one PHYSIO file has been found for this series. "
"If each file corresponds to a different signal, all is OK. "
"If multiple files have the same signal, only the signal "
"from the last file will be saved."
)
for dicom_file in item_dicoms:
physio_data = dcm2bids(dicom_file)
if physio_data.labels():
physio_data.save_to_bids_with_trigger(prefix)


def nipype_convert(item_dicoms, prefix, with_prov, bids_options, tmpdir, dcmconfig=None):
"""
Converts DICOMs grouped from heuristic using Nipype's Dcm2niix interface.
Expand Down
15 changes: 12 additions & 3 deletions heudiconv/dicoms.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,8 @@ def create_seqinfo(mw, series_files, series_id):
patient_age=dcminfo.get('PatientAge'),
patient_sex=dcminfo.get('PatientSex'),
date=dcminfo.get('AcquisitionDate'),
series_uid=dcminfo.get('SeriesInstanceUID')
series_uid=dcminfo.get('SeriesInstanceUID'),
time=dcminfo.get('AcquisitionTime'),
)
return seqinfo

Expand Down Expand Up @@ -265,8 +266,16 @@ def group_dicoms_into_seqinfos(files, grouping, file_filter=None,
series_id = '-'.join(map(str, series_id))
if mw.image_shape is None:
# this whole thing has no image data (maybe just PSg DICOMs)
# nothing to see here, just move on
continue
# If this is a Siemens PhoenixZipReport or PhysioLog, keep it:
if (
mw.dcm_data.SeriesDescription == 'PhoenixZIPReport'
or mw.dcm_data.SeriesDescription.endswith('_PhysioLog')
):
# just give it a dummy shape, so that we can continue:
mw.image_shape = (0, 0, 0)
else:
# nothing to see here, just move on
continue
seqinfo = create_seqinfo(mw, series_files, series_id)

if per_studyUID:
Expand Down
38 changes: 38 additions & 0 deletions heudiconv/heuristics/bids_PhoenixReport.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
"""Heuristic demonstrating conversion of the PhoenixZIPReport from Siemens.

It only cares about converting a series with have PhoenixZIPReport in their
series_description and outputs **only to sourcedata**.
"""


def create_key(template, outtype=('nii.gz',), annotation_classes=None):
if template is None or not template:
raise ValueError('Template must be a valid format string')
return template, outtype, annotation_classes


def infotodict(seqinfo):
"""Heuristic evaluator for determining which runs belong where

allowed template fields - follow python string module:

item: index within category
subject: participant id
seqitem: run number during scanning
subindex: sub index within group
"""
sbref = create_key('sub-{subject}/func/sub-{subject}_task-QA_sbref', outtype=('nii.gz', 'dicom',))
scout = create_key('sub-{subject}/anat/sub-{subject}_T1w', outtype=('nii.gz', 'dicom',))
phoenix_doc = create_key('sub-{subject}/misc/sub-{subject}_phoenix', outtype=('dicom',))

info = {sbref: [], scout: [], phoenix_doc: []}
for s in seqinfo:
if (
'PhoenixZIPReport' in s.series_description
and s.image_type[3] == 'CSA REPORT'
):
info[phoenix_doc].append({'item': s.series_id})
if 'scout' in s.series_description.lower():
info[scout].append({'item': s.series_id})

return info
99 changes: 99 additions & 0 deletions heudiconv/heuristics/bids_physio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
"""
Heuristic demonstrating extraction of physiological data from CMRR
fMRI DICOMs

We want to make sure the run number for the _sbref, _phase and
_physio matches that of the corresponding _bold. For "normal"
scanning, you can just rely on the {item} value, but if you have a
functional run with just saving the magnitude and then one saving
both magnitude and phase, you would have _run-01_bold, _run-02_bold
and _run-01_phase, but the phase image corresponds to _run-02_bold,
so the run number in the filename will not match
"""


def create_key(template, outtype=('nii.gz',), annotation_classes=None):
if template is None or not template:
raise ValueError('Template must be a valid format string')
return template, outtype, annotation_classes

def infotodict(seqinfo):
"""Heuristic evaluator for determining which runs belong where

allowed template fields - follow python string module:

item: index within category
subject: participant id
seqitem: run number during scanning
subindex: sub index within group
"""

info = {}
run_no = 0
for idx, s in enumerate(seqinfo):
# We want to make sure the _SBRef, PhysioLog and phase series
# (if present) are labeled the same as the main (magnitude)
# image. So we only focus on the magnitude series (to exclude
# phase images) without _SBRef at the end of the series_
# description and then we search if the phase and/or _SBRef
# are present.
if (
'epfid2d' in s.sequence_name
and (
'M' in s.image_type
or 'FMRI' in s.image_type
)
and not s.series_description.lower().endswith('_sbref')
and not 'DERIVED' in s.image_type
):
run_no += 1
bold = create_key(
'sub-{subject}/func/sub-{subject}_task-test_run-%02d_bold' % run_no
)
info[bold] = [{'item': s.series_id}]
next_series = idx+1 # used for physio log below

### is phase image present? ###
# At least for Siemens systems, if magnitude/phase was
# selected, the phase images come as a separate series
# immediatelly following the magnitude series.
# (note: make sure you don't check beyond the number of
# elements in seqinfo...)
if (
idx+1 < len(seqinfo)
and 'P' in seqinfo[idx+1].image_type
):
phase = create_key(
'sub-{subject}/func/sub-{subject}_task-test_run-%02d_phase' % run_no
)
info[phase] = [{'item': seqinfo[idx+1].series_id}]
next_series = idx+2 # used for physio log below

### SBREF ###
# here, within the functional run code, check to see if
# the previous run's series_description ended in _sbref,
# to assign the same run number.
if (
idx > 0
and seqinfo[idx-1].series_description.lower().endswith('_sbref')
):
sbref = create_key(
'sub-{subject}/func/sub-{subject}_task-test_run-%02d_sbref' % run_no
)
info[sbref] = [{'item': seqinfo[idx-1].series_id}]

### PHYSIO LOG ###
# here, within the functional run code, check to see if
# the next run image_type lists "PHYSIO", to assign the
# same run number.
if (
next_series < len(seqinfo)
and 'PHYSIO' in seqinfo[next_series].image_type
):
physio = create_key(
'sub-{subject}/func/sub-{subject}_task-test_run-%02d_physio' % run_no,
outtype = ('physio',)
)
info[physio] = [{'item': seqinfo[next_series].series_id}]

return info
5 changes: 4 additions & 1 deletion heudiconv/info.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,10 @@
'extras': [
'duecredit', # optional dependency
], # Requires patched version ATM ['dcmstack'],
'datalad': ['datalad >=%s' % MIN_DATALAD_VERSION]
'datalad': ['datalad >=%s' % MIN_DATALAD_VERSION],
'dcm2bids': [
pvelasco marked this conversation as resolved.
Show resolved Hide resolved
'bidsphysio.dcm2bids >=1.4.3; python_version>"3.5"', # if dicoms with physio need to be converted
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
'bidsphysio.dcm2bids >=1.4.3; python_version>"3.5"', # if dicoms with physio need to be converted
'bidsphysio.dcm2bids >=1.4.3', # if dicoms with physio need to be converted

as 3.5 is already below what we support

]
}

# Flatten the lists
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

all those binary files below adding up to over 1MB -- I think should be moved to some external dataset and used via fetch_data. ATM fetch_data hardcodes to access from datasets.datalad.org but I think we should change that -- if full url is provided - just use that URL, and then share them on some repo, could be even not annex, just regular git repo on github.

Later we should improve this fetch_data to make data persistent locally etc... but not for this PR.

Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
4 changes: 4 additions & 0 deletions heudiconv/tests/data/samplePhysio/README.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
samplePhysio dataset

It contains phantom functional images and physiological recordings
using CMRR Multi-Band EPI saved as DICOMs.
23 changes: 22 additions & 1 deletion heudiconv/tests/test_dicoms.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
import os.path as op
import json
from glob import glob

import pytest

from heudiconv.external.pydicom import dcm
from heudiconv.cli.run import main as runner
from heudiconv.convert import nipype_convert
from heudiconv.dicoms import parse_private_csa_header, embed_dicom_and_nifti_metadata
from heudiconv.dicoms import (
OrderedDict,
embed_dicom_and_nifti_metadata,
group_dicoms_into_seqinfos,
parse_private_csa_header,
)
from .utils import (
assert_cwd_unchanged,
TESTS_DATA_PATH,
Expand Down Expand Up @@ -64,3 +70,18 @@ def test_embed_dicom_and_nifti_metadata(tmpdir):

assert out3.pop("existing") == "data"
assert out3 == out2


def test_group_dicoms_into_seqinfos(tmpdir):
"""Tests for group_dicoms_into_seqinfos"""

# 1) Check that it works for PhoenixDocuments:
# set up testing files
dcmfolder = op.join(TESTS_DATA_PATH, 'Phoenix')
dcmfiles = glob(op.join(dcmfolder, '*', '*.dcm'))

seqinfo = group_dicoms_into_seqinfos(dcmfiles, 'studyUID', flatten=True)

assert type(seqinfo) is OrderedDict
assert len(seqinfo) == len(dcmfiles)
assert [s.series_description for s in seqinfo] == ['AAHead_Scout_32ch-head-coil', 'PhoenixZIPReport']
17 changes: 17 additions & 0 deletions heudiconv/tests/test_heuristics.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,3 +176,20 @@ def test_notop(tmpdir, bidsoptions):
assert not op.exists(pjoin(tmppath, 'Halchenko/Yarik/950_bids_test4', fname))
else:
assert op.exists(pjoin(tmppath, 'Halchenko/Yarik/950_bids_test4', fname))


def test_phoenix_doc_conversion(tmpdir):
tmppath = tmpdir.strpath
subID = 'Phoenix'
args = (
"-c dcm2niix -o %s -b -f bids_PhoenixReport --files %s -s %s"
% (tmpdir, pjoin(TESTS_DATA_PATH, 'Phoenix'), subID)
).split(' ')
runner(args)

# check that the Phoenix document has been extracted (as gzipped dicom) in
# the sourcedata/misc folder:
assert op.exists(pjoin(tmppath, 'sourcedata', 'sub-%s', 'misc', 'sub-%s_phoenix.dicom.tgz') % (subID, subID))
# check that no "sub-<subID>/misc" folder has been created in the BIDS
# structure:
assert not op.exists(pjoin(tmppath, 'sub-%s', 'misc') % subID)
5 changes: 5 additions & 0 deletions heudiconv/tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,11 @@ def test_cache(tmpdir):
assert (cachedir / 'S01.auto.txt').exists()
assert (cachedir / 'S01.edit.txt').exists()

# check dicominfo has "time" as last column:
with open(str(cachedir / 'dicominfo.tsv'), 'r') as f:
cols = f.readline().split()
assert cols[26] == "time"


def test_no_etelemetry():
# smoke test at large - just verifying that no crash if no etelemetry
Expand Down
40 changes: 39 additions & 1 deletion heudiconv/tests/test_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,25 @@
from heudiconv.external.pydicom import dcm
from heudiconv.utils import load_json
# testing utilities
from .utils import fetch_data, gen_heudiconv_args, TESTS_DATA_PATH
from .utils import (
fetch_data,
gen_heudiconv_args,
assert_cwd_unchanged,
TESTS_DATA_PATH
)

have_datalad = True
try:
from datalad.support.exceptions import IncompleteResultsError
except ImportError:
have_datalad = False

have_bidsphysio = True
try:
from bidsphysio.dcm2bids.dcm2bidsphysio import dcm2bids
except ImportError:
have_bidsphysio = False


@pytest.mark.skipif(not have_datalad, reason="no datalad")
@pytest.mark.parametrize('subject', ['sub-sid000143'])
Expand Down Expand Up @@ -84,6 +95,33 @@ def test_multiecho(tmpdir, subject='MEEPI', heuristic='bids_ME.py'):
assert 'echo-' not in event


@assert_cwd_unchanged(ok_to_chdir=True) # so we cd back after tmpdir.chdir
@pytest.mark.skipif(not have_bidsphysio, reason="no bidsphysio")
def test_physio(tmpdir, subject='samplePhysio', heuristic='bids_physio.py'):
pvelasco marked this conversation as resolved.
Show resolved Hide resolved
tmpdir.chdir()
outdir = tmpdir.mkdir('out').strpath
template = "{subject}/*/*.dcm"
args = gen_heudiconv_args(
TESTS_DATA_PATH, outdir, subject, heuristic,template=template
)
runner(args) # run conversion

# Check we get only one image file:
func_images = glob(op.join('out', 'sub-' + subject, 'func', '*.nii.gz'))
assert len(func_images) == 1
# The corresponding json:
_json = func_images[0].replace('.nii.gz', '.json')
assert op.exists(_json)
# For each physiological signal, we get the json and tsv.gz:
for s in ['respiratory','cardiac']:
expectedFileName = func_images[0].replace(
'_bold.nii.gz',
'_recording-' + s + '_physio'
)
assert op.exists(expectedFileName + '.json')
assert op.exists(expectedFileName + '.tsv.gz')


@pytest.mark.parametrize('subject', ['merged'])
def test_grouping(tmpdir, subject):
dicoms = [
Expand Down
3 changes: 2 additions & 1 deletion heudiconv/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@
'patient_sex', # 23
'date', # 24
'series_uid', # 25
]
'time', # 26
]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this would conflict with WiP of #637 / #581 but the main concern is that I do not see it needed/used at all by the target need of this PR -- conversion of physio data. Spurious wishful change? ;-)


SeqInfo = namedtuple('SeqInfo', seqinfo_fields)

Expand Down