Skip to content

Commit

Permalink
Updating install reqs for #886
Browse files Browse the repository at this point in the history
  • Loading branch information
adkinsrs committed Sep 16, 2024
1 parent d971fdb commit 0deb41c
Show file tree
Hide file tree
Showing 10 changed files with 64 additions and 59 deletions.
3 changes: 1 addition & 2 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,7 @@ RUN apt-get -qq update \
# Required for R
gfortran \
# Required for rpy2
r-base-dev \
r-base \
r-cran-rjava \
# Required for R-package devtools (which is required for SJD)
libharfbuzz-dev \
libfribidi-dev \
Expand Down
1 change: 1 addition & 0 deletions docker/install_bioc.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#!/usr/bin/env Rscript --vanilla

# Installs the R packages BiocManager, devtools, projectR, biomaRt and SJD.

# BiocManager and devtools are fetched from the CMU CRAN mirror, together with their dependencies.
install.packages(c("BiocManager", "devtools"), dependencies=TRUE, repos="http://lib.stat.cmu.edu/R/CRAN/")
# Select the Bioconductor release before installing any Bioconductor packages.
BiocManager::install(version = "3.19") # required for R 4.4.0
# "genesofeve/projectR" is in owner/repo form (presumably a GitHub repository -- confirm);
# ask=FALSE avoids the interactive update prompt so the script can run unattended.
BiocManager::install(c("genesofeve/projectR", "biomaRt"), ask=FALSE)
# SJD is installed from the GitHub repository CHuanSite/SJD via devtools.
library(devtools); install_github("CHuanSite/SJD")
2 changes: 1 addition & 1 deletion docker/install_bioc.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ Rver="${Rmaj}.4.0"

current_dir=$(pwd)

curl -s -L http://lib.stat.cmu.edu/R/CRAN/src/base/${Rmaj}/${Rver}.tar.gz | tar xzv -C /opt
curl -s -L http://lib.stat.cmu.edu/R/CRAN/src/base/${Rmaj}/${Rver}.tar.gz | tar xzv -C /opt || exit 1
cd /opt/${Rver}
/opt/${Rver}/configure --with-readline=no --enable-R-shlib --enable-BLAS-shlib --with-x=no || exit 1
make || exit 1
Expand Down
4 changes: 2 additions & 2 deletions docker/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ more_itertools==9.0.0
mysql-connector-python==8.0.20
numba==0.58.1
numexpr==2.8.4
numpy==1.26.0
numpy==1.26.4
opencv-python==4.5.5.64
openpyxl==3.1.5
pandas==2.2.1
Expand All @@ -29,7 +29,7 @@ pika==1.3.1
plotly==5.6.0
python-dotenv==0.20.0
requests==2.31.0
rpy2==3.5.1 # 3.5.2 and up gives errors with rpy2py and py2rpy
rpy2==3.5.16
sanic
scanpy==1.10.1
scikit-learn==1.0.2
Expand Down
2 changes: 1 addition & 1 deletion docs/setup.python.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ Check the requirements.txt file in <git_repo_root>/docker for the latest packages
mysql-connector-python==8.0.20 \
numba==0.58.1 \
numexpr==2.8.4 \
numpy==1.26.0 \
numpy==1.26.4 \
opencv-python==4.5.5.64 \
openpyxl==3.1.5 \
pandas==2.2.1 \
Expand Down
3 changes: 1 addition & 2 deletions services/projectr/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@ RUN apt-get -qq update \
# Required for R
gfortran \
# Required for rpy2
r-base-dev \
r-base \
r-cran-rjava \
# Required for R-package devtools (which is required for SJD)
libharfbuzz-dev \
libfribidi-dev \
Expand Down
1 change: 1 addition & 0 deletions services/projectr/install_bioc.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#!/usr/bin/env Rscript --vanilla

# Installs the R packages BiocManager, devtools, projectR, biomaRt and SJD.

# BiocManager and devtools are fetched from the CMU CRAN mirror, together with their dependencies.
install.packages(c("BiocManager", "devtools"), dependencies=TRUE, repos="http://lib.stat.cmu.edu/R/CRAN/")
# Select the Bioconductor release before installing any Bioconductor packages.
BiocManager::install(version = "3.19") # required for R 4.4.0
# "genesofeve/projectR" is in owner/repo form (presumably a GitHub repository -- confirm);
# ask=FALSE avoids the interactive update prompt so the script can run unattended.
BiocManager::install(c("genesofeve/projectR", "biomaRt"), ask=FALSE)
# SJD is installed from the GitHub repository CHuanSite/SJD via devtools.
library(devtools); install_github("CHuanSite/SJD")
2 changes: 1 addition & 1 deletion services/projectr/install_bioc.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ Rver="${Rmaj}.4.0"

current_dir=$(pwd)

curl -s -L http://lib.stat.cmu.edu/R/CRAN/src/base/${Rmaj}/${Rver}.tar.gz | tar xzv -C /opt
curl -s -L http://lib.stat.cmu.edu/R/CRAN/src/base/${Rmaj}/${Rver}.tar.gz | tar xzv -C /opt || exit 1
cd /opt/${Rver}
/opt/${Rver}/configure --with-readline=no --enable-R-shlib --enable-BLAS-shlib --with-x=no || exit 1
make || exit 1
Expand Down
6 changes: 2 additions & 4 deletions services/projectr/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
Flask==3.0.0
gunicorn==20.1.0
rpy2==3.5.1 # 3.5.2 and up gives errors with rpy2py and py2rpy
#rpy2==3.5.16
#pandas==2.2.1
rpy2==3.5.16
pandas==2.2.1
numpy==1.26.4 # https://stackoverflow.com/a/78641304
pandas==1.4.1
google-cloud-logging
99 changes: 53 additions & 46 deletions services/projectr/rfuncs.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@
from rpy2.robjects.conversion import localconverter
from rpy2.robjects.vectors import StrVector

# If running locally, need to ensure that multiple concurrent R calls do not conflict
from rpy2.rinterface_lib import openrlib


class RError(Exception):
"""Error based on issues that would manifest in any particular R-language call."""
def __init__(self, message="") -> None:
Expand Down Expand Up @@ -45,51 +49,54 @@ def run_projectR_cmd(target_df, loading_df, algorithm):
Return Pandas dataframe of the projectR output
"""

# Convert from pandas dataframe to R data.frame
with localconverter(ro.default_converter + pandas2ri.converter):
target_r_df = ro.conversion.py2rpy(target_df)
loading_r_df = ro.conversion.py2rpy(loading_df)

# data.frame to matrix (projectR has no data.frame signature)
target_r_matrix = convert_r_df_to_r_matrix(target_r_df)
loading_r_matrix = convert_r_df_to_r_matrix(loading_r_df)

# Assign Rownames to each matrix
# Use StrVector imported directly from rpy2.robjects.vectors. For reasons not yet understood, using ro.StrVector
# makes rpy2py fail because the output df comes back as an incompatible class.
# Best guess: some non-string values are mixed into the indexes.
target_r_matrix.rownames = StrVector(target_df.index)
loading_r_matrix.rownames = StrVector(loading_df.index)

# The NMF projectR method signature is based on the LinearEmbeddingMatrix class,
# which has a featureLoadings property. That matrix is loaded and the default
# projectR signature is returned and used, so we can just pass the matrix as-is.
# https://rdrr.io/bioc/SingleCellExperiment/man/LinearEmbeddingMatrix.html

# Run project R command. Get projectionPatterns matrix
try:
if algorithm == "nmf":
projectR = importr('projectR')
projection_patterns_r_matrix = projectR.projectR(data=target_r_matrix, loadings=loading_r_matrix, full=False)
elif algorithm == "fixednmf":
sjd = importr('SJD')
loading_list = ro.ListVector({"genesig": loading_r_matrix})

projection = sjd.projectNMF(proj_dataset=target_r_matrix, proj_group=True, list_component=loading_list)
projection_patterns_r_matrix = projection.rx2("proj_score_list").rx2("genesig")
else:
raise ValueError("Algorithm {} is not supported".format(algorithm))
except Exception as e:
# print stacktrace with line numbers
traceback.print_exc(file=sys.stderr)
raise RError("Error: Could not run projectR command.\tReason: {}".format(str(e)))

# matrix back to data.frame
projection_patterns_r_df = convert_r_matrix_to_r_df(projection_patterns_r_matrix)

# Convert from R data.frame to pandas dataframe
with localconverter(ro.default_converter + pandas2ri.converter):
projection_patterns_df = ro.conversion.rpy2py(projection_patterns_r_df)

return projection_patterns_df
# Hold rpy2's R lock so that concurrent calls (e.g. when running locally) do not use the embedded R interpreter at the same time -> https://rpy2.github.io/doc/v3.5.x/html/rinterface.html#multithreading
with openrlib.rlock:

# Convert from pandas dataframe to R data.frame
with localconverter(ro.default_converter + pandas2ri.converter):
target_r_df = ro.conversion.py2rpy(target_df)
loading_r_df = ro.conversion.py2rpy(loading_df)

# data.frame to matrix (projectR has no data.frame signature)
target_r_matrix = convert_r_df_to_r_matrix(target_r_df)
loading_r_matrix = convert_r_df_to_r_matrix(loading_r_df)

# Assign Rownames to each matrix
# Use StrVector imported directly from rpy2.robjects.vectors. For reasons not yet understood, using ro.StrVector
# makes rpy2py fail because the output df comes back as an incompatible class.
# Best guess: some non-string values are mixed into the indexes.
target_r_matrix.rownames = StrVector(target_df.index)
loading_r_matrix.rownames = StrVector(loading_df.index)

# The NMF projectR method signature is based on the LinearEmbeddingMatrix class,
# which has a featureLoadings property. That matrix is loaded and the default
# projectR signature is returned and used, so we can just pass the matrix as-is.
# https://rdrr.io/bioc/SingleCellExperiment/man/LinearEmbeddingMatrix.html

# Run project R command. Get projectionPatterns matrix
try:
if algorithm == "nmf":
projectR = importr('projectR')
projection_patterns_r_matrix = projectR.projectR(data=target_r_matrix, loadings=loading_r_matrix, full=False)
elif algorithm == "fixednmf":
sjd = importr('SJD')
loading_list = ro.ListVector({"genesig": loading_r_matrix})

projection = sjd.projectNMF(proj_dataset=target_r_matrix, proj_group=True, list_component=loading_list)
projection_patterns_r_matrix = projection.rx2("proj_score_list").rx2("genesig")
else:
raise ValueError("Algorithm {} is not supported".format(algorithm))
except Exception as e:
# print stacktrace with line numbers
traceback.print_exc(file=sys.stderr)
raise RError("Error: Could not run projectR command.\tReason: {}".format(str(e)))

# matrix back to data.frame
projection_patterns_r_df = convert_r_matrix_to_r_df(projection_patterns_r_matrix)

# Convert from R data.frame to pandas dataframe
with localconverter(ro.default_converter + pandas2ri.converter):
projection_patterns_df = ro.conversion.rpy2py(projection_patterns_r_df)

return projection_patterns_df


0 comments on commit 0deb41c

Please sign in to comment.