diff options
Diffstat (limited to 'gnu/packages/bioinformatics.scm')
-rw-r--r-- | gnu/packages/bioinformatics.scm | 957 |
1 files changed, 125 insertions, 832 deletions
diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm index 239a8c13df..1ffac1a0ca 100644 --- a/gnu/packages/bioinformatics.scm +++ b/gnu/packages/bioinformatics.scm @@ -211,85 +211,6 @@ tRNA consensus sequences and RNA structure. It also outputs the secondary structure of the predicted RNA.") (license license:gpl2))) -(define-public bamm - (package - (name "bamm") - (version "1.7.3") - (source (origin - (method git-fetch) - ;; BamM is not available on pypi. - (uri (git-reference - (url "https://github.com/Ecogenomics/BamM") - (commit version) - (recursive? #t))) - (file-name (git-file-name name version)) - (sha256 - (base32 - "1p83ahi984ipslxlg4yqy1gdnya9rkn1v71z8djgxkm9d2chw4c5")) - (modules '((guix build utils))) - (snippet - `(begin - ;; Delete bundled htslib. - (delete-file-recursively "c/htslib-1.3.1"))))) - (build-system python-build-system) - (arguments - `(#:python ,python-2 ; BamM is Python 2 only. - ;; Do not use bundled libhts. Do use the bundled libcfu because it has - ;; been modified from its original form. - #:configure-flags - ,#~(let ((htslib #$(this-package-input "htslib"))) - (list "--with-libhts-lib" (string-append htslib "/lib") - "--with-libhts-inc" (string-append htslib "/include/htslib"))) - #:phases - (modify-phases %standard-phases - (add-after 'unpack 'autogen - (lambda _ - (with-directory-excursion "c" - (let ((sh (which "sh"))) - (for-each make-file-writable (find-files "." ".*")) - ;; Use autogen so that 'configure' works. - (substitute* "autogen.sh" (("/bin/sh") sh)) - (setenv "CONFIG_SHELL" sh) - (invoke "./autogen.sh"))))) - (delete 'build) ;the build loops otherwise - (replace 'check - (lambda _ - ;; There are 2 errors printed, but they are safe to ignore: - ;; 1) [E::hts_open_format] fail to open file ... - ;; 2) samtools view: failed to open ... - (invoke "nosetests"))) - (add-after 'install 'wrap-executable - (lambda* (#:key inputs outputs #:allow-other-keys) - (let* ((out (assoc-ref outputs "out")) - (path (getenv "PATH")) - (pythonpath (getenv "GUIX_PYTHONPATH"))) - (wrap-program (string-append out "/bin/bamm") - `("PATH" ":" prefix (,path)) - `("GUIX_PYTHONPATH" ":" prefix (,pythonpath))))))))) - (native-inputs - (list autoconf - automake - libtool - zlib - python2-nose - python2-pysam)) - (inputs - (list htslib-1.3 ; At least one test fails on htslib-1.4+. - samtools - bwa - grep - sed - coreutils)) - (propagated-inputs - (list python2-numpy)) - (home-page "https://ecogenomics.github.io/BamM/") - (synopsis "Metagenomics-focused BAM file manipulator") - (description - "BamM is a C library, wrapped in python, to efficiently generate and -parse BAM files, specifically for the analysis of metagenomic data. For -instance, it implements several methods to assess contig-wise read coverage.") - (license license:lgpl3+))) - (define-public bamtools (package (name "bamtools") @@ -840,52 +761,6 @@ frames} (ORFs) using ribosome profiling (ribo-seq) data. This package provides the Ribotaper pipeline.") (license license:gpl3+))) -(define-public ribodiff - (package - (name "ribodiff") - (version "0.2.2") - (source - (origin - (method git-fetch) - (uri (git-reference - (url "https://github.com/ratschlab/RiboDiff") - (commit (string-append "v" version)))) - (file-name (git-file-name name version)) - (sha256 - (base32 - "0x75nlp7qnmm64jasbi6l21f2cy99r2cjyl6b4hr8zf2bq22drnz")))) - (build-system python-build-system) - (arguments - `(#:python ,python-2 - #:phases - (modify-phases %standard-phases - ;; This test fails because of the matplotlib plotting backend. - (add-after 'unpack 'disable-plot-test - (lambda _ - (substitute* "src/ribodiff/functional_test_te.py" - (("pl\\.make_plots\\(data, opts\\)") "#")))) - ;; Generate an installable executable script wrapper. - (add-after 'unpack 'patch-setup.py - (lambda _ - (substitute* "setup.py" - (("^(.*)packages=.*" line prefix) - (string-append line "\n" - prefix "scripts=['scripts/TE.py'],\n")))))))) - (inputs - (list python2-numpy python2-matplotlib python2-scipy - python2-statsmodels)) - (native-inputs - (list python2-mock python2-nose)) - (home-page "https://public.bmi.inf.ethz.ch/user/zhongy/RiboDiff/") - (synopsis "Detect translation efficiency changes from ribosome footprints") - (description "RiboDiff is a statistical tool that detects the protein -translational efficiency change from Ribo-Seq (ribosome footprinting) and -RNA-Seq data. It uses a generalized linear model to detect genes showing -difference in translational profile taking mRNA abundance into account. It -facilitates us to decipher the translational regulation that behave -independently with transcriptional regulation.") - (license license:gpl3+))) - (define-public bioawk (package (name "bioawk") @@ -1027,14 +902,6 @@ pybedtools extends BEDTools by offering feature-level manipulations from with Python.") (license license:gpl2+))) -(define-public python2-pybedtools - (let ((pybedtools (package-with-python2 python-pybedtools))) - (package - (inherit pybedtools) - (native-inputs - (modify-inputs (package-native-inputs pybedtools) - (prepend python2-pathlib)))))) - (define-public python-biom-format (package (name "python-biom-format") @@ -1283,9 +1150,6 @@ into separate processes; and more.") (base32 "1q55jhf76z3k6is3psis0ckbki7df26x7dikpcc3vhk1vhkwribh")))))) -(define-public python2-biopython - (package-with-python2 python-biopython)) - (define-public python-fastalite (package (name "python-fastalite") @@ -1306,9 +1170,6 @@ into separate processes; and more.") relying on a complex dependency tree.") (license license:expat))) -(define-public python2-fastalite - (package-with-python2 python-fastalite)) - (define-public biosoup (package (name "biosoup") @@ -2377,9 +2238,6 @@ SAM/BAM format. Pysam is a lightweight wrapper of the SAMtools C API. It also includes an interface for tabix.") (license license:expat))) -(define-public python2-pysam - (package-with-python2 python-pysam)) - (define-public python-twobitreader (package (name "python-twobitreader") @@ -2440,59 +2298,60 @@ high-throughput sequencing data – with an emphasis on simplicity.") (define-public tetoolkit (package (name "tetoolkit") - (version "2.0.3") + (version "2.2.1b") (source (origin (method git-fetch) (uri (git-reference - (url "https://github.com/mhammell-laboratory/tetoolkit") + (url "https://github.com/mhammell-laboratory/TEtranscripts") (commit version))) (file-name (git-file-name name version)) (sha256 (base32 - "1yzi0kfpzip8zpjb82x1ik6h22yzfyjiz2dv85v6as2awwqvk807")))) + "1m3xsydakhdan9gp9mfdz7llka5g6ak91d0mbl1cmmxq9qs6an4y")))) (build-system python-build-system) (arguments - `(#:python ,python-2 ; not guaranteed to work with Python 3 - #:phases + `(#:phases (modify-phases %standard-phases - (add-after 'unpack 'make-writable + (add-after 'unpack 'adjust-requirements (lambda _ - (for-each make-file-writable (find-files ".")) - #t)) + (substitute* "setup.py" + ;; This defunct dependency isn't required for Python 3 (see: + ;; https://github.com/mhammell-laboratory/TEtranscripts/issues/111). + ((".*'argparse'.*") "")))) (add-after 'unpack 'patch-invocations (lambda* (#:key inputs #:allow-other-keys) (substitute* '("bin/TEtranscripts" "bin/TEcount") (("'sort ") - (string-append "'" (which "sort") " ")) + (string-append "'" (search-input-file inputs "bin/sort") " ")) (("'rm -f ") - (string-append "'" (which "rm") " -f ")) - (("'Rscript'") (string-append "'" (which "Rscript") "'"))) + (string-append "'" (search-input-file inputs "bin/rm") " -f ")) + (("'Rscript'") + (string-append "'" (search-input-file inputs "bin/Rscript") + "'"))) (substitute* "TEToolkit/IO/ReadInputs.py" - (("BamToBED") (which "bamToBed"))) + (("BamToBED") + (search-input-file inputs "bin/bamToBed"))) (substitute* "TEToolkit/Normalization.py" (("\"Rscript\"") - (string-append "\"" (which "Rscript") "\""))) - #t)) + (string-append "\"" (search-input-file inputs "bin/Rscript") + "\""))))) (add-after 'install 'wrap-program (lambda* (#:key outputs #:allow-other-keys) ;; Make sure the executables find R packages. - (let ((out (assoc-ref outputs "out"))) - (for-each - (lambda (script) - (wrap-program (string-append out "/bin/" script) - `("R_LIBS_SITE" ":" = (,(getenv "R_LIBS_SITE"))))) - '("TEtranscripts" - "TEcount"))) - #t))))) + (for-each (lambda (script) + (wrap-program script + `("R_LIBS_SITE" ":" = (,(getenv "R_LIBS_SITE"))))) + (list (search-input-file outputs "bin/TEtranscripts") + (search-input-file outputs "bin/TEcount")))))))) (inputs - (list coreutils + (list bash-minimal + coreutils bedtools - python2-argparse - python2-pysam + python-pysam r-minimal r-deseq2)) - (home-page "https://github.com/mhammell-laboratory/tetoolkit") + (home-page "https://github.com/mhammell-laboratory/TEtranscripts") (synopsis "Transposable elements in differential enrichment analysis") (description "This is package for including transposable elements in differential @@ -2903,9 +2762,6 @@ files.") accessing bigWig files.") (license license:expat))) -(define-public python2-pybigwig - (package-with-python2 python-pybigwig)) - (define-public python-schema-salad (package (name "python-schema-salad") @@ -3093,22 +2949,6 @@ writing, simulation, processing and manipulation of phylogenetic trees (phylogenies) and characters.") (license license:bsd-3))) -(define-public python2-dendropy - (let ((base (package-with-python2 python-dendropy))) - (package/inherit base - (arguments - `(#:phases - (modify-phases %standard-phases - (add-after 'unpack 'remove-failing-test - (lambda _ - ;; This test fails when the full test suite is run, as documented - ;; at https://github.com/jeetsukumaran/DendroPy/issues/74 - (substitute* "tests/test_dataio_nexml_reader_tree_list.py" - (("test_collection_comments_and_annotations") - "do_not_test_collection_comments_and_annotations")) - #t))) - ,@(package-arguments base)))))) - (define-public python-py2bit (package (name "python-py2bit") @@ -3817,96 +3657,6 @@ supports next-generation sequencing data in fasta/q and csfasta/q format from Illumina, Roche 454, and the SOLiD platform.") (license license:bsd-3))) -(define-public fraggenescan - (package - (name "fraggenescan") - (version "1.30") - (source - (origin - (method url-fetch) - (uri - (string-append "mirror://sourceforge/fraggenescan/" - "FragGeneScan" version ".tar.gz")) - (sha256 - (base32 "158dcnwczgcyhwm4qlx19sanrwgdpzf6bn2y57mbpx55lkgz1mzj")))) - (build-system gnu-build-system) - (arguments - `(#:phases - (modify-phases %standard-phases - (delete 'configure) - (add-before 'build 'patch-paths - (lambda* (#:key outputs #:allow-other-keys) - (let* ((out (string-append (assoc-ref outputs "out"))) - (share (string-append out "/share/fraggenescan/"))) - (substitute* "run_FragGeneScan.pl" - (("system\\(\"rm") - (string-append "system(\"" (which "rm"))) - (("system\\(\"mv") - (string-append "system(\"" (which "mv"))) - (("\\\"awk") (string-append "\"" (which "awk"))) - ;; This script and other programs expect the training files - ;; to be in the non-standard location bin/train/XXX. Change - ;; this to be share/fraggenescan/train/XXX instead. - (("^\\$train.file = \\$dir.*") - (string-append "$train_file = \"" - share - "train/\".$FGS_train_file;"))) - (substitute* "run_hmm.c" - (("^ strcat\\(train_dir, \\\"train/\\\"\\);") - (string-append " strcpy(train_dir, \"" share "/train/\");")))) - #t)) - (replace 'build - (lambda _ - (invoke "make" "clean") - (invoke "make" "fgs") - #t)) - (replace 'install - (lambda* (#:key outputs #:allow-other-keys) - (let* ((out (string-append (assoc-ref outputs "out"))) - (bin (string-append out "/bin/")) - (share (string-append out "/share/fraggenescan/train"))) - (install-file "run_FragGeneScan.pl" bin) - (install-file "FragGeneScan" bin) - (copy-recursively "train" share)) - #t)) - (delete 'check) - (add-after 'install 'post-install-check - ;; In lieu of 'make check', run one of the examples and check the - ;; output files gets created. - (lambda* (#:key outputs #:allow-other-keys) - (let* ((out (string-append (assoc-ref outputs "out"))) - (bin (string-append out "/bin/")) - (frag (string-append bin "run_FragGeneScan.pl"))) - ;; Test complete genome. - (invoke frag - "-genome=./example/NC_000913.fna" - "-out=./test2" - "-complete=1" - "-train=complete") - (unless (and (file-exists? "test2.faa") - (file-exists? "test2.ffn") - (file-exists? "test2.gff") - (file-exists? "test2.out")) - (error "Expected files do not exist.")) - ;; Test incomplete sequences. - (invoke frag - "-genome=./example/NC_000913-fgs.ffn" - "-out=out" - "-complete=0" - "-train=454_30") - #t)))))) - (inputs - `(("perl" ,perl) - ("python" ,python-2))) ;not compatible with python 3. - (home-page "https://sourceforge.net/projects/fraggenescan/") - (synopsis "Finds potentially fragmented genes in short reads") - (description - "FragGeneScan is a program for predicting bacterial and archaeal genes in -short and error-prone DNA sequencing reads. It can also be applied to predict -genes in incomplete assemblies or complete genomes.") - ;; GPL3+ according to private correspondense with the authors. - (license license:gpl3+))) - (define-public fxtract (let ((util-commit "776ca85a18a47492af3794745efcb4a905113115")) (package @@ -4028,48 +3778,6 @@ standard linear mixed model resolver with application in @acronym{GWAS, genome-wide association studies}.") (license license:gpl3))) -(define-public grit - (package - (name "grit") - (version "2.0.5") - (source (origin - (method git-fetch) - (uri (git-reference - (url "https://github.com/nboley/grit") - (commit version))) - (file-name (git-file-name name version)) - (sha256 - (base32 - "1l5v8vfvfbrpmgnrvbrbv40d0arhxcnmxgv2f1mlcqfa3q6bkqm9")))) - (build-system python-build-system) - (arguments - `(#:python ,python-2 - #:phases - (modify-phases %standard-phases - (add-after 'unpack 'generate-from-cython-sources - (lambda* (#:key inputs outputs #:allow-other-keys) - ;; Delete these C files to force fresh generation from pyx sources. - (delete-file "grit/sparsify_support_fns.c") - (delete-file "grit/call_peaks_support_fns.c") - (substitute* "setup.py" - (("Cython.Setup") "Cython.Build")) - #t))))) - (inputs - (list python2-scipy python2-numpy python2-pysam python2-networkx)) - (native-inputs - (list python2-cython)) - ;; The canonical <http://grit-bio.org> home page times out as of 2020-01-21. - (home-page "https://github.com/nboley/grit") - (synopsis "Tool for integrative analysis of RNA-seq type assays") - (description - "GRIT is designed to use RNA-seq, TES, and TSS data to build and quantify -full length transcript models. When none of these data sources are available, -GRIT can be run by providing a candidate set of TES or TSS sites. In -addition, GRIT can merge in reference junctions and gene boundaries. GRIT can -also be run in quantification mode, where it uses a provided GTF file and just -estimates transcript expression.") - (license license:gpl3+))) - (define-public hisat (package (name "hisat") @@ -4250,9 +3958,6 @@ HMMs).") from high-throughput sequencing assays.") (license license:gpl3+))) -(define-public python2-htseq - (package-with-python2 htseq)) - (define-public java-htsjdk (package (name "java-htsjdk") @@ -5394,54 +5099,6 @@ unassembled metagenomic reads, but is mainly designed for full genomes and assembled metagenomic sequence.") (license license:gpl3+))) -(define-public miso - (let ((commit "b71402188000465e3430736a11ea118fd5639a4a") - (revision "1")) - (package - (name "miso") - (version (git-version "0.5.4" revision commit)) - (source (origin - (method git-fetch) - (uri (git-reference - (url "https://github.com/yarden/MISO/") - (commit commit))) - (file-name (git-file-name name version)) - (sha256 - (base32 - "0x37ipwwvpxbkrg17gmq3hp92c9cphch8acd6cj7fqgnrjwd47g5")) - (modules '((guix build utils))) - (snippet - '(substitute* "setup.py" - ;; Use "gcc" instead of "cc" for compilation. - (("^defines") - "cc.set_executables( -compiler='gcc', -compiler_so='gcc', -linker_exe='gcc', -linker_so='gcc -shared'); defines"))))) - (build-system python-build-system) - (arguments - `(#:python ,python-2 ; only Python 2 is supported - #:tests? #f)) ; no "test" target - (inputs - ;; Samtools must not be newer than 1.2. See - ;; https://github.com/yarden/MISO/issues/135 - (list samtools-1.2 python2-numpy python2-pysam python2-scipy - python2-matplotlib)) - (native-inputs - (list python2-mock ; for tests - python2-pytz)) ; for tests - (home-page "https://miso.readthedocs.io/en/fastmiso/") - (synopsis "Mixture of Isoforms model for RNA-Seq isoform quantitation") - (description - "MISO (Mixture-of-Isoforms) is a probabilistic framework that quantitates -the expression level of alternatively spliced genes from RNA-Seq data, and -identifies differentially regulated isoforms or exons across samples. By -modeling the generative process by which reads are produced from isoforms in -RNA-Seq, the MISO model uses Bayesian inference to compute the probability -that a read originated from a particular isoform.") - (license license:gpl2)))) - (define-public muscle (package (name "muscle") @@ -5532,80 +5189,6 @@ interrupted by stop codons. OrfM finds and prints these ORFs.") (home-page "https://github.com/wwood/OrfM") (license license:lgpl3+))) -(define-public python2-pbcore - (package - (name "python2-pbcore") - (version "1.2.10") - (source (origin - (method url-fetch) - (uri (pypi-uri "pbcore" version)) - (sha256 - (base32 - "1kjmv891d6qbpp4shhhvkl02ff4q5xlpnls2513sm2cjcrs52f1i")))) - (build-system python-build-system) - (arguments - `(#:python ,python-2 ;pbcore < 2.0 requires Python 2.7 - #:phases (modify-phases %standard-phases - (add-after 'unpack 'remove-sphinx-dependency - (lambda _ - ;; Sphinx is only required for documentation tests, which - ;; we do not run; furthermore it depends on python2-sphinx - ;; which is no longer maintained. - (substitute* "requirements-dev.txt" - (("^sphinx") "")) - #t))))) - (propagated-inputs - (list python2-cython python2-numpy python2-pysam python2-h5py)) - (native-inputs - (list python2-nose python2-pyxb)) - (home-page "https://pacificbiosciences.github.io/pbcore/") - (synopsis "Library for reading and writing PacBio data files") - (description - "The pbcore package provides Python APIs for interacting with PacBio data -files and writing bioinformatics applications.") - (license license:bsd-3))) - -(define-public python2-warpedlmm - (package - (name "python2-warpedlmm") - (version "0.21") - (source - (origin - (method url-fetch) - (uri (pypi-uri "WarpedLMM" version ".zip")) - (sha256 - (base32 - "1agfz6zqa8nc6cw47yh0s3y14gkpa9wqazwcj7mwwj3ffnw39p3j")))) - (build-system python-build-system) - (arguments - `(#:python ,python-2 ; requires Python 2.7 - #:tests? #f ; test data are not included - #:phases - (modify-phases %standard-phases - (add-after 'unpack 'use-weave - (lambda _ - (substitute* "warpedlmm/util/linalg.py" - (("from scipy import linalg, weave") - "from scipy import linalg\nimport weave")) - #t))))) - (propagated-inputs - (list python2-scipy - python2-numpy - python2-matplotlib - python2-fastlmm - python2-pandas - python2-pysnptools - python2-weave)) - (native-inputs - (list python2-mock python2-nose unzip)) - (home-page "https://github.com/PMBio/warpedLMM") - (synopsis "Implementation of warped linear mixed models") - (description - "WarpedLMM is a Python implementation of the warped linear mixed model, -which automatically learns an optimal warping function (or transformation) for -the phenotype as it models the data.") - (license license:asl2.0))) - (define-public prank (package (name "prank") @@ -5727,45 +5310,6 @@ clusters them to find significant groups. The algorithm was designed to handle large-scale data and can be applied to hundreds of species at once.") (license license:gpl3+))) -(define-public pyicoteo - (package - (name "pyicoteo") - (version "2.0.7") - (source - (origin - (method git-fetch) - (uri (git-reference - (url "https://bitbucket.org/regulatorygenomicsupf/pyicoteo.git") - (commit (string-append "v" version)))) - (file-name (git-file-name name version)) - (sha256 - (base32 - "0hz5g8d25lbjy1wpscr490l0lmyvaix893hhax4fxnh1h9w34w8p")))) - (build-system python-build-system) - (arguments - `(#:python ,python-2 ; does not work with Python 3 - #:tests? #f)) ; there are no tests - (inputs - (list python2-matplotlib)) - (home-page "https://bitbucket.org/regulatorygenomicsupf/pyicoteo") - (synopsis "Analyze high-throughput genetic sequencing data") - (description - "Pyicoteo is a suite of tools for the analysis of high-throughput genetic -sequencing data. It works with genomic coordinates. There are currently six -different command-line tools: - -@enumerate -@item pyicoregion: for generating exploratory regions automatically; -@item pyicoenrich: for differential enrichment between two conditions; -@item pyicoclip: for calling CLIP-Seq peaks without a control; -@item pyicos: for genomic coordinates manipulation; -@item pyicoller: for peak calling on punctuated ChIP-Seq; -@item pyicount: to count how many reads from N experiment files overlap in a - region file; -@item pyicotrocol: to combine operations from pyicoteo. -@end enumerate\n") - (license license:gpl3+))) - (define-public prodigal (package (name "prodigal") @@ -6835,9 +6379,6 @@ Values such as sequence name, sequence description, sequence quality and the sequence itself can be retrieved from these databases.") (license license:bsd-3))) -(define-public python2-screed - (package-with-python2 python-screed)) - (define-public sra-tools (package (name "sra-tools") @@ -8822,32 +8363,6 @@ regions of statistically significant read enrichment. Additional covariates may optionally be provided to further inform the peak-calling process.") (license license:gpl3+)))) -(define-public pepr - (package - (name "pepr") - (version "1.0.9") - (source (origin - (method url-fetch) - (uri (pypi-uri "PePr" version)) - (sha256 - (base32 - "0qxjfdpl1b1y53nccws2d85f6k74zwmx8y8sd9rszcqhfayx6gdx")))) - (build-system python-build-system) - (arguments - `(#:python ,python-2 ; python2 only - #:tests? #f)) ; no tests included - (propagated-inputs - (list python2-numpy python2-scipy python2-pysam)) - (home-page "https://github.com/shawnzhangyx/PePr") - (synopsis "Peak-calling and prioritization pipeline for ChIP-Seq data") - (description - "PePr is a ChIP-Seq peak calling or differential binding analysis tool -that is primarily designed for data with biological replicates. It uses a -negative binomial distribution to model the read counts among the samples in -the same group, and look for consistent differences between ChIP and control -group or two ChIP groups run under different conditions.") - (license license:gpl3+))) - (define-public filevercmp (let ((commit "1a9b779b93d0b244040274794d402106907b71b7") (revision "1")) @@ -9373,72 +8888,6 @@ adapter trimming as well as quality control, with some added functionality to remove biased methylation positions for RRBS sequence files.") (license license:gpl3+))) -(define-public gess - (package - (name "gess") - (version "1.0") - (source (origin - (method url-fetch) - (uri (string-append "http://compbio.uthscsa.edu/" - "GESS_Web/files/" - "gess-" version ".src.tar.gz")) - (sha256 - (base32 - "0hyk403kxscclzfs24pvdgiv0wm03kjcziqdrp5w46cb049gz0d7")))) - (build-system gnu-build-system) - (arguments - `(#:tests? #f ; no tests - #:phases - (modify-phases %standard-phases - (delete 'configure) - (delete 'build) - (replace 'install - (lambda* (#:key inputs outputs #:allow-other-keys) - (let* ((python (assoc-ref inputs "python")) - (out (assoc-ref outputs "out")) - (bin (string-append out "/bin/")) - (target (string-append - out "/lib/python" - ,(version-major+minor - (package-version python)) - "/site-packages/gess/"))) - (mkdir-p target) - (copy-recursively "." target) - ;; Make GESS.py executable - (chmod (string-append target "GESS.py") #o555) - ;; Add Python shebang to the top and make Matplotlib - ;; usable. - (substitute* (string-append target "GESS.py") - (("\"\"\"Description:" line) - (string-append "#!" (which "python") " -import matplotlib -matplotlib.use('Agg') -" line))) - ;; Make sure GESS has all modules in its path - (wrap-script (string-append target "GESS.py") - #:guile (search-input-file inputs "bin/guile") - `("GUIX_PYTHONPATH" ":" = (,target ,(getenv "GUIX_PYTHONPATH")))) - (mkdir-p bin) - (symlink (string-append target "GESS.py") - (string-append bin "GESS.py")) - #t)))))) - (inputs - `(("python" ,python-2) - ("python2-pysam" ,python2-pysam) - ("python2-scipy" ,python2-scipy) - ("python2-numpy" ,python2-numpy) - ("python2-networkx" ,python2-networkx) - ("python2-biopython" ,python2-biopython) - ("guile" ,guile-3.0))) ; for the script wrapper - (home-page "https://compbio.uthscsa.edu/GESS_Web/") - (synopsis "Detect exon-skipping events from raw RNA-seq data") - (description - "GESS is an implementation of a novel computational method to detect de -novo exon-skipping events directly from raw RNA-seq data without the prior -knowledge of gene annotation information. GESS stands for the graph-based -exon-skipping scanner detection scheme.") - (license license:bsd-3))) - (define-public phylip (package (name "phylip") @@ -10992,7 +10441,7 @@ once. This package provides tools to perform Drop-seq analyses.") (define-public pigx-rnaseq (package (name "pigx-rnaseq") - (version "0.0.20") + (version "0.1.0") (source (origin (method url-fetch) (uri (string-append "https://github.com/BIMSBbioinfo/pigx_rnaseq/" @@ -11000,7 +10449,7 @@ once. This package provides tools to perform Drop-seq analyses.") "/pigx_rnaseq-" version ".tar.gz")) (sha256 (base32 - "0bf65qqvlkc77vl7cmmzacq70f0qav4p6nf8pp3x1vdd0nvhr24f")))) + "0acdjimfb9ywba8zsv7lavv436pmcmp8ra683h11wr4s3681pqk8")))) (build-system gnu-build-system) (arguments `(#:parallel-tests? #f ; not supported @@ -11094,6 +10543,15 @@ expression report comparing samples in an easily configurable manner.") (modify-phases %standard-phases (add-before 'bootstrap 'autoreconf (lambda _ + ;; This was fixed in commit + ;; 0b1c9f7f2e4d0ff601f1de95ab8b2953f4d5dbc7, but there is no + ;; release with this fix. + (call-with-output-file "VERSION" + (lambda (port) (display ,version port))) + ;; See https://github.com/BIMSBbioinfo/pigx_chipseq/issues/176 + (substitute* "m4/ax_r_package.m4" + (("if\\(is.na\\(packageDescription\\(\"PKG\"\\)\\)\\)") + "if(system.file(package=\"PKG\") == \"\")")) (invoke "autoreconf" "-vif"))) (add-before 'configure 'set-PYTHONPATH (lambda _ @@ -11182,6 +10640,15 @@ in an easily configurable manner.") (modify-phases %standard-phases (add-before 'bootstrap 'autoreconf (lambda _ + ;; This was fixed in commit + ;; d56ac732524da659afbbb0972f7a87fa178ae58e, but there is no + ;; release with this fix. + (call-with-output-file "VERSION" + (lambda (port) (display ,version port))) + ;; https://github.com/BIMSBbioinfo/pigx_bsseq/issues/181 + (substitute* "m4/ax_r_package.m4" + (("if\\(is.na\\(packageDescription\\(\"PKG\"\\)\\)\\)") + "if(system.file(package=\"PKG\") == \"\")")) (invoke "autoreconf" "-vif"))) (add-before 'configure 'set-PYTHONPATH (lambda _ @@ -11249,8 +10716,20 @@ methylation and segmentation.") "1lc42hl8mz95kilh0z39s3wnv092mhm6vl2i394n0yfvdzk4f885")))) (build-system gnu-build-system) (arguments - '(#:phases + `(#:phases (modify-phases %standard-phases + (add-before 'bootstrap 'autoreconf + (lambda _ + ;; This was fixed in commit + ;; c4ac067438ae9312b5786a72e2bfb3d795e3ec8a, but there is no + ;; release with this fix. + (call-with-output-file "VERSION" + (lambda (port) (display ,version port))) + ;; https://github.com/BIMSBbioinfo/pigx_scrnaseq/issues/59 + (substitute* "m4/ax_r_package.m4" + (("if\\(is.na\\(packageDescription\\(\"PKG\"\\)\\)\\)") + "if(system.file(package=\"PKG\") == \"\")")) + (invoke "autoreconf" "-vif"))) (add-before 'configure 'set-additional-environment-variables (lambda _ ;; Needed because of loompy @@ -11299,6 +10778,8 @@ methylation and segmentation.") r-singlecellexperiment r-stringr r-yaml)) + (native-inputs + (list autoconf automake)) (home-page "https://bioinformatics.mdc-berlin.de/pigx/") (synopsis "Analysis pipeline for single-cell RNA sequencing experiments") (description @@ -11327,6 +10808,13 @@ based methods.") `(#:tests? #f ;requires huge kraken database #:phases (modify-phases %standard-phases + (add-before 'bootstrap 'autoreconf + (lambda _ + ;; https://github.com/BIMSBbioinfo/pigx_sars-cov-2/issues/123 + (substitute* "m4/ax_r_package.m4" + (("if\\(is.na\\(packageDescription\\(\"PKG\"\\)\\)\\)") + "if(system.file(package=\"PKG\") == \"\")")) + (invoke "autoreconf" "-vif"))) (add-before 'configure 'set-PYTHONPATH (lambda _ (setenv "PYTHONPATH" (getenv "GUIX_PYTHONPATH"))))))) @@ -12093,56 +11581,6 @@ conversions, region filtering, FASTA sequence extraction and more.") (license (list license:expat license:artistic2.0))))) -(define-public find-circ - ;; The last release was in 2015. The license was clarified in 2017, so we - ;; take the latest commit. - (let ((commit "8655dca54970fcf7e92e22fbf57e1188724dda7d") - (revision "1")) - (package - (name "find-circ") - (version (git-version "1.2" revision commit)) - (source - (origin - (method git-fetch) - (uri (git-reference - (url "https://github.com/marvin-jens/find_circ") - (commit commit))) - (file-name (git-file-name name version)) - (sha256 - (base32 - "0p77pbqbclqr4srms34y1b9b4njybfpjiknc11ki84f3p8skb3cg")))) - (build-system gnu-build-system) - (arguments - `(#:tests? #f ; there are none - #:phases - ;; There is no actual build system. - (modify-phases %standard-phases - (delete 'configure) - (delete 'build) - (replace 'install - (lambda* (#:key outputs #:allow-other-keys) - (let* ((out (assoc-ref outputs "out")) - (bin (string-append out "/bin")) - (path (getenv "GUIX_PYTHONPATH"))) - (for-each (lambda (script) - (install-file script bin) - (wrap-program (string-append bin "/" script) - `("GUIX_PYTHONPATH" ":" prefix (,path)))) - '("cmp_bed.py" - "find_circ.py" - "maxlength.py" - "merge_bed.py" - "unmapped2anchors.py"))) - #t))))) - (inputs - (list python-2 python2-pysam python2-numpy)) - (home-page "https://github.com/marvin-jens/find_circ") - (synopsis "circRNA detection from RNA-seq reads") - (description "This package provides tools to detect head-to-tail -spliced (back-spliced) sequencing reads, indicative of circular RNA (circRNA) -in RNA-seq data.") - (license license:gpl3)))) - (define-public fit-sne (package (name "fit-sne") @@ -12209,7 +11647,7 @@ implementation differs in these ways: (define-public python-scanpy (package (name "python-scanpy") - (version "1.8.2") + (version "1.9.1") (source (origin (method git-fetch) @@ -12219,7 +11657,7 @@ implementation differs in these ways: (file-name (git-file-name name version)) (sha256 (base32 - "14zax23lqinv7xyv3491vpl3ydi38naiwaxg5mkfs5zk2406cqdr")))) + "0k524xnx3dvpz5yx65p316wghvi01zs17is8w2m3w2qywiswk0sl")))) (build-system python-build-system) (arguments `(#:phases @@ -12239,6 +11677,10 @@ implementation differs in these ways: (invoke "python" "-m" "pip" "install" wheel (string-append "--prefix=" out))) (find-files "dist" "\\.whl$"))))) + ;; Numba needs a writable dir to cache functions. + (add-before 'check 'set-numba-cache-dir + (lambda _ + (setenv "NUMBA_CACHE_DIR" "/tmp"))) (replace 'check (lambda* (#:key tests? inputs #:allow-other-keys) (when tests? @@ -12246,6 +11688,7 @@ implementation differs in these ways: (delete-file-recursively "scanpy/tests/notebooks") (delete-file "scanpy/tests/test_clustering.py") (delete-file "scanpy/tests/test_datasets.py") + (delete-file "scanpy/tests/test_normalization.py") (delete-file "scanpy/tests/test_score_genes.py") (delete-file "scanpy/tests/test_highly_variable_genes.py") @@ -12254,6 +11697,9 @@ implementation differs in these ways: (delete-file "scanpy/tests/test_preprocessing.py") (delete-file "scanpy/tests/test_read_10x.py") + ;; These two fail with "ValueError: I/O operation on closed file." + (delete-file "scanpy/tests/test_neighbors_key_added.py") + ;; TODO: these fail with TypingError and "Use of unsupported ;; NumPy function 'numpy.split'". (delete-file "scanpy/tests/test_metrics.py") @@ -12279,17 +11725,20 @@ implementation differs in these ways: " and not test_clustermap" ;; These try to connect to the network + " and not test_scrublet_plots" " and not test_plot_rank_genes_groups_gene_symbols" + " and not test_pca_n_pcs" " and not test_pca_chunked" " and not test_pca_sparse" " and not test_pca_reproducible")))))))) (propagated-inputs (list python-anndata + python-dask python-h5py python-igraph python-joblib python-legacy-api-wrap - python-louvain-0.7 + python-louvain python-matplotlib python-natsort python-networkx @@ -12300,6 +11749,7 @@ implementation differs in these ways: python-scikit-learn python-scipy python-seaborn + python-session-info python-sinfo python-statsmodels python-tables @@ -12636,9 +12086,6 @@ bgzipped text file that contains a pair of genomic coordinates per line.") fasta subsequences.") (license license:bsd-3))) -(define-public python2-pyfaidx - (package-with-python2 python-pyfaidx)) - (define-public python-cooler (package (name "python-cooler") @@ -13083,42 +12530,6 @@ of Nanopore reads that were barcoded with the Native Barcoding Kit, PCR Barcoding Kit or Rapid Barcoding Kit.") (license license:gpl3+)))) -(define-public poretools - ;; The latest release was in 2016 and the latest commit is from 2017 - ;; the recommended way to install is to clone the git repository - ;; https://poretools.readthedocs.io/en/latest/content/installation.html - (let ((commit "e426b1f09e86ac259a00c261c79df91510777407") - (revision "1")) - (package - (name "poretools") - (version (git-version "0.6.0" revision commit)) - (source - (origin - (method git-fetch) - (uri (git-reference - (url "https://github.com/arq5x/poretools") - (commit commit))) - (file-name (git-file-name name version)) - (sha256 - (base32 "0bglj833wxpp3cq430p1d3xp085ls221js2y90w7ir2x5ay8l7am")))) - (build-system python-build-system) - ;; requires python >=2.7, <3.0, and the same for python dependencies - (arguments `(#:python ,python-2)) - (inputs - (list hdf5)) - (propagated-inputs - (list python2-dateutil python2-h5py python2-matplotlib - python2-pandas python2-seaborn)) - (home-page "https://poretools.readthedocs.io") - (synopsis "Toolkit for working with nanopore sequencing data") - (description - "The MinION from Oxford Nanopore Technologies is a nanopore sequencer. -This @code{poretools} package is a flexible toolkit for exploring datasets -generated by nanopore sequencing devices for the purposes of quality control and -downstream analysis. Poretools operates directly on the native FAST5, a variant -of the Hierarchical Data Format (HDF5) standard.") - (license license:expat)))) - (define-public jamm (package (name "jamm") @@ -13831,162 +13242,6 @@ Additionally, the AdapterRemoval may be used to recover a consensus adapter sequence for paired-ended data, for which this information is not available.") (license license:gpl3+))) -(define-public pplacer - (let ((commit "807f6f3")) - (package - (name "pplacer") - ;; The commit should be updated with each version change. - (version "1.1.alpha19") - (source - (origin - (method git-fetch) - (uri (git-reference - (url "https://github.com/matsen/pplacer") - (commit (string-append "v" version)))) - (file-name (git-file-name name version)) - (sha256 - (base32 "11ppbbbx20p2g9wj3ff64dhnarb12q79v7qh4rk0gj6lkbz4n7cn")))) - (build-system ocaml-build-system) - (arguments - `(#:modules ((guix build ocaml-build-system) - (guix build utils) - (ice-9 ftw)) - #:phases - (modify-phases %standard-phases - (delete 'configure) - (add-after 'unpack 'fix-build-with-latest-ocaml - (lambda _ - (substitute* "myocamlbuild.ml" - (("dep \\[\"c_pam\"\\]" m) - (string-append "flag [\"ocaml\"; \"compile\"] (A \"-unsafe-string\");\n" - m)) - (("let run_and_read" m) - (string-append " -let split s ch = - let x = ref [] in - let rec go s = - let pos = String.index s ch in - x := (String.before s pos)::!x; - go (String.after s (pos + 1)) - in - try go s - with Not_found -> !x -let split_nl s = split s '\\n' -let before_space s = - try String.before s (String.index s ' ') - with Not_found -> s - -" m)) - (("run_and_read \"ocamlfind list \\| cut -d' ' -f1\"" m) - (string-append "List.map before_space (split_nl & " m ")")) - ((" blank_sep_strings &") "") - ((" Lexing.from_string &") "")) - #t)) - (add-after 'unpack 'replace-bundled-cddlib - (lambda* (#:key inputs #:allow-other-keys) - (let* ((cddlib-src (assoc-ref inputs "cddlib-src")) - (local-dir "cddlib_guix")) - (mkdir local-dir) - (with-directory-excursion local-dir - (invoke "tar" "xvf" cddlib-src)) - (let ((cddlib-src-folder - (string-append local-dir "/" - (list-ref (scandir local-dir) 2) - "/lib-src"))) - (for-each make-file-writable (find-files "cdd_src" ".*")) - (for-each - (lambda (file) - (copy-file file - (string-append "cdd_src/" (basename file)))) - (find-files cddlib-src-folder ".*[ch]$"))) - #t))) - (add-after 'unpack 'fix-makefile - (lambda _ - ;; Remove system calls to 'git'. - (substitute* "Makefile" - (("^DESCRIPT:=pplacer-.*") - (string-append - "DESCRIPT:=pplacer-$(shell uname)-v" ,version "\n"))) - (substitute* "myocamlbuild.ml" - (("git describe --tags --long .*\\\" with") - (string-append - "echo -n v" ,version "-" ,commit "\" with"))) - #t)) - (replace 'install - (lambda* (#:key outputs #:allow-other-keys) - (let* ((out (assoc-ref outputs "out")) - (bin (string-append out "/bin"))) - (copy-recursively "bin" bin)) - #t))) - #:ocaml ,ocaml-4.07 - #:findlib ,ocaml4.07-findlib)) - (inputs - `(("zlib" ,zlib "static") - ("gsl" ,gsl-static) - ("ocaml-ounit" ,(package-with-ocaml4.07 ocaml-ounit)) - ("ocaml-batteries" ,(package-with-ocaml4.07 ocaml-batteries)) - ("ocaml-camlzip" ,(package-with-ocaml4.07 camlzip)) - ("ocaml-csv" ,(package-with-ocaml4.07 ocaml-csv)) - ("ocaml-sqlite3" ,(package-with-ocaml4.07 ocaml-sqlite3)) - ("ocaml-xmlm" ,(package-with-ocaml4.07 ocaml-xmlm)) - ("ocaml-mcl" ,(package-with-ocaml4.07 ocaml-mcl)) - ("ocaml-gsl" ,ocaml4.07-gsl-1) - ("sqlite:static" ,sqlite "static"))) - (native-inputs - `(("cddlib-src" ,(package-source cddlib)) - ("ocamlbuild" ,(package-with-ocaml4.07 ocamlbuild)) - ("pkg-config" ,pkg-config))) - (propagated-inputs - (list pplacer-scripts)) - (synopsis "Phylogenetic placement of biological sequences") - (description - "Pplacer places query sequences on a fixed reference phylogenetic tree -to maximize phylogenetic likelihood or posterior probability according to a -reference alignment. Pplacer is designed to be fast, to give useful -information about uncertainty, and to offer advanced visualization and -downstream analysis.") - (home-page "https://matsen.fhcrc.org/pplacer/") - (license license:gpl3)))) - -;; This package is installed alongside 'pplacer'. It is a separate package so -;; that it can use the python-build-system for the scripts that are -;; distributed alongside the main OCaml binaries. -(define pplacer-scripts - (package - (inherit pplacer) - (name "pplacer-scripts") - (build-system python-build-system) - (arguments - `(#:python ,python-2 - #:phases - (modify-phases %standard-phases - (add-after 'unpack 'enter-scripts-dir - (lambda _ (chdir "scripts") #t)) - (replace 'check - (lambda _ (invoke "python" "-m" "unittest" "discover" "-v") #t)) - (add-after 'install 'wrap-executables - (lambda* (#:key inputs outputs #:allow-other-keys) - (let* ((out (assoc-ref outputs "out")) - (bin (string-append out "/bin"))) - (let ((path (string-append - (assoc-ref inputs "hmmer") "/bin:" - (assoc-ref inputs "infernal") "/bin"))) - (display path) - (wrap-program (string-append bin "/refpkg_align.py") - `("PATH" ":" prefix (,path)))) - (let ((path (string-append - (assoc-ref inputs "hmmer") "/bin"))) - (wrap-program (string-append bin "/hrefpkg_query.py") - `("PATH" ":" prefix (,path))))) - #t))))) - (inputs - `(("infernal" ,infernal) - ("hmmer" ,hmmer))) - (propagated-inputs - `(("python-biopython" ,python2-biopython) - ("taxtastic" ,taxtastic))) - (synopsis "Pplacer Python scripts"))) - (define-public checkm (package (name "checkm") @@ -14024,9 +13279,6 @@ on marker set compatibility, similarity in genomic characteristics, and proximity within a reference genome.") (license license:gpl3+))) -(define-public python2-checkm-genome - (deprecated-package "python2-checkm-genome" checkm)) - (define-public umi-tools (package (name "umi-tools") @@ -14422,6 +13674,47 @@ some of the details of opening and jumping in tabix-indexed files.") ;; The licensing terms are unclear: https://github.com/ekg/smithwaterman/issues/9. (license (list license:gpl2 license:expat))))) +(define-public sylamer + (package + (name "sylamer") + (version "18-131") + (source (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/micans/sylamer/") + (commit "aa75c3584797c0c15f860addb645f7bc1dd7627d"))) + (file-name (git-file-name name version)) + (sha256 + (base32 + "1ddiwlrdghhb4574rvfw0brjp9gs5l6nfsy82h0m4mvz1dr3gkj5")))) + (build-system gnu-build-system) + (arguments + (list + #:tests? #f ; no test target + #:make-flags + #~(list (string-append "GSLPREFIX=" #$(this-package-input "gsl"))) + #:phases + '(modify-phases %standard-phases + (replace 'configure + (lambda* (#:key outputs #:allow-other-keys) + (substitute* "Makefile" + (("cp sylamer \\$\\(HOME\\)/local/bin") + (string-append "install -D -t " (assoc-ref outputs "out") + "/bin sylamer"))) + (install-file "Makefile" "src") + (chdir "src")))))) + (inputs (list gsl zlib)) + (home-page "https://www.ebi.ac.uk/research/enright/software/sylamer") + (synopsis "Asses microRNA binding and siRNA off-target effects") + (description "Sylamer is a system for finding significantly over or +under-represented words in sequences according to a sorted gene list. +Typically it is used to find significant enrichment or depletion of microRNA +or siRNA seed sequences from microarray expression data. Sylamer is extremely +fast and can be applied to genome-wide datasets with ease. Results are +plotted in terms of a significance landscape plot. These plots show +significance profiles for each word studied across the sorted genelist.") + (license license:gpl3+))) + (define-public multichoose (package (name "multichoose") |