summary | refs | log | tree | commit | diff
path: root/gnu/packages/python-science.scm
diff options
context:
space:
mode:
Diffstat (limited to 'gnu/packages/python-science.scm')
-rw-r--r-- gnu/packages/python-science.scm 489
1 files changed, 453 insertions, 36 deletions
diff --git a/gnu/packages/python-science.scm b/gnu/packages/python-science.scm
index 79ee7c8bb2..cc6a759e82 100644
--- a/gnu/packages/python-science.scm
+++ b/gnu/packages/python-science.scm
@@ -11,6 +11,9 @@
;;; Copyright © 2020 Pierre Langlois <[email protected]>
;;; Copyright © 2020, 2021 Vinicius Monego <[email protected]>
;;; Copyright © 2021 Greg Hogan <[email protected]>
+;;; Copyright © 2021 Roel Janssen <[email protected]>
+;;; Copyright © 2021 Paul Garlick <[email protected]>
+;;; Copyright © 2021 Arun Isaac <[email protected]>
;;;
;;; This file is part of GNU Guix.
;;;
@@ -32,21 +35,30 @@
#:use-module (gnu packages)
#:use-module (gnu packages base)
#:use-module (gnu packages check)
+ #:use-module (gnu packages databases)
#:use-module (gnu packages gcc)
+ #:use-module (gnu packages image-processing)
+ #:use-module (gnu packages machine-learning)
#:use-module (gnu packages maths)
+ #:use-module (gnu packages mpi)
#:use-module (gnu packages perl)
+ #:use-module (gnu packages pkg-config)
#:use-module (gnu packages python)
#:use-module (gnu packages python-build)
+ #:use-module (gnu packages python-crypto)
#:use-module (gnu packages python-check)
#:use-module (gnu packages python-web)
#:use-module (gnu packages python-xyz)
+ #:use-module (gnu packages simulation)
#:use-module (gnu packages sphinx)
+ #:use-module (gnu packages statistics)
#:use-module (gnu packages time)
#:use-module (gnu packages xdisorg)
#:use-module (gnu packages xml)
#:use-module (gnu packages xorg)
#:use-module (guix packages)
#:use-module (guix download)
+ #:use-module (guix git-download)
#:use-module (guix utils)
#:use-module (guix build-system python))
@@ -66,8 +78,7 @@
("python-matplotlib" ,python-matplotlib)
("python-pyparsing" ,python-pyparsing)))
(inputs
- `(("lapack" ,lapack)
- ("openblas" ,openblas)
+ `(("openblas" ,openblas)
("pybind11" ,pybind11)))
(native-inputs
`(("python-cython" ,python-cython)
@@ -318,49 +329,55 @@ of the SGP4 satellite tracking algorithm.")
(define-public python-pandas
(package
(name "python-pandas")
- (version "1.0.5")
+ (version "1.3.0")
(source
(origin
(method url-fetch)
(uri (pypi-uri "pandas" version))
(sha256
- (base32 "1a2gv3g6jr6vb5ca43fkwjl5xf86wpfz8y3zcy787adjl0hdkib9"))))
+ (base32 "1qi2cv450m05dwccx3p1s373k5b4ncvwi74plnms2pidrz4ycm65"))))
(build-system python-build-system)
(arguments
`(#:modules ((guix build utils)
(guix build python-build-system)
(ice-9 ftw)
+ (srfi srfi-1)
(srfi srfi-26))
- #:phases (modify-phases %standard-phases
- (add-after 'unpack 'patch-which
- (lambda* (#:key inputs #:allow-other-keys)
- (let ((which (assoc-ref inputs "which")))
- (substitute* "pandas/io/clipboard/__init__.py"
- (("^WHICH_CMD = .*")
- (string-append "WHICH_CMD = \"" which "\"\n"))))
- #t))
- (add-before 'check 'prepare-x
- (lambda _
- (system "Xvfb &")
- (setenv "DISPLAY" ":0")
- ;; xsel needs to write a log file.
- (setenv "HOME" "/tmp")
- #t))
- (replace 'check
- (lambda _
- (let ((build-directory
- (string-append
- (getcwd) "/build/"
- (car (scandir "build"
- (cut string-prefix? "lib." <>))))))
- ;; Disable the "strict data files" option which causes
- ;; the build to error out if required data files are
- ;; not available (as is the case with PyPI archives).
- (substitute* "setup.cfg"
- (("addopts = --strict-data-files") "addopts = "))
- (with-directory-excursion build-directory
- (invoke "pytest" "-vv" "pandas" "--skip-slow"
- "--skip-network"))))))))
+ #:phases
+ (modify-phases %standard-phases
+ (add-after 'unpack 'patch-which
+ (lambda* (#:key inputs #:allow-other-keys)
+ (let ((which (assoc-ref inputs "which")))
+ (substitute* "pandas/io/clipboard/__init__.py"
+ (("^WHICH_CMD = .*")
+ (string-append "WHICH_CMD = \"" which "\"\n"))))))
+ (add-before 'check 'prepare-x
+ (lambda _
+ (system "Xvfb &")
+ (setenv "DISPLAY" ":0")
+ ;; xsel needs to write a log file.
+ (setenv "HOME" "/tmp")))
+ (replace 'check
+ (lambda _
+ (let ((build-directory
+ (string-append
+ (getcwd) "/build/"
+ (first (scandir "build"
+ (cut string-prefix? "lib." <>))))))
+ (with-directory-excursion build-directory
+ (invoke "pytest" "-vv" "pandas" "--skip-slow"
+ "--skip-network"
+ "-k"
+ ;; These tests access the internet:
+ ;; pandas/tests/io/xml/test_xml.py::test_wrong_url[lxml]
+ ;; pandas/tests/io/xml/test_xml.py::test_wrong_url[etree]
+ ;; TODO: the excel tests fail for unknown reasons
+ (string-append "not test_wrong_url"
+ " and not test_excelwriter_fspath"
+ " and not test_ExcelWriter_dispatch"
+ ;; TODO: Missing input
+ " and not TestS3"
+ " and not s3")))))))))
(propagated-inputs
`(("python-jinja2" ,python-jinja2)
("python-numpy" ,python-numpy)
@@ -436,8 +453,82 @@ doing practical, real world data analysis in Python.")
;; from <https://github.com/pandas-dev/pandas/pull/29294>.
(substitute* "pandas/io/parsers.py"
(("if 'NULL byte' in msg:")
- "if 'NULL byte' in msg or 'line contains NUL' in msg:"))
- #t)))))))
+ "if 'NULL byte' in msg or 'line contains NUL' in msg:"))))))
+ (arguments
+ `(#:modules ((guix build utils)
+ (guix build python-build-system)
+ (ice-9 ftw)
+ (srfi srfi-26))
+ #:python ,python-2
+ #:phases
+ (modify-phases %standard-phases
+ (add-after 'unpack 'patch-which
+ (lambda* (#:key inputs #:allow-other-keys)
+ (let ((which (assoc-ref inputs "which")))
+ (substitute* "pandas/io/clipboard/__init__.py"
+ (("^CHECK_CMD = .*")
+ (string-append "CHECK_CMD = \"" which "\"\n"))))))
+ (replace 'check
+ (lambda _
+ (let ((build-directory
+ (string-append
+ (getcwd) "/build/"
+ (car (scandir "build"
+ (cut string-prefix? "lib." <>))))))
+ ;; Disable the "strict data files" option which causes
+ ;; the build to error out if required data files are
+ ;; not available (as is the case with PyPI archives).
+ (substitute* "setup.cfg"
+ (("addopts = --strict-data-files") "addopts = "))
+ (with-directory-excursion build-directory
+ ;; Delete tests that require "moto" which is not yet
+ ;; in Guix.
+ (for-each delete-file
+ '("pandas/tests/io/conftest.py"
+ "pandas/tests/io/json/test_compression.py"
+ "pandas/tests/io/parser/test_network.py"
+ "pandas/tests/io/test_parquet.py"))
+ (invoke "pytest" "-vv" "pandas" "--skip-slow"
+ "--skip-network" "-k"
+ ;; XXX: Due to the deleted tests above.
+ "not test_read_s3_jsonl"))))))))
+ (propagated-inputs
+ `(("python-numpy" ,python2-numpy)
+ ("python-openpyxl" ,python2-openpyxl)
+ ("python-pytz" ,python2-pytz)
+ ("python-dateutil" ,python2-dateutil)
+ ("python-xlrd" ,python2-xlrd)))
+ (inputs
+ `(("which" ,which)))
+ (native-inputs
+ `(("python-cython" ,python2-cython)
+ ("python-beautifulsoup4" ,python2-beautifulsoup4)
+ ("python-lxml" ,python2-lxml)
+ ("python-html5lib" ,python2-html5lib)
+ ("python-nose" ,python2-nose)
+ ("python-pytest" ,python2-pytest)
+ ("python-pytest-mock" ,python2-pytest-mock))))))
+
+(define-public python-pyflow ; Guix package record for the pyflow Python module
+ (package
+ (name "python-pyflow")
+ (version "1.1.20")
+ (source (origin
+ (method url-fetch)
+ (uri (string-append ; yields .../download/v<version>/pyflow-<version>.tar.gz
+ "https://github.com/Illumina/pyflow/releases/download/v"
+ version "/pyflow-" version ".tar.gz"))
+ (sha256 ; expected content hash of the downloaded tarball
+ (base32
+ "1bvfvviw58cndyn862qnv9nj3d9cd3a0dm4vc4sd9vwq8a6z1riv"))))
+ (build-system python-build-system)
+ (arguments
+ `(#:tests? #f)) ; There is no test suite.
+ (home-page "https://illumina.github.io/pyflow/")
+ (synopsis "Tool to manage tasks in a task dependency graph")
+ (description "This package is a Python module to manage tasks in the
+context of a task dependency graph. It has some similarities to make.")
+ (license license:bsd-2)))
(define-public python-bottleneck
(package
@@ -555,6 +646,37 @@ by numpy using the highly efficient @code{msgpack} format. Serialization of
Python's native complex data types is also supported.")
(license license:bsd-3)))
+(define-public python-ruffus ; Guix package record for Ruffus, fetched from PyPI
+ (package
+ (name "python-ruffus")
+ (version "2.8.4")
+ (source
+ (origin
+ (method url-fetch)
+ (uri (pypi-uri "ruffus" version))
+ (sha256
+ (base32
+ "1ai673k1s94s8b6pyxai8mk17p6zvvyi87rl236fs6ls8mpdklvc"))))
+ (build-system python-build-system)
+ (arguments
+ `(#:phases
+ (modify-phases %standard-phases
+ (delete 'check) ; drop the stock phase; a phase of the same name is re-added below
+ (add-after 'install 'check ; the tests now run after 'install rather than before it
+ (lambda* (#:key tests? inputs outputs #:allow-other-keys)
+ (when tests? ; nothing is run under #:tests? #f
+ (add-installed-pythonpath inputs outputs) ; NOTE(review): presumably exposes the freshly installed modules to Python -- confirm
+ (with-directory-excursion "ruffus/test"
+ (invoke "bash" "run_all_unit_tests3.cmd")))))))) ; script is run with bash from inside ruffus/test
+ (native-inputs
+ `(("python-pytest" ,python-pytest))) ; build-time only
+ (home-page "http://www.ruffus.org.uk")
+ (synopsis "Light-weight computational pipeline management")
+ (description
+ "Ruffus is designed to allow scientific and other analyses to be
+automated with the minimum of fuss and the least effort.")
+ (license license:expat)))
+
(define-public python-statannot
(package
(name "python-statannot")
@@ -617,3 +739,298 @@ annotations on an existing boxplots and barplots generated by seaborn.")
UpSet plots are used to visualize set overlaps; like Venn diagrams but more
readable.")
(license license:bsd-3)))
+
+(define-public python-vedo             ; Guix package record for vedo, built from a git checkout
+  (package
+    (name "python-vedo")
+    (version "2021.0.3")
+    (source
+     (origin
+       (method git-fetch)
+       (uri (git-reference
+             (url "https://github.com/marcomusy/vedo")
+             (commit version)))         ; the checkout is pinned to the ref named by VERSION
+       (file-name (git-file-name name version))
+       (sha256
+        (base32
+         "18i3ajh5jzhpc86di15lwh4jv97jhm627ii877sa4yhv6abzjfpn"))))
+    (build-system python-build-system)
+    (arguments
+     `(#:phases
+       (modify-phases %standard-phases
+         (add-after 'build 'mpi-setup
+           ,%openmpi-setup)             ; NOTE(review): presumably exported by (gnu packages mpi) -- confirm
+         (replace 'check
+           (lambda* (#:key tests? inputs outputs #:allow-other-keys)
+             (when tests?               ; honour #:tests? #f instead of always running the suite
+               (setenv "HOME" (getcwd)) ; NOTE(review): presumably the tests need a writable HOME -- confirm
+               (add-installed-pythonpath inputs outputs)
+               (with-directory-excursion "tests"
+                 (for-each (lambda (dir) ; each listed directory has its own run_all.sh driver
+                             (with-directory-excursion dir
+                               (invoke "./run_all.sh")))
+                           '("common" "dolfin")))))))))
+    (inputs ; for the check phase
+     `(("dolfin" ,fenics)
+       ("pkgconfig" ,python-pkgconfig)
+       ("matplotlib" ,python-matplotlib)))
+    (native-inputs ; for python-pkgconfig
+     `(("pkg-config" ,pkg-config)))
+    (propagated-inputs
+     `(("numpy" ,python-numpy)
+       ("vtk" ,vtk)))
+    (home-page "https://github.com/marcomusy/vedo")
+    (synopsis
+     "Analysis and visualization of 3D objects and point clouds")
+    (description
+     "@code{vedo} is a fast and lightweight python module for
+scientific analysis and visualization. The package provides a wide
+range of functionalities for working with three-dimensional meshes and
+point clouds. It can also be used to generate high quality
+two-dimensional renderings such as scatter plots and histograms.
+@code{vedo} is based on @code{vtk} and @code{numpy}, with no other
+dependencies.")
+    ;; vedo is released under the Expat license. Included fonts are
+    ;; covered by the OFL license and textures by the CC0 license.
+    ;; The earth images are in the public domain.
+    (license (list license:expat
+                   license:silofl1.1
+                   license:cc0
+                   license:public-domain))))
+
+(define-public python-pandas-flavor ; Guix package record for pandas_flavor, fetched from PyPI
+ (package
+ (name "python-pandas-flavor")
+ (version "0.2.0")
+ (source
+ (origin
+ (method url-fetch)
+ (uri (pypi-uri "pandas_flavor" version)) ; PyPI name uses an underscore, the Guix name a hyphen
+ (sha256
+ (base32
+ "12g4av8gpl6l83yza3h97j3f2jblqv69frlidrvdq8ny2rc6awbq"))))
+ (build-system python-build-system)
+ (propagated-inputs ; no phases are overridden, so the build system defaults apply
+ `(("python-pandas" ,python-pandas)
+ ("python-xarray" ,python-xarray)))
+ (home-page "https://github.com/Zsailer/pandas_flavor")
+ (synopsis "Write your own flavor of Pandas")
+ (description "Pandas 0.23 added a simple API for registering accessors
+with Pandas objects. Pandas-flavor extends Pandas' extension API by
+
+@itemize
+@item adding support for registering methods as well
+@item making each of these functions backwards compatible with older versions
+of Pandas
+@end itemize")
+ (license license:expat)))
+
+(define-public python-pingouin ; Guix package record for Pingouin, built from a git checkout
+ (package
+ (name "python-pingouin")
+ (version "0.3.12")
+ (source
+ ;; The PyPI tarball does not contain the tests.
+ (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/raphaelvallat/pingouin")
+ (commit (string-append "v" version)))) ; the ref fetched is "v" followed by VERSION
+ (file-name (git-file-name name version))
+ (sha256
+ (base32
+ "1ap29x54kdr19vi8qxj9g6cz2r1q4f0z7dcf6g77zwav7hf7r61a"))))
+ (build-system python-build-system)
+ (arguments
+ `(#:phases
+ (modify-phases %standard-phases
+ ;; On loading, Pingouin uses the outdated package to check if a newer
+ ;; version is available on PyPI. This check adds an extra dependency
+ ;; and is irrelevant to Guix users. So, disable it.
+ (add-after 'unpack 'remove-outdated-check
+ (lambda _
+ (substitute* "setup.py"
+ (("'outdated',") "")) ; delete the 'outdated', entry from setup.py
+ (substitute* "pingouin/__init__.py"
+ (("^from outdated[^\n]*") "") ; blank the line starting with "from outdated"
+ (("^warn_if_outdated[^\n]*") "")))) ; blank the line starting with "warn_if_outdated"
+ (replace 'check
+ (lambda* (#:key tests? #:allow-other-keys)
+ (when tests? ; nothing is run under #:tests? #f
+ (invoke "pytest")))))))
+ (native-inputs
+ `(("python-pytest" ,python-pytest)
+ ("python-pytest-cov" ,python-pytest-cov))) ; NOTE(review): presumably required by upstream's pytest options -- confirm
+ (propagated-inputs
+ `(("python-matplotlib" ,python-matplotlib)
+ ("python-mpmath" ,python-mpmath)
+ ("python-numpy" ,python-numpy)
+ ("python-pandas" ,python-pandas)
+ ("python-pandas-flavor" ,python-pandas-flavor)
+ ("python-scikit-learn" ,python-scikit-learn)
+ ("python-scipy" ,python-scipy)
+ ("python-seaborn" ,python-seaborn)
+ ("python-statsmodels" ,python-statsmodels)
+ ("python-tabulate" ,python-tabulate)))
+ (home-page "https://pingouin-stats.org/")
+ (synopsis "Statistical package for Python")
+ (description "Pingouin is a statistical package written in Python 3 and
+based mostly on Pandas and NumPy. Its features include
+
+@itemize
+@item ANOVAs: N-ways, repeated measures, mixed, ancova
+@item Pairwise post-hocs tests (parametric and non-parametric) and pairwise
+correlations
+@item Robust, partial, distance and repeated measures correlations
+@item Linear/logistic regression and mediation analysis
+@item Bayes Factors
+@item Multivariate tests
+@item Reliability and consistency
+@item Effect sizes and power analysis
+@item Parametric/bootstrapped confidence intervals around an effect size or a
+correlation coefficient
+@item Circular statistics
+@item Chi-squared tests
+@item Plotting: Bland-Altman plot, Q-Q plot, paired plot, robust correlation,
+and more
+@end itemize")
+ (license license:gpl3)))
+
+(define-public python-distributed
+  (package
+    (name "python-distributed")
+    (version "2021.07.1")
+    (source
+     (origin
+       ;; Fetched from git: the archive on PyPI lacks the test files.
+       (method git-fetch)
+       (uri (git-reference
+             (url "https://github.com/dask/distributed")
+             (commit version)))
+       (file-name (git-file-name name version))
+       (sha256
+        (base32
+         "0i55zf3k55sqjxnwlzsyj3h3v1588fn54ng4mj3dfiqzh3nlj0dg"))))
+    (build-system python-build-system)
+    (arguments
+     '(#:phases
+       (modify-phases %standard-phases
+         (add-after 'unpack 'fix-references ; bare command names become absolute paths under the output
+           (lambda* (#:key outputs #:allow-other-keys)
+             (let ((out (assoc-ref outputs "out")))
+               (substitute* '("distributed/comm/tests/test_ucx_config.py"
+                              "distributed/tests/test_client.py"
+                              "distributed/tests/test_queues.py"
+                              "distributed/tests/test_variable.py"
+                              "distributed/cli/tests/test_tls_cli.py"
+                              "distributed/cli/tests/test_dask_spec.py"
+                              "distributed/cli/tests/test_dask_worker.py"
+                              "distributed/cli/tests/test_dask_scheduler.py")
+                 (("\"dask-scheduler\"")
+                  (format #false "\"~a/bin/dask-scheduler\"" out))
+                 (("\"dask-worker\"")
+                  (format #false "\"~a/bin/dask-worker\"" out))))))
+         (replace 'check
+           (lambda* (#:key tests? #:allow-other-keys)
+             (when tests?
+               (let ((deselected        ; expression handed to pytest's -k option
+                      ;; TODO: These tests fail for unknown reasons:
+                      (string-append
+                       ;; Assertion error.
+                       "not test_version_option"
+                       ;; "The 'distributed' distribution was not found"
+                       " and not test_register_backend_entrypoint"
+                       ;; "AttributeError: module 'distributed.dashboard' has no attribute 'scheduler'"
+                       " and not test_get_client_functions_spawn_clusters")))
+                 (setenv "DISABLE_IPV6" "1")
+                 (invoke "pytest" "-vv" "distributed"
+                         "-m" "not slow and not gpu and not ipython and not avoid_ci"
+                         "-k" deselected))))))))
+    (propagated-inputs
+     `(("python-click" ,python-click)
+       ("python-cloudpickle" ,python-cloudpickle)
+       ("python-cryptography" ,python-cryptography)
+       ("python-dask" ,python-dask)
+       ("python-msgpack" ,python-msgpack)
+       ("python-psutil" ,python-psutil)
+       ("python-pyyaml" ,python-pyyaml)
+       ("python-setuptools" ,python-setuptools)
+       ("python-sortedcontainers" ,python-sortedcontainers)
+       ("python-tblib" ,python-tblib)
+       ("python-toolz" ,python-toolz)
+       ("python-tornado" ,python-tornado-6)
+       ("python-zict" ,python-zict)))
+    (native-inputs
+     `(("python-pytest" ,python-pytest)))
+    (home-page "https://distributed.dask.org")
+    (synopsis "Distributed scheduler for Dask")
+    (description "Dask.distributed is a lightweight library for distributed
+computing in Python. It extends both the @code{concurrent.futures} and
+@code{dask} APIs to moderate sized clusters.")
+    (license license:bsd-3)))
+
+(define-public python-modin
+  (package
+    (name "python-modin")
+    (version "0.10.1")
+    (source
+     (origin
+       ;; Fetched from git: the archive on PyPI lacks some required files.
+       (method git-fetch)
+       (uri (git-reference
+             (url "https://github.com/modin-project/modin")
+             (commit version)))
+       (file-name (git-file-name name version))
+       (sha256
+        (base32
+         "128ghfb9ncmnn8km409xjcdppvn9nr9jqw8rkvsfavh7wnwlk509"))))
+    (build-system python-build-system)
+    (arguments
+     `(#:phases
+       (modify-phases %standard-phases
+         (add-after 'unpack 'make-files-writable
+           (lambda _
+             (for-each make-file-writable (find-files "."))))
+         (replace 'check
+           (lambda* (#:key tests? #:allow-other-keys)
+             (when tests?
+               ;; Run the same test file once per engine, in this order.
+               (for-each (lambda (engine)
+                           (setenv "MODIN_ENGINE" engine)
+                           (invoke "python" "-m" "pytest"
+                                   "modin/pandas/test/test_concat.py"))
+                         '("dask" "python"))))))))
+    (propagated-inputs
+     `(("python-cloudpickle" ,python-cloudpickle)
+       ("python-dask" ,python-dask)
+       ("python-distributed" ,python-distributed)
+       ("python-numpy" ,python-numpy)
+       ("python-packaging" ,python-packaging)
+       ("python-pandas" ,python-pandas)))
+    (native-inputs
+     `(("python-coverage" ,python-coverage)
+       ("python-jinja2" ,python-jinja2)
+       ("python-lxml" ,python-lxml)
+       ("python-matplotlib" ,python-matplotlib)
+       ("python-msgpack" ,python-msgpack)
+       ("python-openpyxl" ,python-openpyxl)
+       ("python-psutil" ,python-psutil)
+       ("python-pyarrow" ,python-pyarrow)
+       ("python-pytest" ,python-pytest)
+       ("python-pytest-benchmark" ,python-pytest-benchmark)
+       ("python-pytest-cov" ,python-pytest-cov)
+       ("python-pytest-xdist" ,python-pytest-xdist)
+       ("python-scipy" ,python-scipy)
+       ("python-sqlalchemy" ,python-sqlalchemy)
+       ("python-tables" ,python-tables)
+       ("python-tqdm" ,python-tqdm)
+       ("python-xarray" ,python-xarray)
+       ("python-xlrd" ,python-xlrd)))
+    (home-page "https://github.com/modin-project/modin")
+    (synopsis "Make your pandas code run faster")
+    (description
+     "Modin uses Ray or Dask to provide an effortless way to speed up your
+pandas notebooks, scripts, and libraries. Unlike other distributed DataFrame
+libraries, Modin provides seamless integration and compatibility with existing
+pandas code.")
+    (license license:asl2.0)))