diff options
Diffstat (limited to 'gnu/packages/python-science.scm')
-rw-r--r-- | gnu/packages/python-science.scm | 489 |
1 files changed, 453 insertions, 36 deletions
diff --git a/gnu/packages/python-science.scm b/gnu/packages/python-science.scm index 79ee7c8bb2..cc6a759e82 100644 --- a/gnu/packages/python-science.scm +++ b/gnu/packages/python-science.scm @@ -11,6 +11,9 @@ ;;; Copyright © 2020 Pierre Langlois <[email protected]> ;;; Copyright © 2020, 2021 Vinicius Monego <[email protected]> ;;; Copyright © 2021 Greg Hogan <[email protected]> +;;; Copyright © 2021 Roel Janssen <[email protected]> +;;; Copyright © 2021 Paul Garlick <[email protected]> +;;; Copyright © 2021 Arun Isaac <[email protected]> ;;; ;;; This file is part of GNU Guix. ;;; @@ -32,21 +35,30 @@ #:use-module (gnu packages) #:use-module (gnu packages base) #:use-module (gnu packages check) + #:use-module (gnu packages databases) #:use-module (gnu packages gcc) + #:use-module (gnu packages image-processing) + #:use-module (gnu packages machine-learning) #:use-module (gnu packages maths) + #:use-module (gnu packages mpi) #:use-module (gnu packages perl) + #:use-module (gnu packages pkg-config) #:use-module (gnu packages python) #:use-module (gnu packages python-build) + #:use-module (gnu packages python-crypto) #:use-module (gnu packages python-check) #:use-module (gnu packages python-web) #:use-module (gnu packages python-xyz) + #:use-module (gnu packages simulation) #:use-module (gnu packages sphinx) + #:use-module (gnu packages statistics) #:use-module (gnu packages time) #:use-module (gnu packages xdisorg) #:use-module (gnu packages xml) #:use-module (gnu packages xorg) #:use-module (guix packages) #:use-module (guix download) + #:use-module (guix git-download) #:use-module (guix utils) #:use-module (guix build-system python)) @@ -66,8 +78,7 @@ ("python-matplotlib" ,python-matplotlib) ("python-pyparsing" ,python-pyparsing))) (inputs - `(("lapack" ,lapack) - ("openblas" ,openblas) + `(("openblas" ,openblas) ("pybind11" ,pybind11))) (native-inputs `(("python-cython" ,python-cython) @@ -318,49 +329,55 @@ of the SGP4 satellite tracking algorithm.") (define-public python-pandas (package (name "python-pandas") - (version "1.0.5") + (version "1.3.0") (source (origin (method url-fetch) (uri (pypi-uri "pandas" version)) (sha256 - (base32 "1a2gv3g6jr6vb5ca43fkwjl5xf86wpfz8y3zcy787adjl0hdkib9")))) + (base32 "1qi2cv450m05dwccx3p1s373k5b4ncvwi74plnms2pidrz4ycm65")))) (build-system python-build-system) (arguments `(#:modules ((guix build utils) (guix build python-build-system) (ice-9 ftw) + (srfi srfi-1) (srfi srfi-26)) - #:phases (modify-phases %standard-phases - (add-after 'unpack 'patch-which - (lambda* (#:key inputs #:allow-other-keys) - (let ((which (assoc-ref inputs "which"))) - (substitute* "pandas/io/clipboard/__init__.py" - (("^WHICH_CMD = .*") - (string-append "WHICH_CMD = \"" which "\"\n")))) - #t)) - (add-before 'check 'prepare-x - (lambda _ - (system "Xvfb &") - (setenv "DISPLAY" ":0") - ;; xsel needs to write a log file. - (setenv "HOME" "/tmp") - #t)) - (replace 'check - (lambda _ - (let ((build-directory - (string-append - (getcwd) "/build/" - (car (scandir "build" - (cut string-prefix? "lib." <>)))))) - ;; Disable the "strict data files" option which causes - ;; the build to error out if required data files are - ;; not available (as is the case with PyPI archives). - (substitute* "setup.cfg" - (("addopts = --strict-data-files") "addopts = ")) - (with-directory-excursion build-directory - (invoke "pytest" "-vv" "pandas" "--skip-slow" - "--skip-network")))))))) + #:phases + (modify-phases %standard-phases + (add-after 'unpack 'patch-which + (lambda* (#:key inputs #:allow-other-keys) + (let ((which (assoc-ref inputs "which"))) + (substitute* "pandas/io/clipboard/__init__.py" + (("^WHICH_CMD = .*") + (string-append "WHICH_CMD = \"" which "\"\n")))))) + (add-before 'check 'prepare-x + (lambda _ + (system "Xvfb &") + (setenv "DISPLAY" ":0") + ;; xsel needs to write a log file. + (setenv "HOME" "/tmp"))) + (replace 'check + (lambda _ + (let ((build-directory + (string-append + (getcwd) "/build/" + (first (scandir "build" + (cut string-prefix? "lib." <>)))))) + (with-directory-excursion build-directory + (invoke "pytest" "-vv" "pandas" "--skip-slow" + "--skip-network" + "-k" + ;; These tets access the internet: + ;; pandas/tests/io/xml/test_xml.py::test_wrong_url[lxml] + ;; pandas/tests/io/xml/test_xml.py::test_wrong_url[etree] + ;; TODO: the excel tests fail for unknown reasons + (string-append "not test_wrong_url" + " and not test_excelwriter_fspath" + " and not test_ExcelWriter_dispatch" + ;; TODO: Missing input + " and not TestS3" + " and not s3"))))))))) (propagated-inputs `(("python-jinja2" ,python-jinja2) ("python-numpy" ,python-numpy) @@ -436,8 +453,82 @@ doing practical, real world data analysis in Python.") ;; from <https://github.com/pandas-dev/pandas/pull/29294>. (substitute* "pandas/io/parsers.py" (("if 'NULL byte' in msg:") - "if 'NULL byte' in msg or 'line contains NUL' in msg:")) - #t))))))) + "if 'NULL byte' in msg or 'line contains NUL' in msg:")))))) + (arguments + `(#:modules ((guix build utils) + (guix build python-build-system) + (ice-9 ftw) + (srfi srfi-26)) + #:python ,python-2 + #:phases + (modify-phases %standard-phases + (add-after 'unpack 'patch-which + (lambda* (#:key inputs #:allow-other-keys) + (let ((which (assoc-ref inputs "which"))) + (substitute* "pandas/io/clipboard/__init__.py" + (("^CHECK_CMD = .*") + (string-append "CHECK_CMD = \"" which "\"\n")))))) + (replace 'check + (lambda _ + (let ((build-directory + (string-append + (getcwd) "/build/" + (car (scandir "build" + (cut string-prefix? "lib." <>)))))) + ;; Disable the "strict data files" option which causes + ;; the build to error out if required data files are + ;; not available (as is the case with PyPI archives). + (substitute* "setup.cfg" + (("addopts = --strict-data-files") "addopts = ")) + (with-directory-excursion build-directory + ;; Delete tests that require "moto" which is not yet + ;; in Guix. + (for-each delete-file + '("pandas/tests/io/conftest.py" + "pandas/tests/io/json/test_compression.py" + "pandas/tests/io/parser/test_network.py" + "pandas/tests/io/test_parquet.py")) + (invoke "pytest" "-vv" "pandas" "--skip-slow" + "--skip-network" "-k" + ;; XXX: Due to the deleted tests above. + "not test_read_s3_jsonl")))))))) + (propagated-inputs + `(("python-numpy" ,python2-numpy) + ("python-openpyxl" ,python2-openpyxl) + ("python-pytz" ,python2-pytz) + ("python-dateutil" ,python2-dateutil) + ("python-xlrd" ,python2-xlrd))) + (inputs + `(("which" ,which))) + (native-inputs + `(("python-cython" ,python2-cython) + ("python-beautifulsoup4" ,python2-beautifulsoup4) + ("python-lxml" ,python2-lxml) + ("python-html5lib" ,python2-html5lib) + ("python-nose" ,python2-nose) + ("python-pytest" ,python2-pytest) + ("python-pytest-mock" ,python2-pytest-mock)))))) + +(define-public python-pyflow + (package + (name "python-pyflow") + (version "1.1.20") + (source (origin + (method url-fetch) + (uri (string-append + "https://github.com/Illumina/pyflow/releases/download/v" + version "/pyflow-" version ".tar.gz")) + (sha256 + (base32 + "1bvfvviw58cndyn862qnv9nj3d9cd3a0dm4vc4sd9vwq8a6z1riv")))) + (build-system python-build-system) + (arguments + `(#:tests? #f)) ; There is no test suite. + (home-page "https://illumina.github.io/pyflow/") + (synopsis "Tool to manage tasks in a task dependency graph") + (description "This package is a Python module to manage tasks in the +context of a task dependency graph. It has some similarities to make.") + (license license:bsd-2))) (define-public python-bottleneck (package @@ -555,6 +646,37 @@ by numpy using the highly efficient @code{msgpack} format. Serialization of Python's native complex data types is also supported.") (license license:bsd-3))) +(define-public python-ruffus + (package + (name "python-ruffus") + (version "2.8.4") + (source + (origin + (method url-fetch) + (uri (pypi-uri "ruffus" version)) + (sha256 + (base32 + "1ai673k1s94s8b6pyxai8mk17p6zvvyi87rl236fs6ls8mpdklvc")))) + (build-system python-build-system) + (arguments + `(#:phases + (modify-phases %standard-phases + (delete 'check) + (add-after 'install 'check + (lambda* (#:key tests? inputs outputs #:allow-other-keys) + (when tests? + (add-installed-pythonpath inputs outputs) + (with-directory-excursion "ruffus/test" + (invoke "bash" "run_all_unit_tests3.cmd")))))))) + (native-inputs + `(("python-pytest" ,python-pytest))) + (home-page "http://www.ruffus.org.uk") + (synopsis "Light-weight computational pipeline management") + (description + "Ruffus is designed to allow scientific and other analyses to be +automated with the minimum of fuss and the least effort.") + (license license:expat))) + (define-public python-statannot (package (name "python-statannot") @@ -617,3 +739,298 @@ annotations on an existing boxplots and barplots generated by seaborn.") UpSet plots are used to visualize set overlaps; like Venn diagrams but more readable.") (license license:bsd-3))) + +(define-public python-vedo + (package + (name "python-vedo") + (version "2021.0.3") + (source + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/marcomusy/vedo") + (commit version))) + (file-name (git-file-name name version)) + (sha256 + (base32 + "18i3ajh5jzhpc86di15lwh4jv97jhm627ii877sa4yhv6abzjfpn")))) + (build-system python-build-system) + (arguments + `(#:phases + (modify-phases %standard-phases + (add-after 'build 'mpi-setup + ,%openmpi-setup) + (replace 'check + (lambda* (#:key inputs outputs #:allow-other-keys) + (setenv "HOME" (getcwd)) + (add-installed-pythonpath inputs outputs) + (with-directory-excursion "tests" + (for-each (lambda (dir) + (with-directory-excursion dir + (invoke "./run_all.sh"))) + '("common" "dolfin"))) + #t))))) + (inputs ; for the check phase + `(("dolfin" ,fenics) + ("pkgconfig" ,python-pkgconfig) + ("matplotlib" ,python-matplotlib))) + (native-inputs ; for python-pkgconfig + `(("pkg-config" ,pkg-config))) + (propagated-inputs + `(("numpy" ,python-numpy) + ("vtk" ,vtk))) + (home-page "https://github.com/marcomusy/vedo") + (synopsis + "Analysis and visualization of 3D objects and point clouds") + (description + "@code{vedo} is a fast and lightweight python module for +scientific analysis and visualization. The package provides a wide +range of functionalities for working with three-dimensional meshes and +point clouds. It can also be used to generate high quality +two-dimensional renderings such as scatter plots and histograms. +@code{vedo} is based on @code{vtk} and @code{numpy}, with no other +dependencies.") + ;; vedo is released under the Expat license. Included fonts are + ;; covered by the OFL license and textures by the CC0 license. + ;; The earth images are in the public domain. + (license (list license:expat + license:silofl1.1 + license:cc0 + license:public-domain)))) + +(define-public python-pandas-flavor + (package + (name "python-pandas-flavor") + (version "0.2.0") + (source + (origin + (method url-fetch) + (uri (pypi-uri "pandas_flavor" version)) + (sha256 + (base32 + "12g4av8gpl6l83yza3h97j3f2jblqv69frlidrvdq8ny2rc6awbq")))) + (build-system python-build-system) + (propagated-inputs + `(("python-pandas" ,python-pandas) + ("python-xarray" ,python-xarray))) + (home-page "https://github.com/Zsailer/pandas_flavor") + (synopsis "Write your own flavor of Pandas") + (description "Pandas 0.23 added a simple API for registering accessors +with Pandas objects. Pandas-flavor extends Pandas' extension API by + +@itemize +@item adding support for registering methods as well +@item making each of these functions backwards compatible with older versions +of Pandas +@end itemize") + (license license:expat))) + +(define-public python-pingouin + (package + (name "python-pingouin") + (version "0.3.12") + (source + ;; The PyPI tarball does not contain the tests. + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/raphaelvallat/pingouin") + (commit (string-append "v" version)))) + (file-name (git-file-name name version)) + (sha256 + (base32 + "1ap29x54kdr19vi8qxj9g6cz2r1q4f0z7dcf6g77zwav7hf7r61a")))) + (build-system python-build-system) + (arguments + `(#:phases + (modify-phases %standard-phases + ;; On loading, Pingouin uses the outdated package to check if a newer + ;; version is available on PyPI. This check adds an extra dependency + ;; and is irrelevant to Guix users. So, disable it. + (add-after 'unpack 'remove-outdated-check + (lambda _ + (substitute* "setup.py" + (("'outdated',") "")) + (substitute* "pingouin/__init__.py" + (("^from outdated[^\n]*") "") + (("^warn_if_outdated[^\n]*") "")))) + (replace 'check + (lambda* (#:key tests? #:allow-other-keys) + (when tests? + (invoke "pytest"))))))) + (native-inputs + `(("python-pytest" ,python-pytest) + ("python-pytest-cov" ,python-pytest-cov))) + (propagated-inputs + `(("python-matplotlib" ,python-matplotlib) + ("python-mpmath" ,python-mpmath) + ("python-numpy" ,python-numpy) + ("python-pandas" ,python-pandas) + ("python-pandas-flavor" ,python-pandas-flavor) + ("python-scikit-learn" ,python-scikit-learn) + ("python-scipy" ,python-scipy) + ("python-seaborn" ,python-seaborn) + ("python-statsmodels" ,python-statsmodels) + ("python-tabulate" ,python-tabulate))) + (home-page "https://pingouin-stats.org/") + (synopsis "Statistical package for Python") + (description "Pingouin is a statistical package written in Python 3 and +based mostly on Pandas and NumPy. Its features include + +@itemize +@item ANOVAs: N-ways, repeated measures, mixed, ancova +@item Pairwise post-hocs tests (parametric and non-parametric) and pairwise +correlations +@item Robust, partial, distance and repeated measures correlations +@item Linear/logistic regression and mediation analysis +@item Bayes Factors +@item Multivariate tests +@item Reliability and consistency +@item Effect sizes and power analysis +@item Parametric/bootstrapped confidence intervals around an effect size or a +correlation coefficient +@item Circular statistics +@item Chi-squared tests +@item Plotting: Bland-Altman plot, Q-Q plot, paired plot, robust correlation, +and more +@end itemize") + (license license:gpl3))) + +(define-public python-distributed + (package + (name "python-distributed") + (version "2021.07.1") + (source + (origin + ;; The test files are not included in the archive on pypi + (method git-fetch) + (uri (git-reference + (url "https://github.com/dask/distributed") + (commit version))) + (file-name (git-file-name name version)) + (sha256 + (base32 + "0i55zf3k55sqjxnwlzsyj3h3v1588fn54ng4mj3dfiqzh3nlj0dg")))) + (build-system python-build-system) + (arguments + '(#:phases + (modify-phases %standard-phases + (add-after 'unpack 'fix-references + (lambda* (#:key outputs #:allow-other-keys) + (substitute* '("distributed/comm/tests/test_ucx_config.py" + "distributed/tests/test_client.py" + "distributed/tests/test_queues.py" + "distributed/tests/test_variable.py" + "distributed/cli/tests/test_tls_cli.py" + "distributed/cli/tests/test_dask_spec.py" + "distributed/cli/tests/test_dask_worker.py" + "distributed/cli/tests/test_dask_scheduler.py") + (("\"dask-scheduler\"") + (format #false "\"~a/bin/dask-scheduler\"" + (assoc-ref outputs "out"))) + (("\"dask-worker\"") + (format #false "\"~a/bin/dask-worker\"" + (assoc-ref outputs "out")))))) + (replace 'check + (lambda* (#:key tests? #:allow-other-keys) + (when tests? + (setenv "DISABLE_IPV6" "1") + (invoke "pytest" "-vv" "distributed" + "-m" "not slow and not gpu and not ipython and not avoid_ci" + "-k" + ;; TODO: These tests fail for unknown reasons: + ;; Assertion error. + (string-append + "not test_version_option" + ;; "The 'distributed' distribution was not found" + " and not test_register_backend_entrypoint" + ;; "AttributeError: module 'distributed.dashboard' has no attribute 'scheduler'" + " and not test_get_client_functions_spawn_clusters")))))))) + (propagated-inputs + `(("python-click" ,python-click) + ("python-cloudpickle" ,python-cloudpickle) + ("python-cryptography" ,python-cryptography) + ("python-dask" ,python-dask) + ("python-msgpack" ,python-msgpack) + ("python-psutil" ,python-psutil) + ("python-pyyaml" ,python-pyyaml) + ("python-setuptools" ,python-setuptools) + ("python-sortedcontainers" ,python-sortedcontainers) + ("python-tblib" ,python-tblib) + ("python-toolz" ,python-toolz) + ("python-tornado" ,python-tornado-6) + ("python-zict" ,python-zict))) + (native-inputs + `(("python-pytest" ,python-pytest))) + (home-page "https://distributed.dask.org") + (synopsis "Distributed scheduler for Dask") + (description "Dask.distributed is a lightweight library for distributed +computing in Python. It extends both the @code{concurrent.futures} and +@code{dask} APIs to moderate sized clusters.") + (license license:bsd-3))) + +(define-public python-modin + (package + (name "python-modin") + (version "0.10.1") + (source + (origin + ;; The archive on pypi does not include all required files. + (method git-fetch) + (uri (git-reference + (url "https://github.com/modin-project/modin") + (commit version))) + (file-name (git-file-name name version)) + (sha256 + (base32 + "128ghfb9ncmnn8km409xjcdppvn9nr9jqw8rkvsfavh7wnwlk509")))) + (build-system python-build-system) + (arguments + `(#:phases + (modify-phases %standard-phases + (add-after 'unpack 'make-files-writable + (lambda _ + (for-each make-file-writable (find-files ".")))) + (replace 'check + (lambda* (#:key tests? #:allow-other-keys) + (when tests? + (setenv "MODIN_ENGINE" "dask") + (invoke "python" "-m" "pytest" + "modin/pandas/test/test_concat.py") + (setenv "MODIN_ENGINE" "python") + (invoke "python" "-m" "pytest" + "modin/pandas/test/test_concat.py"))))))) + (propagated-inputs + `(("python-cloudpickle" ,python-cloudpickle) + ("python-dask" ,python-dask) + ("python-distributed" ,python-distributed) + ("python-numpy" ,python-numpy) + ("python-packaging" ,python-packaging) + ("python-pandas" ,python-pandas))) + (native-inputs + `(("python-coverage" ,python-coverage) + ("python-jinja2" ,python-jinja2) + ("python-lxml" ,python-lxml) + ("python-matplotlib" ,python-matplotlib) + ("python-msgpack" ,python-msgpack) + ("python-openpyxl" ,python-openpyxl) + ("python-psutil" ,python-psutil) + ("python-pyarrow" ,python-pyarrow) + ("python-pytest" ,python-pytest) + ("python-pytest-benchmark" ,python-pytest-benchmark) + ("python-pytest-cov" ,python-pytest-cov) + ("python-pytest-xdist" ,python-pytest-xdist) + ("python-scipy" ,python-scipy) + ("python-sqlalchemy" ,python-sqlalchemy) + ("python-tables" ,python-tables) + ("python-tqdm" ,python-tqdm) + ("python-xarray" ,python-xarray) + ("python-xlrd" ,python-xlrd))) + (home-page "https://github.com/modin-project/modin") + (synopsis "Make your pandas code run faster") + (description + "Modin uses Ray or Dask to provide an effortless way to speed up your +pandas notebooks, scripts, and libraries. Unlike other distributed DataFrame +libraries, Modin provides seamless integration and compatibility with existing +pandas code.") + (license license:asl2.0))) |