blob: 3c9fc1847c4935f87d28644a39561344adf30939 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
|
# Copyright 2023-2024 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2
# Global ebuild metadata for the Python bindings of Apache Arrow (pyarrow).
# EAPI is assigned first, ahead of the eclass inherit below.
EAPI=8
# distutils-r1 settings. They are assigned before the inherit line;
# NOTE(review): see the eclass documentation for their exact meaning
# (extension build marker, PEP 517 backend, supported interpreters).
DISTUTILS_EXT=1
DISTUTILS_USE_PEP517=setuptools
PYTHON_COMPAT=( python3_{10..13} )
# multiprocessing is inherited for makeopts_jobs, which src_compile calls.
inherit distutils-r1 multiprocessing
# Pinned commit of the arrow-testing repository (arrow.git: testing).
# Used for the test-only tarball in SRC_URI and for ARROW_TEST_DATA
# in python_test.
ARROW_DATA_GIT_HASH=4d209492d514c2d3cb2d392681b9aa00e6d8da1c
# Pinned commit of the parquet-testing repository
# (arrow.git: cpp/submodules/parquet-testing). Used for the test-only
# tarball in SRC_URI and for PARQUET_TEST_DATA in python_test.
PARQUET_DATA_GIT_HASH=cb7a9674142c137367bf75a01b79c6e214a73199
DESCRIPTION="Python library for Apache Arrow"
HOMEPAGE="
https://arrow.apache.org/
https://github.com/apache/arrow/
https://pypi.org/project/pyarrow/
"
# The main apache-arrow release tarball is always fetched; the two test-data
# snapshots are fetched only with USE=test and are renamed so the saved file
# names carry the repository name and the pinned hash.
SRC_URI="
mirror://apache/arrow/arrow-${PV}/apache-arrow-${PV}.tar.gz
test? (
https://github.com/apache/parquet-testing/archive/${PARQUET_DATA_GIT_HASH}.tar.gz
-> parquet-testing-${PARQUET_DATA_GIT_HASH}.tar.gz
https://github.com/apache/arrow-testing/archive/${ARROW_DATA_GIT_HASH}.tar.gz
-> arrow-testing-${ARROW_DATA_GIT_HASH}.tar.gz
)
"
# Work inside the python/ subdirectory of the apache-arrow source tree.
S="${WORKDIR}/apache-arrow-${PV}/python"
LICENSE="Apache-2.0"
SLOT="0"
# NOTE(review): arm64 is the only keyword here without a '~' prefix while
# amd64 carries one -- confirm that this is intended.
KEYWORDS="~amd64 arm64 ~loong ~riscv ~x86"
# parquet and snappy default to enabled ('+'); ssl defaults to disabled.
IUSE="+parquet +snappy ssl"
# Runtime dependencies: dev-libs/apache-arrow at the same ${PV} ('~' operator)
# with compute, dataset, json and re2 required and the parquet/snappy/ssl
# flags following this package's own USE flags; plus numpy >= 1.16.6.
RDEPEND="
~dev-libs/apache-arrow-${PV}[compute,dataset,json,parquet?,re2,snappy?,ssl?]
>=dev-python/numpy-1.16.6:=[${PYTHON_USEDEP}]
"
# Build-time dependencies needed only with USE=test, including an
# apache-arrow built with lz4 and zlib.
BDEPEND="
test? (
dev-python/cffi[${PYTHON_USEDEP}]
dev-python/hypothesis[${PYTHON_USEDEP}]
dev-python/pandas[${PYTHON_USEDEP}]
dev-python/pytz[${PYTHON_USEDEP}]
dev-libs/apache-arrow[lz4,zlib]
)
"
# EPYTEST_XDIST is assigned before distutils_enable_tests is invoked.
# NOTE(review): presumably this opts the pytest run into pytest-xdist --
# confirm against the eclass documentation.
EPYTEST_XDIST=1
distutils_enable_tests pytest
# Prepare the sources: run the distutils-r1 prepare phase, then strip the
# --warning-errors lines from the CMake build description.
src_prepare() {
	distutils-r1_src_prepare

	# Upstream note: this is cython's -Werror. Every line of the file that
	# mentions --warning-errors is deleted in place; abort if sed fails.
	local cmake_lists=CMakeLists.txt
	sed -i -e '/--warning-errors/d' "${cmake_lists}" || die
}
# Configure the pyarrow build through exported PYARROW_* environment
# variables, then hand over to the distutils-r1 compile phase.
src_compile() {
	# Settings that apply regardless of USE flags. The job count comes
	# from makeopts_jobs and the C++ flags from the user's CXXFLAGS.
	export \
		PYARROW_PARALLEL="$(makeopts_jobs)" \
		PYARROW_BUILD_VERBOSE=1 \
		PYARROW_CXXFLAGS="${CXXFLAGS}" \
		PYARROW_BUNDLE_ARROW_CPP_HEADERS=0 \
		PYARROW_CMAKE_GENERATOR=Ninja \
		PYARROW_WITH_HDFS=1

	# USE=parquet switches on both the dataset and parquet variables;
	# the encryption variable is additionally set when USE=ssl is on.
	if use parquet; then
		export PYARROW_WITH_DATASET=1 PYARROW_WITH_PARQUET=1
		if use ssl; then
			export PYARROW_WITH_PARQUET_ENCRYPTION=1
		fi
	fi

	# USE=snappy switches on the snappy variable.
	if use snappy; then
		export PYARROW_WITH_SNAPPY=1
	fi

	distutils-r1_src_compile
}
# Run the pyarrow test suite via epytest against the pyarrow package
# (--pyargs), from ${T}, with the test-data directories exported and a
# list of tests deselected.
python_test() {
	local -a EPYTEST_DESELECT=()

	# Unexplained failure (upstream ebuild note: "wtf?").
	EPYTEST_DESELECT+=(
		tests/test_fs.py::test_localfs_errors
	)
	# These require apache-arrow with jemalloc, which doesn't seem to be
	# supported by the Gentoo package.
	EPYTEST_DESELECT+=(
		tests/test_memory.py::test_env_var
		tests/test_memory.py::test_specific_memory_pools
		tests/test_memory.py::test_supported_memory_backends
	)
	# hypothesis health check failures
	# https://github.com/apache/arrow/issues/41318
	EPYTEST_DESELECT+=(
		tests/interchange/test_interchange_spec.py::test_dtypes
		tests/test_convert_builtin.py::test_array_to_pylist_roundtrip
		tests/test_feather.py::test_roundtrip
		tests/test_pandas.py::test_array_to_pandas_roundtrip
		tests/test_strategies.py::test_types
		tests/test_types.py::test_hashing
	)
	# Fragile memory tests.
	EPYTEST_DESELECT+=(
		tests/test_csv.py::TestSerialStreamingCSVRead::test_batch_lifetime
		tests/test_csv.py::TestThreadedStreamingCSVRead::test_batch_lifetime
	)
	# Takes forever, and manages to generate timedeltas over 64 bits.
	EPYTEST_DESELECT+=(
		tests/test_strategies.py
		"tests/test_array.py::test_pickling[builtin_pickle]"
	)

	# Environment for the test run: pytest plugin autoloading is switched
	# off, and the two data variables point at the data/ directories of
	# the pinned parquet-testing and arrow-testing snapshots in ${WORKDIR}.
	local -x \
		PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 \
		PARQUET_TEST_DATA="${WORKDIR}/parquet-testing-${PARQUET_DATA_GIT_HASH}/data" \
		ARROW_TEST_DATA="${WORKDIR}/arrow-testing-${ARROW_DATA_GIT_HASH}/data"

	# Run from the temporary directory rather than the source tree.
	cd "${T}" || die
	epytest --pyargs pyarrow
}
|