Skip to content
Permalink
Browse files

ARROW-5757: [Python] Remove Python 2.7 support

Part of the changes were done using [pyupgrade](https://github.com/asottile/pyupgrade).

Closes #6410 from pitrou/ARROW-5757-py2-goodbye and squashes the following commits:

f0f9f51 <Antoine Pitrou> Address review comments
561ac96 <Antoine Pitrou> ARROW-5757:  Remove Python 2.7 support

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Wes McKinney <wesm+git@apache.org>
  • Loading branch information
pitrou authored and wesm committed Feb 13, 2020
1 parent 3bc01ec commit 412145b8a736a5dce4f17b03d7f72efb45053176
Showing with 426 additions and 1,123 deletions.
  1. +1 −1 .github/workflows/python.yml
  2. +0 −1 ci/docker/conda-python.dockerfile
  3. +0 −10 cpp/src/arrow/python/common.h
  4. +0 −4 cpp/src/arrow/python/config.h
  5. +0 −13 cpp/src/arrow/python/datetime.h
  6. +0 −6 cpp/src/arrow/python/deserialize.cc
  7. +0 −28 cpp/src/arrow/python/helpers.cc
  8. +1 −7 cpp/src/arrow/python/helpers.h
  9. +0 −9 cpp/src/arrow/python/io.cc
  10. +0 −5 cpp/src/arrow/python/numpy_internal.h
  11. +0 −3 cpp/src/arrow/python/python_test.cc
  12. +0 −10 cpp/src/arrow/python/serialize.cc
  13. +1 −1 cpp/src/arrow/python/serialize.h
  14. +0 −52 dev/tasks/conda-recipes/.ci_support/linux_python2.7.yaml
  15. +0 −56 dev/tasks/conda-recipes/.ci_support/osx_python2.7.yaml
  16. +0 −67 dev/tasks/tasks.yml
  17. +1 −7 docs/source/python/install.rst
  18. +5 −9 python/manylinux1/README.md
  19. +15 −30 python/manylinux1/build_arrow.sh
  20. +3 −5 python/manylinux201x/README.md
  21. +16 −33 python/manylinux201x/build_arrow.sh
  22. +0 −2 python/pyarrow/__init__.pxd
  23. +1 −2 python/pyarrow/__init__.py
  24. +0 −1 python/pyarrow/_csv.pyx
  25. +0 −2 python/pyarrow/_cuda.pxd
  26. +0 −10 python/pyarrow/_cuda.pyx
  27. +1 −4 python/pyarrow/_dataset.pyx
  28. +4 −8 python/pyarrow/_flight.pyx
  29. +0 −2 python/pyarrow/_fs.pxd
  30. +1 −2 python/pyarrow/_fs.pyx
  31. +1 −3 python/pyarrow/_hdfs.pyx
  32. +0 −2 python/pyarrow/_json.pyx
  33. +0 −2 python/pyarrow/_orc.pxd
  34. +0 −3 python/pyarrow/_orc.pyx
  35. +0 −2 python/pyarrow/_parquet.pxd
  36. +1 −4 python/pyarrow/_parquet.pyx
  37. +0 −2 python/pyarrow/_plasma.pyx
  38. +0 −2 python/pyarrow/_s3fs.pyx
  39. +2 −2 python/pyarrow/array.pxi
  40. +0 −1 python/pyarrow/benchmark.py
  41. +1 −3 python/pyarrow/builder.pxi
  42. +19 −107 python/pyarrow/compat.py
  43. +0 −1 python/pyarrow/compute.py
  44. +0 −1 python/pyarrow/csv.py
  45. +0 −1 python/pyarrow/cuda.py
  46. +5 −12 python/pyarrow/dataset.py
  47. +7 −10 python/pyarrow/feather.py
  48. +5 −7 python/pyarrow/filesystem.py
  49. +0 −7 python/pyarrow/flight.py
  50. +0 −1 python/pyarrow/fs.py
  51. +0 −2 python/pyarrow/gandiva.pyx
  52. +9 −11 python/pyarrow/hdfs.py
  53. +3 −6 python/pyarrow/io-hdfs.pxi
  54. +2 −11 python/pyarrow/io.pxi
  55. +1 −2 python/pyarrow/ipc.py
  56. +0 −1 python/pyarrow/json.py
  57. +0 −3 python/pyarrow/jvm.py
  58. +0 −2 python/pyarrow/lib.pxd
  59. +0 −3 python/pyarrow/lib.pyx
  60. +3 −6 python/pyarrow/orc.py
  61. +17 −18 python/pyarrow/pandas_compat.py
  62. +33 −47 python/pyarrow/parquet.py
  63. +0 −1 python/pyarrow/plasma.py
  64. +2 −4 python/pyarrow/serialization.pxi
  65. +1 −14 python/pyarrow/serialization.py
  66. +4 −4 python/pyarrow/table.pxi
  67. +7 −27 python/pyarrow/tests/conftest.py
  68. +6 −7 python/pyarrow/tests/pandas_examples.py
  69. +12 −13 python/pyarrow/tests/test_array.py
  70. +0 −1 python/pyarrow/tests/test_compute.py
  71. +16 −24 python/pyarrow/tests/test_convert_builtin.py
  72. +24 −27 python/pyarrow/tests/test_csv.py
  73. +0 −1 python/pyarrow/tests/test_cuda.py
  74. +1 −1 python/pyarrow/tests/test_dataset.py
  75. +5 −5 python/pyarrow/tests/test_feather.py
  76. +9 −11 python/pyarrow/tests/test_flight.py
  77. +9 −12 python/pyarrow/tests/test_fs.py
  78. +2 −2 python/pyarrow/tests/test_gandiva.py
  79. +3 −3 python/pyarrow/tests/test_hdfs.py
  80. +25 −60 python/pyarrow/tests/test_io.py
  81. +1 −1 python/pyarrow/tests/test_ipc.py
  82. +6 −7 python/pyarrow/tests/test_json.py
  83. +2 −4 python/pyarrow/tests/test_jvm.py
  84. +1 −5 python/pyarrow/tests/test_orc.py
  85. +38 −49 python/pyarrow/tests/test_pandas.py
  86. +13 −26 python/pyarrow/tests/test_parquet.py
  87. +5 −14 python/pyarrow/tests/test_plasma.py
  88. +26 −27 python/pyarrow/tests/test_scalars.py
  89. +23 −35 python/pyarrow/tests/test_serialization.py
  90. +10 −11 python/pyarrow/tests/test_table.py
  91. +0 −2 python/pyarrow/tests/test_tensor.py
  92. +8 −14 python/pyarrow/types.pxi
  93. +0 −1 python/pyarrow/types.py
  94. +6 −25 python/pyarrow/util.py
  95. +0 −1 python/requirements-test.txt
  96. +0 −2 python/requirements-wheel.txt
  97. +0 −2 python/requirements.txt
  98. +2 −5 python/setup.py
@@ -72,7 +72,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python: [2.7, 3.6, 3.8]
python: [3.6, 3.8]
env:
PYTHON: ${{ matrix.python }}
PANDAS: latest
@@ -24,7 +24,6 @@ ARG python=3.6
COPY ci/conda_env_python.yml /arrow/ci/
RUN conda install -q \
--file arrow/ci/conda_env_python.yml \
$([ "$python" == "2.7" ] && echo "futures enum34") \
$([ "$python" == "3.6" -o "$python" == "3.7" ] && echo "pickle5") \
python=${python} \
nomkl && \
@@ -134,8 +134,6 @@ auto SafeCallIntoPython(Function&& func) -> decltype(func()) {
return maybe_status;
}

#define PYARROW_IS_PY2 PY_MAJOR_VERSION <= 2

// A RAII primitive that DECREFs the underlying PyObject* when it
// goes out of scope.
class ARROW_PYTHON_EXPORT OwnedRef {
@@ -211,21 +209,13 @@ struct PyBytesView {
}

// Point this view at the UTF-8 encoding of the unicode object `obj`.
//
// Sets `bytes` and `size` to the encoded data and updates `ref` accordingly.
// Returns early (via RETURN_IF_PYERROR) if the encoding call left a Python
// error set; otherwise returns Status::OK().
Status FromUnicode(PyObject* obj) {
#if PY_MAJOR_VERSION >= 3
  // The utf-8 representation is cached on the unicode object, so no separate
  // bytes object is created here and `ref` is cleared.
  Py_ssize_t utf8_length;
  const char* utf8_data = PyUnicode_AsUTF8AndSize(obj, &utf8_length);
  RETURN_IF_PYERROR();
  this->bytes = utf8_data;
  this->size = utf8_length;
  this->ref.reset();
#else
  // Python 2: encoding yields a new bytes object, which is handed to `ref`
  // (presumably so the buffer stays alive as long as this view does).
  PyObject* utf8_bytes = PyUnicode_AsUTF8String(obj);
  RETURN_IF_PYERROR();
  this->bytes = PyBytes_AS_STRING(utf8_bytes);
  this->size = PyBytes_GET_SIZE(utf8_bytes);
  this->ref.reset(utf8_bytes);
#endif
  return Status::OK();
}

@@ -23,10 +23,6 @@
#include "arrow/python/numpy_interop.h"
#include "arrow/python/visibility.h"

#if PY_MAJOR_VERSION >= 3
#define PyString_Check PyUnicode_Check
#endif

namespace arrow {
namespace py {

@@ -121,36 +121,23 @@ inline TimePoint PyDateTime_to_TimePoint(PyDateTime_DateTime* pydatetime) {
// Convert a timedelta to a whole number of seconds (days and seconds fields
// only; the microseconds field is not included).
//
// The day and second counts are widened to int64_t *before* the day count is
// scaled. CPython declares these fields as C `int`, so multiplying the raw
// field by 24 * 3600 would be carried out in `int` and overflow for deltas
// longer than roughly 68 years.
ARROW_PYTHON_EXPORT
inline int64_t PyDelta_to_s(PyDateTime_Delta* pytimedelta) {
#if PY_VERSION_HEX >= 0x03000000
  const int64_t days = PyDateTime_DELTA_GET_DAYS(pytimedelta);
  const int64_t seconds = PyDateTime_DELTA_GET_SECONDS(pytimedelta);
#else
  const int64_t days = pytimedelta->days;
  const int64_t seconds = pytimedelta->seconds;
#endif
  // 64-bit arithmetic throughout: days -> seconds, plus the seconds field.
  return seconds + days * 24 * 3600;
}

// Convert a timedelta to whole milliseconds.
//
// The seconds total from PyDelta_to_s is scaled by 1000 and the microseconds
// field contributes `microseconds / 1000` (integer division, so any
// sub-millisecond remainder is dropped).
ARROW_PYTHON_EXPORT
inline int64_t PyDelta_to_ms(PyDateTime_Delta* pytimedelta) {
#if PY_VERSION_HEX >= 0x03000000
  const auto micros = PyDateTime_DELTA_GET_MICROSECONDS(pytimedelta);
#else
  const auto micros = pytimedelta->microseconds;
#endif
  return PyDelta_to_s(pytimedelta) * 1000 + micros / 1000;
}

// Convert a timedelta to microseconds: the seconds total from PyDelta_to_s
// scaled by 1000 * 1000, plus the delta's microseconds field.
ARROW_PYTHON_EXPORT
inline int64_t PyDelta_to_us(PyDateTime_Delta* pytimedelta) {
#if PY_VERSION_HEX >= 0x03000000
  const auto micros = PyDateTime_DELTA_GET_MICROSECONDS(pytimedelta);
#else
  const auto micros = pytimedelta->microseconds;
#endif
  return PyDelta_to_s(pytimedelta) * 1000 * 1000 + micros;
}

@@ -123,12 +123,6 @@ Status GetValue(PyObject* context, const Array& arr, int64_t index, int8_t type,
return Status::OK();
case PythonType::PY2INT:
case PythonType::INT: {
#if PY_MAJOR_VERSION < 3
if (type == PythonType::PY2INT) {
*result = PyInt_FromSsize_t(checked_cast<const Int64Array&>(arr).Value(index));
return Status::OK();
}
#endif
*result = PyLong_FromSsize_t(checked_cast<const Int64Array&>(arr).Value(index));
return Status::OK();
}
@@ -96,33 +96,22 @@ std::string PyBytes_AsStdString(PyObject* obj) {

// Copy the UTF-8 encoding of the unicode object `obj` into `*out`.
//
// `obj` must be a unicode object (checked with DCHECK). Returns early via
// RETURN_IF_PYERROR if encoding left a Python error set; otherwise
// Status::OK().
Status PyUnicode_AsStdString(PyObject* obj, std::string* out) {
  DCHECK(PyUnicode_Check(obj));
#if PY_MAJOR_VERSION >= 3
  // The utf-8 representation is cached on the unicode object
  Py_ssize_t utf8_length;
  const char* utf8_data = PyUnicode_AsUTF8AndSize(obj, &utf8_length);
  RETURN_IF_PYERROR();
  out->assign(utf8_data, utf8_length);
#else
  // Python 2: encode into a temporary bytes object and copy its contents.
  OwnedRef encoded(PyUnicode_AsUTF8String(obj));
  RETURN_IF_PYERROR();
  *out = PyBytes_AsStdString(encoded.obj());
#endif
  return Status::OK();
}

std::string PyObject_StdStringRepr(PyObject* obj) {
#if PY_MAJOR_VERSION >= 3
OwnedRef unicode_ref(PyObject_Repr(obj));
OwnedRef bytes_ref;

if (unicode_ref) {
bytes_ref.reset(
PyUnicode_AsEncodedString(unicode_ref.obj(), "utf8", "backslashreplace"));
}
#else
OwnedRef bytes_ref(PyObject_Repr(obj));
#endif
if (!bytes_ref) {
PyErr_Clear();
std::stringstream ss;
@@ -135,12 +124,7 @@ std::string PyObject_StdStringRepr(PyObject* obj) {
// Store the result of str(obj) in `*out`.
//
// Returns early via RETURN_IF_PYERROR if PyObject_Str left a Python error
// set. On Python 3 the str() result is converted with PyUnicode_AsStdString
// and that call's Status is returned; on Python 2 the bytes payload is copied
// directly.
Status PyObject_StdStringStr(PyObject* obj, std::string* out) {
  OwnedRef str_result(PyObject_Str(obj));
  RETURN_IF_PYERROR();
#if PY_MAJOR_VERSION < 3
  *out = PyBytes_AsStdString(str_result.obj());
  return Status::OK();
#else
  return PyUnicode_AsStdString(str_result.obj(), out);
#endif
}

Status ImportModule(const std::string& module_name, OwnedRef* ref) {
@@ -259,18 +243,10 @@ template Status CIntFromPython(PyObject*, uint64_t*, const std::string&);

// Cheap pre-filter for NaN detection.
//
// Returns false when the type of `obj` carries any of the core-type subclass
// flags listed below (such types can be type-checked very quickly and do not
// allow NaN values), and true for every other type.
inline bool MayHaveNaN(PyObject* obj) {
  // Flags that are tested on both Python major versions.
  const int64_t shared_flags = Py_TPFLAGS_LONG_SUBCLASS | Py_TPFLAGS_LIST_SUBCLASS |
                               Py_TPFLAGS_TUPLE_SUBCLASS | Py_TPFLAGS_UNICODE_SUBCLASS |
                               Py_TPFLAGS_DICT_SUBCLASS | Py_TPFLAGS_BASE_EXC_SUBCLASS |
                               Py_TPFLAGS_TYPE_SUBCLASS;
#if PYARROW_IS_PY2
  // Python 2 additionally tests the int and str subclass flags.
  const int64_t version_flags = Py_TPFLAGS_INT_SUBCLASS | Py_TPFLAGS_STRING_SUBCLASS;
#else
  // Python 3 additionally tests the bytes subclass flag.
  const int64_t version_flags = Py_TPFLAGS_BYTES_SUBCLASS;
#endif
  const bool is_nan_free_type =
      PyType_HasFeature(Py_TYPE(obj), shared_flags | version_flags);
  return !is_nan_free_type;
}

@@ -313,10 +289,6 @@ Status UnboxIntegerAsInt64(PyObject* obj, int64_t* out) {
if (overflow) {
return Status::Invalid("PyLong is too large to fit int64");
}
#if PY_MAJOR_VERSION < 3
} else if (PyInt_Check(obj)) {
*out = static_cast<int64_t>(PyInt_AS_LONG(obj));
#endif
} else if (PyArray_IsScalar(obj, UByte)) {
*out = reinterpret_cast<PyUByteScalarObject*>(obj)->obval;
} else if (PyArray_IsScalar(obj, Short)) {
@@ -65,13 +65,7 @@ ARROW_PYTHON_EXPORT
Status ImportFromModule(PyObject* module, const std::string& name, OwnedRef* ref);

// \brief Check whether obj is an integer, independent of Python versions.
inline bool IsPyInteger(PyObject* obj) {
#if PYARROW_IS_PY2
// Python 2 builds: accept both the `long` type and the legacy `int` type.
return PyLong_Check(obj) || PyInt_Check(obj);
#else
// Python 3 builds: only the PyLong check is performed.
return PyLong_Check(obj);
#endif
}
// Form of the same check without the version conditional: only PyLong_Check.
inline bool IsPyInteger(PyObject* obj) { return PyLong_Check(obj); }

// \brief Use pandas missing value semantics to check if a value is null
ARROW_PYTHON_EXPORT
@@ -123,14 +123,6 @@ class PythonFile {
Status Write(const std::shared_ptr<Buffer>& buffer) {
RETURN_NOT_OK(CheckClosed());

#if PY_MAJOR_VERSION < 3
// On Python 2, a write() method can typically call str() on its argument
// to get its bytes payload (this is the case with socket.makefile()).
// Unfortunately, on non-bytes buffer-like objects this will give out
// the repr() of the object rather than its data. So fall back on
// copying the data to a bytes object.
return Write(buffer->data(), buffer->size());
#else
PyObject* py_data = wrap_buffer(buffer);
PY_RETURN_IF_ERROR(StatusCode::IOError);

@@ -139,7 +131,6 @@ class PythonFile {
Py_XDECREF(result);
PY_RETURN_IF_ERROR(StatusCode::IOError);
return Status::OK();
#endif
}

Result<int64_t> Tell() {
@@ -161,11 +161,6 @@ inline bool PyFloatScalar_Check(PyObject* obj) {
}

// Returns true when `obj` passes PyLong_Check, or is a NumPy scalar of the
// Integer kind (PyArray_IsScalar), or -- on Python 2 builds only -- passes
// PyInt_Check.
inline bool PyIntScalar_Check(PyObject* obj) {
#if PY_MAJOR_VERSION < 3
  const bool is_py2_int = PyInt_Check(obj);
#else
  const bool is_py2_int = false;
#endif
  return is_py2_int || PyLong_Check(obj) || PyArray_IsScalar(obj, Integer);
}

@@ -85,9 +85,6 @@ TEST(OwnedRefNoGIL, TestMoves) {
// Build the message expected for a Python exception of class
// `exc_class_name`: the fixed prefix "Python exception: ", then (when compiled
// with PY_MAJOR_VERSION < 3) the "exceptions." module qualifier, then the
// class name itself.
std::string FormatPythonException(const std::string& exc_class_name) {
  std::string message = "Python exception: ";
#if PY_MAJOR_VERSION < 3
  message += "exceptions.";
#endif
  message += exc_class_name;
  return message;
}
@@ -103,11 +103,6 @@ class SequenceBuilder {
return AppendPrimitive(&bools_, data, PythonType::BOOL);
}

// Appending a python 2 int64_t to the sequence.
// Forwards `data` to AppendPrimitive using the dedicated py2_ints_ builder and
// tags the element as PythonType::PY2INT (AppendInt64 uses ints_ and
// PythonType::INT instead). Returns the Status produced by AppendPrimitive.
Status AppendPy2Int64(const int64_t data) {
return AppendPrimitive(&py2_ints_, data, PythonType::PY2INT);
}

// Appending an int64_t to the sequence
Status AppendInt64(const int64_t data) {
return AppendPrimitive(&ints_, data, PythonType::INT);
@@ -252,7 +247,6 @@ class SequenceBuilder {

std::shared_ptr<BooleanBuilder> bools_;
std::shared_ptr<Int64Builder> ints_;
std::shared_ptr<Int64Builder> py2_ints_;
std::shared_ptr<BinaryBuilder> bytes_;
std::shared_ptr<StringBuilder> strings_;
std::shared_ptr<HalfFloatBuilder> half_floats_;
@@ -458,10 +452,6 @@ Status Append(PyObject* context, PyObject* elem, SequenceBuilder* builder,
RETURN_NOT_OK(
builder->AppendDict(context, serialized_object, recursion_depth, blobs_out));
}
#if PY_MAJOR_VERSION < 3
} else if (PyInt_Check(elem)) {
RETURN_NOT_OK(builder->AppendPy2Int64(static_cast<int64_t>(PyInt_AS_LONG(elem))));
#endif
} else if (PyBytes_Check(elem)) {
auto data = reinterpret_cast<uint8_t*>(PyBytes_AS_STRING(elem));
int32_t size = -1;
@@ -118,7 +118,7 @@ struct PythonType {
enum type {
BOOL,
INT,
PY2INT,
PY2INT, // Kept for compatibility
BYTES,
STRING,
HALF_FLOAT,

This file was deleted.

This file was deleted.

0 comments on commit 412145b

Please sign in to comment.
You can’t perform that action at this time.