diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 0000000000000..e947f30d285cd --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,147 @@ +version: 2 +jobs: + + # -------------------------------------------------------------------------- + # 0. py27_compat + # -------------------------------------------------------------------------- + py27_compat: + docker: + - image: continuumio/miniconda:latest + # databases configuration + - image: circleci/postgres:9.6.5-alpine-ram + environment: + POSTGRES_USER: postgres + POSTGRES_DB: pandas_nosetest + - image: circleci/mysql:8-ram + environment: + MYSQL_USER: "root" + MYSQL_HOST: "localhost" + MYSQL_ALLOW_EMPTY_PASSWORD: "true" + MYSQL_DATABASE: "pandas_nosetest" + environment: + JOB: "2.7_COMPAT" + ENV_FILE: "ci/circle-27-compat.yaml" + LOCALE_OVERRIDE: "it_IT.UTF-8" + MINICONDA_DIR: /home/ubuntu/miniconda3 + steps: + - checkout + - run: + name: build + command: | + ./ci/install_circle.sh + ./ci/show_circle.sh + - run: + name: test + command: ./ci/run_circle.sh --skip-slow --skip-network + + # -------------------------------------------------------------------------- + # 1. py36_locale + # -------------------------------------------------------------------------- + py36_locale: + docker: + - image: continuumio/miniconda:latest + # databases configuration + - image: circleci/postgres:9.6.5-alpine-ram + environment: + POSTGRES_USER: postgres + POSTGRES_DB: pandas_nosetest + - image: circleci/mysql:8-ram + environment: + MYSQL_USER: "root" + MYSQL_HOST: "localhost" + MYSQL_ALLOW_EMPTY_PASSWORD: "true" + MYSQL_DATABASE: "pandas_nosetest" + + environment: + JOB: "3.6_LOCALE" + ENV_FILE: "ci/circle-36-locale.yaml" + LOCALE_OVERRIDE: "zh_CN.UTF-8" + MINICONDA_DIR: /home/ubuntu/miniconda3 + steps: + - checkout + - run: + name: build + command: | + ./ci/install_circle.sh + ./ci/show_circle.sh + - run: + name: test + command: ./ci/run_circle.sh --skip-slow --skip-network + + # -------------------------------------------------------------------------- + # 2. py36_locale_slow + # -------------------------------------------------------------------------- + py36_locale_slow: + docker: + - image: continuumio/miniconda:latest + # databases configuration + - image: circleci/postgres:9.6.5-alpine-ram + environment: + POSTGRES_USER: postgres + POSTGRES_DB: pandas_nosetest + - image: circleci/mysql:8-ram + environment: + MYSQL_USER: "root" + MYSQL_HOST: "localhost" + MYSQL_ALLOW_EMPTY_PASSWORD: "true" + MYSQL_DATABASE: "pandas_nosetest" + + environment: + JOB: "3.6_LOCALE_SLOW" + ENV_FILE: "ci/circle-36-locale_slow.yaml" + LOCALE_OVERRIDE: "zh_CN.UTF-8" + MINICONDA_DIR: /home/ubuntu/miniconda3 + steps: + - checkout + - run: + name: build + command: | + ./ci/install_circle.sh + ./ci/show_circle.sh + - run: + name: test + command: ./ci/run_circle.sh --only-slow --skip-network + + # -------------------------------------------------------------------------- + # 3. py35_ascii + # -------------------------------------------------------------------------- + py35_ascii: + docker: + - image: continuumio/miniconda:latest + # databases configuration + - image: circleci/postgres:9.6.5-alpine-ram + environment: + POSTGRES_USER: postgres + POSTGRES_DB: pandas_nosetest + - image: circleci/mysql:8-ram + environment: + MYSQL_USER: "root" + MYSQL_HOST: "localhost" + MYSQL_ALLOW_EMPTY_PASSWORD: "true" + MYSQL_DATABASE: "pandas_nosetest" + + environment: + JOB: "3.5_ASCII" + ENV_FILE: "ci/circle-35-ascii.yaml" + LOCALE_OVERRIDE: "C" + MINICONDA_DIR: /home/ubuntu/miniconda3 + steps: + - checkout + - run: + name: build + command: | + ./ci/install_circle.sh + ./ci/show_circle.sh + - run: + name: test + command: ./ci/run_circle.sh --skip-slow --skip-network + + +workflows: + version: 2 + build_and_test: + jobs: + - py27_compat + - py36_locale + - py36_locale_slow + - py35_ascii diff --git a/ci/install_circle.sh b/ci/install_circle.sh index 5ffff84c88488..f8bcf6bcffc99 100755 --- a/ci/install_circle.sh +++ b/ci/install_circle.sh @@ -6,14 +6,7 @@ echo "[home_dir: $home_dir]" echo "[ls -ltr]" ls -ltr -echo "[Using clean Miniconda install]" -rm -rf "$MINICONDA_DIR" - -# install miniconda -wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -q -O miniconda.sh || exit 1 -bash miniconda.sh -b -p "$MINICONDA_DIR" || exit 1 - -export PATH="$MINICONDA_DIR/bin:$PATH" +apt-get update -y && apt-get install -y build-essential postgresql-client-9.6 echo "[update conda]" conda config --set ssl_verify false || exit 1 @@ -48,9 +41,17 @@ source $ENVS_FILE # edit the locale override if needed if [ -n "$LOCALE_OVERRIDE" ]; then + + apt-get update && apt-get -y install locales locales-all + + export LANG=$LOCALE_OVERRIDE + export LC_ALL=$LOCALE_OVERRIDE + + python -c "import locale; locale.setlocale(locale.LC_ALL, \"$LOCALE_OVERRIDE\")" || exit 1; + echo "[Adding locale to the first line of pandas/__init__.py]" rm -f pandas/__init__.pyc - sedc="3iimport locale\nlocale.setlocale(locale.LC_ALL, '$LOCALE_OVERRIDE')\n" + sedc="3iimport locale\nlocale.setlocale(locale.LC_ALL, \"$LOCALE_OVERRIDE\")\n" sed -i "$sedc" pandas/__init__.py echo "[head -4 pandas/__init__.py]" head -4 pandas/__init__.py diff --git a/ci/install_db_circle.sh b/ci/install_db_circle.sh deleted file mode 100755 index a00f74f009f54..0000000000000 --- a/ci/install_db_circle.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash - -echo "installing dbs" -mysql -e 'create database pandas_nosetest;' -psql -c 'create database pandas_nosetest;' -U postgres - -echo "done" -exit 0 diff --git a/ci/run_circle.sh b/ci/run_circle.sh index 435985bd42148..fc2a8b849a354 100755 --- a/ci/run_circle.sh +++ b/ci/run_circle.sh @@ -6,4 +6,4 @@ export PATH="$MINICONDA_DIR/bin:$PATH" source activate pandas echo "pytest --strict --junitxml=$CIRCLE_TEST_REPORTS/reports/junit.xml $@ pandas" -pytest --strict --junitxml=$CIRCLE_TEST_REPORTS/reports/junit.xml $@ pandas +pytest --strict --color=no --junitxml=$CIRCLE_TEST_REPORTS/reports/junit.xml $@ pandas diff --git a/circle.yml b/circle.yml deleted file mode 100644 index 66415defba6fe..0000000000000 --- a/circle.yml +++ /dev/null @@ -1,38 +0,0 @@ -machine: - environment: - # these are globally set - MINICONDA_DIR: /home/ubuntu/miniconda3 - - -database: - override: - - ./ci/install_db_circle.sh - - -checkout: - post: - # since circleci does a shallow fetch - # we need to populate our tags - - git fetch --depth=1000 - - -dependencies: - override: - - > - case $CIRCLE_NODE_INDEX in - 0) - sudo apt-get install language-pack-it && ./ci/install_circle.sh JOB="2.7_COMPAT" ENV_FILE="ci/circle-27-compat.yaml" LOCALE_OVERRIDE="it_IT.UTF-8" ;; - 1) - sudo apt-get install language-pack-zh-hans && ./ci/install_circle.sh JOB="3.6_LOCALE" ENV_FILE="ci/circle-36-locale.yaml" LOCALE_OVERRIDE="zh_CN.UTF-8" ;; - 2) - sudo apt-get install language-pack-zh-hans && ./ci/install_circle.sh JOB="3.6_LOCALE_SLOW" ENV_FILE="ci/circle-36-locale_slow.yaml" LOCALE_OVERRIDE="zh_CN.UTF-8" ;; - 3) - ./ci/install_circle.sh JOB="3.5_ASCII" ENV_FILE="ci/circle-35-ascii.yaml" LOCALE_OVERRIDE="C" ;; - esac - - ./ci/show_circle.sh - - -test: - override: - - case $CIRCLE_NODE_INDEX in 0) ./ci/run_circle.sh --skip-slow --skip-network ;; 1) ./ci/run_circle.sh --only-slow --skip-network ;; 2) ./ci/run_circle.sh --skip-slow --skip-network ;; 3) ./ci/run_circle.sh --skip-slow --skip-network ;; esac: - parallel: true diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py index 056924f2c6663..743cbc107cce5 100644 --- a/pandas/tests/indexes/datetimes/test_misc.py +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -1,5 +1,6 @@ import locale import calendar +import unicodedata import pytest @@ -7,7 +8,7 @@ import pandas as pd import pandas.util.testing as tm from pandas import (Index, DatetimeIndex, datetime, offsets, - date_range, Timestamp) + date_range, Timestamp, compat) class TestTimeSeries(object): @@ -284,10 +285,24 @@ def test_datetime_name_accessors(self, time_locale): dti = DatetimeIndex(freq='M', start='2012', end='2013') result = dti.month_name(locale=time_locale) expected = Index([month.capitalize() for month in expected_months]) + + # work around different normalization schemes + # https://github.com/pandas-dev/pandas/issues/22342 + if not compat.PY2: + result = result.str.normalize("NFD") + expected = expected.str.normalize("NFD") + tm.assert_index_equal(result, expected) + for date, expected in zip(dti, expected_months): result = date.month_name(locale=time_locale) - assert result == expected.capitalize() + expected = expected.capitalize() + + if not compat.PY2: + result = unicodedata.normalize("NFD", result) + expected = unicodedata.normalize("NFD", result) + + assert result == expected dti = dti.append(DatetimeIndex([pd.NaT])) assert np.isnan(dti.month_name(locale=time_locale)[-1]) diff --git a/pandas/tests/io/json/test_compression.py b/pandas/tests/io/json/test_compression.py index f2e72e5fe00e1..b411744f7bac2 100644 --- a/pandas/tests/io/json/test_compression.py +++ b/pandas/tests/io/json/test_compression.py @@ -2,6 +2,7 @@ import pandas as pd import pandas.util.testing as tm +import pandas.util._test_decorators as td from pandas.util.testing import assert_frame_equal, assert_raises_regex @@ -31,6 +32,7 @@ def test_read_zipped_json(datapath): assert_frame_equal(uncompressed_df, compressed_df) +@td.skip_if_not_us_locale def test_with_s3_url(compression): boto3 = pytest.importorskip('boto3') pytest.importorskip('s3fs') diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 0715521a74819..04f0220839523 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -15,6 +15,7 @@ assert_series_equal, network, ensure_clean, assert_index_equal) import pandas.util.testing as tm +import pandas.util._test_decorators as td _seriesd = tm.getSeriesData() _tsd = tm.getTimeSeriesData() @@ -1047,6 +1048,7 @@ def test_read_inline_jsonl(self): expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b']) assert_frame_equal(result, expected) + @td.skip_if_not_us_locale def test_read_s3_jsonl(self, s3_resource): # GH17200 diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py index a7cc3ad989ea1..bfe33980ac617 100644 --- a/pandas/tests/io/parser/test_network.py +++ b/pandas/tests/io/parser/test_network.py @@ -55,10 +55,12 @@ def tips_df(datapath): @pytest.mark.usefixtures("s3_resource") +@td.skip_if_not_us_locale() class TestS3(object): def test_parse_public_s3_bucket(self, tips_df): pytest.importorskip('s3fs') + # more of an integration test due to the not-public contents portion # can probably mock this though. for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]: diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index fa5a8f6a1900c..5f27ff719fda1 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -586,6 +586,7 @@ def test_read_from_http_url(self, ext): tm.assert_frame_equal(url_table, local_table) @td.skip_if_no('s3fs') + @td.skip_if_not_us_locale def test_read_from_s3_url(self, ext): boto3 = pytest.importorskip('boto3') moto = pytest.importorskip('moto') diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index 4172bfd41b9db..58146cae587fe 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -5,6 +5,7 @@ import dateutil import calendar import locale +import unicodedata import numpy as np from dateutil.tz import tzutc @@ -20,7 +21,7 @@ from pandas._libs.tslibs.timezones import get_timezone, dateutil_gettz as gettz from pandas.errors import OutOfBoundsDatetime -from pandas.compat import long, PY3 +from pandas.compat import long, PY3, PY2 from pandas.compat.numpy import np_datetime64_compat from pandas import Timestamp, Period, Timedelta, NaT @@ -116,8 +117,21 @@ def test_names(self, data, time_locale): expected_day = calendar.day_name[0].capitalize() expected_month = calendar.month_name[8].capitalize() - assert data.day_name(time_locale) == expected_day - assert data.month_name(time_locale) == expected_month + result_day = data.day_name(time_locale) + result_month = data.month_name(time_locale) + + # Work around https://github.com/pandas-dev/pandas/issues/22342 + # different normalizations + + if not PY2: + expected_day = unicodedata.normalize("NFD", expected_day) + expected_month = unicodedata.normalize("NFD", expected_month) + + result_day = unicodedata.normalize("NFD", result_day,) + result_month = unicodedata.normalize("NFD", result_month) + + assert result_day == expected_day + assert result_month == expected_month # Test NaT nan_ts = Timestamp(NaT) diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index 06eb525bbac56..e2dcbfae78b16 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -3,6 +3,7 @@ import locale import calendar +import unicodedata import pytest from datetime import datetime, time, date @@ -13,7 +14,8 @@ from pandas.core.dtypes.common import is_integer_dtype, is_list_like from pandas import (Index, Series, DataFrame, bdate_range, date_range, period_range, timedelta_range, - PeriodIndex, DatetimeIndex, TimedeltaIndex) + PeriodIndex, DatetimeIndex, TimedeltaIndex, + compat) import pandas.core.common as com import dateutil @@ -308,10 +310,24 @@ def test_dt_accessor_datetime_name_accessors(self, time_locale): s = Series(DatetimeIndex(freq='M', start='2012', end='2013')) result = s.dt.month_name(locale=time_locale) expected = Series([month.capitalize() for month in expected_months]) + + # work around https://github.com/pandas-dev/pandas/issues/22342 + if not compat.PY2: + result = result.str.normalize("NFD") + expected = expected.str.normalize("NFD") + tm.assert_series_equal(result, expected) + for s_date, expected in zip(s, expected_months): result = s_date.month_name(locale=time_locale) - assert result == expected.capitalize() + expected = expected.capitalize() + + if not compat.PY2: + result = unicodedata.normalize("NFD", result) + expected = unicodedata.normalize("NFD", expected) + + assert result == expected + s = s.append(Series([pd.NaT])) assert np.isnan(s.dt.month_name(locale=time_locale).iloc[-1])