Skip to content

Commit 0451937

Browse files
committed
BLD: restrict code_checks.sh to tracked repo files
Previously, some of the checks in code_checks.sh ran unrestricted on all the contents of the repository root (recursively), so that if any files extraneous to the repo were present (e.g. a virtual environment directory), they were checked too, potentially causing many false positives when a developer runs ./ci/code_checks.sh . The checker invocations that were already scoped (i.e. they were already restricted, in one way or another, to the actual pandas code, e.g. by restricting the search to the `pandas` subfolder) have been left as-is, while those that weren't are now given an explicit list of files that are tracked in the repo.
1 parent 2bdcab4 commit 0451937

File tree

1 file changed

+39
-14
lines changed

1 file changed

+39
-14
lines changed

ci/code_checks.sh

Lines changed: 39 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,26 @@ BASE_DIR="$(dirname $0)/.."
2525
RET=0
2626
CHECK=$1
2727

28+
29+
# Get lists of files tracked by git:
30+
31+
# Filter: read lines on stdin and echo each one to stdout, wrapping any line
# that contains whitespace in double quotes so that a downstream `xargs`
# reads it as a single argument. Lines without whitespace pass through as-is.
#
# The POSIX class [[:space:]] is used rather than `\s`, which is a GNU awk
# extension and is not understood by every awk implementation.
function quote_if_needed {
    awk '{ print (($0 ~ /[[:space:]]/) ? "\"" $0 "\"" : $0) }'
}
34+
35+
# List the files tracked by git at HEAD, one per line.
#
# Arguments:
#   $1 - optional filename suffix (e.g. ".py"); only paths ending in exactly
#        this text are listed. Empty or omitted means "all files".
#   $2 - optional path passed to `git ls-tree` to restrict the listing.
# Outputs:
#   Matching paths on stdout, passed through quote_if_needed.
function git_tracked_files {
    local ext=${1-}
    local subdir=${2-}
    # "$" alone matches every line, i.e. no suffix filtering.
    local patt='$'
    if [[ -n "$ext" ]]; then
        # Escape every basic-regex metacharacter so the suffix is matched
        # literally (not just its first character), then anchor it at the end.
        patt="$(printf '%s' "$ext" | sed 's/[][\.*^$]/\\&/g')\$"
    fi
    # ${subdir:+"$subdir"} expands to exactly one argument when a subdir was
    # given and to nothing at all otherwise.
    git ls-tree --name-only -r HEAD ${subdir:+"$subdir"} | grep -e "$patt" | quote_if_needed
}
40+
41+
# Lists of git-tracked files, captured once here via git_tracked_files.
# Each variable holds one path per line; paths containing whitespace have been
# wrapped in double quotes by quote_if_needed.
# NOTE(review): the checks in this script pipe these to xargs through an
# unquoted `echo $VAR`, so the shell word-splits and glob-expands the contents
# before xargs sees them — confirm that is acceptable for all tracked paths.

# Every tracked file (no suffix filter, no subdirectory restriction).
GIT_TRACKED_ALL=$(git_tracked_files)
42+
# Tracked files whose name ends in .py.
GIT_TRACKED_ALL_PY_FILES=$(git_tracked_files .py)
43+
# Tracked .rst files under doc/source.
GIT_TRACKED_DOCSOURCE_RST_FILES=$(git_tracked_files .rst doc/source)
44+
# Tracked .rst files under doc/source/reference.
GIT_TRACKED_REFERENCE_RST_FILES=$(git_tracked_files .rst doc/source/reference)
45+
# Tracked .rst files under doc/source/development.
GIT_TRACKED_DEVELOPMENT_RST_FILES=$(git_tracked_files .rst doc/source/development)
46+
47+
2848
function invgrep {
2949
# grep with inverse exit status and formatting for azure-pipelines
3050
#
@@ -38,6 +58,8 @@ function invgrep {
3858
return $((! $EXIT_STATUS))
3959
}
4060

61+
export -f invgrep; # needed because of the use of xargs to pass in $GIT_TRACKED_ALL as args
62+
4163
if [[ "$GITHUB_ACTIONS" == "true" ]]; then
4264
FLAKE8_FORMAT="##[error]%(path)s:%(row)s:%(col)s:%(code)s:%(text)s"
4365
INVGREP_PREPEND="##[error]"
@@ -52,7 +74,7 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
5274
black --version
5375

5476
MSG='Checking black formatting' ; echo $MSG
55-
black . --check
77+
echo $GIT_TRACKED_ALL_PY_FILES | xargs black --check
5678
RET=$(($RET + $?)) ; echo $MSG "DONE"
5779

5880
# `setup.cfg` contains the list of error codes that are being ignored in flake8
@@ -62,7 +84,7 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
6284

6385
# pandas/_libs/src is C code, so no need to search there.
6486
MSG='Linting .py code' ; echo $MSG
65-
flake8 --format="$FLAKE8_FORMAT" .
87+
echo $GIT_TRACKED_ALL_PY_FILES | xargs flake8 --format="$FLAKE8_FORMAT"
6688
RET=$(($RET + $?)) ; echo $MSG "DONE"
6789

6890
MSG='Linting .pyx and .pxd code' ; echo $MSG
@@ -77,7 +99,7 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
7799
flake8-rst --version
78100

79101
MSG='Linting code-blocks in .rst documentation' ; echo $MSG
80-
flake8-rst doc/source --filename=*.rst --format="$FLAKE8_FORMAT"
102+
echo $GIT_TRACKED_DOCSOURCE_RST_FILES | xargs flake8-rst --format="$FLAKE8_FORMAT"
81103
RET=$(($RET + $?)) ; echo $MSG "DONE"
82104

83105
# Check that cython casting is of the form `<type>obj` as opposed to `<type> obj`;
@@ -100,35 +122,38 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
100122
cpplint --quiet --extensions=c,h --headers=h --recursive --filter=-readability/casting,-runtime/int,-build/include_subdir pandas/_libs/src/*.h pandas/_libs/src/parser pandas/_libs/ujson pandas/_libs/tslibs/src/datetime pandas/_libs/*.cpp
101123
RET=$(($RET + $?)) ; echo $MSG "DONE"
102124

125+
126+
VALIDATE_CMD=$BASE_DIR/scripts/validate_unwanted_patterns.py
127+
103128
MSG='Check for use of not concatenated strings' ; echo $MSG
104129
if [[ "$GITHUB_ACTIONS" == "true" ]]; then
105-
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="strings_to_concatenate" --format="##[error]{source_path}:{line_number}:{msg}" .
130+
echo $GIT_TRACKED_ALL_PY_FILES | xargs $VALIDATE_CMD --validation-type="strings_to_concatenate" --format="##[error]{source_path}:{line_number}:{msg}" --no-override
106131
else
107-
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="strings_to_concatenate" .
132+
echo $GIT_TRACKED_ALL_PY_FILES | xargs $VALIDATE_CMD --validation-type="strings_to_concatenate" --no-override
108133
fi
109134
RET=$(($RET + $?)) ; echo $MSG "DONE"
110135

111136
MSG='Check for strings with wrong placed spaces' ; echo $MSG
112137
if [[ "$GITHUB_ACTIONS" == "true" ]]; then
113-
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="strings_with_wrong_placed_whitespace" --format="##[error]{source_path}:{line_number}:{msg}" .
138+
echo $GIT_TRACKED_ALL_PY_FILES | xargs $VALIDATE_CMD --validation-type="strings_with_wrong_placed_whitespace" --format="##[error]{source_path}:{line_number}:{msg}" --no-override
114139
else
115-
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="strings_with_wrong_placed_whitespace" .
140+
echo $GIT_TRACKED_ALL_PY_FILES | xargs $VALIDATE_CMD --validation-type="strings_with_wrong_placed_whitespace" --no-override
116141
fi
117142
RET=$(($RET + $?)) ; echo $MSG "DONE"
118143

119144
MSG='Check for import of private attributes across modules' ; echo $MSG
120145
if [[ "$GITHUB_ACTIONS" == "true" ]]; then
121-
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored --format="##[error]{source_path}:{line_number}:{msg}" pandas/
146+
$VALIDATE_CMD --validation-type="private_import_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored --format="##[error]{source_path}:{line_number}:{msg}" pandas/
122147
else
123-
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored pandas/
148+
$VALIDATE_CMD --validation-type="private_import_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored pandas/
124149
fi
125150
RET=$(($RET + $?)) ; echo $MSG "DONE"
126151

127152
MSG='Check for use of private functions across modules' ; echo $MSG
128153
if [[ "$GITHUB_ACTIONS" == "true" ]]; then
129-
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/ --format="##[error]{source_path}:{line_number}:{msg}" pandas/
154+
$VALIDATE_CMD --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/ --format="##[error]{source_path}:{line_number}:{msg}" pandas/
130155
else
131-
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/ pandas/
156+
$VALIDATE_CMD --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/ pandas/
132157
fi
133158
RET=$(($RET + $?)) ; echo $MSG "DONE"
134159

@@ -239,7 +264,7 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
239264
RET=$(($RET + $?)) ; echo $MSG "DONE"
240265

241266
MSG='Check for extra blank lines after the class definition' ; echo $MSG
242-
invgrep -R --include="*.py" --include="*.pyx" -E 'class.*:\n\n( )+"""' .
267+
invgrep -R --include="*.py" --include="*.pyx" -E 'class.*:\n\n( )+"""' pandas
243268
RET=$(($RET + $?)) ; echo $MSG "DONE"
244269

245270
MSG='Check for use of {foo!r} instead of {repr(foo)}' ; echo $MSG
@@ -272,7 +297,7 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
272297

273298
MSG='Check that no file in the repo contains trailing whitespaces' ; echo $MSG
274299
INVGREP_APPEND=" <- trailing whitespaces found"
275-
invgrep -RI --exclude=\*.{svg,c,cpp,html,js} --exclude-dir=env "\s$" *
300+
echo $GIT_TRACKED_ALL | xargs bash -c 'invgrep -RI "\s$" "$@"' _
276301
RET=$(($RET + $?)) ; echo $MSG "DONE"
277302
unset INVGREP_APPEND
278303
fi
@@ -390,7 +415,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
390415
RET=$(($RET + $?)) ; echo $MSG "DONE"
391416

392417
MSG='Validate correct capitalization among titles in documentation' ; echo $MSG
393-
$BASE_DIR/scripts/validate_rst_title_capitalization.py $BASE_DIR/doc/source/development $BASE_DIR/doc/source/reference
418+
echo "${GIT_TRACKED_REFERENCE_RST_FILES} ${GIT_TRACKED_DEVELOPMENT_RST_FILES}" | xargs $BASE_DIR/scripts/validate_rst_title_capitalization.py
394419
RET=$(($RET + $?)) ; echo $MSG "DONE"
395420

396421
fi

0 commit comments

Comments
 (0)