Skip to content

CI/BLD: Restrict ci/code_checks.sh to tracked repo files #36386

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
79 changes: 53 additions & 26 deletions ci/code_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,26 @@ BASE_DIR="$(dirname $0)/.."
RET=0
CHECK=$1


# Get lists of files tracked by git:

function quote_if_needed {
    # Filter stdin -> stdout, one record per line.
    # Lines containing whitespace are wrapped in double quotes so that a
    # downstream `xargs` treats each of them as a single argument; every other
    # line is passed through unchanged.
    #
    # The whitespace test uses an explicit bracket expression instead of `\s`:
    # `\s` is a GNU awk extension and is not understood by every awk
    # implementation.
    #
    # NOTE: double quotes already present in a line are not escaped.
    awk '
        /[ \t\r\f\v]/ { print "\"" $0 "\""; next }
        { print }
    '
}

function git_tracked_files {
    # Print the files tracked by git at HEAD, one per line, with names that
    # contain whitespace wrapped in double quotes (see quote_if_needed).
    #
    # Arguments:
    #   $1 - optional file extension including its leading dot (e.g. ".py");
    #        only paths ending in it are listed. The first character is
    #        backslash-escaped so the dot is matched literally. When empty or
    #        omitted, every tracked file is listed.
    #   $2 - optional sub-directory to restrict the listing to.
    local ext=${1:-}
    local subdir=${2:-}

    # "$" alone matches every line, i.e. no filtering by extension.
    local patt='$'
    if [[ -n "$ext" ]]; then
        patt="\\${ext}\$"
    fi

    # ${subdir:+"$subdir"} expands to nothing when no sub-directory was given
    # (so git does not receive an empty path argument) and to a single,
    # properly quoted word otherwise.
    git ls-tree --name-only -r HEAD ${subdir:+"$subdir"} \
        | grep -e "$patt" \
        | quote_if_needed
}

# Lists of git-tracked paths, computed once via git_tracked_files. Each list
# holds one path per line; paths containing whitespace are double-quoted.
# Every tracked file:
GIT_TRACKED_ALL=$(git_tracked_files)
# Tracked files ending in ".py":
GIT_TRACKED_ALL_PY_FILES=$(git_tracked_files .py)
# Tracked ".rst" files under doc/source and two of its sub-directories:
GIT_TRACKED_DOCSOURCE_RST_FILES=$(git_tracked_files .rst doc/source)
GIT_TRACKED_REFERENCE_RST_FILES=$(git_tracked_files .rst doc/source/reference)
GIT_TRACKED_DEVELOPMENT_RST_FILES=$(git_tracked_files .rst doc/source/development)


function invgrep {
# grep with inverse exist status and formatting for azure-pipelines
#
Expand All @@ -38,21 +58,37 @@ function invgrep {
return $((! $EXIT_STATUS))
}

export -f invgrep; # needed because of the use of xargs to pass in $GIT_TRACKED_ALL as args

# Choose the output formats: on GitHub Actions, flake8 messages and invgrep
# matches are prefixed with "##[error]"; elsewhere flake8 uses its default
# format and INVGREP_PREPEND is left untouched.
case "$GITHUB_ACTIONS" in
    true)
        FLAKE8_FORMAT="##[error]%(path)s:%(row)s:%(col)s:%(code)s:%(text)s"
        INVGREP_PREPEND="##[error]"
        ;;
    *)
        FLAKE8_FORMAT="default"
        ;;
esac


function if_gh_actions {
    # If this is running on GitHub Actions, print each argument on its own
    # line (double-quoted by quote_if_needed when it contains whitespace);
    # otherwise print an empty line.
    # Used to conditionally pass command-line arguments, as in
    #     if_gh_actions --baz spam | xargs foo --bar
    if [[ "${GITHUB_ACTIONS:-}" == "true" ]]; then
        # printf with a quoted "$arg" emits the argument verbatim. An unquoted
        # `echo $arg` would word-split it and glob-expand characters such as
        # the "[...]" in "##[error]", and could mistake it for an echo option.
        for arg in "$@"; do
            printf '%s\n' "$arg"
        done | quote_if_needed
    else
        echo ""
    fi
}


### LINTING ###
if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then

echo "black --version"
black --version

MSG='Checking black formatting' ; echo $MSG
black . --check
echo $GIT_TRACKED_ALL_PY_FILES | xargs black --check
RET=$(($RET + $?)) ; echo $MSG "DONE"

# `setup.cfg` contains the list of error codes that are being ignored in flake8
Expand All @@ -62,7 +98,7 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then

# pandas/_libs/src is C code, so no need to search there.
MSG='Linting .py code' ; echo $MSG
flake8 --format="$FLAKE8_FORMAT" .
echo $GIT_TRACKED_ALL_PY_FILES | xargs flake8 --format="$FLAKE8_FORMAT"
RET=$(($RET + $?)) ; echo $MSG "DONE"

MSG='Linting .pyx and .pxd code' ; echo $MSG
Expand All @@ -77,7 +113,7 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
flake8-rst --version

MSG='Linting code-blocks in .rst documentation' ; echo $MSG
flake8-rst doc/source --filename=*.rst --format="$FLAKE8_FORMAT"
echo $GIT_TRACKED_DOCSOURCE_RST_FILES | xargs flake8-rst --format="$FLAKE8_FORMAT"
RET=$(($RET + $?)) ; echo $MSG "DONE"

# Check that cython casting is of the form `<type>obj` as opposed to `<type> obj`;
Expand All @@ -100,36 +136,27 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
cpplint --quiet --extensions=c,h --headers=h --recursive --filter=-readability/casting,-runtime/int,-build/include_subdir pandas/_libs/src/*.h pandas/_libs/src/parser pandas/_libs/ujson pandas/_libs/tslibs/src/datetime pandas/_libs/*.cpp
RET=$(($RET + $?)) ; echo $MSG "DONE"


VALIDATE_CMD=$BASE_DIR/scripts/validate_unwanted_patterns.py

MSG='Check for use of not concatenated strings' ; echo $MSG
if [[ "$GITHUB_ACTIONS" == "true" ]]; then
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="strings_to_concatenate" --format="##[error]{source_path}:{line_number}:{msg}" .
else
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="strings_to_concatenate" .
fi
ARGS=$({ if_gh_actions --format="##[error]{source_path}:{line_number}:{msg}"; echo $GIT_TRACKED_ALL_PY_FILES; })
echo $ARGS | xargs $VALIDATE_CMD --validation-type="strings_to_concatenate" --no-override
RET=$(($RET + $?)) ; echo $MSG "DONE"

MSG='Check for strings with wrong placed spaces' ; echo $MSG
if [[ "$GITHUB_ACTIONS" == "true" ]]; then
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="strings_with_wrong_placed_whitespace" --format="##[error]{source_path}:{line_number}:{msg}" .
else
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="strings_with_wrong_placed_whitespace" .
fi
ARGS=$({ if_gh_actions --format="##[error]{source_path}:{line_number}:{msg}"; echo $GIT_TRACKED_ALL_PY_FILES; })
echo $ARGS | xargs $VALIDATE_CMD --validation-type="strings_with_wrong_placed_whitespace" --no-override
RET=$(($RET + $?)) ; echo $MSG "DONE"

MSG='Check for import of private attributes across modules' ; echo $MSG
if [[ "$GITHUB_ACTIONS" == "true" ]]; then
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored --format="##[error]{source_path}:{line_number}:{msg}" pandas/
else
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored pandas/
fi
ARGS=$(if_gh_actions --format="##[error]{source_path}:{line_number}:{msg}")
echo $ARGS | xargs $VALIDATE_CMD --validation-type="private_import_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored pandas/
RET=$(($RET + $?)) ; echo $MSG "DONE"

MSG='Check for use of private functions across modules' ; echo $MSG
if [[ "$GITHUB_ACTIONS" == "true" ]]; then
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/ --format="##[error]{source_path}:{line_number}:{msg}" pandas/
else
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/ pandas/
fi
ARGS=$(if_gh_actions --format="##[error]{source_path}:{line_number}:{msg}")
echo $ARGS | xargs $VALIDATE_CMD --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/ pandas/
RET=$(($RET + $?)) ; echo $MSG "DONE"

echo "isort --version-number"
Expand Down Expand Up @@ -239,7 +266,7 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
RET=$(($RET + $?)) ; echo $MSG "DONE"

MSG='Check for extra blank lines after the class definition' ; echo $MSG
invgrep -R --include="*.py" --include="*.pyx" -E 'class.*:\n\n( )+"""' .
invgrep -R --include="*.py" --include="*.pyx" -E 'class.*:\n\n( )+"""' pandas
RET=$(($RET + $?)) ; echo $MSG "DONE"

MSG='Check for use of {foo!r} instead of {repr(foo)}' ; echo $MSG
Expand Down Expand Up @@ -272,7 +299,7 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then

MSG='Check that no file in the repo contains trailing whitespaces' ; echo $MSG
INVGREP_APPEND=" <- trailing whitespaces found"
invgrep -RI --exclude=\*.{svg,c,cpp,html,js} --exclude-dir=env "\s$" *
echo $GIT_TRACKED_ALL | xargs bash -c 'invgrep -RI "\s$" "$@"' _
RET=$(($RET + $?)) ; echo $MSG "DONE"
unset INVGREP_APPEND
fi
Expand Down Expand Up @@ -390,7 +417,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
RET=$(($RET + $?)) ; echo $MSG "DONE"

MSG='Validate correct capitalization among titles in documentation' ; echo $MSG
$BASE_DIR/scripts/validate_rst_title_capitalization.py $BASE_DIR/doc/source/development $BASE_DIR/doc/source/reference
echo "${GIT_TRACKED_REFERENCE_RST_FILES} ${GIT_TRACKED_DEVELOPMENT_RST_FILES}" | xargs $BASE_DIR/scripts/validate_rst_title_capitalization.py
RET=$(($RET + $?)) ; echo $MSG "DONE"

fi
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/src/ujson/lib/ultrajsonenc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1134,7 +1134,7 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name,
}

break;

}
}

Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/src/datetime/np_datetime.c
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,7 @@ int cmp_npy_datetimestruct(const npy_datetimestruct *a,
* object into a NumPy npy_datetimestruct. Uses tzinfo (if present)
* to convert to UTC time.
*
* The following implementation just asks for attributes, and thus
* The following implementation just asks for attributes, and thus
* supports datetime duck typing. The tzinfo time zone conversion
* requires this style of access as well.
*
Expand Down
111 changes: 79 additions & 32 deletions scripts/validate_unwanted_patterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import sys
import token
import tokenize
from typing import IO, Callable, FrozenSet, Iterable, List, Set, Tuple
from typing import IO, Callable, FrozenSet, Iterable, List, Sequence, Set, Tuple

PRIVATE_IMPORTS_TO_IGNORE: Set[str] = {
"_extension_array_shared_docs",
Expand Down Expand Up @@ -403,10 +403,12 @@ def has_wrong_whitespace(first_line: str, second_line: str) -> bool:

def main(
function: Callable[[IO[str]], Iterable[Tuple[int, str]]],
source_path: str,
source_paths: Sequence[str],
output_format: str,
file_extensions_to_check: str,
excluded_file_paths: str,
override: bool,
verbose: int,
) -> bool:
"""
Main entry point of the script.
Expand All @@ -415,14 +417,19 @@ def main(
----------
function : Callable
Function to execute for the specified validation type.
source_path : str
Source path representing path to a file/directory.
source_paths : list of str
File paths of files and directories to check.
output_format : str
Output format of the error message.
file_extensions_to_check : str
Comma separated values of what file extensions to check.
excluded_file_paths : str
Comma separated values of what file paths to exclude during the check.
override : bool
Whether individual files mentioned in ``source_paths`` should override
``excluded_file_paths``.
verbose : int
Verbosity level (currently only distinguishes between zero and nonzero).

Returns
-------
Expand All @@ -434,46 +441,59 @@ def main(
ValueError
If any path in `source_paths` does not point to an existing file/directory.
"""
if not os.path.exists(source_path):
if not all(os.path.exists(path) for path in source_paths):
raise ValueError("Please enter a valid path, pointing to a file/directory.")

is_failed: bool = False
file_path: str = ""

FILE_EXTENSIONS_TO_CHECK: FrozenSet[str] = frozenset(
file_extensions_to_check.split(",")
)
PATHS_TO_IGNORE = frozenset(excluded_file_paths.split(","))
PATHS_TO_IGNORE = frozenset(
os.path.abspath(os.path.normpath(path))
for path in excluded_file_paths.split(",")
)

is_failed: bool = False

if os.path.isfile(source_path):
file_path = source_path
def check_file(file_path: str):
nonlocal is_failed
local_is_failed = False
if verbose:
print(f"Checking {file_path}...", file=sys.stderr, end="")
with open(file_path) as file_obj:
for line_number, msg in function(file_obj):
is_failed = True
local_is_failed = True
print(
output_format.format(
source_path=file_path, line_number=line_number, msg=msg
)
)
if not local_is_failed:
if verbose:
print(" OK", file=sys.stderr)
else:
is_failed = True

for subdir, _, files in os.walk(source_path):
if any(path in subdir for path in PATHS_TO_IGNORE):
continue
for file_name in files:
if not any(
file_name.endswith(extension) for extension in FILE_EXTENSIONS_TO_CHECK
):
continue
def is_ignored(path: str) -> bool:
    """
    Return whether ``path`` is one of the excluded paths or lies inside one.

    ``path`` is normalized and made absolute before it is compared with the
    entries of ``PATHS_TO_IGNORE``.

    The comparison is made on path-component boundaries: a bare
    ``startswith`` test would also exclude siblings that merely share a
    prefix (e.g. excluding ``asv_bench/env`` would exclude
    ``asv_bench/env2`` as well).
    """
    path = os.path.abspath(os.path.normpath(path))
    return any(
        # os.path.join(p, "") appends exactly one trailing separator to p.
        path == ignored_path or path.startswith(os.path.join(ignored_path, ""))
        for ignored_path in PATHS_TO_IGNORE
    )

file_path = os.path.join(subdir, file_name)
with open(file_path) as file_obj:
for line_number, msg in function(file_obj):
is_failed = True
print(
output_format.format(
source_path=file_path, line_number=line_number, msg=msg
)
)
for source_path in source_paths:
if os.path.isfile(source_path):
if override or not is_ignored(source_path):
check_file(source_path)
else:
for subdir, _, files in os.walk(source_path):
if is_ignored(subdir):
continue
for file_name in files:
file_path = os.path.join(subdir, file_name)
if is_ignored(file_path) or not any(
file_name.endswith(extension)
for extension in FILE_EXTENSIONS_TO_CHECK
):
continue

check_file(file_path)

return is_failed

Expand All @@ -490,7 +510,13 @@ def main(
parser = argparse.ArgumentParser(description="Unwanted patterns checker.")

parser.add_argument(
"path", nargs="?", default=".", help="Source path of file/directory to check."
"paths",
nargs="*",
default=["."],
help=(
"Source path(s) of files and directories to check. If a directory is "
"specified, all its contents are checked recursively."
),
)
parser.add_argument(
"--format",
Expand All @@ -513,17 +539,38 @@ def main(
parser.add_argument(
"--excluded-file-paths",
default="asv_bench/env",
help="Comma separated file paths to exclude.",
help=(
"Comma separated file paths to exclude. If an individual file is "
"explicitly passed in `paths`, it overrides this setting, unless the "
"--no-override flag is used."
),
)
# --no-override stores False into ``args.override`` (which is True by default).
parser.add_argument(
    "--no-override",
    dest="override",
    action="store_false",
    help=(
        "Don't allow individual files explicitly mentioned in `paths` to override "
        "the excluded file paths (see --excluded-file-paths)."
    ),
)
# With action="count" the value is None unless a default is given; default=0
# keeps ``args.verbose`` an int even when the flag is never passed.
parser.add_argument(
    "-v",
    "--verbose",
    action="count",
    default=0,
    help="Set the verbosity level to the number of times this flag is used.",
)

args = parser.parse_args()

sys.exit(
main(
function=globals().get(args.validation_type), # type: ignore
source_path=args.path,
source_paths=args.paths,
output_format=args.format,
file_extensions_to_check=args.included_file_extensions,
excluded_file_paths=args.excluded_file_paths,
override=args.override,
verbose=args.verbose,
)
)
4 changes: 2 additions & 2 deletions web/pandas/_templates/layout.html
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@

{% if static.logo %}<a class="navbar-brand" href="{{ base_url }}/"><img alt="" src="{{ base_url }}{{ static.logo }}"/></a>{% endif %}

<div class="collapse navbar-collapse" id="nav-content">
<div class="collapse navbar-collapse" id="nav-content">
<ul class="navbar-nav ml-auto">
{% for item in navbar %}
{% if not item.has_subitems %}
Expand Down Expand Up @@ -89,7 +89,7 @@
pandas is a fiscally sponsored project of <a href="https://numfocus.org">NumFOCUS.</a>
</p>
</footer>

<script src="https://code.jquery.com/jquery-3.2.1.slim.min.js"
integrity="sha384-KJ3o2DKtIkvYIK3UENzmM7KCkRr/rE9/Qpg6aAZGJwFDMVNA/GpGFF93hXpG5KkN"
crossorigin="anonymous"></script>
Expand Down
1 change: 0 additions & 1 deletion web/pandas/static/img/partners/r_studio.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.