Skip to content
This repository was archived by the owner on Jan 19, 2025. It is now read-only.

Commit fefda4b

Browse files
committed
feat: Started issue #434 - not working yet!
1 parent 3f17cad commit fefda4b

File tree

5 files changed

+367
-1
lines changed

5 files changed

+367
-1
lines changed

package-parser/package-parser.iml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
<sourceFolder url="file://$MODULE_DIR$/refined_types" type="java-resource" />
99
<sourceFolder url="file://$MODULE_DIR$/tests" isTestSource="true" />
1010
</content>
11-
<orderEntry type="jdk" jdkName="Poetry (api-editor)" jdkType="Python SDK" />
1211
<orderEntry type="sourceFolder" forTests="false" />
1312
</component>
1413
</module>
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
from ._get_api import get_api
2+
from ._model import (
3+
API,
4+
Action,
5+
APIDependencies,
6+
Class,
7+
Condition,
8+
Dependency,
9+
FromImport,
10+
Function,
11+
Import,
12+
Module,
13+
Parameter,
14+
ParameterAndResultDocstring,
15+
ParameterAssignment,
16+
ParameterHasValue,
17+
ParameterIsIgnored,
18+
ParameterIsIllegal,
19+
ParameterIsNone,
20+
Result,
21+
)
22+
from ._package_metadata import (
23+
distribution,
24+
distribution_version,
25+
package_files,
26+
package_root,
27+
)
Lines changed: 331 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,331 @@
1+
import json
2+
from io import TextIOWrapper
3+
from pathlib import Path
4+
from typing import Any, Union
5+
6+
from package_parser.commands.find_usages import (
7+
ClassUsage,
8+
FunctionUsage,
9+
UsageStore,
10+
ValueUsage,
11+
)
12+
from package_parser.commands.get_api import API
13+
from package_parser.utils import ensure_file_exists, parent_qname
14+
15+
def suggest_improvements(
    api_file: TextIOWrapper,
    usages_file: TextIOWrapper,
    out_dir: Union[Path, None] = None,
    min_usages: int = 1,
):
    """
    Loads an API description and a usage store and writes the derived reports as JSON files.

    :param api_file: Open text file with the API description as JSON. It is closed after reading.
    :param usages_file: Open text file with the usage store as JSON. It is closed after reading.
    :param out_dir: Directory the reports are written to (created if missing). Defaults to the directory that
        contains api_file.
    :param min_usages: Threshold passed to the removal of rarely used API elements.
    """

    with api_file:
        api_json = json.load(api_file)
        api = API.from_json(api_json)

    with usages_file:
        usages_json = json.load(usages_file)
        usages = UsageStore.from_json(usages_json)

    api_path = Path(api_file.name)
    if out_dir is None:
        out_dir = api_path.parent
    out_dir.mkdir(parents=True, exist_ok=True)

    # Use the bare file name only: joining an absolute path onto out_dir would discard out_dir.
    base_file_name = api_path.name.replace("__api.json", "")

    __preprocess_usages(usages, api)
    # NOTE(review): __print_usage_counts and __create_usage_distributions are not defined in the visible part of
    # this module; they must be added (or imported) before this function can run to completion.
    __print_usage_counts(usages, out_dir, base_file_name)
    __create_usage_distributions(usages, out_dir, base_file_name)
    api_size_after_removal = __remove_rarely_used_api_elements(
        usages, min_usages, out_dir, base_file_name
    )
    __write_api_size(api, api_size_after_removal, out_dir, base_file_name)
    __optional_vs_required_parameters(usages, api, out_dir, base_file_name)
38+
def __preprocess_usages(usages: UsageStore, api: API) -> None:
    """
    Runs the preprocessing steps on the usage store, in a fixed order.

    :param usages: Usage store; handed to every step.
    :param api: Description of the API; handed to every step.
    """

    preprocessing_steps = (
        __remove_internal_usages,
        __add_unused_api_elements,
        __add_implicit_usages_of_default_value,
    )

    for apply_step in preprocessing_steps:
        apply_step(usages, api)
43+
44+
def __create_parameter_usage_distribution(usages: UsageStore) -> dict[int, int]:
    """
    Creates a dictionary X -> N where N is the number of parameters for which all of the following hold:

    * the containing function is used at least X times,
    * the containing class, if it has an entry in the class usages, is used at least X times,
    * the parameter is set at least X times to a value other than the most commonly used value (which might
      differ from the default value).

    :param usages: Usage store.
    :return: The usage distribution. If there are no parameters, this is {0: 0}.
    """

    function_usages = usages.function_usages
    value_usages = usages.value_usages
    class_usages = usages.class_usages

    # Per parameter: how often it is set to a value other than the most common one.
    other_value_counts = []
    # Per parameter: the largest X for which the parameter is still counted.
    thresholds = []

    for parameter_qname in usages.parameter_usages.keys():
        function_qname = parent_qname(parameter_qname)
        class_qname = parent_qname(function_qname)

        n_other_value = __n_not_set_to_most_common_value(
            parameter_qname, function_usages, value_usages
        )[1]
        other_value_counts.append(n_other_value)

        threshold = min(usages.n_function_usages(function_qname), n_other_value)
        if class_qname in class_usages:
            threshold = min(threshold, usages.n_class_usages(class_qname))
        thresholds.append(threshold)

    # default=0 keeps an empty usage store from raising ValueError.
    max_usages = max(other_value_counts, default=0)

    return {
        i: sum(1 for threshold in thresholds if threshold >= i)
        for i in range(max_usages + 1)
    }
81+
82+
def __remove_internal_usages(usages: UsageStore, api: API) -> None:
    """
    Drops usages that refer to internal parts of the API. Calls to methods that a public class inherits from an
    internal class might be dropped incorrectly, but these are just fit/predict/etc., i.e. something we want to
    keep unchanged anyway.

    :param usages: Usage store
    :param api: Description of the API
    """

    # Internal classes (iterate over a snapshot because entries are removed on the way)
    for qname in tuple(usages.class_usages.keys()):
        if api.is_public_class(qname):
            continue
        print(f"Removing usages of internal class {qname}")
        usages.remove_class(qname)

    # Internal functions
    for qname in tuple(usages.function_usages.keys()):
        if api.is_public_function(qname):
            continue
        print(f"Removing usages of internal function {qname}")
        usages.remove_function(qname)

    # Internal parameters: unknown to the API or owned by a function that is not public
    known_parameters = set(api.parameters().keys())

    for qname in tuple(usages.parameter_usages.keys()):
        owner_qname = parent_qname(qname)
        is_public = qname in known_parameters and api.is_public_function(owner_qname)
        if is_public:
            continue
        print(f"Removing usages of internal parameter {qname}")
        usages.remove_parameter(qname)
115+
116+
def __add_unused_api_elements(usages: UsageStore, api: API) -> None:
    """
    Initializes usage entries for the public classes, public functions and their parameters.

    :param usages: Usage store the entries are initialized in.
    :param api: Description of the API.
    """

    # Public classes
    for class_qname in api.classes:
        if not api.is_public_class(class_qname):
            continue
        usages.init_class(class_qname)

    # Public functions
    for function in api.functions.values():
        if not api.is_public_function(function.qname):
            continue
        usages.init_function(function.qname)

        # "Public" parameters
        # NOTE(review): the indentation of this loop was reconstructed; it is assumed to run only for public
        # functions - confirm against the original file.
        for parameter in function.parameters:
            qname = f"{function.qname}.{parameter.name}"
            usages.init_parameter(qname)
            usages.init_value(qname)
133+
134+
def __add_implicit_usages_of_default_value(usages: UsageStore, api: API) -> None:
    """
    Adds a usage of the default value at every location where the function is used but the parameter is not.
    Parameters for which the API reports None as default value are skipped.

    :param usages: Usage store the value usages are added to.
    :param api: Description of the API; provides the default values.
    """

    for parameter_qname, explicit_usages in list(usages.parameter_usages.items()):
        default_value = api.get_default_value(parameter_qname)
        if default_value is not None:
            owner_usages = usages.function_usages[parent_qname(parameter_qname)]

            call_locations = set(usage.location for usage in owner_usages)
            explicit_locations = set(usage.location for usage in explicit_usages)

            for location in call_locations - explicit_locations:
                usages.add_value_usage(parameter_qname, default_value, location)
150+
151+
def __n_not_set_to_most_common_value(
    parameter_qname: str,
    function_usages: dict[str, list[FunctionUsage]],
    value_usages: dict[str, dict[str, list[ValueUsage]]],
) -> tuple:
    """
    Counts how often a parameter is set to a value other than the most commonly used value.

    :param parameter_qname: Qualified name of the parameter.
    :param function_usages: Usages per function qualified name.
    :param value_usages: Usages per parameter qualified name and value.
    :return: A pair of the most commonly used value (None if no value has any usage) and the number of usages of
        the containing function minus the number of usages of that value.
    """

    n_total_usage = len(function_usages[parent_qname(parameter_qname)])
    usages_by_value = value_usages[parameter_qname]

    # Parameter is unused
    # Checking both conditions even though one implies the other to ensure correctness of the program
    if n_total_usage == 0 and len(usages_by_value) == 0:
        # Same shape as the regular return value, so callers can always unpack or index the result.
        return None, 0

    max_value = 0
    max_key = None
    for key, value in usages_by_value.items():
        if max_value < len(value):
            max_key = key
            max_value = len(value)

    return max_key, (n_total_usage - max_value)
174+
175+
def __remove_rarely_used_api_elements(
    usages: UsageStore, min_usages: int, out_dir: Path, base_file_name: str
) -> dict[str, Any]:
    """
    Removes API elements that are used fewer than min_usages times and writes one JSON report per removal step.

    :param usages: Usage store; elements are removed from it.
    :param min_usages: Threshold handed to the removal of classes, functions and parameters.
    :param out_dir: Directory the reports are written to.
    :param base_file_name: Prefix of the report file names.
    :return: The API size after the individual steps.
    """

    def current_api_size() -> Any:
        # Size of what is still left in the usage store.
        return __api_size_to_json(
            len(usages.class_usages),
            len(usages.function_usages),
            len(usages.parameter_usages),
        )

    def write_report(report: Any, suffix: str) -> None:
        # Writes one report next to the others, named after the removal step.
        with out_dir.joinpath(f"{base_file_name}__{suffix}.json").open("w") as f:
            json.dump(report, f, indent=2)

    rarely_used_classes = __remove_rarely_used_classes(usages, min_usages)
    api_size_after_unused_class_removal = current_api_size()
    write_report(
        rarely_used_classes, f"classes_used_fewer_than_{min_usages}_times"
    )

    rarely_used_functions = __remove_rarely_used_functions(usages, min_usages)
    api_size_after_unused_function_removal = current_api_size()
    write_report(
        rarely_used_functions, f"functions_used_fewer_than_{min_usages}_times"
    )

    rarely_used_parameters = __remove_rarely_used_parameters(usages, min_usages)
    api_size_after_unused_parameter_removal = current_api_size()
    write_report(
        rarely_used_parameters, f"parameters_used_fewer_than_{min_usages}_times"
    )

    # __remove_mostly_useless_parameters takes only the usage store; passing min_usages as well raised TypeError.
    # NOTE(review): the report name still contains min_usages although that helper uses a fixed threshold.
    mostly_useless_parameters = __remove_mostly_useless_parameters(usages)
    api_size_after_useless_parameter_removal = current_api_size()
    write_report(
        mostly_useless_parameters,
        f"parameters_set_fewer_than_{min_usages}_times_to_value_other_than_most_common",
    )

    return {
        "after_unused_class_removal": api_size_after_unused_class_removal,
        "after_unused_function_removal": api_size_after_unused_function_removal,
        "after_unused_parameter_removal": api_size_after_unused_parameter_removal,
        "after_useless_parameter_removal": api_size_after_useless_parameter_removal,
    }
235+
236+
def __remove_rarely_used_classes(usages: UsageStore, min_usages: int) -> list[str]:
    """
    Removes classes that are used fewer than min_usages times.

    :param usages: Usage store; matching classes are removed from it.
    :param min_usages: Classes with fewer usages than this are removed.
    :return: Sorted qualified names of the removed classes.
    """

    removed_qnames = []

    # Iterate over a snapshot because entries are removed on the way.
    for qname in tuple(usages.class_usages.keys()):
        if usages.n_class_usages(qname) >= min_usages:
            continue
        removed_qnames.append(qname)
        usages.remove_class(qname)

    removed_qnames.sort()
    return removed_qnames
246+
247+
def __remove_rarely_used_functions(usages: UsageStore, min_usages: int) -> list[str]:
    """
    Removes functions that are used fewer than min_usages times.

    :param usages: Usage store; matching functions are removed from it.
    :param min_usages: Functions with fewer usages than this are removed.
    :return: Sorted qualified names of the removed functions.
    """

    removed_qnames = []

    # Iterate over a snapshot because entries are removed on the way.
    for qname in tuple(usages.function_usages.keys()):
        if usages.n_function_usages(qname) >= min_usages:
            continue
        removed_qnames.append(qname)
        usages.remove_function(qname)

    removed_qnames.sort()
    return removed_qnames
257+
258+
def __remove_rarely_used_parameters(usages: UsageStore, min_usages: int) -> list[str]:
    """
    Removes parameters that are used fewer than min_usages times.

    :param usages: Usage store; matching parameters are removed from it.
    :param min_usages: Parameters with fewer usages than this are removed.
    :return: Sorted qualified names of the removed parameters.
    """

    removed_qnames = []

    # Iterate over a snapshot because entries are removed on the way.
    for qname in tuple(usages.parameter_usages.keys()):
        if usages.n_parameter_usages(qname) >= min_usages:
            continue
        removed_qnames.append(qname)
        usages.remove_parameter(qname)

    removed_qnames.sort()
    return removed_qnames
268+
269+
def __remove_mostly_useless_parameters(usages: UsageStore) -> dict[str, Any]:
    """
    Removes parameters that are never set to a value other than their most commonly used value.

    :param usages: Usage store; matching parameters are removed from it.
    :return: Maps the qualified name of each removed parameter to its most commonly used value.
    """

    # The stored values are value keys (not counts), hence dict[str, Any] rather than dict[str, int].
    result: dict[str, Any] = {}

    # Iterate over a snapshot because entries are removed on the way.
    for parameter_qname in list(usages.parameter_usages.keys()):
        usage_key, usage_count = __n_not_set_to_most_common_value(
            parameter_qname, usages.function_usages, usages.value_usages
        )

        if usage_count < 1:
            result[parameter_qname] = usage_key
            usages.remove_parameter(parameter_qname)

    return result
283+
284+
def __write_api_size(
    api: API, api_size_after_removal: dict[str, Any], out_dir: Path, base_file_name: str
) -> None:
    """
    Writes the API size (full, public and after each removal step) to "<base_file_name>__api_size.json".

    :param api: Description of the API; provides the full and public counts.
    :param api_size_after_removal: API size per removal step, keyed by the step names copied below.
    :param out_dir: Directory the file is written to.
    :param base_file_name: Prefix of the file name.
    """

    removal_steps = (
        "after_unused_class_removal",
        "after_unused_function_removal",
        "after_unused_parameter_removal",
        "after_useless_parameter_removal",
    )

    target = out_dir.joinpath(f"{base_file_name}__api_size.json")
    with target.open("w") as f:
        report = {
            "full": __api_size_to_json(
                api.class_count(), api.function_count(), api.parameter_count()
            ),
            "public": __api_size_to_json(
                api.public_class_count(),
                api.public_function_count(),
                api.public_parameter_count(),
            ),
        }
        for step in removal_steps:
            report[step] = api_size_after_removal[step]

        json.dump(report, f, indent=2)
315+
316+
def __api_size_to_json(n_classes: int, n_functions: int, n_parameters: int) -> dict[str, int]:
    """
    Bundles the three counts into a JSON-serializable dictionary.

    :param n_classes: Number of classes.
    :param n_functions: Number of functions.
    :param n_parameters: Number of parameters.
    :return: Dictionary with the keys "n_classes", "n_functions" and "n_parameters".
    """

    return {
        "n_classes": n_classes,
        "n_functions": n_functions,
        "n_parameters": n_parameters,
    }
323+
324+
def __optional_vs_required_parameters(
    usages: UsageStore, public_api: API, out_dir: Path, base_file_name: str
) -> None:
    """Placeholder that is not implemented yet; calling it has no effect."""

    # TODO: Determine whether parameter should be constant (already removed)/required/optional based on entropy
    # TODO: Use most commonly set value as default

    return None

package-parser/tests/commands/generate-annotations/__init__.py

Whitespace-only changes.
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
import pytest
2+
@pytest.mark.skip(
    reason="Work in progress: needs an importable suggest_improvements and repository-local input files"
)
def test_irgendwas():
    """Placeholder smoke test for suggest_improvements; skipped until its inputs are available."""

    # NOTE(review): suggest_improvements is not imported in this file and `args` is not defined here. The call
    # also has to pass opened files, since suggest_improvements uses its arguments as context managers and reads
    # their `name` attribute.
    suggest_improvements(
        # Raw string: in a normal string literal "\U" starts a unicode escape, which made this file a SyntaxError.
        # NOTE(review): developer-local absolute path; replace with a fixture inside the repository.
        r"C:\Users\Arsam\Desktop\Arsam\AWT\api-editor\api-editor\data\api\scikit-learn__sklearn__1.0.2__api.json",
        args.usages,
    )

0 commit comments

Comments
 (0)