Stubs for beautifulsoup4 (#5758)

JelleZijlstra · web-flow · commit 7f320c6b9e14 · 2021-07-11T17:02:51.000+03:00
Stubgen and manual fixes, don't bother with Python 2
diff --git a/pyrightconfig.stricter.json b/pyrightconfig.stricter.json
@@ -49,7 +49,8 @@
         "stubs/simplejson",
         "stubs/vobject",
         "stubs/waitress",
-        "stubs/Werkzeug"
+        "stubs/Werkzeug",
+        "stubs/beautifulsoup4",
     ],
     "typeCheckingMode": "basic",
     "strictListInference": true,
diff --git a/stubs/beautifulsoup4/@tests/requirements-stubtest.txt b/stubs/beautifulsoup4/@tests/requirements-stubtest.txt
@@ -0,0 +1,2 @@
+html5lib
+lxml
diff --git a/stubs/beautifulsoup4/@tests/stubtest_allowlist.txt b/stubs/beautifulsoup4/@tests/stubtest_allowlist.txt
@@ -0,0 +1 @@
+bs4.element.NavigableString.string  # created through __getattr__
diff --git a/stubs/beautifulsoup4/METADATA.toml b/stubs/beautifulsoup4/METADATA.toml
@@ -0,0 +1 @@
+version = "4.9"
diff --git a/stubs/beautifulsoup4/bs4/__init__.pyi b/stubs/beautifulsoup4/bs4/__init__.pyi
@@ -0,0 +1,67 @@
+from _typeshed import Self, SupportsRead
+from typing import Any, List, Sequence, Type
+
+from .builder import TreeBuilder
+from .element import PageElement, SoupStrainer, Tag
+from .formatter import Formatter
+
+class GuessedAtParserWarning(UserWarning): ...
+class MarkupResemblesLocatorWarning(UserWarning): ...
+
+class BeautifulSoup(Tag):
+    ROOT_TAG_NAME: str
+    DEFAULT_BUILDER_FEATURES: List[str]
+    ASCII_SPACES: str
+    NO_PARSER_SPECIFIED_WARNING: str
+    element_classes: Any
+    builder: TreeBuilder
+    is_xml: bool
+    known_xml: bool
+    parse_only: SoupStrainer | None
+    markup: str
+    def __init__(
+        self,
+        markup: str | bytes | SupportsRead[str] | SupportsRead[bytes] = ...,
+        features: str | Sequence[str] | None = ...,
+        builder: TreeBuilder | Type[TreeBuilder] | None = ...,
+        parse_only: SoupStrainer | None = ...,
+        from_encoding: str | None = ...,
+        exclude_encodings: Sequence[str] | None = ...,
+        element_classes: dict[Type[PageElement], Type[Any]] | None = ...,
+        **kwargs,
+    ) -> None: ...
+    def __copy__(self: Self) -> Self: ...
+    hidden: bool
+    current_data: Any
+    currentTag: Any
+    tagStack: Any
+    open_tag_counter: Any
+    preserve_whitespace_tag_stack: Any
+    string_container_stack: Any
+    def reset(self) -> None: ...
+    def new_tag(
+        self,
+        name,
+        namespace: Any | None = ...,
+        nsprefix: Any | None = ...,
+        attrs=...,
+        sourceline: Any | None = ...,
+        sourcepos: Any | None = ...,
+        **kwattrs,
+    ) -> Tag: ...
+    def string_container(self, base_class: Any | None = ...): ...
+    def new_string(self, s, subclass: Any | None = ...): ...
+    def insert_before(self, *args) -> None: ...
+    def insert_after(self, *args) -> None: ...
+    def popTag(self): ...
+    def pushTag(self, tag) -> None: ...
+    def endData(self, containerClass: Any | None = ...) -> None: ...
+    def object_was_parsed(self, o, parent: Any | None = ..., most_recent_element: Any | None = ...) -> None: ...
+    def handle_starttag(self, name, namespace, nsprefix, attrs, sourceline: Any | None = ..., sourcepos: Any | None = ...): ...
+    def handle_endtag(self, name, nsprefix: Any | None = ...) -> None: ...
+    def handle_data(self, data) -> None: ...
+    def decode(self, pretty_print: bool = ..., eventual_encoding: str = ..., formatter: str | Formatter = ...): ...  # type: ignore  # missing some arguments
+
+class BeautifulStoneSoup(BeautifulSoup): ...
+class StopParsing(Exception): ...
+class FeatureNotFound(ValueError): ...
diff --git a/stubs/beautifulsoup4/bs4/builder/__init__.pyi b/stubs/beautifulsoup4/bs4/builder/__init__.pyi
@@ -0,0 +1,66 @@
+from typing import Any
+
+class TreeBuilderRegistry:
+    builders_for_feature: Any
+    builders: Any
+    def __init__(self) -> None: ...
+    def register(self, treebuilder_class) -> None: ...
+    def lookup(self, *features): ...
+
+class TreeBuilder:
+    NAME: str
+    ALTERNATE_NAMES: Any
+    features: Any
+    is_xml: bool
+    picklable: bool
+    empty_element_tags: Any
+    DEFAULT_CDATA_LIST_ATTRIBUTES: Any
+    DEFAULT_PRESERVE_WHITESPACE_TAGS: Any
+    DEFAULT_STRING_CONTAINERS: Any
+    USE_DEFAULT: Any
+    TRACKS_LINE_NUMBERS: bool
+    soup: Any
+    cdata_list_attributes: Any
+    preserve_whitespace_tags: Any
+    store_line_numbers: Any
+    string_containers: Any
+    def __init__(
+        self, multi_valued_attributes=..., preserve_whitespace_tags=..., store_line_numbers=..., string_containers=...
+    ) -> None: ...
+    def initialize_soup(self, soup) -> None: ...
+    def reset(self) -> None: ...
+    def can_be_empty_element(self, tag_name): ...
+    def feed(self, markup) -> None: ...
+    def prepare_markup(
+        self,
+        markup,
+        user_specified_encoding: Any | None = ...,
+        document_declared_encoding: Any | None = ...,
+        exclude_encodings: Any | None = ...,
+    ) -> None: ...
+    def test_fragment_to_document(self, fragment): ...
+    def set_up_substitutions(self, tag): ...
+
+class SAXTreeBuilder(TreeBuilder):
+    def feed(self, markup) -> None: ...
+    def close(self) -> None: ...
+    def startElement(self, name, attrs) -> None: ...
+    def endElement(self, name) -> None: ...
+    def startElementNS(self, nsTuple, nodeName, attrs) -> None: ...
+    def endElementNS(self, nsTuple, nodeName) -> None: ...
+    def startPrefixMapping(self, prefix, nodeValue) -> None: ...
+    def endPrefixMapping(self, prefix) -> None: ...
+    def characters(self, content) -> None: ...
+    def startDocument(self) -> None: ...
+    def endDocument(self) -> None: ...
+
+class HTMLTreeBuilder(TreeBuilder):
+    empty_element_tags: Any
+    block_elements: Any
+    DEFAULT_STRING_CONTAINERS: Any
+    DEFAULT_CDATA_LIST_ATTRIBUTES: Any
+    DEFAULT_PRESERVE_WHITESPACE_TAGS: Any
+    def set_up_substitutions(self, tag): ...
+
+class ParserRejectedMarkup(Exception):
+    def __init__(self, message_or_exception) -> None: ...
diff --git a/stubs/beautifulsoup4/bs4/builder/_html5lib.pyi b/stubs/beautifulsoup4/bs4/builder/_html5lib.pyi
@@ -0,0 +1,67 @@
+from typing import Any
+
+from bs4.builder import HTMLTreeBuilder
+
+class HTML5TreeBuilder(HTMLTreeBuilder):
+    NAME: str
+    features: Any
+    TRACKS_LINE_NUMBERS: bool
+    user_specified_encoding: Any
+    def prepare_markup(  # type: ignore  # user_specified_encoding doesn't have a default
+        self, markup, user_specified_encoding, document_declared_encoding: Any | None = ..., exclude_encodings: Any | None = ...
+    ) -> None: ...
+    def feed(self, markup) -> None: ...
+    underlying_builder: Any
+    def create_treebuilder(self, namespaceHTMLElements): ...
+    def test_fragment_to_document(self, fragment): ...
+
+class TreeBuilderForHtml5lib(Any):  # type: ignore # html5lib.treebuilders.base.TreeBuilder
+    soup: Any
+    parser: Any
+    store_line_numbers: Any
+    def __init__(self, namespaceHTMLElements, soup: Any | None = ..., store_line_numbers: bool = ..., **kwargs) -> None: ...
+    def documentClass(self): ...
+    def insertDoctype(self, token) -> None: ...
+    def elementClass(self, name, namespace): ...
+    def commentClass(self, data): ...
+    def fragmentClass(self): ...
+    def appendChild(self, node) -> None: ...
+    def getDocument(self): ...
+    def getFragment(self): ...
+    def testSerializer(self, element): ...
+
+class AttrList:
+    element: Any
+    attrs: Any
+    def __init__(self, element) -> None: ...
+    def __iter__(self): ...
+    def __setitem__(self, name, value) -> None: ...
+    def items(self): ...
+    def keys(self): ...
+    def __len__(self): ...
+    def __getitem__(self, name): ...
+    def __contains__(self, name): ...
+
+class Element(Any):  # type: ignore  # html5lib.treebuilders.base.Node
+    element: Any
+    soup: Any
+    namespace: Any
+    def __init__(self, element, soup, namespace) -> None: ...
+    def appendChild(self, node) -> None: ...
+    def getAttributes(self): ...
+    def setAttributes(self, attributes) -> None: ...
+    attributes: Any
+    def insertText(self, data, insertBefore: Any | None = ...) -> None: ...
+    def insertBefore(self, node, refNode) -> None: ...
+    def removeChild(self, node) -> None: ...
+    def reparentChildren(self, new_parent) -> None: ...
+    def cloneNode(self): ...
+    def hasContent(self): ...
+    def getNameTuple(self): ...
+    nameTuple: Any
+
+class TextNode(Element):
+    element: Any
+    soup: Any
+    def __init__(self, element, soup) -> None: ...
+    def cloneNode(self) -> None: ...
diff --git a/stubs/beautifulsoup4/bs4/builder/_htmlparser.pyi b/stubs/beautifulsoup4/bs4/builder/_htmlparser.pyi
@@ -0,0 +1,41 @@
+from html.parser import HTMLParser
+from typing import Any
+
+from bs4.builder import HTMLTreeBuilder
+
+class HTMLParseError(Exception): ...
+
+class BeautifulSoupHTMLParser(HTMLParser):
+    IGNORE: str
+    REPLACE: str
+    on_duplicate_attribute: Any
+    already_closed_empty_element: Any
+    def __init__(self, *args, **kwargs) -> None: ...
+    def error(self, msg) -> None: ...
+    def handle_startendtag(self, name, attrs) -> None: ...
+    def handle_starttag(self, name, attrs, handle_empty_element: bool = ...) -> None: ...
+    def handle_endtag(self, name, check_already_closed: bool = ...) -> None: ...
+    def handle_data(self, data) -> None: ...
+    def handle_charref(self, name) -> None: ...
+    def handle_entityref(self, name) -> None: ...
+    def handle_comment(self, data) -> None: ...
+    def handle_decl(self, data) -> None: ...
+    def unknown_decl(self, data) -> None: ...
+    def handle_pi(self, data) -> None: ...
+
+class HTMLParserTreeBuilder(HTMLTreeBuilder):
+    is_xml: bool
+    picklable: bool
+    NAME: Any
+    features: Any
+    TRACKS_LINE_NUMBERS: bool
+    parser_args: Any
+    def __init__(self, parser_args: Any | None = ..., parser_kwargs: Any | None = ..., **kwargs) -> None: ...
+    def prepare_markup(
+        self,
+        markup,
+        user_specified_encoding: Any | None = ...,
+        document_declared_encoding: Any | None = ...,
+        exclude_encodings: Any | None = ...,
+    ) -> None: ...
+    def feed(self, markup) -> None: ...
diff --git a/stubs/beautifulsoup4/bs4/builder/_lxml.pyi b/stubs/beautifulsoup4/bs4/builder/_lxml.pyi
@@ -0,0 +1,49 @@
+from typing import Any
+
+from bs4.builder import HTMLTreeBuilder, TreeBuilder
+
+class LXMLTreeBuilderForXML(TreeBuilder):
+    DEFAULT_PARSER_CLASS: Any
+    is_xml: bool
+    processing_instruction_class: Any
+    NAME: str
+    ALTERNATE_NAMES: Any
+    features: Any
+    CHUNK_SIZE: int
+    DEFAULT_NSMAPS: Any
+    DEFAULT_NSMAPS_INVERTED: Any
+    def initialize_soup(self, soup) -> None: ...
+    def default_parser(self, encoding): ...
+    def parser_for(self, encoding): ...
+    empty_element_tags: Any
+    soup: Any
+    nsmaps: Any
+    def __init__(self, parser: Any | None = ..., empty_element_tags: Any | None = ..., **kwargs) -> None: ...
+    def prepare_markup(  # type: ignore  # the order of the parameters is different
+        self,
+        markup,
+        user_specified_encoding: Any | None = ...,
+        exclude_encodings: Any | None = ...,
+        document_declared_encoding: Any | None = ...,
+    ) -> None: ...
+    parser: Any
+    def feed(self, markup) -> None: ...
+    def close(self) -> None: ...
+    def start(self, name, attrs, nsmap=...) -> None: ...
+    def end(self, name) -> None: ...
+    def pi(self, target, data) -> None: ...
+    def data(self, content) -> None: ...
+    def doctype(self, name, pubid, system) -> None: ...
+    def comment(self, content) -> None: ...
+    def test_fragment_to_document(self, fragment): ...
+
+class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
+    NAME: Any
+    ALTERNATE_NAMES: Any
+    features: Any
+    is_xml: bool
+    processing_instruction_class: Any
+    def default_parser(self, encoding): ...
+    parser: Any
+    def feed(self, markup) -> None: ...
+    def test_fragment_to_document(self, fragment): ...
diff --git a/stubs/beautifulsoup4/bs4/dammit.pyi b/stubs/beautifulsoup4/bs4/dammit.pyi
@@ -0,0 +1,68 @@
+from typing import Any, Iterator
+
+chardet_type: Any
+
+def chardet_dammit(s): ...
+
+xml_encoding: str
+html_meta: str
+encoding_res: Any
+
+class EntitySubstitution:
+    CHARACTER_TO_HTML_ENTITY: Any
+    HTML_ENTITY_TO_CHARACTER: Any
+    CHARACTER_TO_HTML_ENTITY_RE: Any
+    CHARACTER_TO_XML_ENTITY: Any
+    BARE_AMPERSAND_OR_BRACKET: Any
+    AMPERSAND_OR_BRACKET: Any
+    @classmethod
+    def quoted_attribute_value(cls, value): ...
+    @classmethod
+    def substitute_xml(cls, value, make_quoted_attribute: bool = ...): ...
+    @classmethod
+    def substitute_xml_containing_entities(cls, value, make_quoted_attribute: bool = ...): ...
+    @classmethod
+    def substitute_html(cls, s): ...
+
+class EncodingDetector:
+    override_encodings: Any
+    exclude_encodings: Any
+    chardet_encoding: Any
+    is_html: Any
+    declared_encoding: Any
+    def __init__(
+        self, markup, override_encodings: Any | None = ..., is_html: bool = ..., exclude_encodings: Any | None = ...
+    ) -> None: ...
+    @property
+    def encodings(self) -> None: ...
+    @classmethod
+    def strip_byte_order_mark(cls, data): ...
+    @classmethod
+    def find_declared_encoding(cls, markup, is_html: bool = ..., search_entire_document: bool = ...): ...
+
+class UnicodeDammit:
+    CHARSET_ALIASES: Any
+    ENCODINGS_WITH_SMART_QUOTES: Any
+    smart_quotes_to: Any
+    tried_encodings: Any
+    contains_replacement_characters: bool
+    is_html: Any
+    log: Any
+    detector: Any
+    markup: Any
+    unicode_markup: Any
+    original_encoding: Any
+    def __init__(
+        self, markup, override_encodings=..., smart_quotes_to: Any | None = ..., is_html: bool = ..., exclude_encodings=...
+    ) -> None: ...
+    @property
+    def declared_html_encoding(self): ...
+    def find_codec(self, charset): ...
+    MS_CHARS: Any
+    MS_CHARS_TO_ASCII: Any
+    WINDOWS_1252_TO_UTF8: Any
+    MULTIBYTE_MARKERS_AND_SIZES: Any
+    FIRST_MULTIBYTE_MARKER: Any
+    LAST_MULTIBYTE_MARKER: Any
+    @classmethod
+    def detwingle(cls, in_bytes, main_encoding: str = ..., embedded_encoding: str = ...): ...
diff --git a/stubs/beautifulsoup4/bs4/diagnose.pyi b/stubs/beautifulsoup4/bs4/diagnose.pyi
diff --git a/stubs/beautifulsoup4/bs4/element.pyi b/stubs/beautifulsoup4/bs4/element.pyi
diff --git a/stubs/beautifulsoup4/bs4/formatter.pyi b/stubs/beautifulsoup4/bs4/formatter.pyi

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+bs4.element.NavigableString.string # created through __getattr__`