dns-check-py/venv/lib/python3.12/site-packages/pip/_internal/index/sources.py

import logging
import mimetypes
import os
import pathlib
from typing import Callable, Iterable, Optional, Tuple

from pip._internal.models.candidate import InstallationCandidate
from pip._internal.models.link import Link
from pip._internal.utils.urls import path_to_url, url_to_path
from pip._internal.vcs import is_url

logger = logging.getLogger(__name__)

FoundCandidates = Iterable[InstallationCandidate]
FoundLinks = Iterable[Link]
CandidatesFromPage = Callable[[Link], Iterable[InstallationCandidate]]
PageValidator = Callable[[Link], bool]


class LinkSource:
    @property
    def link(self) -> Optional[Link]:
        """Returns the underlying link, if there's one."""
        raise NotImplementedError()

    def page_candidates(self) -> FoundCandidates:
        """Candidates found by parsing an archive listing HTML file."""
        raise NotImplementedError()

    def file_links(self) -> FoundLinks:
        """Links found by specifying archives directly."""
        raise NotImplementedError()


def _is_html_file(file_url: str) -> bool:
    return mimetypes.guess_type(file_url, strict=False)[0] == "text/html"


class _FlatDirectorySource(LinkSource):
    """Link source specified by ``--find-links=<path-to-dir>``.

    This looks the content of the directory, and returns:

    * ``page_candidates``: Links listed on each HTML file in the directory.
    * ``file_candidates``: Archives in the directory.
    """

    def __init__(
        self,
        candidates_from_page: CandidatesFromPage,
        path: str,
    ) -> None:
        self._candidates_from_page = candidates_from_page
        self._path = pathlib.Path(os.path.realpath(path))

    @property
    def link(self) -> Optional[Link]:
        return None

    def page_candidates(self) -> FoundCandidates:
        for path in self._path.iterdir():
            url = path_to_url(str(path))
            if not _is_html_file(url):
                continue
            yield from self._candidates_from_page(Link(url))

    def file_links(self) -> FoundLinks:
        for path in self._path.iterdir():
            url = path_to_url(str(path))
            if _is_html_file(url):
                continue
            yield Link(url)


class _LocalFileSource(LinkSource):
    """``--find-links=<path-or-url>`` or ``--[extra-]index-url=<path-or-url>``.

    If a URL is supplied, it must be a ``file:`` URL. If a path is supplied to
    the option, it is converted to a URL first. This returns:

    * ``page_candidates``: Links listed on an HTML file.
    * ``file_candidates``: The non-HTML file.
    """

    def __init__(
        self,
        candidates_from_page: CandidatesFromPage,
        link: Link,
    ) -> None:
        self._candidates_from_page = candidates_from_page
        self._link = link

    @property
    def link(self) -> Optional[Link]:
        return self._link

    def page_candidates(self) -> FoundCandidates:
        if not _is_html_file(self._link.url):
            return
        yield from self._candidates_from_page(self._link)

    def file_links(self) -> FoundLinks:
        if _is_html_file(self._link.url):
            return
        yield self._link


class _RemoteFileSource(LinkSource):
    """``--find-links=<url>`` or ``--[extra-]index-url=<url>``.

    This returns:

    * ``page_candidates``: Links listed on an HTML file.
    * ``file_candidates``: The non-HTML file.
    """

    def __init__(
        self,
        candidates_from_page: CandidatesFromPage,
        page_validator: PageValidator,
        link: Link,
    ) -> None:
        self._candidates_from_page = candidates_from_page
        self._page_validator = page_validator
        self._link = link

    @property
    def link(self) -> Optional[Link]:
        return self._link

    def page_candidates(self) -> FoundCandidates:
        if not self._page_validator(self._link):
            return
        yield from self._candidates_from_page(self._link)

    def file_links(self) -> FoundLinks:
        yield self._link


class _IndexDirectorySource(LinkSource):
    """``--[extra-]index-url=<path-to-directory>``.

    This is treated like a remote URL; ``candidates_from_page`` contains logic
    for this by appending ``index.html`` to the link.
    """

    def __init__(
        self,
        candidates_from_page: CandidatesFromPage,
        link: Link,
    ) -> None:
        self._candidates_from_page = candidates_from_page
        self._link = link

    @property
    def link(self) -> Optional[Link]:
        return self._link

    def page_candidates(self) -> FoundCandidates:
        yield from self._candidates_from_page(self._link)

    def file_links(self) -> FoundLinks:
        return ()


def build_source(
    location: str,
    *,
    candidates_from_page: CandidatesFromPage,
    page_validator: PageValidator,
    expand_dir: bool,
    cache_link_parsing: bool,
) -> Tuple[Optional[str], Optional[LinkSource]]:
    path: Optional[str] = None
    url: Optional[str] = None
    if os.path.exists(location):  # Is a local path.
        url = path_to_url(location)
        path = location
    elif location.startswith("file:"):  # A file: URL.
        url = location
        path = url_to_path(location)
    elif is_url(location):
        url = location

    if url is None:
        msg = (
            "Location '%s' is ignored: "
            "it is either a non-existing path or lacks a specific scheme."
        )
        logger.warning(msg, location)
        return (None, None)

    if path is None:
        source: LinkSource = _RemoteFileSource(
            candidates_from_page=candidates_from_page,
            page_validator=page_validator,
            link=Link(url, cache_link_parsing=cache_link_parsing),
        )
        return (url, source)

    if os.path.isdir(path):
        if expand_dir:
            source = _FlatDirectorySource(
                candidates_from_page=candidates_from_page,
                path=path,
            )
        else:
            source = _IndexDirectorySource(
                candidates_from_page=candidates_from_page,
                link=Link(url, cache_link_parsing=cache_link_parsing),
            )
        return (url, source)
    elif os.path.isfile(path):
        source = _LocalFileSource(
            candidates_from_page=candidates_from_page,
            link=Link(url, cache_link_parsing=cache_link_parsing),
        )
        return (url, source)
    logger.warning(
        "Location '%s' is ignored: it is neither a file nor a directory.",
        location,
    )
    return (url, None)
first commit 2023-11-30 14:53:37 +01:00			`import logging`
			`import mimetypes`
			`import os`
			`import pathlib`
			`from typing import Callable, Iterable, Optional, Tuple`

			`from pip._internal.models.candidate import InstallationCandidate`
			`from pip._internal.models.link import Link`
			`from pip._internal.utils.urls import path_to_url, url_to_path`
			`from pip._internal.vcs import is_url`

			`logger = logging.getLogger(__name__)`

			`FoundCandidates = Iterable[InstallationCandidate]`
			`FoundLinks = Iterable[Link]`
			`CandidatesFromPage = Callable[[Link], Iterable[InstallationCandidate]]`
			`PageValidator = Callable[[Link], bool]`


			`class LinkSource:`
			`@property`
			`def link(self) -> Optional[Link]:`
			`"""Returns the underlying link, if there's one."""`
			`raise NotImplementedError()`

			`def page_candidates(self) -> FoundCandidates:`
			`"""Candidates found by parsing an archive listing HTML file."""`
			`raise NotImplementedError()`

			`def file_links(self) -> FoundLinks:`
			`"""Links found by specifying archives directly."""`
			`raise NotImplementedError()`


			`def _is_html_file(file_url: str) -> bool:`
			`return mimetypes.guess_type(file_url, strict=False)[0] == "text/html"`


			`class _FlatDirectorySource(LinkSource):`
			"""Link source specified by ``--find-links=<path-to-dir>``.

			`This looks the content of the directory, and returns:`

			* ``page_candidates``: Links listed on each HTML file in the directory.
			* ``file_candidates``: Archives in the directory.
			`"""`

			`def __init__(`
			`self,`
			`candidates_from_page: CandidatesFromPage,`
			`path: str,`
			`) -> None:`
			`self._candidates_from_page = candidates_from_page`
			`self._path = pathlib.Path(os.path.realpath(path))`

			`@property`
			`def link(self) -> Optional[Link]:`
			`return None`

			`def page_candidates(self) -> FoundCandidates:`
			`for path in self._path.iterdir():`
			`url = path_to_url(str(path))`
			`if not _is_html_file(url):`
			`continue`
			`yield from self._candidates_from_page(Link(url))`

			`def file_links(self) -> FoundLinks:`
			`for path in self._path.iterdir():`
			`url = path_to_url(str(path))`
			`if _is_html_file(url):`
			`continue`
			`yield Link(url)`


			`class _LocalFileSource(LinkSource):`
			"""``--find-links=<path-or-url>`` or ``--[extra-]index-url=<path-or-url>``.

			If a URL is supplied, it must be a ``file:`` URL. If a path is supplied to
			`the option, it is converted to a URL first. This returns:`

			* ``page_candidates``: Links listed on an HTML file.
			* ``file_candidates``: The non-HTML file.
			`"""`

			`def __init__(`
			`self,`
			`candidates_from_page: CandidatesFromPage,`
			`link: Link,`
			`) -> None:`
			`self._candidates_from_page = candidates_from_page`
			`self._link = link`

			`@property`
			`def link(self) -> Optional[Link]:`
			`return self._link`

			`def page_candidates(self) -> FoundCandidates:`
			`if not _is_html_file(self._link.url):`
			`return`
			`yield from self._candidates_from_page(self._link)`

			`def file_links(self) -> FoundLinks:`
			`if _is_html_file(self._link.url):`
			`return`
			`yield self._link`


			`class _RemoteFileSource(LinkSource):`
			"""``--find-links=<url>`` or ``--[extra-]index-url=<url>``.

			`This returns:`

			* ``page_candidates``: Links listed on an HTML file.
			* ``file_candidates``: The non-HTML file.
			`"""`

			`def __init__(`
			`self,`
			`candidates_from_page: CandidatesFromPage,`
			`page_validator: PageValidator,`
			`link: Link,`
			`) -> None:`
			`self._candidates_from_page = candidates_from_page`
			`self._page_validator = page_validator`
			`self._link = link`

			`@property`
			`def link(self) -> Optional[Link]:`
			`return self._link`

			`def page_candidates(self) -> FoundCandidates:`
			`if not self._page_validator(self._link):`
			`return`
			`yield from self._candidates_from_page(self._link)`

			`def file_links(self) -> FoundLinks:`
			`yield self._link`


			`class _IndexDirectorySource(LinkSource):`
			"""``--[extra-]index-url=<path-to-directory>``.

			This is treated like a remote URL; ``candidates_from_page`` contains logic
			for this by appending ``index.html`` to the link.
			`"""`

			`def __init__(`
			`self,`
			`candidates_from_page: CandidatesFromPage,`
			`link: Link,`
			`) -> None:`
			`self._candidates_from_page = candidates_from_page`
			`self._link = link`

			`@property`
			`def link(self) -> Optional[Link]:`
			`return self._link`

			`def page_candidates(self) -> FoundCandidates:`
			`yield from self._candidates_from_page(self._link)`

			`def file_links(self) -> FoundLinks:`
			`return ()`


			`def build_source(`
			`location: str,`
			`*,`
			`candidates_from_page: CandidatesFromPage,`
			`page_validator: PageValidator,`
			`expand_dir: bool,`
			`cache_link_parsing: bool,`
			`) -> Tuple[Optional[str], Optional[LinkSource]]:`
			`path: Optional[str] = None`
			`url: Optional[str] = None`
			`if os.path.exists(location): # Is a local path.`
			`url = path_to_url(location)`
			`path = location`
			`elif location.startswith("file:"): # A file: URL.`
			`url = location`
			`path = url_to_path(location)`
			`elif is_url(location):`
			`url = location`

			`if url is None:`
			`msg = (`
			`"Location '%s' is ignored: "`
			`"it is either a non-existing path or lacks a specific scheme."`
			`)`
			`logger.warning(msg, location)`
			`return (None, None)`

			`if path is None:`
			`source: LinkSource = _RemoteFileSource(`
			`candidates_from_page=candidates_from_page,`
			`page_validator=page_validator,`
			`link=Link(url, cache_link_parsing=cache_link_parsing),`
			`)`
			`return (url, source)`

			`if os.path.isdir(path):`
			`if expand_dir:`
			`source = _FlatDirectorySource(`
			`candidates_from_page=candidates_from_page,`
			`path=path,`
			`)`
			`else:`
			`source = _IndexDirectorySource(`
			`candidates_from_page=candidates_from_page,`
			`link=Link(url, cache_link_parsing=cache_link_parsing),`
			`)`
			`return (url, source)`
			`elif os.path.isfile(path):`
			`source = _LocalFileSource(`
			`candidates_from_page=candidates_from_page,`
			`link=Link(url, cache_link_parsing=cache_link_parsing),`
			`)`
			`return (url, source)`
			`logger.warning(`
			`"Location '%s' is ignored: it is neither a file nor a directory.",`
			`location,`
			`)`
			`return (url, None)`