Source code for bundle.scraper.sites.site_1337.browser

# Copyright 2026 HorusElohim
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

from __future__ import annotations

import asyncio

from tabulate import tabulate

from ....core import browser, logger
from .data import TorrentData

log = logger.get_logger(__name__)



[docs]
class Browser(browser.Browser):
    """
    A specialized browser class for parsing 1337x.to search results,
    including extracting magnet links from detail pages.

    It builds search URLs, turns the result table of a search page into
    ``TorrentData`` records, visits each record's detail page to read its
    magnet link, and can render the records as a text table.
    """

    # Base URL of the site. Despite the "suffix" in its name it is used as a
    # prefix: the search path is appended to it, and it is passed as the base
    # URL of the detail-link table column.
    url_suffix: str = "https://1337x.to/"


[docs]
    async def set_context(self) -> Browser:
        """
        Create a new context that presents a Firefox-on-Linux user agent and
        the ``en-US`` locale.

        Returns:
            Whatever ``self.new_context`` resolves to.
        """
        firefox_on_linux = "Mozilla/5.0 (X11; Linux x86_64; rv:101.0) Gecko/20100101 Firefox/101.0"
        context_options = {"user_agent": firefox_on_linux, "locale": "en-US"}
        return await self.new_context(**context_options)



[docs]
    async def get_search_url(self, name: str, page: int = 1) -> str:
        """
        Build the search-results URL for a query.

        The query is split on whitespace, every word is percent-encoded, and
        the words are joined with ``+`` before being placed in the URL path.

        Args:
            name: Free-text search query; leading, trailing and repeated
                whitespace is ignored.
            page: Result page number placed in the URL (defaults to 1).

        Returns:
            The URL ``<url_suffix>search/<encoded query>/<page>/``.
        """
        # Function-scope import so this method does not depend on the
        # module's import block.
        from urllib.parse import quote

        # Encode each word on its own: reserved characters typed by the user
        # ("/", "?", "#", "+", "%") would otherwise change the path or cut the
        # URL short, while the "+" separators between words must stay literal.
        query = "+".join(quote(word, safe="") for word in name.split())
        return f"{self.url_suffix}search/{query}/{page}/"



[docs]
    async def get_torrents(self, page: browser.Page) -> list[TorrentData]:
        """
        Parse the search-result table on ``page`` and attach magnet links.

        Each table row becomes a ``TorrentData`` record. Every record starts
        with an empty ``magnet_link``; records that have a ``detail_url`` then
        get their magnet link fetched concurrently from the detail page.

        Args:
            page: Page whose result table is handed to ``self.extract_table``.

        Returns:
            The records produced by ``self.extract_table``, updated in place.
        """
        log.debug("Begin parsing torrents from page...")

        Column = self.Table.Column
        kind = Column.Type
        # The second anchor in the name cell supplies both the title text and
        # the link to the detail page.
        title_anchor = "td.coll-1.name a:nth-of-type(2)"

        columns = [
            Column(name="name", selector=title_anchor, parser_type=kind.TEXT),
            Column(
                name="detail_url",
                selector=title_anchor,
                parser_type=kind.URL,
                base_url=self.url_suffix,
            ),
            Column(name="seeds", selector="td.coll-2.seeds", parser_type=kind.INT),
            Column(name="leeches", selector="td.coll-3.leeches", parser_type=kind.INT),
            Column(name="uploaded_at", selector="td.coll-date", parser_type=kind.TEXT),
            Column(name="size", selector="td.coll-4", parser_type=kind.TEXT),
            Column(name="uploader", selector="td.coll-5 a", parser_type=kind.TEXT),
        ]
        layout = self.Table(
            row_selector="table.table-list tbody tr",
            columns=columns,
            model=TorrentData,
        )

        records = await self.extract_table(page, layout)

        # Give every record a magnet_link up front so the field exists even
        # for rows that are never fetched.
        for record in records:
            record.magnet_link = ""

        if records:
            pending = [self._fetch_magnet_link_for_torrent(record) for record in records if record.detail_url]
            await asyncio.gather(*pending)

        log.debug("Total parsed torrents with magnet links: %d", len(records))
        return records


    async def _fetch_magnet_link_for_torrent(self, torrent: TorrentData) -> None:
        """
        Open a torrent's detail page and store its magnet link on the record.

        Sets ``torrent.magnet_link`` to the ``href`` of the ``a#openPopup``
        element, or to ``""`` when that attribute is missing.

        Args:
            torrent: Record whose ``detail_url`` is visited and whose
                ``magnet_link`` is updated in place.

        Raises:
            Any error raised while navigating or waiting for the selector is
            propagated unchanged; the page is closed before it propagates.
        """
        log.debug("Fetching magnet link for %r", torrent.name)
        page = await self.new_page()
        try:
            # NOTE(review): assuming `page` is a Playwright page, "commit" does
            # not wait for the full load; the selector wait below is what
            # guarantees the link element is present.
            await page.goto(torrent.detail_url, wait_until="commit")
            await page.wait_for_selector("a#openPopup")
            torrent.magnet_link = (await page.get_attribute("a#openPopup", "href")) or ""
        finally:
            # Close the page on every path; without this a failed navigation
            # or a selector timeout would leave the page open.
            await page.close()


[docs]
    async def tabulate_torrents(self, torrents: list[TorrentData], truncate_width: int = 30) -> str:
        """
        Render torrents as a text report: a grid table followed by their links.

        Args:
            torrents: Records to display, numbered from 1 in the given order.
            truncate_width: Maximum length of the uploader cell; longer values
                are cut and finished with ``...``.

        Returns:
            A string holding the ``fancy_grid`` table and a "Links:" section
            listing each torrent's detail URL and magnet link.
        """

        def _clip(value: str) -> str:
            # Values that already fit pass through; longer ones are shortened
            # so that the result, ellipsis included, is truncate_width long.
            if len(value) > truncate_width:
                return value[: truncate_width - 3] + "..."
            return value

        column_titles = ["#", "Name", "Seeds", "Leeches", "Uploaded At", "Size", "Uploader"]

        rows = []
        for number, item in enumerate(torrents, start=1):
            rows.append(
                [
                    number,
                    item.name,
                    item.seeds,
                    item.leeches,
                    item.uploaded_at,
                    item.size,
                    _clip(item.uploader),
                ]
            )
        rendered_grid = tabulate(rows, headers=column_titles, tablefmt="fancy_grid")

        link_entries = [
            f"[{number}] Detail URL: {item.detail_url}\n    Magnet Link: {item.magnet_link}"
            for number, item in enumerate(torrents, start=1)
        ]
        links_section = "\n\n".join(link_entries)

        return f"\n{rendered_grid}\n\nLinks:\n{links_section}"