
@ 颜值精选官
2025-04-30 05:37:51
```
import asyncio
import re
import urllib.parse

import requests
from bs4 import BeautifulSoup

from nostr_poster import post_to_nostr


def extract_redirect_url_from_onclick(onclick_attr):
    """
    Extract the URL from a JavaScript onclick attribute.
    Example: onclick="location.href='https://v2.jk.rs/2022/12/22/275.html';"
    """
    if not onclick_attr:
        return None
    # Pull the URL out of the location.href='URL' pattern
    pattern = r"location\.href='(https?://[^']+)'"
    match = re.search(pattern, onclick_attr)
    if match:
        return match.group(1)
    return None


def extract_images(url):
    """
    Extract the gallery image URLs and the page title from the given webpage.
    Returns a (title, image_urls) tuple; on failure the list is empty.
    """
    # Send an HTTP request to the webpage
    print(f"Fetching webpage: {url}")
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36'
    }
    try:
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()  # Raise an exception for 4XX/5XX responses
    except requests.exceptions.RequestException as e:
        print(f"Error fetching the webpage: {e}")
        return "无题", []

    # Parse the HTML content
    soup = BeautifulSoup(response.text, 'html.parser')
    # "无题" means "Untitled" and is used as the fallback title
    title = soup.title.string.strip() if soup.title and soup.title.string else "无题"
    # Only keep the part of the title before the first dash
    title = title.split('-')[0].strip()

    # Find gallery divs that carry the image URL in a data-src attribute
    gallery_divs = soup.find_all('div', attrs={'data-fancybox': 'gallery'})
    print(f"Found {len(gallery_divs)} gallery divs")

    # Extract image URLs from the gallery divs
    image_urls = []
    for div in gallery_divs:
        data_src = div.get('data-src')
        if data_src:
            # Resolve relative URLs against the page URL
            if not data_src.startswith('http'):
                data_src = urllib.parse.urljoin(url, data_src)
            image_urls.append(data_src)

    # Remove duplicates while preserving order
    unique_urls = []
    for img_url in image_urls:
        if img_url not in unique_urls:
            unique_urls.append(img_url)

    print(f"Found {len(unique_urls)} unique image URLs on the webpage")
    return title, unique_urls


def find_article_urls(base_url):
    """
    Find the first article URL on the base site that is reached through a
    JavaScript onclick redirect; returns None if no such link is found.
    """
    print(f"Looking for article links on: {base_url}")
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36'
    }
    try:
        response = requests.get(base_url, headers=headers, timeout=30)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching the base webpage: {e}")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')

    # Check every anchor tag for a JavaScript onclick redirect
    for anchor in soup.find_all('a'):
        onclick_attr = anchor.get('onclick')
        redirect_url = extract_redirect_url_from_onclick(onclick_attr)
        if redirect_url:
            return redirect_url
    return None


async def main():
    while True:
        # Main website URL
        base_url = "https://v2.jk.rs/"

        # Find an article URL hidden behind a JavaScript redirect
        article_url = find_article_urls(base_url)
        if not article_url:
            print("No article URLs found. Using the base URL instead.")
            article_url = base_url

        print(f"\nProcessing article: {article_url}")
        title, image_urls = extract_images(article_url)
        print(f"Found {len(image_urls)} images on {article_url}, title: {title}")

        if image_urls:
            for img in image_urls:
                print(f"Image URL: {img}")
            # Post to Nostr ("随机妹子图" is the post tag, "random girl pics")
            await post_to_nostr("随机妹子图", title, image_urls)

        # Sleep 4 hours without blocking the event loop
        await asyncio.sleep(4 * 60 * 60)


if __name__ == "__main__":
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        print("\nProcess interrupted by user.")
    except Exception as e:
        print(f"An error occurred: {e}, restarting the program.")
        asyncio.run(main())
    finally:
        print("Exiting the program.")
```
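
The `nostr_poster` module imported at the top isn't included in the post. To try the scraper without a working Nostr setup, a minimal dry-run stand-in that only mirrors the call signature used above (`post_to_nostr(tag, title, image_urls)`) could look like this; it is a hypothetical stub, not the author's actual implementation:

```
# nostr_poster.py -- hypothetical dry-run stub; the real module is not shown in the post
async def post_to_nostr(tag, title, image_urls):
    # Log what would be published instead of sending an event to a Nostr relay
    print(f"[dry run] {tag} | {title} | {len(image_urls)} images")
    for url in image_urls:
        print(f"  {url}")
```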