diff --git a/Dockerfile b/Dockerfile
index 7aad0389..7cbdc5f1 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,50 +1,45 @@
 # Use an official Python runtime as a parent image
 FROM python:3.11-slim
 
-# Set environment variables
-ENV PYTHONNOUSERSITE=1 \
-    PYTHONPATH="" \
-    PYTHONHOME="" \
-    TEMP="/installer_files/temp" \
-    TMP="/installer_files/temp" \
-    MINICONDA_DIR="/installer_files/miniconda3" \
-    INSTALL_ENV_DIR="/installer_files/lollms_env" \
-    PACKAGES_TO_INSTALL="python=3.11 git pip"
+# Set the working directory in the container
+WORKDIR /app
 
-# Create necessary directories
-RUN mkdir -p /installer_files/temp /installer_files/miniconda3 /installer_files/lollms_env
-
-# Install dependencies
-RUN apt-get update && apt-get install -y \
-    curl \
-    git \
+# Install system dependencies (ca-certificates is required for the HTTPS downloads below)
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    ca-certificates \
+    curl \
+    git \
     && rm -rf /var/lib/apt/lists/*
 
-# Download and install Miniconda
-RUN curl -LO https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
-    bash Miniconda3-latest-Linux-x86_64.sh -b -p $MINICONDA_DIR && \
-    rm Miniconda3-latest-Linux-x86_64.sh
+# Install Miniconda (-f fails the build on HTTP errors, -L follows redirects)
+RUN curl -fsSLO https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \
+    && bash Miniconda3-latest-Linux-x86_64.sh -b -p /opt/conda \
+    && rm Miniconda3-latest-Linux-x86_64.sh
 
-# Initialize conda
-RUN $MINICONDA_DIR/bin/conda init bash
+# Add Conda to PATH
+ENV PATH=/opt/conda/bin:$PATH
 
-# Create and activate the conda environment
-RUN $MINICONDA_DIR/bin/conda create -y -p $INSTALL_ENV_DIR $PACKAGES_TO_INSTALL && \
-    $MINICONDA_DIR/bin/conda install -y conda
+# Create the Conda environment and run every following RUN step inside it
+RUN conda create --name lollms_env python=3.11 git pip -y \
+    && conda clean --all --yes
+SHELL ["conda", "run", "-n", "lollms_env", "/bin/bash", "-c"]
 
-# Clone the repository and install dependencies
-RUN git clone --depth 1 --recurse-submodules https://github.com/ParisNeo/lollms-webui.git && \
-    cd lollms-webui && \
-    git submodule update --init --recursive && \
-    cd lollms_core && \
-    pip install -e . && \
-    cd ../utilities/pipmaster && \
-    pip install -e . && \
-    cd ../.. && \
-    pip install -r requirements.txt
+# Clone the repository and install its bundled packages in editable mode
+RUN git clone --depth 1 --recurse-submodules https://github.com/ParisNeo/lollms-webui.git \
+    && pip install --no-cache-dir -e ./lollms-webui/lollms_core \
+    && pip install --no-cache-dir -e ./lollms-webui/utilities/pipmaster
 
-# Set the working directory
-WORKDIR /lollms-webui
+# Install project dependencies
+WORKDIR /app/lollms-webui
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
 
-# Default command
-CMD ["bash"]
\ No newline at end of file
+# Copy the rest of the application code
+COPY . .
+
+# Expose port 9600
+EXPOSE 9600
+
+# Run the application inside lollms_env. Exec-form CMD is not affected by
+# SHELL, so the environment has to be selected explicitly here.
+CMD ["conda", "run", "--no-capture-output", "-n", "lollms_env", "python", "app.py"]
diff --git a/docker-compose.yml b/docker-compose.yml
index 1b2ee620..3f9dad9b 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,17 +1,14 @@
-version: '3.8'
-
-services:
-  webui:
-    build:
-      context: .
-      dockerfile: Dockerfile
-    volumes:
-      - ./data:/data
-      - ./data/.parisneo:/root/.parisneo/
-      - ./configs:/srv/configs
-    environment:
-      - HOME=/data
-    tty: true
-    stdin_open: true
-    ports:
+version: '3'
+services:
+  lollms-webui:
+    build: .
+    ports:
       - "9600:9600"
+    volumes:
+      # NOTE(review): this bind mount replaces the /app/lollms-webui directory
+      # built into the image with the host's ./lollms-webui - confirm that is intended.
+      - ./lollms-webui:/app/lollms-webui
+    environment:
+      - PYTHONUNBUFFERED=1
+    # Run inside the Conda environment that holds the installed dependencies.
+    command: conda run --no-capture-output -n lollms_env python app.py
diff --git a/endpoints/docs/lollms_webscraper/DOC.md b/endpoints/docs/lollms_webscraper/DOC.md
new file mode 100644
index 00000000..59421da2
--- /dev/null
+++ b/endpoints/docs/lollms_webscraper/DOC.md
@@ -0,0 +1,45 @@
+Here's a concise version of the `README.md` for LLMs:
+
+```markdown
+# WebScraper.js
+
+A lightweight JavaScript library for web scraping in the browser. Fetch and extract data using CSS selectors.
+
+## Features
+
+- Fetch HTML content from URLs.
+- Extract text using CSS selectors.
+- Browser-compatible with native JavaScript APIs.
+ +## Usage + +Include in HTML: + +```html + +``` + +Example: + +```html + +``` + +## Notes + +- **CORS**: Ensure target URLs allow cross-origin requests. +- **Compliance**: Follow website terms of service. +- **Dynamic Content**: Does not handle JavaScript-rendered content. + +## Author + +By ParisNeo, AI enthusiast. +``` + +This version provides a quick overview of the library's purpose, features, usage, and important notes, making it suitable for quick reference or inclusion in a larger document. \ No newline at end of file diff --git a/endpoints/docs/lollms_webscraper/README.md b/endpoints/docs/lollms_webscraper/README.md new file mode 100644 index 00000000..adfb035c --- /dev/null +++ b/endpoints/docs/lollms_webscraper/README.md @@ -0,0 +1,63 @@ +# WebScraper.js + +WebScraper.js is a lightweight JavaScript library designed for web scraping tasks directly from the browser. It allows you to fetch and extract data from web pages using CSS selectors. This library is ideal for simple scraping tasks where you need to gather text content from static web pages. + +## Features + +- **Fetch HTML Content**: Retrieve HTML content from a specified URL. +- **Extract Data**: Use CSS selectors to extract text content from HTML elements. +- **Browser Compatible**: Designed to work in a browser environment using native JavaScript APIs. + +## Installation + +To use WebScraper.js in your project, include it in your HTML file using a ` +``` + +## Usage + +Here's a basic example of how to use WebScraper.js to scrape data from a web page: + +```html + + + + + + Web Scraper Test + + +

Web Scraper Test

+
+
+
+
+```
+
+## Important Considerations
+
+- **CORS Restrictions**: Be aware of Cross-Origin Resource Sharing (CORS) restrictions when making HTTP requests from the browser. Ensure that the server hosting the target URL allows cross-origin requests.
+- **Security and Compliance**: Always ensure that your web scraping activities comply with the terms of service of the websites you are accessing.
+- **Dynamic Content**: This library does not handle JavaScript-rendered content. For more complex scraping tasks, consider using a headless browser like Puppeteer in a Node.js environment.
+
+## License
+
+This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
+
+## Contributing
+
+Contributions are welcome! Please feel free to submit a pull request or open an issue if you have any suggestions or improvements.
+
+## Author
+
+Created by ParisNeo, a computer geek passionate about AI.
diff --git a/endpoints/libraries/lollms_webscraper.js b/endpoints/libraries/lollms_webscraper.js
new file mode 100644
index 00000000..50568fb3
--- /dev/null
+++ b/endpoints/libraries/lollms_webscraper.js
@@ -0,0 +1,56 @@
+/**
+ * Minimal browser-side scraper: downloads a page with fetch(), parses it
+ * with DOMParser and collects the text of elements matching a CSS selector.
+ */
+class WebScraper {
+  /**
+   * Download a page and parse its body as an HTML document.
+   *
+   * @param {string} url - Address of the page to download.
+   * @returns {Promise<Document>} Document parsed from the response body.
+   * @throws {Error} If the request fails or the response status is not 2xx.
+   */
+  async fetchHTML(url) {
+    try {
+      const response = await fetch(url);
+      // fetch() rejects only on network failure, so HTTP error statuses are
+      // checked here; otherwise the server's error page would be scraped.
+      if (!response.ok) {
+        throw new Error(`HTTP ${response.status} ${response.statusText}`);
+      }
+      const text = await response.text();
+      return new DOMParser().parseFromString(text, 'text/html');
+    } catch (error) {
+      console.error(`Error fetching the HTML: ${error}`);
+      throw error;
+    }
+  }
+
+  /**
+   * Collect the trimmed text content of every element matching a selector.
+   *
+   * @param {Document} doc - Document to search.
+   * @param {string} selector - CSS selector passed to querySelectorAll().
+   * @returns {string[]} One entry per matching element.
+   */
+  extractData(doc, selector) {
+    return Array.from(doc.querySelectorAll(selector), (element) =>
+      element.textContent.trim()
+    );
+  }
+
+  /**
+   * Fetch a page and extract the text of the elements matching a selector.
+   *
+   * @param {string} url - Address of the page to download.
+   * @param {string} selector - CSS selector identifying the elements to read.
+   * @returns {Promise<string[]>} Trimmed text of each matching element.
+   */
+  async scrape(url, selector) {
+    const doc = await this.fetchHTML(url);
+    return this.extractData(doc, selector);
+  }
+}
+
+// Expose the WebScraper class to the global window object
+window.WebScraper = WebScraper;