create_file

def create_file(file: BytesIO, filename: str):

Reads an uploaded file and returns a File object.

Parameters:

  • file (BytesIO): A BytesIO object representing the contents of the file.
  • filename (str): The name of the file.

Returns:

File: A File object.

create_file_from_raw_bytes

def create_file_from_raw_bytes(raw_bytes: bytes, filename: str):

Reads raw bytes and returns a File object.

Parameters:

  • raw_bytes (bytes): The raw bytes content of the file.
  • filename (str): The name of the file.

Returns:

File: A File object.

File

class File(ABC):

Represents an uploaded file comprised of Documents.

Parameters:

  • name (str): The name of the file.
  • file_id (str): The unique identifier of the file.
  • metadata (Dict[str, Any], optional): Additional metadata associated with the file. Defaults to None.
  • docs (List[Dict[str, Any]], optional): A list of documents contained within the file. Defaults to None.
  • raw_bytes (bytes, optional): The raw bytes content of the file. Defaults to b"".

__init__

def __init__(
    self,
    name: str,
    file_id: str,
    metadata: Optional[Dict[str, Any]] = None,
    docs: Optional[List[Dict[str, Any]]] = None,
    raw_bytes: bytes = b''
):

from_bytes

def from_bytes(cls, file: BytesIO, filename: str):

Creates a File object from a BytesIO object.

Parameters:

  • file (BytesIO): A BytesIO object representing the contents of the file.
  • filename (str): The name of the file.

Returns:

File: A File object.

from_raw_bytes

def from_raw_bytes(cls, raw_bytes: bytes, filename: str):

Creates a File object from raw bytes.

Parameters:

  • raw_bytes (bytes): The raw bytes content of the file.
  • filename (str): The name of the file.

Returns:

File: A File object.

__repr__

def __repr__(self):

__str__

def __str__(self):

copy

def copy(self):

Creates a deep copy of this File.

strip_consecutive_newlines

def strip_consecutive_newlines(text: str):

Strips consecutive newlines from a string.

Parameters:

  • text (str): The string to strip.

Returns:

str: The string with consecutive newlines stripped.

DocxFile

class DocxFile(File):

from_bytes

def from_bytes(cls, file: BytesIO, filename: str):

Creates a DocxFile object from a BytesIO object.

Parameters:

  • file (BytesIO): A BytesIO object representing the contents of the docx file.
  • filename (str): The name of the file.

Returns:

DocxFile: A DocxFile object.

PdfFile

class PdfFile(File):

from_bytes

def from_bytes(cls, file: BytesIO, filename: str):

Creates a PdfFile object from a BytesIO object.

Parameters:

  • file (BytesIO): A BytesIO object representing the contents of the pdf file.
  • filename (str): The name of the file.

Returns:

PdfFile: A PdfFile object.

TxtFile

class TxtFile(File):

from_bytes

def from_bytes(cls, file: BytesIO, filename: str):

Creates a TxtFile object from a BytesIO object.

Parameters:

  • file (BytesIO): A BytesIO object representing the contents of the txt file.
  • filename (str): The name of the file.

Returns:

TxtFile: A TxtFile object.

JsonFile

class JsonFile(File):

from_bytes

def from_bytes(cls, file: BytesIO, filename: str):

Creates a JsonFile object from a BytesIO object.

Parameters:

  • file (BytesIO): A BytesIO object representing the contents of the json file.
  • filename (str): The name of the file.

Returns:

JsonFile: A JsonFile object.

HtmlFile

class HtmlFile(File):

from_bytes

def from_bytes(cls, file: BytesIO, filename: str):

Creates an HtmlFile object from a BytesIO object.

Parameters:

  • file (BytesIO): A BytesIO object representing the contents of the html file.
  • filename (str): The name of the file.

Returns:

HtmlFile: An HtmlFile object.