camel.datagen.self_instruct.filter package#

Submodules#

camel.datagen.self_instruct.filter.filter_function module#

class camel.datagen.self_instruct.filter.filter_function.FilterFunction[source]#

Bases: ABC

A base abstract class for filter functions.

Subclasses must implement the apply method, which determines whether a given instruction passes the filter criteria.

abstract apply(instruction: str) → bool[source]#

Evaluate the given instruction based on the filter’s criteria.

Parameters:

instruction (str) – The instruction to evaluate.

Returns:

True if the instruction passes the filter, False otherwise.

Return type:

bool

class camel.datagen.self_instruct.filter.filter_function.KeywordFilter(keywords: List[str])[source]#

Bases: FilterFunction

Filters instructions that contain specific undesirable keywords.

Parameters:

keywords (List[str]) – A list of keywords to filter out.

apply(instruction: str) → bool[source]#

Filter the instruction

Parameters:

instruction (str) – the instruction to be filtered.

Returns:

True if the instruction does not contain any of the keywords, False otherwise.

Return type:

bool

class camel.datagen.self_instruct.filter.filter_function.LengthFilter(min_len: int = 5, max_len: int = 200)[source]#

Bases: FilterFunction

Filters instructions based on their word count.

Parameters:
  • min_len (int) – The minimum word count required for an instruction. (default:5)

  • max_len (int) – The maximum word count allowed for an instruction. (default:200)

apply(instruction: str) → bool[source]#

Filter the instruction

Parameters:

instruction (str) – the instruction to be filtered.

Returns:

True if the word count of the instruction is within the range [min_len, max_len].

Return type:

bool

class camel.datagen.self_instruct.filter.filter_function.NonEnglishFilter[source]#

Bases: FilterFunction

Filters instructions that do not begin with English letters.

apply(instruction: str) → bool[source]#

Filter the instruction

Parameters:

instruction (str) – the instruction to be filtered.

Returns:

True if the instruction starts with an English letter.

Return type:

bool

class camel.datagen.self_instruct.filter.filter_function.PunctuationFilter[source]#

Bases: FilterFunction

Filters instructions that begin with a non-alphanumeric character.

apply(instruction: str) → bool[source]#

Filter the instruction

Parameters:

instruction (str) – the instruction to be filtered.

Returns:

True if the instruction does not start with punctuation.

Return type:

bool

class camel.datagen.self_instruct.filter.filter_function.RewardModelFilter(reward_model: BaseRewardModel, threshold: float = 0.5)[source]#

Bases: FilterFunction

Filters instructions based on scores provided by a reward model.

Parameters:
  • reward_model (BaseRewardModel) – The reward model used to evaluate the instructions.

  • threshold (float) – The minimum score required for an instruction to pass the filter. (default:0.5)

apply(instruction: str) → bool[source]#

Filter the instruction

Parameters:

instruction (str) – The instruction to be filtered.

Returns:

True if the instruction’s score is above the threshold.

Return type:

bool

Raises:

ValueError – If score_types is empty or if the required score is not found in scores.

class camel.datagen.self_instruct.filter.filter_function.RougeSimilarityFilter(existing_instructions: List[str], threshold: float = 0.7)[source]#

Bases: FilterFunction

Filters instructions that are too similar to existing instructions based on ROUGE scores.

Parameters:
  • existing_instructions (List[str]) – A list of existing instructions to compare against.

  • threshold (float) – The similarity threshold for filtering. (default:0.7)

apply(instruction: str) → bool[source]#

Filter the instruction

Parameters:

instruction (str) – the instruction to be filtered.

Returns:

True if the instruction’s similarity to any existing instruction is below the threshold.

Return type:

bool

camel.datagen.self_instruct.filter.filter_registry module#

camel.datagen.self_instruct.filter.filter_registry.register_filter(name: str, constructor: Callable[[Dict[str, Any]], FilterFunction])[source]#

Registers a new filter constructor in FILTER_REGISTRY.

Parameters:
  • name (str) – Unique name of the filter.

  • constructor (Callable[[Dict[str, Any]], FilterFunction]) – Function to create the filter using a dictionary of parameters.

camel.datagen.self_instruct.filter.instruction_filter module#

class camel.datagen.self_instruct.filter.instruction_filter.InstructionFilter(filters_config: Dict[str, Dict[str, Any]], stop_on_first_failure: bool = False)[source]#

Bases: object

add_filter(filter_function: FilterFunction)[source]#

Add a custom filter function to the InstructionFilter. This allows adding filters that are not in the registry.

Parameters:

filter_function (FilterFunction) – The filter function to be added

filter(prompt: str, instruction: str, return_details: bool = False) → bool | Tuple[bool, List[str]][source]#

Check if the given instruction passes all filter functions.

Parameters:
  • prompt (str) – The prompt used to generate the instruction.

  • instruction (str) – The instruction to evaluate.

  • return_details (bool) – If True, returns a tuple (bool, List[str]) where the list contains the names of filters that failed. (default:False)

Returns:

True if the instruction passes all filters, False otherwise; or a tuple (bool, List[str]) of that result and the names of the filters that failed if return_details is True.

Return type:

bool | Tuple[bool, List[str]]

Module contents#

class camel.datagen.self_instruct.filter.FilterFunction[source]#

Bases: ABC

A base abstract class for filter functions.

Subclasses must implement the apply method, which determines whether a given instruction passes the filter criteria.

abstract apply(instruction: str) → bool[source]#

Evaluate the given instruction based on the filter’s criteria.

Parameters:

instruction (str) – The instruction to evaluate.

Returns:

True if the instruction passes the filter, False otherwise.

Return type:

bool

class camel.datagen.self_instruct.filter.InstructionFilter(filters_config: Dict[str, Dict[str, Any]], stop_on_first_failure: bool = False)[source]#

Bases: object

add_filter(filter_function: FilterFunction)[source]#

Add a custom filter function to the InstructionFilter. This allows adding filters that are not in the registry.

Parameters:

filter_function (FilterFunction) – The filter function to be added

filter(prompt: str, instruction: str, return_details: bool = False) → bool | Tuple[bool, List[str]][source]#

Check if the given instruction passes all filter functions.

Parameters:
  • prompt (str) – The prompt used to generate the instruction.

  • instruction (str) – The instruction to evaluate.

  • return_details (bool) – If True, returns a tuple (bool, List[str]) where the list contains the names of filters that failed. (default:False)

Returns:

True if the instruction passes all filters, False otherwise; or a tuple (bool, List[str]) of that result and the names of the filters that failed if return_details is True.

Return type:

bool | Tuple[bool, List[str]]

class camel.datagen.self_instruct.filter.KeywordFilter(keywords: List[str])[source]#

Bases: FilterFunction

Filters instructions that contain specific undesirable keywords.

Parameters:

keywords (List[str]) – A list of keywords to filter out.

apply(instruction: str) → bool[source]#

Filter the instruction

Parameters:

instruction (str) – the instruction to be filtered.

Returns:

True if the instruction does not contain any of the keywords, False otherwise.

Return type:

bool

class camel.datagen.self_instruct.filter.LengthFilter(min_len: int = 5, max_len: int = 200)[source]#

Bases: FilterFunction

Filters instructions based on their word count.

Parameters:
  • min_len (int) – The minimum word count required for an instruction. (default:5)

  • max_len (int) – The maximum word count allowed for an instruction. (default:200)

apply(instruction: str) → bool[source]#

Filter the instruction

Parameters:

instruction (str) – the instruction to be filtered.

Returns:

True if the word count of the instruction is within the range [min_len, max_len].

Return type:

bool

class camel.datagen.self_instruct.filter.NonEnglishFilter[source]#

Bases: FilterFunction

Filters instructions that do not begin with English letters.

apply(instruction: str) → bool[source]#

Filter the instruction

Parameters:

instruction (str) – the instruction to be filtered.

Returns:

True if the instruction starts with an English letter.

Return type:

bool

class camel.datagen.self_instruct.filter.PunctuationFilter[source]#

Bases: FilterFunction

Filters instructions that begin with a non-alphanumeric character.

apply(instruction: str) → bool[source]#

Filter the instruction

Parameters:

instruction (str) – the instruction to be filtered.

Returns:

True if the instruction does not start with punctuation.

Return type:

bool

class camel.datagen.self_instruct.filter.RougeSimilarityFilter(existing_instructions: List[str], threshold: float = 0.7)[source]#

Bases: FilterFunction

Filters instructions that are too similar to existing instructions based on ROUGE scores.

Parameters:
  • existing_instructions (List[str]) – A list of existing instructions to compare against.

  • threshold (float) – The similarity threshold for filtering. (default:0.7)

apply(instruction: str) → bool[source]#

Filter the instruction

Parameters:

instruction (str) – the instruction to be filtered.

Returns:

True if the instruction’s similarity to any existing instruction is below the threshold.

Return type:

bool