class PDF::Reader::AdvancedTextRunFilter
Filters a collection of TextRun objects based on a set of conditions. It can be used to filter text runs based on their attributes. The filter can return only the text runs that match the conditions (only) or only the text runs that do not match the conditions (exclude).
You can filter the text runs on any of their attributes using the operators listed in VALID_OPERATORS. Filters can be nested with ‘or’ and ‘and’ conditions.
Examples:
-
Single condition
AdvancedTextRunFilter.exclude(text_runs, text: { include: 'sample' })
-
Multiple conditions (and)
AdvancedTextRunFilter.exclude(text_runs, {
  font_size: { greater_than: 10, less_than: 15 }
})
-
Multiple possible values (or)
AdvancedTextRunFilter.exclude(text_runs, {
  font_size: { equal: [10, 12] }
})
-
Complex AND/OR filter
AdvancedTextRunFilter.exclude(text_runs, {
  and: [ { font_size: { greater_than: 10 } }, { or: [ { text: { include: "sample" } }, { width: { greater_than: 100 } } ]} ]
})
Constants
- VALID_OPERATORS
Attributes
Public Class Methods
Source
# File lib/pdf/reader/advanced_text_run_filter.rb, line 55
# Convenience entry point: builds a filter for +text_runs+ and +filter_hash+
# and returns whatever the instance-level #exclude returns.
def self.exclude(text_runs, filter_hash)
  filter = new(text_runs, filter_hash)
  filter.exclude
end
Source
# File lib/pdf/reader/advanced_text_run_filter.rb, line 61
# Stores the collection to filter and the filter description on the instance.
#
# text_runs   - the collection that #only / #exclude will filter
# filter_hash - the hash of conditions to evaluate against each text run
def initialize(text_runs, filter_hash)
  @text_runs, @filter_hash = text_runs, filter_hash
end
Source
# File lib/pdf/reader/advanced_text_run_filter.rb, line 51
# Convenience entry point: builds a filter for +text_runs+ and +filter_hash+
# and returns whatever the instance-level #only returns.
def self.only(text_runs, filter_hash)
  filter = new(text_runs, filter_hash)
  filter.only
end
Public Instance Methods
Source
# File lib/pdf/reader/advanced_text_run_filter.rb, line 71
# Returns the text runs that do NOT satisfy the filter.
# With an empty filter hash the original collection is returned untouched.
def exclude
  if filter_hash.empty?
    text_runs
  else
    text_runs.reject { |run| evaluate_filter(run) }
  end
end
Source
# File lib/pdf/reader/advanced_text_run_filter.rb, line 66
# Returns the text runs that satisfy the filter.
# With an empty filter hash the original collection is returned untouched.
def only
  if filter_hash.empty?
    text_runs
  else
    text_runs.select { |run| evaluate_filter(run) }
  end
end
Private Instance Methods
Source
# File lib/pdf/reader/advanced_text_run_filter.rb, line 116
# Applies one comparison between a text run's attribute value and a filter
# value.
#
# attribute_value - the value read from the text run
# operator        - Symbol naming the comparison to perform
# filter_value    - the value to compare against; for :equal, :not_equal,
#                   :include and :exclude it may be a single value or an
#                   Array of candidate values
#
# Returns the boolean result of the comparison.
# Raises ArgumentError when +operator+ is not one of the handled symbols,
# instead of silently returning nil (which callers would read as "no match").
def apply_operator(attribute_value, operator, filter_value)
  case operator
  when :equal
    Array(filter_value).include?(attribute_value)
  when :not_equal
    !Array(filter_value).include?(attribute_value)
  when :greater_than
    attribute_value > filter_value
  when :less_than
    attribute_value < filter_value
  when :greater_than_or_equal
    attribute_value >= filter_value
  when :less_than_or_equal
    attribute_value <= filter_value
  when :include
    # Substring match on the stringified attribute; any candidate may match.
    Array(filter_value).any? { |v| attribute_value.to_s.include?(v.to_s) }
  when :exclude
    # Inverse of :include; no candidate may appear in the attribute.
    Array(filter_value).none? { |v| attribute_value.to_s.include?(v.to_s) }
  else
    raise ArgumentError, "Invalid operator: #{operator}"
  end
end
Source
# File lib/pdf/reader/advanced_text_run_filter.rb, line 94
# True when +text_run+ satisfies every filter hash in +conditions+.
def evaluate_and_filters(text_run, conditions)
  conditions.all? { |sub_filter| evaluate_filters(text_run, sub_filter) }
end
Source
# File lib/pdf/reader/advanced_text_run_filter.rb, line 106
# Checks every operator/value pair in +conditions+ against one attribute of
# +text_run+; all pairs must hold for the result to be true.
#
# text_run   - the object whose attribute is inspected
# attribute  - name of the public reader method to call on +text_run+
# conditions - Hash of operator => filter value
#
# Raises ArgumentError when an operator is not listed in VALID_OPERATORS.
# Raises NoMethodError when +attribute+ is not a public method of +text_run+.
def evaluate_attribute_conditions(text_run, attribute, conditions)
  conditions.all? do |operator, value|
    unless VALID_OPERATORS.include?(operator)
      raise ArgumentError, "Invalid operator: #{operator}"
    end

    # public_send (not send) so a filter key can never reach private methods
    # such as Kernel#exit or Kernel#system on the text run.
    apply_operator(text_run.public_send(attribute), operator, value)
  end
end
Source
# File lib/pdf/reader/advanced_text_run_filter.rb, line 78
# Evaluates the instance's filter hash against a single text run.
#
# Every top-level key must hold: an :or key is satisfied when any of its
# sub-filters matches, an :and key when all of its sub-filters match, and any
# other key is treated as an attribute with its operator conditions.
# Keys that sit next to :or / :and in the same hash are evaluated too rather
# than being silently ignored.
def evaluate_filter(text_run)
  filter_hash.all? do |key, conditions|
    case key
    when :or
      evaluate_or_filters(text_run, conditions)
    when :and
      evaluate_and_filters(text_run, conditions)
    else
      evaluate_filters(text_run, { key => conditions })
    end
  end
end
Source
# File lib/pdf/reader/advanced_text_run_filter.rb, line 100
# Evaluates one filter hash against +text_run+; every entry must hold.
#
# An entry keyed :or or :and carries a list of nested filter hashes and is
# dispatched to the matching evaluator, which lets filters nest to any depth.
# Any other key is treated as an attribute name whose value is a hash of
# operator => filter value.
def evaluate_filters(text_run, filter_hash)
  filter_hash.all? do |attribute, conditions|
    case attribute
    when :or
      evaluate_or_filters(text_run, conditions)
    when :and
      evaluate_and_filters(text_run, conditions)
    else
      evaluate_attribute_conditions(text_run, attribute, conditions)
    end
  end
end
Source
# File lib/pdf/reader/advanced_text_run_filter.rb, line 88
# True when +text_run+ satisfies at least one filter hash in +conditions+.
def evaluate_or_filters(text_run, conditions)
  conditions.any? { |sub_filter| evaluate_filters(text_run, sub_filter) }
end