package Plucene::Index::SegmentTermDocs;

=head1 NAME

Plucene::Index::SegmentTermDocs - Segment term docs

=head1 SYNOPSIS

    my $seg_term_docs = Plucene::Index::SegmentTermDocs
        ->new(Plucene::Index::SegmentReader $seg_reader);

    $seg_term_docs->seek($term);
    $seg_term_docs->next;
    $seg_term_docs->read;
    $seg_term_docs->skip_to($target);

=head1 DESCRIPTION

This is the segment term docs class.

It walks the frequency stream of a segment reader for one term at a time.
The stream is held as an array reference; each entry is a "doc code" whose
upper bits are the delta to the previous document number and whose lowest
bit, when set, means the frequency is 1. When the lowest bit is clear the
frequency is stored in the following array element.

=head1 METHODS

=cut

use strict;
use warnings;

use IO::Handle;
use Carp qw/confess/;
use Scalar::Util ();

use Plucene::Bitvector;

use base 'Class::Accessor::Fast';

__PACKAGE__->mk_accessors(
    qw(parent freq_stream freq_count deleted_docs doc freq));

=head2 new

    my $seg_term_docs = Plucene::Index::SegmentTermDocs
        ->new(Plucene::Index::SegmentReader $seg_reader);

This will create a new Plucene::Index::SegmentTermDocs object with the
passed segment reader. The reader's C<freq_stream> and C<deleted_docs> are
captured at construction time. Until C<seek> is called there are no entries
to read, so C<next> returns false and C<read> returns two empty lists.

=head2 parent / freq_stream / freq_count / deleted_docs / doc / freq

Get / set these attributes.

=cut

sub new {
    my ($class, $seg_reader) = @_;

    return bless {
        parent       => $seg_reader,
        freq_stream  => $seg_reader->freq_stream,     # listref
        deleted_docs => $seg_reader->deleted_docs,
        doc          => 0,

        # Nothing is readable before seek(); starting at 0 (rather than
        # undef) keeps next() and read() free of "uninitialized" warnings.
        freq_count   => 0,
    } => $class;
}

=head2 seek

    $seg_term_docs->seek($term);

Positions this object at the start of the entries for a term. The argument
may be a L<Plucene::Index::Term>, which is looked up through the parent
reader's term infos, or an already resolved term info object. A false
argument (for example C<undef> for a term that was not found) leaves the
object with no entries to read.

=cut

sub seek {
    my ($self, $ti) = @_;

    # I object to this, but hey.
    # Only call ->isa on a real object: a false $ti means "term not found"
    # and is handled by _seek, so it must not blow up here.
    if (Scalar::Util::blessed($ti) && $ti->isa("Plucene::Index::Term")) {
        $self->_seek($self->parent->{tis}->get($ti));
    }
    else {
        $self->_seek($ti);
    }
}

# Reset the read state from a term info object. A false $ti only zeroes
# freq_count; doc and the stream offset are left as they were.
sub _seek {
    my ($self, $ti) = @_;

    if (!$ti) {
        $self->freq_count(0);
        return;
    }

    $self->freq_count($ti->doc_freq);
    $self->doc(0);
    $self->{ptr} = $ti->freq_pointer;    # offset in our array
}

=head2 skipping_doc

By default this does nothing. You may wish to override it to do something.
It is called by C<next> each time an entry is passed over because its
document is marked as deleted.

=cut

sub skipping_doc { }

# Decode a single entry from the frequency stream: advance doc by the
# delta held in the upper bits of the doc code, set freq, and count the
# entry as consumed.
sub _read_one {
    my $self = shift;

    my $stream   = $self->freq_stream;
    my $doc_code = $stream->[ $self->{ptr}++ ];

    # A sequence that smacks of overoptimization
    $self->{doc} += $doc_code >> 1;

    if ($doc_code & 1) {
        # Low bit set: the frequency is implicitly 1.
        $self->freq(1);
    }
    else {
        # Low bit clear: the frequency is the next element of the stream.
        $self->freq($stream->[ $self->{ptr}++ ]);
    }

    $self->{freq_count}--;
}

=head2 next

    $seg_term_docs->next;

Moves to the next entry whose document is not deleted, updating C<doc> and
C<freq>. Returns 1 on success, or nothing (an empty list / undef) when the
entries for the current term are exhausted.

=cut

sub next {
    my $self = shift;

    while (1) {
        return if $self->freq_count == 0;

        $self->_read_one();

        my $deleted = $self->{deleted_docs};
        last unless $deleted && $deleted->get($self->{doc});

        $self->skipping_doc;
    }

    return 1;
}

=head2 read

    my ($docs, $freqs) = $seg_term_docs->read;

Reads all remaining entries for the current term. Returns two array
references: the document numbers and the matching frequencies. Entries
whose document is deleted are left out. Unlike C<next>, this does not call
C<skipping_doc> for them.

=cut

# Called by TermScorer and SegmentsTermDocs
sub read {
    my $self = shift;
    my (@docs, @freqs);

    my $deleted = $self->{deleted_docs};

    while ($self->{freq_count} > 0) {
        $self->_read_one;
        next if $deleted && $deleted->get($self->{doc});

        push @docs,  $self->doc;
        push @freqs, $self->freq;
    }

    return (\@docs, \@freqs);
}

=head2 skip_to

    $seg_term_docs->skip_to($target);

Calls C<next> until C<doc> is greater than or equal to C<$target>. Returns
1 once that holds, or 0 if the entries run out first. If C<doc> is already
greater than or equal to C<$target> on entry, no entry is consumed and 1 is
returned immediately.

=cut

sub skip_to {
    my ($self, $target) = @_;

    while ($target > $self->doc) {
        return 0 unless $self->next;
    }

    return 1;
}

1;