package WWW::Search::Ebay; use strict; use warnings; our $VERSION = 2.282; =head1 NAME WWW::Search::Ebay - backend for searching www.ebay.com =head1 SYNOPSIS use WWW::Search; my $oSearch = new WWW::Search('Ebay'); my $sQuery = WWW::Search::escape_query("C-10 carded Yakface"); $oSearch->native_query($sQuery); while (my $oResult = $oSearch->next_result()) { print $oResult->url, "\n"; } =head1 DESCRIPTION This class is a Ebay specialization of L. It handles making and interpreting Ebay searches F. This class exports no public interface; all interaction should be done through L objects. =head1 NOTES The search is done against CURRENT running AUCTIONS only. (NOT completed auctions, NOT eBay Stores items, NOT Buy-It-Now only items.) (If you want to search completed auctions, use the L module.) (If you want to search eBay Stores, use the L module.) The query is applied to TITLES only. This module can return only the first 200 results matching your query. In the resulting L objects, the description() field consists of a human-readable combination (joined with semicolon-space) of the Item Number; number of bids; and high bid amount (or starting bid amount). In the resulting L objects, the end_date() field contains a human-readable DTG of when the auction is scheduled to end (in the form "YYYY-MM-DD HH:MM TZ"). If environment variable TZ is set, the time will be converted to that timezone; otherwise the time will be left in ebay.com's default timezone (US/Pacific). In the resulting L objects, the bid_count() field contains the number of bids as an integer. In the resulting L objects, the bid_amount() field is a string containing the high bid or starting bid as a human-readable monetary value in seller-native units, e.g. "$14.95" or "GBP 6.00". In the resulting L objects, the sold() field will be non-zero if the item has already sold. (Only if you're using WWW::Search::Ebay::Completed) After a successful search, your search object will contain an element named 'categories' which will be a reference to an array of hashes containing names and IDs of categories and nested subcategories, and the count of items matching your query in each category and subcategory. (Special thanks to Nick Lokkju for this code!) For example: $oSearch->{categories} = [ { 'ID' => '1', 'Count' => 19, 'Name' => 'Collectibles', 'Subcategory' => [ { 'ID' => '13877', 'Count' => 11, 'Name' => 'Historical Memorabilia' }, { 'ID' => '11450', 'Count' => 1, 'Name' => 'Clothing, Shoes & Accessories' }, ] }, { 'ID' => '281', 'Count' => 1, 'Name' => 'Jewelry & Watches', } ]; If your query string happens to be an eBay item number, (i.e. if ebay.com redirects the query to an auction page), you will get back one WWW::Search::Result without bid or price information. =head1 OPTIONS =over =item Limit search by price range Contributed by Brian Wilson: $oSearch->native_query($sQuery, { _mPrRngCbx=>'1', _udlo=>$minPrice, _udhi=>$maxPrice, } ); =back =head1 PUBLIC METHODS OF NOTE =over =cut use base 'WWW::Search'; use constant DEBUG_DATES => 0; use constant DEBUG_COLUMNS => 0; use Carp (); use CGI; use Data::Dumper; # for debugging only use Date::Manip; # Date_Init("setdate=now,America/Los_Angeles"); use HTML::TreeBuilder; use LWP::Simple; use WWW::Search qw( generic_option strip_tags ); # We need the version that has the sold() method: use WWW::SearchResult 2.072; use WWW::Search::Result; our $MAINTAINER = 'Martin Thurn '; my $cgi = new CGI; sub _native_setup_search { my ($self, $native_query, $rhOptsArg) = @_; # Set some private variables: $self->{_debug} ||= $rhOptsArg->{'search_debug'}; $self->{_debug} = 2 if ($rhOptsArg->{'search_parse_debug'}); $self->{_debug} ||= 0; my $DEFAULT_HITS_PER_PAGE = 200; $self->{'_hits_per_page'} = $DEFAULT_HITS_PER_PAGE; $self->user_agent('non-robot'); $self->agent_name('Mozilla/5.0 (compatible; Mozilla/4.0; MSIE 6.0; Windows NT 5.1; Q312461)'); $self->{'_next_to_retrieve'} = 0; $self->{'_num_hits'} = 0; # As of 2013-03-01 (probably much before that, but first time I # looked at it in quite a while):

To use our basic experience # which does not require JavaScript, click # here.

$self->{search_host} ||= 'http://www.ebay.com'; # as of 2013-03-01 $self->{search_host} ||= 'http://search.ebay.com'; $self->{search_path} ||= '/sch/i.html'; # as of 2013-03-01 $self->{search_path} ||= '/ws/search/SaleSearch'; if (!defined($self->{_options})) { # http://shop.ebay.com/items/_W0QQLHQ5fBINZ1?_nkw=trinidad+flag&_sacat=0&_fromfsb=&_trksid=m270.l1313&_odkw=burkina+faso+flag&_osacat=0 $self->{_options} = { satitle => $native_query, # Search AUCTIONS ONLY: sasaleclass => 1, # Display item number explicitly: socolumnlayout => 2, # Do not convert everything to US$: socurrencydisplay => 1, sorecordsperpage => $self->{_hits_per_page}, _ipg => $self->{_hits_per_page}, # Display absolute times, NOT relative times: sotimedisplay => 0, # Use the default columns, NOT anything the # user may have customized (which would come # through via cookies): socustoverride => 1, # Output basic HTML, not JavaScript: _armrs => 1, }; # https://www.ebay.com/sch/i.html?LH_Auction=1&SortProperty=MetaEndSort&_armrs=1&_ipg=200&_fosrp=1&_nkw=zeppelin&_sop=1 $self->{_options} = { _nkw => $native_query, _armrs => 1, # Turn off JavaScript: _jsoff => 1, # Search AUCTIONS ONLY: LH_Auction => 1, _ipg => $self->{_hits_per_page}, # Which page are we on: # _from => 2, # }; } # if if (defined($rhOptsArg)) { # Copy in new options. foreach my $key (keys %$rhOptsArg) { # print STDERR " DDD inspecting option $key..."; if (WWW::Search::generic_option($key)) { # print STDERR "promote & delete\n"; $self->{$key} = $rhOptsArg->{$key} if defined($rhOptsArg->{$key}); delete $rhOptsArg->{$key}; } else { # print STDERR "copy\n"; $self->{_options}->{$key} = $rhOptsArg->{$key} if defined($rhOptsArg->{$key}); } } # foreach } # if # Clear the list of results per category: $self->{categories} = []; # Finally, figure out the url. $self->{_next_url} = $self->{'search_host'} . $self->{'search_path'} .'?'. $self->hash_to_cgi_string($self->{_options}); } # _native_setup_search =item user_agent_delay Introduce a few-seconds delay to avoid overwhelming the server. =cut sub user_agent_delay { my $self = shift; # return; my $iSecs = int(3 + rand(3)); print STDERR " DDD sleeping $iSecs seconds...\n" if (0 < $self->{_debug}); sleep($iSecs); } # user_agent_delay =item need_to_delay Controls whether we do the delay or not. =cut sub need_to_delay { 1; } # need_to_delay =item preprocess_results_page Grabs the eBay Official Time so that when we parse the DTG from the HTML, we can convert / return exactly what eBay means for each one. =cut sub preprocess_results_page { my $self = shift; my $sPage = shift; if (25 < $self->{_debug}) { # print STDERR Dumper($self->{response}); # For debugging: print STDERR $sPage; exit 88; } # if my $sTitle = $self->{response}->header('title') || ''; my $qrTitle = $self->_title_pattern; if ($sTitle =~ m!$qrTitle!) { # print STDERR " DDD got a Title: ==$sTitle==\n"; # This search returned a single auction item page. We do not need # to fetch eBay official time. } # if else { # Use the UserAgent object in $self to fetch the official ebay.com time: $self->{_ebay_official_time} = 'now'; # my $sPageDate = get('http://cgi1.ebay.com/aw-cgi/eBayISAPI.dll?TimeShow') || ''; my $sPageDate = $self->http_request(GET => 'http://viv.ebay.com/ws/eBayISAPI.dll?EbayTime')->content || ''; if ($sPageDate eq '') { die " EEE could not fetch official eBay time"; } else { my $tree = HTML::TreeBuilder->new; $tree->utf8_mode('true'); $tree->parse($sPageDate); $tree->eof; my $s = $tree->as_text; # print STDERR " DDD official time =====$s=====\n"; if ($s =~ m!The official eBay Time is now:(.+?(P[SD]T))\s*Pacific\s!i) { my ($sDateRaw, $sTZ) = ($1, $2); DEBUG_DATES && print STDERR " DDD official time raw ==$sDateRaw==\n"; # Apparently, ParseDate() automatically converts to local timezone: my $date = ParseDate($sDateRaw); DEBUG_DATES && print STDERR " DDD official time cooked ==$date==\n"; $self->{_ebay_official_time} = $date; } # if } # else } # else return $sPage; # Ebay used to send malformed HTML: # my $iSubs = 0 + ($sPage =~ s!!!gi); # print STDERR " DDD deleted $iSubs extraneous tags\n" if 1 < $self->{_debug}; } # preprocess_results_page sub _cleanup_url { my $self = shift; my $sURL = shift() || ''; # Make sure we don't return two different URLs for the same item: $sURL =~ s!&rd=\d+!!; $sURL =~ s!&category=\d+!!; $sURL =~ s!&ssPageName=[A-Z0-9]+!!; return $sURL; } # _cleanup_url sub _format_date { my $self = shift; return UnixDate(shift, '%Y-%m-%d %H:%M %Z'); } # _format_date sub _bidcount_as_text { my $self = shift; my $hit = shift; my $iBids = $hit->bid_count || 'no'; my $s = "$iBids bid"; $s .= 's' if ($iBids ne '1'); $s .= '; '; } # _bidcount_as_text sub _bidamount_as_text { my $self = shift; my $hit = shift; my $iPrice = $hit->bid_amount || 'unknown'; my $sDesc = ''; $sDesc .= $hit->bid_count ? 'current' : 'starting'; $sDesc .= " bid $iPrice"; } # _bidamount_as_text sub _create_description { my $self = shift; my $hit = shift; my $iItem = $hit->item_number || 'unknown'; my $sWhen = shift() || 'current'; # print STDERR " DDD _c_d($iItem, $iBids, $iPrice, $sWhen)\n"; my $sDesc = "Item \043$iItem; ". $self->_bidcount_as_text($hit); $sDesc .= $self->_bidamount_as_text($hit); return $sDesc; } # _create_description sub _parse_category { my $self = shift; my $oTD = shift; return -1 if ! ref $oTD; my $oA = $oTD->look_down(_tag => 'a'); return -1 if ! ref $oA; if (DEBUG_COLUMNS || (1 < $self->{_debug})) { my $s = $oA->as_HTML; print STDERR " DDD TDcategory's A ===$s===\n"; } # if my $sURL = $oA->attr('href') || q{}; if ($sURL =~ m/sibeleafcat=(\d+)/) { return $1; } # if return -1; } # _parse_category sub _parse_price { my $self = shift; my $oTDprice = shift; my $hit = shift; return 0 unless (ref $oTDprice); my $s = $oTDprice->as_HTML; if (DEBUG_COLUMNS || (1 < $self->{_debug})) { print STDERR " DDD try TDprice ===$s===\n"; } # if if ($oTDprice->attr('class') =~ m'\bebcBid\b') { # If we see this, we must have been searching for Stores items # but we ran off the bottom of the Stores item list and ran # into the list of "other" items. return 1; # We could probably return 0 to abandon the rest of the page, but # maybe just maybe we hit this because of a parsing glitch which # might correct itself on the next TD. } # if if ($oTDprice->attr('class') !~ m'\b(ebcPr|prices|prc)\b') { # If we see this, we probably were searching for Store items # but we ran off the bottom of the Store item list and ran # into the list of Auction items. return 0; # There is a separate backend for searching Auction items! } # if if ( $oTDprice->look_down(_tag => 'span', class => 'ebSold') || $oTDprice->look_down(_tag => 'span', class => 'bold bidsold') ) { # This item sold, even if it had no bids (i.e. Buy-It-Now) $hit->sold(1); } # if if (my $oChild = $oTDprice->look_down(_tag => 'div', itemprop => 'price')) { # As of 2013-03, we need to separate out the price and the bid: $oTDprice = $oChild; } # if my $iPrice = $oTDprice->as_text; print STDERR " DDD raw iPrice ===$iPrice===\n" if (DEBUG_COLUMNS || (1 < $self->{_debug})); $iPrice =~ s!£!GBP!; $iPrice =~ s!\s*Trending.+!!; $iPrice =~ s!\s*Was.+!!; # Convert nbsp to regular space: $iPrice =~ s!\240!\040!g; # I don't know why there are sometimes weird characters in there: $iPrice =~ s!Â!!g; $iPrice =~ s!Â!!g; my $currency = $self->_currency_pattern; my $W = $self->whitespace_pattern; $iPrice =~ s!($currency)$W*($currency)!$1 (Buy-It-Now for $2)!; if ($iPrice =~ s/FREE\s+SHIPPING//i) { $hit->shipping('free'); } # if $hit->bid_amount($iPrice); return 1; } # _parse_price sub _parse_bids { my $self = shift; my $oTDbids = shift; my $hit = shift; my $iBids = 0; if (ref $oTDbids) { if (my $oChild = $oTDbids->look_down(_tag => 'div', class => 'bids')) { # As of 2013-03, we need to separate out the price and the bid: $oTDbids = $oChild; } # if my $s = $oTDbids->as_HTML; if (DEBUG_COLUMNS || (1 < $self->{_debug})) { print STDERR " DDD TDbids ===$s===\n"; } # if if ($oTDbids->attr('class') !~ m'\b(ebcBid|bids)\b') { # If we see this, we probably were searching for Store items # but we ran off the bottom of the Store item list and ran # into the list of Auction items. return 0; # There is a separate backend for searching Auction items! } # if $iBids = 1 if ($oTDbids->as_text =~ m/SOLD/i); $iBids = $1 if ($oTDbids->as_text =~ m/(\d+)/); my $W = $self->whitespace_pattern; if ( # Bid listed as hyphen means no bids: ($iBids =~ m!\A$W*-$W*\Z!) || # Bid listed as whitespace means no bids: ($iBids =~ m!\A$W*\Z!) ) { $iBids = 0; } # if } # if if ($iBids =~ m/NO/i) { $iBids = 0; } # if $iBids ||= 0; # print STDERR " DDD setting bid_count to =$iBids=\n"; $hit->bid_count($iBids); return 1; } # _parse_bids sub _parse_shipping { my $self = shift; my $oTD = shift; my $hit = shift; if ($oTD->attr('class') =~ m'\bebcCty\b') { # If we see this, we probably were searching for UK auctions # but we ran off the bottom of the UK item list and ran # into the list of international items. return 0; } # if if (my $oChild = $oTD->look_down(_tag => 'span', class => 'ship')) { # As of 2013-03, we need to separate out the price and the # shipping for some flavors of eBay: $oTD = $oChild; } # if my $iPrice = $oTD->as_text; # I don't know why there are sometimes weird characters in there: $iPrice =~ s!Â!!g; $iPrice =~ s!Â!!g; print STDERR " DDD raw shipping ===$iPrice===\n" if (DEBUG_COLUMNS || (1 < $self->{_debug})); if ($iPrice =~ m/UNKNOWN/i) { # No shipping info provided: return 1; } # if if ($iPrice =~ m/FREE/i) { $iPrice = 0.00; } # if return 0 if ($iPrice !~ m/\d/); $iPrice =~ s!£!GBP!; $hit->shipping($iPrice); return 1; } # _parse_shipping sub _parse_skip { my $self = shift; my $oTD = shift; my $hit = shift; return 1; } # _parse_skip sub _parse_enddate { my $self = shift; my $oTDdate = shift; my $hit = shift; my $sDate = 'unknown'; my ($s, $sDateTemp); if (ref $oTDdate) { $sDateTemp = $oTDdate->as_text; $s = $oTDdate->as_HTML; } # if else { $sDateTemp = $s = $oTDdate; } print STDERR " DDD TDdate ===$s===\n" if (DEBUG_COLUMNS || (1 < $self->{_debug})); # New version as of 2013-03: if ($s =~ m/\bTIMEMS="(\d+)"/i) { $sDate = $1; $sDate = $self->_format_date(ParseDate(q{epoch }. int($sDate/1000))); print STDERR " DDD sDate =$sDate=\n" if (DEBUG_COLUMNS || (1 < $self->{_debug})); $hit->end_date($sDate); # For backward-compatibility: $hit->change_date($sDate); return 1; } if (ref($oTDdate)) { my $sClass = $oTDdate->attr('class') || q{}; if ($sClass !~ m/\b(col3|ebcTim|ti?me)\b/) { # If we see this, we probably were searching for Buy-It-Now items # but we ran off the bottom of the item list and ran into the list # of Store items. return 0; # There is a separate backend for searching Store items! } # if } # if print STDERR " DDD raw sDateTemp ===$sDateTemp===\n" if (DEBUG_DATES || (1 < $self->{_debug})); if ($sDateTemp =~ m/---/) { # If we see this, we probably were searching for Buy-It-Now items # but we ran off the bottom of the item list and ran into the list # of Store items. return 0; # There is a separate backend for searching Store items! } # if # I don't know why there are sometimes weird characters in there: $sDateTemp =~ s!Â!!g; $sDateTemp =~ s!Â!!g; $sDateTemp =~ s!_process_date_abbrevs($sDateTemp); print STDERR " DDD cooked sDateTemp ===$sDateTemp===\n" if (DEBUG_DATES || (1 < $self->{_debug})); print STDERR " DDD official time =====$self->{_ebay_official_time}=====\n" if (DEBUG_DATES || (1 < $self->{_debug})); my $date = DateCalc($self->{_ebay_official_time}, " + $sDateTemp"); print STDERR " DDD date ===$date===\n" if (DEBUG_DATES || (1 < $self->{_debug})); $sDate = $self->_format_date($date); print STDERR " DDD sDate ===$sDate===\n" if (DEBUG_DATES || (1 < $self->{_debug})); $hit->end_date($sDate); # For backward-compatibility: $hit->change_date($sDate); return 1; } # _parse_enddate =item result_as_HTML Given a WWW::SearchResult object representing an auction, formats it human-readably with HTML. An optional second argument is the date format, a string as specified for Date::Manip::UnixDate. Default is '%Y-%m-%d %H:%M:%S' my $sHTML = $oSearch->result_as_HTML($oSearchResult, '%H:%M %b %E'); =cut sub result_as_HTML { my $self = shift; my $oSR = shift or return ''; my $sDateFormat = shift || q'%Y-%m-%d %H:%M:%S'; my $dateEnd = ParseDate($oSR->end_date) || q{}; my $iItemNum = $oSR->item_number; my $sSold = $oSR->sold ? $cgi->font({color=>'green'}, 'sold') .q{; } : $cgi->font({color=>'red'}, 'not sold') .q{; }; my $sBids = $self->_bidcount_as_text($oSR); my $sPrice = $self->_bidamount_as_text($oSR); my $sEndedColor = 'green'; my $sEndedWord = 'ends'; print STDERR " DDD compare end_date ==$dateEnd== to now\n" if (DEBUG_DATES || (1 < $self->{_debug})); if (Date_Cmp($dateEnd, 'now') < 0) { $sEndedColor = 'red'; $sEndedWord = 'ended'; } # if my $sEnded = $cgi->font({ color => $sEndedColor }, UnixDate($dateEnd, qq"$sEndedWord $sDateFormat")); my $s = $cgi->b( $cgi->a({href => $oSR->url}, $oSR->title), $cgi->br, qq{$sEnded; $sSold$sBids$sPrice}, ); $s .= $cgi->br; $s .= $cgi->font({size => -1}, $cgi->a({href => qq{http://cgi.ebay.com/ws/eBayISAPI.dll?MakeTrack&item=$iItemNum}}, 'watch this item in MyEbay'), ); # Format the entire thing as Helvetica: $s = $cgi->font({face => 'Arial, Helvetica'}, $s); return $s; } # result_as_HTML =back =head1 METHODS TO BE OVERRIDDEN IN SUBCLASSING You only need to read about these if you are subclassing this module (i.e. making a backend for another flavor of eBay search). =over =cut =item _get_result_count_elements Given an HTML::TreeBuilder object, return a list of HTML::Element objects therein which could possibly contain the approximate result count verbiage. =cut sub _get_result_count_elements { my $self = shift; my $tree = shift; my @ao; push @ao, $tree->look_down( # as of 2020-04 '_tag' => 'h1', class => 'srp-controls__count-heading' ); push @ao, $tree->look_down( # as of 2020-04 '_tag' => 'div', class => 'srp-controls__control srp-controls__count' ); push @ao, $tree->look_down( # as of 2015-06 '_tag' => 'span', class => 'listingscnt' ); push @ao, $tree->look_down( # as of 2020-03 '_tag' => 'span', class => 'rcnt' ); push @ao, $tree->look_down( '_tag' => 'div', class => 'fpcc' ); push @ao, $tree->look_down( '_tag' => 'div', class => 'fpc' ); push @ao, $tree->look_down( # For basic search, as of 2013-03: '_tag' => 'div', class => 'clt' ); push @ao, $tree->look_down( '_tag' => 'div', class => 'count' ); push @ao, $tree->look_down( '_tag' => 'div', class => 'pageCaptionDiv' ); push @ao, $tree->look_down( # for BySellerID as of 2010-07 '_tag' => 'div', id => 'rsc' ); return @ao; } # _get_result_count_elements =item _get_itemtitle_tds Given an HTML::TreeBuilder object, return a list of HTML::Element objects therein representing elements which could possibly contain the HTML for result title and hotlink. =cut sub _get_itemtitle_tds { my $self = shift; my $tree = shift; printf STDERR (" TTT this is Ebay::_get_itemtitle_tds\n") if (1 < $self->{_debug}); my @ao = $tree->look_down(_tag => 'td', class => 'details', ); push @ao, $tree->look_down(_tag => 'td', class => 'ebcTtl', ); push @ao, $tree->look_down(_tag => 'td', class => 'dtl', # This is for eBay auctions as of 2010-07 ); # This is for BuyItNow (thanks to Brian Wilson): push @ao, $tree->look_down(_tag => 'td', class => 'details ttl', ); push @ao, $tree->look_down(_tag => 'div', class => 's-item__info clearfix', ); my $oDiv = $tree->look_down(_tag => 'div', id => 'ResultSetItems', ); if (ref $oDiv) { push @ao, $oDiv->look_down(_tag => 'td', class => 'dtl dtlsp', ); push @ao, $oDiv->look_down(_tag => 'h3', class => 'lvtitle', ); } # if # printf STDERR (" DDD found %d itemtitle tags", scalar(@ao)) if (1 < $self->{_debug}); return @ao; } # _get_itemtitle_tds sub _parse_tree { my $self = shift; my $tree = shift; print STDERR " FFF Ebay::_parse_tree\n" if (1 < $self->{_debug}); my $sTitle = $self->{response}->header('title') || ''; my $qrTitle = $self->_title_pattern; # print STDERR " DDD trying to match ==$sTitle== against ==$qrTitle==\n"; if ($sTitle =~ m!$qrTitle!) { my ($sTitle, $iItem, $sDateRaw) = ($1, $2, $3); my $sDateCooked = $self->_format_date($sDateRaw); my $hit = new WWW::Search::Result; $hit->item_number($iItem); $hit->end_date($sDateCooked); # For backward-compatibility: $hit->change_date($sDateCooked); $hit->title($sTitle); $hit->add_url($self->{response}->request->uri); $hit->description($self->_create_description($hit)); # print Dumper($hit); push(@{$self->{cache}}, $hit); $self->{'_num_hits'}++; # $self->approximate_result_count(1); return 1; } # if # First, see if: there were zero results and eBay automatically did # a spell-check and searched for other words (or searched for a # subset of query terms): my $oDIV = $tree->look_down( _tag => 'div', class => 'messages', ); if (ref $oDIV) { my $sText = $oDIV->as_text; if ( ($sText =~ m/0 results found for /) && ( ($sText =~ m/ so we searched for /) || ($sText =~ m/ so we removed keywords /) ) ) { $self->approximate_result_count(0); return 0; } # if } # if # See if our query was completely replaced by a similar-spelling query: my $oLI = $tree->look_down(_tag => 'li', class => 'ebInf', ); if (ref $oLI) { if ($oLI->as_text =~ m! keyword has been replaced !) { $self->approximate_result_count(0); return 0; } # if } # if # See if our category-only query was replaced by a global query: my $oP = $tree->look_down(_tag => 'p', class => 'sm-md', ); if (ref $oP) { my $s = $oP->as_text; if (($s =~ m/0 results found in the/) && ($s =~ m/so we searched in all categories/)) { return 0; } # if } # if my $iHits = 0; # The hit count is in one of these tags: my @aoResultCountTagset = $self->_get_result_count_elements($tree); if (scalar(@aoResultCountTagset) < 1) { warn " EEE no result_count_elements matched the HTML\n"; } # if FONT: foreach my $oFONT (@aoResultCountTagset) { my $s = $oFONT->as_text; my $qr = $self->_result_count_pattern; print STDERR (" DDD result_count try qr=$qr= against ==$s==\n") if (1 < $self->{_debug}); if ($s =~ m/$qr/) { my $sCount = $1; print STDERR " DDD matched ($sCount)\n" if (1 < $self->{_debug}); # Make sure it's an integer: $sCount =~ s!,!!g; $self->approximate_result_count(0 + $sCount); last FONT; } # if else { print STDERR " WWW DID NOT MATCH\n" if (1 < $self->{_debug}); } } # foreach FONT my $iApprox = $self->approximate_result_count(); if (defined($iApprox) && ($iApprox < 1)) { # Header says there are no results; return now and don't bother # parsing the page: return $iHits; } # if # Recursively parse the stats telling how many items were found in # each category: my $oUL = $tree->look_down(_tag => 'ul', class => 'categories'); $self->{categories} ||= []; $self->_parse_category_list($oUL, $self->{categories}) if ref($oUL); # First, delete all the results that came from spelling variations: my $oDiv = $tree->look_down(_tag => 'div', id => 'expSplChk', ); if (ref $oDiv) { # print STDERR " DDD found a spell-check ===", $oDiv->as_text, "===\n"; $oDiv->detach; $oDiv->delete; } # if # The list of matching items is in a table. The first column of the # table is nothing but icons; the second column is the good stuff. my @aoTD = $self->_get_itemtitle_tds($tree); unless (@aoTD) { print STDERR " EEE did not find any result title tags\n" if $self->{_debug}; # use File::Slurp; # write_file('no-results.html', $self->{response}->content); } # unless my $qrItemNum = qr{(\d{11,13})}; TD: foreach my $oTDtitle (@aoTD) { # Sanity check: next TD unless ref $oTDtitle; my $sTDtitle = $oTDtitle->as_HTML; print STDERR " DDD try TDtitle ===$sTDtitle===\n" if (1 < $self->{_debug}); # First A tag contains the url & title: my $oA = $oTDtitle->look_down('_tag', 'a'); next TD unless ref $oA; my $sTitle = $oA->as_text || ''; if ($sTitle eq '') { my $oImg = $oA->look_down(_tag => 'img'); if (defined $oImg) { $sTitle = $oImg->attr('alt') || ''; } # end if } # end if next TD if ($sTitle eq ''); print STDERR " DDD sTitle ===$sTitle===\n" if (1 < $self->{_debug}); my $oURI = URI->new($oA->attr('href')); # next TD unless ($oURI =~ m!ViewItem!); next TD if ($oURI !~ m!$qrItemNum!); my $iItemNum = $1; print STDERR " DDD iItemNum ===$iItemNum===\n" if (1 < $self->{_debug}); my $iCategory = -1; $iCategory = $1 if ($oURI =~ m!QQcategoryZ(\d+)QQ!); if ($oURI->as_string =~ m!QQitemZ(\d+)QQ!) { # Convert new eBay links to old reliable ones: # $oURI->path(''); $oURI->path('/ws/eBayISAPI.dll'); $oURI->query("ViewItem&item=$1"); } # if my $sURL = $oURI->as_string; my $hit = new WWW::Search::Result; $hit->add_url($self->_cleanup_url($sURL)); $hit->title($sTitle); $hit->item_number($iItemNum); # This is just to prevent undef warnings later on: $hit->bid_count(0); # The rest of the info about this item is in sister
  • elements # to the right: my $sTag = 'li'; my @aoSibs = $oTDtitle->parent->look_down(_tag => $sTag); # The parent itself is an
  • tag: shift @aoSibs; warn " DDD before loop, there are ", scalar(@aoSibs), " sibling $sTag tags\n" if (1 < $self->{_debug}); SIBLING_TD: while (my $oTDsib = shift @aoSibs) { next unless ref($oTDsib); my $sColumn = $oTDsib->attr('class') || q{}; my $s = $oTDsib->as_HTML; if ($sColumn eq q{}) { warn " WWW auction info sibling has no class ==$s==" if (DEBUG_COLUMNS || (1 < $self->{_debug})); if ($s =~ m/The item is listed as a Top Rated Plus item/) { # Note: These items screw up the ByEndDate results because # they appear out of order. } # if } # if print STDERR " DDD looking at $sTag'$sColumn' ===$s===\n" if (DEBUG_COLUMNS || (1 < $self->{_debug})); if ($sColumn =~ m'price') { next TD unless $self->_parse_price($oTDsib, $hit); } # if if (($sColumn =~ m'bids') || ($sColumn =~ m'format')) { # It is not a fatal error if there are no bids (i.e. buy-it-now) $self->_parse_bids($oTDsib, $hit); } if ($sColumn =~ m'shipping') { next TD if ! $self->_parse_shipping($oTDsib, $hit); } if ($sColumn =~ m'end') { next TD if ! $self->_parse_enddate($oTDsib, $hit); } if ($sColumn =~ m'time') { next TD if ! $self->_parse_enddate($oTDsib, $hit); } if ($sColumn =~ m'country') { # This listing is from a country other than the base site # we're searching against. Throw it out: next TD; } if ($sColumn =~ m'extras') { if ($iCategory < 0) { # We haven't found this item's category. Look for it here: $iCategory = $self->_parse_category($oTDsib); } # if } # if 'extras' # Any other class="" value will cause this
  • to be ignored. print STDERR " DDD don't know how to process $sTag'$sColumn' ===$s===\n" if (DEBUG_COLUMNS || (1 < $self->{_debug})); } # while my $sDesc = $self->_create_description($hit); $hit->description($sDesc); $hit->category($iCategory); # Clean up / sanity check hit info: my ($enddate, $iBids); if ( defined($enddate = $hit->end_date) && defined($iBids = $hit->bid_count) && (0 < $iBids) # Item got any bids && (Date_Cmp($enddate, 'now') < 0) # Item is ended ) { # Item must have been sold!?! $hit->sold(1); } # if print STDERR " DDD add hit to cache\n" if (1 < $self->{_debug}); push(@{$self->{cache}}, $hit); $self->{'_num_hits'}++; $iHits++; if ($self->approximate_result_count() <= $iHits) { # We've already scraped all the hits that eBay said there were. # Stop scraping, so that we don't return hits that are not an # exact match to the query but are just "related": last TD; } # if # Delete this HTML element so that future searches go faster? $oTDtitle->detach; $oTDtitle->delete; } # foreach TD undef $self->{_next_url}; if (0) { # AS OF 2008-11 THE NEXT LINK CAN NOT BE FOLLOWED FROM PERL CODE # Look for a NEXT link: my @aoA = $tree->look_down('_tag' => 'a'); TRY_NEXT: foreach my $oA (0, reverse @aoA) { next TRY_NEXT unless ref $oA; print STDERR " DDD try NEXT A ===", $oA->as_HTML, "===\n" if (1 < $self->{_debug}); my $href = $oA->attr('href'); next TRY_NEXT unless $href; # Looking backwards from the bottom of the page, if we get all the # way to the item list, there must be no next button: last TRY_NEXT if ($href =~ m!ViewItem!); if ($oA->as_text eq $self->_next_text) { print STDERR " DDD got NEXT A ===", $oA->as_HTML, "===\n" if 1 < $self->{_debug}; my $sClass = $oA->attr('class') || ''; if ($sClass =~ m/disabled/i) { last TRY_NEXT; } # if $self->{_next_url} = $self->absurl($self->{_prev_url}, $href); last TRY_NEXT; } # if } # foreach } # if 0 # All done with this page. $tree->delete; return $iHits; } # _parse_tree =item _parse_category_list Parses the Category list from the left side of the results page. So far, this method can handle every type of eBay search currently implemented. If you find that it doesn't suit your needs, please contact the author because it's probably just a tiny tweak that's needed. =cut sub _parse_category_list { my $self = shift; my $oTree = shift; my $ra = shift; my $oUL = $oTree->look_down(_tag => 'ul'); my @aoLI = $oUL->look_down(_tag => 'li'); CATLIST_LI: foreach my $oLI (@aoLI) { my %hash; next CATLIST_LI unless ref($oLI); if ($oLI->parent->same_as($oUL)) { my $oA = $oLI->look_down(_tag => 'a'); next CATLIST_LI unless ref($oA); my $oSPAN = $oLI->look_down(_tag => 'span'); next CATLIST_LI unless ref($oSPAN); $hash{'Name'} = $oA->as_text; $hash{'ID'} = $oA->{'href'}; $hash{'ID'} =~ /sacatZ([0-9]+)/; $hash{'ID'} = $1; my $i = $oSPAN->as_text; $i =~ tr/0-9//cd; $hash{'Count'} = $i; push @{$ra}, \%hash; } # if my @aoUL = $oLI->look_down(_tag => 'ul'); CATLIST_UL: foreach my $oUL (@aoUL) { next CATLIST_UL unless ref($oUL); if($oUL->parent()->same_as($oLI)) { $hash{'Subcategory'} = (); $self->_parse_category_list($oLI, \@{$hash{'Subcategory'}}); } # if } # foreach CATLIST_UL } # foreach CATLIST_LI } # _parse_category_list =item _process_date_abbrevs Given a date string, converts common abbreviations to their full words (so that the string can be unambiguously parsed by Date::Manip). For example, in the default English, 'd' becomes 'days'. =cut sub _process_date_abbrevs { my $self = shift; my $s = shift; $s =~ s!d! days!; $s =~ s!h! hours!; $s =~ s!m! minutes!; return $s; } # _process_date_abbrevs =item _next_text The text of the "Next" button, localized for a specific type of eBay backend. =cut sub _next_text { return 'Next'; } # _next_text =item whitespace_pattern Return a qr// pattern to match whitespace your webpage's language. =cut sub whitespace_pattern { # A pattern to match HTML whitespace: return qr{[\ \t\r\n\240]}; } # whitespace_pattern =item _currency_pattern Return a qr// pattern to match mentions of money in your webpage's language. Include the digits in the pattern. =cut sub _currency_pattern { my $self = shift; # A pattern to match all possible currencies found in USA eBay # listings: my $W = $self->whitespace_pattern; return qr/(?:\$|C|EUR|GBP)$W*[0-9.,]+/; } # _currency_pattern =item _title_pattern Return a qr// pattern to match the webpage title in your webpage's language. Add grouping parenthesis so that $1 becomes the auction title, $2 becomes the eBay item number, and $3 becomes the end date. =cut sub _title_pattern { return qr{\A(.+?)\s+-\s+EBAY\s+\(ITEM\s+(\d+)\s+END\s+TIME\s+([^)]+)\)\Z}i; # } # _title_pattern =item _result_count_pattern Return a qr// pattern to match the result count in your webpage's language. Include parentheses so that $1 becomes the number (with commas is OK). =cut sub _result_count_pattern { return qr'([0-9,]+)\s+(active\s+)?(listing|item|matche?|result)s?(\s+found)?'i; } # _result_count_pattern 1; __END__ =back =head1 SEE ALSO To make new back-ends, see L. =head1 BUGS Please tell the author if you find any! =head1 AUTHOR Martin 'Kingpin' Thurn, C, L. Some fixes along the way contributed by Troy Davis. =head1 LEGALESE THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. =head1 LICENSE Copyright (C) 1998-2009 Martin 'Kingpin' Thurn This software is released under the same license as Perl itself. =cut