from __future__ import absolute_import, division, unicode_literals
from future.builtins import str

# Was: import urllib.parse, urllib.request
from future.backports import urllib
from future.backports.urllib import parse as _parse, request as _request
urllib.parse = _parse
urllib.request = _request

__all__ = ["RobotFileParser"]


class RobotFileParser(object):
    """ This class provides a set of methods to read, parse and answer
    questions about a single robots.txt file.

    """

    def __init__(self, url=''):
        self.entries = []
        self.default_entry = None
        self.disallow_all = False
        self.allow_all = False
        self.set_url(url)
        self.last_checked = 0

    def mtime(self):
        """Returns the time the robots.txt file was last fetched.

        This is useful for long-running web spiders that need to
        check for new robots.txt files periodically.

        """
        return self.last_checked

    def modified(self):
        """Sets the time the robots.txt file was last fetched to the
        current time.

        """
        import time
        self.last_checked = time.time()

    def set_url(self, url):
        """Sets the URL referring to a robots.txt file."""
        self.url = url
        self.host, self.path = urllib.parse.urlparse(url)[1:3]

    def read(self):
        """Reads the robots.txt URL and feeds it to the parser."""
        try:
            f = urllib.request.urlopen(self.url)
        except urllib.error.HTTPError as err:
            if err.code in (401, 403):
                self.disallow_all = True
            elif err.code >= 400:
                self.allow_all = True
        else:
            raw = f.read()
            self.parse(raw.decode("utf-8").splitlines())

    def _add_entry(self, entry):
        if "*" in entry.useragents:
            # the default entry is considered last
            if self.default_entry is None:
                # the first default entry wins
                self.default_entry = entry
        else:
            self.entries.append(entry)

    def parse(self, lines):
        """Parse the input lines from a robots.txt file.

        We allow that a user-agent: line is not preceded by
        one or more blank lines.
        """
        # states:
        #   0: start state
        #   1: saw user-agent line
        #   2: saw an allow or disallow line
        state = 0
        entry = Entry()

        for line in lines:
            if not line:
                if state == 1:
                    entry = Entry()
                    state = 0
                elif state == 2:
                    self._add_entry(entry)
                    entry = Entry()
                    state = 0
            # remove optional comment and strip line
            i = line.find('#')
            if i >= 0:
                line = line[:i]
            line = line.strip()
            if not line:
                continue
            line = line.split(':', 1)
            if len(line) == 2:
                line[0] = line[0].strip().lower()
                line[1] = urllib.parse.unquote(line[1].strip())
                if line[0] == "user-agent":
                    if state == 2:
                        self._add_entry(entry)
                        entry = Entry()
                    entry.useragents.append(line[1])
                    state = 1
                elif line[0] == "disallow":
                    if state != 0:
                        entry.rulelines.append(RuleLine(line[1], False))
                        state = 2
                elif line[0] == "allow":
                    if state != 0:
                        entry.rulelines.append(RuleLine(line[1], True))
                        state = 2
        if state == 2:
            self._add_entry(entry)

    def can_fetch(self, useragent, url):
        """using the parsed robots.txt decide if useragent can fetch url"""
        if self.disallow_all:
            return False
        if self.allow_all:
            return True
        # search for given user agent matches
        # the first match counts
        parsed_url = urllib.parse.urlparse(urllib.parse.unquote(url))
        url = urllib.parse.urlunparse(('', '', parsed_url.path,
            parsed_url.params, parsed_url.query, parsed_url.fragment))
        url = urllib.parse.quote(url)
        if not url:
            url = "/"
        for entry in self.entries:
            if entry.applies_to(useragent):
                return entry.allowance(url)
        # try the default entry last
        if self.default_entry:
            return self.default_entry.allowance(url)
        # agent not found ==> access granted
        return True

    def __str__(self):
        return ''.join([str(entry) + "\n" for entry in self.entries])


class RuleLine(object):
    """A rule line is a single "Allow:" (allowance==True) or "Disallow:"
       (allowance==False) followed by a path."""

    def __init__(self, path, allowance):
        if path == '' and not allowance:
            # an empty value means allow all
            allowance = True
        self.path = urllib.parse.quote(path)
        self.allowance = allowance

    def applies_to(self, filename):
        return self.path == "*" or filename.startswith(self.path)

    def __str__(self):
        return (self.allowance and "Allow" or "Disallow") + ": " + self.path


class Entry(object):
    """An entry has one or more user-agents and zero or more rulelines"""

    def __init__(self):
        self.useragents = []
        self.rulelines = []

    def __str__(self):
        ret = []
        for agent in self.useragents:
            ret.extend(["User-agent: ", agent, "\n"])
        for line in self.rulelines:
            ret.extend([str(line), "\n"])
        return ''.join(ret)

    def applies_to(self, useragent):
        """check if this entry applies to the specified agent"""
        # split the name token and make it lower case
        useragent = useragent.split("/")[0].lower()
        for agent in self.useragents:
            if agent == '*':
                # we have the catch-all agent
                return True
            agent = agent.lower()
            if agent in useragent:
                return True
        return False

    def allowance(self, filename):
        """Preconditions:
        - our agent applies to this entry
        - filename is URL decoded"""
        for line in self.rulelines:
            if line.applies_to(filename):
                return line.allowance
        return True
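# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original module): feed a small,
# made-up robots.txt to the parser and query it.  The user-agent string,
# URLs and rules below are hypothetical examples.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    rp = RobotFileParser()
    rp.parse([
        "User-agent: *",
        "Allow: /private/readme.txt",
        "Disallow: /private/",
    ])
    # Rules are checked in order and the first matching rule wins, so the
    # more specific Allow line is listed before the broader Disallow line.
    print(rp.can_fetch("ExampleBot/1.0", "http://example.com/private/readme.txt"))  # True
    print(rp.can_fetch("ExampleBot/1.0", "http://example.com/private/data.html"))   # False
    print(rp.can_fetch("ExampleBot/1.0", "http://example.com/index.html"))          # True (no rule matches)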