
    Xi2n                      % S r SSKJr  SSKrSSKrSSKrSSKrSSKrSSKrSSK	r	SSK
r
SSKrSSKrSSKrSSKrSSKrSSKJr  SSKrSSKJr  SSKJrJrJrJrJr  SSKJrJrJr  SSK J!r!J"r"  SS	K#J$r$  SS
K%J&r&  SSK'J(r(J)r)  SSK*J+r+  SSK,J-r-  SSK.J/r/  SSK0J1r2  SSK3J4r5  SSK6J7r7  S/r8\8S   r9Sr:Sr;\Rx                  R{                  SS5      r>\Rx                  R{                  SS5      r?\Rx                  R{                  SS5      R                  5       R                  5       S;   rB\Rx                  R{                  SS5      R                  5       R                  5       S;   rCSrDS\ES'   \" \F5      R                  5       R                  rI\IS -  rJ\IS!-  rK\IS"-  rLS#rMS$rNS%rOS&rP/ S'QrQ\R" \Q5      rSS$rTS(rUS)rVS*rWS+rXS$rYS$rZS,r[S-r\S.r]\Rx                  R{                  S/S5      R                  5       =(       d    Sr^SwS0 jr_SxS1 jr`SyS2 jraSzS3 jrbS{S4 jrcS|S5 jrdS}S6 jreS~S7 jrf " S8 S95      rgSS: jrhSS; jriSS< jrjSS= jrkSS> jrl SS?S$\USS@.             SSA jjjrmSSB jrnSSC jro\U\WS-4         SSD jjrpSSE jrqSSSF jjrr " SG SH\5      rsSSI jrtSSJ jruSSK jrvSSL jrwSM rxSSN jrySSO jrzSSP jr{SSQ jr|SSR jr}SSSS jjr~SST jrSSU jrSSSV jjrSxSW jrSSX jrSSY jrSSZ jrSS[ jrSS\ jrSS] jrSS^ jrSS_ jrSS` jrSSa jrSSb jrSSc jrSSd jrSSSe jjrSSf jrSSg jrSh rSSi jrSSj jrSSk jrSSl jrSSSSm.               SSn jjrSSSo jjrS&Sp.SSq jjrSSr jrSSs jrSSt jrSSu jr\Sv:X  a  \" 5         gg)a  
RW_Site_Scraper-Orders_Page.py
==============================

Scrapes Red Wing footwear catalog:
- https://order.redwingshoes.com/footwear-rwbr

Test mode:
- Set ONLY_STYLE at the top of this file to a style number (e.g. "400")
  to scrape just that single product.

Outputs (next to this script):
- RW_Orders_Site_Scrape.md
- RW_Site_Scraper_v2_checkpoint.json   (resume state)
- RW_Site_Scraper_v2_errors.txt        (links that failed repeatedly)

Stability features:
- `safe_get()` uses short timeouts + window.stop() so Selenium doesn't hang forever
- HTTP fallback (no Selenium) for the few product pages that still time out
- Optional salvage pass at the end to retry hard failures (HTTP-first)

Cross-platform (Windows + Linux Cinnamon):
- Headless Firefox (default)
- Geckodriver resolution "like Parts_Auto" (explicit Service path; no Selenium Manager):
    1) GECKODRIVER_PATH env var (file or directory)
    2) geckodriver(.exe) on PATH
    3) auto-download geckodriver (GitHub releases) into a user cache dir
    4) (optional) if double-click/no terminal and Tk is available, prompt to pick geckodriver

Feature columns are 1/0 (not Yes/No).
Includes Brand (string) + brand family flags (Red Wing / Irish Setter / Worx).

Dependencies:
- Python 3.9+
- Firefox installed
- Selenium installed:
    Linux Mint/Ubuntu: sudo apt install -y python3-selenium
    Windows: python -m pip install selenium

Notes for Linux Mint PEP 668:
- Prefer `python3-selenium` from apt (as above).
- This script does NOT require webdriver-manager.
    )annotationsN)
HTMLParser)Path)DictListOptionalSetTuple)parse_qsurljoinurlparse)Requesturlopen)BeautifulSoup)	webdriver)TimeoutExceptionWebDriverException)By)Keys)Options)Service)expected_conditions)WebDriverWaitz,https://order.redwingshoes.com/footwear-rwbrzorder.redwingshoes.comz/footwear-rwbrRW_SITE_USERNAMEzrwss614@redwingshoes.comRW_SITE_PASSWORDzWelcomeBack99!RW_HEADLESS1>   r   yonyestrueRW_REFRESH_LINKS str
ONLY_STYLEzRW_Orders_Site_Scrape.mdz"RW_Site_Scraper_v2_checkpoint.jsonzRW_Site_Scraper_v2_errors.txt   TRED WING FOR BUSINESSF)(Style #NameURLImageBrandMaleFemaleRed WingIrish SetterWorxz
Safety Toez	Steel ToezNon-Metallic ToezAluminum ToezMetatarsal GuardzSoft Toe
Waterproof
InsulationzSlip ResistantzElectrical HazardzPuncture ResistantzStatic DissipativezAnkle Protectionu   BOA® Lacing SystemDefined HeelzAll Leather Upper
ResoleablezOxford/AthleticChukkaHiker5"6"7"8"9"10"11"12"zBuilt in USAzMade in USA-      g      ?)333333?g?Z         GECKODRIVER_PATHc                    U (       a  S$ S$ )Nr   0 )vs    k/media/justin/syncPi/Shared/61_RW_Site/Pulled_Info/Boot_Features/RW_Scrapers/RW_Site_Scraper-Orders_Page.pyb01rL      s    3    c                    U =(       d    SR                  SS5      n [        R                  " SSU 5      R                  5       n U $ )Nr#   |z\|\s+ )replaceresubstrip)ss    rK   md_escape_cellrW      s8    	
b#u%A
vsA$$&AHrM   c                    U R                  U R                  S-   5      nUR                  USS9  UR                  U 5        g )Nz.tmputf-8encoding)with_suffixsuffix
write_textrR   )pathtexttmps      rK   atomic_writerb      s7    


4;;/
0CNN4'N*KKrM   c                 J   [         R                  5       (       d)  [        [        / / / 0 S0 / [        R                  " 5       S S.$  [
        R                  " [         R                  SS95      n [        U R                  SS5      5      nU[        :w  aS   [         R                  [         R                  SU S35      5        [        [        / / / 0 [        R                  " 5       S S	.$ U R                  S[        5        U R                  S
[        5        U R                  S/ 5        U R                  S/ 5        U R                  S/ 5        U R                  S0 5        U R                  S/ 5        U R                  S[        R                  " 5       5        U R                  SS 5        U $ ! [         a     Nf = f! [         af     [         R                  [         R                  S5      5        O! [         a     Of = f[        [        / / / 0 S0 / [        R                  " 5       S S.s $ f = f)NF)versioncatalog_urlsproduct_links
done_linksrowspreferred_namesmedia_repair_donefail_countshard_failed_links
started_atgeckodriver_pathrY   rZ   rd   r   z.json.vz.bak)rd   re   rf   rg   rh   rk   rm   rn   re   rf   rg   rh   rk   rl   rm   rn   z.json.corrupt)
CHECKPOINTexistsCHECKPOINT_VERSIONCATALOG_URLStimejsonloads	read_textintgetrR   r\   	Exception
setdefault)dataold_vers     rK   load_checkpointr}      s   )(!!&!#))+ $
 	
-
zz*...@Adhhy!,-(("":#9#9GG9D:Q#RS . ,!# !"iik$(	 	 		#565,b)#r*+R0diik2*D1+  ,  
	z55oFG 		 *(!!&!#))+ $
 	

sb   AF2 ,F" =%F2 #B>F2 "
F/,F2 .F//F2 2
H"=(G&%H"&
G30H"2G33,H"!H"c           	     L    [        [        [        R                  " U SSS95        g )NrD   T)indent	sort_keys)rb   ro   rt   dumps)r{   s    rK   save_checkpointr      s    TZZQ$GHrM   c                    [        [        SSS9 nUR                  U R                  5       S-   5        S S S 5        g ! , (       d  f       g = f)NarY   rZ   
)open
ERRORS_TXTwriterstrip)linefs     rK   write_errors_liner      s3    	j#	0A	$% 
1	0	0s	   #<
A
c                 .     SS K n g! [         a     gf = f)Nr   TF)tkinterry   )r   s    rK   _can_use_tkr      s     s    
c                   [         R                  R                  5       (       d  [        5       (       d  g  SS KnSSKJnJn  UR                  5       nUR                  5         UR                  SS5        UR                  SS5        UR                  U S9nUR                  5         U=(       d    SR                  5       nU(       a  U$ S $ ! [         a     g f = f)	Nr   )
filedialog
messageboxz-topmostTRW Site ScraperzCould not find geckodriver automatically.

Please select the geckodriver executable.
Windows: geckodriver.exe
Linux: geckodriver)titler#   )sysstdoutisattyr   r   r   r   Tkwithdraw
attributesshowinfoaskopenfilenamedestroyrU   ry   )r   tkr   r   rootr_   s         rK   _tk_pick_filer     s    
zz+--2uuw
D);	
 )))6
!!#t%% s   BC C 
CCc                  :    \ rS rSrSrSS jrS	S
S jjrSS jrSrg)ProgressReporteri!  zDTTY progress bar, or Tk window if launched by double-click (no TTY).c                   [        [        U5      S5      U l        X l        [        R
                  R                  5       U l        SU l        S U l	        S U l
        S U l        U R                  (       Gd>  [        5       (       Ga-   SS KnSSKJn  UR                  5       U l	        U R                  R!                  S5        U R                  R#                  S5        U R                  R%                  SS5        UR'                  U R                  SSS	9U l
        U R                  R)                  S
SSS9  UR+                  U R                  U R                  SS9U l        U R                  R)                  SSS9  SU l        U R                  R-                  5         U R                  R/                  5         g g g ! [0         a
    SU l         g f = f)N   Fr   )ttkr   620x150zStarting...w)r`   anchorx   )r      )fillpadxpadyiD  )maximumlength)r   
   )r   r   T)maxrw   totalrm   r   r   r   use_ttygui_root_label_pbarr   r   r   r   r   geometry	resizableLabelpackProgressbarupdate_idletasksupdatery   )selfr   rm   r   r   s        rK   __init__ProgressReporter.__init__$  sU   UQ'
$zz((*

+--!$'UUW


  !23

##I.

$$UE2 hhtzzchR  c A __TZZTW_X


Rg6

++-

!!#% #0&  ! !s   D+F1 1GGc                d   [        S[        [        U5      U R                  5      5      nU R                  (       a  U R
                  (       a  U SU R                   SU 3R                  5       nU R                  (       a  U R                  R                  US9  U R                  (       a  XR                  S'   U R
                  R                  5         U R
                  R                  5         g SnXR                  -  n[        [        XT-  5      5      nSU-  SXF-
  -  -   n[        [        R                  " 5       U R                  -
  S	5      nX-  n	U	S
:  a  U R                  U-
  U	-  OSn
SU SU SU R                   SUS-  S S[        U
5       S3nU(       a  USU 3-  n[        R                   R#                  SUS S -   5        [        R                   R%                  5         XR                  :X  a>  [        R                   R#                  S5        [        R                   R%                  5         g g )Nr   /z  )r`   value    #-g-C6?g&.>        [z]  (d   z5.1fz%) ETA rV      r   )r   minrw   r   r   r   rU   r   configr   r   r   roundrs   rm   r   r   r   flush)r   currentnotemsgwidthfracfilledbarelapsedrateetas              rK   r   ProgressReporter.updateC  s   aS\4::6788

IQtzzl"TF399;C{{""",zz&-

7#JJ'')JJ#U4<()FlSEN33diikDOO3V< /3d{tzzG#t+#b	4::,bc$ws3xjPQRRv;C

DS	)*

jj JJT"JJ !rM   c                    U R                   (       a.  U R                  (       a   U R                  R                  5         g g g ! [         a     g f = fN)r   r   r   ry   )r   s    rK   closeProgressReporter.closec  s?    88



""$ #8  s   A 
AA)r   r   r   r   rm   r   r   N)r   rw   rm   floatr#   )r   rw   r   r$   returnNoner   r   )	__name__
__module____qualname____firstlineno____doc__r   r   r   __static_attributes__rI   rM   rK   r   r   !  s    N!>@rM   r   c                    [         R                  S:X  ay  [         R                  R                  S5      =(       dD    [         R                  R                  S5      =(       d    [	        [
        R                  " 5       5      n [        U 5      S-  nOU[         R                  R                  S5      =(       d!    [	        [
        R                  " 5       S-  5      n [        U 5      S-  nUR                  SSS	9  U$ )
NntLOCALAPPDATAAPPDATARW_Site_ScraperXDG_CACHE_HOMEz.cacherw_site_scraperTparentsexist_ok)osnameenvironrx   r$   r   homemkdir)baseds     rK   
_cache_dirr   n  s    	ww$zz~~n-^	1J^cRVR[R[R]N^J**zz~~./N3tyy{X7M3NJ**GGD4G(HrM   c                 :   [         (       as  [        [         5      n U R                  5       (       a  [        U 5      $ U R	                  5       (       a/  S H)  nX-  nUR                  5       (       d  M  [        U5      s  $    S H.  n[
        U-  nUR                  5       (       d  M#  [        U5      s  $    S H2  n[        5       U-  nUR                  5       (       d  M'  [        U5      s  $    [        R                  S:w  aa  [        R                  " 5       S-  [        R                  " 5       S-  S-  4 H*  nUS-  nUR                  5       (       d  M  [        U5      s  $    [        R                  " S5      =(       d    [        R                  " S5      n U (       a  U $ g )N)geckodriver.exegeckodriverr   binz.localr  r   )rF   r   is_filer$   is_dirBASE_DIRr   r   r   r   shutilwhich)pr   candr   s       rK   _resolve_from_env_or_pathr
  y  s0   !"99;;q6M88:::x<<>>t9$ ; 3$<<>>t9 3 3|d"<<>>t9 3 
ww$))+%tyy{X'='EFA}$D||~~4y  G 	]#Fv||4E'FArM   c                 v   [         R                  R                  5       n [        R                  " 5       R                  5       nU R	                  S5      (       a*  S[        R
                  " 5       S   ;   nU(       a  SS4$ SS4$ U R	                  S5      (       a  SU;   d  S	U;   a  g
gU S:X  a  S	U;   d  SU;   a  ggg)zG
Returns (asset_contains, archive_type) matching geckodriver releases.
win64r   win64win32ziplinuxaarch64arm64)zlinux-aarch64tar.gz)linux64r  darwin)zmacos-aarch64r  )macosr  )r   platformlowermachine
startswitharchitecture)sysplatmachis_64s      rK   _platform_asset_keyr     s     ll  "G##%D%  --/22 u55gu55'""4.$(d?i4/." rM   c                   [        5       u  pSn[        USS0S9n[        USS9 n[        R                  " UR                  5       R                  SSS	95      nS
S
S
5        WR                  S/ 5      nS
nS
n	U HG  n
U
R                  SS5      nX;   d  M  UR                  U5      (       d  M4  U
R                  S5      nUn	  O   U(       d  [        SU SU S35      eX	-  n[        USS0S9n[        USS9 n[        US5       n[        R                  " X^5        S
S
S
5        S
S
S
5        [        R                  S:X  a  SOSnX-  nUS:X  av  [        R                   " US5       nUR#                  5        HA  nUR                  U5      (       d  M  UR%                  UU S9  U U-  nUR'                  U5          O   S
S
S
5        O[(        R                  " US5       nUR+                  5        Hh  nUR                  R                  SU-   5      (       d  UR                  U:X  d  M8  UR%                  UU S9  U UR                  -  nUR'                  U5          O   S
S
S
5         UR-                  SS9  [        R                  S:w  ak   [        R0                  " U5      n[        R2                  " UUR4                  [0        R6                  -  [0        R8                  -  [0        R:                  -  5        UR=                  5       (       d  [        S5      eU$ ! , (       d  f       GN= f! , (       d  f       GN= f! , (       d  f       GN= f! , (       d  f       N= f! , (       d  f       GN= f! [.         a     GNf = f! [.         a     Nf = f)z\
Downloads and extracts latest geckodriver into dest_dir.
Returns path to extracted driver.
z@https://api.github.com/repos/mozilla/geckodriver/releases/latest
User-AgentzRW_Site_Scraper/1.0headersrA   timeoutrY   rR   errorsNassetsr   r#   browser_download_urlz'Could not find a geckodriver asset for r   z).<   wbr   r   r  r  r)r_   zr:gzr   T
missing_okz5Download succeeded but geckodriver was not extracted.)r   r   r   rt   ru   readdecoderx   endswithRuntimeErrorr   r  copyfileobjr   r   zipfileZipFilenamelistextractrR   tarfile
getmembersunlinkry   statchmodst_modeS_IXUSRS_IXGRPS_IXOTHrp   )dest_dir	asset_keyarchive_typeapireqr-  r{   r)  dl_urldl_namer   r   archive_pathreq2r   driver_nameextracted_pathzmemberr  tsts                         rK   _download_latest_geckodriverrQ    s   
 23I
LC
#.CD
EC	b	!Qzz!&&(//')/DE 
" XXh#FFGuuVR |!<!<UU12FG  DYKrR^Q__abcc%L6L2G#HID	r	"alD)AQ1  *B	" (*ww$#MK+Nu__\3/1**,??;//IIf8I4 6)AIIn- ' 0/ \\,/1,,.;;''k(9::fkk[>XIIf8I4 6;;.AIIn- ) 0t, 
ww$	(BHH^RZZ$,,%>%MPTP\P\%\]   ""RSSq 
"	!$ *B)A	"	" 0/ 0/    		sy   3L6L3L!L3#(M*MAM"4MM( A*M9 
L!
L0	+L33
M
M
M%(
M65M69
NNc                B   [        5       nU(       a   [        U5      R                  5       (       a  U$ U R                  S5      =(       d    SR	                  5       nU(       a   [        U5      R                  5       (       a  U$  [        5       n[        U5      n[        U5      U S'   [        U 5        [        U5      $ ! [         aY  n[        S5      nU(       a4  [        U5      R                  5       (       a  X`S'   [        U 5        Us S nA$ [        SU 35      eS nAff = f)Nrn   r#   z$Select geckodriver / geckodriver.exezUnable to locate or install geckodriver.

Fix options:
  - Put geckodriver on PATH
  - OR set GECKODRIVER_PATH to the full driver path
  - OR install via package manager (Linux often: sudo apt install firefox-geckodriver)

Underlying error: )r
  r   rp   rx   rU   r   rQ  r$   r   ry   r   r3  )ckr  saveddestdriver_pathepickeds          rK   ensure_geckodriverrY    s    !#AT!W^^ VV&'-2446Ee##%%
|248!$[!1; 
EFd6l))++%+!"BM!
 "#%
 	

s%   8B; ;
DA DDDDeager)page_load_strategyblock_imagespage_load_timeout
user_agentc               d   [        U 5      n[        5       nU(       a  UR                  S5        [        (       a  UR                  S5        U(       a  UR	                  SU5        UR                  SS5        UR                  SS5        UR                  SS	5        UR                  S
S5        UR                  SS5        UR                  SS5        UR                  SS5        UR                  SS5        UR                  SS5        UR                  SS5        UR                  SS	5        UR                  SS	5        U(       a  UR                  SS5        U(       a  UR                  SU5        [        US9n[        R                  " XS9n	U	R                  U5        U	R                  [        5        U	$ )a6  Start Firefox using an explicit geckodriver path (no Selenium Manager).

Args:
    page_load_strategy: "eager" returns after DOMContentLoaded. "normal" waits for full load.
    block_images: If True, blocks images to reduce load stalls/timeouts.
    page_load_timeout: Seconds for Selenium navigation timeout.
z	-headlessz-privatepageLoadStrategyzdom.webnotifications.enabledFzmedia.volume_scalez0.0z!browser.privatebrowsing.autostartTznetwork.http.http3.enablezbrowser.cache.disk.enablezbrowser.cache.memory.enablezbrowser.cache.offline.enableznetwork.http.use-cachezplaces.history.enabledzsignon.rememberSignonsz"privacy.trackingprotection.enabledz)privacy.trackingprotection.pbmode.enabledzpermissions.default.imagerD   zgeneral.useragent.override)executable_path)serviceoptions)rY  r   add_argumentPRIVATE_BROWSER_MODEset_capabilityset_preferenceFirefoxServicer   Firefoxset_page_load_timeoutset_script_timeoutSCRIPT_TIMEOUT)
rS  headlessr[  r\  r]  r^  geckorc  rb  drivers
             rK   create_driverrp    s     r"EiG[)Z(13EF 95A/7>E6>6>8%@95A3U;3U;3U;?FFM:A>;ZHU3Gw@F
  !23
n-MrM   c                     U R                  5          U R                  S5        U R                  S5        g! [         a     N0f = f! [         a     gf = f)z;Clear cookies/storage for a clean private session baseline.about:blankz}
            try { localStorage.clear(); } catch (e) {}
            try { sessionStorage.clear(); } catch (e) {}
            N)delete_all_cookiesry   rx   execute_script)ro  s    rK   reset_browser_stateru  W  s^    !!#	

=!	
	    s    6 "A 
AA
AAc                     [         u  p [        R                  " [        R                  " X5      5        g ! [
         a    [        R                  " U 5         g f = fr   )REQUEST_DELAY_RANGErs   sleeprandomuniformry   )lohis     rK   _polite_delayr}  m  s=     FB

6>>")* 

2s   *5  AAc                L   Sn[        SUS-   5       GH
  n U R                  U5        [        5          [        U SS5      nUb  [	        US5      (       a8  [	        UR
                  S5      (       a  [        U5      S-   UR
                  l        [	        US5      (       a8  [	        UR                  S5      (       a  [        U5      S-   UR                  l        U R                  U5        [        R                  " U5         U R                  [        R                  S	5          g   U(       a  Ueg! [         a     Nbf = f! [         a     N*f = f! [          a{  nUn U R#                  S
5        O! [         a     Of = f[        R                  " U5         U R                  [        R                  S	5         SnA  g! [         a      SnAO f = fSnAf[$         a  nUn SnAOSnAff = f U R                  S5        O! [         a     Of = f[        R                  " SU-  5        GM  )zNavigate without getting stuck on pages that never fully finish loading.

- Uses page_load_timeout
- On timeout, calls window.stop() and continues if the DOM exists
- Retries a couple times with light cleanup
Nr   rD   command_executor_client_configr&  r   _connbodyzwindow.stop();rr  g      ?)rangerj  r}  getattrhasattrr  rw   r&  r  ry   rx   rs   rx  find_elementr   TAG_NAMEr   rt  r   )	ro  urlr&  settle	max_trieslast_excattemptcerW  s	            rK   safe_getr  u  s    )-HIM*)	((1O
V%7>>r#344ARART]9^9^47L24E))1r7++)0L0L+.w<"+<( JJsO JJv##BKK8 5 +f  K       	H%%&67 JJv##BKK8  " 	H		JJ}% 		

4'>"s   EB"D+'E= D;+
D85E7D88E;
EEEE
G$GE*)G*
E74G6E77G F77
GGGGG$GG$(G::
HHc                   / nU 4 H  nX!;  d  M
  UR                  U5        M     SU ;   a+  U R                  SSS5      nX1;  a  UR                  U5        U$ U R                  SSS5      nX1;  a  UR                  U5        U$ )zOReturn a small set of URL variants (www/non-www) to dodge occasional redirects.z//www.z//r   )appendrR   )r  outsuu2s       rK   _url_variantsr    s~    DU=KKN  3[[4+>KKO
 K [[x+>KKOKrM   c                    [        U SSSS.S9n[        X!S9 nUR                  5       nS S S 5        WR                  SSS	9$ ! , (       d  f       N= f)
Nz_Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36z?text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8zen-US,en;q=0.9)r"  AcceptzAccept-Languager#  r%  rY   rR   r'  )r   r   r0  r1  )r  r&  rF  r-  r{   s        rK   _fetch_htmlr    sW    
{W/
C 
	&!vvx 
' ;;wy;11 
'	&s   A
Ac                  F   ^  \ rS rSrSr1 SkrU 4S jrS rS rS r	Sr
U =r$ )	_VisibleTextExtractori  z;Very small HTML->visible text extractor (no external deps).>   r  brh1h2h3h4h5h6litdthtrdivfooterheaderarticlesectionc                0   > [         TU ]  5         / U l        g r   )superr   parts)r   	__class__s    rK   r   _VisibleTextExtractor.__init__  s     "
rM   c                x    UR                  5       U R                  ;   a  U R                  R                  S5        g g Nr   r  _BLOCK_TAGSr  r  )r   tagattrss      rK   handle_starttag%_VisibleTextExtractor.handle_starttag  -    99;$***JJd# +rM   c                x    UR                  5       U R                  ;   a  U R                  R                  S5        g g r  r  )r   r  s     rK   handle_endtag#_VisibleTextExtractor.handle_endtag  r  rM   c                v    U(       a2  UR                  5       (       a  U R                  R                  U5        g g g r   )rU   r  r  )r   r{   s     rK   handle_data!_VisibleTextExtractor.handle_data  s(    DJJLLJJd# !4rM   )r  )r   r   r   r   r   r  r   r  r  r  r   __classcell__)r  s   @rK   r  r    s&    EK#$$$ $rM   r  c                <   [        5       n UR                  U 5        SR                  UR                  5      n[
        R                  " U5      n[        R                  " SSU5      n[        R                  " SSU5      nUR                  5       $ ! [         a     N}f = f)Nr#   z\n{3,}z

z[\t\r]+rQ   )
r  feedry   joinr  html_libunescaperS   rT   rU   )htmlr  raws      rK   _html_to_textr    s    A	t ''!''
C


C
 C
&&FC
(C
&&S#
&C99;  s   B 
BBc                J   [         R                  " SU [         R                  [         R                  -  S9nU(       d  gUR	                  S5      n[         R
                  " SSU5      n[        R                  " U5      n[         R
                  " SSU5      R                  5       nU$ )Nz<h1\b[^>]*>(.*?)</h1>flagsr#   r   <[^>]+>rQ   rP   )	rS   search
IGNORECASEDOTALLgrouprT   r  r  rU   )r  minners      rK   _extract_first_h1r    sw    
		*D		8QRAGGAJEFF:sE*Ee$EFF63&,,.ELrM   c                  ^^ S1m1 SkmS	S jnS
UU4S jjn[         R                  " SU [         R                  [         R                  -  S9nU(       a&  U" UR	                  S5      5      nU" U5      (       a  U$ [         R
                  " SU [         R                  [         R                  -  S9 H+  nU" UR	                  S5      5      nU" U5      (       d  M)  Us  $    [        U 5      $ )zExtract a likely product title from HTML.

Many Red Wing product pages place the actual product name in <h3>, while <h1>
can be a site/banner header (e.g., 'RED WING FOR BUSINESS'). We therefore
try meaningful <h3> first, then fall back to <h1>.
r'   >	   CARESIZINGDETAILSREVIEWSFEATURES
TECHNOLOGYSPECIFICATIONS
SIZE & FITRELATED PRODUCTSc                    [         R                  " SSU 5      n [        R                  " U 5      n [         R                  " SSU 5      R	                  5       n U $ )Nr  rQ   rP   )rS   rT   r  r  rU   )r  s    rK   _clean&_extract_first_heading.<locals>._clean  sD    z3.!!%(vsE*002rM   c                   > U =(       d    SR                  5       n U (       d  gU R                  5       nUT;   d  UT;   a  g[        U5      S::  a  g[        R                  " SU5      (       d  gg)Nr#   Fr&   z[A-Z]T)rU   upperlenrS   r  )rV   upBADSTOPs     rK   _ok#_extract_first_heading.<locals>._ok#  sY    W"OOWWY9d
r7a<yy2&&rM   z<td[^>]*class=['\"][^'\"]*prTitle[^'\"]*['\"][^>]*>\s*Name\s*</td>\s*<td[^>]*class=['\"][^'\"]*prValue[^'\"]*['\"][^>]*>(.*?)</td>r  r   z<h3\b[^>]*>(.*?)</h3>)r  r$   r   r$   rV   r$   r   bool)rS   r  r  r  r  finditerr  )r  r  r  mnamerV   mh3r  r  s         @@rK   _extract_first_headingr    s     #
#CD
  II	Immbii'	E 5;;q>"q66H {{3TQSQZQZAZ[399Q< q66H \
 T""rM   c                ,   U =(       d    SR                  5       n U (       d  gU R                  5       nUR                  S5      (       d,  UR                  S5      (       d  UR                  S5      (       a  gSU;   a  g [        U 5      nUR                  =(       d    SR                  5       R                  S5      n[        UR                  =(       d    S5      nS	U;   ac  [        R                  " S
U5      (       aF  S H?  nUR                  U5      (       d&  UR                  UR                  5       5      (       d  M?    g   ggSU;   a  [        R                  " SU5      (       a  g[        R                  " SU5      (       a  gg! [         a    Un0 n Nf = f)Nr#   Fjavascript:zmailto:ztel:z/safety-boot/Tr   
/footwear-z.*/footwear-[^/]+stylestyleNumberskuitempid	productIdz	/product/z\d{3,6}z/\d{3,6}\.html$)rU   r  r  r   r_   r   r   queryry   rS   	fullmatchrx   r  )hreflowparsedr_   r  keys         rK   _is_product_linkr  I  sG   JBD
**,C
~~m$$y(A(AS^^TZE[E[#$!r((*11#6+,
 t<<,d33R99S>>UYYsyy{%;%; S  dryyT:: 
yy#T**+  s   =AF FFc                ~   / n[        5       n U R                  [        R                  S5      nU Hn  n UR	                  S5      =(       d    SR                  5       nU(       a  XR;   a  M:  [        U5      (       d  ML  UR                  U5        UR                  U5        Mp      U R                  [        R                  S5      nU Hn  n UR	                  S5      =(       d    SR                  5       nU(       a  XR;   a  M:  [        U5      (       d  ML  UR                  U5        UR                  U5        Mp     U(       d   U R                  [        R                  S5       Hn  n UR	                  S5      =(       d    SR                  5       nU(       a  XR;   a  M:  [        U5      (       d  ML  UR                  U5        UR                  U5        Mp     U$ U$ ! [         a    Sn GNf = f! [         a     GNSf = f! [         a    Sn GNf = f! [         a     Nf = f! [         a    Sn Nf = f! [         a     U$ f = f)u  Locate product anchors on the catalog page.

The Red Wing wholesale catalog renders product tiles with specific CSS
classes.  Each tile (<li>) contains two links pointing at the same
product detail page: one wrapping the image and one wrapping the product
name.  Both have an href that ends in ".html" with the numeric style
number.  On the current site the anchor for the name has the classes
``c-product-tile__pdp-link js-product-name``, while the image link has
no special class but is still within the tile.  This helper first
attempts to extract anchors using those specific selectors and falls
back to scanning all anchors when necessary.

Returns a list of WebElement objects (anchors) with unique hrefs.
Hli.js-product-grid-item a.c-product-tile__pdp-link.js-product-name[href]r  r#   (li.js-product-grid-item a[href$='.html']z
//a[@href])setfind_elementsr   CSS_SELECTORget_attributerU   ry   r  r  addXPATH)ro  anchorsseenelementsr   r  	elements2s          rK   _find_product_anchorsr  n  s    GUD''OOV
 A/52<<> 4<%%q! ((OOG
	 A/52<<> 4<%%q!  	))"((LAOOF39r@@BD t|#D))NN1%HHTN B N7NS         ! D  	N	s   %G( (GG( <&G( #%H 	(G91H &H ?#H. #(HH. +&H. G%!G( $G%%G( (
G65G69H	H H		H 
HHH+(H. *H++H. .
H<;H<c                
    U R                   =(       d    [        n U R                  =(       d    SnU(       d  / $ [	        5       n [        US5      nUR                  S5       HE  nUR                  S5      =(       d    SR                  5       nU(       d  M4  UR                  U5        MG     UR                  S5       HE  nUR                  S5      =(       d    SR                  5       nU(       d  M4  UR                  U5        MG     UR                  SSS9 HE  nUR                  S5      =(       d    SR                  5       nU(       d  M4  UR                  U5        MG     [        R                  " U5      n/ S	QnU Ho  n	[        R                  " X[        R                  S
9 HE  n
U
R!                  S5      =(       d    SR                  5       nU(       d  M4  UR                  U5        MG     Mq     / n[	        5       nU HI  n[#        X5      n[%        U5      (       d  M   X;   a  M'  UR                  U5        UR'                  U5        MK     U$ ! [         a
    [        n GNBf = f! [         a    Sn GN?f = f! [         a     GN#f = f)zIFallback link discovery when clickable anchors are sparse or JS-rendered.r#   html.parserr  r  r   r   T)r  )zhttps?://[^\s\"'<>]+z2/(?:footwear-rwbr|safety-boot|product)/[^\s\"'<>]+z"/\d{3,6}\.html(?:[?#][^\s\"'<>]*)?r  r   )current_urlCATALOG_URL_PRIMARYry   page_sourcer  r   selectrx   rU   r  find_allr  r  rS   r  r  r  r   r  r  )ro  base_urlr  
candidatessoupr   r  blobpatternspatr  r  outr  r  abs_urls                   rK   '_extract_product_links_from_page_sourcer    s   '%%<)<!!'R 	5JT=1ghAEE&M'R..0Dtt$ i
 GHAEE&M'R..0Dtt$ I
 s.AEE&M'R..0Dtt$ / T"DH
 Sbmm<A!r((*Aqq! =  CUD((((?

7  Js  '&'
  4  sI   I
 I! AI4 AI4 0AI4 	I4 
II!I10I14
JJc                N   [        5         [        X5        [        U 5        [        R                  S4[        R                  S4[        R                  S4[        R                  S4[        R                  S4[        R                  S4/n[        R                  S4[        R                  S4[        R                  S	4[        R                  S
4/n[        R                  S4[        R
                  S4[        R
                  S4/n [        U 5      (       a  g SnSnU H0  u  px U R                  Xx5      n	U	R                  5       (       a  U	n  OM2     U H0  u  px U R                  Xx5      n	U	R                  5       (       a  U	n  OM2     U(       a  U(       d  g UR                  5         UR                  [        5         UR                  5         UR                  [        5        Sn
U HW  u  px U R                  Xx5      nUR                  5       (       a+  UR                  5       (       a  UR                  5         Sn
  OMW  MY     U
(       d  UR                  [         R"                  5        [$        R$                  " 5       S-   n[$        R$                  " 5       U:  a  [        U 5         [        U 5      (       a  g  U H-  u  pxU R                  Xx5      nUR                  5       (       d  M-    O   g[$        R&                  " S5        [$        R$                  " 5       U:  a  M  gg! [         a     GNf = f! [         a     GM   f = f! [         a     GM  f = f! [         a     GNf = f! [         a     GNf = f! [         a     GM  f = f! [         a     Nf = f! [         a     gf = f)zCAttempt a best-effort login for order.redwingshoes.com if prompted.zinput[type='email']zinput[name='email']zinput[name='username']zinput[id*='email' i]zinput[id*='user' i]zinput[autocomplete='username']zinput[type='password']zinput[name='password']zinput[id*='password' i]z&input[autocomplete='current-password']zbutton[type='submit']z//button[contains(translate(.,'LOGINSGNIN','loginsgnin'),'login') or contains(translate(.,'LOGINSGNIN','loginsgnin'),'sign in')]z//input[@type='submit']NFT   g?)r}  r  dismiss_popupsr   r  r  r  ry   r  is_displayedclear	send_keysr   r   
is_enabledclickr   ENTERrs   rx  )ro  
target_urllogin_user_selectorslogin_pass_selectorssubmit_selectorsuser_elpass_elbyselr	  	submittedbtnendpes                 rK   ensure_logged_inr1    sj   OV 6 
/0	/0	23	01	/0	:; 
23	23	34	BC	 
12	  V  	W	,- (( )
 GG'	&&r/D  "" # ( (	&&r/D  "" # ( ' &' &'I#	%%b.C!!cnn&6&6		 	 '7! $ $**% ))+
C
))+
v	$V,, -	/((1??$$ 0
  	

3! ))+
g    		  		  
    		  		  		s   L #(L/(MM =M$ ,AM5$N 7+N &N (N 
L,+L,/
L>=L>
MM
M! M!$
M21M25
NN
NN
N$#N$c                .   [         R                  S4[         R                  S4[         R                  S4[         R                  S4[         R                  S4/nU H  u  p# [	        U S5      R                  [        R                  " X#45      5      nU R                  SU5        [        R                  " S5         UR                  5         [        R                  " S
5          g   g! [         a    U R                  S	U5         N9f = f! [         a     M  f = f)zCOpen the Footwear section after login when required by the site UI.zfootwear-labelz#footwear-labelz;//button[@id='footwear-label' or @aria-controls='footwear']z//a[@id='footwear-label']zU//*[self::button or self::a or @role='tab'][contains(normalize-space(.), 'Footwear')]r   ?arguments[0].scrollIntoView({block:'center', inline:'center'});rB   arguments[0].click();g?N)r   IDr  r  r   untilECelement_to_be_clickablert  rs   rx  r#  ry   )ro  	selectorsr+  r,  els        rK   open_footwear_sectionr;  X  s     
 !	+,	PQ	./	jkI 	vr*001K1KRI1VWB!!"ceghJJtC
 JJsO   C%%&=rBC  		s7    AD9C$	D$D DDD
DDc                   [         R                  S4[         R                  S4[         R                  S4/nSnU Hq  u  p4 [        U S5      R                  [        R
                  " X445      5      nU R                  SU5        [        R                  " S5         UR                  5         S	n  O    [        U [        5         [        U S
5      R                  S 5        g! [         a    U R                  SU5         NSf = f! [         a     M  f = f! [         a     N]f = f! [         a%     [        U [        5         g! [         a      gf = ff = f)zQUse Footwear > Red Wing navigation described in docs, then ensure /footwear-rwbr.z%//a[contains(@href,'/footwear-rwbr')]z0//button[contains(@data-target,'footwear-rwbr')]zx//*[self::a or self::button][contains(normalize-space(.), 'Red Wing') and not(contains(normalize-space(.), 'Heritage'))]Fr   r3  rB   r4  Tr   c                |    [         [        U R                  5      R                  =(       d    SR	                  5       ;   $ )Nr#   )FOOTWEAR_RWB_SLUGr   r  r_   r  r   s    rK   <lambda>-open_red_wing_footwear_page.<locals>.<lambda>  s'    'Xamm-D-I-I-OR,V,V,XYrM   N)r   r  r   r6  r7  r8  rt  rs   rx  r#  ry   r  r  )ro  r9  clickedr+  r,  r:  s         rK   open_red_wing_footwear_pagerC  p  sT    
:;	EF	  N  	OI
 G	vq)//0J0JB90UVB!!"ceghJJtC
 G ",-fb!''Y	
  C%%&=rBC  		    	V01 		ss    AD C)D 0D D! C=:D <C==D  
DD
DD!
E,D>>
EEEEc                f    U R                   =(       d    [        n/ n/ SQnU Hm  n U R                  [        R
                  U5       HE  nUR                  S5      =(       d    SR                  5       nU(       d  M4  UR                  U5        MG     Mo      U R                  [        R
                  S5       HM  nS HD  nUR                  U5      =(       d    SR                  5       nSU;   d  M3  UR                  U5        MF     MO      U R                  =(       d    Sn	U	(       a  [        R                  " U	5      n
[        R                  " SU
[        R                  S9 H#  nUR                  UR                  S	5      5        M%     [        R                  " S
U
[        R                  S9 H#  nUR                  UR                  S	5      5        M%     / n[!        5       nU H  n[#        X5      n [%        U5      nUR&                  =(       d    SR)                  5       nUR*                  =(       d    SR)                  5       n[,        U;  a  Ml  [        R.                  " SU5      (       d  M  UR0                   SUR&                   U 3nUU;  d  M  UR3                  U5        UR                  U5        M     [        U;  a  UR5                  S	[        5        U$ ! [         a
    [        n GNf = f! [         a     GM  f = f! [         a     GNf = f! [         a    Sn	 GNf = f! [         a     GMR  f = f)zGDiscover footwear brand catalog paths from RW site navigation/elements.)za[href*='/footwear-']z[id*='footwear' i] a[href]znav a[href*='footwear']r  r#   z&[data-target], [data-url], [data-href])zdata-targetzdata-urlz	data-hrefr  z/footwear-[a-z0-9\-]+r  r   z)https?://[^\s\"'<>]*/footwear-[a-z0-9\-]+z/footwear-[a-z0-9\-]+$z://)r  r  ry   r  r   r  r  rU   r  r  r  r  rS   r  r  r  r  r   r   netlocr  r_   	SITE_HOSTr  schemer  insert)ro  r  r  dom_selectorsr,  r:  r  attrrJ   r  r  r  urlsr  r  r  r  hostr_   norms                       rK   discover_footwear_catalog_urlsrN    s   '%%<)< JM
 	**2??C@((06B==?4%%d+ A &&r8`aB@%%d+1r88:1$%%a( A b!!'R   &5t2==QAaggaj) RI4WYWdWdeAaggaj) f DUDH"	AHHN))+DFFLb'')D D yy2D99((3qxxj/tHHTNKK $ $&A*+KC  '&'"  		    "  		sg   K AK+;K+AK= -K= L ,AL!K('K(+
K:9K:=
L
LLL!
L0/L0c                B    SS jn[        X5      R                  U5        g)zVWait for shared footwear-page elements instead of requiring product links immediately.c                    U R                  [        R                  S5      nU(       a  g  U R	                  [        R
                  S5      R                  =(       d    SR                  5       nSU;   nSU;   nSU;   nS	U;   nS
U;   nSU;   =(       d&    SU R                  =(       d    SR                  5       ;   nU=(       a&    U=(       d    U=(       d    U=(       d    U=(       d    U$ ! [         a     Nf = f! [         a     gf = f)u  Check that the catalog UI has loaded.

On the current Red Wing site, the grid of products is present in
``li.js-product-grid-item`` elements.  Earlier heuristics looked for
text such as "Sort By", "Results" or "Filters"; those still apply
but may not appear until the user scrolls.  We consider the catalog
ready when either at least one product tile exists or legacy text
heuristics are satisfied.
zli.js-product-grid-itemTr  r#   Fresultzsort byfilterszmore resultsr  footwearr  )	r  r   r  ry   r  r  r`   r  r  )	r   tilestxthas_resultshas_sorthas_filtershas_more
has_searchhas_footwear_ctxs	            rK   _ready/wait_for_footwear_catalog_ready.<locals>._ready  s    	OOBOO5NOE 	>>"++v6;;ArHHJC #o#3&!S(_
%,]ATRT@[@[@]0]fX%e%e%ex%e[ef  		  		s#   'C' AC7 '
C43C47
DDNr   r  )r   r6  )ro  r&  r\  s      rK   wait_for_footwear_catalog_readyr_    s    g: &"((0rM   c                0   [        U S5      n[        U5      nSnSnUR                  SSS9=(       d    UR                  SS9nU(       a  UR                  S5      nUR                  S	5      nU(       a?  UR                  S
SS9n[        R
                  " SU5      n	U	(       a  U	R                  S5      nU(       a  UR                  S
SS9nU(       GdG  UR                  S5       GH1  n
 [        R                  " U
R                  SS95      n[        U[        5      (       a  UOU/nU H  n[        U[        5      (       d  M  U(       d1  UR                  S5      =(       d    SR                  5       nU(       a  UnU(       dk  S He  n[!        UR                  U5      =(       d    S5      R                  5       n[        R
                  " SU5      nU(       d  MT  UR                  S5      n  O   U(       d  M  U(       d  M    O   U(       d  GM(  U(       d  GM2    O   U(       d  S H  n UR#                  U5      nU(       d  M  UR%                  S5      (       a)  UR                  S5      =(       d    SR                  5       nOUR                  S
SS9nU(       d  Mv  SUR'                  5       ;  d  M  Un  O   U(       d  U(       a  SU 3O
U=(       d    SnU(       d  UR                  5       nU(       d  UnX4U4$ ! [         a     GM!  f = f! [         a    Sn Nf = f)zMExtract (style_number, name, style_text) from legacy and RW order-site pages.r  r#   r  	shoeguide)class_printSpacing)idr  strongrQ   T)rU   z	#\s*(\d+)r   z"script[type='application/ld+json']r   )r  mpn	productID\b(\d{3,6})\b)r  r  r   zmeta[property='og:title']Nmetacontentzred wing for businessr(   )r   extract_style_from_urlfindget_textrS   r  r  r  rt   ru   ry   
isinstancelistdictrx   rU   r$   
select_oner  r  )r  r  r  style_numberr   
style_text	guide_divh3_tag
strong_tagmatchscriptpayloadobjsobjnmr  r  r  r,  r:  rU  s                        rK    extract_style_and_name_from_htmlr}    s   }-D)#.LDJ		%	4T		^	8TI%^^H-
D9JIIlJ7E${{1~&&s$&7Dkk"FGF**V__4_%@A )$777gYD!#t,,''&//R668B!#:!#''#,"4"5;;=II&6<1+,771:L!  ; 4LL   t/ H2 EC__S) ~~f%%vvi(.B557kk#Tk2s.ciikA F 1=w|n-DJB
!'')z))Y  4  s$   &#K4$L4
LLLLc                   / SQn[         R                   " 5       S-   n[         R                   " 5       U:  a  SnU Hd  n U R                  [        R                  U5      nUR	                  5       (       a+  UR                  5       (       a  UR                  5         Sn  OMd  Mf     U(       d  g [         R                  " S5        [         R                   " 5       U:  a  M  g g ! [         a     M  f = f)N)zN//button[contains(.,'Agree') or contains(.,'Accept') or contains(.,'Proceed')]zI//a[contains(.,'Agree') or contains(.,'Accept') or contains(.,'Proceed')]uR   //button[contains(.,'Close') or contains(.,'×') or contains(@aria-label,'Close')]g       @FT皙?)	rs   r  r   r  r  r"  r#  ry   rx  )ro  xpathsr/  rB  xpr:  s         rK   r  r  G  s    F
 ))+
C
))+
B((26??$$HHJ"G *9$  

3 ))+
  s   AC
C+*C+c           	        SnSn[        U5       GHx  n[        U 5        SnS H  n U R                  [        R                  U5      nUR                  5       (       d  M;  UR                  S5      n[        US5      (       a  UR                  5       (       d  Mt  US;  a  M|   U R                  SU5        [        R                  " S	5         UR                  5         Sn[        R                  " S5          O   [        U 5      n	[        U	 V
s1 s H+  oR                  S5      (       d  M  U
R                  S5      iM-     sn
5      nX:X  a  US-  nOSnUnU(       d  US:  a    g  U R                  S5        [        R                  " S5        GM{     g ! [         a     Nf = f! [         a)     U R                  S
U5         N! [         a      GM  f = ff = f! [         a     GM  f = fs  sn
f ! [         a     Nf = f)Nr   F)zz//button[contains(translate(normalize-space(.),'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'),'more results')]zu//a[contains(translate(normalize-space(.),'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'),'more results')]disabledr"  )Nfalser#   r3  rB   r4  T333333?r  r   r&   z/window.scrollTo(0, document.body.scrollHeight);g      ?)r  r  r  r   r  r  r  r  r"  rt  ry   rs   rx  r#  r  r  )ro  
max_roundsstable_rounds
last_count_clicked_morer  r.  disabled_attrlinksr   counts               rK   scroll_to_load_allr  ^  s   MJ:v
B!))"((B7'')) # 1 1* =3--cnn6F6F (;;))Y 

4 !IIK  $

3G
N &f-eWev?V,Q__V,eWXQMMJ  2	!!"ST 	

3 8 ! 
 ! !!--.EsK$ ! !	!   X  		s   5G7GG F2G	FG	G&
&G&
!G+
FGFG
G)F=;G=
G	GGG	GG
G#"G#+
G87G8c                   U =(       d    SR                  5       n [        R                  " SSU 5      n U (       d  gU R                  5       nUS;   a  g[        R                  " SU 5      (       a  gSU ;   a  g[        U 5      S:  a  gU $ )Nr#   rP   rQ   >   r  r  r  r  
QUICK VIEWADD TO CARTVIEW DETAILSr'   z\d{2,}$r&   )rU   rS   rT   r  r  r  )rV   r  s     rK   _clean_listing_namer    sx    	
bA
vsAA		A 	 	 	||Iq!!
ax
1vzHrM   c                   / n UR                  U R                  S5      =(       d    S5         UR                  U R                  S5      =(       d    S5         UR                  U R                  =(       d    S5        / nU HQ  nU(       d  M  [	        U5      R                  5        H(  n[        U5      nU(       d  M  UR                  U5        M*     MS     [        5       n/ nU H,  nXE;  d  M
  UR                  U5        UR                  U5        M.     U(       d  gSS jnUR                  USS9  US   $ ! [         a     GNf = f! [         a     Nf = f! [         a     Nf = f)	zJBest-effort: read the product name as displayed on a catalog listing tile.
aria-labelr#   r   c                Z  ^  [        T 5      n[        R                  " ST 5      (       a  US-  n[        R                  " ST R                  5       5      (       a  US-  n[        R                  " ST R                  5       5      (       a  US-  n[	        U 4S jS 5       5      (       a  US	-  nU$ )
Nz[A-Za-z]   \bWOMEN'?S\br   
\bMEN'?S\b   c              3  H   >#    U  H  oTR                  5       ;   v   M     g 7fr   )r  .0rO  rV   s     rK   	<genexpr>;_listing_name_from_anchor.<locals>.score.<locals>.<genexpr>  s     X'W!AGGI~'Ws   ")viewquickcartcomparewishlistrA   )r  rS   r  r  any)rV   scs   ` rK   score(_listing_name_from_anchor.<locals>.score  s    V99[!$$"HB99_aggi00"HB99]AGGI..!GBX'WXXX"HB	rM   T)r  reverser   )rV   r$   r   rw   )
r  r  ry   r`   r$   
splitlinesr  r  r  sort)r   r  linesclnr  uniqr  s           rK   _listing_name_from_anchorr    sC   J!//,7=2>!//'28b9!&&,B' Ea&##%B$R(BrR  &  5DD>HHRLKKO 
 
 	II%I&7NY      s4   )D5 )E $E 5
EE
EE
E#"E#c                @   U (       a  U(       d  gSnU  H  n[        U[        5      (       a  [        U5      S:  a  M)  US   =(       d    SR                  5       nU(       d  MN  [	        UR                  US5      5      nU(       d  Mr  US   U:w  d  M}  XSS'   US-  nM     U$ )zQOverwrite Name column using preferred mapping (by style). Returns number updated.r   rD   r#   r   )rn  ro  r  rU   r  rx   )rh   	preferredupdatedr-  r  prefs         rK   apply_preferred_namesr    s    yG!T""c!fqj1""$"9==#;<4AaDDLaDqLG  NrM   c           	     V   / n[        5       n[        5       n0 n[        U [        5        [        U 5        [	        U 5      =(       d    [        [        5      n[        R                  R                  S[        U5       S35        U H&  n[        R                  R                  SU S35        M(     U GHt  n[        5         [        X5        [        X5        [        U SS9  [        U 5        [        U 5        / n[!        U 5       H@  n	U	R#                  S5      n
U
(       d  M  [%        U
5      (       d  M/  UR'                  U
5        MB     [)        U 5       H  n
X;  d  M
  UR'                  U
5        M     [        R                  R                  SU S[        U5       S	35        U H  n
X;   a  M
  [+        U
5      n[,        (       a  U(       a  S
U;   a   U(       a)  X;   a  UR/                  U
5        MM  UR/                  U5        UR/                  U
5        UR'                  U
5        M     GMw     X4$ )zCollect unique product links from all configured catalog sections.

Dedupes by style number when possible (preferred), otherwise by URL.
z([collect] footwear catalogs discovered: r   z[collect] catalog: #   r%  r  z
[collect] z -> discovered z candidate product links
zcatalog=international)r  r1  r  r;  rN  ro  rr   r   stderrr   r  r}  r  r_  r  r  r  r  r  r  r  rk  "PREFER_INTERNATIONAL_LISTING_NAMESr  )ro  	all_links	seen_href
seen_styleri   re   r  r  
page_linksr   r  r  s               rK   collect_product_linksr    s   
 I%I5J&(OV01&!1&9OT,=OLJJ?L@Q?RRTUV

.qc45  %';v6" !#
&v.A??6*D#D))d# / <FCD%!!$' D 	

:cU/#j/9JJdefD *40E11e@W[^@^&MM$'u%MM$T"! 3 V %%rM   c                   U (       d  g[         R                  " SU 5      nU(       a  UR                  S5      $ [         R                  " SU 5      nU(       a  UR                  S5      $ [         R                  " SU [         R                  S9nU(       a  UR                  S5      $ [         R                  " SU [         R                  S9nU(       a  UR                  S5      $ [         R                  " SU 5      nU(       a%  S	U R	                  5       ;   a  UR                  S5      $  [        [        U 5      R                  5      nS
 H  nUR                  U5      =(       d(    UR                  UR	                  5       5      =(       d    / nU H?  n[         R                  " S[        U5      5      nU(       d  M,  UR                  S5      s  s  $    M     g! [         a     gf = f)Nr#   z/safety-boot/(\d+)[-/]r   z/safety-boot/(\d+)z2/footwear-[^/]+/(?:[^/?#]*/)?(\d{3,6})(?:[-/?#]|$)r  z/(\d{3,6})\.html(?:[?#]|$)z/(\d{3,6})(?:[-/?#]|$)zfootwear-rwbrr  rh  )rS   r  r  r  r  r   r   r  rx   r$   ry   )linkr  qr  valsrJ   s         rK   rk  rk  C  sh   
		+T2Awwqz
		'.Awwqz 			GUWUbUbcAwwqz
		/R]]KAwwqz
		+T2A_

,wwqz	Xd^))*NC55:9syy{!39rDII.A71771:%  O   s   #BG 4G 	G 
GGc                   ^  T =(       d    SR                  5       R                  5       m T (       d  gSn[        U 4S jU 5       5      (       a  g[        T 5      S:  a  gg)Nr#   T)zwindow.openr  zfacebook.comzhttp://zhttps://c              3  ,   >#    U  H	  oT;   v   M     g 7fr   rI   r  s     rK   r  #_looks_like_junk.<locals>.<genexpr>n  s     
';a6;   r+  F)rU   r  r  r  )rV   junk_tokenss   ` rK   _looks_like_junkr  h  sL    	
b!AWK

';
'''
1v{rM   c                     U S   =(       d    SR                  5       nU S   =(       d    SR                  5       nU(       d  gUR                  5       [        :X  a  gg! [         a     gf = f)Nr   r#   r   TF)rU   ry   r  BAD_NAME_SENTINEL)rowr  r   s      rK   _is_bad_rowr  v  sd    Q2$$&A"##% zz|((  s   8A 
A)(A)c                V   U R                  S5      (       a  gU R                  S/ 5      =(       d    / n[        U R                  S/ 5      =(       d    / 5      nSn[        5       nU H  n[        U[        5      (       a  U(       d  M!  US   =(       d    SR	                  5       nU(       d  MF  [        U5      [        :  a  UR                  U5        Ml  [        U5      S:  a  US   =(       d    SR	                  5       OSn[        U5      S:  a  US   =(       d    SR	                  5       OSnU(       a	  U(       a  M  UR                  U5        M     U(       d  g[        U5       H4  n	[        U	5      n
U
(       d  M  X;   d  M  UR                  U	5        US-  nM6     [        U5      U S'   U$ )	zIf checkpoint rows are missing URL/Image columns or have empty URL/Image, requeue those links.
Runs at most once per checkpoint unless you delete/clear ck['media_repair_done'].
rj   r   rh   rg   r#   rD   r&   r   )rx   r  rn  ro  rU   r  EXPECTED_COLSr  rk  discardsorted)rS  rh   rg   removedstyles_neededr-  r  url_cellimg_cellr  rP  s              rK   repair_missing_mediar    sX    
vv!""66&"#DRVVL"-34JGEM!T""!1""$q6M!e$+.q6A:AaDJB%%'2+.q6A:AaDJB%%'2xxe$  Z #D)2"%t$qLG	 ! j)B|NrM   c                    [        U R                  S/ 5      =(       d    / 5      n[        U R                  S/ 5      =(       d    / 5      nU(       a  U(       d  gU Vs1 s H"  o3(       d  M  [        U5      (       d  M  US   iM$     nnU(       d  gSn[        U5       H4  n[	        U5      nU(       d  M  Xt;   d  M  UR                  U5        US-  nM6     U(       a  [        U5      U S'   [        U R                  S0 5      =(       d    0 5      n[        UR                  5       5       H0  n	[	        U	5      nU(       d  M  Xt;   d  M  UR                  U	S5        M2     XS'   U R                  S/ 5      =(       d    /  V
s/ s H  n
[	        U
5      U;  d  M  U
PM     sn
U S'   [        U 5        U$ s  snf s  sn
f ! [         a     gf = f)aM  If checkpoint contains obviously bad rows, un-mark those links as 'done'.

This fixes the situation where a previous run captured the site header/ads into the Name/Brand
columns and those rows are now 'stuck' because resume logic skips already-done links.

Returns:
    Number of links that were re-queued (removed from done_links).
rh   rg   r   r   rk   Nrl   )ro  rx   r  r  rk  remover  rp  keyspopr   ry   )rS  rh   rg   r-  
bad_stylesr  r  rP  fckr  s              rK   repair_bad_checkpoint_rowsr    s   $BFF62&,"-b17R8
:$(ADqAd+a.dadD
A$D'-Brb&!!$'1	 % %j1B| bff]B/526B"'')_+A.2"*FF1dO % !#} 4666:Mr3R3XVX3X 'V3XQ)?)B*)T ()3X 'VB"# B9 B.'V  sg   AF6 F6 
F,+F,=	F,	F6  F6 4F6 ;A?F6 >F6 7F6 <F1F1F6 ,
F6 6
GGc           
        U =(       d    SR                  5        Vs/ s H  o"R                  5       PM     nn[        U5       H  u  pEU(       d  M  UR                  U5      (       d  M&  U[	        U5      S R                  S5      nU(       a  Us  $ [        US-   [        US-   [	        U5      5      5       H+  nX7   =(       d    SR                  5       nU(       d  M'  Us  s  $      g   gs  snf )u   Parse a simple field/value from extracted page text.

Red Wing pages often render fields in tables so the extracted text looks like:

    Name
    DynaForce®

We support both 'Name: DynaForce' and 'Name' on one line with the value on the next.
r#   Nz :	r   r   )r  rU   	enumerater  r  r  r   )	
text_block
field_namer  r  ir   tailjnxts	            rK   parse_field_liner    s     $.#3"?"?"AB"ABXXZ"AEBU#??:&&J()//7D1q5#a!eSZ"89x~2,,.3J :  $  Cs   C)c                   U =(       d    SR                  5       n[        R                  " SU[        R                  S9nU(       a  UR	                  S5      R                  5       nU(       a`  / nUR                  5        H9  nUR                  UR                  5       (       a  UOUR                  5       5        M;     SR                  U5      $ UR                  5        H  n[        R                  " SU[        R                  S9(       d  M.  [        R                  " SUR                  5       [        R                  S9nU(       a  UR	                  S5      R                  5       nU(       ab  / nUR                  5        H9  nUR                  UR                  5       (       a  UOUR                  5       5        M;     SR                  U5      s  $   g   g)Nr#   z^(.*?)\s+style\s*#\s*\d+r  r   rQ   z\bstyle\s*#\s*\d+\bz^(.*?)\s+style\s*#\s*\d+\b)rU   rS   r  r  r  splitr  isupper
capitalizer  r  )	header_text	body_texthtr  r  r  r   r   m2s	            rK   extract_brandr    s9   

	"	"	$B
		-rGAggaj CYY[

		1@ !88C= $$&99+TGG8$**,bmm\Bhhqk'')C YY[

		1H )88C=( ' rM   c                    U(       aR  U R                  5       R                  SU 35      nUS:w  a*  XUS-    n[        US5      nU(       a  UR                  5       $ [        U S5      nU(       a  UR                  5       $ S$ )Nz
ABOUT THE r  i@  r)   r#   )r  rl  r  rU   )r  r  idxchunkvals        rK   extract_about_namer    sr    oo$$z%%9:"93:.E"5&1Cyy{"
9f
-C399;%2%rM   c           	     *   [        S [        R                  " SU R                  5       5       5       5      nU[        S [        R                  " SU R                  5       5       5       5      -  n[	        SS5       Vs0 s H
  o" S3X!;   _M     sn$ s  snf )Nc              3  8   #    U  H  n[        U5      v   M     g 7fr   rw   r  r   s     rK   r   parse_heights.<locals>.<genexpr>  s     V U1A U   z\b(\d{1,2})\s*-\s*INCH\bc              3  8   #    U  H  n[        U5      v   M     g 7fr   r  r  s     rK   r  r     s     S!RAQ!Rr  z\b(\d{1,2})\s+INCH\b      ")r  rS   findallr  r  )r`   foundr  s      rK   parse_heightsr    st    V

+F

 UVVE	SS,CTZZ\!RSSSE+0B<8<acGaj <888s   <Bc                    U =(       d    SR                  5       nUR                  S5      nUR                  S5      nUR                  S5      nSU;   a  UR                  S5      (       d  SU;   a  SnX#U4$ )Nr#   zred wingzirish setterworxzby red wingT)r  r  )	brand_strbis_rwis_isis_worxs        rK   classify_brand_familyr  $  sj    	b!ALL$ELL(Ell6"G!all622!9K  rM   c                   U =(       d    SR                  5       n[        R                  " SSU5      nUR                  SS5      n[        R                  " SSU[        R                  S9nUR                  5       R                  SS5      n[        R                  " SSU5      nU(       d  S	n[        U5      U:  a  US U R                  S5      nU$ )
Nr#   rP   rQ      ®z[^\w\-\.\s]+r  r  z_+boot)rU   rS   rT   rR   UNICODEr  r   )r   max_lenrV   s      rK   _safe_filename_from_namer  3  s    	A
vsAA			$A
ARZZ8A		#s#A
uc1A
1vhwKs#HrM   c                     [         R                  " 5       R                  S-  R                  5       n U R	                  SSS9  U $ )NImagesTr   )r   cwdparentresolver   r?  s    rK   _images_dirr  A  s6    			X	%..0AGGD4G(HrM   c                6    [        [        S5      S-  U -  5      $ )Nz..r  )r$   r   )fnames    rK   _rel_image_pathr  G  s    tDzH$u,--rM   c                  ^^^ S HF  n U R                  [        R                  U5      nU(       a  UR                  5       (       a  Us  $ MF  MH      U R                  [        R
                  S5      nUR                  [        R                  S5      nU(       a  UR                  5       (       a  U$  U R                  [        R                  S5      nU(       d!   U R                  [        R                  S5      nS nSnU GHj  n UR                  5       (       d  M  UR                  =(       d    0 n[        UR                  S5      =(       d    S5      n	[        UR                  S5      =(       d    S5      n
U	S	:  d  U
S	:  a  M  UR                  S
5      =(       d    SR                  5       mUR                  S5      =(       d    SR                  5       mUR                  S5      =(       d    SR                  5       mX-  n[        U4S jS 5       5      (       a  US-  n[        U4S jS 5       5      (       a  US-  n[        U4S jS 5       5      (       a  US-  nX:  a  UnUnGMj  GMm     U$ ! [         a     GMw  f = f! [         a     GNf = f! [         a    / n GNf = f! [         a    / n GNf = f! [         a     GM  f = f)N)@div.slick-slide.slick-current.slick-active img[itemprop='image'].div.slick-slide.slick-current.slick-active imgHdiv.c-image-carousel__slider-item.js-carousel-item img[itemprop='image']productImageimgmain imgr   r   r   heightx   altr#   classsrcc              3  ,   >#    U  H	  oT;   v   M     g 7fr   rI   )r  r  r$  s     rK   r  1_pick_best_product_img_element.<locals>.<genexpr>|  s     P%O8%Or  )r  shoechukkahikermocr  c              3  ,   >#    U  H	  oT;   v   M     g 7fr   rI   )r  r  clss     rK   r  r(  ~  s     M%L8%Lr  )productprimaryheroimagegffffff?c              3  ,   >#    U  H	  oT;   v   M     g 7fr   rI   )r  r  r&  s     rK   r  r(    s     R%Q8%Qr  )z	/dw/imagescene7z/imagesstaticg?)r  r   r  r  ry   r5  r  r  sizer   rx   r  r  r  )ro  r,  r   	containerimgsbest
best_scorer:  szr   hr  r$  r.  r&  s               @@@rK   _pick_best_product_img_elementr=  K  ss   
	%%boos;Cs''))
 *s''~>	$$R__e<3##%%J
##BOOZ@ 	''U;D DJ	??$$BBbffWo*+AbffX&+!,A3w!c'##E*0b779C##G,299;C##E*0b779CEEP%OPPPM%LMMMR%QRRR!"
 "' 0 Kc  		    
  	D	6  		sh   =I/AJ 0 J  J% J8A'J8C"J8/
I>=I>
JJJ"!J"%J54J58
KKc                H   S H  n U R                  [        R                  U5      nUR                  S5      =(       d    SR	                  5       nUR                  S5      =(       d    SR	                  5       nU(       d  U(       a  U=(       d    Us  $ M      U R                  [        R                  S5      n UR                  [        R                  S5      nU(       ai  UR                  S5      =(       d    SR	                  5       nUR                  S5      =(       d    SR	                  5       nU(       d  U(       a  U=(       d    U$ S H  n U R                  [        R                  U5      nUR                  S5      =(       d    SR	                  5       nUR                  S5      =(       d    SR	                  5       nU(       d  U(       a  U=(       d    Us  $ M     g! [
         a     GM  f = f! [
         a    S n GN'f = f! [
         a     Nf = f! [
         a     M  f = f)N)r  r  r  z6div.c-image-carousel__slider-item.js-carousel-item imgzoomimgr#   r&  r  r   )z$li[data-orbit-slide="product-1"] imgzli.active imgzul#productImage li.active imgzmain li.active imgr!  )r  r   r  r  rU   ry   r5  )ro  r,  r   rM  rV   r7  s         rK   _extract_product_image_urlr@    s   	%%boos;C""9-3::<A""5)/R668AAvA  ''~>		((%@C ""9-3::<A""5)/R668AAvA
	%%boos;C""9-3::<A""5)/R668AAvA   G  		  	C	  "  		s\   BG H 8 G0 A/H BH
G-,G-0H <H ?H  H 
HH
H! H!c                j   U=(       d    SR                  5       nU(       d  gSU S3n[        5       U-  n [        U 5      nU(       as  UR                  SS5      nSS0n[        R
                  " XESS	9nUR                  (       a7  UR                  (       a&  UR                  UR                  5        [        U5      $ S
n [        U 5      nU(       d  g U R                  SU5        [        R                  " S5         [        U 5         UR!                  [#        U5      5        [        U5      $ ! [         a     N}f = f! [         a    S
n Nf = f! [         a     N`f = f! [         a     Ndf = f! [         a:     U R%                  [#        U5      5        [        U5      s $ ! [         a      gf = ff = f)zSave the main product image as a temp file (__<style>.png).

Preferred: download the product image URL (zoomimg/src) for best quality.
Fallback: element screenshot if download fails.
Returns relative path like ../Images/__595.png (or empty string).
r#   __.pngz&amp;&r"  zMozilla/5.0rA   )r$  r&  Nr3  r  )rU   r  r@  rR   requestsrx   okrj  write_bytesr  ry   r=  rt  rs   rx  r  
screenshotr$   save_screenshot)ro  r  r  out_pathimg_urlr$  r-  img_els           rK   capture_product_image_temprM    s    [b!EtE}u$H
,V4oogs3G#]3GWrBAtt		$$QYY/&u--
 F/7 _agh

3v#h-(u%%5      
    	""3x=1"5)) 			sx   BD, >D< (E ;E $E. ,
D98D9<E
E
EE
E+*E+.
F29$F F2 
F.*F2-F..F2c                ~   U(       d  g[         R                  " 5       U-  R                  5       nUR                  5       (       d  U$ [        R
                  " SSU =(       d    SR                  5       5      nU(       d  U$ U S3n[        5       U-  n UR                  U5        [        U5      $ ! [         a    Us $ f = f)z8Rename ../Images/__<style>.png -> ../Images/<style>.png.r#   z[^\dA-Za-z_-]rC  )r   r  r  rp   rS   rT   rU   r  rR   ry   r  )r  	boot_namerel_temp_pathtemp_absstyle_cleanr  dest_abss          rK   finalize_image_filenamerT    s    
]*335H??&&)2/B/B/DEKm4 E}u$H" 5!!  s   B- -B<;B<c                   U =(       d    SR                  5       nU=(       d    SR                  5       n[        R                  " SU5      (       d  SU;   d  SU;   d  SU;   a  g[        R                  " SU5      (       d  SU;   d  S	U;   d  S
U;   a  g[	        [        R                  " SU5      5      n[	        [        R                  " SU5      5      nXE4$ )zInfer gender flags from URL + header (do NOT use body text; it frequently contains both words).

Priority:
  1) URL slug/query (mens/womens)
  2) Header text (MEN'S / WOMEN'S)
  3) Otherwise: unknown -> (False, False)
r#   z/womens(?:[-/]|$)zwomens-zgender=womenzgender=female)FTz/mens(?:[-/]|$)zmens-z
gender=menzgender=male)TFr  r  )r  r  rS   r  r  )r  
source_urlhuulmalefemales         rK   infer_genderr[    s     ,B			B

	!	!	#B 
yy%r**i2oSUAUYhlnYn	yy#R((GrM\R=OS`dfSf		-,-D"))OR01F>rM   rV  image_rel_pathr  c                  ^< U=(       d    SnU=(       d    SnUR                  5       nUR                  5       n[        X#5      n	[        U	5      (       a  Sn	[	        U	5      u  pnU	(       dZ  UR                  5       nUR                  5       nSU;   d  SU;   a  Sn	OSU;   d  SU;   a  Sn	OSU;   d  SU;   a  Sn	[	        U	5      u  pnUR                  5       n[        X$5      u  nnU(       dm  U(       df  USS	 n[        [        R                  " S
U5      5      n[        [        R                  " SU5      5      nU(       a
  U(       d  SnOU(       a	  U(       d  SnSU;   nSU;   =(       d    SU;   nSU;   =(       d    SU;   =(       d    SU;   nSU;   =(       d    U=(       d    U=(       d    UnU(       + nSU;   =(       d    SU;   nSU;   n[        US5      nU(       a  UR                  5       R                  5       n[        R                  " SU5      (       a  SnO[        R                  " SU5      nU(       a   [        UR                  S5      5      S:  nOBSU;   =(       d    SU;   nO0[        R                  " SU5      (       a  SnOSU;   =(       d    SU;   nSU;   =(       d    [        R                  " S U5      SLn S!U;   =(       d    [        R                  " S"U5      SLn!S#U;   =(       d    S$U;   n"S%U;   =(       d    [        R                  " S&U5      SLn#S'U;   =(       d    S(U;   =(       a    S)U;   n$S*U;   n%[        US+5      n&U&(       a  U&R                  5       R                  S,5      OS-U;   n'[        US.5      n(Sn)U((       a,  U(R                  5       m<[        U<4S/ jS0 5       5      (       d  Sn)[        US15      =(       d    [        US25      n*Sn+U*(       a.  U*R                  5       R                  5       R                  S,5      n+OI[        R                  " S3U5      (       d  [        R                  " S4U5      (       a  S,U;   =(       d    S5U;   n+U=(       d    SR                  5       n,S6U,;   =(       d    S7U,;   =(       d    S8U,;   n-S9U,;   n.S:U,;   n/[!        US;-   U-   5      n0[        US<5      n1U1R                  5       n2S=U2;   =(       d    S>U2;   n3S?U2;   =(       d    S@U2;   =(       d    U3n4[#        5       n5U(       a   [%        USA5      n6U6R'                  S5       Hx  n7SB Ho  n8U7R)                  U85      n9[+        U9[,        5      (       d  M+  U9R                  5       (       d  MB  U5R/                  U9R                  5       R                  5       5        Mq     Mz     U'(       d(  U5(       a!  U5 H  n:U:S-:X  d  SCU:;   d  M  SDU:;   d  M  Sn'  O   U5(       aS  U5 HM  n:U:R1                  SESF5      n;U3(       d  SGU;;   d  SHU;;   a  Sn3Sn4M.  U4(       a  M7  SIU;;   d  SJU;;   d  SKU;;   d  MK  Sn4MO     / [3        U 5      P[3        U5      P[3        U5      P[3        U5      P[3        U	5      P[5        U5      P[5        U5      P[5        U
5      P[5        U5      P[5        U5      P[5        U5      P[5        U5      P[5        U5      P[5        U5      P[5        U5      P[5        U5      P[5        U5      P[5        U5      P[5        U 5      P[5        U!5      P[5        U"5      P[5        U#5      P[5        U$5      P[5        U%5      P[5        U'5      P[5        U)5      P[5        U+5      P[5        U-5      P[5        U.5      P[5        U/5      P[5        U0SL   5      P[5        U0SM   5      P[5        U0SN   5      P[5        U0SO   5      P[5        U0SP   5      P[5        U0SQ   5      P[5        U0SR   5      P[5        U0SS   5      P[5        U45      P[5        U35      P$ ! [         a    Sn GNff = f! [         a     GN{f = f)Ta  
Build a row for the markdown table from the scraped pieces of a product page.

In addition to the previously-supported text parsing, this version accepts the
raw HTML of the page (via the ``html`` parameter) so that additional
features can be inferred from non-visible attributes such as ``alt``,
``title``, or ``aria-label`` on icons.  Passing ``html`` is optional; if
omitted, the function falls back to the older behaviour.
r#   zIRISH SETTERr0   WORXr1   zRED WINGr/   NiX  r  r  Tz	STEEL TOEzALUMINUM TOEz	ALLOY TOEzNON-METALLIC TOEzNON METALLIC TOEzCOMPOSITE TOEz
SAFETY TOEzMETATARSAL GUARDz	MET GUARD
WATERPROOFr3   z$\b(non[-\s]?insulated|uninsulated)\bFz(\d+)\s*g\br   r   insulat
thinsulatezSLIP RESISTANTz\bSR\bzELECTRICAL HAZARDz\bEH\bzPUNCTURE RESISTANTPUNCTUREzSTATIC DISSIPATIVEz\bSD\bzANKLE PROTECTIONankleprotectBOAr4   r    zDEFINED HEELzLeather Typec              3  ,   >#    U  H	  oT;   v   M     g 7fr   rI   )r  r   lls     rK   r  '_build_row_from_text.<locals>.<genexpr>  s     `$_q7$_r  )meshnylonfabrictextilepoly	syntheticr5   
Resolvablez\bresoleable\bz\bresolvable\br!   oxfordathleticr)  r*  r+  r   zCountry of Originzmade in usazmade in the usazbuilt in usazassembled in the usar  )r$  r   r  DEFINEDHEEL    rQ   zMADE IN USAzMADE IN THE USAzBUILT IN USAzASSEMBLED IN USAzASSEMBLED IN THE USAr8   r9   r:   r;   r<   r=   r>   r?   )r  r  r  r  r  r[  r  rS   r  r  rU   rw   r  ry   r  r  r  r  r   r  rx   rn  r$   r  rR   rW   rL   )=r  r   r  r  rV  r]  r  
body_upper
body_lowerr  r  r	  r
  rW  buheader_upperrY  rZ  first_chunk
male_foundfemale_found	steel_toealuminum_toenon_metal_toe
safety_toesoft_toe	met_guard
waterproofinsulation_lineil	insulatedr  slip_resistantelectrical_hazardpuncture_resistantstatic_dissipativeankle_protectionboadefined_heel_linedefined_heelleather_lineall_leather_upperresoleable_line
resoleable
name_loweroxford_athleticr*  r+  heightsorigin_lineorigin_lowermade_in_usabuilt_in_usa
alt_tokensr  r  rJ  r  tokrO  rh  s=                                                               @rK   _build_row_from_textr  $  sI   ( RI\rF"J"Jf0I 	""	1)<E'\\^__R>R#7&Ir\Vr\I2r!1"I 5i @g<<>L3LD&  #&"))M;?@
BIIo{CDlD*F z)I!Z/L;*3LL':5;MQ[;[apt~a~M*,[[l[mJ~H#z1Q{j7PI+J&y,?O""$**,99<bAAI		."-A% #AGGAJ! 3I '"_E,"2D	99<jIII"j0Qlj6PI&*4f"))Iy:Yae:eN,
:l		)U^@_gk@k.*<[*PZBZ.*<n"))IW`BaimBm*j8pg>S>oXaeoXoJC(NCBS$**,77>ZhlvZvL#I~>L!`$_``` $&y,?lCST]_kClOJ$**,224??F
	$j	1	1RYY?PR\5]5]j(@Fj,@
*"##%J:-f:3KfQW[eQeO#Fz!EFTMI56G"9.ABK$$&L L0X6G<6WK"l2n8NR^8^ncnL 5J
	 }5D}}T*:D''$-C!#s++		"syy{'8'8':; ; + JCn$c)9fm#	  CFC(AMQ$6:Kq:P"#<^q%8<NRS<SWmqrWr# 1u1t1 	z"1 	~&	1
 	y!1 	D	1 	F1 	E
1 	E
1 	G1 	J1 	I1  	M!1" 	L#1$ 	I%1& 	H'1* 	J+1, 	I-1. 	N/10 	112 	314 	516 	718 	C91< 	L=1> 	?1@ 	JA1D 	OE1F 	FG1H 	E
I1L 	GDMM1N 	GDMO1P 	GDMQ1R 	GDMS1T 	GDMU1V 	GENW1X 	GENY1Z 	GEN[1^ 	L_1` 	Ka1 1o ! % $I%z  		s1   5_, 8A	_? _? 5_? ,_<;_<?
``c                V   [         [        U 5      R                  =(       d    S;   a  [        S5      eS n[	        U 5       H:  n [        USS9n[        XC5      u  pVnUn[        U5      n	[        UUUU	U UUS9n
U
s  $    U(       a  Ue[        S5      e! [         a  nUn S nAMd  S nAff = f)Nr#   z3HTTP fallback disabled for authenticated order siterA   r%  r\  zHTTP fallback failed)
rF  r   rE  r3  r  r  r}  r  r  ry   )r  image_temp_relr  r  r  r  r   rs  r  r  r  rW  s               rK   _scrape_product_via_httpr  	  s    Xd^**0b1PQQ(,H4 	q"-D&Ft&O#EF%d+I&-C J !& 
-
..  	H	s   5B
B(B##B(prefer_httpc          
        Sn[        U5      nUn[        5         U(       a
   [        XS9$  [        [        U5      R                  =(       d    S;   a  [        U [        5        [        X5        [        U 5        U R                  =(       d    Sn[        Xa5      u  pGn U(       a  [        X5      n[        U S5      R                  [         R"                  " [$        R&                  S45      5        U R)                  [$        R&                  S5      R*                  =(       d    Sn	Un
[-        UUU
U	UUUS9nU$ ! [         a     GNf = f! [         a    Sn Nf = f! [         a    [        XS9s $ f = f)Nr#   )r  r  r  r\  )rk  r}  r  ry   rF  r   rE  r1  r  r  r  r  r}  rM  r   r6  r7  presence_of_element_locatedr   r  r  r`   r  )ro  r  r  r  r  style_from_urlr  r   rs  r  r  r  s               rK   scrape_productr  	  sX   N"4(ENO	+DPP
"M$..4"5V%89v !!'R"B4"NZ	 !;F!J 	fb!''(F(FU[G\(]^''V<AAGR	")
 
E  		$  	 N	 $  M'LLMsH   D3 A/E E .BE 3
E EEE EE E,+E,c           	     >   [         nSS jn[        XS9n/ nUR                  SSR                  U5      -   S-   5        UR                  SSR                  S/[	        U5      -  5      -   S-   5        U H  n[	        U5      [	        U5      :  a  US/[	        U5      [	        U5      -
  -  -   nO&[	        U5      [	        U5      :  a  US [	        U5       nUR                  SSR                  U5      -   S-   5        M     [        [        SR                  U5      S-   5        g )Nc                |    U S   n S[        [        R                  " SSU5      5      4$ ! [         a    SU4s $ f = f)Nr   z\Dr#   r   )rw   rS   rT   ry   )r-  rV   s     rK   	style_key!write_markdown.<locals>.style_keyS	  sE    aD	s266%Q/011 	q6M	s   "* ;;)r  rO   z---r#   r   )r-  	List[str])
MD_HEADERSr  r  r  r  rb   OUT_MD)rh   r$  r  sorted_rows	out_linesr-  s         rK   write_markdownr  P	  s    G -KIS388G,,s23S388UGc'l$:;;cABq6CL RDCL3q6122AVc'l"-3w< Asxx{*S01  9-45rM   c                 V   [         R                  5       (       d  / $ / n [         R                  SSS9R                  5        He  nUR	                  5       nU(       d  M  [
        R                  " SU5      nU(       d  M<  UR                  S5      nX0;  d  MT  U R                  U5        Mg     U $ )NrY   rR   )r[   r(  zhttps?://\S+r   )	r   rp   rv   r  rU   rS   r  r  r  )rK  r   r  r  s       rK   _read_errors_urlsr  j	  s    	D$$gi$HSSUzz|IIot,1
A}A V KrM   c                    U (       d   [         R                  SS9  g U  Vs/ s H  nS[         SU 3PM     nn[	        [         SR                  U5      S-   5        g ! [         a     g f = fs  snf )NTr.  FAILED x: r   )r   r;  ry   MAX_FAILS_PER_LINKrb   r  )rK  r  r  s      rK   _rewrite_errors_filer  {	  sw    	. 	;?@4aw)*#aS14E@TYYu-45	  		 As   A A,
A)(A)c                    [        5       n [        U R                  S/ 5      5      n[        U R                  S0 5      5      n[	        U R                  S/ 5      5      n[        U R                  S[        R                  " 5       5      5      n[	        U R                  S/ 5      5      nU(       d
  [        5       n[        (       a~  [        U 5      nU(       al  [        U R                  S/ 5      5      n[        U R                  S0 5      5      n[	        U R                  S/ 5      5      n[	        U R                  S/ 5      5      nS nS n [        U [        SSS9n[        U5        U R                  S	5      =(       d    / n	U R                  S
5      =(       d    0 n
[        (       d  U	(       d  [        SUS9nUR                  SSS9  [!        U5      u  pU(       a  Un	O["        R$                  R'                  S5        U(       a  Un
 [(        (       a;  U
(       a4  [+        X:5      nU(       a"  X0S'   XS
'   [-        U 5        [/        SU S35        XS	'   [(        (       a  XS
'   [-        U 5        [(        (       a`  U
(       dY   [!        U5      u  pXS
'   [-        U 5         U
(       a4  [+        X:5      nU(       a"  X0S'   XS
'   [-        U 5        [/        SU S35        [2        (       aR  [5        U 5      nU R                  S5      (       d  SU S'   [-        U 5        U(       a  [/        SU S35        [-        U 5        [6        R9                  5       (       aO  [6        R9                  5       nU	=(       d    /  Vs/ s H  n[;        U5      U:X  d  M  UPM     n	nXS	'   [-        U 5        [=        U	5      nUS:X  a  [?        S5      eU(       a  URA                  5         [        UUS9n[=        U5      nUR                  USS9  [C        U5       VVs0 s H!  u  nnU(       d  M  US   (       d  M  US   U_M#     nnnU	 GH  nUU;   a  US-  nUR                  USS9  M!  Sn  [E        UU5      nUS   (       d  [?        SU 35      eUS   (       d  [?        SUS    SU S35      e[(        (       a:  U
(       a3  [G        U
R                  US   S5      5      nU(       a  [I        U5      US'   [=        U5      S:  a  US   (       d  [I        U5      US'    [=        U5      S :  a,  US    (       a"  [I        [K        US   US   US    5      5      US '   US   U;   a  UUUUS      '   O"[=        U5      UUS   '   URM                  U5        URO                  U5        [Q        U5      U S'   X0S'   X S'   XPS'   [-        U 5        [S        U5        Sn US-  nUR                  UU(       a  S'OS(S9  GM     [S        U5        UR                  US)S9  [f        (       Ga  U(       Gay  ["        R$                  R'                  S*[=        U5       S+35        / n U(       a   URe                  5         [        U [        S,S[h        S-9n[        U5        [C        [	        U5      SS.9 H  u  nnSn[k        [l        5       Hi  n [E        UUSS/9nU(       aS  US   (       aG  US   (       a;  US   U;   a  UUUUS      '   O"[=        U5      UUS   '   URM                  U5        Sn  OMg  Mi  Mk     U(       a4  ["        R$                  R'                  S2U S$[=        U5       S3U S&35        M  URM                  U5        M     UnX0S'   XPS'   [-        U 5        [S        U5        [q        U5        ["        R$                  R'                  S5[r         S&35        [t        Rw                  5       (       a'  ["        R$                  R'                  S6[t         S&35         U(       a  URA                  5          U(       a  URe                  5         g g ! [0         a     GNf = f! [0         a    U
=(       d    0 n
 GNf = f! [0         a     GNf = fs  snf s  snnf ! [0         a     GN5f = f! [0         Ga  n[U        UR                  US5      5      S-   UU'   [Q        U5      U S'   X0S'   X S'   XPS'   [-        U 5        UU   [V        :  a`  UU;  a%  URM                  U5        [Y        S!UU    S"U 35        URO                  U5        [Q        U5      U S'   XPS'   [-        U 5         S nAGM|  ["        R$                  R'                  S#UU    S$[V         S%U S&35        ["        R$                  R'                  SR[                  [\        R^                  " [a        U5      UURb                  5      5      S&-   5         U(       a  URe                  5         O! [0         a     Of = f[        U [        SSS9n[        U5         S nAOS nAff = fGM  ! [0         a     GNf = f! [0         a>     UR                  S05        O! [0         a     Of = f[        Rn                  " S15         GM  f = f! [0         a  n["        R$                  R'                  S45        ["        R$                  R'                  SR[                  [\        R^                  " [a        U5      UURb                  5      5      S&-   5         S nAGNS nAff = f! [0         a     GN
f = f! [0         a     g f = f!  U(       a  URA                  5         O! [0         a     Of = f U(       a  URe                  5         f f ! [0         a     f f = f= f)7Nrg   rk   rh   rm   rl   rZ  T)rm  r[  r\  rf   ri   r   )r   rm   r   zRefreshing product links...)r   zN[warn] Link refresh discovered 0 links; falling back to checkpoint link list.
z)Applied preferred International names to z existing rowsrj   z
Re-queued z links due to missing URL/Imagez{No product links discovered from catalog page. Login/navigation likely succeeded but product link extraction found nothing.startingz	(resumed)Fz$Style number parsed empty for link: zName parsed empty for style r   )r#   rD   r&   r  r  z
Error scraping link (attempt r   z): r   rF  skippeddonez
Starting salvage pass for z failed links...
normal)rm  r[  r\  r]  )startr  rr  g      ?z	Salvaged z: z$
Salvage pass encountered an error:
z
DONE. Wrote: z#Some links failed repeatedly; see: )<r}   r  rx   rp  ro  r   rs   r  AUTO_REPAIR_BAD_ROWSr  rp  RUN_HEADLESSru  REFRESH_PRODUCT_LINKS_EACH_RUNr   r   r  r   r  r   r  r  r   printry   AUTO_REPAIR_MISSING_MEDIAr  r%   rU   rk  r  r3  r   r  r  r  rW   rT  r  r  r  r  rw   r  r   r  	tracebackformat_exceptiontype__traceback__quitENABLE_SALVAGE_PASSSALVAGE_PAGE_LOAD_TIMEOUTr  SALVAGE_MAX_TRIES_PER_LINKrx  r  r  r   rp   )rS  rg   rk   rh   rm   rl   repairedreporterro  rf   ri   fresh_linksfresh_preferred_namesnupdr  rmwantr  r   	processedr  r-  style_to_idxr  rF  r  r  rW  	remainingsalvageds                                 rK   mainr  	  s
   		BrvvlB78J"&rvvmR'@"AK !34DrvvlDIIK89J#'/BB(G#H-/
 -b1RVVL"56JrvvmR89Kvr*+D $RVV,?%D E ,0HFrLWcghF#/52*,&&1B*C*Ir))'aJGHOOA$AOB1Fv1N.K +

  e %"7	55/0GD%)6
0?,-'+ I$~^_ #011(7$%B .-o8%:6%B"(7$%#
	"0GD%)6
0?,-'+ I$~^_ %$%b)B66-..*.&'#
2$&EFG# ##%D)6)<")<c)<1AWXYAZ^bAbQ)<Mc"/BM"A:_  NN#%JG
O		
3 -6dOJODAqqQqT!aOJ!Dz!Q		<BL0(6Cq6*-QRVQW+XYYq6*-I#a&QSTXSYYZ+[\\ :9o2?3F3Fs1vr3RS%3D%9CF 3x!|CF!/!5As8a<CF%34KCPQFTWXYTZ\_`a\b4c%dCF
 1v-58\#a&12/24ySV,C(NN4('-j'9B|$!%vJ(3}%.?*+#B'"4(BF NIOOIRTYOHo "t 	tF+ #4JJ;C@Q<R;SSefg#%I,j ')'/!&&? $F+(.?)@JGAt$H"#=>,"04"PC"s1v#a&#&q6\#9ADDc!f)=$>;>t9LQ$8$(KK$4+/ % 39vs ?$  

((9QCq=N9O8PPRSWRXXZ)[\!((./  K2 %.! vJ&7"#B4  !23

?6("56JJB:,bQR	 	     8"1"7R8    d$ K@ % . ! 0(+KOOD!,D(E(IK%'-j'9B|$!%vJ(3}%.?*+#B'"4(,>>'88-44T:-D8I7J#dV.TU"t,+1*+=<(2C./'+JJ$$9+d:K9LAN`Maadeidjjlm JJ$$RWWY-G-GQQRTUTcTc-d%ehl%lm!"KKM$ *2Y`ostF'//?0] ~ % 0  ) ,% &

= 9#, % $% JJsOO,  j

  !IJ

  )C)CDGQPQP_P_)`!adh!hiij$  		
  			  			  		s*  B:l ?A` 0l 6`& ;a Bl %a<aA7l 9aa
a!)l B/a/;;a6A:a/0Bl 2i' :h 
Ai' Ah7Ai' Bl $k8 <l	 
`#l "`##l &`?;l >`??l 
al al 
a,(a/+a,,a//h:B,h &l -B
h 8gh 
gh gh ;l  hl 
hi' hi' 
i$'h98i$9
i	i$i	i$i' #i$$i' '
k51A9k0*l 0k55l 8
ll	
llm-l32m-3
m =m-?m  m-mm-
m*'m-)m**m-__main__)rJ   r  r   r$   )rV   r$   r   r$   )r_   r   r`   r$   r   r   )r   r   )r{   r   r   r   )r   r$   r   r   r^  )r   r$   r   Optional[str])r   r   )r   r  )r   zTuple[str, str])rB  r   r   r   )rS  r   r   r$   )T)rS  r   rm  r  r[  r$   r\  r  r]  rw   r^  r  r   zwebdriver.Firefoxr   )
r  r$   r&  rw   r  r   r  rw   r   r   )r  r$   r   r  )rA   )r  r$   r&  rw   r   r$   )r  r$   r   r$   )r  r$   r   r  )r   r  )r%  r$   r   r   )r&  rw   r   r   )r  r$   r  r$   r   zTuple[str, str, str])P   )r  rw   r   r   )r   r$   )rh   List[List[str]]r  zDict[str, str]r   rw   )r   z Tuple[List[str], Dict[str, str]])r  r$   r   r$   r  )r  r  r   r  )rS  r   r   rw   )r  r$   r  r$   r   r$   )r  r$   r  r$   r   r$   )r  r$   r  r$   r   r$   )r`   r$   r   zDict[str, bool])r  r$   r   zTuple[bool, bool, bool])r#  )r   r$   r  rw   r   r$   )r  r$   r   r$   )r  r$   r   r$   )r  r$   rO  r$   rP  r$   r   r$   )r  r$   rV  r$   r   zTuple[bool, bool])r  r$   r   r$   r  r$   r  r$   rV  r$   r]  r$   r  r$   r   r  r   )r  r$   r  r$   r   r  )r  r$   r  r  r   r  )rh   r  r   r   )rK  r  r   r   )r   
__future__r   rt   r   r  rS   r  r<  r   r9  rs   r  ry  rE  r  r  html.parserr   r5  pathlibr   typingr   r   r   r	   r
   urllib.parser   r   r   urllib.requestr   r   bs4r   seleniumr   selenium.common.exceptionsr   r   selenium.webdriver.common.byr   selenium.webdriver.common.keysr   "selenium.webdriver.firefox.optionsr   "selenium.webdriver.firefox.servicer   rh  selenium.webdriver.supportr   r7  selenium.webdriver.support.uir   rr   r  rF  r>  r   rx   r   r   rU   r  r  r  r%   __annotations____file__r  r  r  r  ro   r   r  r  r  r  r  r  r  r  PAGE_LOAD_TIMEOUTrl  NAV_SETTLE_SECONDSrw  re  r  r  r  rq   rF   rL   rW   rb   r}   r   r   r   r   r   r   r
  r   rQ  rY  rp  ru  r}  r  r  r  r  r  r  r  r  r  r  r1  r;  rC  rN  r_  r}  r  r  r  r  r  r  rk  r  r  r  r  r  r  r  r  r  r  r  r  r=  r@  rM  rT  r[  r  r  r  r  r  r  r  r   rI   rM   rK   <module>r     s  *X #  	  	   
       "   3 3 4 4 +
   K + / 6 H @ 7 3 #1o $	$  ::>>"46PQ ::>>"46FG zz~~mS1779??AEdd!#0BC!H!N!N!P!V!V!X\{!{  
C >!!#**	.	.<<
77
 
  +  &+ "
 J !     #         ::>>"4b9??AIT <
~I&
4G GZ#L!0AH!
L 5 &. $555 	5
 5 5 5 5p, %&C	C C 	C
 C 
CN"2$J $4
3#r"JCL=@aH0&REP1DC*H.BL41h"=&@ J"#J-d62	&9!.<@1f8v",6 [[
[ [ 	[ [ [ [ [|/: >C 0Mb64"6Zz zF rM   