+
    oºiO              	        a  0 t $ R t^ RIHt ^ RIt^ RIt^ RIt^ RIt^ RIt^ RI	t	^ RI
t
^ RIt^ RIt^ RIt^ RIt^ RIt^ RIt^ RIHt ^ RIt^ RIHt ^ RIHtHtHtHtHt ^ RIHtHt ^ RI H!t! ^ RI"H#t# ^ R	I$H%t%H&t& ^ R
I'H(t( ^ RI)H*t* ^ RI+H,t- ^ RI.H/t0 ^ RI1H2t2 R.t3]3^ ,          t4Rt5R]6R&   ]! ]74      Pq                  4       Pr                  t:]Pv                  Py                  RR4      P{                  4       P}                  4       R9   t?]:R,          t@]:R,          tA]:R,          tB^tCRtDRtERtF. RNRNRNRNRNR NR!NR"NR#NR$NR%NR&NR'NR(NR)NR*NR+NR,NR-NR.NR/NR0NR1NR2NR3NR4NR5NR6NR7NR8NR9NR:NR;NR<NR=NR>NR?NR@NRANtG]H! ]G4      tIRtJ^-tK^tLRBtMRtNRtO^ZtP^tQ^tR]Pv                  Py                  RCR4      P{                  4       ;'       g    RtSRD RE ltTRF RG ltURH RI ltVRJ RK ltWRL RM ltXRN RO ltYRP RQ ltZRR RS lt[ ! RT RU4      t\RV RW lt]RX RY lt^RZ R[ lt_R\ R] lt`R^ R_ ltaRR`RaRbRRc]KRdR/Re Rf llltbRg Rh ltc]K]M^3Ri Rj lltdRk Rl lteRRm Rn lltf ! Ro Rp]4      tgRq Rr lthRs Rt ltiRu Rv ltjRw Rx ltkRy Rz ltlRR{ R| lltmR} R~ ltnR R ltoR R ltpR R ltqR R ltrR R ltsR R lttR R ltuR R ltvR R ltwR R ltxR R ltyR R ltzR R lt{RR R llt|R R lt}R R lt~R tR R ltR R ltR R ltR R ltRR R lltRR R lltRR/R R lltR R ltR R ltR R ltR R lt]R8X  d
   ]! 4        R# R# )a9  
RW_Site_Scraper-For_Buisness.py
===============================

Scrapes Red Wing Safety "For Business" safety boots catalogue:
- https://www.redwingsafety.com/safety-boots/page-1/maxnum-0?catalog=international

Test mode:
- Set ONLY_STYLE at the top of this file to a style number (e.g. "400")
  to scrape just that single product.

Outputs (next to this script):
- RW_Site_Scrape.md
- RW_Site_Scraper_checkpoint.json   (resume state)
- RW_Site_Scraper_errors.txt        (links that failed repeatedly)

Stability features:
- `safe_get()` uses short timeouts + window.stop() so Selenium doesn't hang forever
- HTTP fallback (no Selenium) for the few product pages that still time out
- Optional salvage pass at the end to retry hard failures (HTTP-first)

Cross-platform (Windows + Linux Cinnamon):
- Headless Firefox (default)
- Geckodriver resolution "like Parts_Auto" (explicit Service path; no Selenium Manager):
    1) GECKODRIVER_PATH env var (file or directory)
    2) geckodriver(.exe) on PATH
    3) auto-download geckodriver (GitHub releases) into a user cache dir
    4) (optional) if double-click/no terminal and Tk is available, prompt to pick geckodriver

Feature columns are 1/0 (not Yes/No).
Includes Brand (string) + brand family flags (Red Wing / Irish Setter / Worx).

Dependencies:
- Python 3.9+
- Firefox installed
- Selenium installed:
    Linux Mint/Ubuntu: sudo apt install -y python3-selenium
    Windows: python -m pip install selenium

Notes for Linux Mint PEP 668:
- Prefer `python3-selenium` from apt (as above).
- This script does NOT require webdriver-manager.
)annotationsN)
HTMLParser)Path)DictListOptionalSetTuple)Requesturlopen)BeautifulSoup)	webdriver)TimeoutExceptionWebDriverException)By)Options)Service)expected_conditions)WebDriverWaitzPhttps://www.redwingsafety.com/safety-boots/page-1/maxnum-0?catalog=international str
ONLY_STYLERW_REFRESH_LINKS1zRW_Site_Scrape.mdzRW_Site_Scraper_checkpoint.jsonzRW_Site_Scraper_errors.txtTRED WING FOR BUSINESSFzStyle #NameURLImageBrandMaleFemaleRed WingIrish SetterWorxz
Safety Toez	Steel ToezNon-Metallic ToezAluminum ToezMetatarsal GuardzSoft Toe
Waterproof
InsulationzSlip ResistantzElectrical HazardzPuncture ResistantzStatic DissipativezAnkle Protectionu   BOA® Lacing SystemDefined HeelzAll Leather UpperzOxford/AthleticChukkaHiker5"6"7"8"9"10"11"12"zBuilt in USAzMade in USAg      ?GECKODRIVER_PATHc                    V ^8  d   QhRRRR/# )   vboolreturnr    )formats   ";.\Boot_Features\RW_Scrapers\RW_Site_Scraper-For_Buisness.py__annotate__r:      s      4 C     c                    V '       d   R # R# )r   0r7   )r4   s   &r9   b01r>      s    3r;   c                    V ^8  d   QhRRRR/# r3   sr   r6   r7   )r8   s   "r9   r:   r:      s      c c r;   c                    T ;'       g    R P                  RR4      p \        P                  ! RRV 4      P                  4       p V # )r   |z\|\s+ )replaceresubstrip)rA   s   &r9   md_escape_cellrJ      s:    	
b#u%A
vsA$$&AHr;   c               $    V ^8  d   QhRRRRRR/# )r3   pathr   textr   r6   Noner7   )r8   s   "r9   r:   r:      s!      t 3 4 r;   c                    V P                  V P                  R ,           4      pVP                  VRR7       VP                  V 4       R# )z.tmputf-8encodingN)with_suffixsuffix
write_textrF   )rL   rM   tmps   && r9   atomic_writerW      s7    


4;;/
0CNN4'N*KKr;   c                   V ^8  d   QhRR/# )r3   r6   r   r7   )r8   s   "r9   r:   r:      s     <
 <
 <
r;   c                    \         P                  4       '       g4   R \        R\        R. R. R. R/ RRR/ R	. R
\        P                  ! 4       RR/#  \
        P                  ! \         P                  RR7      4      p \        V P                  R ^ 4      4      pV\        8w  d[    \         P                  \         P                  RV R24      4       R \        R\        R. R. R. R/ R
\        P                  ! 4       RR/# V P                  R \        4       V P                  R\        4       V P                  R. 4       V P                  R. 4       V P                  R. 4       V P                  R/ 4       V P                  R	. 4       V P                  R
\        P                  ! 4       4       V P                  RR4       V #   \         d     Li ; i  \         dr     \         P                  \         P                  R4      4       M  \         d     Mi ; iR \        R\        R. R. R. R/ RRR/ R	. R
\        P                  ! 4       RR/u # i ; i)versioncatalog_urlsproduct_links
done_linksrowspreferred_namesmedia_repair_doneFfail_countshard_failed_links
started_atgeckodriver_pathNrP   rQ   z.json.vz.bakz.json.corrupt)
CHECKPOINTexistsCHECKPOINT_VERSIONCATALOG_URLStimejsonloads	read_textintgetrF   rS   	Exception
setdefault)dataold_vers     r9   load_checkpointrs      s&   )LR"Br2$))+
 	
-
zz*...@Adhhy!,-(("":#9#9GG9D:Q#RS -brdiik"D	 	 		#565,b)#r*+R0diik2*D1+  ,  
	z55oFG 		 )LR"Br2$))+
 	

sb   AG	  ,F8 ,G	 9B>G	 8GG	 GG	 	I(G>=I>H	IH6IIc                    V ^8  d   QhRRRR/# )r3   rq   r   r6   rN   r7   )r8   s   "r9   r:   r:      s     I I$ I4 Ir;   c           	     T    \        \        \        P                  ! V ^RR7      4       R# )r3   T)indent	sort_keysN)rW   re   rj   dumps)rq   s   &r9   save_checkpointry      s    TZZQ$GHr;   c                    V ^8  d   QhRRRR/# )r3   liner   r6   rN   r7   )r8   s   "r9   r:   r:      s     & &C &D &r;   c                    \        \        R RR7      ;_uu_ 4       pVP                  V P                  4       R,           4       RRR4       R#   + '       g   i     R# ; i)arP   rQ   
N)open
ERRORS_TXTwriterstrip)r{   fs   & r9   write_errors_liner      s9    	j#	0	0A	$% 
1	0	0	0s   'AA	c                   V ^8  d   QhRR/# )r3   r6   r5   r7   )r8   s   "r9   r:   r:      s      T r;   c                 4     ^ RI p R#   \         d     R# i ; i)    NTF)tkinterro   )r   s    r9   _can_use_tkr      s     s    c                    V ^8  d   QhRRRR/# )r3   titler   r6   Optional[str]r7   )r8   s   "r9   r:   r:      s        r;   c                   \         P                  P                  4       '       g   \        4       '       g   R #  ^ R Ip^ RIHpHp VP                  4       pVP                  4        VP                  RR4       VP                  RR4       VP                  V R7      pVP                  4        T;'       g    RP                  4       pV'       d   V# R #   \         d     R # i ; i)N)
filedialog
messageboxz-topmostTRW Site ScraperzCould not find geckodriver automatically.

Please select the geckodriver executable.
Windows: geckodriver.exe
Linux: geckodriver)r   r   )sysstdoutisattyr   r   r   r   Tkwithdraw
attributesshowinfoaskopenfilenamedestroyrI   ro   )r   tkr   r   rootrL   s   &     r9   _tk_pick_filer      s    
zz+--2uuw
D);	
 )))6

!!#t%% s$   A:C 3C C C CCc                  B    ] tR tRtRtR R ltRR R lltR R ltR	tR
# )ProgressReporteri  zDTTY progress bar, or Tk window if launched by double-click (no TTY).c                    V ^8  d   QhRRRR/# )r3   totalrm   rc   floatr7   )r8   s   "r9   r:   ProgressReporter.__annotate__  s     ! !c !u !r;   c                	   \        \        V4      ^4      V n        W n        \        P
                  P                  4       V n        RV n        RV n	        RV n
        RV n        V P                  '       EgN   \        4       '       Ed;    ^ RIp^ RIHp VP                  4       V n	        V P                  P!                  R4       V P                  P#                  R4       V P                  P%                  RR4       VP'                  V P                  RRR7      V n
        V P                  P)                  R	^RR
7       VP+                  V P                  V P                  RR7      V n        V P                  P)                  ^RR7       RV n        V P                  P-                  4        V P                  P/                  4        R# R# R#   \0         d    RT n         R# i ; i)   FN)ttkr   620x150zStarting...w)rM   anchorx)fillpadxpadyiD  )maximumlength)r   r   T)      )r   
   )maxrm   r   rc   r   r   r   use_ttygui_root_label_pbarr   r   r   r   r   geometry	resizableLabelpackProgressbarupdate_idletasksupdatero   )selfr   rc   r   r   s   &&&  r9   __init__ProgressReporter.__init__  sU   UQ'
$zz((*

+--!$'UUW


  !23

##I.

$$UE2 hhtzzchR  c A __TZZTW_X


Rg6

++-

!!#% #0&  ! !s   D7G GGc               $    V ^8  d   QhRRRRRR/# )r3   currentrm   noter   r6   rN   r7   )r8   s   "r9   r:   r   7  s!      c  d r;   c                	   \        ^ \        \        V4      V P                  4      4      pV P                  '       d   V P
                  '       d   V RV P                   RV 2P                  4       pV P                  '       d   V P                  P                  VR7       V P                  '       d   WP                  R&   V P
                  P                  4        V P
                  P                  4        R# ^ pWP                  ,          p\        \        WT,          4      4      pRV,          RWF,
          ,          ,           p\        \        P                  ! 4       V P                  ,
          R4      pW,          p	V	R	8  d   V P                  V,
          V	,          MR
p
RV RV RV P                   RV^d,          R R\        V
4       R2pV'       d   VRV 2,          p\        P                   P#                  RVR,          ,           4       \        P                   P%                  4        WP                  8X  d@   \        P                   P#                  R4       \        P                   P%                  4        R# R# )r   /z  )rM   valueN#-g-C6?g&.>        [z]  (z5.1fz%) ETA rA   :N   Nr~   )r   minrm   r   r   r   rI   r   configr   r   r   roundri   rc   r   r   r   flush)r   r   r   msgwidthfracfilledbarelapsedrateetas   &&&        r9   r   ProgressReporter.update7  s   aS\4::67888


IQtzzl"TF399;C{{{""",zzz&-

7#JJ'')JJ#U4<()FlSEN33diikDOO3V< /3d{tzzG#t+#b	4::,bc$ws3xjPQRRv;C

D	)*

jj JJT"JJ !r;   c                   V ^8  d   QhRR/# r3   r6   rN   r7   )r8   s   "r9   r:   r   W  s      t r;   c                	    V P                   '       d2   V P                  '       d    V P                  P                  4        R # R # R #   \         d     R # i ; iN)r   r   r   ro   )r   s   &r9   closeProgressReporter.closeW  sC    888




""$ #8  s   A AA)r   r   r   r   rc   r   r   Nr   )	__name__
__module____qualname____firstlineno____doc__r   r   r   __static_attributes__r7   r;   r9   r   r     s    N!>@ r;   r   c                   V ^8  d   QhRR/# r3   r6   r   r7   )r8   s   "r9   r:   r:   b  s      D r;   c                    \         P                  R 8X  d   \         P                  P                  R4      ;'       gF    \         P                  P                  R4      ;'       g    \	        \
        P                  ! 4       4      p \        V 4      R,          pM^\         P                  P                  R4      ;'       g&    \	        \
        P                  ! 4       R,          4      p \        V 4      R,          pVP                  RRR7       V# )	ntLOCALAPPDATAAPPDATARW_Site_ScraperXDG_CACHE_HOMEz.cacherw_site_scraperTparentsexist_ok)osnameenvironrn   r   r   homemkdir)baseds     r9   
_cache_dirr   b  s    	ww$zz~~n-^^	1J^^cRVR[R[R]N^J**zz~~./NN3tyy{X7M3NJ**GGD4G(Hr;   c                   V ^8  d   QhRR/# )r3   r6   r   r7   )r8   s   "r9   r:   r:   m  s     # #= #r;   c                    \         '       d{   \        \         4      p V P                  4       '       d   \        V 4      # V P	                  4       '       d5   R F.  pW,          pVP                  4       '       g   K#  \        V4      u # 	  R F3  p\
        V,          pVP                  4       '       g   K(  \        V4      u # 	  R F7  p\        4       V,          pVP                  4       '       g   K,  \        V4      u # 	  \        P                  R8w  ds   \        P                  ! 4       R,          \        P                  ! 4       R,          R,          3 F/  pVR,          pVP                  4       '       g   K$  \        V4      u # 	  \        P                  ! R4      ;'       g    \        P                  ! R 4      p V '       d   V # R# )geckodriver.exegeckodriverr   binz.localN)r   r   )r1   r   is_filer   is_dirBASE_DIRr   r   r   r   shutilwhich)pr   candr   s       r9   _resolve_from_env_or_pathr	  m  s3   !"99;;q6M88:::x<<>>t9$ ; 3$<<>>t9 3 3|d"<<>>t9 3 
ww$))+%tyy{X'='EFA}$D||~~4y  G 	]#FFv||4E'FAr;   c                   V ^8  d   QhRR/# )r3   r6   zTuple[str, str]r7   )r8   s   "r9   r:   r:     s     ! !_ !r;   c                    \         P                  P                  4       p \        P                  ! 4       P                  4       pV P	                  R4      '       d.   R\        P
                  ! 4       ^ ,          9   pV'       d   R	# RR3# V P	                  R4      '       d   RV9   g   RV9   d   R
# R# V R8X  d   RV9   g   RV9   d   R# R# R# )zG
Returns (asset_contains, archive_type) matching geckodriver releases.
win64win32ziplinuxaarch64arm64darwin)win64r  )zlinux-aarch64tar.gz)linux64r  )zmacos-aarch64r  )macosr  )r   platformlowermachine
startswitharchitecture)sysplatmachis_64s      r9   _platform_asset_keyr     s     ll  "G##%D%  --/22 55gu55'""4..$$(d?i4/..""  r;   c                    V ^8  d   QhRRRR/# )r3   dest_dirr   r6   r7   )r8   s   "r9   r:   r:     s     A A4 AD Ar;   c           	         \        4       w  rRp\        VRR/R7      p\        V^R7      ;_uu_ 4       p\        P                  ! VP                  4       P                  RRR7      4      pR	R	R	4       XP                  R
. 4      pR	pR	p	V FI  p
V
P                  RR4      pW9   g   K  VP                  V4      '       g   K6  V
P                  R4      pTp	 M	  V'       g   \        RV RV R24      hW	,          p\        VRR/R7      p\        V^<R7      ;_uu_ 4       p\        VR4      ;_uu_ 4       p\        P                  ! W^4       R	R	R	4       R	R	R	4       \        P                  R8X  d   RMRpW,          pVR8X  d   \        P                   ! VR4      ;_uu_ 4       pVP#                  4        FI  pVP                  V4      '       g   K  VP%                  VV R7       V V,          pVP'                  V4        M	  R	R	R	4       M\(        P                  ! VR4      ;_uu_ 4       pVP+                  4        Fu  pVP                  P                  RV,           4      '       g   VP                  V8X  g   K>  VP%                  VV R7       V VP                  ,          pVP'                  V4        M	  R	R	R	4        VP-                  RR7       \        P                  R8w  dx    \        P0                  ! V4      p\        P2                  ! VVP4                  \0        P6                  ,          \0        P8                  ,          \0        P:                  ,          4       VP=                  4       '       g   \        R4      hV#   + '       g   i     EL; i  + '       g   i     ELN; i  + '       g   i     ELZ; i  + '       g   i     EL; i  + '       g   i     EL#; i  \.         d     EL!i ; i  \.         d     Li ; i)z\
Downloads and extracts latest geckodriver into dest_dir.
Returns path to extracted driver.
z@https://api.github.com/repos/mozilla/geckodriver/releases/latest
User-AgentzRW_Site_Scraper/1.0headerstimeoutrP   rF   errorsNassetsr   r   browser_download_urlz'Could not find a geckodriver asset for r   z).wbr   r   r   r  r)rL   zr:gzr   T
missing_okz5Download succeeded but geckodriver was not extracted.)r   r
   r   rj   rk   readdecodern   endswithRuntimeErrorr   r  copyfileobjr   r   zipfileZipFilenamelistextractrF   tarfile
getmembersunlinkro   statchmodst_modeS_IXUSRS_IXGRPS_IXOTHrf   )r"  	asset_keyarchive_typeapireqr.  rq   r+  dl_urldl_namer}   r   archive_pathreq2r   driver_nameextracted_pathzmemberr  tsts   &                     r9   _download_latest_geckodriverrQ    s   
 23I
LC
#.CD
EC	b	!	!Qzz!&&(//')/DE 
" XXh#FFGuuVR |!<!<UU12FG  DYKrR^Q__abcc%L6L2G#HID	r	"	"alD)A)AQ1  *B	" (*ww$#MK+Nu__\3//1**,??;//IIf8I4 6)AIIn- ' 0/ \\,//1,,.;;''k(9::fkk[>XIIf8I4 6;;.AIIn- ) 0t, 
ww$	(BHH^RZZ$,,%>%MPTP\P\%\]   ""RSSq 
"	!	!$ *B)A)A	"	"	" 0// 0//    		sy   6M9N!1N	N!(N51N5%A
O	4;O	8O  A6O/ 9N
	N	N!!N2	5O		O	O,+O,/O=<O=c                    V ^8  d   QhRRRR/# )r3   ckr   r6   r   r7   )r8   s   "r9   r:   r:     s     !
 !
4 !
C !
r;   c                R   \        4       pV'       d"   \        V4      P                  4       '       d   V# V P                  R 4      ;'       g    RP	                  4       pV'       d"   \        V4      P                  4       '       d   V#  \        4       p\        V4      p\        V4      V R &   \        V 4       \        V4      #   \         d\   p\        R4      pT'       d6   \        T4      P                  4       '       d   Y`R &   \        T 4       Tu Rp?# \        RT 24      hRp?ii ; i)rd   r   z$Select geckodriver / geckodriver.exeNzUnable to locate or install geckodriver.

Fix options:
  - Put geckodriver on PATH
  - OR set GECKODRIVER_PATH to the full driver path
  - OR install via package manager (Linux often: sudo apt install firefox-geckodriver)

Underlying error: )r	  r   rf   rn   rI   r   rQ  r   ry   ro   r   r4  )rS  r  saveddestdriver_pathepickeds   &      r9   ensure_geckodriverrZ    s    !#AT!W^^ VV&'--2446Ee##%%
|248!$[!1; 
EFd6l))++%+!"BM!
 "#%
 	

s%   8C   D&AD!D&D!!D&page_load_strategyeagerblock_imagespage_load_timeout
user_agentc               4    V ^8  d   QhRRRRRRRRRR	R
RRR/# )r3   rS  r   headlessr5   r[  r   r]  r^  rm   r_  r   r6   zwebdriver.Firefoxr7   )r8   s   "r9   r:   r:     sN     * *** 	*
 * * * *r;   c                   \        V 4      p\        4       pV'       d   VP                  R4       V'       d   VP                  RV4       VP	                  RR4       VP	                  RR4       VP	                  RR4       VP	                  R	R4       V'       d   VP	                  R
^4       V'       d   VP	                  RV4       \        VR7      p\        P                  ! WR7      p	V	P                  V4       V	P                  \        4       V	# )a6  Start Firefox using an explicit geckodriver path (no Selenium Manager).

Args:
    page_load_strategy: "eager" returns after DOMContentLoaded. "normal" waits for full load.
    block_images: If True, blocks images to reduce load stalls/timeouts.
    page_load_timeout: Seconds for Selenium navigation timeout.
z	-headlesspageLoadStrategyzdom.webnotifications.enabledFzmedia.volume_scalez0.0z!browser.privatebrowsing.autostartTznetwork.http.http3.enablezpermissions.default.imagezgeneral.useragent.override)executable_path)serviceoptions)rZ  r   add_argumentset_capabilityset_preferenceFirefoxServicer   Firefoxset_page_load_timeoutset_script_timeoutSCRIPT_TIMEOUT)
rS  ra  r[  r]  r^  r_  geckorf  re  drivers
   &&$$$$    r9   create_driverrq    s      r"EiG[)13EF 95A/7>E6>:A>;ZHU3Gw@F
  !23
n-Mr;   c                   V ^8  d   QhRR/# r   r7   )r8   s   "r9   r:   r:   D  s      t r;   c                     \         w  r \        P                  ! \        P                  ! W4      4       R #   \
         d    \        P                  ! T 4        R # i ; ir   )REQUEST_DELAY_RANGEri   sleeprandomuniformro   )lohis     r9   _polite_delayrz  D  s=     FB

6>>")* 

2s   *6 !AAc          
     ,    V ^8  d   QhRRRRRRRRRR	/# )
r3   urlr   r(  rm   settler   	max_triesr6   rN   r7   )r8   s   "r9   r:   r:   L  sC     C C	C C 	C
 C 
Cr;   c                   Rp\        ^V^,           4       EF  p V P                  V4       \        4         \        V RR4      pVe   \	        VR4      '       d>   \	        VP
                  R4      '       d"   \        V4      ^
,           VP
                  n        \	        VR4      '       d>   \	        VP                  R4      '       d"   \        V4      ^
,           VP                  n        T P                  T4       \        P                  ! T4        T P                  \        P                  R4        R# 	  V'       d   VhR#   \         d     Lfi ; i  \         d     L/i ; i  \          d   pTp T P#                  R4       M  \         d     Mi ; i\        P                  ! T4        T P                  \        P                  R4        Rp? R#   \         d      Rp?M!i ; iRp?i\$         d   pTp Rp?MRp?ii ; i T P                  R4       M  \         d     Mi ; i\        P                  ! R	T,          4       EK0  )
zNavigate without getting stuck on pages that never fully finish loading.

- Uses page_load_timeout
- On timeout, calls window.stop() and continues if the DOM exists
- Retries a couple times with light cleanup
Ncommand_executor_client_configr(  _connbodyzwindow.stop();about:blankg      ?)rangerl  rz  getattrhasattrr  rm   r(  r  ro   rn   ri   ru  find_elementr   TAG_NAMEr   execute_scriptr   )	rp  r|  r(  r}  r~  last_excattemptcerX  s	   &&&&&    r9   safe_getr  L  s    )-HIM*)	((1O
V%7>>r#344ARART]9^9^47L24E))1r7++)0L0L+.w<"+<( JJsO JJv##BKK8 5 +f  K       	H%%&67 JJv##BKK8  " 	H		JJ}% 		

4'>"s   E!B/D?&'E! E?E
E!EE!EE!EE!!G?,G%/F G%FG%FG%) GG"G%!G""G%%G?2G?3G::G?HH#"H#c                    V ^8  d   QhRRRR/# )r3   r|  r   r6   	List[str]r7   )r8   s   "r9   r:   r:     s      s y r;   c                
   . pV 3 F  pW!9  g   K  VP                  V4       K  	  RV 9   d-   V P                  RR^4      pW19  d   VP                  V4       V# V P                  RR^4      pW19  d   VP                  V4       V# )zOReturn a small set of URL variants (www/non-www) to dodge occasional redirects.z//www.z//)appendrF   )r|  outsuu2s   &   r9   _url_variantsr    s~    DU=KKN  3[[4+>KKO
 K [[x+>KKOKr;   c               $    V ^8  d   QhRRRRRR/# )r3   r|  r   r(  rm   r6   r7   )r8   s   "r9   r:   r:     s!     2 2S 23 2 2r;   c           	         \        V R RRRRR/R7      p\        W!R7      ;_uu_ 4       pVP                  4       pRRR4       XP                  R	R
R7      #   + '       g   i     L#; i)r$  z_Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36Acceptz?text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8zAccept-Languagezen-US,en;q=0.9r%  r'  NrP   rF   r)  )r
   r   r1  r2  )r|  r(  rF  r.  rq   s   &&   r9   _fetch_htmlr    sc    
{W/
C 
	&	&!vvx 
' ;;wy;11 
'	&s   AA$	c                  F   a  ] tR tRtRt0 RmtV 3R ltR tR tR t	Rt
V ;t# )	_VisibleTextExtractori  z;Very small HTML->visible text extractor (no external deps).c                	2   < \         SV `  4        . V n        R # r   )superr   parts)r   	__class__s   &r9   r   _VisibleTextExtractor.__init__  s     "
r;   c                	~    VP                  4       V P                  9   d   V P                  P                  R 4       R# R# r~   Nr  _BLOCK_TAGSr  r  )r   tagattrss   &&&r9   handle_starttag%_VisibleTextExtractor.handle_starttag  -    99;$***JJd# +r;   c                	~    VP                  4       V P                  9   d   V P                  P                  R 4       R# R# r  r  )r   r  s   &&r9   handle_endtag#_VisibleTextExtractor.handle_endtag  r  r;   c                	    V'       d6   VP                  4       '       d   V P                  P                  V4       R # R # R # r   )rI   r  r  )r   rq   s   &&r9   handle_data!_VisibleTextExtractor.handle_data  s(    DJJLLJJd# !4r;   )r  >   r  brh1h2h3h4h5h6litdthtrdivfooterheaderarticlesection)r   r   r   r   r   r  r   r  r  r  r   __classcell__)r  s   @r9   r  r    s&    EK#$$$ $r;   r  c                    V ^8  d   QhRRRR/# r3   htmlr   r6   r7   )r8   s   "r9   r:   r:     s     
 
 
 
r;   c                >   \        4       p VP                  V 4       R P                  VP                  4      p\
        P                  ! V4      p\        P                  ! RRV4      p\        P                  ! RRV4      pVP                  4       #   \         d     L~i ; i)r   z\n{3,}z

z[\t\r]+rE   )
r  feedro   joinr  html_libunescaperG   rH   rI   )r  r  raws   &  r9   _html_to_textr    s    A	t ''!''
C


C
 C
&&FC
(C
&&S#
&C99;  s   B BBc                    V ^8  d   QhRRRR/# r  r7   )r8   s   "r9   r:   r:     s      C C r;   c                \   \         P                  ! R V \         P                  \         P                  ,          R7      pV'       g   R# VP	                  ^4      p\         P
                  ! RRV4      p\        P                  ! V4      p\         P
                  ! RRV4      P                  4       pV# )z<h1\b[^>]*>(.*?)</h1>flagsr   <[^>]+>rE   rD   )	rG   search
IGNORECASEDOTALLgrouprH   r  r  rI   )r  minners   &  r9   _extract_first_h1r    sw    
		*D		8QRAGGAJEFF:sE*Ee$EFF63&,,.ELr;   c                    V ^8  d   QhRRRR/# r  r7   )r8   s   "r9   r:   r:     s     3# 3# 3# 3#r;   c                  aa R0o0 R	moR R lpR VV3R llp\         P                  ! RV \         P                  \         P                  ,          R7      pV'       d(   V! VP	                  ^4      4      pV! V4      '       d   V# \         P
                  ! RV \         P                  \         P                  ,          R7       F,  pV! VP	                  ^4      4      pV! V4      '       g   K*  Vu # 	  \        V 4      # )
zExtract a likely product title from HTML.

Many Red Wing product pages place the actual product name in <h3>, while <h1>
can be a site/banner header (e.g., 'RED WING FOR BUSINESS'). We therefore
try meaningful <h3> first, then fall back to <h1>.
r   c                    V ^8  d   QhRRRR/# )r3   r  r   r6   r7   )r8   s   "r9   r:   ,_extract_first_heading.<locals>.__annotate__  s      c c r;   c                    \         P                  ! R RV 4      p \        P                  ! V 4      p \         P                  ! RRV 4      P	                  4       p V # )r  rE   rD   )rG   rH   r  r  rI   )r  s   &r9   _clean&_extract_first_heading.<locals>._clean  sD    z3.!!%(vsE*002r;   c                    V ^8  d   QhRRRR/# r3   rA   r   r6   r5   r7   )r8   s   "r9   r:   r    s      s t r;   c                   < T ;'       g    R P                  4       p V '       g   R# V P                  4       pVS9   g   VS9   d   R# \        V4      ^8:  d   R# \        P                  ! RV4      '       g   R# R# )r   Fz[A-Z]T)rI   upperlenrG   r  )rA   upBADSTOPs   & r9   _ok#_extract_first_heading.<locals>._ok  s[    WW"OOWWY9d
r7a<yy2&&r;   z<td[^>]*class=['\"][^'\"]*prTitle[^'\"]*['\"][^>]*>\s*Name\s*</td>\s*<td[^>]*class=['\"][^'\"]*prValue[^'\"]*['\"][^>]*>(.*?)</td>r  z<h3\b[^>]*>(.*?)</h3>>	   CARESIZINGDETAILSREVIEWSFEATURES
TECHNOLOGYSPECIFICATIONS
SIZE & FITRELATED PRODUCTS)rG   r  r  r  r  finditerr  )r  r  r  mnamerA   mh3r  r  s   &     @@r9   _extract_first_headingr    s     #
#CD
  II	Immbii'	E 5;;q>"q66H {{3TQSQZQZAZ[399Q< q66H \
 T""r;   c               $    V ^8  d   QhRRRRRR/# )r3   r  r   r|  r6   zTuple[str, str, str]r7   )r8   s   "r9   r:   r:      s"     * *3 *S *=Q *r;   c                   \        V R4      pVP                  RRR7      ;'       g    VP                  RR7      pVP                  R4      pVP                  R4      pVP                  R	R
R7      p\        P                  ! RV4      pV'       d   VP                  ^4      MVP                  4       pVP                  R	R
R7      p	WV3# )zExtract (style_number, name, style_text) using the ONLY allowed method.

IMPORTANT: Per user instruction, this function intentionally uses *only* the
shoeguide/printSpacing -> h3 + strong logic (no fallbacks).
zhtml.parserr  	shoeguide)class_printSpacing)idr  strongrE   T)rI   z	#\s*(\d+))r   findget_textrG   r  r  rI   )
r  r|  soup	guide_divh3_tag
strong_tag
style_textmatchstyle_numberr   s
   &&        r9    extract_style_and_name_from_htmlr     s     }-D		%	4TT		^	8TI^^D!F)JD1JIIlJ/E%*5;;q>
0@0@0BLs$/Dz))r;   c                   V ^8  d   QhRR/# r   r7   )r8   s   "r9   r:   r:   0  s      d r;   c                   . ROp\         P                   ! 4       R,           p\         P                   ! 4       V8  d   RpV Ff  p V P                  \        P                  V4      pVP	                  4       '       d-   VP                  4       '       d   VP                  4        Rp MKf  Kh  	  V'       g   R# \         P                  ! R4       K  R#   \         d     K  i ; i)N//button[contains(.,'Agree') or contains(.,'Accept') or contains(.,'Proceed')]g       @FT皙?N)r	  zI//a[contains(.,'Agree') or contains(.,'Accept') or contains(.,'Proceed')]uR   //button[contains(.,'Close') or contains(.,'×') or contains(@aria-label,'Close')])	ri   r  r   XPATHis_displayed
is_enabledclickro   ru  )rp  xpathsendclickedxpels   &     r9   dismiss_popupsr  0  s    F
 ))+
C
))+
B((26??$$HHJ"G *9$  

3   s   ACCCCc                    V ^8  d   QhRRRR/# )r3   
max_roundsrm   r6   rN   r7   )r8   s   "r9   r:   r:   G  s      3  r;   c           	        ^ pRp\        V4       F  p\        V 4       V P                  \        P                  R4      p\        V Uu0 uF,  qfP                  R4      '       g   K  VP                  R4      kK.  	  up4      pWs8X  d   V^,          pM^ pTpV^8  d    R# V P                  R4       \        P                  ! R4       K  	  R# u upi )r   $//a[contains(@href,'/safety-boot/')]hrefz/window.scrollTo(0, document.body.scrollHeight);g      ?N)
r  r  find_elementsr   r  r  get_attributer  ri   ru  )rp  r  stable_rounds
last_count_linksr}   counts   &&      r9   scroll_to_load_allr"  G  s    MJ:v$$RXX/UVeWev?V,Q__V,eWXQMMJAOP

3  Xs   C
$C
c                    V ^8  d   QhRRRR/# r@   r7   )r8   s   "r9   r:   r:   [  s      3 3 r;   c                $   T ;'       g    R P                  4       p \        P                  ! RRV 4      p V '       g   R # V P                  4       pVR9   d   R # \        P                  ! RV 4      '       d   R # RV 9   d   R # \        V 4      ^8  d   R # V # )r   rD   rE   z\d{2,}$>   r  r  r  r  
QUICK VIEWADD TO CARTVIEW DETAILSr   )rI   rG   rH   r  	fullmatchr  )rA   r  s   & r9   _clean_listing_namer*  [  sz    	
bA
vsAA		A 	 	 	||Iq!!
ax
1vzHr;   c                   V ^8  d   QhRR/# r3   r6   r   r7   )r8   s   "r9   r:   r:   u  s     1 1C 1r;   c                   . p TP                  V P                  R4      ;'       g    R4        TP                  V P                  R4      ;'       g    R4        TP                  V P                  ;'       g    R4       . pV FS  pV'       g   K  \	        V4      P                  4        F)  p\        V4      pV'       g   K  VP                  V4       K+  	  KU  	  \        4       p. pV F-  pWE9  g   K  VP                  V4       VP                  V4       K/  	  V'       g   R# R R lpVP                  VRR7       V^ ,          #   \         d     EL%i ; i  \         d     ELi ; i  \         d     Li ; i)zJBest-effort: read the product name as displayed on a catalog listing tile.z
aria-labelr   r   c                    V ^8  d   QhRRRR/# )r3   rA   r   r6   rm   r7   )r8   s   "r9   r:   /_listing_name_from_anchor.<locals>.__annotate__  s     
 
 
 
r;   c                  a  \        S 4      p\        P                  ! R S 4      '       d
   V^,          p\        P                  ! RS P                  4       4      '       d
   V^
,          p\        P                  ! RS P                  4       4      '       d
   V^,          p\        ;QJ d    V 3R lR 4       F  '       g   K   RM	  RM! V 3R lR 4       4      '       d
   V^,          pV# )z[A-Za-z]\bWOMEN'?S\b
\bMEN'?S\bc              3  H   <"   T F  qSP                  4       9   x  K  	  R # 5ir   )r  .0rO  rA   s   & r9   	<genexpr>;_listing_name_from_anchor.<locals>.score.<locals>.<genexpr>  s     X'W!AGGI~'Ws   "TF)viewquickcartcomparewishlist)r  rG   r  r  any)rA   scs   f r9   score(_listing_name_from_anchor.<locals>.score  s    V99[!$$"HB99_aggi00"HB99]AGGI..!GB3X'WX333X'WXXX"HB	r;   T)keyreverse)
r  r  ro   rM   r   
splitlinesr*  setaddsort)r}   
candidateslinesclnseenuniqr?  s   &       r9   _listing_name_from_anchorrM  u  sK   J!//,7==2>!//'288b9!&&,,B' Ea&##%B$R(BrR  &  5DD>HHRLKKO 
 
 	II%I&7NY      sE   "E E "E E E) 8E) EEE&%E&)E76E7c               $    V ^8  d   QhRRRRRR/# )r3   r^   List[List[str]]	preferredzDict[str, str]r6   rm   r7   )r8   s   "r9   r:   r:     s!       N s r;   c                j   V '       d	   V'       g   ^ # ^ pV  F  p\        V\        4      '       d   \        V4      ^8  d   K+  V^ ,          ;'       g    RP                  4       pV'       g   KV  \	        VP                  VR4      4      pV'       g   K{  V^,          V8w  g   K  WS^&   V^,          pK  	  V# )zQOverwrite Name column using preferred mapping (by style). Returns number updated.r   )
isinstancelistr  rI   r*  rn   )r^   rP  updatedr.  styleprefs   &&    r9   apply_preferred_namesrW    s    yG!T""c!fqj1""$"9==#;<4AaDDLaDqLG  Nr;   c                   V ^8  d   QhRR/# )r3   r6   z Tuple[List[str], Dict[str, str]]r7   )r8   s   "r9   r:   r:     s     /& /&%E /&r;   c                8   . p\        4       p\        4       p/ p\        V ^4      p\         EFX  p\        4        \	        W4       VP                  \        P                  ! \        P                  R34      4       \        V 4       \        V 4       V P                  \        P                  R4       F  pVP                  R4      pV'       g   K  RV9  d   K'  W9   d   K/  \        V4      p	\        '       d7   V	'       d/   RV9   d(    \!        \#        V4      4      p
V
'       d   W9  d   WV	&   V	'       d+   W9   d   VP'                  V4       K  VP'                  V	4       VP'                  V4       VP)                  V4       K  	  EK[  	  W3#   \$         d     Lmi ; i)zCollect unique product links from all configured catalog sections.

Dedupes by style number when possible (preferred), otherwise by URL.
r  r  z/safety-boot/zcatalog=international)rD  r   rh   rz  r  untilECpresence_of_element_locatedr   r  r  r"  r  r  extract_style_from_url"PREFER_INTERNATIONAL_LISTING_NAMESr*  rM  ro   rE  r  )rp  	all_links	seen_href
seen_styler_   waitr|  r}   r  rU  nms   &          r9   collect_product_linksrd    sY   
 I%I5J&(O$D |

211288=c2defv6"%%bhh0VWA??6*Dd* *40E11e@W[^@^,-Fq-IJBe:13. &MM$'u%MM$T"3 X D %% ! s   &FFFc                    V ^8  d   QhRRRR/# )r3   linkr   r6   r7   )r8   s   "r9   r:   r:     s     # # # #r;   c                    \         P                  ! R V 4      pV'       d   VP                  ^4      # \         P                  ! RV 4      pV'       d   VP                  ^4      # R# )z/safety-boot/(\d+)[-/]z/safety-boot/(\d+)r   )rG   r  r  )rf  r  s   & r9   r]  r]    sJ    
		+T2Awwqz
		'.A1771:""r;   c                    V ^8  d   QhRRRR/# r  r7   )r8   s   "r9   r:   r:     s        r;   c                  a  S ;'       g    R P                  4       P                  4       o S '       g   R# Rp\        ;QJ d    V 3R lV 4       F  '       g   K   RM	  RM! V 3R lV 4       4      '       d   R# \        S 4      ^<8  d   R# R# )r   Tc              3  ,   <"   T F	  qS9   x  K  	  R # 5ir   r7   r4  s   & r9   r6  #_looks_like_junk.<locals>.<genexpr>  s     
';a6;   F)zwindow.openzjavascript:zfacebook.comzhttp://zhttps://)rI   r  r=  r  )rA   junk_tokenss   f r9   _looks_like_junkrn    s^    	
b!AWK
s
';
'sss
';
'''
1v{r;   c                    V ^8  d   QhRRRR/# )r3   rowr  r6   r5   r7   )r8   s   "r9   r:   r:     s      Y 4 r;   c                     V ^ ,          ;'       g    RP                  4       pV ^,          ;'       g    RP                  4       pT'       g   R# TP                  4       \        8X  d   R# R#   \         d     R# i ; i)r   r   TF)rI   ro   r  BAD_NAME_SENTINEL)rp  rU  r   s   &  r9   _is_bad_rowrs    sh    Q2$$&A"##% zz|((  s   A+  A+ A+ +A:9A:c                    V ^8  d   QhRRRR/# r3   rS  r   r6   rm   r7   )r8   s   "r9   r:   r:     s     # #T #c #r;   c                   V P                  R4      '       d   ^ # V P                  R. 4      ;'       g    . p\        V P                  R. 4      ;'       g    . 4      p^ p\        4       pV F  p\        V\        4      '       d	   V'       g   K#  V^ ,          ;'       g    RP	                  4       pV'       g   KN  \        V4      \        8  d   VP                  V4       Ku  \        V4      ^8  d"   V^,          ;'       g    RP	                  4       MRp\        V4      ^8  d"   V^,          ;'       g    RP	                  4       MRpV'       d   V'       d   K  VP                  V4       EK  	  V'       g   ^ # \        V4       F:  p	\        V	4      p
V
'       g   K  W9   g   K   VP                  V	4       V^,          pK<  	  \        V4      V R&   V# )zIf checkpoint rows are missing URL/Image columns or have empty URL/Image, requeue those links.
Runs at most once per checkpoint unless you delete/clear ck['media_repair_done'].
r`   r^   r]   r   )rn   rD  rR  rS  rI   r  EXPECTED_COLSrE  r]  discardsorted)rS  r^   r]   removedstyles_neededr.  rU  url_cellimg_cellr  rP  s   &          r9   repair_missing_mediar~    sd    
vv!""66&"##DRVVL"-334JGEM!T""!1""$q6M!e$+.q6A:AaDJJB%%'2+.q6A:AaDJJB%%'2xxe$  Z #D)2"%t$qLG	 ! j)B|Nr;   c                    V ^8  d   QhRRRR/# ru  r7   )r8   s   "r9   r:   r:   :  s     - -4 -C -r;   c                    \        V P                  R. 4      ;'       g    . 4      p\        V P                  R. 4      ;'       g    . 4      pV'       d	   V'       g   ^ # V Uu0 uF(  q3'       g   K  \        V4      '       g   K  V^ ,          kK*  	  ppV'       g   ^ # ^ p\        V4       F:  p\	        V4      pV'       g   K  Wt9   g   K   VP                  V4       V^,          pK<  	  V'       d   \        V4      V R&   \        V P                  R/ 4      ;'       g    / 4      p\        VP                  4       4       F2  p	\	        V	4      pV'       g   K  Wt9   g   K   VP                  V	R4       K4  	  WR&   V P                  R. 4      ;'       g    .  U
u. uF  p
\	        V
4      V9  g   K  V
NK  	  up
V R&   \        V 4       V# u upi u up
i   \         d     ^ # i ; i)aM  If checkpoint contains obviously bad rows, un-mark those links as 'done'.

This fixes the situation where a previous run captured the site header/ads into the Name/Brand
columns and those rows are now 'stuck' because resume logic skips already-done links.

Returns:
    Number of links that were re-queued (removed from done_links).
r^   r]   ra   Nrb   )rS  rn   rD  rs  r]  removery  dictkeyspopry   ro   )rS  r^   r]   r.  
bad_stylesrz  rf  rP  fckr  s   &          r9   repair_bad_checkpoint_rowsr  :  s   $BFF62&,,"-b177R8
:$(ADqAd+a.daddD
A$D'-Brb&!!$'1	 % %j1B| bff]B/5526B"'')_+A.2"*FF1dO % !#} 4666:Mr3R3X3XVX3X 'V3XQ)?)B*)T ()q3X 'VB"# B9 B.'V  s   G $G G G G "	G0GG
G  G G 	%G /+G 3G G 2G G G*G0G 
G GGc               $    V ^8  d   QhRRRRRR/# )r3   
text_blockr   
field_namer6   r7   )r8   s   "r9   r:   r:   l  s!       # # r;   c           
        T ;'       g    RP                  4        Uu. uF  q"P                  4       NK  	  pp\        V4       F  w  rEV'       g   K  VP                  V4      '       g   K(  V\	        V4      R P                  R4      pV'       d   Vu # \        V^,           \        V^,           \	        V4      4      4       F1  pW7,          ;'       g    RP                  4       pV'       g   K-  Vu u # 	   R# 	  R# u upi )u   Parse a simple field/value from extracted page text.

Red Wing pages often render fields in tables so the extracted text looks like:

    Name
    DynaForce®

We support both 'Name: DynaForce' and 'Name' on one line with the value on the next.
r   Nz :	)rC  rI   	enumerater  r  r  r   )	r  r  rJ  rH  ir{   tailjnxts	   &&       r9   parse_field_liner  l  s     $.#3#3"?"?"AB"ABXXZ"AEBU#??:&&J()//7D1q5#a!eSZ"89x~~2,,.3J :  $  Cs   C<c               $    V ^8  d   QhRRRRRR/# )r3   header_textr   	body_textr6   r7   )r8   s   "r9   r:   r:     s!      s s s r;   c                   T ;'       g    R P                  4       p\        P                  ! RV\        P                  R7      pV'       d   VP	                  ^4      P                  4       pV'       db   . pVP                  4        F:  pTP                  VP                  4       '       d   TMVP                  4       4       K<  	  RP                  V4      # VP                  4        F  p\        P                  ! RV\        P                  R7      '       g   K2  \        P                  ! RVP                  4       \        P                  R7      pV'       d   VP	                  ^4      P                  4       pV'       dd   . pVP                  4        F:  pTP                  VP                  4       '       d   TMVP                  4       4       K<  	  RP                  V4      u #  R # 	  R # )r   z^(.*?)\s+style\s*#\s*\d+r  rE   z\bstyle\s*#\s*\d+\bz^(.*?)\s+style\s*#\s*\d+\b)rI   rG   r  r  r  splitr  isupper
capitalizer  rC  )	r  r  htr  r  outr   r{   m2s	   &&       r9   extract_brandr    s<   


	"	"	$B
		-rGAggaj CYY[

		1@ !88C= $$&99+TGG8$**,bmm\Bhhqk'')C YY[

		1H )88C=( ' r;   c               $    V ^8  d   QhRRRRRR/# )r3   r  r   rU  r6   r7   )r8   s   "r9   r:   r:     s!     	& 	&# 	&c 	&c 	&r;   c                   V'       dY   V P                  4       P                  R V 24      pVR8w  d0   WVR,            p\        VR4      pV'       d   VP                  4       # \        V R4      pV'       d   VP                  4       # R# )z
ABOUT THE i@  r   r   r  )r  r  r  rI   )r  rU  idxchunkvals   &&   r9   extract_about_namer    sr    oo$$z%%9:"93:.E"5&1Cyy{"
9f
-C399;%2%r;   c                    V ^8  d   QhRRRR/# )r3   rM   r   r6   zDict[str, bool]r7   )r8   s   "r9   r:   r:     s     9 9 9 9r;   c           	     0   \        R  \        P                  ! RV P                  4       4       4       4      pV\        R \        P                  ! RV P                  4       4       4       4      ,          p\	        ^^4       Uu/ uF
  q" R2W!9   bK  	  up# u upi )c              3  8   "   T F  p\        V4      x  K  	  R # 5ir   rm   r5  r   s   & r9   r6   parse_heights.<locals>.<genexpr>  s     V U1A U   z\b(\d{1,2})\s*-\s*INCH\bc              3  8   "   T F  p\        V4      x  K  	  R # 5ir   r  r  s   & r9   r6  r    s     S!RAQ!Rr  z\b(\d{1,2})\s+INCH\b")rD  rG   findallr  r  )rM   foundr  s   &  r9   parse_heightsr    st    V

+F

 UVVE	SS,CTZZ\!RSSSE+0B<8<acGaj <888s    Bc                    V ^8  d   QhRRRR/# )r3   	brand_strr   r6   zTuple[bool, bool, bool]r7   )r8   s   "r9   r:   r:     s     ! !S !-D !r;   c                    T ;'       g    R P                  4       pVP                  R4      pVP                  R4      pVP                  R4      pRV9   d!   VP                  R4      '       g
   RV9   d   RpW#V3# )r   zred wingzirish setterworxzby red wingT)r  r  )r  bis_rwis_isis_worxs   &    r9   classify_brand_familyr    sl    	b!ALL$ELL(Ell6"G!all622!9K  r;   c               $    V ^8  d   QhRRRRRR/# )r3   r   r   max_lenrm   r6   r7   )r8   s   "r9   r:   r:     s!      3  s r;   c                   T ;'       g    R P                  4       p\        P                  ! RRV4      pVP                  RR 4      p\        P                  ! RR V\        P                  R7      pVP                  4       P                  RR4      p\        P                  ! RRV4      pV'       g   Rp\        V4      V8  d   VR	V P                  R4      pV# )
r   rD   rE      ®z[^\w\-\.\s]+r  r  z_+bootN)rI   rG   rH   rF   UNICODEr  r   )r   r  rA   s   && r9   _safe_filename_from_namer    s    	A
vsAA			$A
ARZZ8A		#s#A
uc1A
1vhwKs#Hr;   c                   V ^8  d   QhRR/# r   r7   )r8   s   "r9   r:   r:     s      T r;   c                     \         P                  ! 4       P                  R ,          P                  4       p V P	                  RRR7       V # )ImagesTr   )r   cwdparentresolver   )r   s    r9   _images_dirr    s6    			X	%..0AGGD4G(Hr;   c                    V ^8  d   QhRRRR/# )r3   fnamer   r6   r7   )r8   s   "r9   r:   r:     s     . .3 .3 .r;   c                F    \        \        R 4      R,          V ,          4      # )z..r  )r   r   )r  s   &r9   _rel_image_pathr    s    tDzH$u,--r;   c                  aaa  V P                  \        P                  R 4      pVP                  \        P                  R4      pV'       d   VP	                  4       '       d   V#  T P                  \        P                  R4      pT'       g"    T P                  \        P                  R4      pRpRpT EF  p TP	                  4       '       g   K  TP                  ;'       g    / p\        TP                  R4      ;'       g    ^ 4      p\        TP                  R4      ;'       g    ^ 4      p	T^x8  g   T	^x8  d   K  TP                  R4      ;'       g    RP                  4       oTP                  R	4      ;'       g    RP                  4       oTP                  R
4      ;'       g    RP                  4       oY,          p
\        ;QJ d    T3R lR 4       F  '       g   K   RM	  RM! T3R lR 4       4      '       d
   T
R,          p
\        ;QJ d    T3R lR 4       F  '       g   K   RM	  RM! T3R lR 4       4      '       d
   T
R,          p
\        ;QJ d    T3R lR 4       F  '       g   K   RM	  RM! T3R lR 4       4      '       d
   T
R,          p
Y8  d   T
pTpEK  EK  	  T#   \
         d     EL[i ; i  \
         d    . p ELNi ; i  \
         d    . p EL9i ; i  \
         d     EKF  i ; i)productImageimgmain imgNr   r   heightaltr   classsrcc              3  ,   <"   T F	  qS9   x  K  	  R # 5ir   r7   )r5  r  r  s   & r9   r6  1_pick_best_product_img_element.<locals>.<genexpr>  s     P%O8%Orl  TFg333333?c              3  ,   <"   T F	  qS9   x  K  	  R # 5ir   r7   )r5  r  clss   & r9   r6  r    s     M%L8%Lrl  gffffff?c              3  ,   <"   T F	  qS9   x  K  	  R # 5ir   r7   )r5  r  r  s   & r9   r6  r    s     R%Q8%Qrl  g?)r  shoechukkahikermoc)productprimaryheroimage)z	/dw/imagescene7z/imagesstatic)r  r   IDCSS_SELECTORr  ro   r  r  sizer   rn   r  r  r=  )rp  	containerr  imgsbest
best_scorer  szr   hr?  r  r  r  s   &          @@@r9   _pick_best_product_img_elementr    s`   ''~>	$$R__e<3##%%J
##BOOZ@ 	''U;D DJ	??$$BBbffWo**+AbffX&++!,A3w!c'##E*00b779C##G,2299;C##E*00b779CEEsP%OPsssP%OPPPsM%LMsssM%LMMMsR%QRsssR%QRRR!"
 "' 0 KQ    
  	D	6  		s   AJ2 J2 #J2 & K  K :K,K,%K,#K,)K, K,(K,(K,*8K,%K,K,K,.K,K, K,7K,K,2K KKKK)(K),K<;K<c                   V ^8  d   QhRR/# r,  r7   )r8   s   "r9   r:   r:     s     ! !# !r;   c                    V P                  \        P                  R 4      p VP                  \        P                  R4      pV'       do   VP                  R4      ;'       g    RP                  4       pVP                  R4      ;'       g    RP                  4       pV'       g	   V'       d   T;'       g    T# R F  p T P                  \        P                  T4      pTP                  R4      ;'       g    RP                  4       pTP                  R4      ;'       g    RP                  4       pT'       g	   T'       d   T;'       g    Tu # K  	  R#   \         d    Rp EL"i ; i  \         d     Li ; i  \         d     K  i ; i)r  r  Nzoomimgr   r  )z$li[data-orbit-slide="product-1"] imgzli.active imgzul#productImage li.active imgzmain li.active imgr  )r  r   r  r  ro   r  rI   )rp  r  r  rM  rA   sels   &     r9   _extract_product_image_urlr    sN   ''~>		((%@C ""9-33::<A""5)//R668AAvvA
	%%boos;C""9-33::<A""5)//R668AAvvA   7  	C	  "  		s    E)  E E) E) #(E) E) %E) -E) 6E) ?7E:7(E: E:9E:E:
E:E&"E) %E&&E) )E76E7:F	F	c                    V ^8  d   QhRRRR/# )r3   rU  r   r6   r7   )r8   s   "r9   r:   r:   /  s     8 8c 8c 8r;   c                   T;'       g    RP                  4       pV'       g   R# RV R2p\        4       V,          p \        V 4      pV'       dy   VP                  RR4      pRR/p\        P
                  ! WE^R7      pVP                  '       d9   VP                  '       d'   VP                  VP                  4       \        V4      # R	p \        T 4      pT'       g   R#  T P                  R
T4       \        P                  ! R4        \        T 4        TP!                  \#        T4      4       \        T4      #   \         d     Li ; i  \         d    R	p Li ; i  \         d     Lci ; i  \         d     Lhi ; i  \         d=     T P%                  \#        T4      4       \        T4      u #   \         d      R# i ; ii ; i)zSave the main product image as a temp file (__<style>.png).

Preferred: download the product image URL (zoomimg/src) for best quality.
Fallback: element screenshot if download fails.
Returns relative path like ../Images/__595.png (or empty string).
r   __.pngz&amp;&r$  zMozilla/5.0)r&  r(  Nz?arguments[0].scrollIntoView({block:'center', inline:'center'});r
  )rI   r  r  rF   requestsrn   okcontentwrite_bytesr  ro   r  r  ri   ru  r  
screenshotr   save_screenshot)rp  rU  r  out_pathimg_urlr&  r.  img_els   &&      r9   capture_product_image_tempr  /  s    [[b!EtE}u$H
,V4oogs3G#]3GWrBAttt			$$QYY/&u--
 F/7 _agh

3v#h-(u%%5      
    	""3x=1"5)) 			s~   A$D; "%D; E !(E 
E0 $F ;E	E	EEE-,E-0E>=E>G$F41G4G?GGGc               (    V ^8  d   QhRRRRRRRR/# )r3   rU  r   	boot_namerel_temp_pathr6   r7   )r8   s   "r9   r:   r:   j  s(     " "3 "3 "s "s "r;   c                   V'       g   R# \         P                  ! 4       V,          P                  4       pVP                  4       '       g   V# \        P
                  ! RRT ;'       g    RP                  4       4      pV'       g   V# V R2p\        4       V,          p VP                  V4       \        T4      #   \         d    Tu # i ; i)z8Rename ../Images/__<style>.png -> ../Images/<style>.png.r   z[^\dA-Za-z_-]r  )r   r  r  rf   rG   rH   rI   r  rF   ro   r  )rU  r  r  temp_absstyle_cleanr  dest_abss   &&&    r9   finalize_image_filenamer  j  s    
]*335H??&&)2/B/B/DEKm4 E}u$H" 5!!  s   B: :C
	C
c               $    V ^8  d   QhRRRRRR/# )r3   r  r   
source_urlr6   zTuple[bool, bool]r7   )r8   s   "r9   r:   r:     s"       # 2C r;   c                   T ;'       g    RP                  4       pT;'       g    RP                  4       p\        P                  ! RV4      '       g   RV9   g   RV9   g   RV9   d   R# \        P                  ! RV4      '       g   RV9   g   RV9   g   R	V9   d   R# \	        \        P                  ! R
V4      4      p\	        \        P                  ! RV4      4      pWE3# )zInfer gender flags from URL + header (do NOT use body text; it frequently contains both words).

Priority:
  1) URL slug/query (mens/womens)
  2) Header text (MEN'S / WOMEN'S)
  3) Otherwise: unknown -> (False, False)
r   z/womens(?:[-/]|$)zwomens-zgender=womenzgender=femalez/mens(?:[-/]|$)zmens-z
gender=menzgender=maler2  r1  )FT)TF)r  r  rG   r  r5   )r  r	  huulmalefemales   &&    r9   infer_genderr    s     ,,B			B


	!	!	#B 
yy%r**i2oSUAUYhlnYn	yy#R((GrM\R=OS`dfSf		-,-D"))OR01F>r;   c               4    V ^8  d   QhRRRRRRRRRRRRRR	/# )
r3   rU  r   r   r  r  r	  image_rel_pathr6   r  r7   )r8   s   "r9   r:   r:     sW     O OO
O O 	O
 O O Or;   c                B  a/ T;'       g    R pT;'       g    R pVP                  4       pVP                  4       p\        W#4      p\        V4      '       d   R p\	        V4      w  rpV'       ga   VP                  4       pVP                  4       pRV9   g   RV9   d   RpM!RV9   g   RV9   d   RpMRV9   g   RV9   d   Rp\	        V4      w  rpVP                  4       p\        W$4      w  ppRV9   pRV9   ;'       g    R	V9   pR
V9   ;'       g    RV9   ;'       g    RV9   pRV9   ;'       g    T;'       g    T;'       g    TpV'       * pRV9   ;'       g    RV9   pRV9   p\        VR4      pV'       d   VP                  4       P                  4       p\        P                  ! RV4      '       d   RpM\        P                  ! RV4      pV'       d     \        VP                  ^4      4      ^ 8  pMERV9   ;'       g    RV9   pM2\        P                  ! RV4      '       d   RpMRV9   ;'       g    RV9   pRV9   ;'       g    \        P                  ! RV4      RJpRV9   ;'       g    \        P                  ! RV4      RJpRV9   ;'       g    RV9   pRV9   ;'       g    \        P                  ! R V4      RJpR!V9   ;'       g    R"V9   ;'       d    R#V9   p R$V9   p!\        VR%4      p"V"'       d    V"P                  4       P                  R&4      MR'V9   p#\        VR(4      p$Rp%V$'       dS   V$P                  4       o/\        ;QJ d    V/3R) lR= 4       F  '       g   K   RM	  RM! V/3R) lR= 4       4      '       g   Rp%T;'       g    R P                  4       p&R*V&9   ;'       g    R+V&9   ;'       g    R,V&9   p'R-V&9   p(R.V&9   p)\        VR/,           V,           4      p*\        VR04      p+V+P                  4       p,R1V,9   ;'       g    R2V,9   p-R3V,9   ;'       g    R4V,9   ;'       g    T-p.. \!        V 4      N\!        V4      N\!        V4      N\!        V4      N\!        V4      N\#        V4      N\#        V4      N\#        V	4      N\#        V
4      N\#        V4      N\#        V4      N\#        V4      N\#        V4      N\#        V4      N\#        V4      N\#        V4      N\#        V4      N\#        V4      N\#        V4      N\#        V4      N\#        V4      N\#        V4      N\#        V 4      N\#        V!4      N\#        V#4      N\#        V%4      N\#        V'4      N\#        V(4      N\#        V)4      N\#        V*R5,          4      N\#        V*R6,          4      N\#        V*R7,          4      N\#        V*R8,          4      N\#        V*R9,          4      N\#        V*R:,          4      N\#        V*R;,          4      N\#        V*R<,          4      N\#        V.4      N\#        V-4      N#   \         d    Rp ELi ; i)>r   zIRISH SETTERr"   WORXr#   zRED WINGr!   z	STEEL TOEzALUMINUM TOEz	ALLOY TOEzNON-METALLIC TOEzNON METALLIC TOEzCOMPOSITE TOEz
SAFETY TOEzMETATARSAL GUARDz	MET GUARD
WATERPROOFr%   z$\b(non[-\s]?insulated|uninsulated)\bFz(\d+)\s*g\bTinsulat
thinsulatezSLIP RESISTANTz\bSR\bNzELECTRICAL HAZARDz\bEH\bzPUNCTURE RESISTANTPUNCTUREzSTATIC DISSIPATIVEz\bSD\bzANKLE PROTECTIONankleprotectBOAr&   yeszDEFINED HEELzLeather Typec              3  ,   <"   T F	  qS9   x  K  	  R # 5ir   r7   )r5  r   lls   & r9   r6  '_build_row_from_text.<locals>.<genexpr>  s     `$_q7$_rl  oxfordathleticr  r  r  r~   zCountry of Originzmade in usazmade in the usazbuilt in usazassembled in the usar)   r*   r+   r,   r-   r.   r/   r0   )meshnylonfabrictextilepoly	synthetic)r  r  r  rn  r  r  r  rI   rG   r  rm   r  ro   r  r=  r  rJ   r>   )0rU  r   r  r  r	  r  
body_upper
body_lowerr  r  r  r  r  buheader_upperr  r  	steel_toealuminum_toenon_metal_toe
safety_toesoft_toe	met_guard
waterproofinsulation_lineil	insulatedr  slip_resistantelectrical_hazardpuncture_resistantstatic_dissipativeankle_protectionboadefined_heel_linedefined_heelleather_lineall_leather_upper
name_loweroxford_athleticr  r  heightsorigin_lineorigin_lowermade_in_usabuilt_in_usar  s0   &&&&&&                                         @r9   _build_row_from_textrF    s    RI\\rF"J"Jf0I 	""	1)<E'\\^__R>R#7&Ir\Vr\I2r!1"I 5i @g<<>L3LD& z)I!Z/LL;*3LL':5;MQ[;[apt~a~M*,[[[[l[[mJ~H#z1QQ{j7PI+J&y,?O""$**,99<bAAI		."-A% #AGGAJ! 3I '"_EE,"2D	99<jIII"j0QQlj6PI&*4ff"))Iy:Yae:eN,
:ll		)U^@_gk@k.*<[[*PZBZ.*<nn"))IW`BaimBm*j8ppg>S>o>oXaeoXoJC(NCBS$**,77>ZhlvZvL#I~>L!s`$_`sss`$_``` $**"##%J:-ff:3KffQW[eQeO#Fz!EFTMI56G"9.ABK$$&L L0XX6G<6WK"l2nn8NR^8^nncnL0u0t0 	z"0 	~&	0
 	y!0 	D	0 	F0 	E
0 	E
0 	G0 	J0 	I0  	M!0" 	L#0$ 	I%0& 	H'0* 	J+0, 	I-0. 	N/00 	102 	304 	506 	708 	C90< 	L=0> 	?0B 	OC0D 	FE0F 	E
G0J 	GDMK0L 	GDMM0N 	GDMO0P 	GDMQ0R 	GDMS0T 	GENU0V 	GENW0X 	GENY0\ 	L]0^ 	K_0 0O ! % $I%s   X XXc               $    V ^8  d   QhRRRRRR/# )r3   rf  r   image_temp_relr6   r  r7   )r8   s   "r9   r:   r:   '  s!     / /3 / /Y /r;   c           
         R p\        V 4       F<  p \        V^R7      p\        WC4      w  rVpTp\        V4      p	\	        WVWWR7      p
V
u # 	  V'       d   Vh\        R4      h  \
         d   pTp R p?Kh  R p?ii ; i)Nr'  r	  r  zHTTP fallback failed)r  r  r  r  rF  ro   r4  )rf  rH  r  r  r  rU  r   r  r  r  rp  rX  s   &&          r9   _scrape_product_via_httprK  '  s    (,H4 	q"-D&Ft&O#EF%d+I&uFRVvCJ ! 
-
..  	H	s   7A##A;.A66A;prefer_httpc               $    V ^8  d   QhRRRRRR/# )r3   rf  r   rL  r5   r6   r  r7   )r8   s   "r9   r:   r:   9  s&     &M &M &Md &My &Mr;   c          	        R p\        V4      pTp\        4        V'       d    \        WR7      #  \	        W4       \        V 4       V P                  ;'       g    R p\        Wa4      w  rGp V'       d   \        W4      p\        T ^4      P                  \        P                  ! \        P                  R34      4       T P                  \        P                  R4      P                   ;'       g    R p	Tp
\#        YGYYR7      pT#   \         d     Li ; i  \         d    R p Li ; i  \         d    \        YR7      u # i ; i)r   )rH  r  rJ  )r]  rz  rK  ro   r  r  page_sourcer  r  r   rZ  r[  r\  r   r  r  rM   rF  )rp  rf  rL  rH  rU  style_from_urlr  r   r  r  r  rp  s   &&$         r9   scrape_productrQ  9  s(   N"4(ENO	+DPP
Mv !!''R"B4"NZ	 !;F!J 	fb!''(F(FU[G\(]^''V<AAGGR	"5dr
1  		   	 N	   M'LLMsM   D :D' +D >A/D' .D' DDD$!D' #D$$D' 'E Ec                    V ^8  d   QhRRRR/# )r3   r^   rO  r6   rN   r7   )r8   s   "r9   r:   r:   `  s     6 6 6T 6r;   c           	        \         pR  R lp\        WR7      p. pVP                  RRP                  V4      ,           R,           4       VP                  RRP                  R.\	        V4      ,          4      ,           R,           4       V F  p\	        V4      \	        V4      8  d,   VR.\	        V4      \	        V4      ,
          ,          ,           pM'\	        V4      \	        V4      8  d   VR\	        V4       pVP                  RRP                  V4      ,           R,           4       K  	  \        \        RP                  V4      R,           4       R# )c                   V ^8  d   QhRR/# )r3   r.  r  r7   )r8   s   "r9   r:   $write_markdown.<locals>.__annotate__c  s      Y r;   c                    V ^ ,          p ^ \        \        P                  ! RRV4      4      3#   \         d    ^T3u # i ; i)r   z\Dr   )rm   rG   rH   ro   )r.  rA   s   & r9   	style_key!write_markdown.<locals>.style_keyc  sE    aD	s266%Q/011 	q6M	s   ". A A )rA  rC   z---r   Nr~   )
MD_HEADERSry  r  r  r  rW   OUT_MD)r^   r&  rW  sorted_rows	out_linesr.  s   &     r9   write_markdownr]  `  s    G -KIS388G,,s23S388UGc'l$:;;cABq6CL RDCL3q6122AVc'l"-3w< Asxx{*S01  9-45r;   c                   V ^8  d   QhRR/# )r3   r6   r  r7   )r8   s   "r9   r:   r:   z  s      9 r;   c                 d   \         P                  4       '       g   . # . p \         P                  R RR7      P                  4        Fh  pVP	                  4       pV'       g   K  \
        P                  ! RV4      pV'       g   K>  VP                  ^ 4      pW09  g   KW  V P                  V4       Kj  	  V # )rP   rF   )rR   r*  zhttps?://\S+)	r   rf   rl   rC  rI   rG   r  r  r  )urlsr{   r  r  s       r9   _read_errors_urlsra  z  s    	D$$gi$HSSUzz|IIot,1
A}A V Kr;   c                    V ^8  d   QhRRRR/# )r3   r`  r  r6   rN   r7   )r8   s   "r9   r:   r:     s     6 6y 6T 6r;   c                    V '       g    \         P                  R R7       R# V  Uu. uF  pR\         RV 2NK  	  pp\	        \         RP                  V4      R,           4       R#   \         d     R# i ; iu upi )Tr/  NFAILED x: r~   )r   r<  ro   MAX_FAILS_PER_LINKrW   r  )r`  r  rH  s   &  r9   _rewrite_errors_filerg    sw    	. 	;?@4aw)*#aS14E@TYYu-45	  		 As   A% A7%A43A4c                   V ^8  d   QhRR/# r   r7   )r8   s   "r9   r:   r:     s     R Rd Rr;   c                    \        4       p \        V P                  R . 4      4      p\        V P                  R/ 4      4      p\	        V P                  R. 4      4      p\        V P                  R\        P                  ! 4       4      4      p\	        V P                  R. 4      4      pV'       g   \        4       p\        '       d   \        V 4      pV'       dm   \        V P                  R . 4      4      p\        V P                  R/ 4      4      p\	        V P                  R. 4      4      p\	        V P                  R. 4      4      pRpRp \        V RRRR7      pV P                  R	4      ;'       g    . p	V P                  R
4      ;'       g    / p
\        '       g	   V	'       g   \        ^VR7      pVP                  ^ RR7       \        V4      w  rV'       d   Tp	M\        P                   P#                  R4       V'       d   Tp
 \$        '       d>   V
'       d6   \'        W:4      pV'       d#   W0R&   WR
&   \)        V 4       \+        RV R24       YR	&   \$        '       d   YR
&   \)        T 4       \$        '       dd   V
'       g\    \        V4      w  rWR
&   \)        V 4        V
'       d6   \'        W:4      pV'       d#   W0R&   WR
&   \)        V 4       \+        RV R24       \.        '       dU   \1        V 4      pV P                  R4      '       g   RV R&   \)        V 4       V'       d   \+        RV R24       \)        V 4       \2        P5                  4       '       dQ   \2        P5                  4       pT	;'       g    .  Uu. uF  p\7        V4      V8X  g   K  VNK  	  p	pWR	&   \)        V 4       \9        V	4      pV'       d   VP;                  4        \        VVR7      p\9        V4      pVP                  VRR7       \=        V4       UUu/ uF+  w  ppV'       g   K  V^ ,          '       g   K!  V^ ,          VbK-  	  pppV	 EF  pVV9   d   V^,          pVP                  VRR7       K)  Rp  \?        VV4      pV^ ,          '       g   \A        RV 24      hV^,          '       g   \A        RV^ ,           RV R24      h\$        '       dA   V
'       d9   \C        V
P                  V^ ,          R4      4      pV'       d   \E        V4      V^&   \9        V4      ^8  d   V^,          '       g   \E        V4      V^&    \9        V4      ^8  d>   V^,          '       d/   \E        \G        V^ ,          V^,          V^,          4      4      V^&   T^ ,          T9   d   TTTT^ ,          ,          &   M&\9        T4      TT^ ,          &   TPI                  T4       TPK                  T4       \M        T4      T R &   Y0R&   Y R&   YPR&   \)        T 4       \O        T4       Rp T^,          pTP                  TT'       d   R"MR#R7       EK  	  \O        V4       VP                  VR$R7       \b        '       Ed   V'       Ed   \        P                   P#                  R%\9        V4       R&24       . p V'       d    VPa                  4        \        V RR'R\d        R(7      p\=        \	        V4      ^R)7       F  w  ppRp\g        \h        4       F  p \?        VVRR*7      pV'       do   V^ ,          '       d^   V^,          '       dM   V^ ,          V9   d   VVVV^ ,          ,          &   M&\9        V4      VV^ ,          &   VPI                  V4       Rp MK  K  K  	  V'       d5   \        P                   P#                  R-V R\9        V4       R.V R!24       K  VPI                  V4       K  	  TpW0R&   WPR&   \)        V 4       \O        V4       \m        V4       \        P                   P#                  R0\n         R!24       \p        Ps                  4       '       d(   \        P                   P#                  R1\p         R!24        V'       d   VP;                  4         T'       d   TPa                  4        R# R#   \,         d     ELWi ; i  \,         d    T
;'       g    / p
 EL%i ; i  \,         d     ELi ; iu upi u uppi   \,         d     ELui ; i  \,         Ed   p\Q        TP                  T^ 4      4      ^,           TT&   \M        T4      T R &   Y0R&   Y R&   YPR&   \)        T 4       TT,          \R        8  df   TT9  d*   TPI                  T4       \U        RTT,           RT 24       TPK                  T4       \M        T4      T R &   YPR&   \)        T 4        Rp?EK  \        P                   P#                  RTT,           R\R         R T R!24       \        P                   P#                  RPW                  \X        PZ                  ! \]        T4      TTP^                  4      4      R!,           4        T'       d   TPa                  4        M  \,         d     Mi ; i\        T RRRR7      p Rp?EK3  Rp?ii ; i  \,         d     ELi ; i  \,         d@     TP                  R+4       M  \,         d     Mi ; i\        Pj                  ! R,4        EK  i ; i  \,         d   p\        P                   P#                  R/4       \        P                   P#                  RPW                  \X        PZ                  ! \]        T4      TTP^                  4      4      R!,           4        Rp?ELRp?ii ; i  \,         d     EL'i ; i  \,         d     R# i ; i   T'       d   TP;                  4        M  \,         d     Mi ; i T'       d   TPa                  4        i i   \,         d     i i ; i; i)2r]   ra   r^   rc   rb   NTr\  )ra  r[  r]  r\   r_   )r   rc   zRefreshing product links...)r   zN[warn] Link refresh discovered 0 links; falling back to checkpoint link list.
z)Applied preferred International names to z existing rowsr`   z
Re-queued z links due to missing URL/Imagestartingz	(resumed)Fz$Style number parsed empty for link: zName parsed empty for style r   )r   rd  re  z
Error scraping link (attempt r   z): r~   r  skippeddonez
Starting salvage pass for z failed links...
normal)ra  r[  r]  r^  )start)rL  r  g      ?z	Salvaged z: z$
Salvage pass encountered an error:
z
DONE. Wrote: z#Some links failed repeatedly; see: ):rs   rD  rn   r  rS  r   ri   ra  AUTO_REPAIR_BAD_ROWSr  rq  REFRESH_PRODUCT_LINKS_EACH_RUNr   r   rd  r   stderrr   r^  rW  ry   printro   AUTO_REPAIR_MISSING_MEDIAr~  r   rI   r]  r  r   r  rQ  r4  r*  rJ   r  r  rE  ry  r]  rm   rf  r   r  	tracebackformat_exceptiontype__traceback__quitENABLE_SALVAGE_PASSSALVAGE_PAGE_LOAD_TIMEOUTr  SALVAGE_MAX_TRIES_PER_LINKru  rg  rZ  r   rf   )rS  r]   ra   r^   rc   rb   repairedreporterrp  r\   r_   fresh_linksfresh_preferred_namesnupdr  rmwantr  r   	processedr  r.  style_to_idxrf  r  rp  rV  rX  	remainingsalvageds                                 r9   mainr    s
   		BrvvlB78J"&rvvmR'@"AK !34DrvvlDIIK89J#'/BB(G#H-/
 -b1RVVL"56JrvvmR89Kvr*+D $RVV,?%D E ,0HFwrDW[_`/552*,&&1B*C*I*Ir))'aJGHOOA$AOB1Fv1N.K +

  e %"7	55/0GD%)6
0?,-'+ I$~^_ #011(7$%B .-o8%:6%B"(7$%#
	"0GD%)6
0?,-'+ I$~^_ %$%b)B66-..*.&'#
2$&EFG# ##%D)6)<)<")<c)<1AWXYAZ^bAbQQ)<Mc"/BM"NN#%JG
O		
3 -6dOJODAqqQqTT!aOJ!Dz!Q		<BKm(6Cq66*-QRVQW+XYYq66*-I#a&QSTXSYYZ+[\\ :9o2?3F3Fs1vr3RS%3D%9CF 3x!|CFF!/!5As8a<CFF%34KCPQFTWXYTZ\_`a\b4c%dCF
 1v-58\#a&12/24ySV,C(NN4('-j'9B|$!%vJ(3}%.?*+#B'"4(BD NIOOIRTYOHm "r 	tF+ #4#4JJ;C@Q<R;SSefg#%I+j '!'/!&&?  ).?)@JGAt$H"#=>,"04"PC"s1vv#a&&#&q6\#9ADDc!f)=$>;>t9LQ$8$(KK$4+/ % 39vs ?$  

((9QCq=N9O8PPRSWRXXZ)[\!((./  K2 %.! vJ&7"#B4  !23

?6("56JJB:,bQR	 	 q    8"1"7"7R8    d K@ % . ! m(+KOOD!,D(E(IK%'-j'9B|$!%vJ(3}%.?*+#B'"4(,>>'88-44T:-D8I7J#dV.TU"t,+1*+=<(2C./'+JJ$$9+d:K9LAN`Maadeidjjlm JJ$$RWWY-G-GQQRTUTcTc-d%ehl%lm!"KKM$ *2QXgklF=m` % .  ) ,% &

= 9#, % $% JJsOO,  j

  !IJ

  )C)CDGQPQP_P_)`!adh!hiij$  		
  			  			  		s  &n4 .n4 	n4 n4  An4 ?n4 b b 5b *n4 6n4 ?b& c $5c n4 %9n4 3n4 n4 0n4 5ccA-n4 ?cc"c01n4 "7c3$c3?c3Ac3c3-c!.c!:Bc3n4 #An4 %.n4 k: j -Ak: 2$j-j-&A
j-0k: Ak: A)n4 7'n4 n 8n"  n" b#n4 "b##n4 &c:c=n4  cn4 cn4 cn4 !c0,c3/c00c33j?B:j9n4  Bji,+j,i:7j9i::jn4 jn4 j*&k: )j**k: -k79k
k7k	k7k	k72k: 6k77k: :nA=nn4 nn4 nn"n10n14p6opopop!o;)o;9p;p	pp		p__main__>   r   yonr  true)g333333?g?)T)   )P   )x   )r   r   r   )__conditional_annotations__r   
__future__r   rj   r   r  rG   r  r=  r   r:  ri   ru  rv  r  r  r  html.parserr   r6  pathlibr   typingr   r   r   r   r	   urllib.requestr
   r   bs4r   seleniumr   selenium.common.exceptionsr   r   selenium.webdriver.common.byr   "selenium.webdriver.firefox.optionsr   "selenium.webdriver.firefox.servicer   rj  selenium.webdriver.supportr   r[  selenium.webdriver.support.uir   rh   CATALOG_URL_PRIMARYr   __annotations____file__r  r  r  r   rn   rI   r  rq  rZ  re   r   rf  rp  rr  r^  rY  r  rw  rt  PAGE_LOAD_TIMEOUTrn  NAV_SETTLE_SECONDSrt  rz  r{  r|  rg   r1   r>   rJ   rW   rs   ry   r   r   r   r   r   r	  r   rQ  rZ  rq  rz  r  r  r  r  r  r  r  r  r  r"  r*  rM  rW  rd  r]  rn  rs  r~  r  r  r  r  r  r  r  r  r  r  r  r  r  r  rF  rK  rQ  r]  ra  rg  r  r   )r  s   @r9   <module>r     s  *X #  	  	   
       "   3 3 +
   K + 6 H @ 7 W #1o  
C >!!#**!#0BC!H!N!N!P!V!V!X\{!{ 	'	'99
44
 
  +  &+ "%'.
   !' 	 	  2	 4B	 DV	 Xb	
 
 
 !1
 3F
 H\  - /D  (    ") 	    ! #( */ 16  "
 J !     #       ::>>"4b9??AIIT <
~I&
4G GZ#L!0AH!
H* &	*
 * /* !%* *b %&CN"2$J $4
3#r* .(41h"/&d#"#J-d62	&9!./f!F8v",*Od/$&MU &MN64"6Rj zF r;   