
    ?i,m                      U d Z ddlmZ ddlZddlZddlZddlZddlZddlZddl	Z	ddl
Z
ddlZddlZddlZddlZddlZddlmZ ddlZddlmZ ddlmZmZmZmZmZ ddlmZmZmZ ddl m!Z!m"Z" dd	l#m$Z$ dd
l%m&Z& ddl'm(Z(m)Z) ddl*m+Z+ ddl,m-Z- ddl.m/Z/ ddl0m1Z2 ddl3m4Z5 ddl6m7Z7 dgZ8e8d   Z9dZ:dZ;ejx                  j{                  dd      Z>ejx                  j{                  dd      Z?ejx                  j{                  dd      j                         j                         dv ZBejx                  j{                  dd      j                         j                         dv ZCdZDdeEd<    eeF      j                         j                  ZIeId z  ZJeId!z  ZKeId"z  ZLd#ZMd$ZNd%ZOd&ZPg d'ZQ eReQ      ZSd$ZTd(ZUd)ZVd*ZWd+ZXd$ZYd$ZZd,Z[d-Z\d.Z]ejx                  j{                  d/d      j                         xs dZ^dwd0Z_dxd1Z`dyd2Zadzd3Zbd{d4Zcd|d5Zdd}d6Zed~d7Zf G d8 d9      Zgdd:Zhdd;Zidd<Zjdd=Zkdd>Zl	 dd?d$eUdd@	 	 	 	 	 	 	 	 	 	 	 	 	 ddAZmddBZnddCZoeUeWd-f	 	 	 	 	 	 	 	 	 ddDZpddEZqdddFZr G dG dHe      ZsddIZtddJZuddKZvddLZwdM ZxddNZyddOZzddPZ{ddQZ|ddRZ}dddSZ~ddTZddUZdddVZdxdWZddXZddYZddZZdd[Zdd\Zdd]Zdd^Zdd_Zdd`ZddaZddbZddcZdddZdddeZddfZddgZdh ZddiZddjZddkZddlZddddm	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddnZdddoZd&dpddqZddrZddsZddtZdduZedvk(  r e        yy)a  
RW_Site_Scraper-Orders_Page.py
==============================

Scrapes Red Wing footwear catalog:
- https://order.redwingshoes.com/footwear-rwbr

Test mode:
- Set ONLY_STYLE at the top of this file to a style number (e.g. "400")
  to scrape just that single product.

Outputs (next to this script):
- RW_Orders_Site_Scrape.md
- RW_Site_Scraper_v2_checkpoint.json   (resume state)
- RW_Site_Scraper_v2_errors.txt        (links that failed repeatedly)

Stability features:
- `safe_get()` uses short timeouts + window.stop() so Selenium doesn't hang forever
- HTTP fallback (no Selenium) for the few product pages that still time out
- Optional salvage pass at the end to retry hard failures (HTTP-first)

Cross-platform (Windows + Linux Cinnamon):
- Headless Firefox (default)
- Geckodriver resolution "like Parts_Auto" (explicit Service path; no Selenium Manager):
    1) GECKODRIVER_PATH env var (file or directory)
    2) geckodriver(.exe) on PATH
    3) auto-download geckodriver (GitHub releases) into a user cache dir
    4) (optional) if double-click/no terminal and Tk is available, prompt to pick geckodriver

Feature columns are 1/0 (not Yes/No).
Includes Brand (string) + brand family flags (Red Wing / Irish Setter / Worx).

Dependencies:
- Python 3.9+
- Firefox installed
- Selenium installed:
    Linux Mint/Ubuntu: sudo apt install -y python3-selenium
    Windows: python -m pip install selenium

Notes for Linux Mint PEP 668:
- Prefer `python3-selenium` from apt (as above).
- This script does NOT require webdriver-manager.
    )annotationsN)
HTMLParser)Path)DictListOptionalSetTuple)parse_qsurljoinurlparse)Requesturlopen)BeautifulSoup)	webdriver)TimeoutExceptionWebDriverException)By)Keys)Options)Service)expected_conditions)WebDriverWaitz,https://order.redwingshoes.com/footwear-rwbrzorder.redwingshoes.comz/footwear-rwbrRW_SITE_USERNAMEzrwss614@redwingshoes.comRW_SITE_PASSWORDzWelcomeBack99!RW_HEADLESS1>   r   yonyestrueRW_REFRESH_LINKS str
ONLY_STYLEzRW_Orders_Site_Scrape.mdz"RW_Site_Scraper_v2_checkpoint.jsonzRW_Site_Scraper_v2_errors.txt   TRED WING FOR BUSINESSF)'Style #NameURLImageBrandMaleFemaleRed WingIrish SetterWorxz
Safety Toez	Steel ToezNon-Metallic ToezAluminum ToezMetatarsal GuardzSoft Toe
Waterproof
InsulationzSlip ResistantzElectrical HazardzPuncture ResistantzStatic DissipativezAnkle Protectionu   BOA® Lacing SystemDefined HeelzAll Leather UpperzOxford/AthleticChukkaHiker5"6"7"8"9"10"11"12"zBuilt in USAzMade in USA-      g      ?)333333?g?Z      
   GECKODRIVER_PATHc                    | rdS dS )Nr   0 )vs    ,Boot_Features/RW_Site_Scraper-Orders_Page.pyb01rK      s    3    c                |    | xs dj                  dd      } t        j                  dd|       j                         } | S )Nr#   |z\|\s+ )replaceresubstrip)ss    rJ   md_escape_cellrV      s8    	
b#u%A
vsA$$&AHrL   c                    | j                  | j                  dz         }|j                  |d       |j                  |        y )Nz.tmputf-8encoding)with_suffixsuffix
write_textrQ   )pathtexttmps      rJ   atomic_writera      s7    


4;;/
0CNN4'N*KKrL   c                 4   t         j                         s(t        t        g g g i di g t	        j                         d dS 	 t        j                  t         j                  d            } t        | j                  dd            }|t        k7  rR	 t         j                  t         j                  d| d             t        t        g g g i t	        j                         d d	S | j                  dt               | j                  d
t               | j                  dg        | j                  dg        | j                  dg        | j                  di        | j                  dg        | j                  dt	        j                                | j                  dd        | S # t        $ r Y w xY w# t        $ rd 	 t         j                  t         j                  d             n# t        $ r Y nw xY wt        t        g g g i di g t	        j                         d dcY S w xY w)NF)versioncatalog_urlsproduct_links
done_linksrowspreferred_namesmedia_repair_donefail_countshard_failed_links
started_atgeckodriver_pathrX   rY   rc   r   z.json.vz.bak)rc   rd   re   rf   rg   rj   rl   rm   rd   re   rf   rg   rj   rk   rl   rm   z.json.corrupt)
CHECKPOINTexistsCHECKPOINT_VERSIONCATALOG_URLStimejsonloads	read_textintgetrQ   r[   	Exception
setdefault)dataold_vers     rJ   load_checkpointr|      s   )(!!&!#))+ $
 	
-
zz*...@Adhhy!,-(("":#9#9GG9D:Q#RS . ,!# !"iik$(	 	 		#565,b)#r*+R0diik2*D1+  ,  
	z55oFG 		 *(!!&!#))+ $
 	

sa   AF* ,F 8$F* B=F* 	F'$F* &F''F* *	H4(GH	G)&H(G))+HHc                P    t        t        t        j                  | dd             y )NrC   T)indent	sort_keys)ra   rn   rs   dumps)rz   s    rJ   save_checkpointr      s    TZZQ$GHrL   c                    t        t        dd      5 }|j                  | j                         dz          d d d        y # 1 sw Y   y xY w)NarX   rY   
)open
ERRORS_TXTwriterstrip)linefs     rJ   write_errors_liner      s<    	j#	0 &A	$%& & &s	   #?Ac                 ,    	 dd l } y# t        $ r Y yw xY w)Nr   TF)tkinterrx   )r   s    rJ   _can_use_tkr      s     s    	c                   t         j                  j                         s
t               sy 	 dd l}ddlm}m} |j                         }|j                          |j                  dd       |j                  dd       |j                  |       }|j                          |xs dj                         }|r|S d S # t        $ r Y y w xY w)	Nr   )
filedialog
messageboxz-topmostTRW Site ScraperzCould not find geckodriver automatically.

Please select the geckodriver executable.
Windows: geckodriver.exe
Linux: geckodriver)titler#   )sysstdoutisattyr   r   r   r   Tkwithdraw
attributesshowinfoaskopenfilenamedestroyrT   rx   )r   tkr   r   rootr^   s         rJ   _tk_pick_filer     s    
zz+-2uuw
D);	
 )))6
!!#t%% s   B	B7 5B7 7	CCc                  *    e Zd ZdZddZdddZd	dZy)
ProgressReporterzDTTY progress bar, or Tk window if launched by double-click (no TTY).c                   t        t        |      d      | _        || _        t        j
                  j                         | _        d| _        d | _	        d | _
        d | _        | j                  sEt               r9	 dd l}ddlm} |j                         | _	        | j                  j!                  d       | j                  j#                  d       | j                  j%                  dd       |j'                  | j                  dd	      | _
        | j                  j)                  d
dd       |j+                  | j                  | j                  d      | _        | j                  j)                  dd       d| _        | j                  j-                          | j                  j/                          y y y # t0        $ r
 d| _        Y y w xY w)N   Fr   )ttkr   620x150zStarting...w)r_   anchorx   )r      )fillpadxpadyiD  )maximumlength)r   rD   )r   r   T)maxrv   totalrl   r   r   r   use_ttygui_root_label_pbarr   r   r   r   r   geometry	resizableLabelpackProgressbarupdate_idletasksupdaterx   )selfr   rl   r   r   s        rJ   __init__zProgressReporter.__init__$  sS   UQ'
$zz((*

+-!$'UUW


  !23

##I.

$$UE2 hhtzzchR  c A __TZZTW_X


Rg6

++-

!!#% #0&  ! !s   :D7F4 4GGc                >   t        dt        t        |      | j                              }| j                  r| j
                  r| d| j                   d| j                         }| j                  r| j                  j                  |       | j                  r|| j                  d<   | j
                  j                          | j
                  j                          y d}|| j                  z  }t        t        ||z              }d|z  d||z
  z  z   }t        t        j                         | j                  z
  d	      }||z  }	|	d
kD  r| j                  |z
  |	z  nd}
d| d| d| j                   d|dz  ddt        |
       d}|r|d| z  }t        j                   j#                  d|d d z          t        j                   j%                          || j                  k(  r>t        j                   j#                  d       t        j                   j%                          y y )Nr   /z  )r_   value    #-g-C6?g&.>        [z]  (d   z5.1fz%) ETA rU      r   )r   minrv   r   r   r   rT   r   configr   r   r   roundrr   rl   r   r   r   flush)r   currentnotemsgwidthfracfilledbarelapsedrateetas              rJ   r   zProgressReporter.updateC  s   aS\4::6788

IQtzzl"TF399;C{{""",zz&-

7#JJ'')JJ#U4%<()FlSEFN33diikDOO3V< /3d{tzzG#t+#b	4::,bc$ws3xjPQRRv;C

DS	)*

djj JJT"JJ !rL   c                    | j                   r)| j                  r	 | j                  j                          y y y # t        $ r Y y w xY wN)r   r   r   rx   )r   s    rJ   closezProgressReporter.closec  s?    88



""$ #8  s   7 	AAN)r   rv   rl   floatr#   )r   rv   r   r$   returnNoner   r   )__name__
__module____qualname____doc__r   r   r   rH   rL   rJ   r   r   !  s    N!>@rL   r   c                    t         j                  dk(  rnt         j                  j                  d      xs> t         j                  j                  d      xs t	        t        j                               } t        |       dz  }nOt         j                  j                  d      xs  t	        t        j                         dz        } t        |       dz  }|j                  dd	       |S )
NntLOCALAPPDATAAPPDATARW_Site_ScraperXDG_CACHE_HOMEz.cacherw_site_scraperTparentsexist_ok)osnameenvironrw   r$   r   homemkdir)baseds     rJ   
_cache_dirr   n  s    	ww$zz~~n-^	1J^cRVR[R[R]N^J**zz~~./N3tyy{X7M3NJ**GGD4G(HrL   c                 f   t         rct        t               } | j                         rt        |       S | j	                         r)dD ]$  }| |z  }|j                         st        |      c S  dD ](  }t
        |z  }|j                         st        |      c S  t        j                  dk7  rXt        j                         dz  t        j                         dz  dz  fD ]$  }|dz  }|j                         st        |      c S  t        j                  d      xs t        j                  d      } | r| S y )N)geckodriver.exegeckodriverr   binz.localr   r   )rE   r   is_filer$   is_dirBASE_DIRr   r   r   shutilwhich)pr   candr   s       rJ   _resolve_from_env_or_pathr  y  s   !"99;q6M88:: %4x<<>t9$% 3 $<<>t9 
ww$))+%tyy{X'='EF 	!A}$D||~4y 	! 	]#Fv||4E'FArL   c                 @   t         j                  j                         } t        j                         j                         }| j	                  d      r#dt        j
                         d   v }|rddfS ddfS | j	                  d      r
d|v sd	|v ry
y| dk(  r
d	|v sd|v ryyy)zO
    Returns (asset_contains, archive_type) matching geckodriver releases.
    win64r   win64win32ziplinuxaarch64arm64)zlinux-aarch64tar.gz)linux64r  darwin)zmacos-aarch64r  )macosr  )r   platformlowermachine
startswitharchitecture)sysplatmachis_64s      rJ   _platform_asset_keyr    s     ll  "G##%D% --/22 u55gu55'"4.$(d?i4/." rL   c                   t               \  }}d}t        |ddi      }t        |d      5 }t        j                  |j                         j                  dd	            }d
d
d
       j                  dg       }d
}d
}	|D ]?  }
|
j                  dd      }||v s|j                  |      s,|
j                  d      }|}	 n |st        d| d| d      | |	z  }t        |ddi      }t        |d      5 }t        |d      5 }t        j                  ||       d
d
d
       d
d
d
       t        j                  dk(  rdnd}| |z  }|dk(  rqt        j                   |d      5 }|j#                         D ]>  }|j                  |      s|j%                  ||        | |z  }|j'                  |        n d
d
d
       nt)        j                  |d      5 }|j+                         D ]d  }|j                  j                  d|z         s|j                  |k(  s1|j%                  ||        | |j                  z  }|j'                  |        n d
d
d
       	 |j-                  d       t        j                  dk7  ri	 t        j0                  |      }t        j2                  ||j4                  t0        j6                  z  t0        j8                  z  t0        j:                  z         |j=                         st        d      |S # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY w# t.        $ r Y w xY w# t.        $ r Y vw xY w)zh
    Downloads and extracts latest geckodriver into dest_dir.
    Returns path to extracted driver.
    z@https://api.github.com/repos/mozilla/geckodriver/releases/latest
User-AgentzRW_Site_Scraper/1.0headersr@   timeoutrX   rQ   errorsNassetsr   r#   browser_download_urlz'Could not find a geckodriver asset for r   z).<   wbr   r   r   r	  r)r^   zr:gzr   T
missing_okz5Download succeeded but geckodriver was not extracted.)r  r   r   rs   rt   readdecoderw   endswithRuntimeErrorr   r   copyfileobjr   r   zipfileZipFilenamelistextractrQ   tarfile
getmembersunlinkrx   statchmodst_modeS_IXUSRS_IXGRPS_IXOTHro   )dest_dir	asset_keyarchive_typeapireqr&  rz   r"  dl_urldl_namer   r   archive_pathreq2r   driver_nameextracted_pathzmemberr  tsts                         rJ   _download_latest_geckodriverrJ    s5   
 23I|
LC
#.CD
EC	b	! FQzz!&&(//')/DEF XXh#FFG uuVR |!<UU12FG DYKrR^Q__abccg%L6L2G#HID	r	" !alD)A !Q1a ! ! (*ww$#MK+Nu__\3/ 	1**, ??;/IIf8I4 6)AIIn-	 	 \\,/ 	1,,. ;;''k(9:fkk[>XIIf8I4 6;;.AIIn-	t, 
ww$	(BHH^RZZ$,,%>%MPTP\P\%\]   "RSSqF F$! ! ! !	 		 	    		sy   5L 8LLL#%L'	,L'AL36L3L? ;A(M  L
L	LL$'L03L<?	M
M	MMc                   t               }|rt        |      j                         r|S | j                  d      xs dj	                         }|rt        |      j                         r|S 	 t               }t        |      }t        |      | d<   t        |        t        |      S # t        $ rP}t        d      }|r0t        |      j                         r|| d<   t        |        |cY d }~S t        d|       d }~ww xY w)Nrm   r#   z$Select geckodriver / geckodriver.exezUnable to locate or install geckodriver.

Fix options:
  - Put geckodriver on PATH
  - OR set GECKODRIVER_PATH to the full driver path
  - OR install via package manager (Linux often: sudo apt install firefox-geckodriver)

Underlying error: )r  r   ro   rw   rT   r   rJ  r$   r   rx   r   r,  )ckr  saveddestdriver_pathepickeds          rJ   ensure_geckodriverrR    s    !#AT!W^^ VV&'-2446Ee##%
|248!$[!1; 
EFd6l))+%+B!"BM!
 "#%
 	

s$   )8B" "	C;+7C6"C;(C66C;eager)page_load_strategyblock_imagespage_load_timeout
user_agentc               >   t        |       }t               }|r|j                  d       t        r|j                  d       |r|j	                  d|       |j                  dd       |j                  dd       |j                  dd	       |j                  d
d       |j                  dd       |j                  dd       |j                  dd       |j                  dd       |j                  dd       |j                  dd       |j                  dd	       |j                  dd	       |r|j                  dd       |r|j                  d|       t        |      }t        j                  ||      }	|	j                  |       |	j                  t               |	S )aJ  Start Firefox using an explicit geckodriver path (no Selenium Manager).

    Args:
        page_load_strategy: "eager" returns after DOMContentLoaded. "normal" waits for full load.
        block_images: If True, blocks images to reduce load stalls/timeouts.
        page_load_timeout: Seconds for Selenium navigation timeout.
    z	-headlessz-privatepageLoadStrategyzdom.webnotifications.enabledFzmedia.volume_scalez0.0z!browser.privatebrowsing.autostartTznetwork.http.http3.enablezbrowser.cache.disk.enablezbrowser.cache.memory.enablezbrowser.cache.offline.enableznetwork.http.use-cachezplaces.history.enabledzsignon.rememberSignonsz"privacy.trackingprotection.enabledz)privacy.trackingprotection.pbmode.enabledzpermissions.default.imagerC   zgeneral.useragent.override)executable_path)serviceoptions)rR  r   add_argumentPRIVATE_BROWSER_MODEset_capabilityset_preferenceFirefoxServicer   Firefoxset_page_load_timeoutset_script_timeoutSCRIPT_TIMEOUT)
rL  headlessrT  rU  rV  rW  geckor\  r[  drivers
             rJ   create_driverri    s     r"EiG[)Z(13EF 95A/7>E6>6>8%@95A3U;3U;3U;?FFM:A>;ZHU3Gw@F
  !23
n-MrL   c                    	 | j                          	 | j                  d       | j                  d       y# t        $ r Y /w xY w# t        $ r Y yw xY w)z;Clear cookies/storage for a clean private session baseline.about:blankz}
            try { localStorage.clear(); } catch (e) {}
            try { sessionStorage.clear(); } catch (e) {}
            N)delete_all_cookiesrx   rw   execute_script)rh  s    rJ   reset_browser_statern  Q  s^    !!#	

=!	
	    s    6 "A 	AA	AAc                     t         \  } }	 t        j                  t        j                  | |             y # t
        $ r t        j                  |        Y y w xY wr   )REQUEST_DELAY_RANGErr   sleeprandomuniformrx   )lohis     rJ   _polite_delayrv  g  sA     FB

6>>"b)* 

2s   )5 AAc                   d}t        d|dz         D ]  }	 | j                  |       t                	 t        | dd      }|~t	        |d      r3t	        |j
                  d      rt        |      dz   |j
                  _        t	        |d      r3t	        |j                  d      rt        |      dz   |j                  _        | j                  |       t        j                  |       	 | j                  t        j                  d	        y |r|y# t        $ r Y Zw xY w# t        $ r Y  yw xY w# t         $ ry}|}	 | j#                  d
       n# t        $ r Y nw xY wt        j                  |       	 | j                  t        j                  d	       Y d}~ y# t        $ r Y nw xY wY d}~nd}~wt$        $ r}|}Y d}~nd}~ww xY w	 | j                  d       n# t        $ r Y nw xY wt        j                  d|z         )zNavigate without getting stuck on pages that never fully finish loading.

    - Uses page_load_timeout
    - On timeout, calls window.stop() and continues if the DOM exists
    - Retries a couple times with light cleanup
    Nr   rC   command_executor_client_configr  rD   _connbodyzwindow.stop();rk  g      ?)rangerc  rv  getattrhasattrry  rv   r  rz  rx   rw   rr   rq  find_elementr   TAG_NAMEr   rm  r   )	rh  urlr  settle	max_trieslast_excattemptcerP  s	            rJ   safe_getr  o  s    )-HIM* 1#)	((1O
V%7>>r#34ARART]9^47L24E))1r7+)0L+.w<"+<( JJsO JJv##BKK8 51#f  K       	H%%&67 JJv##BKK8 ! 	H		JJ}% 		

4'>"s   D-BD?&D-& D	DD-DD-	D*&D-)D**D--	G6F*9E
F*	EF*EF*0 F	F"F*!F""F**G6F==GG	G$#G$c                    g }| fD ]  }||vs|j                  |        d| v r*| j                  ddd      }||vr|j                  |       |S | j                  ddd      }||vr|j                  |       |S )zOReturn a small set of URL variants (www/non-www) to dodge occasional redirects.z//www.z//r   )appendrQ   )r  outsuu2s       rJ   _url_variantsr    s    DU D=KKN 3[[4+T>KKO
 K [[x+T>KKOKrL   c                    t        | dddd      }t        ||      5 }|j                         }d d d        j                  dd	      S # 1 sw Y   xY w)
Nz_Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36z?text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8zen-US,en;q=0.9)r  AcceptzAccept-Languager  r  rX   rQ   r   )r   r   r)  r*  )r  r  r?  r&  rz   s        rJ   _fetch_htmlr    s`    
{W/
C 
g	& !vvx ;;wy;11 s   AAc                  <     e Zd ZdZh dZ fdZd Zd Zd Z xZ	S )_VisibleTextExtractorz;Very small HTML->visible text extractor (no external deps).>   r  brh1h2h3h4h5h6litdthtrdivfooterheaderarticlesectionc                0    t         |           g | _        y r   )superr   parts)r   	__class__s    rJ   r   z_VisibleTextExtractor.__init__  s     "
rL   c                t    |j                         | j                  v r| j                  j                  d       y y Nr   r  _BLOCK_TAGSr  r  )r   tagattrss      rJ   handle_starttagz%_VisibleTextExtractor.handle_starttag  -    99;$***JJd# +rL   c                t    |j                         | j                  v r| j                  j                  d       y y r  r  )r   r  s     rJ   handle_endtagz#_VisibleTextExtractor.handle_endtag  r  rL   c                b    |r-|j                         r| j                  j                  |       y y y r   )rT   r  r  )r   rz   s     rJ   handle_dataz!_VisibleTextExtractor.handle_data  s&    DJJLJJd# !4rL   )
r   r   r   r   r  r   r  r  r  __classcell__)r  s   @rJ   r  r    s!    EK#$$$rL   r  c                4   t               }	 |j                  |        dj                  |j                        }t        j                  |      }t        j                  dd|      }t        j                  dd|      }|j                         S # t        $ r Y yw xY w)Nr#   z\n{3,}z

z[\t\r]+rP   )
r  feedrx   joinr  html_libunescaperR   rS   rT   )htmlr  raws      rJ   _html_to_textr    s    A	t ''!''
C


C
 C
&&FC
(C
&&S#
&C99;  s   B 	BBc                >   t        j                  d| t         j                  t         j                  z        }|sy|j	                  d      }t        j
                  dd|      }t        j                  |      }t        j
                  dd|      j                         }|S )Nz<h1\b[^>]*>(.*?)</h1>flagsr#   r   <[^>]+>rP   rO   )	rR   search
IGNORECASEDOTALLgrouprS   r  r  rT   )r  minners      rJ   _extract_first_h1r    sw    
		*D		8QRAGGAJEFF:sE*Ee$EFF63&,,.ELrL   c                   dhh dd	d}d
fd}t        j                  d| t         j                  t         j                  z        }|r! ||j	                  d            } ||      r|S t        j
                  d| t         j                  t         j                  z        D ]%  } ||j	                  d            } ||      s#|c S  t        |       S )a  Extract a likely product title from HTML.

    Many Red Wing product pages place the actual product name in <h3>, while <h1>
    can be a site/banner header (e.g., 'RED WING FOR BUSINESS'). We therefore
    try meaningful <h3> first, then fall back to <h1>.
    r'   >	   
SIZE & FITRELATED PRODUCTSCARESIZINGDETAILSREVIEWSFEATURES
TECHNOLOGYSPECIFICATIONSc                    t        j                  dd|       } t        j                  |       } t        j                  dd|       j	                         } | S )Nr  rP   rO   )rR   rS   r  r  rT   )r  s    rJ   _cleanz&_extract_first_heading.<locals>._clean  sD    z3.!!%(vsE*002rL   c                    | xs dj                         } | sy| j                         }|v s|v ryt        |      dk  ryt        j                  d|      syy)Nr#   Fr&   z[A-Z]T)rT   upperlenrR   r  )rU   upBADSTOPs     rJ   _okz#_extract_first_heading.<locals>._ok  sV    W"OOWWY9d
r7a<yy2&rL   z<td[^>]*class=['\"][^'\"]*prTitle[^'\"]*['\"][^>]*>\s*Name\s*</td>\s*<td[^>]*class=['\"][^'\"]*prValue[^'\"]*['\"][^>]*>(.*?)</td>r  r   z<h3\b[^>]*>(.*?)</h3>)r  r$   r   r$   rU   r$   r   bool)rR   r  r  r  r  finditerr  )r  r  r  mnamerU   mh3r  r  s         @@rJ   _extract_first_headingr  
  s     #
#CD
 II	Immbii'	E 5;;q>"q6H {{3TQSQZQZAZ[ 399Q< q6H
 T""rL   c                   | xs dj                         } | sy| j                         }|j                  d      s"|j                  d      s|j                  d      ryd|v ry	 t        |       }|j                  xs dj                         j                  d      }t        |j                  xs d      }d	|v rQt        j                  d
|      r:dD ]4  }|j                  |      s |j                  |j                               s4 y yyd|v rt        j                  d|      ryt        j                  d|      ryy# t        $ r |}i }Y w xY w)Nr#   Fjavascript:zmailto:ztel:z/safety-boot/Tr   
/footwear-z.*/footwear-[^/]+stylestyleNumberskuitempid	productIdz	/product/z\d{3,6}z/\d{3,6}\.html$)rT   r  r  r   r^   r   r   queryrx   rR   	fullmatchrw   r  )hreflowparsedr^   r  keys         rJ   _is_product_linkr  C  s4   JBD
**,C
~~m$y(AS^^TZE[#$!r((*11#6+,
 t<<,d3R  99S>UYYsyy{%;   dryyT: 
yy#T*+  s   "AD; ;E
Ec                   g }t               }	 | j                  t        j                  d      }|D ][  }	 |j	                  d      xs dj                         }|r||v r.t        |      s:|j                  |       |j                  |       ] 	 	 | j                  t        j                  d      }|D ][  }	 |j	                  d      xs dj                         }|r||v r.t        |      s:|j                  |       |j                  |       ] 	 |s	 | j                  t        j                  d      D ][  }	 |j	                  d      xs dj                         }|r||v r.t        |      s:|j                  |       |j                  |       ] 	 |S |S # t        $ r d}Y Nw xY w# t        $ r Y &w xY w# t        $ r d}Y w xY w# t        $ r Y w xY w# t        $ r d}Y w xY w# t        $ r Y |S w xY w)u  Locate product anchors on the catalog page.

    The Red Wing wholesale catalog renders product tiles with specific CSS
    classes.  Each tile (<li>) contains two links pointing at the same
    product detail page: one wrapping the image and one wrapping the product
    name.  Both have an href that ends in ".html" with the numeric style
    number.  On the current site the anchor for the name has the classes
    ``c-product-tile__pdp-link js-product-name``, while the image link has
    no special class but is still within the tile.  This helper first
    attempts to extract anchors using those specific selectors and falls
    back to scanning all anchors when necessary.

    Returns a list of WebElement objects (anchors) with unique hrefs.
    Hli.js-product-grid-item a.c-product-tile__pdp-link.js-product-name[href]r  r#   (li.js-product-grid-item a[href$='.html']z
//a[@href])setfind_elementsr   CSS_SELECTORget_attributerT   rx   r  r  addXPATH)rh  anchorsseenelementsr   r  	elements2s          rJ   _find_product_anchorsr  h  s    GUD''OOV
  		A/52<<> 44<%q!		((OOG
	  		A/52<<> 44<%q!		 	))"((LA 	#OOF39r@@BD tt|#D)NN1%HHTN	# N7NS         ! D  	N	s   %F) #FF) *$F) %G
 6#F9G
 ,$G
 #G* 8#GG* .$G* F&"F) %F&&F) )	F65F69GG
 GG
 
	GGG'$G* &G''G* *	G76G7c                t   	 | j                   xs t        }	 | j                  xs d}|sg S t	               }	 t        |d      }|j                  d      D ]9  }|j                  d      xs dj                         }|s)|j                  |       ; |j                  d      D ]9  }|j                  d      xs dj                         }|s)|j                  |       ; |j                  dd      D ]9  }|j                  d      xs dj                         }|s)|j                  |       ; 	 t        j                  |      }g d	}|D ]d  }	t        j                  |	|t        j                  
      D ]9  }
|
j!                  d      xs dj                         }|s)|j                  |       ; f g }t	               }|D ]A  }t#        ||      }t%        |      s||v r |j                  |       |j'                  |       C |S # t        $ r
 t        }Y w xY w# t        $ r d}Y w xY w# t        $ r Y 
w xY w)zIFallback link discovery when clickable anchors are sparse or JS-rendered.r#   html.parserr  r  r  r   T)r  )zhttps?://[^\s\"'<>]+z2/(?:footwear-rwbr|safety-boot|product)/[^\s\"'<>]+z"/\d{3,6}\.html(?:[?#][^\s\"'<>]*)?r  r   )current_urlCATALOG_URL_PRIMARYrx   page_sourcer  r   selectrw   rT   r  find_allr  r  rR   r  r  r  r   r  r  )rh  base_urlr  
candidatessoupr   r  blobpatternspatr  r  outr  r  abs_urls                   rJ   '_extract_product_links_from_page_sourcer    s/   '%%<)<!!'R 	5JT=1gh 	%AEE&M'R..0Dt$	%
 GH 	%AEE&M'R..0Dt$	%
 s. 	%AEE&M'R..0Dt$	% T"DH
  "S$bmm< 	"A!r((*Aq!	"" CUD (C((d?

7 Js  '&'
  4  sH   H H AH* <AH* 	AH* H* HHH'&H'*	H76H7c                   t                t        | |       t        |        t        j                  dft        j                  dft        j                  dft        j                  dft        j                  dft        j                  dfg}t        j                  dft        j                  dft        j                  d	ft        j                  d
fg}t        j                  dft        j
                  dft        j
                  dfg}	 t        |       ry	 d}d}|D ],  \  }}	 | j                  ||      }	|	j                         r|	} n. |D ],  \  }}	 | j                  ||      }	|	j                         r|	} n. |r|sy	 |j                          |j                  t               	 |j                          |j                  t               d}
|D ]L  \  }}	 | j                  ||      }|j                         r$|j                         r|j                          d}
 nN |
s|j                  t         j"                         t%        j$                         dz   }t%        j$                         |k  rxt        |        	 t        |       ry	 	 |D ])  \  }}| j                  ||      }|j                         s) n y	 t%        j&                  d       t%        j$                         |k  rwyy# t        $ r Y w xY w# t        $ r Y w xY w# t        $ r Y w xY w# t        $ r Y w xY w# t        $ r Y |w xY w# t        $ r Y sw xY w# t        $ r Y w xY w# t        $ r Y yw xY w)zCAttempt a best-effort login for order.redwingshoes.com if prompted.zinput[type='email']zinput[name='email']zinput[name='username']zinput[id*='email' i]zinput[id*='user' i]zinput[autocomplete='username']zinput[type='password']zinput[name='password']zinput[id*='password' i]z&input[autocomplete='current-password']zbutton[type='submit']z//button[contains(translate(.,'LOGINSGNIN','loginsgnin'),'login') or contains(translate(.,'LOGINSGNIN','loginsgnin'),'sign in')]z//input[@type='submit']NFT   g?)rv  r  dismiss_popupsr   r  r  r  rx   r  is_displayedclear	send_keysr   r   
is_enabledclickr   ENTERrr   rq  )rh  
target_urllogin_user_selectorslogin_pass_selectorssubmit_selectorsuser_elpass_elbyselr  	submittedbtnendpes                 rJ   ensure_logged_inr$    sy   OVZ 6 
/0	/0	23	01	/0	:; 
23	23	34	BC	 
12	  V  	W	,- ( )
 GG' C	&&r3/D  " # ( C	&&r3/D  " # ' &' &'I# C	%%b#.C!cnn&6		 	 $**% ))+
C
))+
v	$V, -	/ C((S1??$
  
 	

3! ))+
g    		  		  
    		  		  		s   K%  $K5$L?L %L% AL58M *M 1M %	K21K25	LL	LL	L"!L"%	L21L25	MM	MM	M M c                $   t         j                  dft         j                  dft         j                  dft         j                  dft         j                  dfg}|D ]  \  }}	 t	        | d      j                  t        j                  ||f            }| j                  d|       t        j                  d       	 |j                          t        j                  d
        y y# t        $ r | j                  d	|       Y 6w xY w# t        $ r Y w xY w)zCOpen the Footwear section after login when required by the site UI.zfootwear-labelz#footwear-labelz;//button[@id='footwear-label' or @aria-controls='footwear']z//a[@id='footwear-label']zU//*[self::button or self::a or @role='tab'][contains(normalize-space(.), 'Footwear')]rD   ?arguments[0].scrollIntoView({block:'center', inline:'center'});rA   arguments[0].click();g?N)r   IDr  r  r   untilECelement_to_be_clickablerm  rr   rq  r  rx   )rh  	selectorsr  r  els        rJ   open_footwear_sectionr.  R  s    
 !	+,	PQ	./	jkI  C	vr*001K1KRQTI1VWB!!"ceghJJtC
 JJsO  C%%&=rBC  		s7   !AD9C"	D"D =D?D  D	DDc                   t         j                  dft         j                  dft         j                  dfg}d}|D ]q  \  }}	 t        | d      j                  t	        j
                  ||f            }| j                  d|       t        j                  d       	 |j                          d	} n 	 t        | t               	 t        | d
      j                  d        y# t        $ r | j                  d|       Y Qw xY w# t        $ r Y w xY w# t        $ r Y Yw xY w# t        $ r$ 	 t        | t               Y y# t        $ r Y Y yw xY ww xY w)zQUse Footwear > Red Wing navigation described in docs, then ensure /footwear-rwbr.z%//a[contains(@href,'/footwear-rwbr')]z0//button[contains(@data-target,'footwear-rwbr')]zx//*[self::a or self::button][contains(normalize-space(.), 'Red Wing') and not(contains(normalize-space(.), 'Heritage'))]Fr   r&  rA   r'  Tr   c                p    t         t        | j                        j                  xs dj	                         v S )Nr#   )FOOTWEAR_RWB_SLUGr   r  r^   r  r   s    rJ   <lambda>z-open_red_wing_footwear_page.<locals>.<lambda>  s)    'Xamm-D-I-I-OR,V,V,XY rL   N)r   r  r   r)  r*  r+  rm  rr   rq  r  rx   r  r  )rh  r,  clickedr  r  r-  s         rJ   open_red_wing_footwear_pager5  j  s^    
:;	EF	  N  	OI
 G C	vq)//0J0JBPS90UVB!!"ceghJJtC
 G",-fb!''Y	
  C%%&=rBC  		    	V01 		ss   AC>C)C>/D  D C;8C>:C;;C>>	D
	D
	DD	E	&D88	EE	EE	c                   	 | j                   xs t        }g }g d}|D ]_  }	 | j                  t        j
                  |      D ]9  }|j                  d      xs dj                         }|s)|j                  |       ; a 	 | j                  t        j
                  d      D ]B  }dD ];  }|j                  |      xs dj                         }d|v s+|j                  |       = D 	 	 | j                  xs d}	|	rt        j                  |	      }
t        j                  d|
t        j                        D ]"  }|j                  |j                  d	             $ t        j                  d
|
t        j                        D ]"  }|j                  |j                  d	             $ g }t!               }|D ]  }t#        ||      }	 t%        |      }|j&                  xs dj)                         }|j*                  xs dj)                         }t,        |vr`t        j.                  d|      sw|j0                   d|j&                   | }||vs|j3                  |       |j                  |        t        |vr|j5                  d	t               |S # t        $ r
 t        }Y w xY w# t        $ r Y w xY w# t        $ r Y w xY w# t        $ r d}	Y w xY w# t        $ r Y 1w xY w)zGDiscover footwear brand catalog paths from RW site navigation/elements.)za[href*='/footwear-']z[id*='footwear' i] a[href]znav a[href*='footwear']r  r#   z&[data-target], [data-url], [data-href])zdata-targetzdata-urlz	data-hrefr  z/footwear-[a-z0-9\-]+r  r   z)https?://[^\s\"'<>]*/footwear-[a-z0-9\-]+z/footwear-[a-z0-9\-]+$z://)r  r  rx   r  r   r  r  rT   r  r  r  r  rR   r  r  r  r  r   r   netlocr  r^   	SITE_HOSTr  schemer  insert)rh  r  r  dom_selectorsr  r-  r  attrrI   r  r	  r  urlsr  r  r  r  hostr^   norms                       rJ   discover_footwear_catalog_urlsr@    s   '%%<)< JM
  	**2??C@ ,((06B==?%%d+,&&r8`a 	)B@ )%%d+1r88:1$%%a()	)!!'R   &5t2==Q 	*Aaggaj)	*I4WYWdWde 	*Aaggaj)	* DUD Hc"	AHHN))+DFFLb'')D D yy2D9((3qxxj/tHHTNKK$ $&A*+KC  '&'"  		    "  		sg   J AJ&+J&AJ6 J6 (K AKJ#"J#&	J32J36	KKKK	K%$K%c                B    dd}t        | |      j                  |       y)zVWait for shared footwear-page elements instead of requiring product links immediately.c                   	 | j                  t        j                  d      }|ry	 	 | j	                  t        j
                  d      j                  xs dj                         }d|v }d|v }d|v }d	|v }d
|v }d|v xs  d| j                  xs dj                         v }|xr |xs |xs
 |xs |xs |S # t        $ r Y w xY w# t        $ r Y yw xY w)u  Check that the catalog UI has loaded.

        On the current Red Wing site, the grid of products is present in
        ``li.js-product-grid-item`` elements.  Earlier heuristics looked for
        text such as "Sort By", "Results" or "Filters"; those still apply
        but may not appear until the user scrolls.  We consider the catalog
        ready when either at least one product tile exists or legacy text
        heuristics are satisfied.
        zli.js-product-grid-itemTr{  r#   Fresultzsort byfilterszmore resultsr  footwearr  )	r  r   r  rx   r  r  r_   r  r  )	r   tilestxthas_resultshas_sorthas_filtershas_more
has_searchhas_footwear_ctxs	            rJ   _readyz/wait_for_footwear_catalog_ready.<locals>._ready  s    	OOBOO5NOE 	>>"++v6;;ArHHJC #o#3&!S(_
%,]ATRT@[@[@]0]fX%e%e%ex%e[ef  		  		s"   "B3 <C 3	B?>B?	CCNr   r  )r   r)  )rh  r  rN  s      rJ   wait_for_footwear_catalog_readyrP    s    g: &'"((0rL   c                   t        | d      }t        |      }d}d}|j                  dd      xs |j                  d      }|ru|j                  d      }|j                  d	      }|r<|j                  d
d      }t	        j
                  d|      }	|	r|	j                  d      }|r|j                  d
d      }|s |j                  d      D ]  }
	 t        j                  |
j                  d            }t        |t              r|n|g}|D ]  }t        |t              s|s'|j                  d      xs dj                         }|r|}|s^dD ]Y  }t!        |j                  |      xs d      j                         }t	        j
                  d|      }|sH|j                  d      } n |s|s n |s|s n |s}dD ]x  }	 |j#                  |      }|s|j%                  d      r$|j                  d      xs dj                         }n|j                  d
d      }|scd|j'                         vsv|} n |s|rd| n|xs d}|s|j                         }|s|}|||fS # t        $ r Y w xY w# t        $ r d}Y w xY w)zMExtract (style_number, name, style_text) from legacy and RW order-site pages.r   r#   r  	shoeguide)class_printSpacing)idr  strongrP   T)rT   z	#\s*(\d+)r   z"script[type='application/ld+json']r   )r  mpn	productID\b(\d{3,6})\b)r  r  r   zmeta[property='og:title']Nmetacontentzred wing for businessr(   )r   extract_style_from_urlfindget_textrR   r  r  r  rs   rt   rx   
isinstancelistdictrw   rT   r$   
select_oner  r  )r  r  r  style_numberr   
style_text	guide_divh3_tag
strong_tagmatchscriptpayloadobjsobjnmr  r  r  r  r-  rG  s                        rJ    extract_style_and_name_from_htmlrn    s   }-D)#.LDJ		%	4T		^	8TI%^^H-
D9JIIlJ7E${{1~&&s$&7Dkk"FG 	F**V__4_%@A )$77gYD !#t,''&//R668B!#: "!#''#,"4"5;;=II&6<+,771:L!" L  /	2 E 	C__S) ~~f%vvi(.B557kk#Tk2.ciikA	 1=w|n-DJB
!'')z))Y  4  s$   %I&I6&	I32I36JJc                   g d}t        j                          dz   }t        j                          |k  rd}|D ]W  }	 | j                  t        j                  |      }|j	                         r$|j                         r|j                          d} nY |sy t        j                  d       t        j                          |k  ry y # t        $ r Y w xY w)N)zN//button[contains(.,'Agree') or contains(.,'Accept') or contains(.,'Proceed')]zI//a[contains(.,'Agree') or contains(.,'Accept') or contains(.,'Proceed')]uR   //button[contains(.,'Close') or contains(.,'×') or contains(@aria-label,'Close')]g       @FT皙?)	rr   r  r   r  r  r  r  rx   rq  )rh  xpathsr"  r4  xpr-  s         rJ   r  r  A  s    F
 ))+
C
))+
 	B((26??$HHJ"G	 

3 ))+
  s   AC	CCc           	        d}d}t        |      D ]Q  }t        |        d}dD ]  }	 | j                  t        j                  |      }|j                         s5|j                  d      }t        |d      r|j                         sc|dvrh	 | j                  d|       t        j                  d	       	 |j                          d}t        j                  d        n t        |       }	t        |	D 
ch c]%  }
|
j                  d      s|
j                  d      ' c}
      }||k(  r|dz  }nd}|}|s|dk\  r y 	 | j                  d       t        j                  d       T y # t        $ r Y w xY w# t        $ r( 	 | j                  d
|       n# t        $ r Y Y |w xY wY w xY w# t        $ r Y w xY wc c}
w # t        $ r Y |w xY w)Nr   F)zz//button[contains(translate(normalize-space(.),'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'),'more results')]zu//a[contains(translate(normalize-space(.),'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'),'more results')]disabledr  )Nfalser#   r&  rA   r'  T333333?r  r   r&   z/window.scrollTo(0, document.body.scrollHeight);g      ?)r|  r  r  r   r  r  r  r~  r  rm  rx   rr   rq  r  r  r  )rh  
max_roundsstable_rounds
last_count_clicked_morerr  r!  disabled_attrlinksr   counts               rJ   scroll_to_load_allr  X  s   MJ: ?v
 %	B!))"((B7'') # 1 1* =3-cnn6F (;;))Y 

4 !IIK  $

3G%	N &f-eWqv?VQ__V,WXJQMMJ  2	!!"ST 	

3?8 ! 
 ! !!--.EsK$ ! ! L!   X  		s   0F)-F)F)E&F)4E5F)2F9
	F9
<F>&	E2/F)1E22F)5	F&?FF&	F 	F&F)F 	 F&#F)%F&&F))	F65F6>	G
	G
c                    | xs dj                         } t        j                  dd|       } | sy| j                         }|dv ryt        j                  d|       ryd| v ryt        |       dk  ry| S )Nr#   rO   rP   >   
QUICK VIEWADD TO CARTVIEW DETAILSr  r  r  r  r'   z\d{2,}$r&   )rT   rR   rS   r  r  r  )rU   r  s     rJ   _clean_listing_namer    su    	
bA
vsAA		A 	 	 	||Iq!
ax
1vzHrL   c                x   g }	 |j                  | j                  d      xs d       	 |j                  | j                  d      xs d       	 |j                  | j                  xs d       g }|D ]B  }|st	        |      j                         D ]!  }t        |      }|s|j                  |       # D t               }g }|D ])  }||vs|j                  |       |j                  |       + |sydd}|j                  |d       |d   S # t        $ r Y w xY w# t        $ r Y w xY w# t        $ r Y w xY w)	zJBest-effort: read the product name as displayed on a catalog listing tile.
aria-labelr#   r   c                *    t               }t        j                  d       r|dz  }t        j                  d j                               r|dz  }t        j                  d j                               r|dz  }t	         fddD              r|d	z  }|S )
Nz[A-Za-z]   \bWOMEN'?S\brD   
\bMEN'?S\b   c              3  B   K   | ]  }|j                         v   y wr   )r  .0rH  rU   s     rJ   	<genexpr>z;_listing_name_from_anchor.<locals>.score.<locals>.<genexpr>  s     X!qAGGI~Xs   )viewquickcartcomparewishlistr@   )r  rR   r  r  any)rU   scs   ` rJ   scorez(_listing_name_from_anchor.<locals>.score  sz    V99[!$"HB99_aggi0"HB99]AGGI.!GBX'WXX"HB	rL   T)r  reverser   )rU   r$   r   rv   )
r  r  rx   r_   r$   
splitlinesr  r  r  sort)r   r  linesclnr  uniqr  s           rJ   _listing_name_from_anchorr    sR   J!//,7=2>!//'28b9!&&,B' E !a&##% 	!B$R(BR 	!! 5DD T>HHRLKKO
 
 	II%I&7NY      s4   $D $D D- 	DD	D*)D*-	D98D9c                    | r|syd}| D ]l  }t        |t              rt        |      dk  r"|d   xs dj                         }|s<t	        |j                  |d            }|sZ|d   |k7  sc||d<   |dz  }n |S )zQOverwrite Name column using preferred mapping (by style). Returns number updated.r   rC   r#   r   )r_  r`  r  rT   r  rw   )rg   	preferredupdatedr&  r  prefs         rJ   apply_preferred_namesr    s    yG 	!T"c!fqj1""$"9==#;<AaDDLAaDqLG	 NrL   c           	        g }t               }t               }i }t        | t               t        |        t	        |       xs t        t              }t        j                  j                  dt        |       d       |D ]%  }t        j                  j                  d| d       ' |D ]O  }t                t        | |       t        | |       t        | d       t        |        t        |        g }t!        |       D ]3  }	|	j#                  d      }
|
st%        |
      s#|j'                  |
       5 t)        |       D ]  }
|
|vs|j'                  |
        t        j                  j                  d| dt        |       d	       |D ]j  }
|
|v rt+        |
      }t,        r|rd
|v r	 |r'||v r|j/                  |
       8|j/                  |       |j/                  |
       |j'                  |
       l R ||fS )zCollect unique product links from all configured catalog sections.

    Dedupes by style number when possible (preferred), otherwise by URL.
    z([collect] footwear catalogs discovered: r   z[collect] catalog: #   r  r  z
[collect] z -> discovered z candidate product links
zcatalog=international)r  r$  r  r.  r@  r`  rq   r   stderrr   r  rv  r  rP  r  r  r  r  r  r  r  r\  "PREFER_INTERNATIONAL_LISTING_NAMESr  )rh  	all_links	seen_href
seen_stylerh   rd   r  r  
page_linksr   r  r  s               rJ   collect_product_linksr    s   
 I%I5J&(OV01&!1&9OT,=OLJJ?L@Q?RRTUV 6

.qc456  )#%';v6" !#
&v. 	$A??6*D#D)d#	$ <FC 	(D:%!!$'	( 	

:cU/#j/9JJdef 	#Dy *40E1e@W[^@^J&MM$'u%MM$T"!	#3)#V o%%rL   c                Z   | syt        j                  d|       }|r|j                  d      S t        j                  d|       }|r|j                  d      S t        j                  d| t         j                        }|r|j                  d      S t        j                  d| t         j                        }|r|j                  d      S t        j                  d|       }|r#d	| j	                         v r|j                  d      S 	 t        t        |       j                        }d
D ]u  }|j                  |      xs# |j                  |j	                               xs g }|D ]8  }t        j                  dt        |            }|s%|j                  d      c c S  w 	 y# t        $ r Y yw xY w)Nr#   z/safety-boot/(\d+)[-/]r   z/safety-boot/(\d+)z2/footwear-[^/]+/(?:[^/?#]*/)?(\d{3,6})(?:[-/?#]|$)r  z/(\d{3,6})\.html(?:[?#]|$)z/(\d{3,6})(?:[-/?#]|$)zfootwear-rwbrr  rY  )rR   r  r  r  r  r   r   r  rw   r$   rx   )linkr  qr  valsrI   s         rJ   r\  r\  =  so   
		+T2Awwqz
		'.Awwqz 			GUWUbUbcAwwqz
		/R]]KAwwqz
		+T2A_

,wwqz	Xd^))*N 	&C55:9syy{!39rD &II.A7771:%&	&   s   A?F F F 	F*)F*c                      xs dj                         j                           syd}t         fd|D              ryt               dkD  ryy)Nr#   T)zwindow.openr  zfacebook.comzhttp://zhttps://c              3  &   K   | ]  }|v  
 y wr   rH   r  s     rJ   r  z#_looks_like_junk.<locals>.<genexpr>h  s     
'a16
'   r$  F)rT   r  r  r  )rU   junk_tokenss   ` rJ   _looks_like_junkr  b  sI    	
b!AWK

';
''
1v{rL   c                    	 | d   xs dj                         }| d   xs dj                         }|sy|j                         t        k(  ryy# t        $ r Y yw xY w)Nr   r#   r   TF)rT   rx   r  BAD_NAME_SENTINEL)rowr  r   s      rJ   _is_bad_rowr  p  sd    Q2$$&A"##% zz|((  s   .A 	AAc                   | j                  d      ry| j                  dg       xs g }t        | j                  dg       xs g       }d}t               }|D ]  }t        |t              r|s|d   xs dj	                         }|s0t        |      t        k  r|j                  |       Tt        |      dkD  r|d   xs dj	                         nd}t        |      dkD  r|d   xs dj	                         nd}|r|r|j                  |        |syt        |      D ]+  }	t        |	      }
|
s|
|v s|j                  |	       |dz  }- t        |      | d<   |S )	zIf checkpoint rows are missing URL/Image columns or have empty URL/Image, requeue those links.
    Runs at most once per checkpoint unless you delete/clear ck['media_repair_done'].
    ri   r   rg   rf   r#   rC   r&   r   )rw   r  r_  r`  rT   r  EXPECTED_COLSr  r\  discardsorted)rL  rg   rf   removedstyles_neededr&  r  url_cellimg_cellr  rI  s              rJ   repair_missing_mediar    sZ    
vv!"66&"#DRVVL"-34JGEM %!T"!1""$q6M!e$+.q6A:AaDJB%%'2+.q6A:AaDJB%%'2xe$% Z  #D)"%t$qLG	 j)B|NrL   c                   	 t        | j                  dg       xs g       }t        | j                  dg       xs g       }|r|sy|D ch c]  }|st        |      s|d    }}|syd}t        |      D ]+  }t	        |      }|s||v s|j                  |       |dz  }- |rt        |      | d<   t        | j                  di       xs i       }t        |j                               D ]'  }	t	        |	      }|s||v s|j                  |	d       ) || d<   | j                  dg       xs g D 
cg c]  }
t	        |
      |vr|
 c}
| d<   t        |        |S c c}w c c}
w # t        $ r Y yw xY w)aa  If checkpoint contains obviously bad rows, un-mark those links as 'done'.

    This fixes the situation where a previous run captured the site header/ads into the Name/Brand
    columns and those rows are now 'stuck' because resume logic skips already-done links.

    Returns:
        Number of links that were re-queued (removed from done_links).
    rg   rf   r   r   rj   Nrk   )r`  rw   r  r  r\  remover  ra  keyspopr   rx   )rL  rg   rf   r&  
bad_stylesr  r  rI  fckr  s              rJ   repair_bad_checkpoint_rowsr    s   $BFF62&,"-b17R8
:$(AqA+a.adA
A$ 	D'-BbJ&!!$'1		 %j1B| bff]B/526B"'')_ $+A."
*FF1dO$ !#B} 4666:Mr3R3XVX 'VQ)?)B*)T () 'VB"# B9 B.'V  sa   AE& E& 	EEE$E& )E& E& A0E& =E& 1E& 3E!	E& 
E& &	E21E2c           
        | xs dj                         D cg c]  }|j                          }}t        |      D ]  \  }}|s	|j                  |      s|t	        |      d j                  d      }|r|c S t        |dz   t        |dz   t	        |                  D ]!  }||   xs dj                         }|s|c c S   y yc c}w )u	  Parse a simple field/value from extracted page text.

    Red Wing pages often render fields in tables so the extracted text looks like:

        Name
        DynaForce®

    We support both 'Name: DynaForce' and 'Name' on one line with the value on the next.
    r#   Nz :	r   r   )r  rT   	enumerater  r  r|  r   )	
text_block
field_namer  r  ir   tailjnxts	            rJ   parse_field_liner    s     $.#3"?"?"ABBRXXZBEBU# 4??:&J()//7D1q5#a!eSZ"89 Qx~2,,.J   Cs   Cc                N   | xs dj                         }t        j                  d|t        j                        }|rz|j	                  d      j                         }|rYg }|j                         D ]3  }|j                  |j                         r|n|j                                5 dj                  |      S |j                         D ]  }t        j                  d|t        j                        s*t        j                  d|j                         t        j                        }|r||j	                  d      j                         }|r[g }|j                         D ]3  }|j                  |j                         r|n|j                                5 dj                  |      c S  y y)Nr#   z^(.*?)\s+style\s*#\s*\d+r  r   rP   z\bstyle\s*#\s*\d+\bz^(.*?)\s+style\s*#\s*\d+\b)rT   rR   r  r  r  splitr  isupper
capitalizer  r  )	header_text	body_texthtr  r  r  r   r   m2s	            rJ   extract_brandr    sE   

	"	"	$B
		-rGAggaj CYY[ A

		1@A88C= $$& 
99+TG8$**,bmm\Bhhqk'')C YY[ I

		1HI88C=(
 rL   c                    |rM| j                         j                  d|       }|dk7  r&| ||dz    }t        |d      }|r|j                         S t        | d      }|r|j                         S dS )Nz
ABOUT THE rt  i@  r)   r#   )r  r]  r  rT   )r  r  idxchunkvals        rJ   extract_about_namer    st    oo$$z%%9:"9c3:.E"5&1Cyy{"
9f
-C399;%2%rL   c           	     "   t        d t        j                  d| j                               D              }|t        d t        j                  d| j                               D              z  }t	        dd      D ci c]
  }| d||v  c}S c c}w )Nc              3  2   K   | ]  }t        |        y wr   rv   r  r   s     rJ   r  z parse_heights.<locals>.<genexpr>  s     V1AV   z\b(\d{1,2})\s*-\s*INCH\bc              3  2   K   | ]  }t        |        y wr   r  r  s     rJ   r  z parse_heights.<locals>.<genexpr>  s     SAQSr  z\b(\d{1,2})\s+INCH\b      ")r  rR   findallr  r|  )r_   foundr  s      rJ   parse_heightsr    st    V

+F

 UVVE	SS,CTZZ\!RSSSE+0B<8aqcGa5j 888s   :Bc                    | xs dj                         }|j                  d      }|j                  d      }|j                  d      }d|v r|j                  d      sd|v rd}|||fS )Nr#   zred wingzirish setterworxzby red wingT)r  r  )	brand_strbis_rwis_isis_worxs        rJ   classify_brand_familyr    si    	b!ALL$ELL(Ell6"G!all62!9K%  rL   c                   | xs dj                         }t        j                  dd|      }|j                  dd      }t        j                  dd|t        j                        }|j                         j                  dd      }t        j                  dd|      }|sd	}t        |      |kD  r|d | j                  d      }|S )
Nr#   rO   rP      ®z[^\w\-\.\s]+r  r{  z_+boot)rT   rR   rS   rQ   UNICODEr  r   )r   max_lenrU   s      rJ   _safe_filename_from_namer  -  s    	A
vsAA			$A
ARZZ8A		#s#A
uc1A
1vhwKs#HrL   c                     t        j                         j                  dz  j                         } | j	                  dd       | S )NImagesTr   )r   cwdparentresolver   r2  s    rJ   _images_dirr  ;  s6    			X	%..0AGGD4G(HrL   c                6    t        t        d      dz  | z        S )Nz..r   )r$   r   )fnames    rJ   _rel_image_pathr  A  s    tDzH$u,--rL   c                   dD ]9  }	 | j                  t        j                  |      }|r|j                         r|c S ; 	 | j                  t        j
                  d      }|j                  t        j                  d      }|r|j                         r|S 	 | j                  t        j                  d      }|s!	 | j                  t        j                  d      }d }d}|D ].  }	 |j                         s|j                  xs i }t        |j                  d      xs d      }	t        |j                  d      xs d      }
|	d	k  s|
d	k  rm|j                  d
      xs dj                         |j                  d      xs dj                         |j                  d      xs dj                         |	|
z  }t        fddD              r|dz  }t        fddD              r|dz  }t        fddD              r|dz  }||kD  r|}|}1 |S # t        $ r Y w xY w# t        $ r Y w xY w# t        $ r g }Y w xY w# t        $ r g }Y zw xY w# t        $ r Y w xY w)N)@div.slick-slide.slick-current.slick-active img[itemprop='image'].div.slick-slide.slick-current.slick-active imgHdiv.c-image-carousel__slider-item.js-carousel-item img[itemprop='image']productImageimgmain imgr   r   r   heightx   altr#   classsrcc              3  &   K   | ]  }|v  
 y wr   rH   )r  r  r  s     rJ   r  z1_pick_best_product_img_element.<locals>.<genexpr>v  s     P18Pr  )r  shoechukkahikermocrw  c              3  &   K   | ]  }|v  
 y wr   rH   )r  r  clss     rJ   r  z1_pick_best_product_img_element.<locals>.<genexpr>x  s     M18Mr  )productprimaryheroimagegffffff?c              3  &   K   | ]  }|v  
 y wr   rH   )r  r  r  s     rJ   r  z1_pick_best_product_img_element.<locals>.<genexpr>z  s     R18Rr  )z	/dw/imagescene7z/imagesstaticg?)r  r   r  r  rx   r(  r  r  sizer   rw   r  r  r  )rh  r  r  	containerimgsbest
best_scorer-  szr   hr  r  r  r  s               @@@rJ   _pick_best_product_img_elementr)  E  se    

	%%boos;Cs'')

''~>	$$R__e<3##%J
##BOOZ@ 	''U;D DJ 	??$BBbffWo*+AbffX&+!,A3w!c'##E*0b779C##G,299;C##E*0b779CEEP%OPPM%LMMR%QRRz!"
+0 Kc  		    
  	D	6  		sh   3HAH%  H5 ; I &I7AICI	H"!H"%	H21H25IIII	I&%I&c                   dD ]u  }	 | j                  t        j                  |      }|j                  d      xs dj	                         }|j                  d      xs dj	                         }|s|r|xs |c S w 	 | j                  t        j                  d      }	 |j                  t        j                  d      }|rP|j                  d      xs dj	                         }|j                  d      xs dj	                         }|s|r|xs |S dD ]u  }	 | j                  t        j                  |      }|j                  d      xs dj	                         }|j                  d      xs dj	                         }|s|r|xs |c S w y# t
        $ r Y w xY w# t
        $ r d }Y w xY w# t
        $ r Y w xY w# t
        $ r Y w xY w)N)r	  r
  r  z6div.c-image-carousel__slider-item.js-carousel-item imgzoomimgr#   r  r  r  )z$li[data-orbit-slide="product-1"] imgzli.active imgzul#productImage li.active imgzmain li.active imgr  )r  r   r  r  rT   rx   r(  )rh  r  r  rF  rU   r#  s         rJ   _extract_product_image_urlr,    s    	%%boos;C""9-3::<A""5)/R668AAvA  ''~>		((%@C ""9-3::<A""5)/R668AAvA
 	%%boos;C""9-3::<A""5)/R668AAvA   G  		  	C	  "  		s\   A/F
< F+  F =AF+ A/F:
	FFF(%F+ 'F((F+ +	F76F7:	GGc                &   |xs dj                         }|syd| d}t               |z  }	 t        |       }|rl|j                  dd      }ddi}t	        j
                  ||d	      }|j                  r2|j                  r&|j                  |j                         t        |      S d
}	 t        |       }|sy	 | j                  d|       t        j                  d       	 t        |        	 |j!                  t#        |             t        |      S # t        $ r Y vw xY w# t        $ r d
}Y yw xY w# t        $ r Y ]w xY w# t        $ r Y `w xY w# t        $ r9 	 | j%                  t#        |             t        |      cY S # t        $ r Y Y yw xY ww xY w)a	  Save the main product image as a temp file (__<style>.png).

    Preferred: download the product image URL (zoomimg/src) for best quality.
    Fallback: element screenshot if download fails.
    Returns relative path like ../Images/__595.png (or empty string).
    r#   __.pngz&amp;&r  zMozilla/5.0r@   )r  r  Nr&  rp  )rT   r  r,  rQ   requestsrw   okr[  write_bytesr  rx   r)  rm  rr   rq  r  
screenshotr$   save_screenshot)rh  r  r  out_pathimg_urlr  r&  img_els           rJ   capture_product_image_tempr9    s    [b!EtE}u$H
,V4oogs3G#]3GWgrBAtt		$$QYY/&u--
 F/7 _agh

3v#h-(u%%5      
    	""3x=1"5)) 			sx   A8D (D 7'D0 D? +$E 	DDD-,D-0	D<;D<?	E
E	F$E?<F?	FFFFc                   |syt        j                         |z  j                         }|j                         s|S t	        |      }| d}t               |z  }|j                         r`| d|  d}t               |z  }|j                         r;t        dd      D ],  }| d|  d| d}t               |z  }|j                         r, n 	 |j                  |       t        |      S # t        $ r |cY S w xY w)zNRename ../Images/__<style>.png -> ../Images/Name_of_Boot.png (collision-safe).r#   r/  r{  rC   r   )
r   r  r  ro   r  r  r|  rQ   rx   r  )r  	boot_namerel_temp_pathtemp_absstemr  dest_absr  s           rJ   finalize_image_filenamer@    s   
]*335H??#I.DfDME}u$H&%%=5(??1c] &%!D1&=50(	" 5!!  s   	C% %C32C3c                b   | xs dj                         }|xs dj                         }t        j                  d|      sd|v sd|v sd|v ryt        j                  d|      sd|v sd	|v sd
|v ryt	        t        j                  d|            }t	        t        j                  d|            }||fS )zInfer gender flags from URL + header (do NOT use body text; it frequently contains both words).

    Priority:
      1) URL slug/query (mens/womens)
      2) Header text (MEN'S / WOMEN'S)
      3) Otherwise: unknown -> (False, False)
    r#   z/womens(?:[-/]|$)zwomens-zgender=womenzgender=female)FTz/mens(?:[-/]|$)zmens-z
gender=menzgender=male)TFr  r  )r  r  rR   r  r  )r  
source_urlhuulmalefemales         rJ   infer_genderrG    s     ,B			B

	!	!	#B 
yy%r*i2oSUAUYhlnYn	yy#R(GrM\R=OS`dfSf		-,-D"))OR01F&>rL   rB  image_rel_pathr  c               X  : |xs d}|xs d}|j                         }|j                         }t        ||      }	t        |	      rd}	t	        |	      \  }
}}|	sO|j                         }|j                         }d|v sd|v rd}	nd|v sd|v rd}	n
d|v sd|v rd}	t	        |	      \  }
}}|j                         }t        ||      \  }}|sR|sP|dd	 }t        t        j                  d
|            }t        t        j                  d|            }|r|sd}n|r|sd}d|v }d|v xs d|v }d|v xs
 d|v xs d|v }d|v xs
 |xs |xs |}| }d|v xs d|v }d|v }t        |d      }|ry|j                         j                         }t        j                  d|      rd}net        j                  d|      }|r	 t        |j                  d            dkD  }n.d|v xs d|v }n#t        j                  d|      rd}n
d|v xs d|v }d|v xs t        j                  d |      du} d!|v xs t        j                  d"|      du}!d#|v xs d$|v }"d%|v xs t        j                  d&|      du}#d'|v xs
 d(|v xr d)|v }$d*|v }%t        |d+      }&|&r|&j                         j                  d,      nd-|v }'t        |d.      }(d})|(r&|(j                         :t        :fd/d0D              sd})|xs dj                         }*d1|*v xs
 d2|*v xs d3|*v }+d4|*v },d5|*v }-t!        |d6z   |z         }.t        |d7      }/|/j                         }0d8|0v xs d9|0v }1d:|0v xs d;|0v xs |1}2t#               }3|r	 t%        |d<      }4|4j'                  d      D ]i  }5d=D ]b  }6|5j)                  |6      }7t+        |7t,              s%|7j                         s6|3j/                  |7j                         j                                d k 	 |'s|3r|3D ]  }8|8d-k(  s
d>|8v sd?|8v sd}' n |3r:|3D ]5  }8|8j1                  d@dA      }9|1sdB|9v sdC|9v rd}1d}2$|2r'dD|9v s	dE|9v sdF|9v s4d}27 g t3        |       t3        |      t3        |      t3        |      t3        |	      t5        |      t5        |      t5        |
      t5        |      t5        |      t5        |      t5        |      t5        |      t5        |      t5        |      t5        |      t5        |      t5        |      t5        |       t5        |!      t5        |"      t5        |#      t5        |$      t5        |%      t5        |'      t5        |)      t5        |+      t5        |,      t5        |-      t5        |.dG         t5        |.dH         t5        |.dI         t5        |.dJ         t5        |.dK         t5        |.dL         t5        |.dM         t5        |.dN         t5        |2      t5        |1      S # t        $ r d}Y w xY w# t        $ r Y >w xY w)Oa  
    Build a row for the markdown table from the scraped pieces of a product page.

    In addition to the previously-supported text parsing, this version accepts the
    raw HTML of the page (via the ``html`` parameter) so that additional
    features can be inferred from non-visible attributes such as ``alt``,
    ``title``, or ``aria-label`` on icons.  Passing ``html`` is optional; if
    omitted, the function falls back to the older behaviour.
    r#   zIRISH SETTERr0   WORXr1   zRED WINGr/   NiX  r  r  Tz	STEEL TOEzALUMINUM TOEz	ALLOY TOEzNON-METALLIC TOEzNON METALLIC TOEzCOMPOSITE TOEz
SAFETY TOEzMETATARSAL GUARDz	MET GUARD
WATERPROOFr3   z$\b(non[-\s]?insulated|uninsulated)\bFz(\d+)\s*g\br   r   insulat
thinsulatezSLIP RESISTANTz\bSR\bzELECTRICAL HAZARDz\bEH\bzPUNCTURE RESISTANTPUNCTUREzSTATIC DISSIPATIVEz\bSD\bzANKLE PROTECTIONankleprotectBOAr4   r    zDEFINED HEELzLeather Typec              3  &   K   | ]  }|v  
 y wr   rH   )r  r   lls     rJ   r  z'_build_row_from_text.<locals>.<genexpr>  s     `q17`r  )meshnylonfabrictextilepoly	syntheticoxfordathleticr  r  r  r   zCountry of Originzmade in usazmade in the usazbuilt in usazassembled in the usar   )r  r   r  DEFINEDHEEL    rP   zMADE IN USAzMADE IN THE USAzBUILT IN USAzASSEMBLED IN USAzASSEMBLED IN THE USAr7   r8   r9   r:   r;   r<   r=   r>   )r  r  r  r  r  rG  r  rR   r  r  rT   rv   r  rx   r  r  r  r  r   r  rw   r_  r$   r  rQ   rV   rK   );r  r   r  r  rB  rI  r  
body_upper
body_lowerr  r  r  r  rC  buheader_upperrE  rF  first_chunk
male_foundfemale_found	steel_toealuminum_toenon_metal_toe
safety_toesoft_toe	met_guard
waterproofinsulation_lineil	insulatedr  slip_resistantelectrical_hazardpuncture_resistantstatic_dissipativeankle_protectionboadefined_heel_linedefined_heelleather_lineall_leather_upper
name_loweroxford_athleticr  r  heightsorigin_lineorigin_lowermade_in_usabuilt_in_usa
alt_tokensr  r  r<  r  tokrH  rT  s;                                                             @rJ   _build_row_from_textr  &  s   ( RI\rF"J"Jfi0I 	"	1)<E5'\\^__R>R#7&Ir\Vr\I2r!1"I 5i @ug<<>L
3LD&  #&"))M;?@
BIIo{CDlD*F z)I!Z/L;*3LL':5;MQ[;[apt~a~M*,[[l[mJ~H#z1Q{j7PI+J&y,?O""$**,99<bAI		."-A% #AGGAJ! 3I '"_E,"2D	99<jII"j0Qlj6PI&*4f"))Iy:Yae:eN,
:l		)U^@_gk@k.*<[*PZBZ.*<n"))IW`BaimBm*j8pg>S>oXaeoXoJC(NCBS$**,77>ZhlvZvL#I~>L!`$_`` $*"##%J:-f:3KfQW[eQeO#Fz!EFTMI56G"9.ABK$$&L L0X6G<6WK"l2n8NR^8^ncnL 5J
	 }5D}}T* <: <D''$-C!#s+		"syy{'8'8':;<< J 	Cn$c)9fm#		  	$CFC(AMQ$6:Kq:P"#^q%8<NRS<SWmqrWr#	$0u0t0 	z"0 	~&	0
 	y!0 	D	0 	F0 	E
0 	E
0 	G0 	J0 	I0  	M!0" 	L#0$ 	I%0& 	H'0* 	J+0, 	I-0. 	N/00 	102 	304 	506 	708 	C90< 	L=0> 	?0B 	OC0D 	FE0F 	E
G0J 	GDMK0L 	GDMM0N 	GDMO0P 	GDMQ0R 	GDMS0T 	GENU0V 	GENW0X 	GENY0\ 	L]0^ 	K_0 0a ! % $I%l  		s1   7X
  AX 'X 81X 
XX	X)(X)c           
     H   t         t        |       j                  xs dv rt        d      d }t	        |       D ]B  }	 t        |d      }t        ||      \  }}}|}t        |      }	t        ||||	| ||      }
|
c S  |r|t        d      # t        $ r}|}Y d }~dd }~ww xY w)Nr#   z3HTTP fallback disabled for authenticated order siter@   r  rH  zHTTP fallback failed)
r8  r   r7  r,  r  r  rn  r  r  rx   )r  image_temp_relr  r  r  r  r   rd  r  r  r  rP  s               rJ   _scrape_product_via_httpr    s    Xd^**0b1PQQ(,H4  	q"-D&FtQ&O#E4F%d+I&-C J& 
-
..  	H	s   =B	B!BB!prefer_httpc          	        d}t        |      }|}t                |r	 t        ||      S 	 t        t        |      j                  xs dv rt        | t               t        | |       t        |        | j                  xs d}t        ||      \  }}}	 |rt        | |      }t        | d      j                  t!        j"                  t$        j&                  df             | j)                  t$        j&                  d      j*                  xs d}	|}
t-        |||
|	|||      }|S # t        $ r Y w xY w# t        $ r d}Y w xY w# t        $ r t        ||      cY S w xY w)Nr#   )r  r  r{  rH  )r\  rv  r  rx   r8  r   r7  r$  r  r  r  r  rn  r9  r   r)  r*  presence_of_element_locatedr   r  r  r_   r  )rh  r  r  r  r  style_from_urlr  r   rd  r  r  r  s               rJ   scrape_productr  	  sd   N"4(ENO	+DPP
"M$..4"5V%89v !!'R"B4"NtZ	 !;FE!J 	fb!''(F(FU[G\(]^''V<AAGR	")
 
E  		$  	 N	 $  M'^LLMsH   D" A&E D2  BE "	D/.D/2E =E ?E  E EEc           	     <   t         }dd}t        | |      }g }|j                  ddj                  |      z   dz          |j                  ddj                  dgt	        |      z        z   dz          |D ]  }t	        |      t	        |      k  r|dgt	        |      t	        |      z
  z  z   }n%t	        |      t	        |      kD  r|d t	        |       }|j                  ddj                  |      z   dz           t        t        dj                  |      dz          y )Nc                x    | d   }	 dt        t        j                  dd|            fS # t        $ r d|fcY S w xY w)Nr   z\Dr#   r   )rv   rR   rS   rx   )r&  rU   s     rJ   	style_keyz!write_markdown.<locals>.style_keyM	  sE    aD	s266%Q/011 	q6M	s   !) 99)r  rN   z---r#   r   )r&  	List[str])
MD_HEADERSr  r  r  r  ra   OUT_MD)rg   r  r  sorted_rows	out_linesr&  s         rJ   write_markdownr  J	  s   G 9-KIS388G,,s23S388UGc'l$:;;cAB 2q6CL RDCL3q6122AVc'l"-3w< Asxx{*S012 9-45rL   c                 0   t         j                         sg S g } t         j                  dd      j                         D ]U  }|j	                         }|st        j                  d|      }|s/|j                  d      }|| vsE| j                  |       W | S )NrX   rQ   )rZ   r!  zhttps?://\S+r   )	r   ro   ru   r  rT   rR   r  r  r  )r=  r   r  r  s       rJ   _read_errors_urlsr  d	  s    	D$$gi$HSSU 	zz|IIot,
A}A	 KrL   c                    | s	 t         j                  d       y | D cg c]  }dt         d|  }}t	        t         dj                  |      dz          y # t        $ r Y y w xY wc c}w )NTr'  FAILED x: r   )r   r4  rx   MAX_FAILS_PER_LINKra   r  )r=  r  r  s      rJ   _rewrite_errors_filer  u	  sv    	. 	;?@aw)*#aS1@E@TYYu-45	  		 As   A A&	A#"A#c                    t               } t        | j                  dg             }t        | j                  di             }t	        | j                  dg             }t        | j                  dt        j                                     }t	        | j                  dg             }|s
t               }t        ryt        |       }|rlt        | j                  dg             }t        | j                  di             }t	        | j                  dg             }t	        | j                  dg             }d }d }	 t        | t        dd      }t        |       | j                  d	      xs g }	| j                  d
      xs i }
t        s|	st        d|      }|j                  dd       t!        |      \  }}|r|}	nt"        j$                  j'                  d       |r|}
	 t(        r4|
r2t+        ||
      }|r$|| d<   |
| d
<   t-        |        t/        d| d       |	| d	<   t(        r|
| d
<   t-        |        t(        rV|
sT	 t!        |      \  }}
|
| d
<   t-        |        	 |
r2t+        ||
      }|r$|| d<   |
| d
<   t-        |        t/        d| d       t2        rHt5        |       }| j                  d      sd| d<   t-        |        |rt/        d| d       t-        |        t6        j9                         rFt6        j9                         }|	xs g D cg c]  }t;        |      |k(  s| }	}|	| d	<   t-        |        t=        |	      }|dk(  rt?        d      |r|jA                          t        ||      }t=        |      }|j                  |d       tC        |      D ci c]  \  }}|s	|d   s|d   | }}}|	D ]  }||v r|dz  }|j                  |d       !d}	 	 tE        ||      }|d   st?        d|       |d   st?        d|d    d| d      t(        r0|
r.tG        |
j                  |d   d            }|rtI        |      |d<   t=        |      dkD  r|d   stI        |      |d<   	 t=        |      d kD  r'|d    r"tI        tK        |d   |d   |d                |d <   |d   |v r||||d      <   n"t=        |      ||d   <   |jM                  |       |jO                  |       tQ        |      | d<   || d<   || d<   || d<   t-        |        tS        |       d}	 |dz  }|j                  ||rd'nd(        tS        |       |j                  |d)       tf        rc|r`t"        j$                  j'                  d*t=        |       d+       g }	 |r	 |je                          t        | t        d,dth        -      }t        |       tC        t	        |      d.      D ]  \  }}d}tk        tl              D ]V  }	 tE        ||d/      }|rC|d   r>|d   r9|d   |v r||||d      <   n"t=        |      ||d   <   |jM                  |       d} nX |r3t"        j$                  j'                  d2| d$t=        |       d3| d&       |jM                  |        |}|| d<   || d<   t-        |        tS        |       tq        |       t"        j$                  j'                  d5tr         d&       tt        jw                         r't"        j$                  j'                  d6tt         d&       	 |r|jA                          	 |r|je                          y y # t0        $ r Y Gw xY w# t0        $ r
 |
xs i }
Y w xY w# t0        $ r Y w xY wc c}w c c}}w # t0        $ r Y w xY w# t0        $ r}tU        |j                  |d            dz   ||<   tQ        |      | d<   || d<   || d<   || d<   t-        |        ||   tV        k\  r^||vr%|jM                  |       tY        d!||    d"|        |jO                  |       tQ        |      | d<   || d<   t-        |        Y d }~?t"        j$                  j'                  d#||    d$tV         d%| d&       t"        j$                  j'                  dj[                  t]        j^                  ta        |      ||jb                              d&z          	 |r|je                          n# t0        $ r Y nw xY wt        | t        dd      }t        |       Y d }~nd }~ww xY w[# t0        $ r Y w xY w# t0        $ r; 	 |j                  d0       n# t0        $ r Y nw xY wt        jn                  d1       Y w xY w# t0        $ r}t"        j$                  j'                  d4       t"        j$                  j'                  dj[                  t]        j^                  ta        |      ||jb                              d&z          Y d }~zd }~ww xY w# t0        $ r Y w xY w# t0        $ r Y y w xY w# 	 |r|jA                          n# t0        $ r Y nw xY w	 |r|je                          w w # t0        $ r Y w w xY wxY w)7Nrf   rj   rg   rl   rk   rS  T)rf  rT  rU  re   rh   r   )r   rl   r   zRefreshing product links...)r   zN[warn] Link refresh discovered 0 links; falling back to checkpoint link list.
z)Applied preferred International names to z existing rowsri   z
Re-queued z links due to missing URL/Imagez{No product links discovered from catalog page. Login/navigation likely succeeded but product link extraction found nothing.startingz	(resumed)Fz$Style number parsed empty for link: zName parsed empty for style r   )r#   rC   r&   r  r  z
Error scraping link (attempt r   z): r   r2  skippeddonez
Starting salvage pass for z failed links...
normal)rf  rT  rU  rV  )startr  rk  g      ?z	Salvaged z: z$
Salvage pass encountered an error:
z
DONE. Wrote: z#Some links failed repeatedly; see: )<r|   r  rw   ra  r`  r   rr   r  AUTO_REPAIR_BAD_ROWSr  ri  RUN_HEADLESSrn  REFRESH_PRODUCT_LINKS_EACH_RUNr   r   r  r   r  r   r  r  r   printrx   AUTO_REPAIR_MISSING_MEDIAr  r%   rT   r\  r  r,  r   r  r  r  rV   r@  r  r  r  r  rv   r  r   r  	tracebackformat_exceptiontype__traceback__quitENABLE_SALVAGE_PASSSALVAGE_PAGE_LOAD_TIMEOUTr|  SALVAGE_MAX_TRIES_PER_LINKrq  r  r  r   ro   )rL  rf   rj   rg   rl   rk   repairedreporterrh  re   rh   fresh_linksfresh_preferred_namesnupdr{  rmwantr  r   	processedr  r&  style_to_idxr  r2  r  r  rP  	remainingsalvageds                                 rJ   mainr  	  s
   		BrvvlB78J"&rvvmR'@"AK !34DrvvlDIIK89J#'/BB(G#H-/
 -b1RVVL"56JrvvmR89Kvr*+D $RVV,?%D E ,0HFrLWcghF#/52*,&&1B*C*Ir)'aJGHOOA$AOB1Fv1N.K. +

  e %"7	5/0GD%)6
0?,-'+ I$~^_ #0B1(7$%B .o8%:6%B"?(7$%#
	"0GD%)6
0?,-'+ I$~^_ %%b)B66-.*.&'#
2$&EFG# ##%D)6)<"c1AWXYAZ^bAbQcMc"/BBM"A:_  NN#%JG
O		
3 -6dOJDAqqQqT!aJJ! W	IDz!Q		<BL0(6Cq6*-QRVQW+XYYq6*-I#a&QSTXSYYZ+[\\ :o2?3F3Fs1vr3RS%3D%9CF 3x!|CF!/!5As8a<CF%34KCPQFTWXYTZ\_`a\b4c%dCF
 1v-58\#a&12/24ySV,C(NN4('-j'9B|$!%BvJ(3B}%.?B*+#B'"4(BF NIOOIRTYOHoW	It 	tF+ #4JJ;C@Q<R;SSefg#%I,j ')'/!&&? $F+(.?)@J /GAt$H"#=> ,,"04"PC"s1v#a&#&q6\#9ADDc!f)=$>;>t9LQ$8$(KK$4+/ %,$  

((9QCq=N9O8PPRSWRXXZ)[\!((.//2 %.! BvJ&7B"#B4  !23

?6("56JJB:,bQR	 	     8"1"7R8    d$ K@ % . ! 0(+KOOD!,D(E(IK%'-j'9B|$!%BvJ(3B}%.?B*+#B'"4(,>>'88-44T:-D8I7J#dV.TU"t,+1*+=<(2C./'+JJ$$9+d:K9LAN`Maadeidjjlm JJ$$RWWY-G-GQQRTUTcTc-d%ehl%lm!"KKM$ *2Y`ostF'//?0] ~ % 0  ) ,% &

= 9#, % $% JJsO,  j

  !IJ

  )C)CDGQPQP_P_)`!adh!hiij$  		
  			  			  		s)  4B&h< :] #h< 9]  4]6 A>h< 
^^"A8h< 
^%^+^3)h< B^!.5^#A;^!A6h< f d7 (Af <AeAf Bh< )h <h- 	]h< ]h<  ]3/h< 2]33h< 6	^?h< ^h< 	^^!^^!!
d2+B-d-h< B	d-(c;:d-;	dd-d!d-(h< -d22h< 7	e f ef 	fe#"f#	e/	,f.e/	/ff 
ff 	hA8hh< hh< 	h*)h*-	h98h9<j>ij	ijij!i53j5	j>j jj__main__)rI   r  r   r$   )rU   r$   r   r$   )r^   r   r_   r$   r   r   )r   r   )rz   r   r   r   )r   r$   r   r   rO  )r   r$   r   Optional[str])r   r   )r   r  )r   zTuple[str, str])r;  r   r   r   )rL  r   r   r$   )T)rL  r   rf  r  rT  r$   rU  r  rV  rv   rW  r  r   zwebdriver.Firefoxr   )
r  r$   r  rv   r  r   r  rv   r   r   )r  r$   r   r  )r@   )r  r$   r  rv   r   r$   )r  r$   r   r$   )r  r$   r   r  )r   r  )r  r$   r   r   )r  rv   r   r   )r  r$   r  r$   r   zTuple[str, str, str])P   )rx  rv   r   r   )r   r$   )rg   List[List[str]]r  zDict[str, str]r   rv   )r   z Tuple[List[str], Dict[str, str]])r  r$   r   r$   r  )r  r  r   r  )rL  r   r   rv   )r  r$   r  r$   r   r$   )r  r$   r  r$   r   r$   )r  r$   r  r$   r   r$   )r_   r$   r   zDict[str, bool])r  r$   r   zTuple[bool, bool, bool])r  )r   r$   r  rv   r   r$   )r  r$   r   r$   )r  r$   r   r$   )r  r$   r;  r$   r<  r$   r   r$   )r  r$   rB  r$   r   zTuple[bool, bool])r  r$   r   r$   r  r$   r  r$   rB  r$   rI  r$   r  r$   r   r  r   )r  r$   r  r$   r   r  )r  r$   r  r  r   r  )rg   r  r   r   )r=  r  r   r   )r   
__future__r   rs   r   r  rR   r   r5  r   r2  rr   r  rr  r1  r  r  html.parserr   r.  pathlibr   typingr   r   r   r	   r
   urllib.parser   r   r   urllib.requestr   r   bs4r   seleniumr   selenium.common.exceptionsr   r   selenium.webdriver.common.byr   selenium.webdriver.common.keysr   "selenium.webdriver.firefox.optionsr   "selenium.webdriver.firefox.servicer   ra  selenium.webdriver.supportr   r*  selenium.webdriver.support.uir   rq   r  r8  r1  r   rw   r   r   rT   r  r  r  r%   __annotations____file__r  r  r   r  rn   r   r  r  r  r  r  r  r  r  PAGE_LOAD_TIMEOUTre  NAV_SETTLE_SECONDSrp  r^  r  r  r  rp   rE   rK   rV   ra   r|   r   r   r   r   r   r   r  r  rJ  rR  ri  rn  rv  r  r  r  r  r  r  r  r  r  r  r$  r.  r5  r@  rP  rn  r  r  r  r  r  r  r\  r  r  r  r  r  r  r  r  r  r  r  r  r)  r,  r9  r@  rG  r  r  r  r  r  r  r  r   rH   rL   rJ   <module>r     s  *X #  	  	   
       "   3 3 4 4 +
   K + / 6 H @ 7 3 #1o $	$  ::>>"46PQ ::>>"46FG zz~~mS1779??AEdd!#0BC!H!N!N!P!V!V!X\{!{  
C >!!#**	.	.<<
77
 
  +  &+ "
 J !     #         ::>>"4b9??AIT <
~I&
4G GZ@!0AH!
L 5 &. $555 	5
 5 5 5 5p, %&C	C C 	C
 C 
CN"2$J $4
3#r"JCL=@aH0&REP1DC*H.BL41h"=&@ J"#J-d62	&9!.<@1f8v"<6 SS
S S 	S S S S Sl/: >C 0Mb64"6Zz zF rL   