Spaces:
No application file
No application file
Fetching metadata from the HF Docker repository...
"""OCR helpers: read text tokens from a photo, guess a price tag, draw a box."""

import io
import re
from typing import Any, Dict, List, Optional, Tuple

from PIL import Image, ImageDraw

try:
    import pytesseract
    from pytesseract import Output as TessOutput
except Exception:
    # OCR is optional: without pytesseract, ocr_image() returns no text/tokens.
    pytesseract = None
    TessOutput = None

# "$12", "$ 12.99" or a bare "12.99". The (?!\d) guard stops the "$" branch
# from matching only the first four digits of a longer number ("$10000" must
# not be read as "$1000").
PRICE_RE = re.compile(r"(\$\s*\d{1,4}(?:\.\d{2})?(?!\d)|\b\d{1,4}\.\d{2}\b)")


def ocr_image(image_bytes: bytes):
    """Run OCR over an encoded image.

    Args:
        image_bytes: Raw bytes of an image file that PIL can open.

    Returns:
        A tuple ``(full_text, tokens, size)``: ``full_text`` is the
        space-joined token texts, ``tokens`` is a list of dicts with keys
        ``'text'``, ``'conf'`` (float, ``-1.0`` when unavailable) and
        ``'box'`` (``(x, y, w, h)``), and ``size`` is the image size as
        reported by PIL. When pytesseract is not importable the text is
        ``""`` and the token list is empty.
    """
    img = Image.open(io.BytesIO(image_bytes)).convert("RGB")
    if pytesseract is None:
        return "", [], img.size

    data = pytesseract.image_to_data(img, output_type=TessOutput.DICT)
    confidences = data.get('conf', [])
    tokens = []
    for i, txt in enumerate(data['text']):
        # Skip empty and whitespace-only entries; they carry no text.
        if not txt or not str(txt).strip():
            continue
        try:
            conf = float(confidences[i])
        except (IndexError, TypeError, ValueError):
            conf = -1.0
        box = (data['left'][i], data['top'][i], data['width'][i], data['height'][i])
        tokens.append({'text': txt, 'conf': conf, 'box': box})

    full = " ".join(t['text'] for t in tokens)
    return full, tokens, img.size


def guess_price(tokens: List[Dict[str, Any]]):
    """Pick the most likely price among OCR tokens.

    Each token's text is searched (thousands separators removed) for a price.
    Only values in ``[0.5, 1000]`` are considered, and the lowest one wins.

    Args:
        tokens: Token dicts as produced by ``ocr_image`` (``'text'`` and
            ``'box'`` keys are read).

    Returns:
        ``(value, box)`` for the chosen token, or ``None`` if no token holds
        an acceptable price.
    """
    best = None
    for t in tokens:
        m = PRICE_RE.search(t['text'].replace(",", ""))
        if not m:
            continue
        raw = m.group(0).replace("$", "").strip()
        try:
            val = float(raw)
        except ValueError:
            continue
        if 0.5 <= val <= 1000 and (best is None or val < best[0]):
            best = (val, t['box'])
    return best


def annotate_with_box(image_bytes: bytes, box: Tuple[int, int, int, int],
                      label: Optional[str] = None) -> bytes:
    """Draw a red rectangle (and optional label tab) on an image.

    Args:
        image_bytes: Raw bytes of an image file that PIL can open.
        box: ``(x, y, w, h)`` of the region to outline.
        label: Optional text drawn on a filled red tab above the box.

    Returns:
        The annotated image encoded as PNG bytes.
    """
    img = Image.open(io.BytesIO(image_bytes)).convert("RGB")
    draw = ImageDraw.Draw(img)
    x, y, w, h = box
    draw.rectangle([x, y, x + w, y + h], outline=(255, 0, 0), width=3)
    if label:
        # Rough tab width: 8 px per character, never narrower than 50 px.
        tw = max(50, len(label) * 8)
        # Keep the tab inside the image when the box touches the top edge.
        y0 = max(0, y - 22)
        draw.rectangle([x, y0, x + tw, y0 + 22], fill=(255, 0, 0))
        draw.text((x + 4, y0 + 4), label, fill=(255, 255, 255))
    out = io.BytesIO()
    img.save(out, format="PNG")
    return out.getvalue()
83cb218 verified - 1.52 kB initial commit
- 305 Bytes initial commit
- 0 Bytes xcode-select --install
- 0 Bytes import re, os import pandas as pd from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.pipeline import Pipeline from sklearn.compose import ColumnTransformer from sklearn.preprocessing import OneHotEncoder from sklearn.linear_model import Ridge def _norm_key(title: str): toks = re.findall(r"[a-z0-9]+", str(title).lower()) toks = [t for t in toks if len(t) > 2] key = " ".join(sorted(dict.fromkeys(toks))[:6]) return key or None class DealValuator: def __init__(self, comps_csv: str, feedback_csv: str | None = None): self.df = pd.read_csv(comps_csv) self.feedback_csv = feedback_csv self.feedback_stats = self._load_feedback(feedback_csv) if feedback_csv else {} self.model = self._train(self.df) def _train(self, df: pd.DataFrame): text_features = 'title' cat_features = ['category','brand','condition'] text_pipe = Pipeline([('tfidf', TfidfVectorizer(ngram_range=(1,2), min_df=1))]) cat_pipe = Pipeline([('ohe', OneHotEncoder(handle_unknown='ignore'))]) pre = ColumnTransformer([('t', text_pipe, text_features), ('c', cat_pipe, cat_features)]) model = Pipeline([('pre', pre), ('reg', Ridge(alpha=1.0))]) X = df[['title','category','brand','condition']] y = df['price'] model.fit(X, y) return model def _load_feedback(self, feedback_csv: str): if not feedback_csv or not os.path.exists(feedback_csv): return {} fb = pd.read_csv(feedback_csv) if fb.empty: return {} fb['key'] = fb['title'].apply(_norm_key) stats = (fb.dropna(subset=['key']) .groupby('key')['correct'] .mean() .to_dict()) return stats def _apply_feedback_adjustment(self, title: str, price: float) -> float: key = _norm_key(title) if not key or key not in self.feedback_stats: return price score = self.feedback_stats[key] # 0..1 if score >= 0.7: return price * 1.10 elif score <= 0.3: return price * 0.90 return price def predict_resale(self, title: str, category: str=None, brand: str=None, condition: str='Used-Good'): guess_cat, guess_brand = self._guess_meta(title) category = category or 
guess_cat brand = brand or guess_brand X = pd.DataFrame([{ 'title': title, 'category': category or 'Unknown', 'brand': brand or 'Unknown', 'condition': condition or 'Used-Good' }]) pred = float(self.model.predict(X)[0]) pred = self._apply_feedback_adjustment(title, pred) return { 'predicted_resale': max(5.0, round(pred, 2)), 'category': category, 'brand': brand, 'condition': condition } @staticmethod def _guess_meta(title: str): title_l = str(title).lower() known_brands = [ 'nintendo','sony','apple','kitchenaid','bose','canon','seiko','ikea','carhartt', 'levis','levi','patagonia','yeti','coach','dansk','dansko','dewalt','makita','all-clad', 'pokemon','herman miller','pyrex','le creuset','marantz','ll bean','ralph lauren','vera bradley','nerf' ] brand = None for b in known_brands: if b in title_l: brand = b.title() break cat_map = { 'console':'Electronics','playstation':'Electronics','wii':'Electronics','iphone':'Electronics','camera':'Electronics', 'headphone':'Electronics','receiver':'Electronics','walkman':'Electronics', 'bowl':'Home & Kitchen','mixer':'Home & Kitchen','pan':'Home & Kitchen','skillet':'Home & Kitchen','dutch oven':'Home & Kitchen','tumbler':'Home & Kitchen', 'jeans':'Clothing','jacket':'Clothing','shirt':'Clothing','dress':'Clothing','fleece':'Clothing', 'watch':'Accessories','handbag':'Accessories','tote':'Accessories', 'boots':'Shoes','clogs':'Shoes', 'chair':'Furniture', 'drill':'Tools','saw':'Tools', 'pokemon':'Collectibles','cards':'Collectibles', 'toy':'Toys','blaster':'Toys' } category = None for k,v in cat_map.items(): if k in title_l: category = v break return category, brand def compute_deal_score(predicted_resale: float, asking_price: float, fees_rate: float=0.13, ship_estimate: float=12.0): fees = predicted_resale * fees_rate net = predicted_resale - fees - ship_estimate profit = net - asking_price margin = profit / asking_price if asking_price > 0 else 0.0 if profit >= 50 and margin >= 0.8: label = 'Home Run' elif profit >= 25 and 
margin >= 0.5: label = 'Great' elif profit >= 10 and margin >= 0.3: label = 'Good' elif profit >= 5: label = 'Meh' else: label = 'Pass' return { 'fees': round(fees,2), 'net_after_fees': round(net,2), 'profit': round(profit,2), 'margin': round(margin,2), 'label': label }
"""OCR helpers: read text tokens from a photo, guess a price tag, draw a box."""

import io
import re
from typing import Any, Dict, List, Optional, Tuple

from PIL import Image, ImageDraw

try:
    import pytesseract
    from pytesseract import Output as TessOutput
except Exception:
    # OCR is optional: without pytesseract, ocr_image() returns no text/tokens.
    pytesseract = None
    TessOutput = None

# "$12", "$ 12.99" or a bare "12.99". The (?!\d) guard stops the "$" branch
# from matching only the first four digits of a longer number ("$10000" must
# not be read as "$1000").
PRICE_RE = re.compile(r"(\$\s*\d{1,4}(?:\.\d{2})?(?!\d)|\b\d{1,4}\.\d{2}\b)")


def ocr_image(image_bytes: bytes):
    """Run OCR over an encoded image.

    Args:
        image_bytes: Raw bytes of an image file that PIL can open.

    Returns:
        A tuple ``(full_text, tokens, size)``: ``full_text`` is the
        space-joined token texts, ``tokens`` is a list of dicts with keys
        ``'text'``, ``'conf'`` (float, ``-1.0`` when unavailable) and
        ``'box'`` (``(x, y, w, h)``), and ``size`` is the image size as
        reported by PIL. When pytesseract is not importable the text is
        ``""`` and the token list is empty.
    """
    img = Image.open(io.BytesIO(image_bytes)).convert("RGB")
    if pytesseract is None:
        return "", [], img.size

    data = pytesseract.image_to_data(img, output_type=TessOutput.DICT)
    confidences = data.get('conf', [])
    tokens = []
    for i, txt in enumerate(data['text']):
        # Skip empty and whitespace-only entries; they carry no text.
        if not txt or not str(txt).strip():
            continue
        try:
            conf = float(confidences[i])
        except (IndexError, TypeError, ValueError):
            conf = -1.0
        box = (data['left'][i], data['top'][i], data['width'][i], data['height'][i])
        tokens.append({'text': txt, 'conf': conf, 'box': box})

    full = " ".join(t['text'] for t in tokens)
    return full, tokens, img.size


def guess_price(tokens: List[Dict[str, Any]]):
    """Pick the most likely price among OCR tokens.

    Each token's text is searched (thousands separators removed) for a price.
    Only values in ``[0.5, 1000]`` are considered, and the lowest one wins.

    Args:
        tokens: Token dicts as produced by ``ocr_image`` (``'text'`` and
            ``'box'`` keys are read).

    Returns:
        ``(value, box)`` for the chosen token, or ``None`` if no token holds
        an acceptable price.
    """
    best = None
    for t in tokens:
        m = PRICE_RE.search(t['text'].replace(",", ""))
        if not m:
            continue
        raw = m.group(0).replace("$", "").strip()
        try:
            val = float(raw)
        except ValueError:
            continue
        if 0.5 <= val <= 1000 and (best is None or val < best[0]):
            best = (val, t['box'])
    return best


def annotate_with_box(image_bytes: bytes, box: Tuple[int, int, int, int],
                      label: Optional[str] = None) -> bytes:
    """Draw a red rectangle (and optional label tab) on an image.

    Args:
        image_bytes: Raw bytes of an image file that PIL can open.
        box: ``(x, y, w, h)`` of the region to outline.
        label: Optional text drawn on a filled red tab above the box.

    Returns:
        The annotated image encoded as PNG bytes.
    """
    img = Image.open(io.BytesIO(image_bytes)).convert("RGB")
    draw = ImageDraw.Draw(img)
    x, y, w, h = box
    draw.rectangle([x, y, x + w, y + h], outline=(255, 0, 0), width=3)
    if label:
        # Rough tab width: 8 px per character, never narrower than 50 px.
        tw = max(50, len(label) * 8)
        # Keep the tab inside the image when the box touches the top edge.
        y0 = max(0, y - 22)
        draw.rectangle([x, y0, x + tw, y0 + 22], fill=(255, 0, 0))
        draw.text((x + 4, y0 + 4), label, fill=(255, 255, 255))
    out = io.BytesIO()
    img.save(out, format="PNG")
    return out.getvalue()
"""Streamlit front end: single check, CSV batch, and photo group scan with OCR."""

import base64
import html
import os
from datetime import datetime, timezone

import pandas as pd
import streamlit as st

from valuation import DealValuator, compute_deal_score

DATA_DIR = "data"
FEEDBACK_CSV = os.path.join(DATA_DIR, "feedback.csv")
CONDITIONS = ["Used-Good", "Used-Fair", "Used-Excellent", "New-Open Box", "New"]


# Defined before any widget code: Streamlit executes the script top to bottom
# on every rerun, so a helper defined below the buttons that call it would
# not exist yet when a click is handled.
def _append_feedback(path: str, title: str, correct: bool):
    """Append one feedback row (title, 1/0, UTC timestamp) to the CSV at *path*.

    The header is written only when the file does not exist yet.
    """
    folder = os.path.dirname(path)
    if folder:  # os.makedirs('') raises, so skip for bare file names
        os.makedirs(folder, exist_ok=True)
    # Naive-UTC ISO string, produced without the deprecated datetime.utcnow().
    stamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
    new = pd.DataFrame([{'title': title, 'correct': 1 if correct else 0, 'ts': stamp}])
    if os.path.exists(path):
        new.to_csv(path, mode='a', header=False, index=False)
    else:
        new.to_csv(path, index=False)


def _optional(value):
    """Return ``None`` for missing CSV cells (NaN/None/blank), else the value."""
    if value is None or (not isinstance(value, str) and pd.isna(value)):
        return None
    if isinstance(value, str) and not value.strip():
        return None
    return value


def _parse_price(text) -> float:
    """Parse a user-typed price such as '25', '$25.00' or '1,200'; 0.0 if unusable."""
    cleaned = str(text).replace("$", "").replace(",", "").strip()
    if not cleaned:
        return 0.0
    try:
        return float(cleaned)
    except ValueError:
        return 0.0


# NOTE(review): the original emoji in the icon/title was lost to a bad
# re-encoding; "🔎" is a stand-in — confirm the intended glyph.
st.set_page_config(page_title="GoodFind: Thrift Deal Finder", page_icon="🔎")
st.title("🔎 GoodFind — Thrift & Yard Sale Deal Checker")

with st.expander("How it works"):
    st.markdown("""
- A lightweight ML model predicts **resale value** from item title (plus optional brand/category/condition).
- I estimate marketplace fees and shipping to compute **profit** and a **deal label**.
- **Group scan** lets you review many items at once (with photos). If a price isn't visible, **enter it manually**.
- **Auto-scan with OCR**: tries to read **price tags** and **names/brands** from your photo and pre-fill fields.
- **Hover labels**: after evaluation, the item name appears on hover over the photo.
- **👍/👎 feedback** trains the estimator over time (nudges predictions ±10% for similar items).
""")

if 'valuator' not in st.session_state:
    os.makedirs(DATA_DIR, exist_ok=True)
    if not os.path.exists(FEEDBACK_CSV):
        pd.DataFrame(columns=["title", "correct", "ts"]).to_csv(FEEDBACK_CSV, index=False)
    st.session_state['valuator'] = DealValuator(os.path.join(DATA_DIR, 'comps.csv'),
                                                feedback_csv=FEEDBACK_CSV)
valuator = st.session_state['valuator']


def _log_feedback(title: str, correct: bool):
    """Persist one vote and refresh the in-memory feedback stats.

    Without the reload, votes would only take effect in a later session,
    because the valuator reads the CSV once at construction.
    """
    _append_feedback(FEEDBACK_CSV, title, correct)
    valuator.feedback_stats = valuator._load_feedback(FEEDBACK_CSV)


# CSS for overlays
st.markdown('''
<style>
.img-wrap{position:relative; display:inline-block; margin:8px; max-width:100%;}
.img-wrap img{display:block; max-width:100%; height:auto; border-radius:6px; border:1px solid rgba(0,0,0,0.1);}
.hover-label{
  position:absolute; left:8px; top:8px; padding:4px 8px; border-radius:4px;
  background:rgba(0,0,0,0.65); color:#fff; font-size:0.9rem; opacity:0;
  transition:opacity .15s; pointer-events:none;
}
.img-wrap:hover .hover-label{opacity:1;}
</style>
''', unsafe_allow_html=True)

tab1, tab2, tab3 = st.tabs(["Single check", "Batch (CSV)", "Group scan (photos + OCR)"])

# -------- Single --------
with tab1:
    st.subheader("Check a deal")
    title = st.text_input("Listing title", placeholder="e.g., Nintendo Wii bundle with games")
    col1, col2, col3 = st.columns(3)
    with col1:
        asking = st.number_input("Asking price ($)", min_value=0.0, step=1.0, value=10.0)
    with col2:
        condition = st.selectbox("Condition", CONDITIONS)
    with col3:
        fees_rate = st.slider("Fees rate", min_value=0.05, max_value=0.20, value=0.13, step=0.01)
    extra = st.expander("Optional details")
    with extra:
        category = st.text_input("Category (optional)", placeholder="Electronics")
        brand = st.text_input("Brand (optional)", placeholder="Nintendo")

    if st.button("Evaluate Deal", type="primary") and title.strip():
        res = valuator.predict_resale(title=title, category=category or None,
                                      brand=brand or None, condition=condition)
        predicted_resale = res['predicted_resale']
        score = compute_deal_score(predicted_resale, asking_price=asking, fees_rate=fees_rate)
        st.markdown(f"### Estimated resale: **${predicted_resale:.2f}**")
        st.markdown(f"**Deal verdict:** {score['label']}")
        # Map margin from [-1, 1] onto the [0, 1] progress bar, clamped.
        st.progress(min(1.0, max(0.0, (score['margin'] + 1) / 2)))
        st.markdown("**Breakdown**")
        st.write(pd.DataFrame([{
            'Category': res['category'] or '—',
            'Brand': res['brand'] or '—',
            'Condition': res['condition'],
            'Fees ($)': score['fees'],
            'Net after fees+ship ($)': score['net_after_fees'],
            'Profit ($)': score['profit'],
            'Margin on ask': score['margin'],
        }]))

# -------- Batch CSV --------
with tab2:
    st.subheader("Batch evaluate via CSV")
    st.caption("Upload a CSV with columns: title, asking_price, [category], [brand], [condition].")
    fees_rate_batch = st.slider("Fees rate (apply to all)", min_value=0.05, max_value=0.20,
                                value=0.13, step=0.01, key="fees_batch")
    file = st.file_uploader("CSV file", type=['csv'], key="csv_uploader")
    if file is not None:
        df = pd.read_csv(file)
        required = {'title', 'asking_price'}
        if not required.issubset(set(df.columns)):
            st.error("CSV must include at least 'title' and 'asking_price' columns.")
        elif df.empty:
            # Sorting / picking a top row below would fail on zero rows.
            st.warning("The CSV has no rows to evaluate.")
        else:
            vals = []
            for _, row in df.iterrows():
                # Empty optional cells arrive as NaN (truthy), so normalise
                # them before they reach the valuator's fallbacks.
                r = valuator.predict_resale(
                    title=row['title'],
                    category=_optional(row.get('category', None)),
                    brand=_optional(row.get('brand', None)),
                    condition=_optional(row.get('condition', 'Used-Good')) or 'Used-Good',
                )
                s = compute_deal_score(r['predicted_resale'], row['asking_price'],
                                       fees_rate=fees_rate_batch)
                vals.append({**row.to_dict(), **r, **s})
            out = pd.DataFrame(vals).sort_values(by=['profit', 'predicted_resale'],
                                                 ascending=[False, False])
            st.dataframe(out, use_container_width=True)
            st.download_button("Download results CSV", data=out.to_csv(index=False),
                               file_name="goodfind_results.csv", mime="text/csv")
            top = out.iloc[0]
            st.success(f"Top pick: **{top['title']}** — est. resale ${top['predicted_resale']:.0f}, "
                       f"profit ${top['profit']:.0f} (label: {top['label']})")

# -------- Group scan with OCR --------
with tab3:
    st.subheader("Group scan: photos + OCR (auto price & name)")
    st.caption("Upload photos, let OCR prefill titles and prices (you can edit), then evaluate and rank.")
    fees_rate_group = st.slider("Fees rate (apply to all)", min_value=0.05, max_value=0.20,
                                value=0.13, step=0.01, key="fees_group")

    from ocr_utils import ocr_image, guess_price, annotate_with_box
    from detect_utils import load_brands, guess_title_from_text
    brands_map = load_brands(os.path.join(DATA_DIR, "brands.json"))

    photos = st.file_uploader("Upload photos (multiple allowed)", type=['jpg', 'jpeg', 'png', 'webp'],
                              accept_multiple_files=True, key="photo_uploader")
    photo_meta = []  # (title, price_text, cond, bytes, mime, suggested_title)
    autoscan = st.checkbox("Auto-scan with OCR (read tags & names)", value=True)

    if photos:
        for i, img in enumerate(photos):
            # getvalue() returns the whole buffer regardless of the current
            # read position; read() can come back empty on a later rerun.
            b = img.getvalue()
            mime = "image/" + (img.type.split('/')[-1] if img.type else 'jpeg')
            suggested_title = ""
            suggested_price = ""
            annotated = None
            if autoscan:
                # OCR is best-effort: a failure must not take the page down.
                try:
                    full, tokens, _ = ocr_image(b)
                    suggested_title = guess_title_from_text(full, brands_map) if full else ""
                    gp = guess_price(tokens)
                    if gp:
                        val, box = gp
                        suggested_price = f"{val:.2f}"
                        annotated = annotate_with_box(b, box, label=f"${val:.2f}")
                except Exception:
                    annotated = None
            with st.expander(f"Photo {i+1}"):
                st.image(annotated or b, use_column_width=True)
                c1, c2, c3 = st.columns([2, 1, 1])
                with c1:
                    t = st.text_input(f"Item title #{i+1}", value=suggested_title,
                                      placeholder="e.g., Bose QC35 headphones", key=f"title_{i}")
                with c2:
                    price = st.text_input(f"Asking price #{i+1} ($)", value=suggested_price,
                                          placeholder="e.g., 25 or 25.00", key=f"price_{i}")
                with c3:
                    cond = st.selectbox(f"Condition #{i+1}", CONDITIONS, key=f"cond_{i}")
            photo_meta.append((t, price, cond, b, mime, suggested_title))

        if st.button("Evaluate group", type="primary"):
            rows = []
            gallery_blocks = []
            for idx, (t, price, cond, b, mime, suggested_title) in enumerate(photo_meta):
                if not str(t).strip():
                    t = suggested_title or f"Item {idx+1}"
                ask = _parse_price(price)
                r = valuator.predict_resale(title=t, condition=cond)
                s = compute_deal_score(r['predicted_resale'], ask, fees_rate=fees_rate_group)
                rows.append({'idx': idx, 'title': t, 'asking_price': ask, **r, **s})
                b64 = base64.b64encode(b).decode('utf-8')
                # Titles are user/OCR text rendered with unsafe_allow_html,
                # so they must be escaped before going into the markup.
                safe_t = html.escape(str(t), quote=True)
                safe_mime = html.escape(mime, quote=True)
                gallery_blocks.append(f'''
<div class="img-wrap" title="{safe_t}">
  <img src="data:{safe_mime};base64,{b64}" alt="{safe_t}"/>
  <span class="hover-label">{safe_t}</span>
</div>
''')
            # Keep the results across reruns: st.button() is True only on the
            # run triggered by its own click, so anything rendered solely
            # inside this branch (including the feedback buttons) would
            # disappear as soon as another widget is clicked.
            st.session_state['group_rows'] = rows
            st.session_state['group_gallery'] = gallery_blocks

        rows = st.session_state.get('group_rows', [])
        gallery_blocks = st.session_state.get('group_gallery', [])
        if rows:
            out = (pd.DataFrame(rows)
                     .sort_values(by=['profit', 'predicted_resale'], ascending=[False, False])
                     .reset_index(drop=True))
            st.markdown("### Ranked results")
            st.dataframe(out[['title', 'asking_price', 'predicted_resale', 'profit', 'margin',
                              'label', 'category', 'brand', 'condition']],
                         use_container_width=True)
            top = out.iloc[0]
            st.success(f"Top pick: **{top['title']}** — est. resale ${top['predicted_resale']:.0f}, "
                       f"profit ${top['profit']:.0f} (label: {top['label']})")
            st.markdown("### Hover over each image to see the identified name")
            st.markdown("".join(gallery_blocks), unsafe_allow_html=True)

            st.markdown("#### Feedback")
            st.caption("Mark whether the auto-identification/valuation felt right. This helps future scans.")
            for i, row in out.iterrows():
                c1, c2, c3 = st.columns([6, 1, 1])
                with c1:
                    st.write(f"**{row['title']}** — est. ${row['predicted_resale']:.0f}, "
                             f"profit ${row['profit']:.0f}")
                with c2:
                    if st.button("👍", key=f"fb_up_{i}"):
                        _log_feedback(row['title'], True)
                        st.toast(f"Thanks! Logged 👍 for '{row['title']}'.")
                with c3:
                    if st.button("👎", key=f"fb_dn_{i}"):
                        _log_feedback(row['title'], False)
                        st.toast(f"Logged 👎 for '{row['title']}'.")
- 0 Bytes xcode-select --install