diff --git a/gig-poc/apps/api/app/domain/schemas.py b/gig-poc/apps/api/app/domain/schemas.py index 9193c0e..6cc6191 100644 --- a/gig-poc/apps/api/app/domain/schemas.py +++ b/gig-poc/apps/api/app/domain/schemas.py @@ -1,6 +1,6 @@ from __future__ import annotations -from datetime import datetime +from datetime import datetime, timedelta, timezone from enum import Enum from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator @@ -46,6 +46,16 @@ class JobCard(BaseModel): tags: list[str] = Field(default_factory=list, description="业务标签列表") confidence: float = Field(ge=0, le=1, description="数据置信度,范围 0~1") + @field_validator("start_time", mode="after") + @classmethod + def normalize_start_time(cls, value: datetime) -> datetime: + shanghai_tz = timezone(timedelta(hours=8)) + if value.tzinfo is None: + value = value.replace(tzinfo=shanghai_tz) + else: + value = value.astimezone(shanghai_tz) + return value.replace(second=0, microsecond=0) + class WorkerCard(BaseModel): worker_id: str = Field(description="工人唯一 ID") diff --git a/gig-poc/apps/api/app/services/extraction_service.py b/gig-poc/apps/api/app/services/extraction_service.py index 70f55c4..4a560df 100644 --- a/gig-poc/apps/api/app/services/extraction_service.py +++ b/gig-poc/apps/api/app/services/extraction_service.py @@ -2,6 +2,7 @@ from __future__ import annotations import json import re +from collections import Counter from datetime import datetime, timedelta, timezone from pathlib import Path @@ -22,7 +23,19 @@ class ExtractionService: self.skills = json.loads((self.settings.sample_data_dir / "skills.json").read_text(encoding="utf-8")) self.categories = json.loads((self.settings.sample_data_dir / "categories.json").read_text(encoding="utf-8")) self.regions = json.loads((self.settings.sample_data_dir / "regions.json").read_text(encoding="utf-8")) + self.sample_jobs = json.loads((self.settings.sample_data_dir / "jobs.json").read_text(encoding="utf-8")) + self.sample_workers = json.loads((self.settings.sample_data_dir / "workers.json").read_text(encoding="utf-8")) + self.default_region = self._build_default_region() + self.default_category = self._build_default_category() + self.default_salary_amount = self._build_default_salary_amount() + self.default_job_tags = self._build_default_job_tags() + self.default_worker_skills = self._build_default_worker_skills() + self.default_experience_tags = self._build_default_experience_tags() + self.category_skill_defaults = self._build_category_skill_defaults() + self.city_region_defaults = self._build_city_region_defaults() + self.tag_candidates = self._build_tag_candidates() self.llm_client = LLMClient(self.settings) + self.shanghai_tz = timezone(timedelta(hours=8)) def extract_job(self, text: str) -> ExtractResponse: logger.info("extract_job request text=%s", text) @@ -110,12 +123,12 @@ class ExtractionService: def _extract_job_rule(self, text: str) -> JobCard: skill_hits = [item for item in self.skills if item in text] - category = next((item for item in self.categories if item in text), "活动执行") + category = next((item for item in self.categories if item in text), self.default_category) region = self._extract_region(text) salary = self._extract_salary(text) headcount = self._extract_number(text, [r"(\d+)\s*[个名人位]"], default=1) duration = self._extract_number(text, [r"(\d+(?:\.\d+)?)\s*小时"], default=4.0, cast=float) - tags = [tag for tag in ["女生优先", "男生优先", "有经验优先", "沟通好", "可连做优先"] if tag in text] + tags = [tag for tag in self.tag_candidates if tag in text][:3] title = next((f"{category}{skill_hits[0]}兼职" for _ in [0] if skill_hits), f"{category}兼职") card = JobCard( job_id=generate_id("job"), @@ -131,7 +144,7 @@ class ExtractionService: headcount=int(headcount), salary=salary, work_mode="排班制" if "排班" in text else "兼职", - tags=tags or ["有经验优先"], + tags=tags or self.default_job_tags, confidence=self._compute_confidence(skill_hits, region, salary.amount > 0), ) return card @@ -139,19 +152,29 @@ class ExtractionService: def _extract_worker_rule(self, text: str) -> WorkerCard: skill_hits = [item for item in self.skills if item in text][:6] region_hits = [item for item in self.regions if item["region"] in text or item["city"] in text] - city_names = list(dict.fromkeys([item["city"] for item in region_hits])) or ["深圳"] - region_names = list(dict.fromkeys([item["region"] for item in region_hits])) or ["南山"] + if not region_hits: + city_hits = [item["city"] for item in self.regions if item["city"] in text] + unique_city_hits = list(dict.fromkeys(city_hits)) + region_hits = [ + {"city": city, "region": self.city_region_defaults.get(city, self.default_region["region"])} + for city in unique_city_hits + ] + city_names = list(dict.fromkeys([item["city"] for item in region_hits])) or [self.default_region["city"]] + region_names = list(dict.fromkeys([item["region"] for item in region_hits])) or [self.default_region["region"]] availability = self._extract_availability(text) - experience = [item for item in ["商场", "会展", "活动执行", "物流", "零售", "客服中心", "快消", "校园推广"] if item in text] + experience = [item for item in self.default_experience_tags if item in text] card = WorkerCard( worker_id=generate_id("worker"), name=self._extract_name(text), description=text, - skills=[SkillScore(name=item, score=round(0.72 + index * 0.04, 2)) for index, item in enumerate(skill_hits or ["活动执行", "引导", "登记"])], + skills=[ + SkillScore(name=item, score=round(0.72 + index * 0.04, 2)) + for index, item in enumerate(skill_hits or self.default_worker_skills) + ], cities=city_names, regions=region_names, availability=availability, - experience_tags=experience or ["活动执行"], + experience_tags=experience or self.default_experience_tags[:2], reliability_score=0.76, profile_completion=0.68, confidence=self._compute_confidence(skill_hits, {"city": city_names[0], "region": region_names[0]}, True), @@ -165,7 +188,10 @@ class ExtractionService: for item in self.regions: if item["region"] in text: return item - return {"city": "深圳", "region": "南山"} + city_match = next((item["city"] for item in self.regions if item["city"] in text), "") + if city_match: + return {"city": city_match, "region": self.city_region_defaults.get(city_match, self.default_region["region"])} + return self.default_region def _extract_location(self, text: str, region: dict) -> str: markers = ["会展中心", "商场", "地铁站", "园区", "写字楼", "仓库", "门店"] @@ -175,7 +201,7 @@ class ExtractionService: return f"{region['city']}{region['region']}待定点位" def _extract_salary(self, text: str) -> Salary: - amount = self._extract_number(text, [r"(\d+(?:\.\d+)?)\s*(?:元|块)"], default=150.0, cast=float) + amount = self._extract_number(text, [r"(\d+(?:\.\d+)?)\s*(?:元|块)"], default=self.default_salary_amount, cast=float) salary_type = "hourly" if "小时" in text and "/小时" in text else "daily" return Salary(type=salary_type, amount=amount, currency="CNY") @@ -187,28 +213,72 @@ class ExtractionService: return default def _extract_job_time(self, text: str) -> datetime: - shanghai_tz = timezone(timedelta(hours=8)) - now = datetime.now(shanghai_tz) + now = datetime.now(self.shanghai_tz) + for candidate in self._time_candidates(text, now): + parsed = self._parse_datetime(candidate, now) + if parsed: + return parsed + return self._normalize_datetime(now + timedelta(days=1)) + + def _time_candidates(self, text: str, now: datetime) -> list[str]: + candidates = [text] + + if any(token in text for token in ("今天", "今日")): + candidates.append(text.replace("今日", now.strftime("%Y-%m-%d")).replace("今天", now.strftime("%Y-%m-%d"))) if "明天" in text: - base = now + timedelta(days=1) - elif "后天" in text: - base = now + timedelta(days=2) + tomorrow = now + timedelta(days=1) + candidates.append(text.replace("明天", tomorrow.strftime("%Y-%m-%d"))) + if "后天" in text: + day_after = now + timedelta(days=2) + candidates.append(text.replace("后天", day_after.strftime("%Y-%m-%d"))) + + weekday_map = {"一": 0, "二": 1, "三": 2, "四": 3, "五": 4, "六": 5, "日": 6, "天": 6} + week_match = re.search(r"(下周|本周|这周|周)([一二三四五六日天])", text) + if week_match: + week_token, weekday_token = week_match.groups() + target_weekday = weekday_map[weekday_token] + days_ahead = (target_weekday - now.weekday()) % 7 + if week_token == "下周": + days_ahead = days_ahead + 7 + elif week_token == "周" and days_ahead == 0: + days_ahead = 7 + target_day = now + timedelta(days=days_ahead) + candidates.append(text.replace(week_match.group(0), target_day.strftime("%Y-%m-%d"))) + return candidates + + def _parse_datetime(self, text: str, now: datetime) -> datetime | None: + normalized = self._replace_time_words(text) + cleaned = re.sub(r"[,、。;,;]", " ", normalized) + cleaned = cleaned.replace("号", "日") + cleaned = re.sub(r"(\d{1,2})月(\d{1,2})日", rf"{now.year}-\1-\2", cleaned) + cleaned = re.sub(r"(\d{1,2})点半", r"\1:30", cleaned) + cleaned = re.sub(r"(\d{1,2})点", r"\1:00", cleaned) + cleaned = re.sub(r"(\d{1,2})时", r"\1:00", cleaned) + + has_date = bool(re.search(r"\d{4}-\d{1,2}-\d{1,2}", cleaned)) + if not has_date: + return None + try: + parsed = date_parser.parse(cleaned, fuzzy=True) + except Exception: + return None + return self._normalize_datetime(parsed) + + def _replace_time_words(self, text: str) -> str: + replaced = text + replaced = re.sub(r"(今晚|晚上)", " 19:00 ", replaced) + replaced = re.sub(r"(下午)", " 14:00 ", replaced) + replaced = re.sub(r"(中午)", " 12:00 ", replaced) + replaced = re.sub(r"(早上|上午)", " 09:00 ", replaced) + replaced = re.sub(r"(凌晨)", " 01:00 ", replaced) + return replaced + + def _normalize_datetime(self, value: datetime) -> datetime: + if value.tzinfo is None: + value = value.replace(tzinfo=self.shanghai_tz) else: - month_day = re.search(r"(\d{1,2})月(\d{1,2})日", text) - if month_day: - month, day = int(month_day.group(1)), int(month_day.group(2)) - base = now.replace(month=month, day=day) - else: - base = now + timedelta(days=1) - hour = 9 - if "下午" in text: - hour = 13 - elif "晚上" in text: - hour = 19 - explicit_hour = re.search(r"(\d{1,2})[:点时](\d{0,2})?", text) - if explicit_hour: - hour = int(explicit_hour.group(1)) - return base.replace(hour=hour, minute=0, second=0, microsecond=0) + value = value.astimezone(self.shanghai_tz) + return value.replace(second=0, microsecond=0) def _extract_availability(self, text: str) -> list[str]: tags = [] @@ -230,13 +300,10 @@ class ExtractionService: return "匿名候选人" def _guess_category_skills(self, category: str) -> list[str]: - mapping = { - "活动执行": ["签到", "引导", "登记"], - "促销": ["促销", "导购", "陈列"], - "配送": ["配送", "装卸", "司机协助"], - "客服": ["客服", "电话邀约", "线上客服"], - } - return mapping.get(category, ["活动执行", "沟通"]) + skills = self.category_skill_defaults.get(category) + if skills: + return skills + return self.default_worker_skills[:3] def _compute_confidence(self, skill_hits: list[str], region: dict, has_salary: bool) -> float: score = 0.55 @@ -250,3 +317,109 @@ class ExtractionService: def _missing_fields(self, exc: ValidationError) -> list[str]: return [".".join(str(part) for part in item["loc"]) for item in exc.errors()] + + def _build_default_region(self) -> dict: + if self.sample_jobs: + pair_counter = Counter( + (item.get("city"), item.get("region")) + for item in self.sample_jobs + if item.get("city") and item.get("region") + ) + if pair_counter: + city, region = pair_counter.most_common(1)[0][0] + return {"city": city, "region": region} + if self.regions: + return {"city": self.regions[0]["city"], "region": self.regions[0]["region"]} + return {"city": "深圳", "region": "南山"} + + def _build_default_category(self) -> str: + counter = Counter(item.get("category") for item in self.sample_jobs if item.get("category")) + if counter: + return counter.most_common(1)[0][0] + return self.categories[0] if self.categories else "活动执行" + + def _build_default_salary_amount(self) -> float: + amounts = sorted( + float(item["salary"]["amount"]) + for item in self.sample_jobs + if isinstance(item.get("salary"), dict) and isinstance(item["salary"].get("amount"), (int, float)) + ) + if not amounts: + return 150.0 + mid = len(amounts) // 2 + if len(amounts) % 2 == 1: + return amounts[mid] + return round((amounts[mid - 1] + amounts[mid]) / 2, 2) + + def _build_default_job_tags(self) -> list[str]: + counter = Counter( + tag + for item in self.sample_jobs + for tag in item.get("tags", []) + if isinstance(tag, str) and tag.strip() + ) + top_tags = [tag for tag, _ in counter.most_common(3)] + return top_tags or ["有经验优先"] + + def _build_default_worker_skills(self) -> list[str]: + counter = Counter( + skill.get("name") + for item in self.sample_workers + for skill in item.get("skills", []) + if isinstance(skill, dict) and isinstance(skill.get("name"), str) and skill.get("name") + ) + top_skills = [name for name, _ in counter.most_common(4)] + return top_skills or ["活动执行", "引导", "登记"] + + def _build_default_experience_tags(self) -> list[str]: + counter = Counter( + tag + for item in self.sample_workers + for tag in item.get("experience_tags", []) + if isinstance(tag, str) and tag.strip() + ) + top_tags = [tag for tag, _ in counter.most_common(5)] + return top_tags or ["活动执行"] + + def _build_category_skill_defaults(self) -> dict[str, list[str]]: + category_skills: dict[str, Counter] = {} + for item in self.sample_jobs: + category = item.get("category") + if not isinstance(category, str) or not category: + continue + counter = category_skills.setdefault(category, Counter()) + for skill in item.get("skills", []): + if isinstance(skill, str) and skill: + counter[skill] += 1 + return {category: [name for name, _ in counter.most_common(4)] for category, counter in category_skills.items()} + + def _build_city_region_defaults(self) -> dict[str, str]: + counter: dict[str, Counter] = {} + for item in self.regions: + city = item.get("city") + region = item.get("region") + if not city or not region: + continue + counter.setdefault(city, Counter())[region] += 1 + for item in self.sample_jobs: + city = item.get("city") + region = item.get("region") + if city and region: + counter.setdefault(city, Counter())[region] += 3 + defaults: dict[str, str] = {} + for city, regions in counter.items(): + defaults[city] = regions.most_common(1)[0][0] + return defaults + + def _build_tag_candidates(self) -> list[str]: + sample_tags = list( + dict.fromkeys( + tag + for item in self.sample_jobs + for tag in item.get("tags", []) + if isinstance(tag, str) and tag.strip() + ) + ) + baseline_tags = ["女生优先", "男生优先", "有经验优先", "沟通好", "可连做优先"] + merged = list(dict.fromkeys([*sample_tags, *baseline_tags])) + return merged[:30] diff --git a/gig-poc/apps/web/src/pages/JobPage.tsx b/gig-poc/apps/web/src/pages/JobPage.tsx index c623110..3ac07d7 100644 --- a/gig-poc/apps/web/src/pages/JobPage.tsx +++ b/gig-poc/apps/web/src/pages/JobPage.tsx @@ -1,16 +1,76 @@ -import { useState } from "react"; +import { useEffect, useState } from "react"; import { api } from "../api/client"; import { JsonPanel } from "../components/JsonPanel"; import { MatchList } from "../components/MatchList"; -const DEFAULT_TEXT = "明天下午南山会展中心需要2个签到协助,5小时,150/人,女生优先,需要会签到、引导和登记。"; +const FALLBACK_TEXT = "明天下午南山会展中心需要2个签到协助,5小时,150/人,女生优先,需要会签到、引导和登记。"; + +function pickRandom(items: T[]): T { + return items[Math.floor(Math.random() * items.length)]; +} + +function asString(value: unknown): string { + return typeof value === "string" ? value.trim() : ""; +} + +function asNumber(value: unknown): number | null { + return typeof value === "number" && Number.isFinite(value) ? value : null; +} + +function asStringArray(value: unknown): string[] { + if (!Array.isArray(value)) { + return []; + } + return value.filter((item): item is string => typeof item === "string" && item.trim().length > 0); +} + +function buildAdaptiveJobText(items: Record[]): string { + if (!items.length) { + return FALLBACK_TEXT; + } + const source = pickRandom(items); + const title = asString(source.title) || asString(source.category) || "活动兼职"; + const city = asString(source.city) || "深圳"; + const region = asString(source.region) || "南山"; + const headcount = asNumber(source.headcount) ?? 2; + const duration = asNumber(source.duration_hours) ?? 4; + const location = asString(source.location_detail) || `${city}${region}待定点位`; + const skills = asStringArray(source.skills).slice(0, 3); + const tags = asStringArray(source.tags).slice(0, 2); + const salary = (source.salary as Record | undefined) ?? {}; + const amount = asNumber(salary.amount) ?? 150; + const skillText = skills.length ? `需要会${skills.join("、")}` : "有相关经验优先"; + const tagText = tags.length ? `,${tags.join(",")}` : ""; + return `明天下午${location}需要${headcount}个${title},${duration}小时,${amount}/人${tagText},${skillText}。`; +} export function JobPage() { - const [text, setText] = useState(DEFAULT_TEXT); + const [text, setText] = useState(""); const [jobCard, setJobCard] = useState(null); const [matches, setMatches] = useState([]); const [loading, setLoading] = useState(false); + useEffect(() => { + let active = true; + void (async () => { + try { + const result = await api.jobs(); + if (!active) { + return; + } + setText((current) => current || buildAdaptiveJobText(result.items)); + } catch { + if (!active) { + return; + } + setText((current) => current || FALLBACK_TEXT); + } + })(); + return () => { + active = false; + }; + }, []); + const handleExtract = async () => { setLoading(true); try { diff --git a/gig-poc/apps/web/src/pages/WorkerPage.tsx b/gig-poc/apps/web/src/pages/WorkerPage.tsx index 1f51645..9938fbf 100644 --- a/gig-poc/apps/web/src/pages/WorkerPage.tsx +++ b/gig-poc/apps/web/src/pages/WorkerPage.tsx @@ -1,16 +1,73 @@ -import { useState } from "react"; +import { useEffect, useState } from "react"; import { api } from "../api/client"; import { JsonPanel } from "../components/JsonPanel"; import { MatchList } from "../components/MatchList"; -const DEFAULT_TEXT = "我做过商场促销和活动签到,也能做登记和引导,周末都能接,福田南山都方便。"; +const FALLBACK_TEXT = "我做过商场促销和活动签到,也能做登记和引导,周末都能接,福田南山都方便。"; + +function pickRandom(items: T[]): T { + return items[Math.floor(Math.random() * items.length)]; +} + +function asString(value: unknown): string { + return typeof value === "string" ? value.trim() : ""; +} + +function asStringArray(value: unknown): string[] { + if (!Array.isArray(value)) { + return []; + } + return value.filter((item): item is string => typeof item === "string" && item.trim().length > 0); +} + +function buildAdaptiveWorkerText(items: Record[]): string { + if (!items.length) { + return FALLBACK_TEXT; + } + const source = pickRandom(items); + const name = asString(source.name) || "我"; + const regions = asStringArray(source.regions).slice(0, 2); + const experiences = asStringArray(source.experience_tags).slice(0, 2); + const skillObjects = Array.isArray(source.skills) ? source.skills : []; + const skills = skillObjects + .map((item) => (item && typeof item === "object" ? asString((item as Record).name) : "")) + .filter(Boolean) + .slice(0, 3); + const availability = asStringArray(source.availability); + const expText = experiences.length ? experiences.join("和") : "活动执行"; + const skillText = skills.length ? skills.join("、") : "沟通和执行"; + const regionText = regions.length ? `${regions.join("、")}都方便` : "同城都方便"; + const timeText = availability.some((item) => item.includes("weekend")) ? "周末都能接" : "时间比较灵活"; + return `${name}做过${expText},也能做${skillText},${timeText},${regionText}。`; +} export function WorkerPage() { - const [text, setText] = useState(DEFAULT_TEXT); + const [text, setText] = useState(""); const [workerCard, setWorkerCard] = useState(null); const [matches, setMatches] = useState([]); const [loading, setLoading] = useState(false); + useEffect(() => { + let active = true; + void (async () => { + try { + const result = await api.workers(); + if (!active) { + return; + } + setText((current) => current || buildAdaptiveWorkerText(result.items)); + } catch { + if (!active) { + return; + } + setText((current) => current || FALLBACK_TEXT); + } + })(); + return () => { + active = false; + }; + }, []); + const handleExtract = async () => { setLoading(true); try {