Files
Airtep/gig-poc/packages/sample-data/generate_sample_data.py
2026-03-30 20:49:40 +08:00

184 lines
6.3 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from __future__ import annotations
import json
import random
from datetime import datetime, timedelta
from pathlib import Path
# Directory that holds this script.
ROOT = Path(__file__).resolve().parent

# Fix the seed of the shared `random` generator before any draw is made.
random.seed(42)

# Hand-written skill names (40 entries).
skills = [
    "签到", "引导", "登记", "促销", "地推",
    "导购", "会展接待", "分拣", "打包", "客服",
    "电话邀约", "安装", "配送", "仓储", "陈列",
    "数据录入", "物料搬运", "收银", "盘点", "直播协助",
    "短视频拍摄", "面销", "海报派发", "问卷访问", "现场执行",
    "活动控场", "礼仪接待", "样品派发", "售后支持", "装卸",
    "拣货", "骑手配送", "司机协助", "设备调试", "展台搭建",
    "线上客服", "社群运营", "线索收集", "POS操作", "报表整理",
]

# Sixty numbered placeholder skills, zero-padded to two digits (01..60).
extra_skills = ["扩展技能{:02d}".format(number) for number in range(1, 61)]

# Named skills first, placeholders after, capped at 100 entries in total.
skills = [*skills, *extra_skills][:100]
# Job categories (30 entries).  Order matters: the first ten are sliced out
# separately elsewhere in this script via `categories[:10]`.
categories = [
    # first ten
    "促销", "地推", "导购", "会展", "分拣",
    "客服", "安装", "配送", "仓储", "活动执行",
    # remaining twenty
    "礼仪", "数据录入", "盘点", "装卸", "直播协助",
    "社群运营", "收银", "拣货", "线下推广", "电话销售",
    "问卷调研", "样品派发", "售后服务", "展台搭建", "酒店服务",
    "商超导购", "会场服务", "物流协助", "活动控场", "物料执行",
]
# City -> districts table; expanded below into one {"city", "region"} dict per
# district, keeping cities and districts in the listed order.
_city_districts = (
    ("深圳", ("南山", "福田", "宝安", "龙岗", "罗湖")),
    ("广州", ("天河", "海珠", "番禺", "白云", "越秀")),
    ("上海", ("浦东", "徐汇", "静安", "闵行", "杨浦")),
    ("杭州", ("西湖", "滨江", "余杭")),
    ("成都", ("高新", "武侯")),
)
regions = [
    {"city": city_name, "region": district_name}
    for city_name, district_names in _city_districts
    for district_name in district_names
]

# Availability slots.
time_tags = ["weekend", "weekday_am", "weekday_pm", "anytime"]

# Experience/background labels.
experience_tags = [
    "商场", "会展", "活动执行", "物流", "零售",
    "校园推广", "客服中心", "展台", "仓库", "快消",
]

# Engagement types.
work_modes = ["排班制", "兼职", "临时工", "项目制"]
def region_label(item: dict) -> str:
    """Return the city name immediately followed by the region name.

    ``item`` must provide the keys ``"city"`` and ``"region"``; a missing key
    raises ``KeyError``.  No separator is inserted between the two parts.
    """
    city, district = item["city"], item["region"]
    return "{}{}".format(city, district)
# Build 100 synthetic job postings from the shared `random` generator.
#
# Two fixes relative to the earlier version of this loop:
#   * Headcount and duration are drawn once and reused, so the free-text
#     description always states the same numbers as the structured
#     "headcount" / "duration_hours" fields (they used to be drawn twice,
#     independently, and usually disagreed).
#   * Tags are de-duplicated with dict.fromkeys instead of set().  Iteration
#     order of a set of strings depends on per-process hash randomization, so
#     the old output was not reproducible even with a fixed random seed.
# Because fewer random draws are made per job, the generated values differ
# from those produced by the earlier version.
jobs = []
for index in range(1, 101):
    region = random.choice(regions)
    category = random.choice(categories)
    # Only the first 40 (hand-written) skills are eligible; 3-5 per job.
    job_skills = random.sample(skills[:40], k=random.randint(3, 5))
    # Start between 2026-04-01 and 2026-04-21, at 09:00, 13:00 or 17:00.
    start_at = datetime(2026, 4, 1, 9, 0, 0) + timedelta(
        days=random.randint(0, 20),
        hours=random.choice([0, 4, 8]),
    )
    # Two experience tags plus one preference tag, in draw order.
    tags = list(
        dict.fromkeys(
            random.sample(experience_tags, k=2)
            + [random.choice(["女生优先", "有经验优先", "可连做优先", "沟通好"])]
        )
    )
    salary_amount = random.choice([120, 150, 180, 200, 220, 260, 300])
    # Single source of truth for the numbers quoted in the description.
    headcount = random.randint(1, 4)
    duration_hours = random.randint(4, 8)
    jobs.append(
        {
            "job_id": f"job_{index:03d}",
            "title": f"{category}兼职{index:03d}",
            "category": category,
            # NOTE(review): the template runs month, day, headcount and hours
            # together with no unit/separator characters between them --
            # confirm this is the intended wording.
            "description": f"{start_at.month}{start_at.day}{region['city']}{region['region']}需要{headcount}{category}兼职,负责{''.join(job_skills[:3])}{duration_hours}小时,{salary_amount}元/天。",
            "skills": job_skills,
            "city": region["city"],
            "region": region["region"],
            "location_detail": f"{region_label(region)}核心商圈点位{random.randint(1, 20)}",
            # Naive timestamp with a fixed +08:00 offset appended as text.
            "start_time": start_at.isoformat() + "+08:00",
            "duration_hours": duration_hours,
            "headcount": headcount,
            "salary": {"type": "daily", "amount": salary_amount, "currency": "CNY"},
            "work_mode": random.choice(work_modes),
            "tags": tags,
            "confidence": round(random.uniform(0.82, 0.96), 2),
        }
    )
# Build 300 synthetic worker profiles from the shared `random` generator.
#
# Fix relative to the earlier version of this loop: experience tags are
# de-duplicated with dict.fromkeys instead of set().  A set of strings
# iterates in an order that depends on per-process hash randomization, which
# made both "experience_tags" and the description (it quotes the first two
# tags) differ from run to run despite the fixed random seed.  The sequence of
# random draws itself is unchanged.
workers = []
family_names = "赵钱孙李周吴郑王冯陈褚卫蒋沈韩杨朱秦尤许何吕施张孔曹严华金魏陶姜"
# NOTE(review): all but one of these given names are empty strings, which
# looks like characters were lost somewhere -- confirm against the intended
# name list.  Kept as-is so the random draws stay unchanged.
given_names = ["", "", "", "", "", "", "", "", "", "", "", "", "", "", "秀英", ""]
for index in range(1, 301):
    primary_region = random.choice(regions)
    # Second region: same city, different district.  Raises IndexError if a
    # city ever has only one district listed.
    extra_region = random.choice(
        [
            candidate
            for candidate in regions
            if candidate["city"] == primary_region["city"]
            and candidate["region"] != primary_region["region"]
        ]
    )
    # Only the first 40 (hand-written) skills are eligible; 3-6 per worker.
    worker_skills = random.sample(skills[:40], k=random.randint(3, 6))
    # random.choice indexes a str directly; no intermediate list is needed.
    name = random.choice(family_names) + random.choice(given_names)
    availability = random.sample(time_tags, k=random.randint(1, 2))
    # Two experience tags plus one of the first ten categories; the category
    # can coincide with an experience tag, hence the de-duplication.
    exp_tags = list(
        dict.fromkeys(
            random.sample(experience_tags, k=2) + [random.choice(categories[:10])]
        )
    )
    workers.append(
        {
            "worker_id": f"worker_{index:03d}",
            "name": name,
            # NOTE(review): list items and the two region names are joined
            # with no separator characters -- confirm this is intended.
            "description": f"我做过{''.join(exp_tags[:2])}相关兼职,擅长{''.join(worker_skills[:3])},平时{ ''.join(availability) }都能接单,{primary_region['region']}{extra_region['region']}都方便。",
            "skills": [{"name": item, "score": round(random.uniform(0.62, 0.94), 2)} for item in worker_skills],
            "cities": [primary_region["city"]],
            "regions": [primary_region["region"], extra_region["region"]],
            "availability": availability,
            "experience_tags": exp_tags,
            "reliability_score": round(random.uniform(0.65, 0.95), 2),
            "profile_completion": round(random.uniform(0.55, 0.98), 2),
            "confidence": round(random.uniform(0.8, 0.96), 2),
        }
    )
# Write each dataset next to this script as pretty-printed UTF-8 JSON,
# keeping non-ASCII characters unescaped.  Files are written in listed order.
for _filename, _payload in (
    ("skills.json", skills),
    ("categories.json", categories),
    ("regions.json", regions),
    ("jobs.json", jobs),
    ("workers.json", workers),
):
    _serialized = json.dumps(_payload, ensure_ascii=False, indent=2)
    (ROOT / _filename).write_text(_serialized, encoding="utf-8")