# Crawl the "share" folder of a range of hosts and record every file with an
# accepted extension in the SQLite database "share.db".
import sqlite3
import time
from pathlib import Path

# File extensions worth indexing; matched against Path.suffix (case-sensitive).
acceptable = (
    ".txt", ".doc", ".docx", ".ppt", ".pptx", ".xls", ".xlsx",
    ".pdf", ".rar", ".zip", ".tar", ".gz", ".bz2", ".xz", ".7z",
    ".msg", ".exe", ".msi",
)

# Database handles shared with process_file_path. The traversal callbacks only
# receive a path, so main() publishes the open cursor through module state.
conn = None
cursor = None


# Search the files below a shared directory and hand them to the callbacks.
def apply_dfs(root_path, file_callback, dir_callback):
    """Walk *root_path* depth-first and dispatch every entry to a callback.

    Args:
        root_path: ``pathlib.Path`` of the directory to traverse.
        file_callback: Called with the path of each regular file.
        dir_callback: Called with the path of each directory, before the
            traversal descends into it.

    Returns:
        None. When the directory cannot be listed a message is printed and
        that subtree is skipped; the rest of the traversal continues.
    """
    try:
        # List the directory exactly once: this both validates access and
        # provides the entries to visit, so no second (unprotected) listing
        # is needed.
        entries = list(root_path.iterdir())
    except FileNotFoundError:
        print("file not found:", root_path)
        return
    except OSError:
        # Permission problems, unreachable hosts, not-a-directory, ...
        # KeyboardInterrupt is deliberately NOT caught so the crawl can be
        # aborted from the keyboard.
        print("something bad happened:", root_path)
        return

    for entry in entries:
        if entry.is_file():
            file_callback(entry)
        elif entry.is_dir():
            dir_callback(entry)
            apply_dfs(entry, file_callback, dir_callback)
        # Anything that is neither a file nor a directory is ignored.


# Filter by file extension and record matching files.
def process_file_path(path, db_cursor=None):
    """Insert *path* into ``shared_files`` if its suffix is in ``acceptable``.

    The stored row is ``(file name, full path with backslashes replaced by
    forward slashes)``.

    Args:
        path: ``pathlib.Path`` of the file to consider.
        db_cursor: Cursor used for the insert. Defaults to the module-level
            ``cursor`` that ``main()`` opens.

    Raises:
        RuntimeError: If no cursor was passed and none has been opened.
    """
    if path.suffix not in acceptable:
        return

    target = cursor if db_cursor is None else db_cursor
    if target is None:
        raise RuntimeError("no database cursor available; run main() or pass db_cursor")

    try:
        target.execute(
            "insert into shared_files values (?, ?)",
            (path.name, str(path).replace("\\", "/")),
        )
    except sqlite3.Error as exc:
        # Keep crawling on a failed insert, but report it instead of hiding it.
        print("failed to index file:", path, exc)


def process_dir_path(path):
    """Directory callback; directories themselves are not recorded."""
    pass


def format_host(host):
    """Return *host* as a string, left-padded with "0" to at least 3 characters."""
    return str(host).rjust(3, "0")


# main function
def main():
    """Crawl hosts 445..998 and commit the indexed files after each host."""
    global conn, cursor

    conn = sqlite3.connect("share.db")
    try:
        cursor = conn.cursor()
        # Make a fresh database usable. Without the table every insert fails.
        # NOTE(review): column names are an assumption (only "filename" is
        # referenced by the companion query); an existing table is untouched.
        cursor.execute(
            "create table if not exists shared_files (filename text, path text)"
        )

        for host in range(445, 999):
            host_id = format_host(host)
            start = time.time()
            p = Path(r"\\hostnameprex" + host_id + "/share")
            print("start crawling host:hostnameprex", host_id)
            apply_dfs(p, process_file_path, process_dir_path)
            end = time.time()
            print("complete crawling:", host_id,
                  "elasped time:", end - start, "s")
            # Commit per host so an interrupted crawl keeps finished hosts.
            conn.commit()
    finally:
        # Always release the database file, even if the crawl is aborted.
        conn.close()


if __name__ == "__main__":
    main()
查询脚本:
import os
import sqlite3 as db


# Query the database.
def query(name, db_path="share.db"):
    """Print every row of ``shared_files`` whose filename contains *name*.

    Each matching row is printed as its first column immediately followed by
    its second column.

    Args:
        name: Substring to look for; it is wrapped in ``%`` and matched with
            SQL ``LIKE``, so ``%`` and ``_`` inside it act as wildcards.
        db_path: SQLite database file to read. Defaults to ``"share.db"``.

    Raises:
        sqlite3.Error: If the query fails (for example the table is missing).
    """
    # Open the connection.
    conn = db.connect(db_path)
    try:
        # Obtain a cursor.
        sqlite_cursor = conn.cursor()
        sql_select = "SELECT * FROM shared_files where filename like ?;"
        sqlite_cursor.execute(sql_select, ('%' + name + '%',))
        for row in sqlite_cursor:
            # NOTE(review): the separator is an empty string, so the two
            # columns run together in the output -- confirm this is intended.
            print(row[0] + "" + row[1])
    finally:
        # Close the connection even when the query raises.
        conn.close()


# Entry point.
def main():
    """Prompt until a non-empty keyword is entered, then run one query."""
    # NOTE(review): the flattened source does not show whether `break` was
    # inside the `if`; it is placed there so an empty keyword re-prompts.
    while True:
        keyword = input('please input keyword:')
        if keyword != '':
            query(keyword)
            break


if __name__ == "__main__":
    main()