[Python] 分享百度快排发包python源码核心文件

最近都在研究怎么做快排,分享下成果,可以一起学习交流!

  附上代码:

  1. # -*- coding: utf-8 -*-from selenium import webdriver
  2. import time
  3. import requests
  4. import random
  5. import os
  6. from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
  7. import traceback
  8. import urllib.request
  9. import pymysql
  10. import socket
  11. #import win32api #pip install pypiwin32
  12.  
  13. #from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
  14. #DesiredCapabilities.INTERNETEXPLORER['ignoreProtectedModeSettings'] = True
  15.  
  16.  
  17.  
  18. #rasdial 宽带连接 19ab68----643534
  def connect():
      """Dial the connection described by ``g_adsl_account`` using ``rasdial``.

      Builds the command ``rasdial <name> <username> <password>`` from the
      module-level ``g_adsl_account`` dict, runs it through ``os.system`` and
      then sleeps for five seconds.
      """
      account = g_adsl_account
      command = "rasdial %s %s %s" % (
          account['name'],
          account['username'],
          account['password'],
      )
      os.system(command)
      time.sleep(5)
  23.  
  24.  
  25. #"rasdial 断开宽带连接 /disconnect"
  def disconnect():
      """Hang up the connection named in ``g_adsl_account`` using ``rasdial``.

      Runs ``rasdial <name> /disconnect`` through ``os.system`` and then
      sleeps for five seconds.
      """
      connection_name = g_adsl_account['name']
      command = "rasdial %s /disconnect" % connection_name
      os.system(command)
      time.sleep(5)
  30.      
  31. #获取ip地址
  def get_ip():
      """Return the current public IP and its location as ``[ip, address]``.

      Fetches ``http://ip.chinaz.com/getip.aspx``, decodes the body as UTF-8
      and cuts the two values out by fixed offsets relative to the first
      occurrences of the substrings ``"ip"`` and ``"address"``.

      Returns:
          A two-element list ``[ip, address]`` of strings.

      NOTE(review): the offsets presumably match a body shaped like
      ``{ip:'1.2.3.4',address:'...'}`` -- confirm against the live service;
      any other shape yields meaningless slices rather than an error.
      """
      # The response is closed by the ``with`` block even when reading or
      # decoding fails.
      with urllib.request.urlopen("http://ip.chinaz.com/getip.aspx") as response:
          body = response.read().decode("utf8")

      ip_pos = body.find("ip")
      address_pos = body.find("address")
      ip = body[ip_pos + 4:address_pos - 2]
      address = body[address_pos + 9:-2]
      return [ip, address]
  45.  
  46. #将ip地址插入数据库
  def insert_db(ipdate):
      """Insert one run record into table ``ip_log`` of database ``zongzong``.

      Args:
          ipdate: Sequence of seven values in column order
              ``ip, address, keyword, url, error, page, rank``. The current
              local time (format ``%Y-%m-%d %X``) is added as ``created_at``.
              The sequence itself is not modified.

      The connection is opened against ``localhost:3306`` as ``root`` with an
      empty password. The cursor and the connection are always closed, even
      when a statement fails; the insert is committed only on success.
      """
      created_at = time.strftime('%Y-%m-%d %X', time.localtime())
      # Work on a copy so the caller's list does not grow on every call.
      row = list(ipdate) + [created_at]

      conn = pymysql.connect(host='localhost', user='root', passwd='', port=3306, charset='utf8')
      try:
          cur = conn.cursor()
          try:
              cur.execute("USE zongzong")
              # `rank` is back-quoted because RANK is a reserved word in
              # MySQL 8.0; back-quoting is also valid on older servers.
              cur.execute(
                  "INSERT INTO ip_log(ip,address,keyword,url,error,page,`rank`,created_at) "
                  "VALUES(%s, %s, %s, %s, %s, %s, %s, %s)",
                  row,
              )
          finally:
              cur.close()
          conn.commit()
      finally:
          conn.close()
  70.     #except:
  71.     #   print("发生异常")  
  72.  
  73.  
  74. #获取搜素出来的url
  def get_search_url(driver):
      """Collect landing URLs and their link elements from the current page.

      Scans every ``<a>`` inside ``div#content_left`` whose ``class``
      attribute equals ``c-showurl``. For each one a HEAD request is sent to
      its ``href`` and the ``location`` response header is used as the landing
      URL. A link is skipped when its landing URL contains any entry of the
      module-level ``out_urls`` list.

      Args:
          driver: WebDriver instance currently showing a result page.

      Returns:
          ``[real_url, click_link]`` -- two parallel lists: the landing URLs
          that were kept and the corresponding ``<a>`` elements.

      NOTE(review): ``urls`` and ``real`` are filled but never returned.
      NOTE(review): ``header['location']`` is read unconditionally; presumably
      this raises ``KeyError`` when the HEAD response is not a redirect --
      confirm.
      """
      urls = []
      real = []
      real_url = []
      click_link = []
      content = driver.find_element_by_css_selector("div[id=\"content_left\"]")
      links = content.find_elements_by_tag_name("a")
      for link in links:
          if link.get_attribute('class') == "c-showurl":
              real.append(link.text)
              url = link.get_attribute('href')
              urls.append(url)

              # Resolve the result link to its real URL via the redirect header
              header = requests.head(url).headers
              is_append = True
              for out_url in out_urls:
                  if out_url in header['location']:
                      is_append = False
                      break

              if is_append == True:
                  real_url.append(header['location'])
                  # the <a> element itself
                  click_link.append(link)

      #print(real)
      #print(urls)
      #return urls
      return [real_url,click_link]
  105.      
  106.      
  107. #function:解析加密url,剔除竞争对手的url
  108. # def get_real_url(urls):
  109.     # real_url = []
  110.     # for url in urls:
  111.         # header = requests.head(url).headers
  112.         # is_append = True
  113.         # for out_url in out_urls:
  114.             # if out_url in header['location']:
  115.                 # is_append = False
  116.                 # break
  117.              
  118.         # if is_append == True:
  119.             # real_url.append(header['location'])
  120.     # return real_url
  121.  
  122. #function 目标地址是否在某个list中
  def get_urlIndex(tagurl, urls):
      """Return the position of the first URL that contains ``tagurl``.

      Args:
          tagurl: Substring to look for (the target address).
          urls: Iterable of URL strings to search, in order.

      Returns:
          The zero-based index of the first entry of ``urls`` in which
          ``tagurl`` occurs as a substring, or ``-1`` when there is none.
      """
      for position, url in enumerate(urls):
          if tagurl in url:
              return position
      return -1
  132.  
  133.      
  134. #点击百度搜索内容下面的下一页
  def click_nextBtn(driver):
      """Click the "next page" link in the pager of the current result page.

      Looks at every ``<a>`` inside ``div#page``, prints its text and clicks
      the one whose text equals ``"下一页>"``.

      Args:
          driver: WebDriver instance currently showing a result page.

      Returns:
          The same ``driver`` object that was passed in.

      NOTE(review): the loop does not stop after the click and keeps reading
      ``item.text`` from the remaining elements -- verify this is safe once
      the page has started navigating.
      """
      div = driver.find_element_by_css_selector("div[id=\"page\"]")
      a = div.find_elements_by_tag_name("a")
      for item in a:
          print(item.text)
          if item.text == "下一页>":
              item.click()

      return driver
  144.  
  145.              
  146.  
  147.  
  148. #随机点击
  def click_search_url(driver,items):
      """Click selected result links and close the windows they open.

      Walks the ``<a>`` elements inside ``div#content_left`` whose ``class``
      attribute equals ``c-showurl``, counting them from 0. When the running
      count ``i`` is contained in ``items`` the link is clicked, the function
      sleeps a random 5-10 seconds, and then every window handle other than
      the one that was current on entry is switched to, closed, and the
      driver is switched back to the original window.

      Args:
          driver: WebDriver instance currently showing a result page.
          items: Collection of zero-based positions (among the ``c-showurl``
              links) to click.

      Returns:
          None.

      NOTE(review): ``urls`` and ``real`` are never used.
      """
      urls = []
      real = []
      content = driver.find_element_by_css_selector("div[id=\"content_left\"]")
      links = content.find_elements_by_tag_name("a")
      i=0
      '''获取当前窗口'''
      nowhandle = driver.current_window_handle
      #allhandles=driver.window_handles
      #for handle in allhandles:
      #   print('....current window....',handle.title)
      #exit()

      for link in links:
          if link.get_attribute('class') == "c-showurl":
              if i in items:
                  print("随机点击item:",i)
                  print(link.get_attribute('href'),link.text)
                  #exit()
                  link.click()
                  # Stay on the clicked page for a while
                  time.sleep(random.randint(5,10))

                  '''获取所有窗口'''
                  allhandles=driver.window_handles
                  #for handle in allhandles:
                  #   print('....current window....',handle.title)
                  #exit()

                  '''循环判断窗口是否为当前窗口'''
                  for handle in allhandles:
                      if handle != nowhandle:
                          print("切换到当前窗口")
                          driver.switch_to_window(handle)
                          print("title:",driver.title)
                          '''关闭当前窗口'''
                          driver.close()
                          '''回到原先的窗口'''
                          print("切换到原来的窗口")
                          driver.switch_to_window(nowhandle)
                          print("title:",driver.title)
                  print("本次随机点击完毕!")

              # Count every c-showurl link, clicked or not
              i=i+1
  193.  
  194.              
  195. #获取随机点击的搜索页random.randint(0
  def get_random_index(index, len):
      """Choose result positions to click, based on where the target sits.

      Args:
          index: Zero-based position of the target on the page, or a negative
              value (``-1`` by convention) when the target was not found.
          len: Number of results on the page. (The name shadows the builtin;
              it is kept so existing callers keep working.)

      Returns:
          A list of zero-based positions:

          * ``index >= 8``: one position from 0-4 and one from 5-8.
          * ``4 <= index < 8``: one position from 0-3 and one from 3-``index``.
          * ``0 <= index < 4``: just ``[index]``.
          * target not found: one position from ``0 .. len-1`` when the page
            has at most five results, otherwise one from ``5 .. len-1``;
            an empty list when the page has no results.
      """
      if index >= 8:
          return [random.randint(0, 4), random.randint(5, 8)]
      if index >= 4:
          return [random.randint(0, 3), random.randint(3, index)]
      if index >= 0:
          return [index]

      # Target not found. ``random.randint`` includes its upper bound, so the
      # bound must be ``len - 1`` to stay inside the result list.
      if len <= 0:
          return []
      if len <= 5:
          return [random.randint(0, len - 1)]
      return [random.randint(5, len - 1)]
  220.  
  221.  
  222.  
  def getUA():
      """Return one user-agent string picked at random from a fixed pool."""
      candidates = (
          # 360
          "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36",
          # Chrome
          "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.94 Safari/537.36",
          "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36",
          # Firefox
          "Mozilla/5.0 (Windows NT 6.3; WOW64; rv:36.0) Gecko/20100101 Firefox/36.0",
          # Opera
          "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 OPR/26.0.1656.60",
      )
      return random.choice(candidates)
  252.  
  253. #屏幕浏览器窗口大小
  def getWindowSize():
      """Return a randomly chosen browser window size as ``[width, height]``."""
      return random.choice([
          [1920, 1080],
          [1600, 900],
          [1280, 720],
      ])
  262.      
  263.      
  264. #屏幕分辨率设置
  def setDisplay():
      """Switch the display to a resolution picked at random from a fixed list.

      Reads the current display settings with ``win32api.EnumDisplaySettings``,
      overwrites width, height, colour depth (32) and ``DisplayFixedOutput``
      (0), and applies them with ``win32api.ChangeDisplaySettings``.

      NOTE(review): the ``import win32api`` line at the top of the listing is
      commented out, so calling this function as-is fails on the undefined
      name ``win32api``. The only call site in the listing is commented out.
      """
      display_size = [
          [1920,1080],
          [1680,1050],
          [1600,900],
          [1440,900],
          [1400,1050]
      ]
      d_size = random.choice(display_size)

      dm = win32api.EnumDisplaySettings(None, 0)
      dm.PelsWidth = d_size[0]
      dm.PelsHeight = d_size[1]
      dm.BitsPerPel = 32
      dm.DisplayFixedOutput = 0
      win32api.ChangeDisplaySettings(dm, 0)
  281.  
  282.      
  283. #拨号 19ab68----643534       
  # Dial-up account read by connect() and disconnect():
  #   name     -- connection entry name passed to rasdial
  #   username -- account user name passed to rasdial
  #   password -- account password passed to rasdial
  # NOTE(review): credentials are hard-coded in the source; consider loading
  # them from the environment or an untracked config file.
  g_adsl_account = {
      "name":"宽带连接",
      "username":"19ab68",
      "password":"643534"
  }
  289.  
  290.  
  291. #屏蔽点击的地址(竞争对手)
  # Substrings of landing URLs that must never be clicked. get_search_url()
  # drops every result whose resolved URL contains one of these entries.
  out_urls = [
      'zhimo.yuanzhumuban.cc',
      'bbs.yuanzhumuban.cc',
          'http://money.163.com/15/0416/11/ANANRECC00253B0H.html'
  ]
  297.  
  298.  
  299. ##内页词
  # Work list for the main loop. Each entry is a list:
  #   [0] target address (matched as a substring of the resolved result URL)
  #   [1] keyword to search for
  #   [2:] optional extra keywords; when present the main loop picks one at
  #        random and searches it first before the real keyword
  # Entries that are commented out are disabled targets.
  targetURL = [

          ['http://www.hkuws.com','注册离岸公司'],
      ['zs.efu.com.cn/mornfeeit/','梦菲雪'],
      ['zs.efu.com.cn/chengshijiaren/','城市佳人'],
      ['www.kidsnet.cn/exposition','童装展会'],
      #['top.kidsnet.cn/','童装加盟排行榜'],
      #['www.nynet.com.cn/','内衣网'],
      #['www.nzw.cn/','女装网'],
      ['zs.efu.com.cn/ks/','卡索'],
      ['zs.efu.com.cn/distin-kidny/','迪斯廷凯'],
      ['zs.efu.com.cn/fuzhuang/luyidigao/','路易迪高童装代{过}{滤}理'],
      ['brand.efu.com.cn/brandshow-1221090.html','凯帝龙驰'],
      ['zs.efu.com.cn/rabbitjero/','兔子杰罗'],
      ['zs.efu.com.cn/wmprince/','西瓜王子'],
          ['zs.efu.com.cn/betu','百图'],
          ['zs.efu.com.cn/pepco/','小猪班纳'],


      #['http://news.ifeng.com/a/20160518/48795120_0.shtml','华夏信财'],
      ['http://weibo.com/huaxiafinance','华夏信财'],
      ['http://p2p.hexun.com/2016-04-26/183531215.html','华夏信财'],
      #['http://news.xinhuanet.com/fortune/2016-04/26/c_128932834.htm','华夏信财'],
      ['http://www.xcf.cn/gdyw/201605/t20160526_772682.htm','华夏信财'],
      ['http://www.huaxiaoxia.com/','华夏信财'],
          #['https://lc.huaxiafinance.com/','华夏信财'],



          ['so.tedu.cn','网络营销培训机构'],
          ['www.cosatto.net.cn','个性安全座椅'],
          ['www.kaihuata.com/','开化旅游'],
          #['www.kaihuata.com/','开化'],

  ]
  335.  
  336.  
  # Main loop: one pass per entry of targetURL.
  #
  # Per entry it redials the connection, starts a PhantomJS driver with a
  # random user agent and window size, searches the keyword on Baidu, looks
  # for the target address in the results (following the "next page" link
  # while the target is not found and page <= 4), clicks the target, stays on
  # it for a random 15-25 seconds, clears cookies, quits the driver and logs
  # the outcome with insert_db().
  #
  # NOTE(review): `webdriver` is used below, but the first line of the listing
  #   fuses `from selenium import webdriver` into the coding comment, so the
  #   name is not imported as written.
  # NOTE(review): the bare `except` at the bottom reads `keyword` and
  #   `target`; they are unbound if the failure happens before they are
  #   assigned on the first pass, and stale from the previous pass otherwise.
  # NOTE(review): the `except` path never calls driver.quit(), so a failed
  #   pass leaves its browser process running.
  # NOTE(review): `page >= 5` is checked without looking at `index`, so a
  #   target found on the last fetched page is still recorded as "no_find".
  # NOTE(review): the local name `int` shadows the builtin.
  for targetInfo in targetURL:
      try:
          # Switch IP by redialing the connection
          disconnect()
          connect()

          # Redial until DNS resolution works again
          while(1):
                      try:
                          socket.gethostbyname("baidu.com")
                          break;
                      except:
                          disconnect()
                          connect()
          # Change the screen resolution
          #setDisplay()


          # Launch the browser
          #driver = webdriver.Ie()
          #driver = webdriver.Chrome()
          #driver = webdriver.Firefox()

          # Set the PhantomJS user agent
          dcap = dict(DesiredCapabilities.PHANTOMJS)
          user_agent = getUA()
          print(user_agent)
          dcap["phantomjs.page.settings.userAgent"] = (
                  user_agent
          )
          #dcap["phantomjs.page.settings.resourceTimeout"] = (15000)
          dcap["phantomjs.page.settings.loadImages"] = (False)
          driver = webdriver.PhantomJS(desired_capabilities=dcap,service_args=['--load-images=no'])


          # UA = getUA()
          # print(UA)
          # webdriver.DesiredCapabilities.PHANTOMJS['phantomjs.page.customHeaders.User-Agent'] = UA
          # driver = webdriver.PhantomJS()

          driver.implicitly_wait(30)

          # Clear cookies
          driver.delete_all_cookies()

          #driver.maximize_window() # show the browser full screen

          # Open Baidu
          driver.get("http://www.baidu.com/")
          #driver.get("http://mch.weiba01.com/2.php")

          # Set the browser window size
          window_size = getWindowSize()
          driver.set_window_size(window_size[0], window_size[1])


          # Search for the keyword
          print('打开百度成功',driver.title)
          target = targetInfo[0]
          keyword = targetInfo[1]
          if len(targetInfo)>2:
                  error_keyword = targetInfo[random.randint(2,len(targetInfo)-1)]
          print(">>>>>>>>>>>>>>>点击的关键词:",keyword,"--->目标地址:",target,">>>>>>>>>>>>>>>>>>>>")


          if len(targetInfo)>2:
              # Search one of the extra ("wrong") keywords first
              print("点击错误关键词:",error_keyword);
              driver.find_element_by_id("kw").send_keys(error_keyword)
              time.sleep(2)
              driver.find_element_by_id("su").click()
              time.sleep(5)
              driver.find_element_by_id("kw").clear()
              time.sleep(2)
              print("错误关键词点击完毕")

          driver.find_element_by_id("kw").send_keys(keyword)
          #time.sleep(2)

          # Click the search button
          print("...开始点击搜索按钮..")
          driver.find_element_by_id("su").click()
          #exit()
          print("...点击完毕..")
          time.sleep(2)


          # Read the result page: [0] landing URLs, [1] matching link elements
          urls_res = get_search_url(driver)
          real_urls = urls_res[0]
          #get_search_url(driver)[1][2].click()


          #real_urls = get_real_url(urls)
          print("搜索出来的可点击着陆页个数:",len(real_urls))
          print(real_urls)
          index = get_urlIndex(target,real_urls)
          print("目标index:",index)

          page = 1
          while index == -1 and page <= 4:
              if page == 1:
                  # Click some of the earlier landing pages to imitate a real user
                  items = get_random_index(index,len(real_urls))
                  #items = [4]
                  print(items)
                  click_search_url(driver,items)

              # Next page
              driver = click_nextBtn(driver)
              time.sleep(3)
              urls_res = get_search_url(driver)
              real_urls = urls_res[0]
              #real_urls = get_real_url(urls)
              print(real_urls)
              index = get_urlIndex(target,real_urls)

              page = page+1



          if index > 4 and page == 1:
              # First page: click one or two results at random
              int = random.randint(1,2)
              if int == 2:
                  items = get_random_index(index,len(real_urls))
              else:
                  items = [1]
              print(items)
              click_search_url(driver,items)

          if page >=5:
              print("没有找到目标地址,放弃搜索...")
              print("关闭浏览器")
              driver.quit()

              time.sleep(5)
              data = get_ip()
              data.append(keyword)
              data.append(target)
              data.append("no_find")
              data.append(-1)
              data.append(-1)
              insert_db(data)
              continue

          print("目标在page",page,"当前排名:",index,real_urls[index])
          print("反问最后的目标页...")
          #driver.get(real_urls[index])
          urls_res[1][index].click()
          time.sleep(5)

          nowhandle = driver.current_window_handle
          allhandles = driver.window_handles
          # Switch between the target page and the search result page
          for handle in allhandles:
              if handle != nowhandle:
                  print("切换到当前窗口")
                  driver.switch_to_window(handle)
                  stime = random.randint(15,25)
                  #stime = 5;
                  print("目标页title:",driver.title,"停留-->",stime)
                  time.sleep(stime)
                  '''关闭当前窗口'''
                  driver.close()

                  '''回到原先的窗口'''
                  print("切换到原来的窗口")
                  driver.switch_to_window(nowhandle)
                  print("title:",driver.title)


          #time.sleep(random.randint(40,60))
          #time.sleep(5)

          # Clear all cookies
          print("打印cookie")
          cookie= driver.get_cookies()
          print(cookie)
          print("清除cookie")
          driver.delete_all_cookies()
          print("打印cookie:")
          cookie= driver.get_cookies()
          print(cookie)

          # Close the browser
          print("关闭浏览器")
          time.sleep(5)
          #driver.close()
          driver.quit()
          #time.sleep(5)

          # Record the run in the database
          data = get_ip()
          data.append(keyword)
          data.append(target)
          data.append("success")
          data.append(page)
          data.append(index)
          insert_db(data)

      except:
          # Any failure in the pass is logged as "faild" with page/rank -1
          data = get_ip()
          data.append(keyword)
          data.append(target)
          data.append("faild")
          data.append(-1)
          data.append(-1)
          insert_db(data)
  545.     

转载自吾爱破解

编程语言

Wordpress二次开发快速入门之函数钩子(Hook)大全(2)

2019-9-6 16:33:37

编程语言

【前端开发,JS图表】Amcharts v4.7.4 - JS图表

2019-10-28 13:17:47

本站所发布的一切源码、模板、应用等文章仅限用于学习和研究目的;不得将上述内容用于商业或者非法用途,否则,一切后果请用户自负。本站信息来自网络,版权争议与本站无关。您必须在下载后的24个小时之内,从您的电脑中彻底删除上述内容。如果您喜欢该程序,请支持正版,购买注册,得到更好的正版服务。如有侵权,本站内容适用于DMCA政策。若您的权利被侵害,请与我们联系处理,站长 QQ: 84087680 或 点击右侧 私信:盾给网 反馈,我们将尽快处理。
⚠️
本站所发布的一切源码、模板、应用等文章仅限用于学习和研究目的;不得将上述内容用于商业或者非法用途,否则,一切后果请用户自负。本站信息来自网络,版权争议与本站无关。您必须在下载后的24个小时之内,从您的电脑中彻底删除上述内容。如果您喜欢该程序,请支持正版,购买注册,得到更好的正版服务。如有侵权。本站内容适用于DMCA政策
若您的权利被侵害,请与我们联系处理,站长 QQ: 84087680 或 点击右侧 私信:盾给网 反馈,我们将尽快处理。
0 条回复 A文章作者 M管理员
欢迎您,新朋友,感谢参与互动!
    暂无讨论,说说你的看法吧
个人中心
购物车
优惠券
今日签到
私信列表
搜索