大佬们,python爬虫怎么筛选我要的内容啊

@Ta 06-21 12:05发布,06-21 12:15修改 940点击
用的是
# Parse the fetched page with the lxml parser.
soup = BeautifulSoup(html,"lxml")
# class_ matches on individual CSS classes, so this returns every tag that has
# "clearfix" among its classes -- including multi-class tags such as
# class="con clearfix" or class="gl-pages clearfix", not only class="clearfix".
# NOTE(review): to keep just the news entries, restrict the tag as well, e.g.
# soup.find_all("li", class_="clearfix") or soup.select("ul.gl-cons-rgs > li.clearfix").
news01 = soup.find_all(class_="clearfix")

我想要的是class="clearfix"的新闻列表,它怎么把class="con clearfix"的也搞进来了

[<div class="con clearfix">
<div class="xzrs-top-l lf">
<a>欢迎访问西藏自治区体育局</a>
</div>
</div>, <div class="xzrs-logo clearfix">
<a class="logo" href="#"><img src="../../images/xz-tyj-logo.png"/></a>
<div class="zw-topsear clearfix">
<input class="zw-tsch keywords" id="demojs" placeholder="请输入关键字后搜索" type="text"/>
<a class="zw-tsbtn rt submit" href="javascript:return false;"></a>
</div>
</div>, <div class="zw-topsear clearfix">
<input class="zw-tsch keywords" id="demojs" placeholder="请输入关键字后搜索" type="text"/>
<a class="zw-tsbtn rt submit" href="javascript:return false;"></a>
</div>, <div class="xz-posi-con clearfix" style="border-bottom: none;">
<a class="xz-posi-tit">当前位置:</a>
<p class="xz-posi-item">
<a class="CurrChnlCls" href="../../" target="_self" title="首页">首页</a><span>&gt;</span><a class="CurrChnlCls" href="../" target="_self" title="新闻中心">新闻中心</a><span>&gt;</span><a class="CurrChnlCls" href="./" target="_self" title="群众体育">群众体育</a>
</p>
</div>, <div class="gl-content clearfix">
<div class="gl-conts-le lf">
<p class="gl-le-tit"><a href="">
</a><a href="../">新闻中心</a>
</p>
<ul class="gl-le-item title-tyj">
<li><a chnl="1576" href="../tptt/">图片新闻</a></li>
<li><a chnl="1585" href="./">群众体育</a></li>
<li><a chnl="1586" href="../jjty/">竞技体育</a></li>
<li><a chnl="1588" href="../cy/">体育产业</a></li>
</ul>
</div>
<div class="gl-conts-rg rt">
<div class="gl-conts-items">
<p class="gl-le-btms">
<a href="./">群众体育</a>
</p>
<ul class="gl-cons-rgs">
<li class="clearfix">
<a href="./202206/t20220615_303213.html" target="_blank">2022年西藏自治区一级社会体育指导员培训班开班</a><span>
                      2022-06-15
                    </span>
</li>
<li class="clearfix">
<a href="./202206/t20220606_301765.html" target="_blank">2022年西藏自治区三级社会体育指导员示范培训班开班</a><span>
                      2022-06-06
                    </span>
</li>
<li class="clearfix">
<a href="./202205/t20220520_299482.html" target="_blank">推动全民健身工作 助力体育事业发展   区体育局组织开展第三期“体育大家讲”专题学习讲座</a><span>
                      2022-05-20
                    </span>
</li>
<li class="clearfix">
<a href="./202205/t20220510_297925.html" target="_blank">西藏自治区第五届民族传统体育运动会 第二期赛前培训班开班</a><span>
                      2022-05-10
                    </span>
</li>
<li class="clearfix">
<a href="https://m.xizangribao.com/zh/template/displayTemplate/news/newsDetail/16/490274.html?isShare=true" target="_blank">西藏全民健身志愿服务进区直机关(企事业)单位活动启动</a><span>
                      2022-05-09
                    </span>
</li>
</ul>
<ul class="gl-cons-rgs">
<li class="clearfix">
<a href="./202205/t20220505_297014.html" target="_blank">弘扬五四精神  激发奋进力量 ——区体育局组织举办“喜迎二十大暨庆祝建团100周年”主题团日羽毛球混合...</a><span>
                      2022-05-05
                    </span>
</li>
<li class="clearfix">
<a href="https://mp.weixin.qq.com/s/wxi5Xq2fz8CvR6h-i2bZ1w" target="_blank">今年我区将广泛开展民族传统体育赛事活动</a><span>
                      2022-04-13
                    </span>
</li>
<li class="clearfix">
<a href="https://mp.weixin.qq.com/s/Z2WGnMZJslfpLQWbCZ5ksA" target="_blank">全民健身让人民生活更美好</a><span>
                      2022-03-31
                    </span>
</li>
<li class="clearfix">
<a href="../../djgz/dw/202112/t20211229_276960.html" target="_blank">区体育局领导赴那曲驻村点宣讲党的十九届六中全会和自治区第十次党代会精神</a><span>
                      2021-12-29
                    </span>
</li>
<li class="clearfix">
<a href="./202112/t20211222_275836.html" target="_blank">我区代表队参加2021年“舞动中国—排舞联赛”线上总决赛斩获佳绩</a><span>
                      2021-12-22
                    </span>
</li>
</ul>
<ul class="gl-cons-rgs">
<li class="clearfix">
<a href="./202112/t20211214_274454.html" target="_blank">2021年西藏自治区一级社会体育指导员培训班开班</a><span>
                      2021-12-14
                    </span>
</li>
<li class="clearfix">
<a href="./202112/t20211205_273280.html" target="_blank">西藏自治区全民健身志愿服务“六进”进林芝市工布江达县中学活动举行</a><span>
                      2021-12-05
                    </span>
</li>
<li class="clearfix">
<a href="./202112/t20211203_273212.html" target="_blank">西藏体彩中心开展全区高校优秀贫困大学生关爱行动</a><span>
                      2021-12-03
                    </span>
</li>
<li class="clearfix">
<a href="./202112/t20211202_273084.html" target="_blank">体育拥军  送健康进军营 ——西藏自治区全民健身志愿服务 “六进”进32169部队活动举行</a><span>
                      2021-12-02
                    </span>
</li>
<li class="clearfix">
<a href="./202111/t20211129_272048.html" target="_blank">2021年西藏自治区国家级社会体育指导员和教练员、裁判员培训班开班</a><span>
                      2021-11-29
                    </span>
</li>
</ul>
<ul class="gl-cons-rgs">
<li class="clearfix">
<a href="./202111/t20211119_270675.html" target="_blank">西藏自治区登山协会进行换届</a><span>
                      2021-11-19
                    </span>
</li>
<li class="clearfix">
<a href="./202111/t20211119_270668.html" target="_blank">区体育局团委举办“珍爱生命 远离毒品” 禁毒知识专题讲座</a><span>
                      2021-11-19
                    </span>
</li>
<li class="clearfix">
<a href="./202111/t20211112_269792.html" target="_blank">西藏自治区社会和民族传统体育指导管理中心职业技能等级认定所揭牌</a><span>
                      2021-11-12
                    </span>
</li>
<li class="clearfix">
<a href="./202110/t20211030_267706.html" target="_blank">西藏自治区体育总会第四届代表大会召开</a><span>
                      2021-10-30
                    </span>
</li>
<li class="clearfix">
<a href="./202110/t20211029_267676.html" target="_blank">2021年西藏自治区体育项目进校园(足球进曲水县南木乡小学站)举行</a><span>
                      2021-10-29
                    </span>
</li>
</ul>
<ul class="gl-cons-rgs">
<li class="clearfix">
<a href="./202110/t20211027_267192.html" target="_blank">促进冰雪运动发展,助力北京冬奥会 推动山地户外运动大区建设 ——2021第十九届中国西藏登山大会开幕</a><span>
                      2021-10-27
                    </span>
</li>
<li class="clearfix">
<a href="./202110/t20211020_266092.html" target="_blank">2021年全区“三送”暨送体育下基层 贡嘎站活动举行</a><span>
                      2021-10-20
                    </span>
</li>
<li class="clearfix">
<a href="./202110/t20211014_265419.html" target="_blank">健身锅庄舞起来  健康生活更精彩</a><span>
                      2021-10-14
                    </span>
</li>
<li class="clearfix">
<a href="./202110/t20211013_265235.html" target="_blank">2021年全区“三送”暨送体育下基层墨竹工卡站活动举行</a><span>
                      2021-10-13
                    </span>
</li>
<li class="clearfix">
<a href="./202110/t20211012_265047.html" target="_blank">发展登山特色优势产业  打造西藏登山新名片</a><span>
                      2021-10-12
                    </span>
</li>
</ul>
<div class="gl-pages clearfix">
<style>
a {
    text-decoration: none;
}
.pg_num {
    width: 700px;
    margin: 0px auto;
    overflow: hidden;
   margin-left: auto;
}
.pg_num .page_now {
    background: #007ce3 none repeat scroll 0% 0%;
    color: #FFF;
    font-size: 15px;
    border: 1px solid #007ce3;
}
.pg_num a {
    display: block;
    height: 30px;
    line-height: 30px;
    padding: 0px 9px;
    float: left;
    margin-left: 5px;
    border: 1px solid #DADADA;
    color: #212121;
    width: auto;
     font-size:15px;
}
</style>
<div class="pg_num">
<script type="text/javascript">
/**
 * Writes the pagination links for a listing page into the document using
 * document.write().
 *
 * Link targets follow the pattern visible in the code: the first page is
 * "<name>.<ext>" and page index i (i >= 1) is "<name>_<i>.<ext>". Link labels
 * are one-based (index i is shown as i + 1).
 *
 * @param {number} _nPageCount Total number of pages. Nothing is written when
 *     this is null/undefined or <= 1.
 * @param {number} _nCurrIndex Zero-based index of the current page; a falsy
 *     value is treated as 0.
 * @param {string} _sPageName Base file name of the listing pages.
 * @param {string} _sPageExt File extension of the listing pages (no dot).
 */
function createPageHTML(_nPageCount, _nCurrIndex, _sPageName, _sPageExt){
  if(_nPageCount == null || _nPageCount<=1){
    return;
  }
  var nCurrIndex = _nCurrIndex || 0;
  // 1. Output the "first page" and "previous page" links.
  // 1.1 The current page is the first page.
  if(nCurrIndex == 0){
    document.write("<a href=\""+_sPageName+"."+_sPageExt+"\">首页</a>");
    document.write("<a class='page_now' href=\""+_sPageName+"."+_sPageExt+"\">1</a>");
  }
  // 1.2 The current page is not the first page.
  else{
    var nPreIndex = nCurrIndex - 1;
    // Index 0 has no "_<n>" suffix in its file name.
    var sPreFileExt = nPreIndex == 0 ? "" : ("_" + nPreIndex);

    document.write("<a href=\""+_sPageName+"."+_sPageExt+"\">首页</a>");
    document.write("<a href=\"" + _sPageName + sPreFileExt + "."+_sPageExt+"\">上一页</a>");
    document.write("<a href=\""+_sPageName+"."+_sPageExt+"\">1</a>");
  }
  // 2. Output the numbered links in the middle.
  // Each flag allows one "..." placeholder: flag1 for the gap after the
  // current page, flag2 for the gap before it.
  var flag1=true;
  var flag2=true;

  for(var i=1; i<_nPageCount; i++){

    // Show a numbered link when the page is within 2 of the current page,
    // is the second page, or is one of the last two pages.
    if(i-nCurrIndex<3&&nCurrIndex-i<3||i<2||_nPageCount-i<3){
      if(nCurrIndex == i)
        document.write("<a class='page_now' href=\""+_sPageName+"_" + i + "."+_sPageExt+"\">"+(i+1)+"</a>");
      else
        document.write("<a href=\""+_sPageName+"_" + i + "."+_sPageExt+"\">"+(i+1)+"</a>");
    }
    else if(i-nCurrIndex>2&&flag1){
      flag1=false;
      document.write("<a class='potc' style='border:0px;'>...</a>");
    }
    else if(nCurrIndex-i>2&&flag2){
      flag2=false;
      document.write("<a class='potc' style='border:0px;'>...</a>");
    }
  }

  // 3. Output the "next page" and "last page" links.
  // 3.1 The current page is the last page.
  if(nCurrIndex == (_nPageCount-1)){
    document.write("<a href=\""+_sPageName+"_" + (_nPageCount-1) + "."+_sPageExt+"\">尾页</a>");
  }
  // 3.2 The current page is not the last page.
  else{
    var nNextIndex = nCurrIndex + 1;

    document.write("<a href=\""+_sPageName+"_" + nNextIndex + "."+_sPageExt+"\">下一页</a>");
    document.write("<a href=\""+_sPageName+"_" + (_nPageCount-1) + "."+_sPageExt+"\">尾页</a>");
  }
}
// WCM template tag. NOTE(review): the arguments are presumably filled in by the CMS when the page is generated -- confirm.
createPageHTML(9,0,"index","html");
</script>
</div>
</div>
</div>
</div>
</div>, <li class="clearfix">
<a href="./202206/t20220615_303213.html" target="_blank">2022年西藏自治区一级社会体育指导员培训班开班</a><span>
                      2022-06-15
                    </span>
</li>, <li class="clearfix">
<a href="./202206/t20220606_301765.html" target="_blank">2022年西藏自治区三级社会体育指导员示范培训班开班</a><span>
                      2022-06-06
                    </span>
</li>, <li class="clearfix">
<a href="./202205/t20220520_299482.html" target="_blank">推动全民健身工作 助力体育事业发展   区体育局组织开展第三期“体育大家讲”专题学习讲座</a><span>
                      2022-05-20
                    </span>
</li>, <li class="clearfix">
<a href="./202205/t20220510_297925.html" target="_blank">西藏自治区第五届民族传统体育运动会 第二期赛前培训班开班</a><span>
                      2022-05-10
                    </span>
</li>, <li class="clearfix">
<a href="https://m.xizangribao.com/zh/template/displayTemplate/news/newsDetail/16/490274.html?isShare=true" target="_blank">西藏全民健身志愿服务进区直机关(企事业)单位活动启动</a><span>
                      2022-05-09
                    </span>
</li>, <li class="clearfix">
<a href="./202205/t20220505_297014.html" target="_blank">弘扬五四精神  激发奋进力量 ——区体育局组织举办“喜迎二十大暨庆祝建团100周年”主题团日羽毛球混合...</a><span>
                      2022-05-05
                    </span>
</li>, <li class="clearfix">
<a href="https://mp.weixin.qq.com/s/wxi5Xq2fz8CvR6h-i2bZ1w" target="_blank">今年我区将广泛开展民族传统体育赛事活动</a><span>
                      2022-04-13
                    </span>
</li>, <li class="clearfix">
<a href="https://mp.weixin.qq.com/s/Z2WGnMZJslfpLQWbCZ5ksA" target="_blank">全民健身让人民生活更美好</a><span>
                      2022-03-31
                    </span>
</li>, <li class="clearfix">
<a href="../../djgz/dw/202112/t20211229_276960.html" target="_blank">区体育局领导赴那曲驻村点宣讲党的十九届六中全会和自治区第十次党代会精神</a><span>
                      2021-12-29
                    </span>
</li>, <li class="clearfix">
<a href="./202112/t20211222_275836.html" target="_blank">我区代表队参加2021年“舞动中国—排舞联赛”线上总决赛斩获佳绩</a><span>
                      2021-12-22
                    </span>
</li>, <li class="clearfix">
<a href="./202112/t20211214_274454.html" target="_blank">2021年西藏自治区一级社会体育指导员培训班开班</a><span>
                      2021-12-14
                    </span>
</li>, <li class="clearfix">
<a href="./202112/t20211205_273280.html" target="_blank">西藏自治区全民健身志愿服务“六进”进林芝市工布江达县中学活动举行</a><span>
                      2021-12-05
                    </span>
</li>, <li class="clearfix">
<a href="./202112/t20211203_273212.html" target="_blank">西藏体彩中心开展全区高校优秀贫困大学生关爱行动</a><span>
                      2021-12-03
                    </span>
</li>, <li class="clearfix">
<a href="./202112/t20211202_273084.html" target="_blank">体育拥军  送健康进军营 ——西藏自治区全民健身志愿服务 “六进”进32169部队活动举行</a><span>
                      2021-12-02
                    </span>
</li>, <li class="clearfix">
<a href="./202111/t20211129_272048.html" target="_blank">2021年西藏自治区国家级社会体育指导员和教练员、裁判员培训班开班</a><span>
                      2021-11-29
                    </span>
</li>, <li class="clearfix">
<a href="./202111/t20211119_270675.html" target="_blank">西藏自治区登山协会进行换届</a><span>
                      2021-11-19
                    </span>
</li>, <li class="clearfix">
<a href="./202111/t20211119_270668.html" target="_blank">区体育局团委举办“珍爱生命 远离毒品” 禁毒知识专题讲座</a><span>
                      2021-11-19
                    </span>
</li>, <li class="clearfix">
<a href="./202111/t20211112_269792.html" target="_blank">西藏自治区社会和民族传统体育指导管理中心职业技能等级认定所揭牌</a><span>
                      2021-11-12
                    </span>
</li>, <li class="clearfix">
<a href="./202110/t20211030_267706.html" target="_blank">西藏自治区体育总会第四届代表大会召开</a><span>
                      2021-10-30
                    </span>
</li>, <li class="clearfix">
<a href="./202110/t20211029_267676.html" target="_blank">2021年西藏自治区体育项目进校园(足球进曲水县南木乡小学站)举行</a><span>
                      2021-10-29
                    </span>
</li>, <li class="clearfix">
<a href="./202110/t20211027_267192.html" target="_blank">促进冰雪运动发展,助力北京冬奥会 推动山地户外运动大区建设 ——2021第十九届中国西藏登山大会开幕</a><span>
                      2021-10-27
                    </span>
</li>, <li class="clearfix">
<a href="./202110/t20211020_266092.html" target="_blank">2021年全区“三送”暨送体育下基层 贡嘎站活动举行</a><span>
                      2021-10-20
                    </span>
</li>, <li class="clearfix">
<a href="./202110/t20211014_265419.html" target="_blank">健身锅庄舞起来  健康生活更精彩</a><span>
                      2021-10-14
                    </span>
</li>, <li class="clearfix">
<a href="./202110/t20211013_265235.html" target="_blank">2021年全区“三送”暨送体育下基层墨竹工卡站活动举行</a><span>
                      2021-10-13
                    </span>
</li>, <li class="clearfix">
<a href="./202110/t20211012_265047.html" target="_blank">发展登山特色优势产业  打造西藏登山新名片</a><span>
                      2021-10-12
                    </span>
</li>, <div class="gl-pages clearfix">
<style>
a {
    text-decoration: none;
}
.pg_num {
    width: 700px;
    margin: 0px auto;
    overflow: hidden;
   margin-left: auto;
}
.pg_num .page_now {
    background: #007ce3 none repeat scroll 0% 0%;
    color: #FFF;
    font-size: 15px;
    border: 1px solid #007ce3;
}
.pg_num a {
    display: block;
    height: 30px;
    line-height: 30px;
    padding: 0px 9px;
    float: left;
    margin-left: 5px;
    border: 1px solid #DADADA;
    color: #212121;
    width: auto;
     font-size:15px;
}
</style>
<div class="pg_num">
<script type="text/javascript">
/**
 * Writes the pagination links for a listing page into the document using
 * document.write().
 *
 * Link targets follow the pattern visible in the code: the first page is
 * "<name>.<ext>" and page index i (i >= 1) is "<name>_<i>.<ext>". Link labels
 * are one-based (index i is shown as i + 1).
 *
 * @param {number} _nPageCount Total number of pages. Nothing is written when
 *     this is null/undefined or <= 1.
 * @param {number} _nCurrIndex Zero-based index of the current page; a falsy
 *     value is treated as 0.
 * @param {string} _sPageName Base file name of the listing pages.
 * @param {string} _sPageExt File extension of the listing pages (no dot).
 */
function createPageHTML(_nPageCount, _nCurrIndex, _sPageName, _sPageExt){
  if(_nPageCount == null || _nPageCount<=1){
    return;
  }
  var nCurrIndex = _nCurrIndex || 0;
  // 1. Output the "first page" and "previous page" links.
  // 1.1 The current page is the first page.
  if(nCurrIndex == 0){
    document.write("<a href=\""+_sPageName+"."+_sPageExt+"\">首页</a>");
    document.write("<a class='page_now' href=\""+_sPageName+"."+_sPageExt+"\">1</a>");
  }
  // 1.2 The current page is not the first page.
  else{
    var nPreIndex = nCurrIndex - 1;
    // Index 0 has no "_<n>" suffix in its file name.
    var sPreFileExt = nPreIndex == 0 ? "" : ("_" + nPreIndex);

    document.write("<a href=\""+_sPageName+"."+_sPageExt+"\">首页</a>");
    document.write("<a href=\"" + _sPageName + sPreFileExt + "."+_sPageExt+"\">上一页</a>");
    document.write("<a href=\""+_sPageName+"."+_sPageExt+"\">1</a>");
  }
  // 2. Output the numbered links in the middle.
  // Each flag allows one "..." placeholder: flag1 for the gap after the
  // current page, flag2 for the gap before it.
  var flag1=true;
  var flag2=true;

  for(var i=1; i<_nPageCount; i++){

    // Show a numbered link when the page is within 2 of the current page,
    // is the second page, or is one of the last two pages.
    if(i-nCurrIndex<3&&nCurrIndex-i<3||i<2||_nPageCount-i<3){
      if(nCurrIndex == i)
        document.write("<a class='page_now' href=\""+_sPageName+"_" + i + "."+_sPageExt+"\">"+(i+1)+"</a>");
      else
        document.write("<a href=\""+_sPageName+"_" + i + "."+_sPageExt+"\">"+(i+1)+"</a>");
    }
    else if(i-nCurrIndex>2&&flag1){
      flag1=false;
      document.write("<a class='potc' style='border:0px;'>...</a>");
    }
    else if(nCurrIndex-i>2&&flag2){
      flag2=false;
      document.write("<a class='potc' style='border:0px;'>...</a>");
    }
  }

  // 3. Output the "next page" and "last page" links.
  // 3.1 The current page is the last page.
  if(nCurrIndex == (_nPageCount-1)){
    document.write("<a href=\""+_sPageName+"_" + (_nPageCount-1) + "."+_sPageExt+"\">尾页</a>");
  }
  // 3.2 The current page is not the last page.
  else{
    var nNextIndex = nCurrIndex + 1;

    document.write("<a href=\""+_sPageName+"_" + nNextIndex + "."+_sPageExt+"\">下一页</a>");
    document.write("<a href=\""+_sPageName+"_" + (_nPageCount-1) + "."+_sPageExt+"\">尾页</a>");
  }
}
// WCM template tag. NOTE(review): the arguments are presumably filled in by the CMS when the page is generated -- confirm.
createPageHTML(9,0,"index","html");
</script>
</div>
</div>]

回复列表(4)
添加新回复
回复需要登录