
Смышленный
  
Профиль
Группа: Завсегдатай
Сообщений: 1915
Регистрация: 24.4.2006
Где: Планета Земля
Репутация: 16 Всего: 38
|
Ну так и не будет =)))) Нет такого тега — "<a>". Да и подход неправильный =) Когда-то делал программу для парсинга irr.ru. Вот кусок из неё: Код | const
// Pairs of (lower-case city name, numeric string); presumably irr.ru region
// ids -- confirm against the code that consumes C_Regions.
C_Regions: array[0..1, 0..1] of string = (
  ('москва', '34'),
  ('санкт-петербург', '52')
);

// Marker from which ParseData starts scanning the page.
C_HTML_BeginData = '<tbody id="advertsListContainer">';

// Cell markers of one listing row. ParseData compares them against the
// lower-cased page text, so they must stay lower-case.
C_HTML_BedinString = '<td class="first inner">';
C_HTML_Text = '<td class="inner title"';
C_HTML_PriceString = '<td class="price inner">';
C_HTML_PicString = '<td class="image inner">';
C_HTML_DateString = '<td class="inner date">';
C_HTML_Inner = '<td class="inner">';
C_HTML_Last = '<td class="inner last">';
C_HTML_End = '</td>';

// Pager marker that ends the scan, and the anchor delimiters.
C_HTML_NextSpan = '<span class="next">';
C_HTML_NextLink = '<a href="';
C_HTML_EndLink = '</a>';

// Fragments treated as junk ("musor"). Only C_HTML_Musor_1 is referenced by
// the visible ParseData; the rest are presumably used elsewhere.
C_HTML_Musor_1 = '<a href="http://www.irr.ru/help/offline_highlights_main';
C_HTML_Musor_2 = 'Регион: ';
C_HTML_Musor_3 = ' (at) ';
C_HTML_Musor_4 = '<img src=';
C_HTML_Musor_5 = '" alt=';
C_HTML_Musor_6 = 'Дата выхода объявления в газете:';

// Image source prefix and the relative-date spans that ParseData replaces
// with a dd.mm date.
C_HTML_Pic = '<img src="';
C_HTML_Today = '<span class="today">сегодня</span>';
C_HTML_Yestoday = '<span class="yesterday">вчера</span>';

// Markers of the advert detail page. None of them is referenced by the
// visible ParseData.
C_HTML_StartField = '<td>';
C_HTML_EndField = '</td>';
C_HTML_StartStrong = '<strong';
C_HTML_EndStrong = '</strong>';
C_HTML_Year = '<th>Год выпуска:</th>';
C_HTML_Probeg = '<th>Пробег:</th>';
C_HTML_Transm = '<th>Тип трансмиссии:</th>';
C_HTML_VDvig = '<th>Объем двигателя:</th>';
C_HTML_ADvig = '<th>Мощность двигателя:</th>';
C_HTML_Rul = '<th>Руль:</th>';
C_HTML_Privod = '<th>Привод:</th>';
C_HTML_Kuzov = '<th>Тип кузова:</th>';
C_HTML_Salon = '<label>Салон:</label>';
C_HTML_Person = '<div class="contactData">';
C_HTML_EndDiv = '</div>';
C_HTML_DivName = '<div class="name"';
C_HTML_DivCity = '<div class="city">';
C_HTML_DivPhone = '<div class="phone"';
C_HTML_DivMobile = '<div class="mobile">';
C_HTML_DivICQ = '<div class="messenger">';
C_HTML_DivMail = '<div class="mail">';
C_HTML_Condition = '<label>Кондиционер</label>';
C_HTML_Memo = '<div class="description">';
C_HTML_Picture = '<div class="mainPicture clearfix">';
// Scans the HTML of a listing page (ACode) character by character and collects
// one TRecord per table row into lRecs.
//
// Scanning starts at C_HTML_BeginData and stops at C_HTML_NextSpan or one
// character before the end of ACode. Within a row (opened by
// C_HTML_BedinString, closed by C_HTML_Last) each recognised <td> marker
// selects which field the following characters are accumulated for; the
// accumulated text is stored into lRecord when the cell's </td> is reached.
//
// Returns '' when C_HTML_BeginData is absent, when no records were collected,
// or when FFullData is False. Otherwise it first requests every record's
// TextLink and PicLink (one request at a time) and returns lRecs.Export.
//
// Side effects: clears and refills lRecs, posts FMsgCounter messages to
// FHandle, writes FThreadCount and FIdx.
function ParseData(const ACode: string): string;
var
  Idx, lIdx: Integer;
  lTRec: TTypeRec;
  lZap, lFlag: Boolean;
  lRecord: TRecord;
  lStr: string;

  // True when ACode, lower-cased, contains AToken at the current position.
  // AToken itself must already be lower-case.
  function Matches(const AToken: string): Boolean;
  begin
    Result := LowerCase(Copy(ACode, Idx, Length(AToken))) = AToken;
  end;

  // Resets every field of the row accumulator.
  procedure ClearRecord;
  begin
    lRecord.Text := '';
    lRecord.Price := '';
    lRecord.City := '';
    lRecord.Date := '';
    lRecord.PicLink := '';
    lRecord.TextLink := '';
    lRecord.Year := '';
    lRecord.Km := '';
    lRecord.Trans := '';
    lRecord.VDvig := '';
    lRecord.ADvig := '';
    lRecord.Rul := '';
    lRecord.Privod := '';
    lRecord.Kuzov := '';
    lRecord.Salon := '';
    lRecord.Dop := '';
    lRecord.Person := '';
    lRecord.PersonCity := '';
    lRecord.PersonPhone := '';
    lRecord.PersonMobile := '';
    lRecord.PersonICQ := '';
    lRecord.PersonMail := '';
    lRecord.Condition := False;
  end;

  // Switches the collector to AKind and skips over the cell marker AToken.
  procedure BeginCell(AKind: TTypeRec; const AToken: string);
  begin
    lTRec := AKind;
    lFlag := False;
    Inc(Idx, Length(AToken));
  end;

  // Appends the character at the current position to lStr according to the
  // active cell kind. May advance Idx past a recognised inner marker.
  procedure CollectChar;
  begin
    case lTRec of
      trText:
        begin
          // Only the anchors (<a href="...">...</a>) of the title cell are kept.
          if (not lFlag) and Matches(C_HTML_NextLink) then
            lFlag := True
          // The closing tag is matched case-insensitively, like the opening one.
          else if lFlag and Matches(C_HTML_EndLink) then
          begin
            lStr := lStr + C_HTML_EndLink;
            Inc(Idx, Length(C_HTML_EndLink));
            lFlag := False;
          end;
          if lFlag then
            lStr := lStr + ACode[Idx];
        end;
      trPrice, trCity:
        if ACode[Idx] <> '<' then
          lStr := lStr + ACode[Idx];
      trDate:
        // Keeps everything, including the '<' of the closing </td>;
        // StoreCell drops that last character.
        lStr := lStr + ACode[Idx];
      trPic:
        begin
          // Collect the value of src="..." up to the closing quote.
          if (not lFlag) and Matches(C_HTML_Pic) then
          begin
            lFlag := True;
            Inc(Idx, Length(C_HTML_Pic));
          end
          else if lFlag and (Copy(ACode, Idx, 1) = '"') then
            lFlag := False;
          // Idx may have jumped past the end of a truncated page.
          if lFlag and (Idx <= Length(ACode)) then
            lStr := lStr + ACode[Idx];
        end;
    end;
  end;

  // Stores the text accumulated for the current cell into lRecord.
  procedure StoreCell;
  begin
    case lTRec of
      trText:
        begin
          // Drop everything up to and including the help link, if present.
          lIdx := Pos(C_HTML_Musor_1, lStr);
          if lIdx > 0 then
          begin
            // Bounded scan: without the length check a help link that has no
            // closing </a> would keep this loop running far past the end of
            // lStr. When no </a> is found the Copy below yields ''.
            while (lIdx <= Length(lStr)) and
              (LowerCase(Copy(lStr, lIdx, Length(C_HTML_EndLink))) <> C_HTML_EndLink) do
              Inc(lIdx);
            lStr := Copy(lStr, lIdx + Length(C_HTML_EndLink),
              Length(lStr) - lIdx - Length(C_HTML_EndLink) + 1);
          end;
          lStr := Utf8ToAnsi(Trim(StringReplace(lStr, '''', '"', [rfReplaceAll])));
          if LowerCase(Copy(lStr, 1, Length(C_HTML_NextLink))) = C_HTML_NextLink then
          begin
            // Split <a href="URL">TEXT</a> into URL and TEXT.
            lIdx := 1;
            while (lIdx < Length(lStr)) and (Copy(lStr, lIdx, 2) <> '">') do
              Inc(lIdx);
            lRecord.TextLink := Copy(lStr, Length(C_HTML_NextLink) + 1,
              lIdx - Length(C_HTML_NextLink) - 1);
            // -5 = the two characters of '">' are skipped and the trailing
            // '</a>' (4 characters) is cut off.
            lRecord.Text := Copy(lStr, lIdx + 2, Length(lStr) - lIdx - 5);
          end
          else
            lRecord.Text := lStr;
        end;
      trPrice:
        lRecord.Price := Utf8ToAnsi(Trim(lStr));
      trCity:
        lRecord.City := Utf8ToAnsi(Trim(lStr));
      trDate:
        begin
          lStr := StringReplace(lStr, #13, '', [rfReplaceAll]);
          lStr := StringReplace(lStr, #10, '', [rfReplaceAll]);
          lStr := StringReplace(lStr, '<br/>', ' ', [rfReplaceAll, rfIgnoreCase]);
          lStr := StringReplace(lStr, #9, '', [rfReplaceAll]);
          // The last character is the '<' of the closing </td>.
          lStr := Utf8ToAnsi(Copy(lStr, 1, Length(lStr) - 1));
          // Replace the "today"/"yesterday" spans with a zero-padded dd.mm date.
          lStr := StringReplace(lStr, C_HTML_Today,
            FormatDateTime('dd"."mm', Now), []);
          lStr := StringReplace(lStr, C_HTML_Yestoday,
            FormatDateTime('dd"."mm', Now - 1), []);
          lRecord.Date := lStr;
        end;
      trPic:
        lRecord.PicLink := Utf8ToAnsi(Trim(lStr));
    end;
  end;

begin
  Result := '';
  Idx := Pos(C_HTML_BeginData, ACode);
  if Idx <= 0 then
    Exit;

  lRecs.Clear;
  lTRec := trNone;
  lZap := False;
  ClearRecord;
  lStr := '';
  lFlag := False;

  while (Idx < Length(ACode)) and not Matches(C_HTML_NextSpan) do
  begin
    // Inside a row and inside a recognised cell: accumulate its content.
    if lZap and (lTRec <> trNone) then
      CollectChar;

    if lZap and (lTRec <> trNone) and Matches(C_HTML_End) then
    begin
      // End of the current cell.
      if lStr <> '' then
        StoreCell;
      lStr := '';
      lTRec := trNone;
      Inc(Idx, Length(C_HTML_End));
    end
    else if lZap and Matches(C_HTML_Text) then
      BeginCell(trText, C_HTML_Text)
    else if lZap and Matches(C_HTML_PriceString) then
      BeginCell(trPrice, C_HTML_PriceString)
    else if lZap and Matches(C_HTML_PicString) then
      BeginCell(trPic, C_HTML_PicString)
    else if lZap and Matches(C_HTML_Inner) then
      BeginCell(trCity, C_HTML_Inner)
    else if lZap and Matches(C_HTML_DateString) then
      BeginCell(trDate, C_HTML_DateString)
    else if Matches(C_HTML_BedinString) then
    begin
      // First cell of a row: start recording.
      lZap := True;
      BeginCell(trNone, C_HTML_BedinString);
    end
    else if Matches(C_HTML_Last) then
    begin
      // Last cell of a row: commit the record if it has a title.
      if lRecord.Text <> '' then
      begin
        // One message per record and one more when it has a picture.
        PostMessage(FHandle, FMsgCounter, 1, 0);
        if lRecord.PicLink <> '' then
          PostMessage(FHandle, FMsgCounter, 1, 0);
        lRecs.Add(lRecord);
      end;
      ClearRecord;
      lStr := '';
      lZap := False;
      BeginCell(trNone, C_HTML_Last);
    end
    else
      Inc(Idx);
  end;

  if (lRecs.Count > 0) and FFullData then
  begin
    //PostMessage(FHandle, FMsgCounter, lRecs.Count, 0);

    // NOTE(review): the loops below spin on FThreadCount, which this function
    // only increments; presumably EndFullReq / EndPict decrement it from
    // another thread -- confirm. Left unchanged because the threading model
    // is not visible here.
    Idx := 0;
    FThreadCount := 0;
    while Idx < lRecs.Count do
    begin
      // Wait until the previous request has finished.
      if FThreadCount > 0 then
        Continue;
      Inc(FThreadCount);
      FIdx := Idx;
      Get(lRecs[Idx].TextLink, 60, EndFullReq, FHandle, FMsgCounter);
      Inc(Idx);
    end;
    while FThreadCount <> 0 do
      ;

    Idx := 0;
    while Idx < lRecs.Count do
    begin
      if FThreadCount > 0 then
        Continue;
      FIdx := Idx;
      if lRecs[Idx].PicLink <> '' then
      begin
        Inc(FThreadCount);
        GetPicture(lRecs[Idx].PicLink, 60, EndPict, FHandle, FMsgCounter);
      end;
      Inc(Idx);
    end;
    while FThreadCount <> 0 do
      ;

    Result := lRecs.Export;
  end;
end;
|
Правда, он большой, но, думаю, разобраться сможешь. Ну а если нет — тогда надо учиться. Upd: обрати внимание на Код | if LowerCase(Copy(lStr, 1, Length(C_HTML_NextLink))) = C_HTML_NextLink then begin lIdx := 1; while (lIdx < Length(lStr)) and (Copy(lStr, lIdx, 2) <> '">') do Inc(lIdx); lRecord.TextLink := Copy(lStr, Length(C_HTML_NextLink) + 1, lIdx - Length(C_HTML_NextLink) - 1); lRecord.Text := Copy(lStr, lIdx + 2, Length(lStr) - lIdx - 5); end else lRecord.Text := lStr;
|
Это сообщение отредактировал(а) aktuba - 11.8.2007, 23:11
|