Update to latest dependencies

Signed-off-by: hr567 <hr567@hr567.me>
This commit is contained in:
hr567 2023-11-13 02:12:48 +08:00
parent f6298eef21
commit c2e7e6e915
Signed by: hr567
SSH Key Fingerprint: SHA256:AUrHz/3TKmc6tf4wqaaEPV/BGQ4aULSp7g76FpqujYw
3 changed files with 505 additions and 464 deletions

921
Cargo.lock generated

File diff suppressed because it is too large. Load Diff

View File

@@ -2,11 +2,11 @@
name = "quote-scraper" name = "quote-scraper"
version = "0.1.0" version = "0.1.0"
authors = ["hr567 <hr567@hr567.me>"] authors = ["hr567 <hr567@hr567.me>"]
edition = "2018" edition = "2021"
[dependencies] [dependencies]
lazy_static = "1.4.0" once_cell = "1.18.0"
reqwest = "0.11.11" reqwest = "0.11.22"
scraper = "0.13.0" scraper = "0.18.1"
tokio = { version = "1.19.2", features = ["macros", "rt-multi-thread", "sync"] } tokio = { version = "1.34.0", features = ["macros", "rt-multi-thread", "sync"] }
url = "2.2.2" url = "2.4.1"

View File

@@ -1,25 +1,23 @@
use std::sync::Arc; use std::sync::Arc;
use lazy_static::lazy_static; use once_cell::sync::Lazy;
use reqwest::Client; use reqwest::Client;
use scraper::{Html, Selector}; use scraper::{Html, Selector};
use tokio::sync::{mpsc, Semaphore}; use tokio::sync::{mpsc, Semaphore};
use url::Url; use url::Url;
const MAX_TASK: usize = 16; const MAX_TASK: usize = 8;
lazy_static! { static URL: Lazy<Url> = Lazy::new(|| Url::parse("https://quotes.toscrape.com/").unwrap());
static ref URL: Url = Url::parse("https://quotes.toscrape.com/").unwrap(); static CLIENT: Lazy<Client> = Lazy::new(|| {
static ref CLIENT: Client = { use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT}; let mut headers = HeaderMap::new();
let mut headers = HeaderMap::new(); let user_agent = HeaderValue::from_static(
let user_agent = HeaderValue::from_static( r"Mozilla/5.0 (X11; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0",
r"Mozilla/5.0 (X11; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0", );
); headers.insert(USER_AGENT, user_agent);
headers.insert(USER_AGENT, user_agent); Client::builder().default_headers(headers).build().unwrap()
Client::builder().default_headers(headers).build().unwrap() });
};
}
#[allow(dead_code)] #[allow(dead_code)]
#[derive(Debug)] #[derive(Debug)]
@@ -36,13 +34,11 @@ async fn download_quote_html(idx: usize) -> reqwest::Result<String> {
Ok(html) Ok(html)
} }
static QUOTE: Lazy<Selector> = Lazy::new(|| Selector::parse(r#".quote"#).unwrap());
static TEXT: Lazy<Selector> = Lazy::new(|| Selector::parse(r#".text"#).unwrap());
static AUTHOR: Lazy<Selector> = Lazy::new(|| Selector::parse(r#".author"#).unwrap());
static TAG: Lazy<Selector> = Lazy::new(|| Selector::parse(r#".tag"#).unwrap());
fn parse_quote_html(page: Html) -> Vec<Quote> { fn parse_quote_html(page: Html) -> Vec<Quote> {
lazy_static! {
static ref QUOTE: Selector = Selector::parse(r#".quote"#).unwrap();
static ref TEXT: Selector = Selector::parse(r#".text"#).unwrap();
static ref AUTHOR: Selector = Selector::parse(r#".author"#).unwrap();
static ref TAG: Selector = Selector::parse(r#".tag"#).unwrap();
}
page.select(&QUOTE) page.select(&QUOTE)
.map(|quote| Quote { .map(|quote| Quote {
text: quote.select(&TEXT).next().unwrap().inner_html(), text: quote.select(&TEXT).next().unwrap().inner_html(),