Java 爬虫学习(一)关于 HttpClient 发起 Get 、Post 请求

it2022-05-05  126

0. 项目配置 jar 包引入,以及日志处理

项目目录结构:

pom.xml <dependencies> <!-- https://mvnrepository.com/artifact/org.apache.httpcomponents/httpclient --> <dependency> <groupId>org.apache.httpcomponents</groupId> <artifactId>httpclient</artifactId> <version>4.5.2</version> </dependency> <!-- https://mvnrepository.com/artifact/org.slf4j/slf4j-log4j12 --> <dependency> <groupId>org.slf4j</groupId> <artifactId>slf4j-log4j12</artifactId> <version>1.7.25</version> <!--<scope>test</scope>--> </dependency> </dependencies> log4j.properties log4j.rootLogger=DEBUG,A1 log4j.logger.mr.s = DEBUG log4j.appender.A1=org.apache.log4j.ConsoleAppender log4j.appender.A1.layout=org.apache.log4j.PatternLayout log4j.appender.A1.layout.ConversionPattern=%-d{yyyy-MM-dd HH:mm:ss,SSS} [%t] [%c]-[%p] %m%n

 

1. 创建 HttpClient 网络请求客户端

1.1 创建基本的 HttpClient 对象

// 创建 HttpClient 对象 CloseableHttpClient httpClient = HttpClients.createDefault();

1.2  使用连接池管理器创建 HttpClient 对象

public static void main(String[] args) { // 创建连接池管理器 PoolingHttpClientConnectionManager cm = new PoolingHttpClientConnectionManager(); // 设置最大连接数 cm.setMaxTotal(100); // 设置每个主机的最大连接数 cm.setDefaultMaxPerRoute(10); // 使用连接池管理器发起请求 doGet(cm); doGet(cm); } private static void doGet(PoolingHttpClientConnectionManager cm) { // 从连接池中获取 HttpClient 对象 CloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(cm).build(); }

2. 设置 HttpGet 、HttpPost 请求对象 

2.1 创建 HttpGet 对象,并设置请求参数

// 设置参数 URIBuilder URIBuilder uriBuilder = new URIBuilder("http://yun.itheima.com/search"); uriBuilder.setParameter("keys", "Java"); // 创建 HttpGet 对象 HttpGet httpGet = new HttpGet(uriBuilder.build());

该请求访问后,日志信息如下:

 2.2 创建 HttpPost 对象,并设置表单请求参数

// 创建 HttpPost 对象 HttpPost httpPost = new HttpPost("http://yun.itheima.com/search"); // 创建 List 集合,添加请求参数 List<NameValuePair> params = new ArrayList<NameValuePair>(); params.add(new BasicNameValuePair("keys", "Java")); // 创建表单 Entity 对象, (参数, 编码) UrlEncodedFormEntity formEntity = new UrlEncodedFormEntity(params, "utf-8"); // HttpPost 设置参数 httpPost.setEntity(formEntity);

该请求访问后,日志信息如下:

3. 请求的配置(有关请求的时间配置)(可选)

// 请求配置 RequestConfig config = RequestConfig.custom().setConnectTimeout(1000) // 设置创建连接的最长时间,单位是毫秒 .setConnectionRequestTimeout(500) // 设置获取连接的最长时间 .setSocketTimeout(10 * 1000) // 设置数据传输的最长时间 .build(); // 设置配置信息 httpGet.setConfig(config);

4. 重要的来了,如何接收请求 !!!

下面以 HttpGet 的响应解析为例,HttpPost 的解析与之类似。注意一点:下面关闭 HttpClient 的写法只针对没有使用 1.2 中所提及的连接池创建的 HttpClient 对象(使用连接池创建的 HttpClient 对象,不需要关闭)。

CloseableHttpResponse response = null;
try {
    // Execute the request and obtain the response.
    response = httpClient.execute(httpGet);
    // Only read the body when the request succeeded.
    if (response.getStatusLine().getStatusCode() == 200) {
        // "utf-8" is the fallback charset used when the response does not declare one.
        String content = EntityUtils.toString(response.getEntity(), "utf-8");
        System.out.println(content.length());
    }
} catch (IOException e) {
    e.printStackTrace();
} finally {
    // Release resources. execute() may throw before response is assigned,
    // so guard against null to avoid a NullPointerException here.
    if (response != null) {
        try {
            response.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    try {
        httpClient.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}

5. 一个较为完整的 Demo

package mr.s.crawler.test;

import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.util.EntityUtils;

import java.io.IOException;

/**
 * Demo: issues two GET requests through HttpClient instances that share one
 * pooling connection manager, and prints the length of each response body.
 */
public class HttpClientPoolTest {

    /**
     * Configures the connection pool and runs two GET requests against it.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if a request URI cannot be built
     */
    public static void main(String[] args) throws Exception {
        // Create the pooling connection manager.
        PoolingHttpClientConnectionManager cm = new PoolingHttpClientConnectionManager();
        // Maximum number of connections in the whole pool.
        cm.setMaxTotal(100);
        // Maximum number of connections per host (route).
        cm.setDefaultMaxPerRoute(10);

        // Issue the requests through the shared connection manager.
        doGet(cm);
        doGet(cm);
    }

    /**
     * Sends a GET request to the search page with {@code keys=Java} and prints
     * the length of the response body when the status code is 200.
     * An {@link IOException} during the request is printed, not rethrown.
     *
     * @param cm the shared pooling connection manager
     * @throws Exception if the request URI cannot be built
     */
    private static void doGet(PoolingHttpClientConnectionManager cm) throws Exception {
        // Obtain an HttpClient backed by the connection pool. The client is not
        // closed in this method; the pool is shared between calls.
        CloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(cm).build();

        // Build the request URI with URIBuilder and add the query parameter.
        URIBuilder uriBuilder = new URIBuilder("http://yun.itheima.com/search");
        uriBuilder.setParameter("keys", "Java");

        // Create the HttpGet object.
        HttpGet httpGet = new HttpGet(uriBuilder.build());

        // Request configuration (timeouts are given in milliseconds).
        RequestConfig config = RequestConfig.custom()
                .setConnectTimeout(1000)           // max time to establish the connection
                .setConnectionRequestTimeout(500)  // max time to obtain a connection
                .setSocketTimeout(10 * 1000)       // max time for data transfer
                .build();
        // Apply the configuration to the request.
        httpGet.setConfig(config);

        CloseableHttpResponse response = null;
        try {
            response = httpClient.execute(httpGet);
            if (response.getStatusLine().getStatusCode() == 200) {
                String content = EntityUtils.toString(response.getEntity(), "utf-8");
                System.out.println(content.length());
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // response stays null when execute() throws, so check before closing.
            if (response != null) {
                try {
                    response.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}

 


最新回复(0)