Tôi đã mạn phép viết lại mã của bạn một chút theo hướng OOP thay vì dạng hàm (functional), vì như vậy dễ tập trung vào từng phần nhỏ của mã hơn nhiều. Nếu cần, bạn có thể dễ dàng chuyển nó lại thành mã dạng hàm.
Lớp này nhận một đối tượng ngày (date); ngày đó được định dạng thành dạng Jan2020 để tải lịch của tháng tương ứng.
// date_create() with no arguments yields the current date/time, so this loads the current month's calendar.
$parser = new CalendarParser(date_create());
Để lấy các sự kiện trong một khoảng ngày từ các bản ghi lịch, bạn cần gọi $parser->getEventsBetweenDates() với một startDate và một endDate. Phần giờ không được tính đến khi phân tích, nhưng bạn có thể bổ sung nếu cần. Đây là một ví dụ:
// Request the events dated 2020-01-01 through 2020-01-02; the call returns an array of event records.
$parser->getEventsBetweenDates(
date_create_from_format('Y-m-d H:i:s', '2020-01-01 00:00:00'),
date_create_from_format('Y-m-d H:i:s', '2020-01-02 23:59:59')
)
Kết quả của đoạn mã trên là:
<!-- language: lang-none -->
array(22) {
[0] => array(10) {
'eventId' => string(6) "114340"
'date' => string(10) "2020-01-01"
'sourceTEXT' => NULL
'sourceURL' => NULL
'latestURL' => NULL
'measures' => NULL
'usual_effect' => NULL
'derived_via' => NULL
'why_traders_care' => string(230) "Banks facilitate the majority of foreign exchange volume. When they are closed the market is less liquid and speculators become a more dominant market influence. This can lead to both abnormally low and abnormally high volatility;"
'frequency' => NULL
}
[1] => array(10) {
'eventId' => string(6) "114341"
'date' => string(10) "2020-01-01"
'sourceTEXT' => NULL
'sourceURL' => NULL
'latestURL' => NULL
'measures' => NULL
'usual_effect' => NULL
'derived_via' => NULL
'why_traders_care' => string(230) "Banks facilitate the majority of foreign exchange volume. When they are closed the market is less liquid and speculators become a more dominant market influence. This can lead to both abnormally low and abnormally high volatility;"
'frequency' => NULL
}
[2] => array(10) {
'eventId' => string(6) "114342"
'date' => string(10) "2020-01-01"
'sourceTEXT' => NULL
'sourceURL' => NULL
'latestURL' => NULL
'measures' => NULL
'usual_effect' => NULL
'derived_via' => NULL
'why_traders_care' => string(230) "Banks facilitate the majority of foreign exchange volume. When they are closed the market is less liquid and speculators become a more dominant market influence. This can lead to both abnormally low and abnormally high volatility;"
'frequency' => NULL
}
[3] => array(10) {
'eventId' => string(6) "114343"
'date' => string(10) "2020-01-01"
'sourceTEXT' => NULL
'sourceURL' => NULL
'latestURL' => NULL
'measures' => NULL
'usual_effect' => NULL
'derived_via' => NULL
'why_traders_care' => string(230) "Banks facilitate the majority of foreign exchange volume. When they are closed the market is less liquid and speculators become a more dominant market influence. This can lead to both abnormally low and abnormally high volatility;"
'frequency' => NULL
}
[4] => array(10) {
'eventId' => string(6) "114328"
'date' => string(10) "2020-01-01"
'sourceTEXT' => NULL
'sourceURL' => NULL
'latestURL' => NULL
'measures' => NULL
'usual_effect' => NULL
'derived_via' => NULL
'why_traders_care' => string(230) "Banks facilitate the majority of foreign exchange volume. When they are closed the market is less liquid and speculators become a more dominant market influence. This can lead to both abnormally low and abnormally high volatility;"
'frequency' => NULL
}
[5] => array(10) {
'eventId' => string(6) "113632"
'date' => string(10) "2020-01-01"
'sourceTEXT' => NULL
'sourceURL' => NULL
'latestURL' => NULL
'measures' => NULL
'usual_effect' => NULL
'derived_via' => NULL
'why_traders_care' => string(230) "Banks facilitate the majority of foreign exchange volume. When they are closed the market is less liquid and speculators become a more dominant market influence. This can lead to both abnormally low and abnormally high volatility;"
'frequency' => NULL
}
[6] => array(10) {
'eventId' => string(6) "114308"
'date' => string(10) "2020-01-01"
'sourceTEXT' => NULL
'sourceURL' => NULL
'latestURL' => NULL
'measures' => NULL
'usual_effect' => NULL
'derived_via' => NULL
'why_traders_care' => string(230) "Banks facilitate the majority of foreign exchange volume. When they are closed the market is less liquid and speculators become a more dominant market influence. This can lead to both abnormally low and abnormally high volatility;"
'frequency' => NULL
}
[7] => array(10) {
'eventId' => string(6) "113607"
'date' => string(10) "2020-01-01"
'sourceTEXT' => NULL
'sourceURL' => NULL
'latestURL' => NULL
'measures' => NULL
'usual_effect' => NULL
'derived_via' => NULL
'why_traders_care' => string(230) "Banks facilitate the majority of foreign exchange volume. When they are closed the market is less liquid and speculators become a more dominant market influence. This can lead to both abnormally low and abnormally high volatility;"
'frequency' => NULL
}
[8] => array(10) {
'eventId' => string(6) "113816"
'date' => string(10) "2020-01-01"
'sourceTEXT' => NULL
'sourceURL' => NULL
'latestURL' => NULL
'measures' => NULL
'usual_effect' => NULL
'derived_via' => NULL
'why_traders_care' => string(230) "Banks facilitate the majority of foreign exchange volume. When they are closed the market is less liquid and speculators become a more dominant market influence. This can lead to both abnormally low and abnormally high volatility;"
'frequency' => NULL
}
[9] => array(10) {
'eventId' => string(6) "114718"
'date' => string(10) "2020-01-02"
'sourceTEXT' => string(25) "Reserve Bank of Australia"
'sourceURL' => string(21) "http://www.rba.gov.au"
'latestURL' => string(65) "http://www.rba.gov.au/statistics/frequency/commodity-prices/2019/"
'measures' => string(52) "Change in the selling price of exported commodities;"
'usual_effect' => string(54) "'Actual' greater than 'Forecast' is good for currency;"
'derived_via' => string(120) "The average selling price of the nation's main commodity exports are sampled and then compared to the previous sampling;"
'why_traders_care' => string(128) "It's a leading indicator of the nation's trade balance with other countries because rising commodity prices boost export income;"
'frequency' => string(65) "Released monthly, on the first business day after the month ends;"
}
[10] => array(10) {
'eventId' => string(6) "114344"
'date' => string(10) "2020-01-02"
'sourceTEXT' => NULL
'sourceURL' => NULL
'latestURL' => NULL
'measures' => NULL
'usual_effect' => NULL
'derived_via' => NULL
'why_traders_care' => string(230) "Banks facilitate the majority of foreign exchange volume. When they are closed the market is less liquid and speculators become a more dominant market influence. This can lead to both abnormally low and abnormally high volatility;"
'frequency' => NULL
}
[11] => array(10) {
'eventId' => string(6) "111383"
'date' => string(10) "2020-01-02"
'sourceTEXT' => string(6) "Markit"
'sourceURL' => string(30) "http://www.markiteconomics.com"
'latestURL' => string(72) "https://www.markiteconomics.com/Public/Release/PressReleases?language=en"
'measures' => string(95) "Level of a diffusion index based on surveyed purchasing managers in the manufacturing industry;"
'usual_effect' => string(54) "'Actual' greater than 'Forecast' is good for currency;"
'derived_via' => string(204) "Survey of about 400 purchasing managers which asks respondents to rate the relative level of business conditions including employment, production, new orders, prices, supplier deliveries, and inventories;"
'why_traders_care' => string(213) "It's a leading indicator of economic health - businesses react quickly to market conditions, and their purchasing managers hold perhaps the most current and relevant insight into the company's view of the economy;"
'frequency' => string(65) "Released monthly, on the first business day after the month ends;"
}
[12] => array(10) {
'eventId' => string(6) "111382"
'date' => string(10) "2020-01-02"
'sourceTEXT' => string(6) "Markit"
'sourceURL' => string(30) "http://www.markiteconomics.com"
'latestURL' => string(72) "https://www.markiteconomics.com/Public/Release/PressReleases?language=en"
'measures' => string(95) "Level of a diffusion index based on surveyed purchasing managers in the manufacturing industry;"
'usual_effect' => string(54) "'Actual' greater than 'Forecast' is good for currency;"
'derived_via' => string(204) "Survey of about 450 purchasing managers which asks respondents to rate the relative level of business conditions including employment, production, new orders, prices, supplier deliveries, and inventories;"
'why_traders_care' => string(213) "It's a leading indicator of economic health - businesses react quickly to market conditions, and their purchasing managers hold perhaps the most current and relevant insight into the company's view of the economy;"
'frequency' => string(65) "Released monthly, on the first business day after the month ends;"
}
[13] => array(10) {
'eventId' => string(6) "111379"
'date' => string(10) "2020-01-02"
'sourceTEXT' => string(6) "Markit"
'sourceURL' => string(30) "http://www.markiteconomics.com"
'latestURL' => string(72) "https://www.markiteconomics.com/Public/Release/PressReleases?language=en"
'measures' => string(95) "Level of a diffusion index based on surveyed purchasing managers in the manufacturing industry;"
'usual_effect' => string(54) "'Actual' greater than 'Forecast' is good for currency;"
'derived_via' => string(204) "Survey of about 750 purchasing managers which asks respondents to rate the relative level of business conditions including employment, production, new orders, prices, supplier deliveries, and inventories;"
'why_traders_care' => string(213) "It's a leading indicator of economic health - businesses react quickly to market conditions, and their purchasing managers hold perhaps the most current and relevant insight into the company's view of the economy;"
'frequency' => string(65) "Released monthly, on the first business day after the month ends;"
}
[14] => array(10) {
'eventId' => string(6) "111380"
'date' => string(10) "2020-01-02"
'sourceTEXT' => string(6) "Markit"
'sourceURL' => string(30) "http://www.markiteconomics.com"
'latestURL' => string(72) "https://www.markiteconomics.com/Public/Release/PressReleases?language=en"
'measures' => string(95) "Level of a diffusion index based on surveyed purchasing managers in the manufacturing industry;"
'usual_effect' => string(54) "'Actual' greater than 'Forecast' is good for currency;"
'derived_via' => string(204) "Survey of about 800 purchasing managers which asks respondents to rate the relative level of business conditions including employment, production, new orders, prices, supplier deliveries, and inventories;"
'why_traders_care' => string(213) "It's a leading indicator of economic health - businesses react quickly to market conditions, and their purchasing managers hold perhaps the most current and relevant insight into the company's view of the economy;"
'frequency' => string(65) "Released monthly, on the first business day after the month ends;"
}
[15] => array(10) {
'eventId' => string(6) "111381"
'date' => string(10) "2020-01-02"
'sourceTEXT' => string(6) "Markit"
'sourceURL' => string(30) "http://www.markiteconomics.com"
'latestURL' => string(72) "https://www.markiteconomics.com/Public/Release/PressReleases?language=en"
'measures' => string(95) "Level of a diffusion index based on surveyed purchasing managers in the manufacturing industry;"
'usual_effect' => string(54) "'Actual' greater than 'Forecast' is good for currency;"
'derived_via' => string(205) "Survey of about 5000 purchasing managers which asks respondents to rate the relative level of business conditions including employment, production, new orders, prices, supplier deliveries, and inventories;"
'why_traders_care' => string(213) "It's a leading indicator of economic health - businesses react quickly to market conditions, and their purchasing managers hold perhaps the most current and relevant insight into the company's view of the economy;"
'frequency' => string(65) "Released monthly, on the first business day after the month ends;"
}
[16] => array(10) {
'eventId' => string(6) "111397"
'date' => string(10) "2020-01-02"
'sourceTEXT' => string(6) "Markit"
'sourceURL' => string(30) "http://www.markiteconomics.com"
'latestURL' => string(72) "https://www.markiteconomics.com/Public/Release/PressReleases?language=en"
'measures' => string(95) "Level of a diffusion index based on surveyed purchasing managers in the manufacturing industry;"
'usual_effect' => string(54) "'Actual' greater than 'Forecast' is good for currency;"
'derived_via' => string(204) "Survey of about 650 purchasing managers which asks respondents to rate the relative level of business conditions including employment, production, new orders, prices, supplier deliveries, and inventories;"
'why_traders_care' => string(213) "It's a leading indicator of economic health - businesses react quickly to market conditions, and their purchasing managers hold perhaps the most current and relevant insight into the company's view of the economy;"
'frequency' => string(65) "Released monthly, on the first business day after the month ends;"
}
[17] => array(10) {
'eventId' => string(6) "111102"
'date' => string(10) "2020-01-02"
'sourceTEXT' => string(34) "Challenger, Gray & Christmas, Inc."
'sourceURL' => string(30) "http://www.challengergray.com/"
'latestURL' => string(50) "http://www.challengergray.com/press/press-releases"
'measures' => string(56) "Change in the number of job cuts announced by employers;"
'usual_effect' => string(51) "'Actual' less than 'Forecast' is good for currency;"
'derived_via' => NULL
'why_traders_care' => NULL
'frequency' => string(52) "Released monthly, about 3 days after the month ends;"
}
[18] => array(10) {
'eventId' => string(6) "110766"
'date' => string(10) "2020-01-02"
'sourceTEXT' => string(19) "Department of Labor"
'sourceURL' => string(18) "http://www.dol.gov"
'latestURL' => string(20) "https://www.dol.gov/"
'measures' => string(103) "The number of individuals who filed for unemployment insurance for the first time during the past week;"
'usual_effect' => string(51) "'Actual' less than 'Forecast' is good for currency;"
'derived_via' => NULL
'why_traders_care' => string(306) "Although it's generally viewed as a lagging indicator, the number of unemployed people is an important signal of overall economic health because consumer spending is highly correlated with labor-market conditions. Unemployment is also a major consideration for those steering the country's monetary policy;"
'frequency' => string(44) "Released weekly, 5 days after the week ends;"
}
[19] => array(10) {
'eventId' => string(6) "113642"
'date' => string(10) "2020-01-02"
'sourceTEXT' => string(6) "Markit"
'sourceURL' => string(30) "http://www.markiteconomics.com"
'latestURL' => string(72) "https://www.markiteconomics.com/Public/Release/PressReleases?language=en"
'measures' => string(95) "Level of a diffusion index based on surveyed purchasing managers in the manufacturing industry;"
'usual_effect' => string(54) "'Actual' greater than 'Forecast' is good for currency;"
'derived_via' => string(204) "Survey of about 400 purchasing managers which asks respondents to rate the relative level of business conditions including employment, production, new orders, prices, supplier deliveries, and inventories;"
'why_traders_care' => string(213) "It's a leading indicator of economic health - businesses react quickly to market conditions, and their purchasing managers hold perhaps the most current and relevant insight into the company's view of the economy;"
'frequency' => string(65) "Released monthly, on the first business day after the month ends;"
}
[20] => array(10) {
'eventId' => string(6) "111392"
'date' => string(10) "2020-01-02"
'sourceTEXT' => string(6) "Markit"
'sourceURL' => string(30) "http://www.markiteconomics.com"
'latestURL' => string(72) "https://www.markiteconomics.com/Public/Release/PressReleases?language=en"
'measures' => string(95) "Level of a diffusion index based on surveyed purchasing managers in the manufacturing industry;"
'usual_effect' => string(54) "'Actual' greater than 'Forecast' is good for currency;"
'derived_via' => string(204) "Survey of about 800 purchasing managers which asks respondents to rate the relative level of business conditions including employment, production, new orders, prices, supplier deliveries, and inventories;"
'why_traders_care' => string(213) "It's a leading indicator of economic health - businesses react quickly to market conditions, and their purchasing managers hold perhaps the most current and relevant insight into the company's view of the economy;"
'frequency' => string(65) "Released monthly, on the first business day after the month ends;"
}
[21] => array(10) {
'eventId' => string(6) "113817"
'date' => string(10) "2020-01-02"
'sourceTEXT' => NULL
'sourceURL' => NULL
'latestURL' => NULL
'measures' => NULL
'usual_effect' => NULL
'derived_via' => NULL
'why_traders_care' => string(230) "Banks facilitate the majority of foreign exchange volume. When they are closed the market is less liquid and speculators become a more dominant market influence. This can lead to both abnormally low and abnormally high volatility;"
'frequency' => NULL
}
}
Đây là mã đầy đủ:
<?php
require 'vendor/autoload.php';
use Goutte\Client;
use Symfony\Component\DomCrawler\Crawler;
/**
 * Scrapes the ForexFactory calendar page of one month and the detail record of
 * each calendar event.
 *
 * The constructor downloads the calendar for the month of the given date and
 * indexes which table rows belong to which day. getEventsBetweenDates() then
 * requests the detail record of every row that falls inside a day range.
 *
 * Written as a class for readability; it can be restructured into plain
 * functions if that is what you need.
 */
class CalendarParser
{
    const BASE_URL = 'https://www.forexfactory.com/calendar.php?month=%s';
    const EVENT_URL = 'https://www.forexfactory.com/flex.php?do=ajax&contentType=Content&flex=calendar_mainCal&details=%d';

    /**
     * HTTP client used for the calendar page and for every event detail request.
     *
     * @var Client
     */
    private $client;

    /**
     * Date whose month (formatted as e.g. `Jan2020`) selects the calendar page
     * and whose year is appended to the day labels found in the table.
     *
     * @var DateTime
     */
    private $calendarMonth;

    /**
     * The downloaded calendar page.
     *
     * @var Crawler
     */
    private $page;

    /**
     * All `.calendar_row` nodes of the calendar page, in document order.
     *
     * @var Crawler
     */
    private $table;

    /**
     * Map of `Y-m-d` => ['start' => first row index, 'end' => last row index].
     * Both indexes are inclusive; 'end' is null for the last date on the page.
     *
     * @var array
     */
    private $dateIndexes;

    /**
     * CalendarParser constructor.
     *
     * Performs the HTTP request for the calendar page immediately.
     *
     * @param DateTime $calendarMonth any date inside the month to load
     * @throws Exception
     */
    public function __construct(DateTime $calendarMonth)
    {
        $this->client = new Client();
        $this->calendarMonth = $calendarMonth;

        // Fetch page and table data and store it so we can iterate over it.
        $this->page = $this->client->request('GET', sprintf(self::BASE_URL, $this->calendarMonth->format('MY')));
        $this->table = $this->page->filter('.calendar_row');

        // Get date indexes
        $this->generateDateIndexes();
    }

    /**
     * Builds the date => row-range index.
     *
     * The table marks the first row of each day with a `newday` class, so the
     * position of those rows tells where each day's records begin and end.
     *
     * @throws RuntimeException when a day label cannot be parsed as a date
     */
    private function generateDateIndexes()
    {
        $dateIndexes = [];
        $previousDate = null;
        $year = $this->calendarMonth->format('Y');

        // NOTE: Crawler::each() calls the closure for every row. There is no way
        // to `break` out of it; `return` only skips the rest of the current row.
        $this->table->each(function (Crawler $node, $index) use (&$dateIndexes, &$previousDate, $year) {
            $isNewDateSeparator = strpos($node->getNode(0)->getAttribute('class'), 'newday') !== false;
            if (!$isNewDateSeparator) {
                return;
            }

            // Convert the label to `Jan-1-YEAR` so it can be parsed and used as an array key.
            $dateText = trim($node->filter('.date > span > span')->text());
            $stringDate = str_replace(' ', '-', $dateText) . '-' . $year;
            $date = date_create_from_format('M-d-Y', $stringDate);
            if ($date === false) {
                // Fail with a clear message instead of a fatal "format() on bool".
                throw new RuntimeException(sprintf('Unable to parse calendar date "%s".', $stringDate));
            }
            $formattedDate = $date->format('Y-m-d');

            $dateIndexes[$formattedDate] = [
                'start' => $index,
                'end' => null
            ];

            // The previous day ends on the row right before this separator (inclusive).
            if ($previousDate !== null) {
                $dateIndexes[$previousDate]['end'] = $index - 1;
            }
            $previousDate = $formattedDate;
        });

        $this->dateIndexes = $dateIndexes;
    }

    /**
     * Downloads the detail record of one calendar row and flattens it into an array.
     *
     * Fields that are absent from the detail record stay null.
     *
     * @param DateTime $date the day the row belongs to
     * @param Crawler $row a calendar row carrying a `data-eventid` attribute
     * @return array
     */
    private function processEvent(DateTime $date, Crawler $row)
    {
        $eventId = $row->attr('data-eventid');
        $event = [
            'eventId' => $eventId,
            'date' => $date->format('Y-m-d'),
            'sourceTEXT' => null,
            'sourceURL' => null,
            'latestURL' => null,
            'measures' => null,
            'usual_effect' => null,
            'derived_via' => null,
            'why_traders_care' => null,
            'frequency' => null
        ];

        $content = $this->client->request('GET', sprintf(self::EVENT_URL, $eventId))->html();
        $crawler = new Crawler($content, null, null);

        // A detail record without any spec cell has nothing to extract; calling
        // closest() on an empty node list would throw.
        $specs = $crawler->filter('.calendarspecs__spec');
        if ($specs->count() === 0) {
            return $event;
        }
        $table = $specs->first()->closest('table');
        if ($table === null) {
            return $event;
        }

        // Row label => key in $event. 'Why TradersCare' is spelled the way the
        // label text is returned.
        $fieldsByLabel = [
            'Measures' => 'measures',
            'Usual Effect' => 'usual_effect',
            'Frequency' => 'frequency',
            'Why TradersCare' => 'why_traders_care',
            'Derived Via' => 'derived_via',
        ];

        $table->filter('tr')->each(function (Crawler $tr) use (&$event, $fieldsByLabel) {
            $labelNode = $tr->filter('.calendarspecs__spec');
            $description = $tr->filter('.calendarspecs__specdescription');
            if ($labelNode->count() === 0 || $description->count() === 0) {
                // Not a label/description row; text() would throw on an empty node list.
                return;
            }

            $label = $labelNode->text();

            if ($label === 'Source') {
                $links = [];
                $description->filter('a')->each(function (Crawler $link) use (&$links) {
                    $links[] = ['text' => $link->text(), 'href' => $link->attr('href')];
                });

                // First link: the source itself. Second link: its latest release.
                // Either may be missing, in which case the field stays null.
                $event['sourceTEXT'] = isset($links[0]) ? $links[0]['text'] : null;
                $event['sourceURL'] = isset($links[0]) ? $links[0]['href'] : null;
                $event['latestURL'] = isset($links[1]) ? $links[1]['href'] : null;

                return;
            }

            if (isset($fieldsByLabel[$label])) {
                $event[$fieldsByLabel[$label]] = $description->text();
            }
        });

        return $event;
    }

    /**
     * Get the events between a start and end date, both inclusive.
     * If no endDate is defined - then it will get all events since $startDate.
     *
     * Only the calendar day of each bound is used; the time of day is ignored.
     * One HTTP request is made per returned event.
     *
     * @param DateTime $startDate
     * @param DateTime|null $endDate
     *
     * @return array list of event records as built by processEvent()
     */
    public function getEventsBetweenDates(DateTime $startDate, ?DateTime $endDate = null)
    {
        $events = [];
        $totalCalendarRows = $this->table->count();

        // Compare on `Y-m-d` strings so the result does not depend on the time of
        // day of the bounds (or of "now", which date parsing would otherwise mix in).
        $startDay = $startDate->format('Y-m-d');
        $endDay = $endDate !== null ? $endDate->format('Y-m-d') : null;

        foreach ($this->dateIndexes as $stringDate => $range) {
            // Process only the range from the start date
            if (strcmp($stringDate, $startDay) < 0) {
                continue;
            }
            // and break early when we reach the end.
            if ($endDay !== null && strcmp($stringDate, $endDay) > 0) {
                break;
            }

            // `!` resets the unparsed time fields to midnight instead of "now".
            $date = date_create_from_format('!Y-m-d', $stringDate);

            // 'end' is the inclusive last row of the day, so the exclusive loop
            // limit is one past it; the last day runs to the end of the table.
            $firstRow = $range['start'];
            $rowLimit = $range['end'] !== null ? $range['end'] + 1 : $totalCalendarRows;

            for ($i = $firstRow; $i < $rowLimit; $i++) {
                $events[] = $this->processEvent($date, new Crawler($this->table->getNode($i)));
            }
        }

        return $events;
    }
}
// Load the calendar of the month being queried. date_create() without arguments
// would load the current month, which contains no January 2020 rows.
$parser = new CalendarParser(date_create('2020-01-01'));

// Dump the events dated 2020-01-01 through 2020-01-02.
var_dump(
    $parser->getEventsBetweenDates(
        date_create_from_format('Y-m-d H:i:s', '2020-01-01 00:00:00'),
        date_create_from_format('Y-m-d H:i:s', '2020-01-02 23:59:59')
    )
);
2020-01-02
vào một mảng chứa dữ liệu hàng. Đúng không?