Skip to content

Commit f3ea738

Browse files
authored
Merge pull request #856 from 0xSekar/issue-809-document-encoding
Fix encoding on downloaded documents
2 parents e2b79ed + 1b9c012 commit f3ea738

File tree

4 files changed

+160
-4
lines changed

4 files changed

+160
-4
lines changed

src/Traits/DownloadsDocument.php

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -74,12 +74,13 @@ public function download(
7474
// SimpleXML or as a plain TAB/CSV, but the original encoding is required to parse XLSX
7575
// and PDF documents.
7676
if (! ($contentType === ContentType::XLSX || $contentType === ContentType::PDF)) {
77-
$this->encoding = $this->detectEncoding($contents, $response);
7877
$contents = mb_convert_encoding(
7978
$contents,
8079
static::$defaultEncoding,
81-
$this->encoding ?? mb_internal_encoding()
80+
$this->detectEncoding($contents, $response) ?? mb_internal_encoding()
8281
);
82+
// Encoding is now $defaultEncoding
83+
$this->encoding = static::$defaultEncoding;
8384
}
8485

8586
return $this->parseDocument($contents);
@@ -222,13 +223,17 @@ protected function detectEncoding(string $contents, ResponseInterface $response)
222223
$encoding = static::$defaultEncoding;
223224
} elseif (! $this->encoding) {
224225
// If encoding is not provided try to automatically detect the encoding from the HTTP response
225-
$encodings = [static::$defaultEncoding];
226+
// Some reports omit the Japanese encoding from the response header,
227+
// so CP932 has to be added explicitly as a detection candidate.
228+
$encodings = [static::$defaultEncoding, 'CP932'];
226229
if ($response->hasHeader('Content-Type')) {
227230
$parsed = Header::parse($response->getHeader('Content-Type'));
228231

229232
foreach ($parsed as $header) {
230233
if ($header['charset'] ?? null) {
231-
$headerEncoding = $header['charset'];
234+
// Some reports declare 'Cp1252' as the charset in the header
235+
// even though their content is actually 'ISO-8859-1' encoded.
236+
$headerEncoding = str_replace('Cp1252', 'ISO-8859-1', $header['charset']);
232237
array_unshift($encodings, $headerEncoding);
233238
break;
234239
}

tests/DocumentDownloadTest.php

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace SellingPartnerApi\Tests;
6+
7+
use GuzzleHttp\Client as HttpClient;
8+
use GuzzleHttp\Handler\MockHandler;
9+
use GuzzleHttp\HandlerStack;
10+
use GuzzleHttp\Psr7\Response;
11+
use GuzzleHttp\Psr7\Utils;
12+
use PHPUnit\Framework\TestCase;
13+
use Saloon\Config;
14+
use Saloon\Http\Faking\MockClient;
15+
use Saloon\Http\Faking\MockResponse;
16+
use SellingPartnerApi\Authentication\GetAccessTokenRequest;
17+
use SellingPartnerApi\Enums\Endpoint;
18+
use SellingPartnerApi\Seller\ReportsV20210630\Requests\GetReportDocument;
19+
use SellingPartnerApi\Seller\ReportsV20210630\Responses\ReportDocument;
20+
use SellingPartnerApi\SellingPartnerApi;
21+
22+
/**
 * Verifies that downloaded report documents are returned untouched when
 * post-processing is disabled and are converted to UTF-8 when it is enabled,
 * for payloads whose real encoding is not UTF-8.
 */
class DocumentDownloadTest extends TestCase
{
    /** Report type passed to every Reports API call and download in this suite. */
    private const REPORT_TYPE = 'GET_MERCHANT_LISTINGS_ALL_DATA';

    protected function setUp(): void
    {
        // Start every test without a global mock client and make any request
        // that is not explicitly mocked fail instead of reaching the network.
        MockClient::destroyGlobal();
        Config::preventStrayRequests();
    }

    public function test_document_download_cp932(): void
    {
        $this->assertDownloadIsConvertedToUtf8(
            realEncoding: 'CP932',
            responseEncoding: 'CP932',
            rawDetectionOrder: 'CP932, UTF-8',
            processedDetectionOrder: 'UTF-8, CP932',
            expectedField1: 'こんにちは世界',
        );
    }

    // Some EU countries report the content-type charset as Cp1252 when the
    // document is actually ISO-8859-1 encoded.
    public function test_document_download_cp1252(): void
    {
        // The header value is spelled 'Cp1252' on purpose: the workaround in
        // detectEncoding() is a case-sensitive str_replace('Cp1252', ...), so an
        // upper-case 'CP1252' would bypass the very branch this test targets.
        $this->assertDownloadIsConvertedToUtf8(
            realEncoding: 'ISO-8859-1',
            responseEncoding: 'Cp1252',
            rawDetectionOrder: 'ISO-8859-1',
            processedDetectionOrder: 'UTF-8, ISO-8859-1',
            expectedField1: 'Gähnen',
        );
    }

    /**
     * Downloads the same fixture twice (raw, then post-processed) and checks
     * the encoding of each result.
     *
     * @param  string  $realEncoding  Encoding of the fixture file; also the prefix of its file name.
     * @param  string  $responseEncoding  Charset advertised in the mocked Content-Type header.
     * @param  string  $rawDetectionOrder  Candidate list handed to mb_detect_encoding() for the raw download.
     * @param  string  $processedDetectionOrder  Candidate list handed to mb_detect_encoding() for the parsed value.
     * @param  string  $expectedField1  Expected UTF-8 value of "Field1" in the first parsed row.
     */
    private function assertDownloadIsConvertedToUtf8(
        string $realEncoding,
        string $responseEncoding,
        string $rawDetectionOrder,
        string $processedDetectionOrder,
        string $expectedField1,
    ): void {
        $document = $this->fetchReportDocument($realEncoding);
        $client = $this->makeDownloadClient($realEncoding, $responseEncoding);

        // Unprocessed document data keeps the encoding of the fixture.
        $raw = $document->download(self::REPORT_TYPE, false, null, $client);
        $this->assertSame($realEncoding, mb_detect_encoding($raw, $rawDetectionOrder, true));

        // Processed document data is UTF-8 encoded.
        $rows = $document->download(self::REPORT_TYPE, true, null, $client);
        $this->assertArrayHasKey(0, $rows);
        $this->assertArrayHasKey('Field1', $rows[0]);
        $this->assertSame('UTF-8', mb_detect_encoding($rows[0]['Field1'], $processedDetectionOrder, true));
        $this->assertSame($expectedField1, $rows[0]['Field1']);
    }

    /**
     * Runs getReportDocument against a mocked Selling Partner API and returns
     * the resulting DTO, whose URL points at the fixture for $realEncoding.
     */
    private function fetchReportDocument(string $realEncoding): ReportDocument
    {
        $mockClient = new MockClient([
            GetAccessTokenRequest::class => fn () => MockResponse::make([
                'access_token' => 'access-token',
                'token_type' => 'bearer',
                'expires_in' => 3600,
                'refresh_token' => 'refresh-token',
            ]),
            GetReportDocument::class => MockResponse::make([
                'reportDocumentId' => 'TEST_ID',
                'url' => "https://test.com/{$realEncoding}EncodedFile.txt",
            ]),
        ]);

        $connector = SellingPartnerApi::seller(
            clientId: 'client-id',
            clientSecret: 'client-secret',
            refreshToken: 'refresh-token',
            endpoint: Endpoint::FE_SANDBOX,
        );
        $connector->withMockClient($mockClient);

        $document = $connector
            ->reportsV20210630()
            ->getReportDocument('TEST_ID', self::REPORT_TYPE)
            ->dto();
        $this->assertInstanceOf(ReportDocument::class, $document);

        return $document;
    }

    /**
     * Builds a Guzzle client whose handler queue holds two identical fixture
     * responses, one for each download() call made by a test.
     */
    private function makeDownloadClient(string $realEncoding, string $responseEncoding): HttpClient
    {
        $handler = new MockHandler([
            $this->makeDownloadResponse($realEncoding, $responseEncoding),
            $this->makeDownloadResponse($realEncoding, $responseEncoding),
        ]);

        return new HttpClient(['handler' => HandlerStack::create($handler)]);
    }

    /**
     * Builds one mocked download response that streams the fixture file and
     * advertises $responseEncoding as its charset.
     */
    private function makeDownloadResponse(string $realEncoding, string $responseEncoding): Response
    {
        // Resolve the fixture relative to this file so the suite does not depend
        // on the working directory, and open it read-only. Utils::tryFopen()
        // throws instead of returning false when the file cannot be opened.
        $fixture = __DIR__ . "/MockData/{$realEncoding}EncodedFile.txt";

        return new Response(
            200,
            [
                'Content-Type' => "text/tab-separated-values; charset={$responseEncoding}",
                'host' => 'test.com',
            ],
            Utils::streamFor(Utils::tryFopen($fixture, 'r')),
        );
    }
}
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Field1 Field2 Field3
2+
こんにちは世界 こんにちは世界 こんにちは世界
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Field1 Field2 Field3
2+
Gähnen Löwe Straße

0 commit comments

Comments
 (0)