Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
361 changes: 361 additions & 0 deletions client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"net"
"net/http"
"net/http/httptest"
"net/url"
"os"
"path"
"path/filepath"
Expand Down Expand Up @@ -1848,6 +1849,366 @@ func TestHTTPClientWithIPv6Disabled(t *testing.T) {
}
}

// TestHTTPClientPOSTWithTextPayload sends a text/plain POST through the
// WARC-writing HTTP client to a local echo server. It then checks that every
// file found in the rotator output directory passes the expected hash check
// and contains a request record holding both the POST method and the body.
func TestHTTPClientPOSTWithTextPayload(t *testing.T) {
	const payload = "Hello from POST request"

	rotatorSettings := defaultRotatorSettings(t)

	// Test server that expects POST requests and echoes back the received body.
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.Method != http.MethodPost {
			t.Errorf("Expected POST request, got %s", r.Method)
			w.WriteHeader(http.StatusMethodNotAllowed)
			return
		}

		body, err := io.ReadAll(r.Body)
		if err != nil {
			t.Errorf("Failed to read request body: %v", err)
			w.WriteHeader(http.StatusInternalServerError)
			return
		}

		w.Header().Set("Content-Type", "text/plain")
		w.WriteHeader(http.StatusOK)
		w.Write([]byte("Received: "))
		w.Write(body)
	}))
	defer server.Close()

	// Initialize the WARC-writing HTTP client.
	httpClient, err := NewWARCWritingHTTPClient(HTTPClientSettings{RotatorSettings: rotatorSettings})
	if err != nil {
		t.Fatalf("Unable to init WARC writing HTTP client: %s", err)
	}
	waitForErrors := drainErrChan(t, httpClient.ErrChan)

	// Create and send a POST request with a text payload.
	req, err := http.NewRequest(http.MethodPost, server.URL, strings.NewReader(payload))
	if err != nil {
		t.Fatal(err)
	}
	req.Header.Set("Content-Type", "text/plain")

	resp, err := httpClient.Do(req)
	if err != nil {
		t.Fatal(err)
	}
	defer resp.Body.Close()

	// Drain the response body before closing the client.
	io.Copy(io.Discard, resp.Body)

	httpClient.Close()
	waitForErrors()

	files, err := filepath.Glob(filepath.Join(rotatorSettings.OutputDirectory, "*"))
	if err != nil {
		t.Fatal(err)
	}

	// Verify the WARC file was created.
	if len(files) == 0 {
		t.Fatal("No WARC files were created")
	}

	// checkFile validates a single WARC file. It is a function of its own so
	// the deferred Close runs once per file instead of accumulating until the
	// whole test returns.
	checkFile := func(warcPath string) {
		testFileSingleHashCheck(t, warcPath, "sha1:RFV2ZU2BHITF3PW7BSPBQE65GFZS7F5G", []string{"154"}, 1, server.URL+"/")

		file, err := os.Open(warcPath)
		if err != nil {
			t.Fatalf("failed to open %q: %v", warcPath, err)
		}
		defer file.Close()

		reader, err := NewReader(file)
		if err != nil {
			t.Fatalf("warc.NewReader failed for %q: %v", warcPath, err)
		}

		foundRequest := false

		for {
			record, err := reader.ReadRecord()
			if err == io.EOF {
				break
			}
			if err != nil {
				t.Fatalf("warc.ReadRecord failed: %v", err)
			}

			// Only request records are inspected; other record types are skipped.
			if record.Header.Get("WARC-Type") == "request" {
				foundRequest = true

				// Rewind before reading so the whole record content is checked.
				if _, err := record.Content.Seek(0, io.SeekStart); err != nil {
					t.Fatalf("failed to rewind request record content: %v", err)
				}
				content, err := io.ReadAll(record.Content)
				if err != nil {
					t.Fatalf("failed to read request record content: %v", err)
				}
				contentStr := string(content)

				// Verify it's a POST request.
				if !strings.Contains(contentStr, "POST") {
					t.Errorf("Request record does not contain POST method")
				}

				// Verify the request body is present.
				if !strings.Contains(contentStr, payload) {
					t.Errorf("Request record does not contain the expected request body")
				}
			}

			record.Content.Close()
		}

		if !foundRequest {
			t.Error("No request record found in WARC file")
		}
	}

	// Check the WARC records contain the POST request and response.
	for _, warcPath := range files {
		checkFile(warcPath)
	}
}

// TestHTTPClientPOSTWithJSONPayload sends an application/json POST through
// the WARC-writing HTTP client to a local test server. It then checks that
// every file found in the rotator output directory passes the expected hash
// check and contains a request record carrying the exact JSON payload.
func TestHTTPClientPOSTWithJSONPayload(t *testing.T) {
	const jsonPayload = `{"name":"test","value":123}`

	rotatorSettings := defaultRotatorSettings(t)

	// Test server that expects POST requests with JSON and wraps the received
	// body in a JSON envelope.
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.Method != http.MethodPost {
			t.Errorf("Expected POST request, got %s", r.Method)
			w.WriteHeader(http.StatusMethodNotAllowed)
			return
		}

		// A wrong Content-Type is reported but the request is still served.
		if contentType := r.Header.Get("Content-Type"); contentType != "application/json" {
			t.Errorf("Expected Content-Type: application/json, got %s", contentType)
		}

		body, err := io.ReadAll(r.Body)
		if err != nil {
			t.Errorf("Failed to read request body: %v", err)
			w.WriteHeader(http.StatusInternalServerError)
			return
		}

		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(http.StatusCreated)
		w.Write([]byte(`{"status":"success","received":`))
		w.Write(body)
		w.Write([]byte(`}`))
	}))
	defer server.Close()

	// Initialize the WARC-writing HTTP client.
	httpClient, err := NewWARCWritingHTTPClient(HTTPClientSettings{RotatorSettings: rotatorSettings})
	if err != nil {
		t.Fatalf("Unable to init WARC writing HTTP client: %s", err)
	}
	waitForErrors := drainErrChan(t, httpClient.ErrChan)

	// Create and send a POST request with a JSON payload.
	req, err := http.NewRequest(http.MethodPost, server.URL, strings.NewReader(jsonPayload))
	if err != nil {
		t.Fatal(err)
	}
	req.Header.Set("Content-Type", "application/json")

	resp, err := httpClient.Do(req)
	if err != nil {
		t.Fatal(err)
	}
	defer resp.Body.Close()

	// Drain the response body before closing the client.
	io.Copy(io.Discard, resp.Body)

	httpClient.Close()
	waitForErrors()

	files, err := filepath.Glob(filepath.Join(rotatorSettings.OutputDirectory, "*"))
	if err != nil {
		t.Fatal(err)
	}

	// Verify the WARC file was created.
	if len(files) == 0 {
		t.Fatal("No WARC files were created")
	}

	// checkFile validates a single WARC file. It is a function of its own so
	// the deferred Close runs once per file instead of accumulating until the
	// whole test returns.
	checkFile := func(warcPath string) {
		testFileSingleHashCheck(t, warcPath, "sha1:IAKLOIOTQX2W7PAAWWA2TELLU5HCKO3V", []string{"191"}, 1, server.URL+"/")

		file, err := os.Open(warcPath)
		if err != nil {
			t.Fatalf("failed to open %q: %v", warcPath, err)
		}
		defer file.Close()

		reader, err := NewReader(file)
		if err != nil {
			t.Fatalf("warc.NewReader failed for %q: %v", warcPath, err)
		}

		foundJSONRequest := false

		for {
			record, err := reader.ReadRecord()
			if err == io.EOF {
				break
			}
			if err != nil {
				t.Fatalf("warc.ReadRecord failed: %v", err)
			}

			// Only request records are inspected; other record types are skipped.
			if record.Header.Get("WARC-Type") == "request" {
				// Rewind before reading so the whole record content is checked.
				if _, err := record.Content.Seek(0, io.SeekStart); err != nil {
					t.Fatalf("failed to rewind request record content: %v", err)
				}
				content, err := io.ReadAll(record.Content)
				if err != nil {
					t.Fatalf("failed to read request record content: %v", err)
				}
				contentStr := string(content)

				// Verify it's a POST request.
				if !strings.Contains(contentStr, "POST") {
					t.Errorf("Request record does not contain POST method")
				}

				// Verify the JSON payload is present.
				if strings.Contains(contentStr, jsonPayload) {
					foundJSONRequest = true
				}
			}

			record.Content.Close()
		}

		if !foundJSONRequest {
			t.Error("JSON payload not found in request record")
		}
	}

	// Check the WARC records contain the POST request with JSON body.
	for _, warcPath := range files {
		checkFile(warcPath)
	}
}

// TestHTTPClientPOSTWithFormData sends an application/x-www-form-urlencoded
// POST through the WARC-writing HTTP client to a local test server. It then
// checks that every file found in the rotator output directory passes the
// expected hash check and contains a request record carrying both encoded
// form fields.
func TestHTTPClientPOSTWithFormData(t *testing.T) {
	rotatorSettings := defaultRotatorSettings(t)

	// Test server that expects POST requests with form data and reports the
	// username plus the password length.
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.Method != http.MethodPost {
			t.Errorf("Expected POST request, got %s", r.Method)
			w.WriteHeader(http.StatusMethodNotAllowed)
			return
		}

		if err := r.ParseForm(); err != nil {
			t.Errorf("Failed to parse form: %v", err)
			w.WriteHeader(http.StatusBadRequest)
			return
		}

		username := r.FormValue("username")
		password := r.FormValue("password")

		w.Header().Set("Content-Type", "text/plain")
		w.WriteHeader(http.StatusOK)
		w.Write([]byte("Login attempt for user: " + username + " (password length: " + strconv.Itoa(len(password)) + ")"))
	}))
	defer server.Close()

	// Initialize the WARC-writing HTTP client.
	httpClient, err := NewWARCWritingHTTPClient(HTTPClientSettings{RotatorSettings: rotatorSettings})
	if err != nil {
		t.Fatalf("Unable to init WARC writing HTTP client: %s", err)
	}
	waitForErrors := drainErrChan(t, httpClient.ErrChan)

	// Create and send a POST request with form data.
	formData := url.Values{}
	formData.Set("username", "testuser")
	formData.Set("password", "testpass123")

	req, err := http.NewRequest(http.MethodPost, server.URL, strings.NewReader(formData.Encode()))
	if err != nil {
		t.Fatal(err)
	}
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")

	resp, err := httpClient.Do(req)
	if err != nil {
		t.Fatal(err)
	}
	defer resp.Body.Close()

	// Drain the response body before closing the client.
	io.Copy(io.Discard, resp.Body)

	httpClient.Close()
	waitForErrors()

	files, err := filepath.Glob(filepath.Join(rotatorSettings.OutputDirectory, "*"))
	if err != nil {
		t.Fatal(err)
	}

	// Verify the WARC file was created.
	if len(files) == 0 {
		t.Fatal("No WARC files were created")
	}

	// checkFile validates a single WARC file. It is a function of its own so
	// the deferred Close runs once per file instead of accumulating until the
	// whole test returns.
	checkFile := func(warcPath string) {
		testFileSingleHashCheck(t, warcPath, "sha1:DGXE2J6TLUT3GYLTA2LNA4NQMMPF5SWX", []string{"175"}, 1, server.URL+"/")

		file, err := os.Open(warcPath)
		if err != nil {
			t.Fatalf("failed to open %q: %v", warcPath, err)
		}
		defer file.Close()

		reader, err := NewReader(file)
		if err != nil {
			t.Fatalf("warc.NewReader failed for %q: %v", warcPath, err)
		}

		foundFormRequest := false

		for {
			record, err := reader.ReadRecord()
			if err == io.EOF {
				break
			}
			if err != nil {
				t.Fatalf("warc.ReadRecord failed: %v", err)
			}

			// Only request records are inspected; other record types are skipped.
			if record.Header.Get("WARC-Type") == "request" {
				// Rewind before reading so the whole record content is checked.
				if _, err := record.Content.Seek(0, io.SeekStart); err != nil {
					t.Fatalf("failed to rewind request record content: %v", err)
				}
				content, err := io.ReadAll(record.Content)
				if err != nil {
					t.Fatalf("failed to read request record content: %v", err)
				}
				contentStr := string(content)

				// Verify it's a POST request.
				if !strings.Contains(contentStr, "POST") {
					t.Errorf("Request record does not contain POST method")
				}

				// Verify the form data is present (URL-encoded).
				if strings.Contains(contentStr, "username=testuser") && strings.Contains(contentStr, "password=testpass123") {
					foundFormRequest = true
				}
			}

			record.Content.Close()
		}

		if !foundFormRequest {
			t.Error("Form data not found in request record")
		}
	}

	// Check the WARC records contain the POST request with form data.
	for _, warcPath := range files {
		checkFile(warcPath)
	}
}

// MARK: Benchmarks
func BenchmarkConcurrentUnder2MB(b *testing.B) {
var (
Expand Down
4 changes: 2 additions & 2 deletions dedupe.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ func (d *customDialer) checkLocalRevisit(digest string) revisitRecord {
func checkCDXRevisit(CDXURL string, digest string, targetURI string, cookie string) (revisitRecord, error) {
// CDX expects no hash header. For now we need to strip it.
digest = strings.SplitN(digest, ":", 2)[1]

req, err := http.NewRequest("GET", CDXURL+"/web/timemap/cdx?url="+url.QueryEscape(targetURI)+"&limit=-1", nil)
if err != nil {
return revisitRecord{}, err
Expand Down Expand Up @@ -95,7 +95,7 @@ func checkCDXRevisit(CDXURL string, digest string, targetURI string, cookie stri
func checkDoppelgangerRevisit(DoppelgangerHost string, digest string, targetURI string) (revisitRecord, error) {
// Doppelganger is not expecting a hash header either but this will all be rewritten ... shortly...
digest = strings.SplitN(digest, ":", 2)[1]

req, err := http.NewRequest("GET", DoppelgangerHost+"/api/records/"+digest+"?uri="+targetURI, nil)
if err != nil {
return revisitRecord{}, err
Expand Down
2 changes: 1 addition & 1 deletion gzip_interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,4 @@ type GzipReaderInterface interface {
io.ReadCloser
Multistream(enable bool)
Reset(r io.Reader) error
}
}
Loading
Loading