Skip to content

Commit

Permalink
Use RFC5952 canonical form for IPv6 addresses in WARC-IP-Address
Browse files Browse the repository at this point in the history
  • Loading branch information
ato committed Nov 21, 2024
1 parent f207143 commit 314931d
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 1 deletion.
38 changes: 38 additions & 0 deletions src/org/netpreserve/jwarc/InetAddresses.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

/*
* Copyright (C) 2008 The Guava Authors
* Copyright (C) 2024 National Library of Australia and the jwarc contributors
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
Expand All @@ -19,6 +20,7 @@

package org.netpreserve.jwarc;

import java.net.Inet6Address;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.nio.ByteBuffer;
Expand Down Expand Up @@ -224,4 +226,40 @@ private static IllegalArgumentException formatIllegalArgumentException(
String format, Object... args) {
return new IllegalArgumentException(String.format(Locale.ROOT, format, args));
}

/**
 * Formats an IPv6 address as the RFC 5952 canonical textual representation.
 *
 * <p>Each 16-bit group is written in lowercase hexadecimal without leading zeros. The longest
 * run of two or more consecutive all-zero groups is replaced by {@code "::"}; when several runs
 * share the longest length the first one is replaced. A lone zero group is never shortened and
 * is written as {@code "0"}.
 *
 * @param address the IPv6 address to format; only its raw address bytes are used
 * @return the canonical text form, e.g. {@code "2001:db8::1"}
 */
static String canonicalInet6(Inet6Address address) {
    byte[] bytes = address.getAddress();
    int[] groups = new int[bytes.length / 2];
    for (int i = 0; i < groups.length; i++) {
        groups[i] = ((bytes[2 * i] & 0xFF) << 8) | (bytes[2 * i + 1] & 0xFF);
    }

    // Locate the longest run of zero groups. Runs are measured in groups rather than in
    // characters of the rendered string so that runs at the start, middle and end of the
    // address compare fairly. Starting the best length at 1 together with the strict '>'
    // ensures a single zero group is never compressed and that the first run wins ties.
    int bestStart = -1;
    int bestLength = 1;
    int runStart = -1;
    for (int i = 0; i <= groups.length; i++) {
        if (i < groups.length && groups[i] == 0) {
            if (runStart < 0) {
                runStart = i;
            }
        } else if (runStart >= 0) {
            int runLength = i - runStart;
            if (runLength > bestLength) {
                bestStart = runStart;
                bestLength = runLength;
            }
            runStart = -1;
        }
    }

    StringBuilder out = new StringBuilder(39);
    for (int i = 0; i < groups.length; i++) {
        if (i == bestStart) {
            out.append("::");
            // Skip the rest of the compressed run; the loop increment moves past its last group.
            i += bestLength - 1;
            continue;
        }
        // No separator is needed directly after "::" because it already ends with a colon.
        if (i > 0 && out.charAt(out.length() - 1) != ':') {
            out.append(':');
        }
        out.append(Integer.toHexString(groups[i]));
    }
    return out.toString();
}
}
9 changes: 8 additions & 1 deletion src/org/netpreserve/jwarc/WarcCaptureRecord.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
package org.netpreserve.jwarc;

import java.io.IOException;
import java.net.Inet6Address;
import java.net.InetAddress;
import java.net.URI;
import java.nio.ByteBuffer;
Expand Down Expand Up @@ -60,7 +61,13 @@ public B concurrentTo(URI recordId) {
}

/**
 * Adds a {@code WARC-IP-Address} header holding the textual form of the given address.
 *
 * <p>IPv6 addresses are formatted with {@link InetAddresses#canonicalInet6(Inet6Address)};
 * any other address uses {@link InetAddress#getHostAddress()}.
 *
 * @param ipAddress the address to record
 * @return the result of {@code addHeader} for the new header
 */
public B ipAddress(InetAddress ipAddress) {
    String formatted;
    if (ipAddress instanceof Inet6Address) {
        formatted = InetAddresses.canonicalInet6((Inet6Address) ipAddress);
    } else {
        formatted = ipAddress.getHostAddress();
    }
    return addHeader("WARC-IP-Address", formatted);
}
}
}
34 changes: 34 additions & 0 deletions test/org/netpreserve/jwarc/InetAddressesTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package org.netpreserve.jwarc;

import org.junit.Test;

import java.net.Inet6Address;
import java.net.InetAddress;

import static org.junit.Assert.*;
import static org.netpreserve.jwarc.InetAddresses.canonicalInet6;

public class InetAddressesTest {
    /**
     * Checks the canonical text produced for a range of IPv6 literals: fully expanded input,
     * the unspecified and loopback addresses, addresses with no zero groups, and addresses
     * with more than one run of zero groups.
     */
    @Test
    public void testCanonicalInet6() throws Exception {
        assertCanonical("2001:db8::1", "2001:db8:0:0:0:0:0:1");
        assertCanonical("::", "0:0:0:0:0:0:0:0");
        assertCanonical("::1", "0:0:0:0:0:0:0:1");
        assertCanonical("2001:db8:1:1:1:1:1:1", "2001:db8:1:1:1:1:1:1");
        assertCanonical("2001:0:0:1::1", "2001:0:0:1:0:0:0:1");
        assertCanonical("2001:db8:f::1", "2001:db8:000f:0:0:0:0:1");
        assertCanonical("2001:db8::1:0:0:1", "2001:0db8:0000:0000:0001:0000:0000:0001");
        assertCanonical("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff", "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff");
        assertCanonical("2001:200f::1", "2001:200f:0:0:0:0:0:1");
    }

    /**
     * Resolves {@code literal} with {@link InetAddress#getByName(String)}, casts the result to
     * {@link Inet6Address}, and asserts that {@code canonicalInet6} renders it as {@code expected}.
     */
    private static void assertCanonical(String expected, String literal) throws Exception {
        Inet6Address parsed = (Inet6Address) InetAddress.getByName(literal);
        assertEquals(expected, canonicalInet6(parsed));
    }

}

0 comments on commit 314931d

Please sign in to comment.