Skip to content

Commit 9f2c426

Browse files
committed
feat(importCDX): enhance importer VCS sanitization and comp naming convention using VCS
Signed-off-by: Sameed <[email protected]>
1 parent 840fa97 commit 9f2c426

File tree

3 files changed

+197
-159
lines changed

3 files changed

+197
-159
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
package org.eclipse.sw360.common.utils;
2+
3+
import java.io.IOException;
4+
import java.net.*;
5+
import java.nio.charset.StandardCharsets;
6+
import java.util.*;
7+
import java.util.stream.Collectors;
8+
9+
import org.apache.logging.log4j.LogManager;
10+
import org.apache.logging.log4j.Logger;
11+
import org.eclipse.sw360.datahandler.common.CommonUtils;
12+
import org.eclipse.sw360.datahandler.common.SW360Constants;
13+
14+
public class RepositoryURL {
15+
private String url;
16+
private static final Logger log = LogManager.getLogger(RepositoryURL.class);
17+
private static final String SCHEMA_PATTERN = ".+://(\\w*(?:[\\-@.\\\\s,_:/][/(.\\-)A-Za-z0-9]+)*)";
18+
private static final String VCS_HOSTS_STRING = SW360Constants.VCS_HOSTS;
19+
private static final Map<String, String> VCS_HOSTS= parseVCSHosts(VCS_HOSTS_STRING);
20+
private Set<String> redirectedUrls = new HashSet<>();
21+
22+
public RepositoryURL(){}
23+
24+
public RepositoryURL(String url) {
25+
if (url == null || url.isEmpty()) {
26+
throw new IllegalArgumentException("URL cannot be null or empty");
27+
}
28+
this.url = processURL(url);
29+
}
30+
31+
public String processURL(String url) {
32+
String sanitized = sanitizeVCS(url);
33+
return handleURLRedirection(sanitized);
34+
}
35+
36+
37+
private static String formatVCSUrl(String host, String[] urlParts) {
38+
String formatString = VCS_HOSTS.get(host);
39+
40+
int paramCount = formatString.split("%s", -1).length - 1;
41+
42+
List<String> extractedParams = new ArrayList<>();
43+
for (int i = 3; i < urlParts.length && extractedParams.size() < paramCount; i++) {
44+
String part = urlParts[i].replaceAll("\\.git.*|#.*", "");
45+
46+
if (part.equals("+") || part.equals("-") || CommonUtils.isNullEmptyOrWhitespace(part)) {
47+
break;
48+
}
49+
50+
extractedParams.add(part);
51+
}
52+
53+
while (extractedParams.size() < paramCount) {
54+
extractedParams.add("");
55+
}
56+
57+
return String.format(formatString, extractedParams.toArray()).replaceAll("(?<!:)//+", "");
58+
}
59+
60+
public static String sanitizeVCSByHost(String vcs, String host) {
61+
String encodedVCS = URLEncoder.encode(vcs, StandardCharsets.UTF_8);
62+
63+
try {
64+
URI uri = URI.create(encodedVCS);
65+
String[] urlParts = uri.getPath().split("/");
66+
67+
String formattedUrl = formatVCSUrl(host, urlParts);
68+
69+
return formattedUrl.endsWith("/") ? formattedUrl.substring(0, formattedUrl.length() - 1) : formattedUrl;
70+
71+
} catch (IllegalArgumentException e) {
72+
log.error("Invalid URL format: {}", vcs, e);
73+
return null;
74+
}
75+
}
76+
77+
public static String sanitizeVCS(String vcs) {
78+
for (String host : VCS_HOSTS.keySet()) {
79+
if (vcs.toLowerCase().contains(host.toLowerCase())) {
80+
return sanitizeVCSByHost(vcs, host);
81+
}
82+
}
83+
return vcs;
84+
}
85+
86+
public static String getComponentNameFromVCS(String vcsUrl, boolean isGetVendorandName) {
87+
String compName = vcsUrl.replaceAll(SCHEMA_PATTERN, "$1");
88+
String[] parts = compName.split("/");
89+
90+
if (parts.length < 2) {
91+
return compName;
92+
}
93+
94+
String domain = parts[0];
95+
String[] pathParts = Arrays.copyOfRange(parts, 1, parts.length);
96+
97+
if (VCS_HOSTS.containsKey(domain)) {
98+
return isGetVendorandName ? String.join("/", pathParts) : pathParts[pathParts.length - 1];
99+
}
100+
101+
return isGetVendorandName ? String.join("/", pathParts) : parts[parts.length - 1];
102+
}
103+
104+
public String handleURLRedirection(String urlString) {
105+
URL url;
106+
HttpURLConnection connection = null;
107+
try {
108+
url = new URL(urlString);
109+
} catch (MalformedURLException e) {
110+
log.error("Invalid URL format: {}", e.getMessage());
111+
return urlString;
112+
}
113+
114+
int redirectCount = 0;
115+
116+
while (redirectCount < SW360Constants.VCS_REDIRECTION_LIMIT) {
117+
try {
118+
connection = openConnection(url);
119+
int status = connection.getResponseCode();
120+
121+
if (status == HttpURLConnection.HTTP_MOVED_PERM || status == HttpURLConnection.HTTP_MOVED_TEMP || status == 308) {
122+
String newUrl = connection.getHeaderField("Location");
123+
connection.disconnect();
124+
125+
// Resolve relative URLs
126+
url = new URL(url, newUrl);
127+
128+
if (!"https".equalsIgnoreCase(url.getProtocol())) {
129+
log.error("Insecure redirection to non-HTTPS URL: {}", url);
130+
return urlString;
131+
}
132+
133+
redirectCount++;
134+
redirectedUrls.add(urlString);
135+
} else {
136+
connection.disconnect();
137+
break;
138+
}
139+
} catch (IOException e) {
140+
log.error("Error during redirection handling: {}", e.getMessage());
141+
return urlString;
142+
}
143+
finally {
144+
if (connection != null) {
145+
connection.disconnect();
146+
}
147+
}
148+
149+
}
150+
151+
if (redirectCount == 0 || redirectCount == SW360Constants.VCS_REDIRECTION_LIMIT) {
152+
if (redirectCount == SW360Constants.VCS_REDIRECTION_LIMIT) {
153+
log.error("Exceeded maximum redirect limit. Returning original URL.");
154+
}
155+
return urlString;
156+
}
157+
return sanitizeVCS(url.toString());
158+
}
159+
160+
private static HttpURLConnection openConnection(URL url) throws IOException{
161+
HttpURLConnection connection = (HttpURLConnection) url.openConnection();
162+
connection.setInstanceFollowRedirects(false);
163+
connection.setConnectTimeout(SW360Constants.VCS_REDIRECTION_TIMEOUT_LIMIT);
164+
connection.setReadTimeout(SW360Constants.VCS_REDIRECTION_TIMEOUT_LIMIT);
165+
return connection;
166+
}
167+
168+
public Set<String> getRiderctedUrls(){
169+
return redirectedUrls;
170+
}
171+
172+
private static Map<String, String> parseVCSHosts(String propertyValue) {
173+
if (propertyValue == null || propertyValue.isEmpty()) {
174+
log.error("VCS_HOSTS property is empty");
175+
return new HashMap<>();
176+
}
177+
178+
return Arrays.stream(propertyValue.split(","))
179+
.map(entry -> entry.split(":", 2)) // Split each key-value pair
180+
.filter(parts -> parts.length == 2) // Ensure valid mappings
181+
.collect(Collectors.toMap(parts -> parts[0], parts -> parts[1]));
182+
}
183+
184+
}

0 commit comments

Comments
 (0)