Skip to content

Commit c95515e

Browse files
committed
Improve network analysis
1 parent 896512e commit c95515e

File tree

2 files changed

+11
-4
lines changed

2 files changed

+11
-4
lines changed

export-network.sh

+2-2
Original file line number | Diff line number | Diff line change
@@ -100,7 +100,7 @@ do
100100
done
101101
done
102102

103-
for TYPE in "density" "clustering-coefficient"; do
103+
for TYPE in "density" "clustering-coefficient" "triCountGraph-stat"; do
104104
# PREFIX=density
105105
if [[ $TAG == "all" && -e $OUTPUT_DIR/network-scores-${TYPE}.csv ]]; then
106106
rm $OUTPUT_DIR/network-scores-${TYPE}.csv
@@ -111,6 +111,6 @@ do
111111
| grep -v -P "^\d+,average-clustering-coefficient$" \
112112
| sed 's/all,average-clustering-coefficient/tag,average-clustering-coefficient/' \
113113
>> $OUTPUT_DIR/network-scores-${TYPE}.csv
114-
# rm $OUTPUT_DIR/network-scores-$TAG-${TYPE}.csv
114+
rm $OUTPUT_DIR/network-scores-$TAG-${TYPE}.csv
115115
done
116116
done < $INPUT_DIR/network-by-concepts-tags.csv

scala/src/main/scala/de/gwdg/metadataqa/marc/spark/Network.scala

+9-2
Original file line number | Diff line number | Diff line change
@@ -181,10 +181,16 @@ object Network {
181181
log.info("STEP 5: clustering coefficients")
182182

183183
val triCountGraph = graph.triangleCount()
184-
// triCountGraph.vertices.map(x => x._2).stats()
185-
val tricountDF = triCountGraph.vertices.toDF("id", "count")
184+
val triCountGraphstat = triCountGraph.vertices
185+
.map(x => x._2)
186+
.toDF("x")
187+
.select("x")
188+
.summary().toDF("statistic", "value")
189+
this.write("network-scores" + suffix + "-triCountGraph-stat", triCountGraphstat)
190+
// val tricountDF = triCountGraph.vertices.toDF("id", "count")
186191

187192
// var degreesRDD = graph.degrees.cache()
193+
/*
188194
val maxTrisGraph = graph.degrees.mapValues(d => d * (d - 1) / 2.0)
189195
val maxTrisDF = maxTrisGraph.toDF("id", "theoreticalMax")
190196
@@ -199,6 +205,7 @@ object Network {
199205
200206
val dataDF = Seq(arageClusteringCoefficient).toDF("average-clustering-coefficient")
201207
this.write("network-scores" + suffix + "-clustering-coefficient", dataDF)
208+
*/
202209
}
203210

204211
def write(file: String, df: DataFrame): Unit = {

0 commit comments

Comments
 (0)